1
0
mirror of http://mpg123.de/trunk/.git synced 2025-10-26 15:31:15 +03:00

Moving in the library source from the mpg123lib branch.

git-svn-id: svn://scm.orgis.org/mpg123/trunk@998 35dc7657-300d-0410-a2e5-dc2837fedb53
This commit is contained in:
thor
2007-10-14 22:32:17 +00:00
parent 0061205665
commit 78a4864a32
64 changed files with 84662 additions and 0 deletions

90
src/libmpg123/Makefile.am Normal file
View File

@@ -0,0 +1,90 @@
## Makefile.am: produce Makefile.in from this
## copyright by the mpg123 project - free software under the terms of the LGPL 2.1
## see COPYING and AUTHORS files in distribution or http://mpg123.org
## initially written by Nicholas J. Humfrey
#AM_CFLAGS = @AUDIO_CFLAGS@
#AM_LDFLAGS =
EXTRA_DIST =
# Static archives left in this directory are removed by "make clean".
CLEANFILES = *.a
# The library is built through libtool; the plain static-library variant
# (lib_LIBRARIES and the libmpg123_a_* lines) is kept only as commented-out text.
#lib_LIBRARIES = libmpg123.a
lib_LTLIBRARIES = libmpg123.la
# Public header that gets installed.
include_HEADERS = mpg123.h
#libmpg123_a_LIBADD = @DECODER_OBJ@
#libmpg123_a_DEPENDENCIES = @DECODER_OBJ@
# The -version-info value is substituted by configure; the export list is
# taken from libmpg123.sym.
libmpg123_la_LDFLAGS = -no-undefined -version-info @LIBMPG123_VERSION@ -export-symbols libmpg123.sym
# @DECODER_LOBJ@ is substituted by configure (presumably the libtool objects of
# the decoders selected there - confirm in configure.ac). The same list is
# linked into the library and declared as a dependency of it.
libmpg123_la_LIBADD = @DECODER_LOBJ@
libmpg123_la_DEPENDENCIES = @DECODER_LOBJ@
# Sources that are always part of the library.
libmpg123_la_SOURCES = \
parse.c \
parse.h \
frame.c \
format.c \
frame.h \
reader.h \
debug.h \
decode.h \
decode_2to1.c \
decode_4to1.c \
decode_ntom.c \
equalizer.c \
huffman.h \
icy.c \
icy.h \
id3.c \
id3.h \
true.h \
l2tables.h \
layer1.c \
layer2.c \
layer3.c \
getbits.h \
optimize.h \
optimize.c \
readers.c \
tabinit.c \
stringbuf.c \
libmpg123.c \
mpg123.h \
mpg123lib_intern.h \
mangle.h \
getcpuflags.h \
libmpg123.sym
# Sources that are not unconditionally compiled (CPU specific decoder and DCT
# variants). Presumably the ones actually built are those named by
# @DECODER_LOBJ@ above - confirm in configure.ac.
EXTRA_libmpg123_la_SOURCES = \
dct36_3dnowext.S \
dct36_3dnow.S \
dct64_3dnowext.S \
dct64_3dnow.S \
dct64_altivec.c \
dct64.c \
dct64_i386.c \
dct64_mmx.S \
dct64_sse.S \
decode_3dnowext.S \
decode_3dnow.S \
decode_altivec.c \
decode.c \
decode_i386.c \
decode_i586_dither.S \
decode_i586.S \
decode_mmx.S \
decode_sse3d.h \
decode_sse.S \
dnoise.c \
equalizer_3dnow.S \
tabinit_mmx.S \
getcpuflags.S
# explicit preprocessing since mingw32 does not honor the big .S
# The preprocessor output is written next to the source as <source>.s and is
# removed again once the assembler has succeeded.
# NOTE(review): only a .S.o rule is defined here although the library is built
# with libtool; check that the .lo objects of the .S sources are produced as
# intended.
.S.o:
$(CPP) $(CPPFLAGS) $< > $<.s
$(CCAS) $(CCASFLAGS) -c -o $@ $<.s && rm $<.s

503
src/libmpg123/dct36_3dnow.S Normal file
View File

@@ -0,0 +1,503 @@
/*
dct36_3dnow.S: Replacement of dct36() with AMD's 3DNow! SIMD operations support
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Syuuhei Kashiyama
This code based 'dct36_3dnow.s' by Syuuhei Kashiyama
<squash@mb.kcom.ne.jp>,only two types of changes have been made:
- remove PREFETCH instruction for speedup
- change function name for support 3DNow! automatic detect
You can find Kashiyama's original 3dnow! support patch
(for mpg123-0.59o) at
http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
<kim@comtec.co.jp> - after 1.Apr.1999
Replacement of dct36() with AMD's 3DNow! SIMD operations support
Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
The author of this program disclaim whole expressed or implied
warranties with regard to this program, and in no event shall the
author of this program liable to whatever resulted from the use of
this program. Use it at your own risk.
*/
#include "mangle.h"
/*
	dct36_3dnow: 3DNow! version of dct36().

	Five 32-bit values are fetched from 8..24(%ebp), i.e. the first five stack
	arguments under the usual i386 C calling convention. As used below:
	  %eax (arg 1): 18 floats (bytes 0..71), read and modified in place
	  %esi (arg 2): 18 floats (bytes 0..68), only read
	  %ecx (arg 3): 18 floats (bytes 0..68), only written
	  %edx (arg 4): 36 floats (bytes 0..140), only read
	  %ebx (arg 5): 18 floats written at byte offsets 0, 128, ..., 2176
	Presumably these are inbuf, o1, o2, wintab and tsbuf of the C dct36() -
	confirm against the C prototype.

	All arithmetic works on pairs of floats held in one MMX register, so every
	movq from (%eax) processes two neighbouring input values at once.
	%mm0, %mm1 and %mm7 are set up once and stay live until the end.
	%esi, %ebx and %ebp are saved and restored; %eax, %ecx, %edx are not.
*/
.globl ASM_NAME(dct36_3dnow)
/* .type ASM_NAME(dct36_3dnow),@function */
ASM_NAME(dct36_3dnow):
/* Prologue: frame pointer, 120 bytes of locals, callee-saved registers. */
pushl %ebp
movl %esp,%ebp
subl $120,%esp
pushl %esi
pushl %ebx
/* Load the five arguments. */
movl 8(%ebp),%eax
movl 12(%ebp),%esi
movl 16(%ebp),%ecx
movl 20(%ebp),%edx
movl 24(%ebp),%ebx
/* After the subl and the two pushes %esp already equals %ebp-128, so this
   leaves %esp unchanged. The 120 byte area is not referenced by this routine. */
leal -128(%ebp),%esp
femms
/* Pass 1 over the buffer at %eax: every float at bytes 4..68 gets the
   previous (not yet updated) value of its predecessor added. */
movq (%eax),%mm0
movq 4(%eax),%mm1
pfadd %mm1,%mm0
movq %mm0,4(%eax)
psrlq $32,%mm1
movq 12(%eax),%mm2
punpckldq %mm2,%mm1
pfadd %mm2,%mm1
movq %mm1,12(%eax)
psrlq $32,%mm2
movq 20(%eax),%mm3
punpckldq %mm3,%mm2
pfadd %mm3,%mm2
movq %mm2,20(%eax)
psrlq $32,%mm3
movq 28(%eax),%mm4
punpckldq %mm4,%mm3
pfadd %mm4,%mm3
movq %mm3,28(%eax)
psrlq $32,%mm4
movq 36(%eax),%mm5
punpckldq %mm5,%mm4
pfadd %mm5,%mm4
movq %mm4,36(%eax)
psrlq $32,%mm5
movq 44(%eax),%mm6
punpckldq %mm6,%mm5
pfadd %mm6,%mm5
movq %mm5,44(%eax)
psrlq $32,%mm6
movq 52(%eax),%mm7
punpckldq %mm7,%mm6
pfadd %mm7,%mm6
movq %mm6,52(%eax)
psrlq $32,%mm7
movq 60(%eax),%mm0
punpckldq %mm0,%mm7
pfadd %mm0,%mm7
movq %mm7,60(%eax)
psrlq $32,%mm0
movd 68(%eax),%mm1
pfadd %mm1,%mm0
movd %mm0,68(%eax)
/* Pass 2: the floats at bytes 12, 20, ..., 68 (every second one) each get the
   pre-pass-2 value of the float 8 bytes before them added. */
movd 4(%eax),%mm0
movd 12(%eax),%mm1
punpckldq %mm1,%mm0
punpckldq 20(%eax),%mm1
pfadd %mm1,%mm0
movd %mm0,12(%eax)
psrlq $32,%mm0
movd %mm0,20(%eax)
psrlq $32,%mm1
movd 28(%eax),%mm2
punpckldq %mm2,%mm1
punpckldq 36(%eax),%mm2
pfadd %mm2,%mm1
movd %mm1,28(%eax)
psrlq $32,%mm1
movd %mm1,36(%eax)
psrlq $32,%mm2
movd 44(%eax),%mm3
punpckldq %mm3,%mm2
punpckldq 52(%eax),%mm3
pfadd %mm3,%mm2
movd %mm2,44(%eax)
psrlq $32,%mm2
movd %mm2,52(%eax)
psrlq $32,%mm3
movd 60(%eax),%mm4
punpckldq %mm4,%mm3
punpckldq 68(%eax),%mm4
pfadd %mm4,%mm3
movd %mm3,60(%eax)
psrlq $32,%mm3
movd %mm3,68(%eax)
/* Terms kept for the rest of the routine:
   %mm0 = pair at 24(%eax) times the float at COS9+12 (in both lanes),
   %mm1 = pair at 48(%eax) times the float at COS9+24 (in both lanes),
   %mm7 = (1.0, 0.0), made by converting the integer pair (1, 0).
   %eax is borrowed for the constant and restored via the stack. */
movq 24(%eax),%mm0
movq 48(%eax),%mm1
movd ASM_NAME(COS9)+12,%mm2
punpckldq %mm2,%mm2
movd ASM_NAME(COS9)+24,%mm3
punpckldq %mm3,%mm3
pfmul %mm2,%mm0
pfmul %mm3,%mm1
pushl %eax
movl $1,%eax
movd %eax,%mm7
pi2fd %mm7,%mm7
popl %eax
/* First pair of partial sums:
   %mm2 = in@8*COS9+4 + %mm0 + in@40*COS9+20 + in@56*COS9+28
   %mm3 = in@0 + in@16*COS9+8 + in@32*COS9+16 + %mm1 + in@64*COS9+32
   (in@N is the float pair at N(%eax), COS9+N the float at that byte offset). */
movq 8(%eax),%mm2
movd ASM_NAME(COS9)+4,%mm3
punpckldq %mm3,%mm3
pfmul %mm3,%mm2
pfadd %mm0,%mm2
movq 40(%eax),%mm3
movd ASM_NAME(COS9)+20,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfadd %mm3,%mm2
movq 56(%eax),%mm3
movd ASM_NAME(COS9)+28,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfadd %mm3,%mm2
movq (%eax),%mm3
movq 16(%eax),%mm4
movd ASM_NAME(COS9)+8,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
movq 32(%eax),%mm4
movd ASM_NAME(COS9)+16,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
pfadd %mm1,%mm3
movq 64(%eax),%mm4
movd ASM_NAME(COS9)+32,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
/* Output step on %mm2+%mm3, scaled lane-wise by (1.0, tfcos36+0).
   Sum of both lanes times the floats at 108/104(%edx) -> 36/32(%ecx).
   Low minus high lane times 32/36(%edx) plus 32/36(%esi) -> 1024/1152(%ebx).
   The following output steps have the same shape with other offsets. */
movq %mm2,%mm4
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+0,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 108(%edx),%mm6
punpckldq 104(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,36(%ecx)
psrlq $32,%mm5
movd %mm5,32(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 32(%edx),%mm6
punpckldq 36(%edx),%mm6
pfmul %mm6,%mm5
movd 32(%esi),%mm6
punpckldq 36(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,1024(%ebx)
psrlq $32,%mm5
movd %mm5,1152(%ebx)
/* Output step on %mm3-%mm2 with tfcos36+32:
   -> 68/0(%ecx) and 0/2176(%ebx). */
movq %mm3,%mm4
pfsub %mm2,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+32,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 140(%edx),%mm6
punpckldq 72(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,68(%ecx)
psrlq $32,%mm5
movd %mm5,0(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 0(%edx),%mm6
punpckldq 68(%edx),%mm6
pfmul %mm6,%mm5
movd 0(%esi),%mm6
punpckldq 68(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,0(%ebx)
psrlq $32,%mm5
movd %mm5,2176(%ebx)
/* Second pair of partial sums:
   %mm2 = (in@8 - in@40 - in@56) * COS9+12
   %mm3 = (in@16 - in@32 - in@64) * COS9+24 - in@48 + in@0 */
movq 8(%eax),%mm2
movq 40(%eax),%mm3
pfsub %mm3,%mm2
movq 56(%eax),%mm3
pfsub %mm3,%mm2
movd ASM_NAME(COS9)+12,%mm3
punpckldq %mm3,%mm3
pfmul %mm3,%mm2
movq 16(%eax),%mm3
movq 32(%eax),%mm4
pfsub %mm4,%mm3
movq 64(%eax),%mm4
pfsub %mm4,%mm3
movd ASM_NAME(COS9)+24,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
movq 48(%eax),%mm4
pfsub %mm4,%mm3
movq (%eax),%mm4
pfadd %mm4,%mm3
/* Output step on %mm2+%mm3 with tfcos36+4:
   -> 40/28(%ecx) and 896/1280(%ebx). */
movq %mm2,%mm4
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+4,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 112(%edx),%mm6
punpckldq 100(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,40(%ecx)
psrlq $32,%mm5
movd %mm5,28(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 28(%edx),%mm6
punpckldq 40(%edx),%mm6
pfmul %mm6,%mm5
movd 28(%esi),%mm6
punpckldq 40(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,896(%ebx)
psrlq $32,%mm5
movd %mm5,1280(%ebx)
/* Output step on %mm3-%mm2 with tfcos36+28:
   -> 64/4(%ecx) and 128/2048(%ebx). */
movq %mm3,%mm4
pfsub %mm2,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+28,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 136(%edx),%mm6
punpckldq 76(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,64(%ecx)
psrlq $32,%mm5
movd %mm5,4(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 4(%edx),%mm6
punpckldq 64(%edx),%mm6
pfmul %mm6,%mm5
movd 4(%esi),%mm6
punpckldq 64(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,128(%ebx)
psrlq $32,%mm5
movd %mm5,2048(%ebx)
/* Third pair of partial sums:
   %mm2 = in@8*COS9+20 - %mm0 - in@40*COS9+28 + in@56*COS9+4
   %mm3 = in@0 - in@16*COS9+32 - in@32*COS9+8 + %mm1 + in@64*COS9+16 */
movq 8(%eax),%mm2
movd ASM_NAME(COS9)+20,%mm3
punpckldq %mm3,%mm3
pfmul %mm3,%mm2
pfsub %mm0,%mm2
movq 40(%eax),%mm3
movd ASM_NAME(COS9)+28,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfsub %mm3,%mm2
movq 56(%eax),%mm3
movd ASM_NAME(COS9)+4,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfadd %mm3,%mm2
movq (%eax),%mm3
movq 16(%eax),%mm4
movd ASM_NAME(COS9)+32,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfsub %mm4,%mm3
movq 32(%eax),%mm4
movd ASM_NAME(COS9)+8,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfsub %mm4,%mm3
pfadd %mm1,%mm3
movq 64(%eax),%mm4
movd ASM_NAME(COS9)+16,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
/* Output step on %mm2+%mm3 with tfcos36+8:
   -> 44/24(%ecx) and 768/1408(%ebx). */
movq %mm2,%mm4
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+8,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 116(%edx),%mm6
punpckldq 96(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,44(%ecx)
psrlq $32,%mm5
movd %mm5,24(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 24(%edx),%mm6
punpckldq 44(%edx),%mm6
pfmul %mm6,%mm5
movd 24(%esi),%mm6
punpckldq 44(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,768(%ebx)
psrlq $32,%mm5
movd %mm5,1408(%ebx)
/* Output step on %mm3-%mm2 with tfcos36+24:
   -> 60/8(%ecx) and 256/1920(%ebx). */
movq %mm3,%mm4
pfsub %mm2,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+24,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 132(%edx),%mm6
punpckldq 80(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,60(%ecx)
psrlq $32,%mm5
movd %mm5,8(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 8(%edx),%mm6
punpckldq 60(%edx),%mm6
pfmul %mm6,%mm5
movd 8(%esi),%mm6
punpckldq 60(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,256(%ebx)
psrlq $32,%mm5
movd %mm5,1920(%ebx)
/* Fourth pair of partial sums:
   %mm2 = in@8*COS9+28 - %mm0 + in@40*COS9+4 - in@56*COS9+20
   %mm3 = in@0 - in@16*COS9+16 + in@32*COS9+32 + %mm1 - in@64*COS9+8 */
movq 8(%eax),%mm2
movd ASM_NAME(COS9)+28,%mm3
punpckldq %mm3,%mm3
pfmul %mm3,%mm2
pfsub %mm0,%mm2
movq 40(%eax),%mm3
movd ASM_NAME(COS9)+4,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfadd %mm3,%mm2
movq 56(%eax),%mm3
movd ASM_NAME(COS9)+20,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfsub %mm3,%mm2
movq (%eax),%mm3
movq 16(%eax),%mm4
movd ASM_NAME(COS9)+16,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfsub %mm4,%mm3
movq 32(%eax),%mm4
movd ASM_NAME(COS9)+32,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
pfadd %mm1,%mm3
movq 64(%eax),%mm4
movd ASM_NAME(COS9)+8,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfsub %mm4,%mm3
/* Output step on %mm2+%mm3 with tfcos36+12:
   -> 48/20(%ecx) and 640/1536(%ebx). */
movq %mm2,%mm4
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+12,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 120(%edx),%mm6
punpckldq 92(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,48(%ecx)
psrlq $32,%mm5
movd %mm5,20(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 20(%edx),%mm6
punpckldq 48(%edx),%mm6
pfmul %mm6,%mm5
movd 20(%esi),%mm6
punpckldq 48(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,640(%ebx)
psrlq $32,%mm5
movd %mm5,1536(%ebx)
/* Output step on %mm3-%mm2 with tfcos36+20:
   -> 56/12(%ecx) and 384/1792(%ebx). */
movq %mm3,%mm4
pfsub %mm2,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+20,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 128(%edx),%mm6
punpckldq 84(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,56(%ecx)
psrlq $32,%mm5
movd %mm5,12(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 12(%edx),%mm6
punpckldq 56(%edx),%mm6
pfmul %mm6,%mm5
movd 12(%esi),%mm6
punpckldq 56(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,384(%ebx)
psrlq $32,%mm5
movd %mm5,1792(%ebx)
/* Last output step, on in@0 - in@16 + in@32 - in@48 + in@64 with
   tfcos36+16: -> 52/16(%ecx) and 512/1664(%ebx). */
movq (%eax),%mm4
movq 16(%eax),%mm3
pfsub %mm3,%mm4
movq 32(%eax),%mm3
pfadd %mm3,%mm4
movq 48(%eax),%mm3
pfsub %mm3,%mm4
movq 64(%eax),%mm3
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+16,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 124(%edx),%mm6
punpckldq 88(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,52(%ecx)
psrlq $32,%mm5
movd %mm5,16(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 16(%edx),%mm6
punpckldq 52(%edx),%mm6
pfmul %mm6,%mm5
movd 16(%esi),%mm6
punpckldq 52(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,512(%ebx)
psrlq $32,%mm5
movd %mm5,1664(%ebx)
/* Leave MMX state, restore the saved registers and the frame. */
femms
popl %ebx
popl %esi
movl %ebp,%esp
popl %ebp
ret

View File

@@ -0,0 +1,510 @@
/*
dct36_3dnowext: extended 3DNow optimized DCT36
copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
Transformed back into standalone asm, with help of
gcc -S -DHAVE_CONFIG_H -I. -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct36_3dnowext.{S,c}
MPlayer comment follows.
*/
/*
* dct36_3dnow.c - 3DNow! optimized dct36()
*
* This code based 'dct36_3dnow.s' by Syuuhei Kashiyama
* <squash@mb.kcom.ne.jp>, only two types of changes have been made:
*
* - removed PREFETCH instruction for speedup
* - changed function name for support 3DNow! automatic detection
*
* You can find Kashiyama's original 3dnow! support patch
* (for mpg123-0.59o) at
* http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
*
* by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
* <kim@comtec.co.jp> - after 1.Apr.1999
*
* Modified for use with MPlayer, for details see the changelog at
* http://svn.mplayerhq.hu/mplayer/trunk/
* $Id: dct36_3dnow.c 18786 2006-06-22 13:34:00Z diego $
*
* Original disclaimer:
* The author of this program disclaim whole expressed or implied
* warranties with regard to this program, and in no event shall the
* author of this program liable to whatever resulted from the use of
* this program. Use it at your own risk.
*
* 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi
*/
#include "mangle.h"
/*
	dct36_3dnowext: extended 3DNow! version of dct36().

	Five 32-bit values are fetched from 8..24(%ebp), i.e. the first five stack
	arguments under the usual i386 C calling convention. As used below:
	  %eax (arg 1): 18 floats (bytes 0..71), read and modified in place
	  %esi (arg 2): 18 floats (bytes 0..68), only read
	  %ecx (arg 3): 18 floats (bytes 0..68), only written
	  %edx (arg 4): 36 floats (bytes 0..140), only read
	  %ebx (arg 5): 18 floats written at byte offsets 0, 128, ..., 2176
	Presumably these are inbuf, o1, o2, wintab and tsbuf of the C dct36() -
	confirm against the C prototype.

	The instruction stream matches dct36_3dnow except that the first store to
	(%ecx) uses pswapd plus one movq, and that there is no femms before the
	first MMX instruction (only at the end).
	%mm0, %mm1 and %mm7 are set up once and stay live until the end.
	%esi, %ebx and %ebp are saved and restored; %eax, %ecx, %edx are not.
*/
.text
ALIGN32,,31
.globl ASM_NAME(dct36_3dnowext)
/* .type ASM_NAME(dct36_3dnowext), @function */
ASM_NAME(dct36_3dnowext):
/* Prologue: frame pointer and callee-saved registers, then the arguments. */
pushl %ebp
movl %esp, %ebp
pushl %esi
pushl %ebx
movl 8(%ebp), %eax
movl 12(%ebp), %esi
movl 16(%ebp), %ecx
movl 20(%ebp), %edx
movl 24(%ebp), %ebx
#APP
/* Pass 1 over the buffer at %eax: every float at bytes 4..68 gets the
   previous (not yet updated) value of its predecessor added. */
movq (%eax),%mm0
movq 4(%eax),%mm1
pfadd %mm1,%mm0
movq %mm0,4(%eax)
psrlq $32,%mm1
movq 12(%eax),%mm2
punpckldq %mm2,%mm1
pfadd %mm2,%mm1
movq %mm1,12(%eax)
psrlq $32,%mm2
movq 20(%eax),%mm3
punpckldq %mm3,%mm2
pfadd %mm3,%mm2
movq %mm2,20(%eax)
psrlq $32,%mm3
movq 28(%eax),%mm4
punpckldq %mm4,%mm3
pfadd %mm4,%mm3
movq %mm3,28(%eax)
psrlq $32,%mm4
movq 36(%eax),%mm5
punpckldq %mm5,%mm4
pfadd %mm5,%mm4
movq %mm4,36(%eax)
psrlq $32,%mm5
movq 44(%eax),%mm6
punpckldq %mm6,%mm5
pfadd %mm6,%mm5
movq %mm5,44(%eax)
psrlq $32,%mm6
movq 52(%eax),%mm7
punpckldq %mm7,%mm6
pfadd %mm7,%mm6
movq %mm6,52(%eax)
psrlq $32,%mm7
movq 60(%eax),%mm0
punpckldq %mm0,%mm7
pfadd %mm0,%mm7
movq %mm7,60(%eax)
psrlq $32,%mm0
movd 68(%eax),%mm1
pfadd %mm1,%mm0
movd %mm0,68(%eax)
/* Pass 2: the floats at bytes 12, 20, ..., 68 (every second one) each get the
   pre-pass-2 value of the float 8 bytes before them added. */
movd 4(%eax),%mm0
movd 12(%eax),%mm1
punpckldq %mm1,%mm0
punpckldq 20(%eax),%mm1
pfadd %mm1,%mm0
movd %mm0,12(%eax)
psrlq $32,%mm0
movd %mm0,20(%eax)
psrlq $32,%mm1
movd 28(%eax),%mm2
punpckldq %mm2,%mm1
punpckldq 36(%eax),%mm2
pfadd %mm2,%mm1
movd %mm1,28(%eax)
psrlq $32,%mm1
movd %mm1,36(%eax)
psrlq $32,%mm2
movd 44(%eax),%mm3
punpckldq %mm3,%mm2
punpckldq 52(%eax),%mm3
pfadd %mm3,%mm2
movd %mm2,44(%eax)
psrlq $32,%mm2
movd %mm2,52(%eax)
psrlq $32,%mm3
movd 60(%eax),%mm4
punpckldq %mm4,%mm3
punpckldq 68(%eax),%mm4
pfadd %mm4,%mm3
movd %mm3,60(%eax)
psrlq $32,%mm3
movd %mm3,68(%eax)
/* Terms kept for the rest of the routine:
   %mm0 = pair at 24(%eax) times the float at COS9+12 (in both lanes),
   %mm1 = pair at 48(%eax) times the float at COS9+24 (in both lanes),
   %mm7 = (1.0, 0.0), made by converting the integer pair (1, 0).
   %eax is borrowed for the constant and restored via the stack. */
movq 24(%eax),%mm0
movq 48(%eax),%mm1
movd ASM_NAME(COS9)+12,%mm2
punpckldq %mm2,%mm2
movd ASM_NAME(COS9)+24,%mm3
punpckldq %mm3,%mm3
pfmul %mm2,%mm0
pfmul %mm3,%mm1
pushl %eax
movl $1,%eax
movd %eax,%mm7
pi2fd %mm7,%mm7
popl %eax
/* First pair of partial sums:
   %mm2 = in@8*COS9+4 + %mm0 + in@40*COS9+20 + in@56*COS9+28
   %mm3 = in@0 + in@16*COS9+8 + in@32*COS9+16 + %mm1 + in@64*COS9+32
   (in@N is the float pair at N(%eax), COS9+N the float at that byte offset). */
movq 8(%eax),%mm2
movd ASM_NAME(COS9)+4,%mm3
punpckldq %mm3,%mm3
pfmul %mm3,%mm2
pfadd %mm0,%mm2
movq 40(%eax),%mm3
movd ASM_NAME(COS9)+20,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfadd %mm3,%mm2
movq 56(%eax),%mm3
movd ASM_NAME(COS9)+28,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfadd %mm3,%mm2
movq (%eax),%mm3
movq 16(%eax),%mm4
movd ASM_NAME(COS9)+8,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
movq 32(%eax),%mm4
movd ASM_NAME(COS9)+16,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
pfadd %mm1,%mm3
movq 64(%eax),%mm4
movd ASM_NAME(COS9)+32,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
/* Output step on %mm2+%mm3, scaled lane-wise by (1.0, tfcos36+0).
   Sum of both lanes times the floats at 108/104(%edx); the two products are
   swapped with pswapd and stored with a single movq to 32(%ecx) and 36(%ecx).
   Low minus high lane times 32/36(%edx) plus 32/36(%esi) -> 1024/1152(%ebx).
   The following output steps have the same shape with other offsets but use
   two movd stores for (%ecx). */
movq %mm2,%mm4
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+0,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 108(%edx),%mm6
punpckldq 104(%edx),%mm6
pfmul %mm6,%mm5
pswapd %mm5,%mm5
movq %mm5,32(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 32(%edx),%mm6
punpckldq 36(%edx),%mm6
pfmul %mm6,%mm5
movd 32(%esi),%mm6
punpckldq 36(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,1024(%ebx)
psrlq $32,%mm5
movd %mm5,1152(%ebx)
/* Output step on %mm3-%mm2 with tfcos36+32:
   -> 68/0(%ecx) and 0/2176(%ebx). */
movq %mm3,%mm4
pfsub %mm2,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+32,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 140(%edx),%mm6
punpckldq 72(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,68(%ecx)
psrlq $32,%mm5
movd %mm5,0(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 0(%edx),%mm6
punpckldq 68(%edx),%mm6
pfmul %mm6,%mm5
movd 0(%esi),%mm6
punpckldq 68(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,0(%ebx)
psrlq $32,%mm5
movd %mm5,2176(%ebx)
/* Second pair of partial sums:
   %mm2 = (in@8 - in@40 - in@56) * COS9+12
   %mm3 = (in@16 - in@32 - in@64) * COS9+24 - in@48 + in@0 */
movq 8(%eax),%mm2
movq 40(%eax),%mm3
pfsub %mm3,%mm2
movq 56(%eax),%mm3
pfsub %mm3,%mm2
movd ASM_NAME(COS9)+12,%mm3
punpckldq %mm3,%mm3
pfmul %mm3,%mm2
movq 16(%eax),%mm3
movq 32(%eax),%mm4
pfsub %mm4,%mm3
movq 64(%eax),%mm4
pfsub %mm4,%mm3
movd ASM_NAME(COS9)+24,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
movq 48(%eax),%mm4
pfsub %mm4,%mm3
movq (%eax),%mm4
pfadd %mm4,%mm3
/* Output step on %mm2+%mm3 with tfcos36+4:
   -> 40/28(%ecx) and 896/1280(%ebx). */
movq %mm2,%mm4
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+4,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 112(%edx),%mm6
punpckldq 100(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,40(%ecx)
psrlq $32,%mm5
movd %mm5,28(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 28(%edx),%mm6
punpckldq 40(%edx),%mm6
pfmul %mm6,%mm5
movd 28(%esi),%mm6
punpckldq 40(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,896(%ebx)
psrlq $32,%mm5
movd %mm5,1280(%ebx)
/* Output step on %mm3-%mm2 with tfcos36+28:
   -> 64/4(%ecx) and 128/2048(%ebx). */
movq %mm3,%mm4
pfsub %mm2,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+28,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 136(%edx),%mm6
punpckldq 76(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,64(%ecx)
psrlq $32,%mm5
movd %mm5,4(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 4(%edx),%mm6
punpckldq 64(%edx),%mm6
pfmul %mm6,%mm5
movd 4(%esi),%mm6
punpckldq 64(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,128(%ebx)
psrlq $32,%mm5
movd %mm5,2048(%ebx)
/* Third pair of partial sums:
   %mm2 = in@8*COS9+20 - %mm0 - in@40*COS9+28 + in@56*COS9+4
   %mm3 = in@0 - in@16*COS9+32 - in@32*COS9+8 + %mm1 + in@64*COS9+16 */
movq 8(%eax),%mm2
movd ASM_NAME(COS9)+20,%mm3
punpckldq %mm3,%mm3
pfmul %mm3,%mm2
pfsub %mm0,%mm2
movq 40(%eax),%mm3
movd ASM_NAME(COS9)+28,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfsub %mm3,%mm2
movq 56(%eax),%mm3
movd ASM_NAME(COS9)+4,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfadd %mm3,%mm2
movq (%eax),%mm3
movq 16(%eax),%mm4
movd ASM_NAME(COS9)+32,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfsub %mm4,%mm3
movq 32(%eax),%mm4
movd ASM_NAME(COS9)+8,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfsub %mm4,%mm3
pfadd %mm1,%mm3
movq 64(%eax),%mm4
movd ASM_NAME(COS9)+16,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
/* Output step on %mm2+%mm3 with tfcos36+8:
   -> 44/24(%ecx) and 768/1408(%ebx). */
movq %mm2,%mm4
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+8,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 116(%edx),%mm6
punpckldq 96(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,44(%ecx)
psrlq $32,%mm5
movd %mm5,24(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 24(%edx),%mm6
punpckldq 44(%edx),%mm6
pfmul %mm6,%mm5
movd 24(%esi),%mm6
punpckldq 44(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,768(%ebx)
psrlq $32,%mm5
movd %mm5,1408(%ebx)
/* Output step on %mm3-%mm2 with tfcos36+24:
   -> 60/8(%ecx) and 256/1920(%ebx). */
movq %mm3,%mm4
pfsub %mm2,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+24,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 132(%edx),%mm6
punpckldq 80(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,60(%ecx)
psrlq $32,%mm5
movd %mm5,8(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 8(%edx),%mm6
punpckldq 60(%edx),%mm6
pfmul %mm6,%mm5
movd 8(%esi),%mm6
punpckldq 60(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,256(%ebx)
psrlq $32,%mm5
movd %mm5,1920(%ebx)
/* Fourth pair of partial sums:
   %mm2 = in@8*COS9+28 - %mm0 + in@40*COS9+4 - in@56*COS9+20
   %mm3 = in@0 - in@16*COS9+16 + in@32*COS9+32 + %mm1 - in@64*COS9+8 */
movq 8(%eax),%mm2
movd ASM_NAME(COS9)+28,%mm3
punpckldq %mm3,%mm3
pfmul %mm3,%mm2
pfsub %mm0,%mm2
movq 40(%eax),%mm3
movd ASM_NAME(COS9)+4,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfadd %mm3,%mm2
movq 56(%eax),%mm3
movd ASM_NAME(COS9)+20,%mm4
punpckldq %mm4,%mm4
pfmul %mm4,%mm3
pfsub %mm3,%mm2
movq (%eax),%mm3
movq 16(%eax),%mm4
movd ASM_NAME(COS9)+16,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfsub %mm4,%mm3
movq 32(%eax),%mm4
movd ASM_NAME(COS9)+32,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfadd %mm4,%mm3
pfadd %mm1,%mm3
movq 64(%eax),%mm4
movd ASM_NAME(COS9)+8,%mm5
punpckldq %mm5,%mm5
pfmul %mm5,%mm4
pfsub %mm4,%mm3
/* Output step on %mm2+%mm3 with tfcos36+12:
   -> 48/20(%ecx) and 640/1536(%ebx). */
movq %mm2,%mm4
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+12,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 120(%edx),%mm6
punpckldq 92(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,48(%ecx)
psrlq $32,%mm5
movd %mm5,20(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 20(%edx),%mm6
punpckldq 48(%edx),%mm6
pfmul %mm6,%mm5
movd 20(%esi),%mm6
punpckldq 48(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,640(%ebx)
psrlq $32,%mm5
movd %mm5,1536(%ebx)
/* Output step on %mm3-%mm2 with tfcos36+20:
   -> 56/12(%ecx) and 384/1792(%ebx). */
movq %mm3,%mm4
pfsub %mm2,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+20,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 128(%edx),%mm6
punpckldq 84(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,56(%ecx)
psrlq $32,%mm5
movd %mm5,12(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 12(%edx),%mm6
punpckldq 56(%edx),%mm6
pfmul %mm6,%mm5
movd 12(%esi),%mm6
punpckldq 56(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,384(%ebx)
psrlq $32,%mm5
movd %mm5,1792(%ebx)
/* Last output step, on in@0 - in@16 + in@32 - in@48 + in@64 with
   tfcos36+16: -> 52/16(%ecx) and 512/1664(%ebx). */
movq (%eax),%mm4
movq 16(%eax),%mm3
pfsub %mm3,%mm4
movq 32(%eax),%mm3
pfadd %mm3,%mm4
movq 48(%eax),%mm3
pfsub %mm3,%mm4
movq 64(%eax),%mm3
pfadd %mm3,%mm4
movq %mm7,%mm5
punpckldq ASM_NAME(tfcos36)+16,%mm5
pfmul %mm5,%mm4
movq %mm4,%mm5
pfacc %mm5,%mm5
movd 124(%edx),%mm6
punpckldq 88(%edx),%mm6
pfmul %mm6,%mm5
movd %mm5,52(%ecx)
psrlq $32,%mm5
movd %mm5,16(%ecx)
movq %mm4,%mm6
punpckldq %mm6,%mm5
pfsub %mm6,%mm5
punpckhdq %mm5,%mm5
movd 16(%edx),%mm6
punpckldq 52(%edx),%mm6
pfmul %mm6,%mm5
movd 16(%esi),%mm6
punpckldq 52(%esi),%mm6
pfadd %mm6,%mm5
movd %mm5,512(%ebx)
psrlq $32,%mm5
movd %mm5,1664(%ebx)
/* Leave MMX state before returning to the caller. */
femms
#NO_APP
/* Restore the saved registers and the frame. */
popl %ebx
popl %esi
leave
ret
/* .size ASM_NAME(dct36_3dnowext), .-ASM_NAME(dct36_3dnowext) */

174
src/libmpg123/dct64.c Normal file
View File

@@ -0,0 +1,174 @@
/*
dct64.c: DCT64, the plain C version
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
/*
* Discrete Cosine Transform (DCT) for subband synthesis
*
* -funroll-loops (for gcc) will remove the loops for better performance
* using loops in the source-code enhances readability
*
*
* TODO: write an optimized version for the down-sampling modes
* (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero)
*/
#include "mpg123lib_intern.h"
/*
	dct64: DCT for the subband synthesis, plain C version.

	samples: input, samples[0..31] are read.
	out0:    receives 17 values, at out0[0], out0[16], ..., out0[256].
	out1:    receives 16 values, at out1[0], out1[16], ..., out1[240].

	Cosine factors are taken from pnts[0]..pnts[4] (16, 8, 4, 2 and 1 entries
	used, respectively); every product goes through REAL_MUL.

	The butterfly stages ping-pong between the two halves of bufs. In every
	half-butterfly the descending pointer is derived from the ascending one
	right before use. A running "down += N" at the end of each iteration would
	leave the pointer beyond one-past-the-end of bufs after the last pass of
	stage 3, which C does not permit even if the pointer is never dereferenced.
*/
void dct64(real *out0,real *out1,real *samples)
{
	real bufs[64];

	{
		int n, round;
		real *up, *down, *dst, *cosv;

		/* Stage 1: samples[0..31] -> bufs[0..31], factors pnts[0][15..0]. */
		up   = samples;
		down = up + 32;
		dst  = bufs;
		cosv = pnts[0] + 16;
		for(n = 0; n < 16; ++n)
		{
			*dst++ = (*up++ + *--down);
		}
		for(n = 0; n < 16; ++n)
		{
			*dst++ = REAL_MUL((*--down - *up++), *--cosv);
		}

		/* Stage 2: bufs[0..31] -> bufs[32..63] (dst simply continues),
		   two blocks of 16, both with factors pnts[1][7..0]. */
		up   = bufs;
		cosv = pnts[1] + 8;

		down = up + 16;
		for(n = 0; n < 8; ++n)
		{
			*dst++ = (*up++ + *--down);
		}
		for(n = 0; n < 8; ++n)
		{
			*dst++ = REAL_MUL((*--down - *up++), *--cosv);
		}

		down = up + 16;
		cosv += 8; /* rewind to pnts[1]+8 for the second block */
		for(n = 0; n < 8; ++n)
		{
			*dst++ = (*up++ + *--down);
		}
		for(n = 0; n < 8; ++n)
		{
			/* Second block of a pair: difference with swapped sign. */
			*dst++ = REAL_MUL((*up++ - *--down), *--cosv);
		}

		/* Stage 3: bufs[32..63] -> bufs[0..31], four blocks of 8,
		   factors pnts[2][3..0]. up is at bufs+32 here. */
		dst  = bufs;
		cosv = pnts[2];
		for(round = 0; round < 2; ++round)
		{
			down = up + 8;
			for(n = 0; n < 4; ++n)
			{
				*dst++ = (*up++ + *--down);
			}
			for(n = 3; n >= 0; --n)
			{
				*dst++ = REAL_MUL((*--down - *up++), cosv[n]);
			}

			down = up + 8;
			for(n = 0; n < 4; ++n)
			{
				*dst++ = (*up++ + *--down);
			}
			for(n = 3; n >= 0; --n)
			{
				*dst++ = REAL_MUL((*up++ - *--down), cosv[n]);
			}
		}

		/* Stage 4: bufs[0..31] -> bufs[32..63] (dst continues), eight
		   blocks of 4, factors pnts[3][1] and pnts[3][0]. */
		up   = bufs;
		cosv = pnts[3];
		for(round = 0; round < 4; ++round)
		{
			down = up + 4;
			*dst++ = (*up++ + *--down);
			*dst++ = (*up++ + *--down);
			*dst++ = REAL_MUL((*--down - *up++), cosv[1]);
			*dst++ = REAL_MUL((*--down - *up++), cosv[0]);

			down = up + 4;
			*dst++ = (*up++ + *--down);
			*dst++ = (*up++ + *--down);
			*dst++ = REAL_MUL((*up++ - *--down), cosv[1]);
			*dst++ = REAL_MUL((*up++ - *--down), cosv[0]);
		}

		/* Stage 5: bufs[32..63] -> bufs[0..31], sixteen blocks of 2,
		   single factor pnts[4][0]. up is at bufs+32 here. */
		dst  = bufs;
		cosv = pnts[4];
		for(round = 0; round < 8; ++round)
		{
			real first, second;

			first = *up++; second = *up++;
			*dst++ = (first + second);
			*dst++ = REAL_MUL((first - second), (*cosv));

			first = *up++; second = *up++;
			*dst++ = (first + second);
			*dst++ = REAL_MUL((second - first), (*cosv));
		}
	}

	/* In-place recombination inside bufs[0..31]; the order of the
	   additions within each group matters. */
	{
		real *grp;
		int n;

		for(grp = bufs, n = 0; n < 8; ++n, grp += 4)
		{
			grp[2] += grp[3];
		}

		for(grp = bufs, n = 0; n < 4; ++n, grp += 8)
		{
			grp[4] += grp[6];
			grp[6] += grp[5];
			grp[5] += grp[7];
		}

		for(grp = bufs, n = 0; n < 2; ++n, grp += 16)
		{
			grp[8]  += grp[12];
			grp[12] += grp[10];
			grp[10] += grp[14];
			grp[14] += grp[9];
			grp[9]  += grp[13];
			grp[13] += grp[11];
			grp[11] += grp[15];
		}
	}

	/* Scatter to out0 (descending slots, stride 0x10). */
	out0[0x10*16] = bufs[0];
	out0[0x10*15] = bufs[16+0] + bufs[16+8];
	out0[0x10*14] = bufs[8];
	out0[0x10*13] = bufs[16+8] + bufs[16+4];
	out0[0x10*12] = bufs[4];
	out0[0x10*11] = bufs[16+4] + bufs[16+12];
	out0[0x10*10] = bufs[12];
	out0[0x10* 9] = bufs[16+12] + bufs[16+2];
	out0[0x10* 8] = bufs[2];
	out0[0x10* 7] = bufs[16+2] + bufs[16+10];
	out0[0x10* 6] = bufs[10];
	out0[0x10* 5] = bufs[16+10] + bufs[16+6];
	out0[0x10* 4] = bufs[6];
	out0[0x10* 3] = bufs[16+6] + bufs[16+14];
	out0[0x10* 2] = bufs[14];
	out0[0x10* 1] = bufs[16+14] + bufs[16+1];
	out0[0x10* 0] = bufs[1];

	/* Scatter to out1 (ascending slots, stride 0x10); slot 0 repeats bufs[1]. */
	out1[0x10* 0] = bufs[1];
	out1[0x10* 1] = bufs[16+1] + bufs[16+9];
	out1[0x10* 2] = bufs[9];
	out1[0x10* 3] = bufs[16+9] + bufs[16+5];
	out1[0x10* 4] = bufs[5];
	out1[0x10* 5] = bufs[16+5] + bufs[16+13];
	out1[0x10* 6] = bufs[13];
	out1[0x10* 7] = bufs[16+13] + bufs[16+3];
	out1[0x10* 8] = bufs[3];
	out1[0x10* 9] = bufs[16+3] + bufs[16+11];
	out1[0x10*10] = bufs[11];
	out1[0x10*11] = bufs[16+11] + bufs[16+7];
	out1[0x10*12] = bufs[7];
	out1[0x10*13] = bufs[16+7] + bufs[16+15];
	out1[0x10*14] = bufs[15];
	out1[0x10*15] = bufs[16+15];
}

711
src/libmpg123/dct64_3dnow.S Normal file
View File

@@ -0,0 +1,711 @@
/*
dct64_3dnow.s: Replacement of dct64() with AMD's 3DNow! SIMD operations support
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Syuuhei Kashiyama
Original "license" statement:
The author of this program disclaim whole expressed or implied
warranties with regard to this program, and in no event shall the
author of this program liable to whatever resulted from the use of
this program. Use it at your own risk.
*/
#include "mangle.h"
.globl ASM_NAME(dct64_3dnow)
/* .type ASM_NAME(dct64_3dnow),@function */
ASM_NAME(dct64_3dnow):
subl $256,%esp
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
leal 16(%esp),%ebx
movl 284(%esp),%edi
movl 276(%esp),%ebp
movl 280(%esp),%edx
leal 128(%ebx),%esi
/* femms */
/* 1 */
movl ASM_NAME(pnts),%eax
movq 0(%edi),%mm0
movq %mm0,%mm1
movd 124(%edi),%mm2
punpckldq 120(%edi),%mm2
movq 0(%eax),%mm3
pfadd %mm2,%mm0
movq %mm0,0(%ebx)
pfsub %mm2,%mm1
pfmul %mm3,%mm1
movd %mm1,124(%ebx)
psrlq $32,%mm1
movd %mm1,120(%ebx)
movq 8(%edi),%mm4
movq %mm4,%mm5
movd 116(%edi),%mm6
punpckldq 112(%edi),%mm6
movq 8(%eax),%mm7
pfadd %mm6,%mm4
movq %mm4,8(%ebx)
pfsub %mm6,%mm5
pfmul %mm7,%mm5
movd %mm5,116(%ebx)
psrlq $32,%mm5
movd %mm5,112(%ebx)
movq 16(%edi),%mm0
movq %mm0,%mm1
movd 108(%edi),%mm2
punpckldq 104(%edi),%mm2
movq 16(%eax),%mm3
pfadd %mm2,%mm0
movq %mm0,16(%ebx)
pfsub %mm2,%mm1
pfmul %mm3,%mm1
movd %mm1,108(%ebx)
psrlq $32,%mm1
movd %mm1,104(%ebx)
movq 24(%edi),%mm4
movq %mm4,%mm5
movd 100(%edi),%mm6
punpckldq 96(%edi),%mm6
movq 24(%eax),%mm7
pfadd %mm6,%mm4
movq %mm4,24(%ebx)
pfsub %mm6,%mm5
pfmul %mm7,%mm5
movd %mm5,100(%ebx)
psrlq $32,%mm5
movd %mm5,96(%ebx)
movq 32(%edi),%mm0
movq %mm0,%mm1
movd 92(%edi),%mm2
punpckldq 88(%edi),%mm2
movq 32(%eax),%mm3
pfadd %mm2,%mm0
movq %mm0,32(%ebx)
pfsub %mm2,%mm1
pfmul %mm3,%mm1
movd %mm1,92(%ebx)
psrlq $32,%mm1
movd %mm1,88(%ebx)
movq 40(%edi),%mm4
movq %mm4,%mm5
movd 84(%edi),%mm6
punpckldq 80(%edi),%mm6
movq 40(%eax),%mm7
pfadd %mm6,%mm4
movq %mm4,40(%ebx)
pfsub %mm6,%mm5
pfmul %mm7,%mm5
movd %mm5,84(%ebx)
psrlq $32,%mm5
movd %mm5,80(%ebx)
movq 48(%edi),%mm0
movq %mm0,%mm1
movd 76(%edi),%mm2
punpckldq 72(%edi),%mm2
movq 48(%eax),%mm3
pfadd %mm2,%mm0
movq %mm0,48(%ebx)
pfsub %mm2,%mm1
pfmul %mm3,%mm1
movd %mm1,76(%ebx)
psrlq $32,%mm1
movd %mm1,72(%ebx)
movq 56(%edi),%mm4
movq %mm4,%mm5
movd 68(%edi),%mm6
punpckldq 64(%edi),%mm6
movq 56(%eax),%mm7
pfadd %mm6,%mm4
movq %mm4,56(%ebx)
pfsub %mm6,%mm5
pfmul %mm7,%mm5
movd %mm5,68(%ebx)
psrlq $32,%mm5
movd %mm5,64(%ebx)
/* 2 */
movl ASM_NAME(pnts)+4,%eax
/* 0,14 */
movq 0(%ebx),%mm0
movq %mm0,%mm1
movd 60(%ebx),%mm2
punpckldq 56(%ebx),%mm2
movq 0(%eax),%mm3
pfadd %mm2,%mm0
movq %mm0,0(%esi)
pfsub %mm2,%mm1
pfmul %mm3,%mm1
movd %mm1,60(%esi)
psrlq $32,%mm1
movd %mm1,56(%esi)
/* 16,30 */
movq 64(%ebx),%mm0
movq %mm0,%mm1
movd 124(%ebx),%mm2
punpckldq 120(%ebx),%mm2
pfadd %mm2,%mm0
movq %mm0,64(%esi)
pfsubr %mm2,%mm1
pfmul %mm3,%mm1
movd %mm1,124(%esi)
psrlq $32,%mm1
movd %mm1,120(%esi)
/* 2,12 */
movq 8(%ebx),%mm4
movq %mm4,%mm5
movd 52(%ebx),%mm6
punpckldq 48(%ebx),%mm6
movq 8(%eax),%mm7
pfadd %mm6,%mm4
movq %mm4,8(%esi)
pfsub %mm6,%mm5
pfmul %mm7,%mm5
movd %mm5,52(%esi)
psrlq $32,%mm5
movd %mm5,48(%esi)
/* 18,28 */
movq 72(%ebx),%mm4
movq %mm4,%mm5
movd 116(%ebx),%mm6
punpckldq 112(%ebx),%mm6
pfadd %mm6,%mm4
movq %mm4,72(%esi)
pfsubr %mm6,%mm5
pfmul %mm7,%mm5
movd %mm5,116(%esi)
psrlq $32,%mm5
movd %mm5,112(%esi)
/* 4,10 */
movq 16(%ebx),%mm0
movq %mm0,%mm1
movd 44(%ebx),%mm2
punpckldq 40(%ebx),%mm2
movq 16(%eax),%mm3
pfadd %mm2,%mm0
movq %mm0,16(%esi)
pfsub %mm2,%mm1
pfmul %mm3,%mm1
movd %mm1,44(%esi)
psrlq $32,%mm1
movd %mm1,40(%esi)
/* 20,26 */
movq 80(%ebx),%mm0
movq %mm0,%mm1
movd 108(%ebx),%mm2
punpckldq 104(%ebx),%mm2
pfadd %mm2,%mm0
movq %mm0,80(%esi)
pfsubr %mm2,%mm1
pfmul %mm3,%mm1
movd %mm1,108(%esi)
psrlq $32,%mm1
movd %mm1,104(%esi)
/* 6,8 */
movq 24(%ebx),%mm4
movq %mm4,%mm5
movd 36(%ebx),%mm6
punpckldq 32(%ebx),%mm6
movq 24(%eax),%mm7
pfadd %mm6,%mm4
movq %mm4,24(%esi)
pfsub %mm6,%mm5
pfmul %mm7,%mm5
movd %mm5,36(%esi)
psrlq $32,%mm5
movd %mm5,32(%esi)
/* 22,24 */
movq 88(%ebx),%mm4
movq %mm4,%mm5
movd 100(%ebx),%mm6
punpckldq 96(%ebx),%mm6
pfadd %mm6,%mm4
movq %mm4,88(%esi)
pfsubr %mm6,%mm5
pfmul %mm7,%mm5
movd %mm5,100(%esi)
psrlq $32,%mm5
movd %mm5,96(%esi)
/* 3 */
movl ASM_NAME(pnts)+8,%eax
movq 0(%eax),%mm0
movq 8(%eax),%mm1
/* 0,6 */
movq 0(%esi),%mm2
movq %mm2,%mm3
movd 28(%esi),%mm4
punpckldq 24(%esi),%mm4
pfadd %mm4,%mm2
pfsub %mm4,%mm3
pfmul %mm0,%mm3
movq %mm2,0(%ebx)
movd %mm3,28(%ebx)
psrlq $32,%mm3
movd %mm3,24(%ebx)
/* 2,4 */
movq 8(%esi),%mm5
movq %mm5,%mm6
movd 20(%esi),%mm7
punpckldq 16(%esi),%mm7
pfadd %mm7,%mm5
pfsub %mm7,%mm6
pfmul %mm1,%mm6
movq %mm5,8(%ebx)
movd %mm6,20(%ebx)
psrlq $32,%mm6
movd %mm6,16(%ebx)
/* 8,14 */
movq 32(%esi),%mm2
movq %mm2,%mm3
movd 60(%esi),%mm4
punpckldq 56(%esi),%mm4
pfadd %mm4,%mm2
pfsubr %mm4,%mm3
pfmul %mm0,%mm3
movq %mm2,32(%ebx)
movd %mm3,60(%ebx)
psrlq $32,%mm3
movd %mm3,56(%ebx)
/* 10,12 */
movq 40(%esi),%mm5
movq %mm5,%mm6
movd 52(%esi),%mm7
punpckldq 48(%esi),%mm7
pfadd %mm7,%mm5
pfsubr %mm7,%mm6
pfmul %mm1,%mm6
movq %mm5,40(%ebx)
movd %mm6,52(%ebx)
psrlq $32,%mm6
movd %mm6,48(%ebx)
/* 16,22 */
movq 64(%esi),%mm2
movq %mm2,%mm3
movd 92(%esi),%mm4
punpckldq 88(%esi),%mm4
pfadd %mm4,%mm2
pfsub %mm4,%mm3
pfmul %mm0,%mm3
movq %mm2,64(%ebx)
movd %mm3,92(%ebx)
psrlq $32,%mm3
movd %mm3,88(%ebx)
/* 18,20 */
movq 72(%esi),%mm5
movq %mm5,%mm6
movd 84(%esi),%mm7
punpckldq 80(%esi),%mm7
pfadd %mm7,%mm5
pfsub %mm7,%mm6
pfmul %mm1,%mm6
movq %mm5,72(%ebx)
movd %mm6,84(%ebx)
psrlq $32,%mm6
movd %mm6,80(%ebx)
/* 24,30 */
movq 96(%esi),%mm2
movq %mm2,%mm3
movd 124(%esi),%mm4
punpckldq 120(%esi),%mm4
pfadd %mm4,%mm2
pfsubr %mm4,%mm3
pfmul %mm0,%mm3
movq %mm2,96(%ebx)
movd %mm3,124(%ebx)
psrlq $32,%mm3
movd %mm3,120(%ebx)
/* 26,28 */
movq 104(%esi),%mm5
movq %mm5,%mm6
movd 116(%esi),%mm7
punpckldq 112(%esi),%mm7
pfadd %mm7,%mm5
pfsubr %mm7,%mm6
pfmul %mm1,%mm6
movq %mm5,104(%ebx)
movd %mm6,116(%ebx)
psrlq $32,%mm6
movd %mm6,112(%ebx)
/* 4 */
movl ASM_NAME(pnts)+12,%eax
movq 0(%eax),%mm0
/* 0 */
movq 0(%ebx),%mm1
movq %mm1,%mm2
movd 12(%ebx),%mm3
punpckldq 8(%ebx),%mm3
pfadd %mm3,%mm1
pfsub %mm3,%mm2
pfmul %mm0,%mm2
movq %mm1,0(%esi)
movd %mm2,12(%esi)
psrlq $32,%mm2
movd %mm2,8(%esi)
/* 4 */
movq 16(%ebx),%mm4
movq %mm4,%mm5
movd 28(%ebx),%mm6
punpckldq 24(%ebx),%mm6
pfadd %mm6,%mm4
pfsubr %mm6,%mm5
pfmul %mm0,%mm5
movq %mm4,16(%esi)
movd %mm5,28(%esi)
psrlq $32,%mm5
movd %mm5,24(%esi)
/* 8 */
movq 32(%ebx),%mm1
movq %mm1,%mm2
movd 44(%ebx),%mm3
punpckldq 40(%ebx),%mm3
pfadd %mm3,%mm1
pfsub %mm3,%mm2
pfmul %mm0,%mm2
movq %mm1,32(%esi)
movd %mm2,44(%esi)
psrlq $32,%mm2
movd %mm2,40(%esi)
/* 12 */
movq 48(%ebx),%mm4
movq %mm4,%mm5
movd 60(%ebx),%mm6
punpckldq 56(%ebx),%mm6
pfadd %mm6,%mm4
pfsubr %mm6,%mm5
pfmul %mm0,%mm5
movq %mm4,48(%esi)
movd %mm5,60(%esi)
psrlq $32,%mm5
movd %mm5,56(%esi)
/* 16 */
movq 64(%ebx),%mm1
movq %mm1,%mm2
movd 76(%ebx),%mm3
punpckldq 72(%ebx),%mm3
pfadd %mm3,%mm1
pfsub %mm3,%mm2
pfmul %mm0,%mm2
movq %mm1,64(%esi)
movd %mm2,76(%esi)
psrlq $32,%mm2
movd %mm2,72(%esi)
/* 20 */
movq 80(%ebx),%mm4
movq %mm4,%mm5
movd 92(%ebx),%mm6
punpckldq 88(%ebx),%mm6
pfadd %mm6,%mm4
pfsubr %mm6,%mm5
pfmul %mm0,%mm5
movq %mm4,80(%esi)
movd %mm5,92(%esi)
psrlq $32,%mm5
movd %mm5,88(%esi)
/* 24 */
movq 96(%ebx),%mm1
movq %mm1,%mm2
movd 108(%ebx),%mm3
punpckldq 104(%ebx),%mm3
pfadd %mm3,%mm1
pfsub %mm3,%mm2
pfmul %mm0,%mm2
movq %mm1,96(%esi)
movd %mm2,108(%esi)
psrlq $32,%mm2
movd %mm2,104(%esi)
/* 28 */
movq 112(%ebx),%mm4
movq %mm4,%mm5
movd 124(%ebx),%mm6
punpckldq 120(%ebx),%mm6
pfadd %mm6,%mm4
pfsubr %mm6,%mm5
pfmul %mm0,%mm5
movq %mm4,112(%esi)
movd %mm5,124(%esi)
psrlq $32,%mm5
movd %mm5,120(%esi)
/* 5 */
movl $-1,%eax
movd %eax,%mm1
movl $1,%eax
/* L | H */
movd %eax,%mm0
punpckldq %mm1,%mm0
/* 1.0 | -1.0 */
pi2fd %mm0,%mm0
movd %eax,%mm1
pi2fd %mm1,%mm1
movl ASM_NAME(pnts)+16,%eax
movd 0(%eax),%mm2
/* 1.0 | cos0 */
punpckldq %mm2,%mm1
/* 0 */
movq 0(%esi),%mm2
movq %mm2,%mm3
pfmul %mm0,%mm3
pfacc %mm3,%mm2
pfmul %mm1,%mm2
movq %mm2,0(%ebx)
movq 8(%esi),%mm4
movq %mm4,%mm5
pfmul %mm0,%mm5
pfacc %mm5,%mm4
pfmul %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
pfacc %mm5,%mm4
movq %mm4,8(%ebx)
/* 4 */
movq 16(%esi),%mm2
movq %mm2,%mm3
pfmul %mm0,%mm3
pfacc %mm3,%mm2
pfmul %mm1,%mm2
movq 24(%esi),%mm4
movq %mm4,%mm5
pfmul %mm0,%mm5
pfacc %mm5,%mm4
pfmul %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
pfacc %mm5,%mm4
movq %mm2,%mm3
psrlq $32,%mm3
pfadd %mm4,%mm2
pfadd %mm3,%mm4
movq %mm2,16(%ebx)
movq %mm4,24(%ebx)
/* 8 */
movq 32(%esi),%mm2
movq %mm2,%mm3
pfmul %mm0,%mm3
pfacc %mm3,%mm2
pfmul %mm1,%mm2
movq %mm2,32(%ebx)
movq 40(%esi),%mm4
movq %mm4,%mm5
pfmul %mm0,%mm5
pfacc %mm5,%mm4
pfmul %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
pfacc %mm5,%mm4
movq %mm4,40(%ebx)
/* 12 */
movq 48(%esi),%mm2
movq %mm2,%mm3
pfmul %mm0,%mm3
pfacc %mm3,%mm2
pfmul %mm1,%mm2
movq 56(%esi),%mm4
movq %mm4,%mm5
pfmul %mm0,%mm5
pfacc %mm5,%mm4
pfmul %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
pfacc %mm5,%mm4
movq %mm2,%mm3
psrlq $32,%mm3
pfadd %mm4,%mm2
pfadd %mm3,%mm4
movq %mm2,48(%ebx)
movq %mm4,56(%ebx)
/* 16 */
movq 64(%esi),%mm2
movq %mm2,%mm3
pfmul %mm0,%mm3
pfacc %mm3,%mm2
pfmul %mm1,%mm2
movq %mm2,64(%ebx)
movq 72(%esi),%mm4
movq %mm4,%mm5
pfmul %mm0,%mm5
pfacc %mm5,%mm4
pfmul %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
pfacc %mm5,%mm4
movq %mm4,72(%ebx)
/* 20 */
movq 80(%esi),%mm2
movq %mm2,%mm3
pfmul %mm0,%mm3
pfacc %mm3,%mm2
pfmul %mm1,%mm2
movq 88(%esi),%mm4
movq %mm4,%mm5
pfmul %mm0,%mm5
pfacc %mm5,%mm4
pfmul %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
pfacc %mm5,%mm4
movq %mm2,%mm3
psrlq $32,%mm3
pfadd %mm4,%mm2
pfadd %mm3,%mm4
movq %mm2,80(%ebx)
movq %mm4,88(%ebx)
/* 24 */
movq 96(%esi),%mm2
movq %mm2,%mm3
pfmul %mm0,%mm3
pfacc %mm3,%mm2
pfmul %mm1,%mm2
movq %mm2,96(%ebx)
movq 104(%esi),%mm4
movq %mm4,%mm5
pfmul %mm0,%mm5
pfacc %mm5,%mm4
pfmul %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
pfacc %mm5,%mm4
movq %mm4,104(%ebx)
/* 28 */
movq 112(%esi),%mm2
movq %mm2,%mm3
pfmul %mm0,%mm3
pfacc %mm3,%mm2
pfmul %mm1,%mm2
movq 120(%esi),%mm4
movq %mm4,%mm5
pfmul %mm0,%mm5
pfacc %mm5,%mm4
pfmul %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
pfacc %mm5,%mm4
movq %mm2,%mm3
psrlq $32,%mm3
pfadd %mm4,%mm2
pfadd %mm3,%mm4
movq %mm2,112(%ebx)
movq %mm4,120(%ebx)
/* Phase6 */
movl 0(%ebx),%eax
movl %eax,1024(%ebp)
movl 4(%ebx),%eax
movl %eax,0(%ebp)
movl %eax,0(%edx)
movl 8(%ebx),%eax
movl %eax,512(%ebp)
movl 12(%ebx),%eax
movl %eax,512(%edx)
movl 16(%ebx),%eax
movl %eax,768(%ebp)
movl 20(%ebx),%eax
movl %eax,256(%edx)
movl 24(%ebx),%eax
movl %eax,256(%ebp)
movl 28(%ebx),%eax
movl %eax,768(%edx)
movq 32(%ebx),%mm0
movq 48(%ebx),%mm1
pfadd %mm1,%mm0
movd %mm0,896(%ebp)
psrlq $32,%mm0
movd %mm0,128(%edx)
movq 40(%ebx),%mm2
pfadd %mm2,%mm1
movd %mm1,640(%ebp)
psrlq $32,%mm1
movd %mm1,384(%edx)
movq 56(%ebx),%mm3
pfadd %mm3,%mm2
movd %mm2,384(%ebp)
psrlq $32,%mm2
movd %mm2,640(%edx)
movd 36(%ebx),%mm4
pfadd %mm4,%mm3
movd %mm3,128(%ebp)
psrlq $32,%mm3
movd %mm3,896(%edx)
movq 96(%ebx),%mm0
movq 64(%ebx),%mm1
movq 112(%ebx),%mm2
pfadd %mm2,%mm0
movq %mm0,%mm3
pfadd %mm1,%mm3
movd %mm3,960(%ebp)
psrlq $32,%mm3
movd %mm3,64(%edx)
movq 80(%ebx),%mm1
pfadd %mm1,%mm0
movd %mm0,832(%ebp)
psrlq $32,%mm0
movd %mm0,192(%edx)
movq 104(%ebx),%mm3
pfadd %mm3,%mm2
movq %mm2,%mm4
pfadd %mm1,%mm4
movd %mm4,704(%ebp)
psrlq $32,%mm4
movd %mm4,320(%edx)
movq 72(%ebx),%mm1
pfadd %mm1,%mm2
movd %mm2,576(%ebp)
psrlq $32,%mm2
movd %mm2,448(%edx)
movq 120(%ebx),%mm4
pfadd %mm4,%mm3
movq %mm3,%mm5
pfadd %mm1,%mm5
movd %mm5,448(%ebp)
psrlq $32,%mm5
movd %mm5,576(%edx)
movq 88(%ebx),%mm1
pfadd %mm1,%mm3
movd %mm3,320(%ebp)
psrlq $32,%mm3
movd %mm3,704(%edx)
movd 100(%ebx),%mm5
pfadd %mm5,%mm4
movq %mm4,%mm6
pfadd %mm1,%mm6
movd %mm6,192(%ebp)
psrlq $32,%mm6
movd %mm6,832(%edx)
movd 68(%ebx),%mm1
pfadd %mm1,%mm4
movd %mm4,64(%ebp)
psrlq $32,%mm4
movd %mm4,960(%edx)
/* femms */
popl %ebx
popl %esi
popl %edi
popl %ebp
addl $256,%esp
ret

View File

@@ -0,0 +1,712 @@
/*
dct64_3dnowext: extended 3DNow optimized DCT64
copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
Transformed back into standalone asm, with help of
gcc -S -DHAVE_CONFIG_H -I. -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct64_3dnowext.{S,c}
MPlayer comment follows.
*/
/*
* This code was taken from http://www.mpg123.org
* See ChangeLog of mpg123-0.59s-pre.1 for detail
* Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
* Partial 3dnowex-DSP! optimization by Nick Kurshev
*
* TODO: optimize scalar 3dnow! code
* Warning: Phases 7 & 8 are not tested
*/
#include "mangle.h"
.data
ALIGN4
/* .type plus_1f, @object
.size plus_1f, 4 */
/* 1065353216 == 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0f.
   Loaded into the low half of %mm6 by dct64_3dnowext. */
plus_1f:
.long 1065353216
ALIGN8
/* .type x_plus_minus_3dnow, @object
.size x_plus_minus_3dnow, 8 */
/* 64-bit XOR mask: low dword 0, high dword 0x80000000 (float sign bit).
   pxor with this mask flips the sign of the upper float of a packed pair
   and leaves the lower float unchanged. Loaded into %mm7 by dct64_3dnowext. */
x_plus_minus_3dnow:
.long 0
.long -2147483648
.text
ALIGN32,,31
.globl ASM_NAME(dct64_3dnowext)
/* .type ASM_NAME(dct64_3dnowext), @function */
/*
	dct64_3dnowext: DCT64 using MMX / extended 3DNow! instructions.

	Stack arguments (read relative to %ebp after the prologue):
	   8(%ebp) -> %esi  first output pointer  (written via N(%esi))
	  12(%ebp) -> %edi  second output pointer (written via N(%edi))
	  16(%ebp) -> %eax  input pointer; bytes 0..127 are read as packed floats

	Locals: 256 bytes of stack used as two 128-byte scratch buffers,
	  %ecx = -268(%ebp)  (bytes   0..127 of the local area)
	  %edx = -140(%ebp)  (bytes 128..255 of the local area)
	%ebx points at costab_mmxsse (cosine table, defined elsewhere).

	The "phase" labels below are reviewer annotations derived from the
	addressing pattern; each phase reads one scratch buffer and writes the other.
	%ebx, %esi and %edi are saved/restored; MMX state is released with femms.
*/
ASM_NAME(dct64_3dnowext):
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $256, %esp
#APP
movl 16(%ebp),%eax
leal 128+-268(%ebp),%edx
movl 8(%ebp),%esi
movl 12(%ebp),%edi
movl $ASM_NAME(costab_mmxsse),%ebx
leal -268(%ebp),%ecx
/* Phase 1: input (%eax) -> buffer %edx, table bytes 0..63.
   Each float pair is combined with the mirrored pair from the other end of
   the 32-float input (pswapd swaps the two dwords of the loaded quadword).
   Sums go to the front half; (difference * table) goes, swapped, to the back half. */
movq (%eax), %mm0
movq 8(%eax), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 120(%eax), %mm1
pswapd 112(%eax), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, (%edx)
movq %mm4, 8(%edx)
pfsub %mm1, %mm3
pfsub %mm5, %mm7
pfmul (%ebx), %mm3
pfmul 8(%ebx), %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 120(%edx)
movq %mm7, 112(%edx)
movq 16(%eax), %mm0
movq 24(%eax), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 104(%eax), %mm1
pswapd 96(%eax), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 16(%edx)
movq %mm4, 24(%edx)
pfsub %mm1, %mm3
pfsub %mm5, %mm7
pfmul 16(%ebx), %mm3
pfmul 24(%ebx), %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 104(%edx)
movq %mm7, 96(%edx)
movq 32(%eax), %mm0
movq 40(%eax), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 88(%eax), %mm1
pswapd 80(%eax), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 32(%edx)
movq %mm4, 40(%edx)
pfsub %mm1, %mm3
pfsub %mm5, %mm7
pfmul 32(%ebx), %mm3
pfmul 40(%ebx), %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 88(%edx)
movq %mm7, 80(%edx)
movq 48(%eax), %mm0
movq 56(%eax), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 72(%eax), %mm1
pswapd 64(%eax), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 48(%edx)
movq %mm4, 56(%edx)
pfsub %mm1, %mm3
pfsub %mm5, %mm7
pfmul 48(%ebx), %mm3
pfmul 56(%ebx), %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 72(%edx)
movq %mm7, 64(%edx)
/* Phase 2: buffer %edx -> buffer %ecx, table bytes 64..95.
   Same mirrored add/sub within each 64-byte half. The first half uses pfsub. */
movq (%edx), %mm0
movq 8(%edx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 56(%edx), %mm1
pswapd 48(%edx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, (%ecx)
movq %mm4, 8(%ecx)
pfsub %mm1, %mm3
pfsub %mm5, %mm7
pfmul 64(%ebx), %mm3
pfmul 72(%ebx), %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 56(%ecx)
movq %mm7, 48(%ecx)
movq 16(%edx), %mm0
movq 24(%edx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 40(%edx), %mm1
pswapd 32(%edx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 16(%ecx)
movq %mm4, 24(%ecx)
pfsub %mm1, %mm3
pfsub %mm5, %mm7
pfmul 80(%ebx), %mm3
pfmul 88(%ebx), %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 40(%ecx)
movq %mm7, 32(%ecx)
/* Phase 2, second half (bytes 64..127): pfsubr reverses the subtraction operands. */
movq 64(%edx), %mm0
movq 72(%edx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 120(%edx), %mm1
pswapd 112(%edx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 64(%ecx)
movq %mm4, 72(%ecx)
pfsubr %mm1, %mm3
pfsubr %mm5, %mm7
pfmul 64(%ebx), %mm3
pfmul 72(%ebx), %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 120(%ecx)
movq %mm7, 112(%ecx)
movq 80(%edx), %mm0
movq 88(%edx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 104(%edx), %mm1
pswapd 96(%edx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 80(%ecx)
movq %mm4, 88(%ecx)
pfsubr %mm1, %mm3
pfsubr %mm5, %mm7
pfmul 80(%ebx), %mm3
pfmul 88(%ebx), %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 104(%ecx)
movq %mm7, 96(%ecx)
/* Phase 3: buffer %ecx -> buffer %edx in four 32-byte groups.
   Table bytes 96..111 are kept in %mm2/%mm6 for all groups.
   Groups alternate pfsub / pfsubr. */
movq 96(%ebx), %mm2
movq 104(%ebx), %mm6
movq (%ecx), %mm0
movq 8(%ecx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 24(%ecx), %mm1
pswapd 16(%ecx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, (%edx)
movq %mm4, 8(%edx)
pfsub %mm1, %mm3
pfsub %mm5, %mm7
pfmul %mm2, %mm3
pfmul %mm6, %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 24(%edx)
movq %mm7, 16(%edx)
movq 32(%ecx), %mm0
movq 40(%ecx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 56(%ecx), %mm1
pswapd 48(%ecx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 32(%edx)
movq %mm4, 40(%edx)
pfsubr %mm1, %mm3
pfsubr %mm5, %mm7
pfmul %mm2, %mm3
pfmul %mm6, %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 56(%edx)
movq %mm7, 48(%edx)
movq 64(%ecx), %mm0
movq 72(%ecx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 88(%ecx), %mm1
pswapd 80(%ecx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 64(%edx)
movq %mm4, 72(%edx)
pfsub %mm1, %mm3
pfsub %mm5, %mm7
pfmul %mm2, %mm3
pfmul %mm6, %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 88(%edx)
movq %mm7, 80(%edx)
movq 96(%ecx), %mm0
movq 104(%ecx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 120(%ecx), %mm1
pswapd 112(%ecx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 96(%edx)
movq %mm4, 104(%edx)
pfsubr %mm1, %mm3
pfsubr %mm5, %mm7
pfmul %mm2, %mm3
pfmul %mm6, %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 120(%edx)
movq %mm7, 112(%edx)
/* Phase 4: buffer %edx -> buffer %ecx in 16-byte groups, two groups per step.
   Table bytes 112..119 are kept in %mm2. Within each step the first group
   uses pfsub and the second pfsubr. */
movq 112(%ebx), %mm2
movq (%edx), %mm0
movq 16(%edx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 8(%edx), %mm1
pswapd 24(%edx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, (%ecx)
movq %mm4, 16(%ecx)
pfsub %mm1, %mm3
pfsubr %mm5, %mm7
pfmul %mm2, %mm3
pfmul %mm2, %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 8(%ecx)
movq %mm7, 24(%ecx)
movq 32(%edx), %mm0
movq 48(%edx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 40(%edx), %mm1
pswapd 56(%edx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 32(%ecx)
movq %mm4, 48(%ecx)
pfsub %mm1, %mm3
pfsubr %mm5, %mm7
pfmul %mm2, %mm3
pfmul %mm2, %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 40(%ecx)
movq %mm7, 56(%ecx)
movq 64(%edx), %mm0
movq 80(%edx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 72(%edx), %mm1
pswapd 88(%edx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 64(%ecx)
movq %mm4, 80(%ecx)
pfsub %mm1, %mm3
pfsubr %mm5, %mm7
pfmul %mm2, %mm3
pfmul %mm2, %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 72(%ecx)
movq %mm7, 88(%ecx)
movq 96(%edx), %mm0
movq 112(%edx), %mm4
movq %mm0, %mm3
movq %mm4, %mm7
pswapd 104(%edx), %mm1
pswapd 120(%edx), %mm5
pfadd %mm1, %mm0
pfadd %mm5, %mm4
movq %mm0, 96(%ecx)
movq %mm4, 112(%ecx)
pfsub %mm1, %mm3
pfsubr %mm5, %mm7
pfmul %mm2, %mm3
pfmul %mm2, %mm7
pswapd %mm3, %mm3
pswapd %mm7, %mm7
movq %mm3, 104(%ecx)
movq %mm7, 120(%ecx)
/* Phase 5: buffer %ecx (bytes 32..127) -> buffer %edx.
   %mm6 = { 1.0f (low), float at table byte 120 (high) }
   %mm7 = sign mask that negates the high float under pxor.
   pxor + pfacc therefore yields { a+b (low), a-b (high) } for a pair {a,b}.
   Bytes 0..31 of %ecx are consumed directly by the output code below. */
movd plus_1f, %mm6
punpckldq 120(%ebx), %mm6
movq x_plus_minus_3dnow, %mm7
movq 32(%ecx), %mm0
movq 64(%ecx), %mm2
movq %mm0, %mm1
movq %mm2, %mm3
pxor %mm7, %mm1
pxor %mm7, %mm3
pfacc %mm1, %mm0
pfacc %mm3, %mm2
pfmul %mm6, %mm0
pfmul %mm6, %mm2
movq %mm0, 32(%edx)
movq %mm2, 64(%edx)
movd 44(%ecx), %mm0
movd 40(%ecx), %mm2
movd 120(%ebx), %mm3
punpckldq 76(%ecx), %mm0
punpckldq 72(%ecx), %mm2
punpckldq %mm3, %mm3
movq %mm0, %mm4
movq %mm2, %mm5
pfsub %mm2, %mm0
pfmul %mm3, %mm0
movq %mm0, %mm1
pfadd %mm5, %mm0
pfadd %mm4, %mm0
movq %mm0, %mm2
punpckldq %mm1, %mm0
punpckhdq %mm1, %mm2
movq %mm0, 40(%edx)
movq %mm2, 72(%edx)
movd 48(%ecx), %mm3
movd 60(%ecx), %mm2
pfsub 52(%ecx), %mm3
pfsub 56(%ecx), %mm2
pfmul 120(%ebx), %mm3
pfmul 120(%ebx), %mm2
movq %mm2, %mm1
pfadd 56(%ecx), %mm1
pfadd 60(%ecx), %mm1
movq %mm1, %mm0
pfadd 48(%ecx), %mm0
pfadd 52(%ecx), %mm0
pfadd %mm3, %mm1
punpckldq %mm2, %mm1
pfadd %mm3, %mm2
punpckldq %mm2, %mm0
movq %mm1, 56(%edx)
movq %mm0, 48(%edx)
movd 92(%ecx), %mm1
pfsub 88(%ecx), %mm1
pfmul 120(%ebx), %mm1
movd %mm1, 92(%edx)
pfadd 92(%ecx), %mm1
pfadd 88(%ecx), %mm1
movq %mm1, %mm0
pfadd 80(%ecx), %mm0
pfadd 84(%ecx), %mm0
movd %mm0, 80(%edx)
movd 80(%ecx), %mm0
pfsub 84(%ecx), %mm0
pfmul 120(%ebx), %mm0
pfadd %mm0, %mm1
pfadd 92(%edx), %mm0
punpckldq %mm1, %mm0
movq %mm0, 84(%edx)
movq 96(%ecx), %mm0
movq %mm0, %mm1
pxor %mm7, %mm1
pfacc %mm1, %mm0
pfmul %mm6, %mm0
movq %mm0, 96(%edx)
movd 108(%ecx), %mm0
pfsub 104(%ecx), %mm0
pfmul 120(%ebx), %mm0
movd %mm0, 108(%edx)
pfadd 104(%ecx), %mm0
pfadd 108(%ecx), %mm0
movd %mm0, 104(%edx)
movd 124(%ecx), %mm1
pfsub 120(%ecx), %mm1
pfmul 120(%ebx), %mm1
movd %mm1, 124(%edx)
pfadd 120(%ecx), %mm1
pfadd 124(%ecx), %mm1
movq %mm1, %mm0
pfadd 112(%ecx), %mm0
pfadd 116(%ecx), %mm0
movd %mm0, 112(%edx)
movd 112(%ecx), %mm0
pfsub 116(%ecx), %mm0
pfmul 120(%ebx), %mm0
pfadd %mm0,%mm1
pfadd 124(%edx), %mm0
punpckldq %mm1, %mm0
movq %mm0, 116(%edx)
/* NOTE(review): every instruction between the prologue's "subl $256, %esp"
   and this branch is a mov/lea or an MMX/3DNow! op, none of which write
   EFLAGS. ZF here is therefore whatever that subl left (clear unless
   %esp became 0), so the branch to .L01 looks effectively unconditional and
   the fall-through path below looks unreachable in practice. Confirm intent
   before touching this. */
jnz .L01
/* Fall-through output path: stores results as 32-bit values (movd) at
   64-byte strides, offsets 0..1024 from %esi and 0..960 from %edi. */
movd (%ecx), %mm0
pfadd 4(%ecx), %mm0
movd %mm0, 1024(%esi)
movd (%ecx), %mm0
pfsub 4(%ecx), %mm0
pfmul 120(%ebx), %mm0
movd %mm0, (%esi)
movd %mm0, (%edi)
movd 12(%ecx), %mm0
pfsub 8(%ecx), %mm0
pfmul 120(%ebx), %mm0
movd %mm0, 512(%edi)
pfadd 12(%ecx), %mm0
pfadd 8(%ecx), %mm0
movd %mm0, 512(%esi)
movd 16(%ecx), %mm0
pfsub 20(%ecx), %mm0
pfmul 120(%ebx), %mm0
movq %mm0, %mm3
movd 28(%ecx), %mm0
pfsub 24(%ecx), %mm0
pfmul 120(%ebx), %mm0
movd %mm0, 768(%edi)
movq %mm0, %mm2
pfadd 24(%ecx), %mm0
pfadd 28(%ecx), %mm0
movq %mm0, %mm1
pfadd 16(%ecx), %mm0
pfadd 20(%ecx), %mm0
movd %mm0, 768(%esi)
pfadd %mm3, %mm1
movd %mm1, 256(%esi)
pfadd %mm3, %mm2
movd %mm2, 256(%edi)
movq 32(%edx), %mm0
movq 48(%edx), %mm1
pfadd 48(%edx), %mm0
pfadd 40(%edx), %mm1
movd %mm0, 896(%esi)
movd %mm1, 640(%esi)
psrlq $32, %mm0
psrlq $32, %mm1
movd %mm0, 128(%edi)
movd %mm1, 384(%edi)
movd 40(%edx), %mm0
pfadd 56(%edx), %mm0
movd %mm0, 384(%esi)
movd 56(%edx), %mm0
pfadd 36(%edx), %mm0
movd %mm0, 128(%esi)
movd 60(%edx), %mm0
movd %mm0, 896(%edi)
pfadd 44(%edx), %mm0
movd %mm0, 640(%edi)
movq 96(%edx), %mm0
movq 112(%edx), %mm2
movq 104(%edx), %mm4
pfadd 112(%edx), %mm0
pfadd 104(%edx), %mm2
pfadd 120(%edx), %mm4
movq %mm0, %mm1
movq %mm2, %mm3
movq %mm4, %mm5
pfadd 64(%edx), %mm0
pfadd 80(%edx), %mm2
pfadd 72(%edx), %mm4
movd %mm0, 960(%esi)
movd %mm2, 704(%esi)
movd %mm4, 448(%esi)
psrlq $32, %mm0
psrlq $32, %mm2
psrlq $32, %mm4
movd %mm0, 64(%edi)
movd %mm2, 320(%edi)
movd %mm4, 576(%edi)
pfadd 80(%edx), %mm1
pfadd 72(%edx), %mm3
pfadd 88(%edx), %mm5
movd %mm1, 832(%esi)
movd %mm3, 576(%esi)
movd %mm5, 320(%esi)
psrlq $32, %mm1
psrlq $32, %mm3
psrlq $32, %mm5
movd %mm1, 192(%edi)
movd %mm3, 448(%edi)
movd %mm5, 704(%edi)
movd 120(%edx), %mm0
pfadd 100(%edx), %mm0
movq %mm0, %mm1
pfadd 88(%edx), %mm0
movd %mm0, 192(%esi)
pfadd 68(%edx), %mm1
movd %mm1, 64(%esi)
movd 124(%edx), %mm0
movd %mm0, 960(%edi)
pfadd 92(%edx), %mm0
movd %mm0, 832(%edi)
jmp .L_bye
/* .L01 output path: results are converted with pf2iw (float -> integer word)
   and stored as 16-bit values (movw) at 32-byte strides, offsets 0..512 from
   %esi and up to 480 from %edi. %eax, %ecx and %ebx are reused as scratch for
   the movd/movw transfers; once %ecx is overwritten, no further (%ecx) buffer
   reads occur. */
.L01:
movq (%ecx), %mm0
movq %mm0, %mm1
pxor %mm7, %mm1
pfacc %mm1, %mm0
pfmul %mm6, %mm0
pf2iw %mm0, %mm0
movd %mm0, %eax
movw %ax, 512(%esi)
psrlq $32, %mm0
movd %mm0, %eax
movw %ax, (%esi)
movd 12(%ecx), %mm0
pfsub 8(%ecx), %mm0
pfmul 120(%ebx), %mm0
pf2iw %mm0, %mm7
movd %mm7, %eax
movw %ax, 256(%edi)
pfadd 12(%ecx), %mm0
pfadd 8(%ecx), %mm0
pf2iw %mm0, %mm0
movd %mm0, %eax
movw %ax, 256(%esi)
movd 16(%ecx), %mm3
pfsub 20(%ecx), %mm3
pfmul 120(%ebx), %mm3
/* The movq below is immediately overwritten by the following movd. */
movq %mm3, %mm2
movd 28(%ecx), %mm2
pfsub 24(%ecx), %mm2
pfmul 120(%ebx), %mm2
movq %mm2, %mm1
pf2iw %mm2, %mm7
movd %mm7, %eax
movw %ax, 384(%edi)
pfadd 24(%ecx), %mm1
pfadd 28(%ecx), %mm1
movq %mm1, %mm0
pfadd 16(%ecx), %mm0
pfadd 20(%ecx), %mm0
pf2iw %mm0, %mm0
movd %mm0, %eax
movw %ax, 384(%esi)
pfadd %mm3, %mm1
pf2iw %mm1, %mm1
movd %mm1, %eax
movw %ax, 128(%esi)
pfadd %mm3, %mm2
pf2iw %mm2, %mm2
movd %mm2, %eax
movw %ax, 128(%edi)
movq 32(%edx), %mm0
movq 48(%edx), %mm1
pfadd 48(%edx), %mm0
pfadd 40(%edx), %mm1
pf2iw %mm0, %mm0
pf2iw %mm1, %mm1
movd %mm0, %eax
movd %mm1, %ecx
movw %ax, 448(%esi)
movw %cx, 320(%esi)
psrlq $32, %mm0
psrlq $32, %mm1
movd %mm0, %eax
movd %mm1, %ecx
movw %ax, 64(%edi)
movw %cx, 192(%edi)
movd 40(%edx), %mm3
movd 56(%edx), %mm4
movd 60(%edx), %mm0
movd 44(%edx), %mm2
movd 120(%edx), %mm5
punpckldq %mm4, %mm3
punpckldq 124(%edx), %mm0
pfadd 100(%edx), %mm5
punpckldq 36(%edx), %mm4
punpckldq 92(%edx), %mm2
movq %mm5, %mm6
pfadd %mm4, %mm3
pf2iw %mm0, %mm1
pf2iw %mm3, %mm3
pfadd 88(%edx), %mm5
movd %mm1, %eax
movd %mm3, %ecx
movw %ax, 448(%edi)
movw %cx, 192(%esi)
pf2iw %mm5, %mm5
psrlq $32, %mm1
psrlq $32, %mm3
movd %mm5, %ebx
movd %mm1, %eax
movd %mm3, %ecx
movw %bx, 96(%esi)
movw %ax, 480(%edi)
movw %cx, 64(%esi)
pfadd %mm2, %mm0
pf2iw %mm0, %mm0
movd %mm0, %eax
pfadd 68(%edx), %mm6
movw %ax, 320(%edi)
psrlq $32, %mm0
pf2iw %mm6, %mm6
movd %mm0, %eax
movd %mm6, %ebx
movw %ax, 416(%edi)
movw %bx, 32(%esi)
movq 96(%edx), %mm0
movq 112(%edx), %mm2
movq 104(%edx), %mm4
pfadd %mm2, %mm0
pfadd %mm4, %mm2
pfadd 120(%edx), %mm4
movq %mm0, %mm1
movq %mm2, %mm3
movq %mm4, %mm5
pfadd 64(%edx), %mm0
pfadd 80(%edx), %mm2
pfadd 72(%edx), %mm4
pf2iw %mm0, %mm0
pf2iw %mm2, %mm2
pf2iw %mm4, %mm4
movd %mm0, %eax
movd %mm2, %ecx
movd %mm4, %ebx
movw %ax, 480(%esi)
movw %cx, 352(%esi)
movw %bx, 224(%esi)
psrlq $32, %mm0
psrlq $32, %mm2
psrlq $32, %mm4
movd %mm0, %eax
movd %mm2, %ecx
movd %mm4, %ebx
movw %ax, 32(%edi)
movw %cx, 160(%edi)
movw %bx, 288(%edi)
pfadd 80(%edx), %mm1
pfadd 72(%edx), %mm3
pfadd 88(%edx), %mm5
pf2iw %mm1, %mm1
pf2iw %mm3, %mm3
pf2iw %mm5, %mm5
movd %mm1, %eax
movd %mm3, %ecx
movd %mm5, %ebx
movw %ax, 416(%esi)
movw %cx, 288(%esi)
movw %bx, 160(%esi)
psrlq $32, %mm1
psrlq $32, %mm3
psrlq $32, %mm5
movd %mm1, %eax
movd %mm3, %ecx
movd %mm5, %ebx
movw %ax, 96(%edi)
movw %cx, 224(%edi)
movw %bx, 352(%edi)
/* movsw copies the 16-bit word at (%esi) to (%edi): the word stored at offset 0
   of the first output is duplicated to offset 0 of the second output. */
movsw
/* Common exit: femms leaves MMX state so x87 code can run again; then the
   256-byte local area is released and the callee-saved registers are popped
   in reverse push order. */
.L_bye:
femms
#NO_APP
addl $256, %esp
popl %ebx
popl %esi
popl %edi
leave
ret
/* .size ASM_NAME(dct64_3dnowext), .-ASM_NAME(dct64_3dnowext) */

View File

@@ -0,0 +1,325 @@
/*
dct64_altivec.c: Discrete Cosine Transform (DCT) for Altivec
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
altivec optimization by tmkk
*/
/*
* Discrete Cosine Transform (DCT) for subband synthesis
*
* -funroll-loops (for gcc) will remove the loops for better performance
* using loops in the source-code enhances readability
*
*
* TODO: write an optimized version for the down-sampling modes
* (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero)
*/
#include "mpg123lib_intern.h"
#ifndef __APPLE__
#include <altivec.h>
#endif
/*
	dct64_altivec: DCT for subband synthesis, AltiVec implementation.

	samples: 32 input values; may be unaligned (loaded via vec_lvsl/vec_perm).
	out0:    first output; written at indices 0x10*0 .. 0x10*16.
	out1:    second output; written at indices 0x10*0 .. 0x10*15.

	The vector part runs the butterfly stages using the cosine tables
	pnts[0]..pnts[4] and spills the result to the aligned scratch array bufs.
	The scalar part then performs the final additions in place on bufs[0..31]
	and scatters the values to out0/out1.
*/
void dct64_altivec(real *out0,real *out1,real *samples)
{
	/* 16-byte aligned so vec_st can store whole vectors. */
	real __attribute__ ((aligned (16))) bufs[64];

	{
		register real *b1,*costab;

		vector unsigned char vinvert,vperm1,vperm2,vperm3,vperm4;
		vector float v1,v2,v3,v4,v5,v6,v7,v8;
		vector float vbs1,vbs2,vbs3,vbs4,vbs5,vbs6,vbs7,vbs8;
		vector float vbs9,vbs10,vbs11,vbs12,vbs13,vbs14,vbs15,vbs16;
		vector float vzero;

		b1 = samples;
		costab = pnts[0];

		/* All-zero addend for vec_madd. Built from a splat so that no
		   uninitialized variable is read (the previous vec_xor(vzero,vzero)
		   read vzero before it was ever assigned). */
		vzero = (vector float)vec_splat_u32(0);
		/* Permute mask that reverses the order of the four floats in a vector. */
#ifdef __APPLE__
		vinvert = (vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3);
#else
		vinvert = (vector unsigned char){12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
#endif
		/* vperm1 realigns an unaligned load; vperm2 does the same and reverses. */
		vperm1 = vec_lvsl(0,b1);
		vperm2 = vec_perm(vperm1,vperm1,vinvert);

		/* Stage 1: samples[i] +/- samples[31-i], differences scaled by pnts[0]. */
		v1 = vec_ld(0,b1);
		v2 = vec_ld(16,b1);
		v3 = vec_ld(112,b1);
		/* vec_ld ignores the low four address bits, so offset 127 fetches the
		   quadword holding the last byte of samples[31]. */
		v4 = vec_ld(127,b1);
		v5 = vec_perm(v1,v2,vperm1); /* b1[0,1,2,3] */
		v6 = vec_perm(v3,v4,vperm2); /* b1[31,30,29,28] */
		vbs1 = vec_add(v5,v6);
		vbs8 = vec_sub(v5,v6);

		v1 = vec_ld(32,b1);
		v4 = vec_ld(96,b1);
		v5 = vec_perm(v2,v1,vperm1); /* b1[4,5,6,7] */
		v6 = vec_perm(v4,v3,vperm2); /* b1[27,26,25,24] */
		vbs2 = vec_add(v5,v6);
		vbs7 = vec_sub(v5,v6);

		v2 = vec_ld(48,b1);
		v3 = vec_ld(80,b1);
		v5 = vec_perm(v1,v2,vperm1); /* b1[8,9,10,11] */
		v6 = vec_perm(v3,v4,vperm2); /* b1[23,22,21,20] */
		vbs3 = vec_add(v5,v6);
		vbs6 = vec_sub(v5,v6);

		v1 = vec_ld(64,b1);
		v5 = vec_perm(v2,v1,vperm1); /* b1[12,13,14,15] */
		v6 = vec_perm(v1,v3,vperm2); /* b1[19,18,17,16] */
		vbs4 = vec_add(v5,v6);
		vbs5 = vec_sub(v5,v6);

		v1 = vec_ld(0,costab);
		vbs8 = vec_madd(vbs8,v1,vzero);
		v2 = vec_ld(16,costab);
		vbs7 = vec_madd(vbs7,v2,vzero);
		v3 = vec_ld(32,costab);
		vbs6 = vec_madd(vbs6,v3,vzero);
		v4 = vec_ld(48,costab);
		vbs5 = vec_madd(vbs5,v4,vzero);
		vbs6 = vec_perm(vbs6,vbs6,vinvert);
		vbs5 = vec_perm(vbs5,vbs5,vinvert);

		/* Stage 2: coefficients from pnts[1]. */
		costab = pnts[1];

		v1 = vec_perm(vbs4,vbs4,vinvert);
		vbs9 = vec_add(vbs1,v1);
		v3 = vec_sub(vbs1,v1);
		v5 = vec_ld(0,costab);
		v2 = vec_perm(vbs3,vbs3,vinvert);
		vbs10 = vec_add(vbs2,v2);
		v4 = vec_sub(vbs2,v2);
		v6 = vec_ld(16,costab);
		vbs12 = vec_madd(v3,v5,vzero);
		vbs11 = vec_madd(v4,v6,vzero);

		v7 = vec_sub(vbs7,vbs6);
		v8 = vec_sub(vbs8,vbs5);
		vbs13 = vec_add(vbs5,vbs8);
		vbs14 = vec_add(vbs6,vbs7);
		vbs15 = vec_madd(v7,v6,vzero);
		vbs16 = vec_madd(v8,v5,vzero);

		/* Stage 3: coefficients from pnts[2]. */
		costab = pnts[2];

		v1 = vec_perm(vbs10,vbs10,vinvert);
		v5 = vec_perm(vbs14,vbs14,vinvert);
		vbs1 = vec_add(v1,vbs9);
		vbs5 = vec_add(v5,vbs13);
		v2 = vec_sub(vbs9,v1);
		v6 = vec_sub(vbs13,v5);
		v3 = vec_ld(0,costab);
		vbs11 = vec_perm(vbs11,vbs11,vinvert);
		vbs15 = vec_perm(vbs15,vbs15,vinvert);
		vbs3 = vec_add(vbs11,vbs12);
		vbs7 = vec_add(vbs15,vbs16);
		v4 = vec_sub(vbs12,vbs11);
		v7 = vec_sub(vbs16,vbs15);
		vbs2 = vec_madd(v2,v3,vzero);
		vbs4 = vec_madd(v4,v3,vzero);
		vbs6 = vec_madd(v6,v3,vzero);
		vbs8 = vec_madd(v7,v3,vzero);

		vbs2 = vec_perm(vbs2,vbs2,vinvert);
		vbs4 = vec_perm(vbs4,vbs4,vinvert);
		vbs6 = vec_perm(vbs6,vbs6,vinvert);
		vbs8 = vec_perm(vbs8,vbs8,vinvert);

		/* Stage 4: coefficients from pnts[3]; first two table entries are
		   splatted and interleaved into v1. */
		costab = pnts[3];

#ifdef __APPLE__
		vperm1 = (vector unsigned char)(0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
		vperm2 = (vector unsigned char)(12,13,14,15,8,9,10,11,28,29,30,31,24,25,26,27);
		vperm3 = (vector unsigned char)(0,1,2,3,4,5,6,7,20,21,22,23,16,17,18,19);
#else
		vperm1 = (vector unsigned char){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23};
		vperm2 = (vector unsigned char){12,13,14,15,8,9,10,11,28,29,30,31,24,25,26,27};
		vperm3 = (vector unsigned char){0,1,2,3,4,5,6,7,20,21,22,23,16,17,18,19};
#endif
		/* vperm4 selects the same pattern as vperm3, shifted by 8 bytes. */
		vperm4 = vec_add(vperm3,vec_splat_u8(8));

		v1 = vec_ld(0,costab);
		v2 = vec_splat(v1,0);
		v3 = vec_splat(v1,1);
		v1 = vec_mergeh(v2,v3);

		v2 = vec_perm(vbs1,vbs3,vperm1);
		v3 = vec_perm(vbs2,vbs4,vperm1);
		v4 = vec_perm(vbs1,vbs3,vperm2);
		v5 = vec_perm(vbs2,vbs4,vperm2);
		v6 = vec_sub(v2,v4);
		v7 = vec_sub(v3,v5);
		v2 = vec_add(v2,v4);
		v3 = vec_add(v3,v5);
		v4 = vec_madd(v6,v1,vzero);
		/* vec_nmsub yields the negated product here (addend is zero). */
		v5 = vec_nmsub(v7,v1,vzero);
		vbs9 = vec_perm(v2,v4,vperm3);
		vbs11 = vec_perm(v2,v4,vperm4);
		vbs10 = vec_perm(v3,v5,vperm3);
		vbs12 = vec_perm(v3,v5,vperm4);

		v2 = vec_perm(vbs5,vbs7,vperm1);
		v3 = vec_perm(vbs6,vbs8,vperm1);
		v4 = vec_perm(vbs5,vbs7,vperm2);
		v5 = vec_perm(vbs6,vbs8,vperm2);
		v6 = vec_sub(v2,v4);
		v7 = vec_sub(v3,v5);
		v2 = vec_add(v2,v4);
		v3 = vec_add(v3,v5);
		v4 = vec_madd(v6,v1,vzero);
		v5 = vec_nmsub(v7,v1,vzero);
		vbs13 = vec_perm(v2,v4,vperm3);
		vbs15 = vec_perm(v2,v4,vperm4);
		vbs14 = vec_perm(v3,v5,vperm3);
		vbs16 = vec_perm(v3,v5,vperm4);

		/* Stage 5: single coefficient pnts[4][0], applied with alternating sign. */
		costab = pnts[4];

		v1 = vec_lde(0,costab);
#ifdef __APPLE__
		v2 = (vector float)(1.0f,-1.0f,1.0f,-1.0f);
#else
		v2 = (vector float){1.0f,-1.0f,1.0f,-1.0f};
#endif
		v3 = vec_splat(v1,0);
		v1 = vec_madd(v2,v3,vzero);

		v2 = vec_mergeh(vbs9,vbs10);
		v3 = vec_mergel(vbs9,vbs10);
		v4 = vec_mergeh(vbs11,vbs12);
		v5 = vec_mergel(vbs11,vbs12);
		v6 = vec_mergeh(v2,v3);
		v7 = vec_mergel(v2,v3);
		v2 = vec_mergeh(v4,v5);
		v3 = vec_mergel(v4,v5);
		v4 = vec_sub(v6,v7);
		v5 = vec_sub(v2,v3);
		v6 = vec_add(v6,v7);
		v7 = vec_add(v2,v3);
		v2 = vec_madd(v4,v1,vzero);
		v3 = vec_madd(v5,v1,vzero);
		vbs1 = vec_mergeh(v6,v2);
		vbs2 = vec_mergel(v6,v2);
		vbs3 = vec_mergeh(v7,v3);
		vbs4 = vec_mergel(v7,v3);

		v2 = vec_mergeh(vbs13,vbs14);
		v3 = vec_mergel(vbs13,vbs14);
		v4 = vec_mergeh(vbs15,vbs16);
		v5 = vec_mergel(vbs15,vbs16);
		v6 = vec_mergeh(v2,v3);
		v7 = vec_mergel(v2,v3);
		v2 = vec_mergeh(v4,v5);
		v3 = vec_mergel(v4,v5);
		v4 = vec_sub(v6,v7);
		v5 = vec_sub(v2,v3);
		v6 = vec_add(v6,v7);
		v7 = vec_add(v2,v3);
		v2 = vec_madd(v4,v1,vzero);
		v3 = vec_madd(v5,v1,vzero);
		vbs5 = vec_mergeh(v6,v2);
		vbs6 = vec_mergel(v6,v2);
		vbs7 = vec_mergeh(v7,v3);
		vbs8 = vec_mergel(v7,v3);

		/* Spill to bufs. Only bufs[0..31] (vbs1..vbs8) are read below; the
		   stores of vbs9..vbs16 into bufs[32..63] are kept as in the original. */
		vec_st(vbs1,0,bufs);
		vec_st(vbs2,16,bufs);
		vec_st(vbs3,32,bufs);
		vec_st(vbs4,48,bufs);
		vec_st(vbs5,64,bufs);
		vec_st(vbs6,80,bufs);
		vec_st(vbs7,96,bufs);
		vec_st(vbs8,112,bufs);
		vec_st(vbs9,128,bufs);
		vec_st(vbs10,144,bufs);
		vec_st(vbs11,160,bufs);
		vec_st(vbs12,176,bufs);
		vec_st(vbs13,192,bufs);
		vec_st(vbs14,208,bufs);
		vec_st(vbs15,224,bufs);
		vec_st(vbs16,240,bufs);
	}

	/* Scalar in-place additions on bufs[0..31]; statement order matters
	   because each line consumes values updated by the previous ones. */
	{
		register real *b1;
		register int i;

		for(b1=bufs,i=8;i;i--,b1+=4)
			b1[2] += b1[3];

		for(b1=bufs,i=4;i;i--,b1+=8)
		{
			b1[4] += b1[6];
			b1[6] += b1[5];
			b1[5] += b1[7];
		}

		for(b1=bufs,i=2;i;i--,b1+=16)
		{
			b1[8] += b1[12];
			b1[12] += b1[10];
			b1[10] += b1[14];
			b1[14] += b1[9];
			b1[9] += b1[13];
			b1[13] += b1[11];
			b1[11] += b1[15];
		}
	}

	/* Scatter to the outputs with a stride of 0x10 elements.
	   out0 is filled from index 0x10*16 down to 0, out1 from 0 up to 0x10*15. */
	out0[0x10*16] = bufs[0];
	out0[0x10*15] = bufs[16+0] + bufs[16+8];
	out0[0x10*14] = bufs[8];
	out0[0x10*13] = bufs[16+8] + bufs[16+4];
	out0[0x10*12] = bufs[4];
	out0[0x10*11] = bufs[16+4] + bufs[16+12];
	out0[0x10*10] = bufs[12];
	out0[0x10* 9] = bufs[16+12] + bufs[16+2];
	out0[0x10* 8] = bufs[2];
	out0[0x10* 7] = bufs[16+2] + bufs[16+10];
	out0[0x10* 6] = bufs[10];
	out0[0x10* 5] = bufs[16+10] + bufs[16+6];
	out0[0x10* 4] = bufs[6];
	out0[0x10* 3] = bufs[16+6] + bufs[16+14];
	out0[0x10* 2] = bufs[14];
	out0[0x10* 1] = bufs[16+14] + bufs[16+1];
	out0[0x10* 0] = bufs[1];

	out1[0x10* 0] = bufs[1];
	out1[0x10* 1] = bufs[16+1] + bufs[16+9];
	out1[0x10* 2] = bufs[9];
	out1[0x10* 3] = bufs[16+9] + bufs[16+5];
	out1[0x10* 4] = bufs[5];
	out1[0x10* 5] = bufs[16+5] + bufs[16+13];
	out1[0x10* 6] = bufs[13];
	out1[0x10* 7] = bufs[16+13] + bufs[16+3];
	out1[0x10* 8] = bufs[3];
	out1[0x10* 9] = bufs[16+3] + bufs[16+11];
	out1[0x10*10] = bufs[11];
	out1[0x10*11] = bufs[16+11] + bufs[16+7];
	out1[0x10*12] = bufs[7];
	out1[0x10*13] = bufs[16+7] + bufs[16+15];
	out1[0x10*14] = bufs[15];
	out1[0x10*15] = bufs[16+15];
}

336
src/libmpg123/dct64_i386.c Normal file
View File

@@ -0,0 +1,336 @@
/*
dct64_i386.c: DCT64, a C variant for i386
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
/*
* Discrete Cosine Transform (DCT) for subband synthesis
* optimized for machines with no auto-increment.
* The performance is highly compiler dependent. Maybe
* the dct64.c version for 'normal' processors may be faster
* even for Intel processors.
*/
#include "mpg123lib_intern.h"
/*
 * dct64_1: 32-point butterfly network behind dct64_i386().
 *
 * samples - 32 input values (indices 0x00..0x1F are read)
 * b1, b2  - two scratch areas of 32 reals each, used alternately as the
 *           source and destination of successive stages
 * out0    - receives 17 results at out0[0x10*0] .. out0[0x10*16]
 * out1    - receives 16 results at out1[0x10*0] .. out1[0x10*15]
 *
 * pnts[0..4] supply the per-stage coefficient tables (16, 8, 4, 2 and 1
 * entries are used respectively).
 */
static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples)
{
/* Stage 1 (samples -> b1): pair samples[i] with samples[0x1F-i].
   The sum goes to b1[i], the difference times costab[i] to b1[0x1F-i]. */
{
register real *costab = pnts[0];
b1[0x00] = samples[0x00] + samples[0x1F];
b1[0x01] = samples[0x01] + samples[0x1E];
b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0];
b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1];
b1[0x02] = samples[0x02] + samples[0x1D];
b1[0x03] = samples[0x03] + samples[0x1C];
b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2];
b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3];
b1[0x04] = samples[0x04] + samples[0x1B];
b1[0x05] = samples[0x05] + samples[0x1A];
b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4];
b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5];
b1[0x06] = samples[0x06] + samples[0x19];
b1[0x07] = samples[0x07] + samples[0x18];
b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6];
b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7];
b1[0x08] = samples[0x08] + samples[0x17];
b1[0x09] = samples[0x09] + samples[0x16];
b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8];
b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9];
b1[0x0A] = samples[0x0A] + samples[0x15];
b1[0x0B] = samples[0x0B] + samples[0x14];
b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA];
b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB];
b1[0x0C] = samples[0x0C] + samples[0x13];
b1[0x0D] = samples[0x0D] + samples[0x12];
b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC];
b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD];
b1[0x0E] = samples[0x0E] + samples[0x11];
b1[0x0F] = samples[0x0F] + samples[0x10];
b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE];
b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF];
}
/* Stage 2 (b1 -> b2): the same fold inside each 16-element half.
   In the upper half the difference is taken with the operands swapped. */
{
register real *costab = pnts[1];
b2[0x00] = b1[0x00] + b1[0x0F];
b2[0x01] = b1[0x01] + b1[0x0E];
b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0];
b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1];
b2[0x02] = b1[0x02] + b1[0x0D];
b2[0x03] = b1[0x03] + b1[0x0C];
b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2];
b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3];
b2[0x04] = b1[0x04] + b1[0x0B];
b2[0x05] = b1[0x05] + b1[0x0A];
b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4];
b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5];
b2[0x06] = b1[0x06] + b1[0x09];
b2[0x07] = b1[0x07] + b1[0x08];
b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6];
b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7];
/* */
/* upper half, indices 0x10..0x1F */
b2[0x10] = b1[0x10] + b1[0x1F];
b2[0x11] = b1[0x11] + b1[0x1E];
b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0];
b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1];
b2[0x12] = b1[0x12] + b1[0x1D];
b2[0x13] = b1[0x13] + b1[0x1C];
b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2];
b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3];
b2[0x14] = b1[0x14] + b1[0x1B];
b2[0x15] = b1[0x15] + b1[0x1A];
b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4];
b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5];
b2[0x16] = b1[0x16] + b1[0x19];
b2[0x17] = b1[0x17] + b1[0x18];
b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6];
b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7];
}
/* Stage 3 (b2 -> b1): fold inside each group of 8; the operand order of
   the difference alternates from one group to the next. */
{
register real *costab = pnts[2];
b1[0x00] = b2[0x00] + b2[0x07];
b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0];
b1[0x01] = b2[0x01] + b2[0x06];
b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1];
b1[0x02] = b2[0x02] + b2[0x05];
b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2];
b1[0x03] = b2[0x03] + b2[0x04];
b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3];
b1[0x08] = b2[0x08] + b2[0x0F];
b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0];
b1[0x09] = b2[0x09] + b2[0x0E];
b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1];
b1[0x0A] = b2[0x0A] + b2[0x0D];
b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2];
b1[0x0B] = b2[0x0B] + b2[0x0C];
b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3];
b1[0x10] = b2[0x10] + b2[0x17];
b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0];
b1[0x11] = b2[0x11] + b2[0x16];
b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1];
b1[0x12] = b2[0x12] + b2[0x15];
b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2];
b1[0x13] = b2[0x13] + b2[0x14];
b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3];
b1[0x18] = b2[0x18] + b2[0x1F];
b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0];
b1[0x19] = b2[0x19] + b2[0x1E];
b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1];
b1[0x1A] = b2[0x1A] + b2[0x1D];
b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2];
b1[0x1B] = b2[0x1B] + b2[0x1C];
b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3];
}
/* Stage 4 (b1 -> b2): fold inside each group of 4 with the two
   coefficients pnts[3][0] and pnts[3][1]. */
{
register real const cos0 = pnts[3][0];
register real const cos1 = pnts[3][1];
b2[0x00] = b1[0x00] + b1[0x03];
b2[0x03] = (b1[0x00] - b1[0x03]) * cos0;
b2[0x01] = b1[0x01] + b1[0x02];
b2[0x02] = (b1[0x01] - b1[0x02]) * cos1;
b2[0x04] = b1[0x04] + b1[0x07];
b2[0x07] = (b1[0x07] - b1[0x04]) * cos0;
b2[0x05] = b1[0x05] + b1[0x06];
b2[0x06] = (b1[0x06] - b1[0x05]) * cos1;
b2[0x08] = b1[0x08] + b1[0x0B];
b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0;
b2[0x09] = b1[0x09] + b1[0x0A];
b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1;
b2[0x0C] = b1[0x0C] + b1[0x0F];
b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0;
b2[0x0D] = b1[0x0D] + b1[0x0E];
b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1;
b2[0x10] = b1[0x10] + b1[0x13];
b2[0x13] = (b1[0x10] - b1[0x13]) * cos0;
b2[0x11] = b1[0x11] + b1[0x12];
b2[0x12] = (b1[0x11] - b1[0x12]) * cos1;
b2[0x14] = b1[0x14] + b1[0x17];
b2[0x17] = (b1[0x17] - b1[0x14]) * cos0;
b2[0x15] = b1[0x15] + b1[0x16];
b2[0x16] = (b1[0x16] - b1[0x15]) * cos1;
b2[0x18] = b1[0x18] + b1[0x1B];
b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0;
b2[0x19] = b1[0x19] + b1[0x1A];
b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1;
b2[0x1C] = b1[0x1C] + b1[0x1F];
b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0;
b2[0x1D] = b1[0x1D] + b1[0x1E];
b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1;
}
/* Stage 5 (b2 -> b1): pairwise butterflies with the single coefficient
   pnts[4][0], interleaved with in-place "+=" accumulations inside each
   group of 8. The order of the "+=" lines matters: each one reads values
   produced by the lines just before it. */
{
register real const cos0 = pnts[4][0];
b1[0x00] = b2[0x00] + b2[0x01];
b1[0x01] = (b2[0x00] - b2[0x01]) * cos0;
b1[0x02] = b2[0x02] + b2[0x03];
b1[0x03] = (b2[0x03] - b2[0x02]) * cos0;
b1[0x02] += b1[0x03];
b1[0x04] = b2[0x04] + b2[0x05];
b1[0x05] = (b2[0x04] - b2[0x05]) * cos0;
b1[0x06] = b2[0x06] + b2[0x07];
b1[0x07] = (b2[0x07] - b2[0x06]) * cos0;
b1[0x06] += b1[0x07];
b1[0x04] += b1[0x06];
b1[0x06] += b1[0x05];
b1[0x05] += b1[0x07];
b1[0x08] = b2[0x08] + b2[0x09];
b1[0x09] = (b2[0x08] - b2[0x09]) * cos0;
b1[0x0A] = b2[0x0A] + b2[0x0B];
b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0;
b1[0x0A] += b1[0x0B];
b1[0x0C] = b2[0x0C] + b2[0x0D];
b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0;
b1[0x0E] = b2[0x0E] + b2[0x0F];
b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0;
b1[0x0E] += b1[0x0F];
b1[0x0C] += b1[0x0E];
b1[0x0E] += b1[0x0D];
b1[0x0D] += b1[0x0F];
b1[0x10] = b2[0x10] + b2[0x11];
b1[0x11] = (b2[0x10] - b2[0x11]) * cos0;
b1[0x12] = b2[0x12] + b2[0x13];
b1[0x13] = (b2[0x13] - b2[0x12]) * cos0;
b1[0x12] += b1[0x13];
b1[0x14] = b2[0x14] + b2[0x15];
b1[0x15] = (b2[0x14] - b2[0x15]) * cos0;
b1[0x16] = b2[0x16] + b2[0x17];
b1[0x17] = (b2[0x17] - b2[0x16]) * cos0;
b1[0x16] += b1[0x17];
b1[0x14] += b1[0x16];
b1[0x16] += b1[0x15];
b1[0x15] += b1[0x17];
b1[0x18] = b2[0x18] + b2[0x19];
b1[0x19] = (b2[0x18] - b2[0x19]) * cos0;
b1[0x1A] = b2[0x1A] + b2[0x1B];
b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0;
b1[0x1A] += b1[0x1B];
b1[0x1C] = b2[0x1C] + b2[0x1D];
b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0;
b1[0x1E] = b2[0x1E] + b2[0x1F];
b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0;
b1[0x1E] += b1[0x1F];
b1[0x1C] += b1[0x1E];
b1[0x1E] += b1[0x1D];
b1[0x1D] += b1[0x1F];
}
/* Output: b1[0x00..0x07] are copied straight to every fourth slot.
   b1[0x01] is written to slot 0 of both out0 and out1. */
out0[0x10*16] = b1[0x00];
out0[0x10*12] = b1[0x04];
out0[0x10* 8] = b1[0x02];
out0[0x10* 4] = b1[0x06];
out0[0x10* 0] = b1[0x01];
out1[0x10* 0] = b1[0x01];
out1[0x10* 4] = b1[0x05];
out1[0x10* 8] = b1[0x03];
out1[0x10*12] = b1[0x07];
/* b1[0x08..0x0F] feed the slots 2, 6, 10, 14. The enabled "#if 1" variant
   sums neighbouring values without modifying b1; the disabled "#else"
   variant accumulates into b1 in place before storing. */
#if 1
out0[0x10*14] = b1[0x08] + b1[0x0C];
out0[0x10*10] = b1[0x0C] + b1[0x0a];
out0[0x10* 6] = b1[0x0A] + b1[0x0E];
out0[0x10* 2] = b1[0x0E] + b1[0x09];
out1[0x10* 2] = b1[0x09] + b1[0x0D];
out1[0x10* 6] = b1[0x0D] + b1[0x0B];
out1[0x10*10] = b1[0x0B] + b1[0x0F];
out1[0x10*14] = b1[0x0F];
#else
b1[0x08] += b1[0x0C];
out0[0x10*14] = b1[0x08];
b1[0x0C] += b1[0x0a];
out0[0x10*10] = b1[0x0C];
b1[0x0A] += b1[0x0E];
out0[0x10* 6] = b1[0x0A];
b1[0x0E] += b1[0x09];
out0[0x10* 2] = b1[0x0E];
b1[0x09] += b1[0x0D];
out1[0x10* 2] = b1[0x09];
b1[0x0D] += b1[0x0B];
out1[0x10* 6] = b1[0x0D];
b1[0x0B] += b1[0x0F];
out1[0x10*10] = b1[0x0B];
out1[0x10*14] = b1[0x0F];
#endif
/* b1[0x10..0x1F] feed the odd slots. tmp holds the sum of two values from
   0x18..0x1F and is shared by the two outputs that need it. */
{
real tmp;
tmp = b1[0x18] + b1[0x1C];
out0[0x10*15] = tmp + b1[0x10];
out0[0x10*13] = tmp + b1[0x14];
tmp = b1[0x1C] + b1[0x1A];
out0[0x10*11] = tmp + b1[0x14];
out0[0x10* 9] = tmp + b1[0x12];
tmp = b1[0x1A] + b1[0x1E];
out0[0x10* 7] = tmp + b1[0x12];
out0[0x10* 5] = tmp + b1[0x16];
tmp = b1[0x1E] + b1[0x19];
out0[0x10* 3] = tmp + b1[0x16];
out0[0x10* 1] = tmp + b1[0x11];
tmp = b1[0x19] + b1[0x1D];
out1[0x10* 1] = tmp + b1[0x11];
out1[0x10* 3] = tmp + b1[0x15];
tmp = b1[0x1D] + b1[0x1B];
out1[0x10* 5] = tmp + b1[0x15];
out1[0x10* 7] = tmp + b1[0x13];
tmp = b1[0x1B] + b1[0x1F];
out1[0x10* 9] = tmp + b1[0x13];
out1[0x10*11] = tmp + b1[0x17];
out1[0x10*13] = b1[0x17] + b1[0x1F];
out1[0x10*15] = b1[0x1F];
}
}
/*
 * Entry point of the i386 variant: a, b are the two output pointers and
 * c points at the 32 input samples. The transform itself lives in dct64_1;
 * going through that call is a trick to make GCC use (new) registers for
 * the two pointers into the local scratch array.
 */
void dct64_i386(real *a,real *b,real *c)
{
	/* two consecutive scratch areas of 0x20 reals each */
	real scratch[0x20 + 0x20];
	real *first_half = scratch;
	real *second_half = scratch + 0x20;

	dct64_1(a, b, first_half, second_half, c);
}

342
src/libmpg123/dct64_i486.c Normal file
View File

@@ -0,0 +1,342 @@
/*
dct64_i486.c: DCT64, a plain C variant for i486
copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Fabrice Bellard
*/
/* Discrete Cosine Transform (DCT) for subband synthesis.
*
* This code is optimized for 80486. It should be compiled with gcc
* 2.7.2 or higher.
*
* Note: This code does not give the necessary accuracy. Moreover, no
* overflow tests are done.
*
* (c) 1998 Fabrice Bellard.
*/
#include "mpg123lib_intern.h"
/*
 * Integer butterfly coefficients, named COS_<stage>_<index>.
 * They are scaled by 2^15: MUL() and MULL() below shift the product
 * right by 15 bits in total to remove that scaling again.
 */
#define COS_0_0 16403
#define COS_0_1 16563
#define COS_0_2 16890
#define COS_0_3 17401
#define COS_0_4 18124
#define COS_0_5 19101
#define COS_0_6 20398
#define COS_0_7 22112
#define COS_0_8 24396
#define COS_0_9 27503
#define COS_0_10 31869
#define COS_0_11 38320
#define COS_0_12 48633
#define COS_0_13 67429
#define COS_0_14 111660
#define COS_0_15 333906
#define COS_1_0 16463
#define COS_1_1 17121
#define COS_1_2 18577
#define COS_1_3 21195
#define COS_1_4 25826
#define COS_1_5 34756
#define COS_1_6 56441
#define COS_1_7 167154
#define COS_2_0 16704
#define COS_2_1 19704
#define COS_2_2 29490
#define COS_2_3 83981
#define COS_3_0 17733
#define COS_3_1 42813
#define COS_4_0 23170
/* Store expr into output slot n; slots are FIR_BUFFER_SIZE elements apart. */
#define SETOUT(out,n,expr) ((out)[FIR_BUFFER_SIZE*(n)]=(expr))
/* (a*b) >> 15 computed with a 64-bit product. */
#define MULL(a,b) (((long long)(a)*(long long)(b)) >> 15)
/*
 * (a*b) >> 15 computed in plain int arithmetic. Trailing zero bits of b
 * (up to six) are shifted out of b before the multiplication and the final
 * shift is reduced accordingly, which keeps the intermediate product smaller.
 * Both arguments are fully parenthesized so that an expression passed as b
 * is evaluated as a whole before "& mask" and ">> k" are applied.
 * Note that a and b are expanded several times: do not pass expressions
 * with side effects.
 */
#define MUL(a,b) \
(\
((!((b) & 0x3F)) ? (((a)*((b) >> 6)) >> 9) :\
((!((b) & 0x1F)) ? (((a)*((b) >> 5)) >> 10) :\
((!((b) & 0x0F)) ? (((a)*((b) >> 4)) >> 11) :\
((!((b) & 0x07)) ? (((a)*((b) >> 3)) >> 12) :\
((!((b) & 0x03)) ? (((a)*((b) >> 2)) >> 13) :\
((!((b) & 0x01)) ? (((a)*((b) >> 1)) >> 14) :\
(((a)*(b)) >> 15))))))))
/*
 * dct64_1_486: integer 32-point butterfly network behind dct64_i486().
 *
 * b2   - holds the 32 input values on entry; reused as scratch afterwards
 * b1   - scratch area of 32 ints; holds the values that are finally output
 * out0 - receives 17 results through SETOUT, slots 0..16
 * out1 - receives 16 results through SETOUT, slots 0..15
 *
 * MUL is used for most coefficients; MULL (64-bit product) is used for
 * COS_0_13..COS_0_15, COS_1_7 and COS_2_3.
 */
void dct64_1_486(int *out0,int *out1,int *b1,int *b2)
{
/* Stage 1 (b2 -> b1): pair element i with element 0x1F-i. */
b1[0x00] = b2[0x00] + b2[0x1F];
b1[0x1F] = MUL((b2[0x00] - b2[0x1F]),COS_0_0);
b1[0x01] = b2[0x01] + b2[0x1E];
b1[0x1E] = MUL((b2[0x01] - b2[0x1E]),COS_0_1);
b1[0x02] = b2[0x02] + b2[0x1D];
b1[0x1D] = MUL((b2[0x02] - b2[0x1D]),COS_0_2);
b1[0x03] = b2[0x03] + b2[0x1C];
b1[0x1C] = MUL((b2[0x03] - b2[0x1C]),COS_0_3);
b1[0x04] = b2[0x04] + b2[0x1B];
b1[0x1B] = MUL((b2[0x04] - b2[0x1B]),COS_0_4);
b1[0x05] = b2[0x05] + b2[0x1A];
b1[0x1A] = MUL((b2[0x05] - b2[0x1A]),COS_0_5);
b1[0x06] = b2[0x06] + b2[0x19];
b1[0x19] = MUL((b2[0x06] - b2[0x19]),COS_0_6);
b1[0x07] = b2[0x07] + b2[0x18];
b1[0x18] = MUL((b2[0x07] - b2[0x18]),COS_0_7);
b1[0x08] = b2[0x08] + b2[0x17];
b1[0x17] = MUL((b2[0x08] - b2[0x17]),COS_0_8);
b1[0x09] = b2[0x09] + b2[0x16];
b1[0x16] = MUL((b2[0x09] - b2[0x16]),COS_0_9);
b1[0x0A] = b2[0x0A] + b2[0x15];
b1[0x15] = MUL((b2[0x0A] - b2[0x15]),COS_0_10);
b1[0x0B] = b2[0x0B] + b2[0x14];
b1[0x14] = MUL((b2[0x0B] - b2[0x14]),COS_0_11);
b1[0x0C] = b2[0x0C] + b2[0x13];
b1[0x13] = MUL((b2[0x0C] - b2[0x13]),COS_0_12);
b1[0x0D] = b2[0x0D] + b2[0x12];
b1[0x12] = MULL((b2[0x0D] - b2[0x12]),COS_0_13);
b1[0x0E] = b2[0x0E] + b2[0x11];
b1[0x11] = MULL((b2[0x0E] - b2[0x11]),COS_0_14);
b1[0x0F] = b2[0x0F] + b2[0x10];
b1[0x10] = MULL((b2[0x0F] - b2[0x10]),COS_0_15);
/* Stage 2 (b1 -> b2): the same fold inside each 16-element half; the
   upper half takes its differences with the operands swapped. */
b2[0x00] = b1[0x00] + b1[0x0F];
b2[0x0F] = MUL((b1[0x00] - b1[0x0F]),COS_1_0);
b2[0x01] = b1[0x01] + b1[0x0E];
b2[0x0E] = MUL((b1[0x01] - b1[0x0E]),COS_1_1);
b2[0x02] = b1[0x02] + b1[0x0D];
b2[0x0D] = MUL((b1[0x02] - b1[0x0D]),COS_1_2);
b2[0x03] = b1[0x03] + b1[0x0C];
b2[0x0C] = MUL((b1[0x03] - b1[0x0C]),COS_1_3);
b2[0x04] = b1[0x04] + b1[0x0B];
b2[0x0B] = MUL((b1[0x04] - b1[0x0B]),COS_1_4);
b2[0x05] = b1[0x05] + b1[0x0A];
b2[0x0A] = MUL((b1[0x05] - b1[0x0A]),COS_1_5);
b2[0x06] = b1[0x06] + b1[0x09];
b2[0x09] = MUL((b1[0x06] - b1[0x09]),COS_1_6);
b2[0x07] = b1[0x07] + b1[0x08];
b2[0x08] = MULL((b1[0x07] - b1[0x08]),COS_1_7);
b2[0x10] = b1[0x10] + b1[0x1F];
b2[0x1F] = MUL((b1[0x1F] - b1[0x10]),COS_1_0);
b2[0x11] = b1[0x11] + b1[0x1E];
b2[0x1E] = MUL((b1[0x1E] - b1[0x11]),COS_1_1);
b2[0x12] = b1[0x12] + b1[0x1D];
b2[0x1D] = MUL((b1[0x1D] - b1[0x12]),COS_1_2);
b2[0x13] = b1[0x13] + b1[0x1C];
b2[0x1C] = MUL((b1[0x1C] - b1[0x13]),COS_1_3);
b2[0x14] = b1[0x14] + b1[0x1B];
b2[0x1B] = MUL((b1[0x1B] - b1[0x14]),COS_1_4);
b2[0x15] = b1[0x15] + b1[0x1A];
b2[0x1A] = MUL((b1[0x1A] - b1[0x15]),COS_1_5);
b2[0x16] = b1[0x16] + b1[0x19];
b2[0x19] = MUL((b1[0x19] - b1[0x16]),COS_1_6);
b2[0x17] = b1[0x17] + b1[0x18];
b2[0x18] = MULL((b1[0x18] - b1[0x17]),COS_1_7);
/* Stage 3 (b2 -> b1): fold inside each group of 8. */
b1[0x00] = b2[0x00] + b2[0x07];
b1[0x07] = MUL((b2[0x00] - b2[0x07]),COS_2_0);
b1[0x01] = b2[0x01] + b2[0x06];
b1[0x06] = MUL((b2[0x01] - b2[0x06]),COS_2_1);
b1[0x02] = b2[0x02] + b2[0x05];
b1[0x05] = MUL((b2[0x02] - b2[0x05]),COS_2_2);
b1[0x03] = b2[0x03] + b2[0x04];
b1[0x04] = MULL((b2[0x03] - b2[0x04]),COS_2_3);
b1[0x08] = b2[0x08] + b2[0x0F];
b1[0x0F] = MUL((b2[0x0F] - b2[0x08]),COS_2_0);
b1[0x09] = b2[0x09] + b2[0x0E];
b1[0x0E] = MUL((b2[0x0E] - b2[0x09]),COS_2_1);
b1[0x0A] = b2[0x0A] + b2[0x0D];
b1[0x0D] = MUL((b2[0x0D] - b2[0x0A]),COS_2_2);
b1[0x0B] = b2[0x0B] + b2[0x0C];
b1[0x0C] = MULL((b2[0x0C] - b2[0x0B]),COS_2_3);
b1[0x10] = b2[0x10] + b2[0x17];
b1[0x17] = MUL((b2[0x10] - b2[0x17]),COS_2_0);
b1[0x11] = b2[0x11] + b2[0x16];
b1[0x16] = MUL((b2[0x11] - b2[0x16]),COS_2_1);
b1[0x12] = b2[0x12] + b2[0x15];
b1[0x15] = MUL((b2[0x12] - b2[0x15]),COS_2_2);
b1[0x13] = b2[0x13] + b2[0x14];
b1[0x14] = MULL((b2[0x13] - b2[0x14]),COS_2_3);
b1[0x18] = b2[0x18] + b2[0x1F];
b1[0x1F] = MUL((b2[0x1F] - b2[0x18]),COS_2_0);
b1[0x19] = b2[0x19] + b2[0x1E];
b1[0x1E] = MUL((b2[0x1E] - b2[0x19]),COS_2_1);
b1[0x1A] = b2[0x1A] + b2[0x1D];
b1[0x1D] = MUL((b2[0x1D] - b2[0x1A]),COS_2_2);
b1[0x1B] = b2[0x1B] + b2[0x1C];
b1[0x1C] = MULL((b2[0x1C] - b2[0x1B]),COS_2_3);
/* Stage 4 (b1 -> b2): fold inside each group of 4. */
b2[0x00] = b1[0x00] + b1[0x03];
b2[0x03] = MUL((b1[0x00] - b1[0x03]),COS_3_0);
b2[0x01] = b1[0x01] + b1[0x02];
b2[0x02] = MUL((b1[0x01] - b1[0x02]),COS_3_1);
b2[0x04] = b1[0x04] + b1[0x07];
b2[0x07] = MUL((b1[0x07] - b1[0x04]),COS_3_0);
b2[0x05] = b1[0x05] + b1[0x06];
b2[0x06] = MUL((b1[0x06] - b1[0x05]),COS_3_1);
b2[0x08] = b1[0x08] + b1[0x0B];
b2[0x0B] = MUL((b1[0x08] - b1[0x0B]),COS_3_0);
b2[0x09] = b1[0x09] + b1[0x0A];
b2[0x0A] = MUL((b1[0x09] - b1[0x0A]),COS_3_1);
b2[0x0C] = b1[0x0C] + b1[0x0F];
b2[0x0F] = MUL((b1[0x0F] - b1[0x0C]),COS_3_0);
b2[0x0D] = b1[0x0D] + b1[0x0E];
b2[0x0E] = MUL((b1[0x0E] - b1[0x0D]),COS_3_1);
b2[0x10] = b1[0x10] + b1[0x13];
b2[0x13] = MUL((b1[0x10] - b1[0x13]),COS_3_0);
b2[0x11] = b1[0x11] + b1[0x12];
b2[0x12] = MUL((b1[0x11] - b1[0x12]),COS_3_1);
b2[0x14] = b1[0x14] + b1[0x17];
b2[0x17] = MUL((b1[0x17] - b1[0x14]),COS_3_0);
b2[0x15] = b1[0x15] + b1[0x16];
b2[0x16] = MUL((b1[0x16] - b1[0x15]),COS_3_1);
b2[0x18] = b1[0x18] + b1[0x1B];
b2[0x1B] = MUL((b1[0x18] - b1[0x1B]),COS_3_0);
b2[0x19] = b1[0x19] + b1[0x1A];
b2[0x1A] = MUL((b1[0x19] - b1[0x1A]),COS_3_1);
b2[0x1C] = b1[0x1C] + b1[0x1F];
b2[0x1F] = MUL((b1[0x1F] - b1[0x1C]),COS_3_0);
b2[0x1D] = b1[0x1D] + b1[0x1E];
b2[0x1E] = MUL((b1[0x1E] - b1[0x1D]),COS_3_1);
/* Stage 5 (b2 -> b1): pairwise butterflies with COS_4_0, four elements
   per loop iteration. */
{
int i;
for(i=0;i<32;i+=4) {
b1[i+0x00] = b2[i+0x00] + b2[i+0x01];
b1[i+0x01] = MUL((b2[i+0x00] - b2[i+0x01]),COS_4_0);
b1[i+0x02] = b2[i+0x02] + b2[i+0x03];
b1[i+0x03] = MUL((b2[i+0x03] - b2[i+0x02]),COS_4_0);
}
}
/* In-place accumulations inside each group of 8. The order matters:
   later lines read values updated by earlier ones. */
b1[0x02] += b1[0x03];
b1[0x06] += b1[0x07];
b1[0x04] += b1[0x06];
b1[0x06] += b1[0x05];
b1[0x05] += b1[0x07];
b1[0x0A] += b1[0x0B];
b1[0x0E] += b1[0x0F];
b1[0x0C] += b1[0x0E];
b1[0x0E] += b1[0x0D];
b1[0x0D] += b1[0x0F];
b1[0x12] += b1[0x13];
b1[0x16] += b1[0x17];
b1[0x14] += b1[0x16];
b1[0x16] += b1[0x15];
b1[0x15] += b1[0x17];
b1[0x1A] += b1[0x1B];
b1[0x1E] += b1[0x1F];
b1[0x1C] += b1[0x1E];
b1[0x1E] += b1[0x1D];
b1[0x1D] += b1[0x1F];
/* Output: b1[0x00..0x07] go straight to every fourth slot; b1[0x01] is
   stored in slot 0 of both out0 and out1. */
SETOUT(out0,16,b1[0x00]);
SETOUT(out0,12,b1[0x04]);
SETOUT(out0, 8,b1[0x02]);
SETOUT(out0, 4,b1[0x06]);
SETOUT(out0, 0,b1[0x01]);
SETOUT(out1, 0,b1[0x01]);
SETOUT(out1, 4,b1[0x05]);
SETOUT(out1, 8,b1[0x03]);
SETOUT(out1,12,b1[0x07]);
/* b1[0x08..0x0F]: each value is accumulated into b1 in place and then
   stored to slots 2, 6, 10, 14. */
b1[0x08] += b1[0x0C];
SETOUT(out0,14,b1[0x08]);
b1[0x0C] += b1[0x0a];
SETOUT(out0,10,b1[0x0C]);
b1[0x0A] += b1[0x0E];
SETOUT(out0, 6,b1[0x0A]);
b1[0x0E] += b1[0x09];
SETOUT(out0, 2,b1[0x0E]);
b1[0x09] += b1[0x0D];
SETOUT(out1, 2,b1[0x09]);
b1[0x0D] += b1[0x0B];
SETOUT(out1, 6,b1[0x0D]);
b1[0x0B] += b1[0x0F];
SETOUT(out1,10,b1[0x0B]);
SETOUT(out1,14,b1[0x0F]);
/* b1[0x10..0x1F] feed the odd slots: values in 0x18..0x1F are accumulated
   in place and each one is then added to two values from 0x10..0x17. */
b1[0x18] += b1[0x1C];
SETOUT(out0,15,b1[0x10] + b1[0x18]);
SETOUT(out0,13,b1[0x18] + b1[0x14]);
b1[0x1C] += b1[0x1a];
SETOUT(out0,11,b1[0x14] + b1[0x1C]);
SETOUT(out0, 9,b1[0x1C] + b1[0x12]);
b1[0x1A] += b1[0x1E];
SETOUT(out0, 7,b1[0x12] + b1[0x1A]);
SETOUT(out0, 5,b1[0x1A] + b1[0x16]);
b1[0x1E] += b1[0x19];
SETOUT(out0, 3,b1[0x16] + b1[0x1E]);
SETOUT(out0, 1,b1[0x1E] + b1[0x11]);
b1[0x19] += b1[0x1D];
SETOUT(out1, 1,b1[0x11] + b1[0x19]);
SETOUT(out1, 3,b1[0x19] + b1[0x15]);
b1[0x1D] += b1[0x1B];
SETOUT(out1, 5,b1[0x15] + b1[0x1D]);
SETOUT(out1, 7,b1[0x1D] + b1[0x13]);
b1[0x1B] += b1[0x1F];
SETOUT(out1, 9,b1[0x13] + b1[0x1B]);
SETOUT(out1,11,b1[0x1B] + b1[0x17]);
SETOUT(out1,13,b1[0x17] + b1[0x1F]);
SETOUT(out1,15,b1[0x1F]);
}
/*
 * Entry point of the i486 variant.
 *
 * a, b    - the two output pointers handed on to dct64_1_486
 * samples - 32 input samples
 *
 * The samples are converted to int in bufs[0..31]; bufs+32 is the second
 * 32-int work area of dct64_1_486.
 * The call via dct64_1_486 is a trick to force GCC to use (new) registers
 * for the b1,b2 pointers into the bufs[xx] field.
 */
void dct64_i486(int *a,int *b,real *samples)
{
	int bufs[64];
	int i;
#ifdef REAL_IS_FIXED
#define TOINT(a) ((a) * 32768 / (int)REAL_FACTOR)
	for(i=0;i<32;i++) {
		bufs[i]=TOINT(samples[i]);
	}
#else
	int *p = bufs;
	register double const scale = ((65536.0 * 32) + 1) * 65536.0;
	/*
	 * Adding scale to a sample and taking the raw words of the resulting
	 * double is the int conversion used here. The words are fetched through
	 * a union instead of storing a double through a cast int pointer: that
	 * store violated the aliasing rules and was misaligned for odd indices.
	 * Assumes sizeof(double) == 2*sizeof(int), like the former 8-byte store.
	 */
	union { double d; int w[2]; } pun;
	for(i=0;i<32;i++) {
		pun.d = scale + *samples++;
		/* Same bytes as the former 8-byte store at p: both words are written,
		   the second one is overwritten by the next iteration. The last
		   iteration reaches bufs[32], still inside bufs. */
		p[0] = pun.w[0];
		p[1] = pun.w[1];
		p++;
	}
#endif
	dct64_1_486(a,b,bufs+32,bufs);
}

811
src/libmpg123/dct64_mmx.S Normal file
View File

@@ -0,0 +1,811 @@
/*
dct64_mmx.s: MMX optimized DCT64
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by the mysterious higway (apparently)
*/
#include "mangle.h"
/*
 * Two entry points share one body:
 *  - dct64_mmx clears %ecx and falls through into dct64_MMX;
 *  - dct64_MMX uses whatever value the caller left in %ecx.
 * %ecx is tested once ("orl %ecx,%ecx" below): zero selects the path that
 * stores single-precision floats, non-zero selects the .L01 path that
 * stores integers with fist/fistp.
 * Stack arguments (seen after the three pushes and the 256-byte frame):
 *  272(%esp) -> %esi, first output pointer
 *  276(%esp) -> %edi, second output pointer
 *  280(%esp) -> %eax, pointer to the 32 input floats
 * %ebx points at costab_mmxsse, the coefficient table.
 */
.text
ALIGN32
.globl ASM_NAME(dct64_mmx)
ASM_NAME(dct64_mmx):
/* float-output entry: force %ecx to zero, then fall through */
xorl %ecx,%ecx
.globl ASM_NAME(dct64_MMX)
ASM_NAME(dct64_MMX):
pushl %ebx
pushl %esi
pushl %edi
/* 256 bytes of scratch: one 128-byte buffer at (%esp) (later %ecx) and
   one at 128(%esp) (%edx) */
subl $256,%esp
/* Stage 1 (input -> %edx buffer), interleaved with register setup:
   element i is paired with element 31-i; the sum is stored at index i,
   the difference times the coefficient at index i of the table is stored
   at index 31-i. */
movl 280(%esp),%eax
flds (%eax)
leal 128(%esp),%edx
fadds 124(%eax)
movl 272(%esp),%esi
fstps (%edx)
movl 276(%esp),%edi
flds 4(%eax)
movl $ASM_NAME(costab_mmxsse),%ebx
fadds 120(%eax)
/* Set ZF from the entry value of %ecx. The flag is consumed by the
   "jnz .L01" far below; only x87 arithmetic and one movl lie in between,
   none of which writes EFLAGS. */
orl %ecx,%ecx
fstps 4(%edx)
flds (%eax)
/* from here on %ecx is the first scratch buffer */
movl %esp,%ecx
fsubs 124(%eax)
fmuls (%ebx)
fstps 124(%edx)
flds 4(%eax)
fsubs 120(%eax)
fmuls 4(%ebx)
fstps 120(%edx)
flds 8(%eax)
fadds 116(%eax)
fstps 8(%edx)
flds 12(%eax)
fadds 112(%eax)
fstps 12(%edx)
flds 8(%eax)
fsubs 116(%eax)
fmuls 8(%ebx)
fstps 116(%edx)
flds 12(%eax)
fsubs 112(%eax)
fmuls 12(%ebx)
fstps 112(%edx)
flds 16(%eax)
fadds 108(%eax)
fstps 16(%edx)
flds 20(%eax)
fadds 104(%eax)
fstps 20(%edx)
flds 16(%eax)
fsubs 108(%eax)
fmuls 16(%ebx)
fstps 108(%edx)
flds 20(%eax)
fsubs 104(%eax)
fmuls 20(%ebx)
fstps 104(%edx)
flds 24(%eax)
fadds 100(%eax)
fstps 24(%edx)
flds 28(%eax)
fadds 96(%eax)
fstps 28(%edx)
flds 24(%eax)
fsubs 100(%eax)
fmuls 24(%ebx)
fstps 100(%edx)
flds 28(%eax)
fsubs 96(%eax)
fmuls 28(%ebx)
fstps 96(%edx)
flds 32(%eax)
fadds 92(%eax)
fstps 32(%edx)
flds 36(%eax)
fadds 88(%eax)
fstps 36(%edx)
flds 32(%eax)
fsubs 92(%eax)
fmuls 32(%ebx)
fstps 92(%edx)
flds 36(%eax)
fsubs 88(%eax)
fmuls 36(%ebx)
fstps 88(%edx)
flds 40(%eax)
fadds 84(%eax)
fstps 40(%edx)
flds 44(%eax)
fadds 80(%eax)
fstps 44(%edx)
flds 40(%eax)
fsubs 84(%eax)
fmuls 40(%ebx)
fstps 84(%edx)
flds 44(%eax)
fsubs 80(%eax)
fmuls 44(%ebx)
fstps 80(%edx)
flds 48(%eax)
fadds 76(%eax)
fstps 48(%edx)
flds 52(%eax)
fadds 72(%eax)
fstps 52(%edx)
flds 48(%eax)
fsubs 76(%eax)
fmuls 48(%ebx)
fstps 76(%edx)
flds 52(%eax)
fsubs 72(%eax)
fmuls 52(%ebx)
fstps 72(%edx)
flds 56(%eax)
fadds 68(%eax)
fstps 56(%edx)
flds 60(%eax)
fadds 64(%eax)
fstps 60(%edx)
flds 56(%eax)
fsubs 68(%eax)
fmuls 56(%ebx)
fstps 68(%edx)
flds 60(%eax)
fsubs 64(%eax)
fmuls 60(%ebx)
fstps 64(%edx)
/* Stage 2 (%edx -> %ecx): the same fold inside each 16-element half,
   coefficients at 64..92(%ebx). In the upper half (byte offsets 64..124)
   the subtraction is done with the operands swapped. */
flds (%edx)
fadds 60(%edx)
fstps (%ecx)
flds 4(%edx)
fadds 56(%edx)
fstps 4(%ecx)
flds (%edx)
fsubs 60(%edx)
fmuls 64(%ebx)
fstps 60(%ecx)
flds 4(%edx)
fsubs 56(%edx)
fmuls 68(%ebx)
fstps 56(%ecx)
flds 8(%edx)
fadds 52(%edx)
fstps 8(%ecx)
flds 12(%edx)
fadds 48(%edx)
fstps 12(%ecx)
flds 8(%edx)
fsubs 52(%edx)
fmuls 72(%ebx)
fstps 52(%ecx)
flds 12(%edx)
fsubs 48(%edx)
fmuls 76(%ebx)
fstps 48(%ecx)
flds 16(%edx)
fadds 44(%edx)
fstps 16(%ecx)
flds 20(%edx)
fadds 40(%edx)
fstps 20(%ecx)
flds 16(%edx)
fsubs 44(%edx)
fmuls 80(%ebx)
fstps 44(%ecx)
flds 20(%edx)
fsubs 40(%edx)
fmuls 84(%ebx)
fstps 40(%ecx)
flds 24(%edx)
fadds 36(%edx)
fstps 24(%ecx)
flds 28(%edx)
fadds 32(%edx)
fstps 28(%ecx)
flds 24(%edx)
fsubs 36(%edx)
fmuls 88(%ebx)
fstps 36(%ecx)
flds 28(%edx)
fsubs 32(%edx)
fmuls 92(%ebx)
fstps 32(%ecx)
flds 64(%edx)
fadds 124(%edx)
fstps 64(%ecx)
flds 68(%edx)
fadds 120(%edx)
fstps 68(%ecx)
flds 124(%edx)
fsubs 64(%edx)
fmuls 64(%ebx)
fstps 124(%ecx)
flds 120(%edx)
fsubs 68(%edx)
fmuls 68(%ebx)
fstps 120(%ecx)
flds 72(%edx)
fadds 116(%edx)
fstps 72(%ecx)
flds 76(%edx)
fadds 112(%edx)
fstps 76(%ecx)
flds 116(%edx)
fsubs 72(%edx)
fmuls 72(%ebx)
fstps 116(%ecx)
flds 112(%edx)
fsubs 76(%edx)
fmuls 76(%ebx)
fstps 112(%ecx)
flds 80(%edx)
fadds 108(%edx)
fstps 80(%ecx)
flds 84(%edx)
fadds 104(%edx)
fstps 84(%ecx)
flds 108(%edx)
fsubs 80(%edx)
fmuls 80(%ebx)
fstps 108(%ecx)
flds 104(%edx)
fsubs 84(%edx)
fmuls 84(%ebx)
fstps 104(%ecx)
flds 88(%edx)
fadds 100(%edx)
fstps 88(%ecx)
flds 92(%edx)
fadds 96(%edx)
fstps 92(%ecx)
flds 100(%edx)
fsubs 88(%edx)
fmuls 88(%ebx)
fstps 100(%ecx)
flds 96(%edx)
fsubs 92(%edx)
fmuls 92(%ebx)
fstps 96(%ecx)
/* Stage 3 (%ecx -> %edx): fold inside each group of 8 elements,
   coefficients at 96..108(%ebx). */
flds (%ecx)
fadds 28(%ecx)
fstps (%edx)
flds (%ecx)
fsubs 28(%ecx)
fmuls 96(%ebx)
fstps 28(%edx)
flds 4(%ecx)
fadds 24(%ecx)
fstps 4(%edx)
flds 4(%ecx)
fsubs 24(%ecx)
fmuls 100(%ebx)
fstps 24(%edx)
flds 8(%ecx)
fadds 20(%ecx)
fstps 8(%edx)
flds 8(%ecx)
fsubs 20(%ecx)
fmuls 104(%ebx)
fstps 20(%edx)
flds 12(%ecx)
fadds 16(%ecx)
fstps 12(%edx)
flds 12(%ecx)
fsubs 16(%ecx)
fmuls 108(%ebx)
fstps 16(%edx)
flds 32(%ecx)
fadds 60(%ecx)
fstps 32(%edx)
flds 60(%ecx)
fsubs 32(%ecx)
fmuls 96(%ebx)
fstps 60(%edx)
flds 36(%ecx)
fadds 56(%ecx)
fstps 36(%edx)
flds 56(%ecx)
fsubs 36(%ecx)
fmuls 100(%ebx)
fstps 56(%edx)
flds 40(%ecx)
fadds 52(%ecx)
fstps 40(%edx)
flds 52(%ecx)
fsubs 40(%ecx)
fmuls 104(%ebx)
fstps 52(%edx)
flds 44(%ecx)
fadds 48(%ecx)
fstps 44(%edx)
flds 48(%ecx)
fsubs 44(%ecx)
fmuls 108(%ebx)
fstps 48(%edx)
flds 64(%ecx)
fadds 92(%ecx)
fstps 64(%edx)
flds 64(%ecx)
fsubs 92(%ecx)
fmuls 96(%ebx)
fstps 92(%edx)
flds 68(%ecx)
fadds 88(%ecx)
fstps 68(%edx)
flds 68(%ecx)
fsubs 88(%ecx)
fmuls 100(%ebx)
fstps 88(%edx)
flds 72(%ecx)
fadds 84(%ecx)
fstps 72(%edx)
flds 72(%ecx)
fsubs 84(%ecx)
fmuls 104(%ebx)
fstps 84(%edx)
flds 76(%ecx)
fadds 80(%ecx)
fstps 76(%edx)
flds 76(%ecx)
fsubs 80(%ecx)
fmuls 108(%ebx)
fstps 80(%edx)
flds 96(%ecx)
fadds 124(%ecx)
fstps 96(%edx)
flds 124(%ecx)
fsubs 96(%ecx)
fmuls 96(%ebx)
fstps 124(%edx)
flds 100(%ecx)
fadds 120(%ecx)
fstps 100(%edx)
flds 120(%ecx)
fsubs 100(%ecx)
fmuls 100(%ebx)
fstps 120(%edx)
flds 104(%ecx)
fadds 116(%ecx)
fstps 104(%edx)
flds 116(%ecx)
fsubs 104(%ecx)
fmuls 104(%ebx)
fstps 116(%edx)
flds 108(%ecx)
fadds 112(%ecx)
fstps 108(%edx)
flds 112(%ecx)
fsubs 108(%ecx)
fmuls 108(%ebx)
fstps 112(%edx)
/* Stage 4 (%edx -> %ecx): fold inside each group of 4 elements,
   coefficients at 112(%ebx) and 116(%ebx). */
flds (%edx)
fadds 12(%edx)
fstps (%ecx)
flds (%edx)
fsubs 12(%edx)
fmuls 112(%ebx)
fstps 12(%ecx)
flds 4(%edx)
fadds 8(%edx)
fstps 4(%ecx)
flds 4(%edx)
fsubs 8(%edx)
fmuls 116(%ebx)
fstps 8(%ecx)
flds 16(%edx)
fadds 28(%edx)
fstps 16(%ecx)
flds 28(%edx)
fsubs 16(%edx)
fmuls 112(%ebx)
fstps 28(%ecx)
flds 20(%edx)
fadds 24(%edx)
fstps 20(%ecx)
flds 24(%edx)
fsubs 20(%edx)
fmuls 116(%ebx)
fstps 24(%ecx)
flds 32(%edx)
fadds 44(%edx)
fstps 32(%ecx)
flds 32(%edx)
fsubs 44(%edx)
fmuls 112(%ebx)
fstps 44(%ecx)
flds 36(%edx)
fadds 40(%edx)
fstps 36(%ecx)
flds 36(%edx)
fsubs 40(%edx)
fmuls 116(%ebx)
fstps 40(%ecx)
flds 48(%edx)
fadds 60(%edx)
fstps 48(%ecx)
flds 60(%edx)
fsubs 48(%edx)
fmuls 112(%ebx)
fstps 60(%ecx)
flds 52(%edx)
fadds 56(%edx)
fstps 52(%ecx)
flds 56(%edx)
fsubs 52(%edx)
fmuls 116(%ebx)
fstps 56(%ecx)
flds 64(%edx)
fadds 76(%edx)
fstps 64(%ecx)
flds 64(%edx)
fsubs 76(%edx)
fmuls 112(%ebx)
fstps 76(%ecx)
flds 68(%edx)
fadds 72(%edx)
fstps 68(%ecx)
flds 68(%edx)
fsubs 72(%edx)
fmuls 116(%ebx)
fstps 72(%ecx)
flds 80(%edx)
fadds 92(%edx)
fstps 80(%ecx)
flds 92(%edx)
fsubs 80(%edx)
fmuls 112(%ebx)
fstps 92(%ecx)
flds 84(%edx)
fadds 88(%edx)
fstps 84(%ecx)
flds 88(%edx)
fsubs 84(%edx)
fmuls 116(%ebx)
fstps 88(%ecx)
flds 96(%edx)
fadds 108(%edx)
fstps 96(%ecx)
flds 96(%edx)
fsubs 108(%edx)
fmuls 112(%ebx)
fstps 108(%ecx)
flds 100(%edx)
fadds 104(%edx)
fstps 100(%ecx)
flds 100(%edx)
fsubs 104(%edx)
fmuls 116(%ebx)
fstps 104(%ecx)
flds 112(%edx)
fadds 124(%edx)
fstps 112(%ecx)
flds 124(%edx)
fsubs 112(%edx)
fmuls 112(%ebx)
fstps 124(%ecx)
flds 116(%edx)
fadds 120(%edx)
fstps 116(%ecx)
flds 120(%edx)
fsubs 116(%edx)
fmuls 116(%ebx)
fstps 120(%ecx)
/* Stage 5 for byte offsets 32..124 (%ecx -> %edx): pairwise butterflies
   with the single coefficient at 120(%ebx), fused with the running sums
   of neighbouring results. Offsets 0..28 of the %ecx buffer are not
   touched here; they are consumed directly by the output code below. */
flds 32(%ecx)
fadds 36(%ecx)
fstps 32(%edx)
flds 32(%ecx)
fsubs 36(%ecx)
fmuls 120(%ebx)
fstps 36(%edx)
flds 44(%ecx)
fsubs 40(%ecx)
fmuls 120(%ebx)
fsts 44(%edx)
fadds 40(%ecx)
fadds 44(%ecx)
fstps 40(%edx)
flds 48(%ecx)
fsubs 52(%ecx)
fmuls 120(%ebx)
flds 60(%ecx)
fsubs 56(%ecx)
fmuls 120(%ebx)
fld %st(0)
fadds 56(%ecx)
fadds 60(%ecx)
fld %st(0)
fadds 48(%ecx)
fadds 52(%ecx)
fstps 48(%edx)
fadd %st(2)
fstps 56(%edx)
fsts 60(%edx)
faddp %st(1)
fstps 52(%edx)
flds 64(%ecx)
fadds 68(%ecx)
fstps 64(%edx)
flds 64(%ecx)
fsubs 68(%ecx)
fmuls 120(%ebx)
fstps 68(%edx)
flds 76(%ecx)
fsubs 72(%ecx)
fmuls 120(%ebx)
fsts 76(%edx)
fadds 72(%ecx)
fadds 76(%ecx)
fstps 72(%edx)
flds 92(%ecx)
fsubs 88(%ecx)
fmuls 120(%ebx)
fsts 92(%edx)
fadds 92(%ecx)
fadds 88(%ecx)
fld %st(0)
fadds 80(%ecx)
fadds 84(%ecx)
fstps 80(%edx)
flds 80(%ecx)
fsubs 84(%ecx)
fmuls 120(%ebx)
fadd %st(0), %st(1)
fadds 92(%edx)
fstps 84(%edx)
fstps 88(%edx)
flds 96(%ecx)
fadds 100(%ecx)
fstps 96(%edx)
flds 96(%ecx)
fsubs 100(%ecx)
fmuls 120(%ebx)
fstps 100(%edx)
flds 108(%ecx)
fsubs 104(%ecx)
fmuls 120(%ebx)
fsts 108(%edx)
fadds 104(%ecx)
fadds 108(%ecx)
fstps 104(%edx)
flds 124(%ecx)
fsubs 120(%ecx)
fmuls 120(%ebx)
fsts 124(%edx)
fadds 120(%ecx)
fadds 124(%ecx)
fld %st(0)
fadds 112(%ecx)
fadds 116(%ecx)
fstps 112(%edx)
flds 112(%ecx)
fsubs 116(%ecx)
fmuls 120(%ebx)
fadd %st(0),%st(1)
fadds 124(%edx)
fstps 116(%edx)
fstps 120(%edx)
/* ZF still reflects the entry value of %ecx: non-zero takes the integer
   output path at .L01, zero falls through to the float output path. */
jnz .L01
/* Float output: results are stored as single-precision values through
   %esi and %edi; all store offsets are multiples of 64 bytes. The first
   part finishes the butterflies for offsets 0..28 of the %ecx buffer,
   the rest sums values from the %edx buffer. */
flds (%ecx)
fadds 4(%ecx)
fstps 1024(%esi)
flds (%ecx)
fsubs 4(%ecx)
fmuls 120(%ebx)
fsts (%esi)
fstps (%edi)
flds 12(%ecx)
fsubs 8(%ecx)
fmuls 120(%ebx)
fsts 512(%edi)
fadds 12(%ecx)
fadds 8(%ecx)
fstps 512(%esi)
flds 16(%ecx)
fsubs 20(%ecx)
fmuls 120(%ebx)
flds 28(%ecx)
fsubs 24(%ecx)
fmuls 120(%ebx)
fsts 768(%edi)
fld %st(0)
fadds 24(%ecx)
fadds 28(%ecx)
fld %st(0)
fadds 16(%ecx)
fadds 20(%ecx)
fstps 768(%esi)
fadd %st(2)
fstps 256(%esi)
faddp %st(1)
fstps 256(%edi)
flds 32(%edx)
fadds 48(%edx)
fstps 896(%esi)
flds 48(%edx)
fadds 40(%edx)
fstps 640(%esi)
flds 40(%edx)
fadds 56(%edx)
fstps 384(%esi)
flds 56(%edx)
fadds 36(%edx)
fstps 128(%esi)
flds 36(%edx)
fadds 52(%edx)
fstps 128(%edi)
flds 52(%edx)
fadds 44(%edx)
fstps 384(%edi)
flds 60(%edx)
fsts 896(%edi)
fadds 44(%edx)
fstps 640(%edi)
flds 96(%edx)
fadds 112(%edx)
fld %st(0)
fadds 64(%edx)
fstps 960(%esi)
fadds 80(%edx)
fstps 832(%esi)
flds 112(%edx)
fadds 104(%edx)
fld %st(0)
fadds 80(%edx)
fstps 704(%esi)
fadds 72(%edx)
fstps 576(%esi)
flds 104(%edx)
fadds 120(%edx)
fld %st(0)
fadds 72(%edx)
fstps 448(%esi)
fadds 88(%edx)
fstps 320(%esi)
flds 120(%edx)
fadds 100(%edx)
fld %st(0)
fadds 88(%edx)
fstps 192(%esi)
fadds 68(%edx)
fstps 64(%esi)
flds 100(%edx)
fadds 116(%edx)
fld %st(0)
fadds 68(%edx)
fstps 64(%edi)
fadds 84(%edx)
fstps 192(%edi)
flds 116(%edx)
fadds 108(%edx)
fld %st(0)
fadds 84(%edx)
fstps 320(%edi)
fadds 76(%edx)
fstps 448(%edi)
flds 108(%edx)
fadds 124(%edx)
fld %st(0)
fadds 76(%edx)
fstps 576(%edi)
fadds 92(%edx)
fstps 704(%edi)
flds 124(%edx)
fsts 960(%edi)
fadds 92(%edx)
fstps 832(%edi)
/* release the scratch frame and restore the saved registers */
addl $256,%esp
popl %edi
popl %esi
popl %ebx
ret
/* Integer output: the same sums as the float path, stored with
   fist/fistp; all store offsets are multiples of 32 bytes.
   NOTE(review): the unsuffixed fist/fistp, the halved offsets and the
   closing movsw suggest 16-bit output samples - confirm against the
   assembler's default operand size. */
.L01:
flds (%ecx)
fadds 4(%ecx)
fistp 512(%esi)
flds (%ecx)
fsubs 4(%ecx)
fmuls 120(%ebx)
fistp (%esi)
flds 12(%ecx)
fsubs 8(%ecx)
fmuls 120(%ebx)
fist 256(%edi)
fadds 12(%ecx)
fadds 8(%ecx)
fistp 256(%esi)
flds 16(%ecx)
fsubs 20(%ecx)
fmuls 120(%ebx)
flds 28(%ecx)
fsubs 24(%ecx)
fmuls 120(%ebx)
fist 384(%edi)
fld %st(0)
fadds 24(%ecx)
fadds 28(%ecx)
fld %st(0)
fadds 16(%ecx)
fadds 20(%ecx)
fistp 384(%esi)
fadd %st(2)
fistp 128(%esi)
faddp %st(1)
fistp 128(%edi)
flds 32(%edx)
fadds 48(%edx)
fistp 448(%esi)
flds 48(%edx)
fadds 40(%edx)
fistp 320(%esi)
flds 40(%edx)
fadds 56(%edx)
fistp 192(%esi)
flds 56(%edx)
fadds 36(%edx)
fistp 64(%esi)
flds 36(%edx)
fadds 52(%edx)
fistp 64(%edi)
flds 52(%edx)
fadds 44(%edx)
fistp 192(%edi)
flds 60(%edx)
fist 448(%edi)
fadds 44(%edx)
fistp 320(%edi)
flds 96(%edx)
fadds 112(%edx)
fld %st(0)
fadds 64(%edx)
fistp 480(%esi)
fadds 80(%edx)
fistp 416(%esi)
flds 112(%edx)
fadds 104(%edx)
fld %st(0)
fadds 80(%edx)
fistp 352(%esi)
fadds 72(%edx)
fistp 288(%esi)
flds 104(%edx)
fadds 120(%edx)
fld %st(0)
fadds 72(%edx)
fistp 224(%esi)
fadds 88(%edx)
fistp 160(%esi)
flds 120(%edx)
fadds 100(%edx)
fld %st(0)
fadds 88(%edx)
fistp 96(%esi)
fadds 68(%edx)
fistp 32(%esi)
flds 100(%edx)
fadds 116(%edx)
fld %st(0)
fadds 68(%edx)
fistp 32(%edi)
fadds 84(%edx)
fistp 96(%edi)
flds 116(%edx)
fadds 108(%edx)
fld %st(0)
fadds 84(%edx)
fistp 160(%edi)
fadds 76(%edx)
fistp 224(%edi)
flds 108(%edx)
fadds 124(%edx)
fld %st(0)
fadds 76(%edx)
fistp 288(%edi)
fadds 92(%edx)
fistp 352(%edi)
flds 124(%edx)
fist 480(%edi)
fadds 92(%edx)
fistp 416(%edi)
/* copy one 16-bit word from (%esi) to (%edi): the value stored at offset 0
   of the first output is duplicated at offset 0 of the second one. The
   pointer adjustment done by movsw is discarded by the pops below. */
movsw
/* release the scratch frame and restore the saved registers */
addl $256,%esp
popl %edi
popl %esi
popl %ebx
ret

557
src/libmpg123/dct64_sse.S Normal file
View File

@@ -0,0 +1,557 @@
/*
dct64_sse: MMX/SSE optimized dct64
copyright 2006-2007 by Zuxy Meng <zuxy.meng@gmail.com> / the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by the mysterious higway for MMX (apparently)
then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
Both have agreed to distribution under LGPL 2.1 .
Transformed back into standalone asm, with help of
gcc -S -DHAVE_CONFIG_H -I. -march=pentium3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct64_sse.{S,c}
Original comment from MPlayer source follows:
*/
/*
* Discrete Cosine Transform (DCT) for SSE
* based upon code from mp3lib/dct64.c, mp3lib/dct64_altivec.c
* and mp3lib/dct64_MMX.c
*/
#include "mangle.h"
#ifndef __APPLE__
.section .rodata
#else
.data
#endif
ALIGN16
/* .type nnnn, @object
.size nnnn, 16 */
nnnn:
.long -2147483648
.long -2147483648
.long -2147483648
.long -2147483648
ALIGN16
/* .type ppnn, @object
.size ppnn, 16 */
ppnn:
.long 0
.long 0
.long -2147483648
.long -2147483648
ALIGN16
/* .type pnpn, @object
.size pnpn, 16 */
pnpn:
.long 0
.long -2147483648
.long 0
.long -2147483648
ALIGN4
/* .type one.4748, @object
.size one.4748, 4 */
one.4748:
.long 1065353216
/* no .data ? */
/* .local b2.4747 */
ALIGN16
COMM(b2.4747,128,16)
/* .local b1.4746 */
ALIGN16
COMM(b1.4746,128,16)
/*
	dct64_sse: 32 bit x86 routine taking three pointer arguments from the stack.
	  8(%ebp)  -> %ecx : first output pointer (written with fistp)
	  12(%ebp) -> %ebx : second output pointer (written with fist/fistp)
	  16(%ebp) -> %eax : input, read as 128 bytes (eight 16-byte groups)
	The SSE part ping-pongs intermediate results between the buffers b1.4746
	and b2.4747; the x87 part at the end does the remaining scalar additions
	and stores the results as integers.
	The work buffers are global symbols, so two concurrent calls would share
	them.
	NOTE(review): MOVUAPS is a macro from mangle.h; presumably it selects an
	aligned or unaligned 16-byte load - confirm there.
*/
.text
ALIGN16,,15
.globl ASM_NAME(dct64_sse)
/* .type ASM_NAME(dct64_sse), @function */
ASM_NAME(dct64_sse):
pushl %ebp
movl %esp, %ebp
movl 16(%ebp), %eax
pushl %ebx
movl 8(%ebp), %ecx
#APP
/* Stage 1, input -> b1. shufps $27 reverses the four lanes of a register.
   For each pair of mirrored 16-byte groups (offset k and 112-k):
   b1[k] = in[k] + reversed(in[112-k]),
   b1[112-k] = (reversed(in[k]) - in[112-k]) * reversed(costab[k]). */
/* for (i = 0; i < 0x20 / 2; i += 4) cycle 1 */
movaps ASM_NAME(costab_mmxsse), %xmm3
shufps $27, %xmm3, %xmm3
MOVUAPS (%eax), %xmm1
movaps %xmm1, %xmm4
MOVUAPS 112(%eax), %xmm2
shufps $27, %xmm4, %xmm4
movaps %xmm2, %xmm0
shufps $27, %xmm0, %xmm0
addps %xmm0, %xmm1
movaps %xmm1, b1.4746
subps %xmm2, %xmm4
mulps %xmm3, %xmm4
movaps %xmm4, b1.4746+112
#NO_APP
movl 12(%ebp), %ebx
#APP
/* for (i = 0; i < 0x20 / 2; i += 4) cycle 2 */
movaps ASM_NAME(costab_mmxsse)+16, %xmm3
shufps $27, %xmm3, %xmm3
MOVUAPS 16(%eax), %xmm1
movaps %xmm1, %xmm4
MOVUAPS 96(%eax), %xmm2
shufps $27, %xmm4, %xmm4
movaps %xmm2, %xmm0
shufps $27, %xmm0, %xmm0
addps %xmm0, %xmm1
movaps %xmm1, b1.4746+16
subps %xmm2, %xmm4
mulps %xmm3, %xmm4
movaps %xmm4, b1.4746+96
/* for (i = 0; i < 0x20 / 2; i += 4) cycle 3 */
movaps ASM_NAME(costab_mmxsse)+32, %xmm3
shufps $27, %xmm3, %xmm3
MOVUAPS 32(%eax), %xmm1
movaps %xmm1, %xmm4
MOVUAPS 80(%eax), %xmm2
shufps $27, %xmm4, %xmm4
movaps %xmm2, %xmm0
shufps $27, %xmm0, %xmm0
addps %xmm0, %xmm1
movaps %xmm1, b1.4746+32
subps %xmm2, %xmm4
mulps %xmm3, %xmm4
movaps %xmm4, b1.4746+80
/* for (i = 0; i < 0x20 / 2; i += 4) cycle 4 */
movaps ASM_NAME(costab_mmxsse)+48, %xmm3
shufps $27, %xmm3, %xmm3
MOVUAPS 48(%eax), %xmm1
movaps %xmm1, %xmm4
MOVUAPS 64(%eax), %xmm2
shufps $27, %xmm4, %xmm4
movaps %xmm2, %xmm0
shufps $27, %xmm0, %xmm0
addps %xmm0, %xmm1
movaps %xmm1, b1.4746+48
subps %xmm2, %xmm4
mulps %xmm3, %xmm4
movaps %xmm4, b1.4746+64
/* Stage 2, b1 -> b2. Each 64-byte half of b1 is folded the same way:
   the first two groups of the half receive sums with the reversed mirror
   group, the last two groups receive differences. Lower half first. */
movaps b1.4746, %xmm1
movaps b1.4746+16, %xmm3
movaps b1.4746+32, %xmm4
movaps b1.4746+48, %xmm6
movaps %xmm1, %xmm7
shufps $27, %xmm7, %xmm7
movaps %xmm3, %xmm5
shufps $27, %xmm5, %xmm5
movaps %xmm4, %xmm2
shufps $27, %xmm2, %xmm2
movaps %xmm6, %xmm0
shufps $27, %xmm0, %xmm0
addps %xmm0, %xmm1
movaps %xmm1, b2.4747
addps %xmm2, %xmm3
movaps %xmm3, b2.4747+16
subps %xmm4, %xmm5
movaps %xmm5, b2.4747+32
subps %xmm6, %xmm7
movaps %xmm7, b2.4747+48
/* Same folding for the upper half (b1+64 .. b1+112). */
movaps b1.4746+64, %xmm1
movaps b1.4746+80, %xmm3
movaps b1.4746+96, %xmm4
movaps b1.4746+112, %xmm6
movaps %xmm1, %xmm7
shufps $27, %xmm7, %xmm7
movaps %xmm3, %xmm5
shufps $27, %xmm5, %xmm5
movaps %xmm4, %xmm2
shufps $27, %xmm2, %xmm2
movaps %xmm6, %xmm0
shufps $27, %xmm0, %xmm0
addps %xmm0, %xmm1
movaps %xmm1, b2.4747+64
addps %xmm2, %xmm3
movaps %xmm3, b2.4747+80
subps %xmm4, %xmm5
movaps %xmm5, b2.4747+96
subps %xmm6, %xmm7
movaps %xmm7, b2.4747+112
/* Scale the difference groups in place by the reversed coefficients at
   costab+64 and costab+80. xmm6/xmm7 are zeroed so that the upper-half
   products are stored negated (0 - product). */
movaps b2.4747+32, %xmm0
movaps b2.4747+48, %xmm1
movaps ASM_NAME(costab_mmxsse)+64, %xmm4
xorps %xmm6, %xmm6
shufps $27, %xmm4, %xmm4
mulps %xmm4, %xmm1
movaps ASM_NAME(costab_mmxsse)+80, %xmm2
xorps %xmm7, %xmm7
shufps $27, %xmm2, %xmm2
mulps %xmm2, %xmm0
movaps %xmm0, b2.4747+32
movaps %xmm1, b2.4747+48
movaps b2.4747+96, %xmm3
mulps %xmm2, %xmm3
subps %xmm3, %xmm6
movaps %xmm6, b2.4747+96
movaps b2.4747+112, %xmm5
mulps %xmm4, %xmm5
subps %xmm5, %xmm7
movaps %xmm7, b2.4747+112
/* Stage 3, b2 -> b1, on pairs of adjacent groups with the reversed
   coefficients at costab+96 kept in xmm0. xmm5 holds the nnnn mask; xmm6
   is xor-ed with it before every pair, so it alternates between all-zero
   and the mask. The difference term is therefore sign-flipped for the
   second and fourth pair only. */
movaps ASM_NAME(costab_mmxsse)+96, %xmm0
shufps $27, %xmm0, %xmm0
movaps nnnn, %xmm5
movaps %xmm5, %xmm6
movaps b2.4747, %xmm2
movaps b2.4747+16, %xmm3
movaps %xmm2, %xmm4
xorps %xmm5, %xmm6
shufps $27, %xmm4, %xmm4
movaps %xmm3, %xmm1
shufps $27, %xmm1, %xmm1
addps %xmm1, %xmm2
movaps %xmm2, b1.4746
subps %xmm3, %xmm4
xorps %xmm6, %xmm4
mulps %xmm0, %xmm4
movaps %xmm4, b1.4746+16
movaps b2.4747+32, %xmm2
movaps b2.4747+48, %xmm3
movaps %xmm2, %xmm4
xorps %xmm5, %xmm6
shufps $27, %xmm4, %xmm4
movaps %xmm3, %xmm1
shufps $27, %xmm1, %xmm1
addps %xmm1, %xmm2
movaps %xmm2, b1.4746+32
subps %xmm3, %xmm4
xorps %xmm6, %xmm4
mulps %xmm0, %xmm4
movaps %xmm4, b1.4746+48
movaps b2.4747+64, %xmm2
movaps b2.4747+80, %xmm3
movaps %xmm2, %xmm4
xorps %xmm5, %xmm6
shufps $27, %xmm4, %xmm4
movaps %xmm3, %xmm1
shufps $27, %xmm1, %xmm1
addps %xmm1, %xmm2
movaps %xmm2, b1.4746+64
subps %xmm3, %xmm4
xorps %xmm6, %xmm4
mulps %xmm0, %xmm4
movaps %xmm4, b1.4746+80
movaps b2.4747+96, %xmm2
movaps b2.4747+112, %xmm3
movaps %xmm2, %xmm4
xorps %xmm5, %xmm6
shufps $27, %xmm4, %xmm4
movaps %xmm3, %xmm1
shufps $27, %xmm1, %xmm1
addps %xmm1, %xmm2
movaps %xmm2, b1.4746+96
subps %xmm3, %xmm4
xorps %xmm6, %xmm4
mulps %xmm0, %xmm4
movaps %xmm4, b1.4746+112
/* Stage 4, b1 -> b2, working inside each 16-byte group. xmm0 is assembled
   from 1.0 (xmm1) and the scalars at costab+112 and costab+116; xmm2 is
   the ppnn mask, which flips the sign of the two upper lanes of the
   shuffled copy before the add. */
movss one.4748, %xmm1
movss ASM_NAME(costab_mmxsse)+112, %xmm0
movaps %xmm1, %xmm3
unpcklps %xmm0, %xmm3
movss ASM_NAME(costab_mmxsse)+116, %xmm2
movaps %xmm1, %xmm0
unpcklps %xmm2, %xmm0
unpcklps %xmm3, %xmm0
movaps ppnn, %xmm2
movaps b1.4746, %xmm3
movaps %xmm3, %xmm4
shufps $20, %xmm4, %xmm4
shufps $235, %xmm3, %xmm3
xorps %xmm2, %xmm3
addps %xmm3, %xmm4
mulps %xmm0, %xmm4
movaps %xmm4, b2.4747
movaps b1.4746+16, %xmm6
movaps %xmm6, %xmm5
shufps $27, %xmm5, %xmm5
xorps %xmm2, %xmm5
addps %xmm5, %xmm6
mulps %xmm0, %xmm6
movaps %xmm6, b2.4747+16
movaps b1.4746+32, %xmm3
movaps %xmm3, %xmm4
shufps $20, %xmm4, %xmm4
shufps $235, %xmm3, %xmm3
xorps %xmm2, %xmm3
addps %xmm3, %xmm4
mulps %xmm0, %xmm4
movaps %xmm4, b2.4747+32
movaps b1.4746+48, %xmm6
movaps %xmm6, %xmm5
shufps $27, %xmm5, %xmm5
xorps %xmm2, %xmm5
addps %xmm5, %xmm6
mulps %xmm0, %xmm6
movaps %xmm6, b2.4747+48
movaps b1.4746+64, %xmm3
movaps %xmm3, %xmm4
shufps $20, %xmm4, %xmm4
shufps $235, %xmm3, %xmm3
xorps %xmm2, %xmm3
addps %xmm3, %xmm4
mulps %xmm0, %xmm4
movaps %xmm4, b2.4747+64
movaps b1.4746+80, %xmm6
movaps %xmm6, %xmm5
shufps $27, %xmm5, %xmm5
xorps %xmm2, %xmm5
addps %xmm5, %xmm6
mulps %xmm0, %xmm6
movaps %xmm6, b2.4747+80
movaps b1.4746+96, %xmm3
movaps %xmm3, %xmm4
shufps $20, %xmm4, %xmm4
shufps $235, %xmm3, %xmm3
xorps %xmm2, %xmm3
addps %xmm3, %xmm4
mulps %xmm0, %xmm4
movaps %xmm4, b2.4747+96
movaps b1.4746+112, %xmm6
movaps %xmm6, %xmm5
shufps $27, %xmm5, %xmm5
xorps %xmm2, %xmm5
addps %xmm5, %xmm6
mulps %xmm0, %xmm6
movaps %xmm6, b2.4747+112
/* Stage 5, b2 -> b1, for the groups at offsets 32..112 only (offsets 0 and
   16 are consumed directly from b2 by the x87 code below). xmm2 is built
   from 1.0 and the scalar at costab+120; xmm0 is the pnpn mask, which
   flips the sign of lanes 1 and 3 of the shuffled copy before the add. */
movss ASM_NAME(costab_mmxsse)+120, %xmm0
movaps %xmm1, %xmm2
movaps %xmm0, %xmm7
unpcklps %xmm1, %xmm2
unpcklps %xmm0, %xmm7
movaps pnpn, %xmm0
unpcklps %xmm7, %xmm2
movaps b2.4747+32, %xmm1
movaps %xmm1, %xmm3
shufps $224, %xmm3, %xmm3
shufps $181, %xmm1, %xmm1
xorps %xmm0, %xmm1
addps %xmm1, %xmm3
mulps %xmm2, %xmm3
movaps %xmm3, b1.4746+32
movaps b2.4747+48, %xmm4
movaps %xmm4, %xmm5
shufps $224, %xmm5, %xmm5
shufps $181, %xmm4, %xmm4
xorps %xmm0, %xmm4
addps %xmm4, %xmm5
mulps %xmm2, %xmm5
movaps %xmm5, b1.4746+48
movaps b2.4747+64, %xmm1
movaps %xmm1, %xmm3
shufps $224, %xmm3, %xmm3
shufps $181, %xmm1, %xmm1
xorps %xmm0, %xmm1
addps %xmm1, %xmm3
mulps %xmm2, %xmm3
movaps %xmm3, b1.4746+64
movaps b2.4747+80, %xmm4
movaps %xmm4, %xmm5
shufps $224, %xmm5, %xmm5
shufps $181, %xmm4, %xmm4
xorps %xmm0, %xmm4
addps %xmm4, %xmm5
mulps %xmm2, %xmm5
movaps %xmm5, b1.4746+80
movaps b2.4747+96, %xmm1
movaps %xmm1, %xmm3
shufps $224, %xmm3, %xmm3
shufps $181, %xmm1, %xmm1
xorps %xmm0, %xmm1
addps %xmm1, %xmm3
mulps %xmm2, %xmm3
movaps %xmm3, b1.4746+96
movaps b2.4747+112, %xmm4
movaps %xmm4, %xmm5
shufps $224, %xmm5, %xmm5
shufps $181, %xmm4, %xmm4
xorps %xmm0, %xmm4
addps %xmm4, %xmm5
mulps %xmm2, %xmm5
movaps %xmm5, b1.4746+112
#NO_APP
/* Scalar x87 fix-up: in-place additions of neighbouring floats inside b1.
   From here on %edx points at b1 and %eax at b2 (the input pointer in
   %eax is no longer needed). */
flds b1.4746+40
movl $b1.4746, %edx
movl $b2.4747, %eax
fadds b1.4746+44
fstps b1.4746+40
flds b1.4746+56
fadds b1.4746+60
flds b1.4746+48
fadd %st(1), %st
fstps b1.4746+48
fadds b1.4746+52
fstps b1.4746+56
flds b1.4746+52
fadds b1.4746+60
fstps b1.4746+52
flds b1.4746+72
fadds b1.4746+76
fstps b1.4746+72
flds b1.4746+88
fadds b1.4746+92
flds b1.4746+80
fadd %st(1), %st
fstps b1.4746+80
fadds b1.4746+84
fstps b1.4746+88
flds b1.4746+84
fadds b1.4746+92
fstps b1.4746+84
flds b1.4746+104
fadds b1.4746+108
fstps b1.4746+104
flds b1.4746+120
fadds b1.4746+124
flds b1.4746+112
fadd %st(1), %st
fstps b1.4746+112
fadds b1.4746+116
fstps b1.4746+120
flds b1.4746+116
fadds b1.4746+124
fstps b1.4746+116
#APP
/* Output stage: the scalar at costab+120 stays on the x87 stack as a
   multiplier while sums from b2 (via %eax) and b1 (via %edx) are converted
   to integers and stored through %ecx and %ebx at offsets that are
   multiples of 32 bytes.
   NOTE(review): the fist/fistp stores carry no size suffix; together with
   the 16-bit copy at the end this looks like 16-bit integer output -
   confirm against the assembler's default operand size. */
flds ASM_NAME(costab_mmxsse)+120
flds (%eax)
fadds 4(%eax)
fistp 512(%ecx)
flds (%eax)
fsubs 4(%eax)
fmul %st(1)
fistp (%ecx)
flds 12(%eax)
fsubs 8(%eax)
fmul %st(1)
fist 256(%ebx)
fadds 12(%eax)
fadds 8(%eax)
fistp 256(%ecx)
flds 16(%eax)
fsubs 20(%eax)
fmul %st(1)
flds 28(%eax)
fsubs 24(%eax)
fmul %st(2)
fist 384(%ebx)
fld %st(0)
fadds 24(%eax)
fadds 28(%eax)
fld %st(0)
fadds 16(%eax)
fadds 20(%eax)
fistp 384(%ecx)
fadd %st(2)
fistp 128(%ecx)
faddp %st(1)
fistp 128(%ebx)
flds 32(%edx)
fadds 48(%edx)
fistp 448(%ecx)
flds 48(%edx)
fadds 40(%edx)
fistp 320(%ecx)
flds 40(%edx)
fadds 56(%edx)
fistp 192(%ecx)
flds 56(%edx)
fadds 36(%edx)
fistp 64(%ecx)
flds 36(%edx)
fadds 52(%edx)
fistp 64(%ebx)
flds 52(%edx)
fadds 44(%edx)
fistp 192(%ebx)
flds 60(%edx)
fist 448(%ebx)
fadds 44(%edx)
fistp 320(%ebx)
flds 96(%edx)
fadds 112(%edx)
fld %st(0)
fadds 64(%edx)
fistp 480(%ecx)
fadds 80(%edx)
fistp 416(%ecx)
flds 112(%edx)
fadds 104(%edx)
fld %st(0)
fadds 80(%edx)
fistp 352(%ecx)
fadds 72(%edx)
fistp 288(%ecx)
flds 104(%edx)
fadds 120(%edx)
fld %st(0)
fadds 72(%edx)
fistp 224(%ecx)
fadds 88(%edx)
fistp 160(%ecx)
flds 120(%edx)
fadds 100(%edx)
fld %st(0)
fadds 88(%edx)
fistp 96(%ecx)
fadds 68(%edx)
fistp 32(%ecx)
flds 100(%edx)
fadds 116(%edx)
fld %st(0)
fadds 68(%edx)
fistp 32(%ebx)
fadds 84(%edx)
fistp 96(%ebx)
flds 116(%edx)
fadds 108(%edx)
fld %st(0)
fadds 84(%edx)
fistp 160(%ebx)
fadds 76(%edx)
fistp 224(%ebx)
flds 108(%edx)
fadds 124(%edx)
fld %st(0)
fadds 76(%edx)
fistp 288(%ebx)
fadds 92(%edx)
fistp 352(%ebx)
flds 124(%edx)
fist 480(%ebx)
fadds 92(%edx)
fistp 416(%ebx)
/* drop the multiplier that was loaded at the start of this stage */
ffreep %st(0)
#NO_APP
/* copy the 16-bit word at the start of the first output to the start of
   the second output, then restore the saved registers */
movzwl (%ecx), %eax
movw %ax, (%ebx)
popl %ebx
popl %ebp
ret
/* .size ASM_NAME(dct64_sse), .-ASM_NAME(dct64_sse) */

96
src/libmpg123/debug.h Normal file
View File

@@ -0,0 +1,96 @@
/*
debug.h:
if DEBUG defined: debugging macro fprintf wrappers
else: macros defined to do nothing
That saves typing #ifdef DEBUG all the time and still preserves
lean code without debugging.
public domain (or LGPL / GPL, if you like that more;-)
generated by debugdef.pl, which was
trivially written by Thomas Orgis <thomas@orgis.org>
*/
#include "config.h"
/*
I could do that with variadic macros available:
#define sdebug(me, s) fprintf(stderr, "[location] " s "\n")
#define debug(me, s, ...) fprintf(stderr, "[location] " s "\n", __VA_ARGS__)
Variadic macros are a C99 feature...
Now just predefining stuff non-variadic for up to 15 arguments.
It's cumbersome to have them all with different names, though...
*/
/*
	stdio.h is included unconditionally: the warningN and errorN macros
	further down expand to fprintf(stderr, ...) whether or not DEBUG is
	defined, so the declaration must always be visible.
*/
#include <stdio.h>
/*
	debugN(s, ...): print a located message with N extra printf arguments.
	s must be a string literal (it is pasted into the format string).
	Without DEBUG these expand to nothing.
*/
#ifdef DEBUG
#define debug(s) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__)
#define debug1(s, a) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a)
#define debug2(s, a, b) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b)
#define debug3(s, a, b, c) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c)
#define debug4(s, a, b, c, d) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d)
#define debug5(s, a, b, c, d, e) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e)
#define debug6(s, a, b, c, d, e, f) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f)
#define debug7(s, a, b, c, d, e, f, g) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g)
#define debug8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h)
#define debug9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i)
#define debug10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j)
#define debug11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k)
#define debug12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l)
#define debug13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m)
#define debug14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
#define debug15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(stderr, "[" __FILE__ ":%i] debug: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
#else
#define debug(s)
#define debug1(s, a)
#define debug2(s, a, b)
#define debug3(s, a, b, c)
#define debug4(s, a, b, c, d)
#define debug5(s, a, b, c, d, e)
#define debug6(s, a, b, c, d, e, f)
#define debug7(s, a, b, c, d, e, f, g)
#define debug8(s, a, b, c, d, e, f, g, h)
#define debug9(s, a, b, c, d, e, f, g, h, i)
#define debug10(s, a, b, c, d, e, f, g, h, i, j)
#define debug11(s, a, b, c, d, e, f, g, h, i, j, k)
#define debug12(s, a, b, c, d, e, f, g, h, i, j, k, l)
#define debug13(s, a, b, c, d, e, f, g, h, i, j, k, l, m)
#define debug14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
#define debug15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
#endif
/* warning macros also here... (always active, independent of DEBUG) */
#define warning(s) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__)
#define warning1(s, a) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a)
#define warning2(s, a, b) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b)
#define warning3(s, a, b, c) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c)
#define warning4(s, a, b, c, d) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d)
#define warning5(s, a, b, c, d, e) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e)
#define warning6(s, a, b, c, d, e, f) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f)
#define warning7(s, a, b, c, d, e, f, g) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g)
#define warning8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h)
#define warning9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i)
#define warning10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j)
#define warning11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k)
#define warning12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l)
#define warning13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m)
#define warning14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
#define warning15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(stderr, "[" __FILE__ ":%i] warning: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
/* error macros also here... (always active, independent of DEBUG) */
#define error(s) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__)
#define error1(s, a) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a)
#define error2(s, a, b) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b)
#define error3(s, a, b, c) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c)
#define error4(s, a, b, c, d) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d)
#define error5(s, a, b, c, d, e) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e)
#define error6(s, a, b, c, d, e, f) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f)
#define error7(s, a, b, c, d, e, f, g) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g)
#define error8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h)
#define error9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i)
#define error10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j)
#define error11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k)
#define error12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l)
#define error13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m)
#define error14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
#define error15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)

246
src/libmpg123/decode.c Normal file
View File

@@ -0,0 +1,246 @@
/*
decode.c: decoding samples...
copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
#include "mpg123lib_intern.h"
/* 8bit functions silenced for FLOATOUT */
/*
	synth_1to1_8bit: 8 bit output for one channel of an interleaved pair.
	Runs synth_1to1 into a local sample_t array by temporarily redirecting
	fr->buffer, then writes 32 bytes (every second byte, starting at offset
	"channel") behind the previous fill mark of the real buffer.
	The fill mark advances by 64 bytes only when "final" is set.
	Returns whatever synth_1to1 returned.
*/
int synth_1to1_8bit(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
	sample_t scratch[64];
	sample_t *src = scratch + channel;
	unsigned char *dest = fr->buffer.data;
	int fill_before = fr->buffer.fill;
	int clipped;
	int n;

	/* Redirect the output buffer to the scratch array for the decode... */
	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_1to1(bandPtr, channel, fr, 0);
	/* ...and put the real buffer back in place. */
	fr->buffer.data = dest;

	dest += channel + fill_before;
	for(n = 32; n > 0; --n, dest += 2, src += 2)
	{
#ifdef FLOATOUT
		*dest = 0;
#else
		*dest = fr->conv16to8[*src>>AUSHIFT];
#endif
	}

	fr->buffer.fill = fill_before + (final ? 64 : 0 );
	return clipped;
}
/*
	synth_1to1_8bit_mono: 8 bit mono output.
	Decodes channel 0 into a local sample_t array (fr->buffer is redirected
	for the duration of the synth_1to1 call), then appends 32 bytes, one per
	decoded sample, to the real buffer and advances the fill mark by 32.
	Returns whatever synth_1to1 returned.
*/
int synth_1to1_8bit_mono(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[64];
	unsigned char *dest = fr->buffer.data;
	int fill_before = fr->buffer.fill;
	int clipped;
	int n;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_1to1(bandPtr,0, fr, 0);
	fr->buffer.data = dest; /* real buffer back in place */

	dest += fill_before;
	/* channel 0 samples sit at the even positions of the scratch array */
	for(n = 0; n < 32; ++n)
	{
#ifdef FLOATOUT
		dest[n] = 0;
#else
		dest[n] = fr->conv16to8[scratch[2*n]>>AUSHIFT];
#endif
	}

	fr->buffer.fill = fill_before + 32;
	return clipped;
}
/*
	synth_1to1_8bit_mono2stereo: 8 bit output, mono duplicated to two channels.
	Decodes channel 0 into a local sample_t array (fr->buffer is redirected
	for the duration of the synth_1to1 call), then appends 32 byte pairs to
	the real buffer, both bytes of a pair carrying the same value.
	The fill mark advances by 64. Returns whatever synth_1to1 returned.
*/
int synth_1to1_8bit_mono2stereo(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[64];
	unsigned char *dest = fr->buffer.data;
	int fill_before = fr->buffer.fill;
	int clipped;
	int n;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_1to1(bandPtr, 0, fr, 0);
	fr->buffer.data = dest; /* real buffer back in place */

	dest += fill_before;
	for(n = 0; n < 32; ++n, dest += 2)
	{
#ifdef FLOATOUT
		dest[0] = 0;
		dest[1] = 0;
#else
		dest[0] = fr->conv16to8[scratch[2*n]>>AUSHIFT];
		dest[1] = fr->conv16to8[scratch[2*n]>>AUSHIFT];
#endif
	}

	fr->buffer.fill = fill_before + 64;
	return clipped;
}
/*
	synth_1to1_mono: mono output in sample_t format.
	Decodes channel 0 into a local array (fr->buffer is redirected for the
	duration of the synth_1to1 call) and appends every second decoded value,
	32 in total, to the real buffer. The fill mark advances by
	32*sizeof(sample_t). Returns whatever synth_1to1 returned.
*/
int synth_1to1_mono(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[64];
	unsigned char *base = fr->buffer.data;
	sample_t *dest;
	int fill_before = fr->buffer.fill;
	int clipped;
	int n;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_1to1(bandPtr, 0, fr, 0); /* fills the scratch array */
	fr->buffer.data = base; /* real buffer back in place */

	/* continue right behind what the frame buffer already holds */
	dest = (sample_t *)(base + fill_before);
	for(n = 0; n < 32; ++n)
		dest[n] = scratch[2*n];

	fr->buffer.fill = fill_before + 32*sizeof(sample_t);
	return clipped;
}
/*
	synth_1to1_mono2stereo: mono decoded once, copied to the second channel.
	Calls synth_1to1 for channel 0 with final set, so the fill mark has
	already moved past the new block when the call returns; the block is
	then revisited and each first sample of a pair is copied to the second.
	Returns whatever synth_1to1 returned.
*/
int synth_1to1_mono2stereo(real *bandPtr, mpg123_handle *fr)
{
	unsigned char *base = fr->buffer.data;
	sample_t *pair;
	int clipped;
	int n;

	clipped = synth_1to1(bandPtr,0,fr,1);

	/* step back to the start of the 64 samples that were just produced */
	pair = (sample_t *)(base + (fr->buffer.fill - 64*sizeof(sample_t)));
	for(n = 0; n < 32; ++n, pair += 2)
		pair[1] = pair[0];

	return clipped;
}
/*
	synth_1to1: synthesis of one channel at full rate.
	bandPtr: input values handed to dct64 (and to the equalizer if enabled).
	channel: 0 or non-zero; selects the work buffer pair and, for non-zero,
	         shifts the output by one sample so the channels interleave.
	fr:      handle providing output buffer, work buffers, bo[0] and window.
	final:   if non-zero, fr->buffer.fill is advanced by 64*sizeof(sample_t).
	Writes 32 samples (16 + 1 + 15) at a stride of 2 starting at the current
	fill position. Returns the clip counter maintained by WRITE_SAMPLE.
*/
int synth_1to1(real *bandPtr,int channel,mpg123_handle *fr, int final)
{
/* output stride in samples: the two channels are interleaved */
static const int step = 2;
sample_t *samples = (sample_t *) (fr->buffer.data+fr->buffer.fill);
real *b0, **buf; /* (*buf)[0x110]; */
int clip = 0;
int bo1;
if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
/* bo[0] is stepped (modulo 16) only on channel 0; the other channel
   reuses the value and writes to the odd output positions */
if(!channel) {
fr->bo[0]--;
fr->bo[0] &= 0xf;
buf = fr->real_buffs[0];
}
else {
samples++;
buf = fr->real_buffs[1];
}
/* the parity of bo[0] decides which of the two work buffers is read by
   the windowing below (b0) and how the dct64 outputs are placed */
if(fr->bo[0] & 0x1) {
b0 = buf[0];
bo1 = fr->bo[0];
dct64(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
}
else {
b0 = buf[1];
bo1 = fr->bo[0]+1;
dct64(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
}
{
register int j;
real *window = opt_decwin(fr) + 16 - bo1;
/* first 16 samples: 16 products each with alternating sign; the loop
   header skips another 0x10 window entries per sample */
for (j=16;j;j--,window+=0x10,samples+=step)
{
real sum;
sum = REAL_MUL(*window++, *b0++);
sum -= REAL_MUL(*window++, *b0++);
sum += REAL_MUL(*window++, *b0++);
sum -= REAL_MUL(*window++, *b0++);
sum += REAL_MUL(*window++, *b0++);
sum -= REAL_MUL(*window++, *b0++);
sum += REAL_MUL(*window++, *b0++);
sum -= REAL_MUL(*window++, *b0++);
sum += REAL_MUL(*window++, *b0++);
sum -= REAL_MUL(*window++, *b0++);
sum += REAL_MUL(*window++, *b0++);
sum -= REAL_MUL(*window++, *b0++);
sum += REAL_MUL(*window++, *b0++);
sum -= REAL_MUL(*window++, *b0++);
sum += REAL_MUL(*window++, *b0++);
sum -= REAL_MUL(*window++, *b0++);
WRITE_SAMPLE(samples,sum,clip);
}
/* middle sample: only the even-indexed entries contribute */
{
real sum;
sum = REAL_MUL(window[0x0], b0[0x0]);
sum += REAL_MUL(window[0x2], b0[0x2]);
sum += REAL_MUL(window[0x4], b0[0x4]);
sum += REAL_MUL(window[0x6], b0[0x6]);
sum += REAL_MUL(window[0x8], b0[0x8]);
sum += REAL_MUL(window[0xA], b0[0xA]);
sum += REAL_MUL(window[0xC], b0[0xC]);
sum += REAL_MUL(window[0xE], b0[0xE]);
WRITE_SAMPLE(samples,sum,clip);
b0-=0x10,window-=0x20,samples+=step;
}
window += bo1<<1;
/* last 15 samples: the window is walked backwards, b0 rows are visited
   in descending order, and every product is subtracted */
for (j=15;j;j--,b0-=0x20,window-=0x10,samples+=step)
{
real sum;
sum = -REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
sum -= REAL_MUL(*(--window), *b0++);
WRITE_SAMPLE(samples,sum,clip);
}
}
/* 32 samples per channel, two interleaved channels */
if(final) fr->buffer.fill += 64*sizeof(sample_t);
return clip;
}

67
src/libmpg123/decode.h Normal file
View File

@@ -0,0 +1,67 @@
/*
decode.h: common definitions for decode functions
copyright 2007 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Thomas Orgis, taking WRITE_SAMPLE from decode.c
*/
#ifndef MPG123_DECODE_H
#define MPG123_DECODE_H
/*
	sample_t: type of one output sample (float with FLOATOUT, short otherwise).
	WRITE_SAMPLE(samples,sum,clip): store sum at *samples.
	In the integer build the value is limited to the range
	[-0x8000, 0x7fff] and clip is incremented whenever limiting happened;
	REAL_PLUS_32767, REAL_MINUS_32768 and REAL_TO_SHORT must be defined at
	the point of use. With FLOATOUT the value is stored as is and clip is
	left alone.
	The integer variant is wrapped in do { } while(0) so that it behaves as
	a single statement (safe inside an unbraced if/else); use it with a
	trailing semicolon. Note that sum is evaluated more than once.
*/
#ifdef FLOATOUT
#define WRITE_SAMPLE(samples,sum,clip) *(samples) = (sum)
#define sample_t float
#else
#define WRITE_SAMPLE(samples,sum,clip) \
	do { \
		if( (sum) > REAL_PLUS_32767) { *(samples) = 0x7fff; (clip)++; } \
		else if( (sum) < REAL_MINUS_32768) { *(samples) = -0x8000; (clip)++; } \
		else { *(samples) = REAL_TO_SHORT(sum); } \
	} while(0)
#define sample_t short
#endif
/* Constants for the ntom (N to M resampling) decoder variant. */
#define NTOM_MAX 8 /* maximum allowed factor for upsampling */
#define NTOM_MAX_FREQ 96000 /* maximum frequency to upsample to / downsample from */
/* NOTE(review): NTOM_MUL is presumably the fixed-point unit of the ntom
   step counter - confirm in decode_ntom.c */
#define NTOM_MUL (32768)
/* synth_1to1 in optimize.h, one should also use opts for these here... */
/*
	Synth functions come in families that share one set of signatures:
	  synth_X(bandPtr, channel, fr, final)       one channel of an interleaved pair
	  synth_X_8bit(bandPtr, channel, fr, final)  same, 8 bit output
	  synth_X_mono / _mono2stereo (bandPtr, fr)  channel 0 only / duplicated
	  synth_X_8bit_mono / _8bit_mono2stereo      the 8 bit counterparts
	All of them return an int; for the 2to1 family (decode_2to1.c) that is
	the clip counter of the underlying synth call.
*/
/* 2:1 downsampling family */
int synth_2to1 (real *,int, mpg123_handle*, int);
int synth_2to1_8bit (real *,int, mpg123_handle *,int);
int synth_2to1_mono (real *, mpg123_handle *);
int synth_2to1_mono2stereo (real *, mpg123_handle *);
int synth_2to1_8bit_mono (real *, mpg123_handle *);
int synth_2to1_8bit_mono2stereo (real *, mpg123_handle *);
/* 4to1 family (presumably 4:1 downsampling, by analogy with 2to1) */
int synth_4to1 (real *,int, mpg123_handle*, int);
int synth_4to1_8bit (real *,int, mpg123_handle *,int);
int synth_4to1_mono (real *, mpg123_handle *);
int synth_4to1_mono2stereo (real *, mpg123_handle *);
int synth_4to1_8bit_mono (real *, mpg123_handle *);
int synth_4to1_8bit_mono2stereo (real *, mpg123_handle *);
/* ntom family (flexible rate, see the NTOM_ constants above) */
int synth_ntom (real *,int, mpg123_handle*, int);
int synth_ntom_8bit (real *,int, mpg123_handle *,int);
int synth_ntom_mono (real *, mpg123_handle *);
int synth_ntom_mono2stereo (real *, mpg123_handle *);
int synth_ntom_8bit_mono (real *, mpg123_handle *);
int synth_ntom_8bit_mono2stereo (real *, mpg123_handle *);
/* ntom helpers; the ones without a comment are not defined in this part
   of the source - see their definitions for exact semantics */
int synth_ntom_set_step(mpg123_handle *fr); /* prepare ntom decoding */
unsigned long ntom_val(mpg123_handle *fr, off_t frame); /* compute ntom_val for frame offset */
off_t ntom_frmouts(mpg123_handle *fr, off_t frame);
off_t ntom_ins2outs(mpg123_handle *fr, off_t ins);
off_t ntom_frameoff(mpg123_handle *fr, off_t soff);
/* per-layer setup: one variant without arguments and one per handle */
void init_layer3(void);
void init_layer3_stuff(mpg123_handle *fr);
void init_layer2(void);
void init_layer2_stuff(mpg123_handle *fr);
/* NOTE(review): presumably fills fr->conv16to8, the lookup table the 8 bit
   synth functions index - confirm at the definition */
int make_conv16to8_table(mpg123_handle *fr);
/* per-layer decoding entry points */
int do_layer3(mpg123_handle *fr);
int do_layer2(mpg123_handle *fr);
int do_layer1(mpg123_handle *fr);
/* called by the synth functions on bandPtr before the DCT when
   fr->have_eq_settings is set */
void do_equalizer(real *bandPtr,int channel, real equalizer[2][32]);
#endif

248
src/libmpg123/decode_2to1.c Normal file
View File

@@ -0,0 +1,248 @@
/*
decode_2to1.c: ...with 2to1 downsampling
copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
#include "mpg123lib_intern.h"
/*
	synth_2to1_8bit: 8 bit output for one channel of an interleaved pair,
	2:1 downsampled.
	Runs synth_2to1 into a local sample_t array by temporarily redirecting
	fr->buffer, then writes 16 bytes (every second byte, starting at offset
	"channel") behind the previous fill mark of the real buffer.
	The fill mark advances by 32 bytes only when "final" is set.
	Returns whatever synth_2to1 returned.
*/
int synth_2to1_8bit(real *bandPtr, int channel, mpg123_handle *fr, int final)
{
	sample_t scratch[32];
	sample_t *src = scratch + channel;
	unsigned char *dest = fr->buffer.data;
	int fill_before = fr->buffer.fill;
	int clipped;
	int n;

	/* Redirect the output buffer to the scratch array for the decode... */
	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_2to1(bandPtr,channel, fr, 0);
	/* ...and put the real buffer back in place. */
	fr->buffer.data = dest;

	dest += channel + fill_before;
	for(n = 16; n > 0; --n, dest += 2, src += 2)
	{
#ifdef FLOATOUT
		*dest = 0;
#else
		*dest = fr->conv16to8[*src>>AUSHIFT];
#endif
	}

	fr->buffer.fill = fill_before + (final ? 32 : 0);
	return clipped;
}
/*
	synth_2to1_8bit_mono: 8 bit mono output, 2:1 downsampled.
	Decodes channel 0 into a local sample_t array (fr->buffer is redirected
	for the duration of the synth_2to1 call), then appends 16 bytes, one per
	decoded sample, to the real buffer and advances the fill mark by 16.
	Returns whatever synth_2to1 returned.
*/
int synth_2to1_8bit_mono(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[32];
	unsigned char *dest = fr->buffer.data;
	int fill_before = fr->buffer.fill;
	int clipped;
	int n;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_2to1(bandPtr, 0, fr, 0);
	fr->buffer.data = dest; /* real buffer back in place */

	dest += fill_before;
	/* channel 0 samples sit at the even positions of the scratch array */
	for(n = 0; n < 16; ++n)
	{
#ifdef FLOATOUT
		dest[n] = 0;
#else
		dest[n] = fr->conv16to8[scratch[2*n]>>AUSHIFT];
#endif
	}

	fr->buffer.fill = fill_before + 16;
	return clipped;
}
/*
	synth_2to1_8bit_mono2stereo: 8 bit output, 2:1 downsampled, mono
	duplicated to two channels.
	Decodes channel 0 into a local sample_t array (fr->buffer is redirected
	for the duration of the synth_2to1 call), then appends 16 byte pairs to
	the real buffer, both bytes of a pair carrying the same value.
	The fill mark advances by 32. Returns whatever synth_2to1 returned.
*/
int synth_2to1_8bit_mono2stereo(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[32];
	unsigned char *dest = fr->buffer.data;
	int fill_before = fr->buffer.fill;
	int clipped;
	int n;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_2to1(bandPtr,0, fr, 0);
	fr->buffer.data = dest; /* real buffer back in place */

	dest += fill_before;
	for(n = 0; n < 16; ++n, dest += 2)
	{
#ifdef FLOATOUT
		dest[0] = 0;
		dest[1] = 0;
#else
		dest[0] = fr->conv16to8[scratch[2*n]>>AUSHIFT];
		dest[1] = fr->conv16to8[scratch[2*n]>>AUSHIFT];
#endif
	}

	fr->buffer.fill = fill_before + 32;
	return clipped;
}
/*
	synth_2to1_mono: mono output in sample_t format, 2:1 downsampled.
	Decodes channel 0 into a local array (fr->buffer is redirected for the
	duration of the synth_2to1 call) and appends every second decoded value,
	16 in total, to the real buffer. The fill mark advances by
	16*sizeof(sample_t). Returns whatever synth_2to1 returned.
*/
int synth_2to1_mono(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[32];
	unsigned char *base = fr->buffer.data;
	sample_t *dest;
	int fill_before = fr->buffer.fill;
	int clipped;
	int n;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_2to1(bandPtr, 0, fr, 0);
	fr->buffer.data = base; /* real buffer back in place */

	/* continue right behind what the frame buffer already holds */
	dest = (sample_t *)(base + fill_before);
	for(n = 0; n < 16; ++n)
		dest[n] = scratch[2*n];

	fr->buffer.fill = fill_before + 16*sizeof(sample_t);
	return clipped;
}
/*
	synth_2to1_mono2stereo: 2:1 downsampled mono decoded once, copied to the
	second channel.
	Calls synth_2to1 for channel 0 with final set, so the fill mark has
	already moved past the new block when the call returns; the block is
	then revisited and each first sample of a pair is copied to the second.
	Returns whatever synth_2to1 returned.
*/
int synth_2to1_mono2stereo(real *bandPtr, mpg123_handle *fr)
{
	unsigned char *base = fr->buffer.data;
	sample_t *pair;
	int clipped;
	int n;

	clipped = synth_2to1(bandPtr,0, fr, 1);

	/* step back to the start of the 32 samples that were just produced */
	pair = (sample_t *)(base + (fr->buffer.fill - 32*sizeof(sample_t)));
	for(n = 0; n < 16; ++n, pair += 2)
		pair[1] = pair[0];

	return clipped;
}
/*
	synth_2to1: synthesis of one channel with 2:1 downsampling.
	bandPtr: input values handed to the dct64 routine (and to the equalizer
	         if enabled).
	channel: 0 or non-zero; selects the work buffer pair and, for non-zero,
	         shifts the output by one sample so the channels interleave.
	fr:      handle providing output buffer, work buffers, bo[0], the dct64
	         implementation and the window.
	final:   if non-zero, fr->buffer.fill is advanced by 32*sizeof(sample_t).
	Writes 16 samples (8 + 1 + 7) at a stride of 2 starting at the current
	fill position. Returns the clip counter maintained by WRITE_SAMPLE.

	All products go through REAL_MUL, as in synth_1to1, so that the
	arithmetic stays correct for whatever representation "real" has.
*/
int synth_2to1(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
	/* output stride in samples: the two channels are interleaved */
	static const int step = 2;
	sample_t *samples = (sample_t *) (fr->buffer.data + fr->buffer.fill);
	real *b0, **buf; /* (*buf)[0x110]; */
	int clip = 0;
	int bo1;

	if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);

	/* bo[0] is stepped (modulo 16) only on channel 0; the other channel
	   reuses the value and writes to the odd output positions */
	if(!channel) {
		fr->bo[0]--;
		fr->bo[0] &= 0xf;
		buf = fr->real_buffs[0];
	}
	else {
		samples++;
		buf = fr->real_buffs[1];
	}

	/* the parity of bo[0] decides which of the two work buffers is read by
	   the windowing below (b0) and how the dct64 outputs are placed */
	if(fr->bo[0] & 0x1) {
		b0 = buf[0];
		bo1 = fr->bo[0];
		opt_dct64(fr)(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
	}
	else {
		b0 = buf[1];
		bo1 = fr->bo[0]+1;
		opt_dct64(fr)(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
	}

	{
		register int j;
		real *window = opt_decwin(fr) + 16 - bo1;

		/* first 8 samples: 16 products each with alternating sign; the loop
		   header skips one b0 row and the matching window entries, which
		   is what halves the output rate */
		for (j=8;j;j--,b0+=0x10,window+=0x30)
		{
			real sum;
			sum = REAL_MUL(*window++, *b0++);
			sum -= REAL_MUL(*window++, *b0++);
			sum += REAL_MUL(*window++, *b0++);
			sum -= REAL_MUL(*window++, *b0++);
			sum += REAL_MUL(*window++, *b0++);
			sum -= REAL_MUL(*window++, *b0++);
			sum += REAL_MUL(*window++, *b0++);
			sum -= REAL_MUL(*window++, *b0++);
			sum += REAL_MUL(*window++, *b0++);
			sum -= REAL_MUL(*window++, *b0++);
			sum += REAL_MUL(*window++, *b0++);
			sum -= REAL_MUL(*window++, *b0++);
			sum += REAL_MUL(*window++, *b0++);
			sum -= REAL_MUL(*window++, *b0++);
			sum += REAL_MUL(*window++, *b0++);
			sum -= REAL_MUL(*window++, *b0++);

			WRITE_SAMPLE(samples,sum,clip); samples += step;
		}

		/* middle sample: only the even-indexed entries contribute */
		{
			real sum;
			sum = REAL_MUL(window[0x0], b0[0x0]);
			sum += REAL_MUL(window[0x2], b0[0x2]);
			sum += REAL_MUL(window[0x4], b0[0x4]);
			sum += REAL_MUL(window[0x6], b0[0x6]);
			sum += REAL_MUL(window[0x8], b0[0x8]);
			sum += REAL_MUL(window[0xA], b0[0xA]);
			sum += REAL_MUL(window[0xC], b0[0xC]);
			sum += REAL_MUL(window[0xE], b0[0xE]);
			WRITE_SAMPLE(samples,sum,clip); samples += step;
			b0-=0x20,window-=0x40;
		}
		window += bo1<<1;

		/* last 7 samples: the window is walked backwards, b0 rows are
		   visited in descending order, and every product is subtracted */
		for (j=7;j;j--,b0-=0x30,window-=0x30)
		{
			real sum;
			sum = -REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);
			sum -= REAL_MUL(*(--window), *b0++);

			WRITE_SAMPLE(samples,sum,clip); samples += step;
		}
	}

	/* 16 samples per channel, two interleaved channels */
	if(final) fr->buffer.fill += 32*sizeof(sample_t);

	return clip;
}

View File

@@ -0,0 +1,280 @@
/*
decode_3dnow.s - 3DNow! optimized synth_1to1()
copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Syuuhei Kashiyama
This code based 'decode_3dnow.s' by Syuuhei Kashiyama
<squash@mb.kcom.ne.jp>,only two types of changes have been made:
- remove PREFETCH instruction for speedup
- change function name for support 3DNow! automatic detect
- femms moved to before 'call dct64_3dnow'
You can find Kashiyama's original 3dnow! support patch
(for mpg123-0.59o) at
http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
<kim@comtec.co.jp> - after 1.Apr.1999
Replacement of synth_1to1() with AMD's 3DNow! SIMD operations support
Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
The author of this program disclaim whole expressed or implied
warranties with regard to this program, and in no event shall the
author of this program liable to whatever resulted from the use of
this program. Use it at your own risk.
*/
#include "mangle.h"
.text
.globl ASM_NAME(synth_1to1_3dnow_asm)
/* int synth_1to1_3dnow_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin); */
/*
	3DNow! synthesis of one channel: calls dct64_3dnow and then writes 32 16-bit samples
	(16 from the first loop, 1 in between, 15 from the second loop) to the output pointer,
	advancing 4 bytes per sample.
	All arguments are read from the stack through the macros defined below.
	The value handed back in %eax is the %ebp counter, see the end of the routine.
*/
ASM_NAME(synth_1to1_3dnow_asm):
/* reserve 24 bytes of locals, then save the registers used below; %ebp starts at zero */
subl $24,%esp
pushl %ebp
pushl %edi
xorl %ebp,%ebp
pushl %esi
pushl %ebx
/* stack old: 0=ebx 4=esi 8=edi 12=ebp 16,20,24,28,32,36=local 40=back 44=bandptr 48=channel 52=out 56=pnt */
/* stack new: 0=ebx 4=esi 8=edi 12=ebp 16,20,24,28,32,36=local 40=back 44=bandptr 48=channel 52=out 56=buffs 60=bo 64=decwin */
#define OUT 52(%esp)
#define CHANNEL 48(%esp)
#define BANDPTR 44(%esp)
#define BUFFS 56(%esp)
#define BO 60(%esp)
#define DECWIN 64(%esp)
#define LOCAL0 16(%esp)
#define LOCAL1 20(%esp)
#define LOCAL5 36(%esp)
movl OUT,%esi
movl %esi,LOCAL0 /* save buffer start (samples pointer) to another local var */
movl CHANNEL,%ebx
movl BO,%esi /* bo address */
movl (%esi),%edx /* bo value */
femms
testl %ebx,%ebx
jne .L26
/* if(!channel) */
decl %edx /* --bo */
andl $15,%edx
movl %edx,(%esi) /* save bo */
movl BUFFS,%ecx
jmp .L27
.L26: /* if(channel) */
addl $2,LOCAL0 /* samples++ */
movl BUFFS,%ecx
/* the second channel uses the buffer area 2176 bytes after the start of buffs */
addl $2176,%ecx
.L27:
/* edx (and it's lower end) still holds bo value */
testb $1,%dl /* bo & 0x1 */
je .L28
/* odd bo: ebx = ecx, LOCAL5 = bo, LOCAL1 = bo*4;
   call dct64_3dnow(ecx+1088+((bo+1)&15)*4, ecx+bo*4, bandPtr) */
movl %edx,LOCAL5
movl %ecx,%ebx
movl BANDPTR,%esi
movl %edx,%edi
pushl %esi
sall $2,%edi
movl %ebx,%eax
movl %edi,24(%esp) /* LOCAL1, actually */
addl %edi,%eax
pushl %eax
movl %edx,%eax
incl %eax
andl $15,%eax
leal 1088(,%eax,4),%eax
addl %ebx,%eax
pushl %eax
call ASM_NAME(dct64_3dnow)
addl $12,%esp
jmp .L29
.L28:
/* even bo: ebx = ecx+1088, LOCAL5 = bo+1, LOCAL1 = (bo+1)*4;
   call dct64_3dnow(ecx+bo*4, ecx+1092+bo*4, bandPtr) */
leal 1(%edx),%esi
movl BANDPTR,%edi
movl %esi,LOCAL5
leal 1092(%ecx,%edx,4),%eax
pushl %edi
leal 1088(%ecx),%ebx
pushl %eax
sall $2,%esi
leal (%ecx,%edx,4),%eax
pushl %eax
call ASM_NAME(dct64_3dnow)
addl $12,%esp
/* %esi is used after the call, so dct64_3dnow is relied upon to preserve it */
movl %esi,LOCAL1
.L29:
/* edx = decwin + 64 bytes - LOCAL1 (window pointer), ebx = synthesis buffer,
   edi = output pointer, ecx = 16 iterations */
movl DECWIN,%edx
addl $64,%edx
movl $16,%ecx
subl LOCAL1,%edx
movl LOCAL0,%edi
/* preload the first pair of operands for the loop */
movq (%edx),%mm0
movq (%ebx),%mm1
ALIGN32
.L33:
/* first loop: multiply 8 quadwords (16 values) at edx with those at ebx and accumulate them */
movq 8(%edx),%mm3
pfmul %mm1,%mm0
movq 8(%ebx),%mm4
movq 16(%edx),%mm5
pfmul %mm4,%mm3
movq 16(%ebx),%mm6
pfadd %mm3,%mm0
movq 24(%edx),%mm1
pfmul %mm6,%mm5
movq 24(%ebx),%mm2
pfadd %mm5,%mm0
movq 32(%edx),%mm3
pfmul %mm2,%mm1
movq 32(%ebx),%mm4
pfadd %mm1,%mm0
movq 40(%edx),%mm5
pfmul %mm4,%mm3
movq 40(%ebx),%mm6
pfadd %mm3,%mm0
movq 48(%edx),%mm1
pfmul %mm6,%mm5
movq 48(%ebx),%mm2
pfadd %mm0,%mm5
movq 56(%edx),%mm3
pfmul %mm1,%mm2
movq 56(%ebx),%mm4
pfadd %mm5,%mm2
/* advance ebx by 64 and edx by 128 bytes, preload the operands of the next iteration */
addl $64,%ebx
subl $-128,%edx
movq (%edx),%mm0
pfmul %mm4,%mm3
movq (%ebx),%mm1
pfadd %mm3,%mm2
/* low half minus high half: products at even positions minus products at odd positions */
movq %mm2,%mm3
psrlq $32,%mm3
pfsub %mm3,%mm2
incl %ebp
/* convert to integer, pack with signed saturation to 16 bit, store the low word */
pf2id %mm2,%mm2
packssdw %mm2,%mm2
movd %mm2,%eax
movw %ax,0(%edi)
addl $4,%edi
decl %ecx
jnz .L33
/* single sample in between: only every second value (byte offsets 0,8,...,56) of both arrays is used */
movd (%ebx),%mm0
movd (%edx),%mm1
punpckldq 8(%ebx),%mm0
punpckldq 8(%edx),%mm1
movd 16(%ebx),%mm3
movd 16(%edx),%mm4
pfmul %mm1,%mm0
punpckldq 24(%ebx),%mm3
punpckldq 24(%edx),%mm4
movd 32(%ebx),%mm5
movd 32(%edx),%mm6
pfmul %mm4,%mm3
punpckldq 40(%ebx),%mm5
punpckldq 40(%edx),%mm6
pfadd %mm3,%mm0
movd 48(%ebx),%mm1
movd 48(%edx),%mm2
pfmul %mm6,%mm5
punpckldq 56(%ebx),%mm1
punpckldq 56(%edx),%mm2
pfadd %mm5,%mm0
pfmul %mm2,%mm1
pfadd %mm1,%mm0
/* add both halves of mm0, result in the low half */
pfacc %mm1,%mm0
pf2id %mm0,%mm0
packssdw %mm0,%mm0
movd %mm0,%eax
movw %ax,0(%edi)
incl %ebp
/* prepare the second loop: ebx -= 64, edx = edx - 128 + LOCAL5*8, 15 iterations;
   %ebp is overwritten with 15 here, discarding the count accumulated so far */
movl LOCAL5,%esi
addl $-64,%ebx
movl $15,%ebp
addl $4,%edi
leal -128(%edx,%esi,8),%edx
movl $15,%ecx
movd (%ebx),%mm0
movd -4(%edx),%mm1
punpckldq 4(%ebx),%mm0
punpckldq -8(%edx),%mm1
ALIGN32
.L46:
/* second loop: values at ebx in ascending order are multiplied with values below edx
   in descending order (-4(%edx) ... -60(%edx)); the last value, 60(%ebx), pairs with (%edx) */
movd 8(%ebx),%mm3
movd -12(%edx),%mm4
pfmul %mm1,%mm0
punpckldq 12(%ebx),%mm3
punpckldq -16(%edx),%mm4
movd 16(%ebx),%mm5
movd -20(%edx),%mm6
pfmul %mm4,%mm3
punpckldq 20(%ebx),%mm5
punpckldq -24(%edx),%mm6
pfadd %mm3,%mm0
movd 24(%ebx),%mm1
movd -28(%edx),%mm2
pfmul %mm6,%mm5
punpckldq 28(%ebx),%mm1
punpckldq -32(%edx),%mm2
pfadd %mm5,%mm0
movd 32(%ebx),%mm3
movd -36(%edx),%mm4
pfmul %mm2,%mm1
punpckldq 36(%ebx),%mm3
punpckldq -40(%edx),%mm4
pfadd %mm1,%mm0
movd 40(%ebx),%mm5
movd -44(%edx),%mm6
pfmul %mm4,%mm3
punpckldq 44(%ebx),%mm5
punpckldq -48(%edx),%mm6
pfadd %mm3,%mm0
movd 48(%ebx),%mm1
movd -52(%edx),%mm2
pfmul %mm6,%mm5
punpckldq 52(%ebx),%mm1
punpckldq -56(%edx),%mm2
pfadd %mm0,%mm5
movd 56(%ebx),%mm3
movd -60(%edx),%mm4
pfmul %mm2,%mm1
punpckldq 60(%ebx),%mm3
punpckldq (%edx),%mm4
pfadd %mm1,%mm5
/* step both pointers backwards and preload the operands of the next iteration */
addl $-128,%edx
addl $-64,%ebx
movd (%ebx),%mm0
movd -4(%edx),%mm1
pfmul %mm4,%mm3
punpckldq 4(%ebx),%mm0
punpckldq -8(%edx),%mm1
pfadd %mm5,%mm3
pfacc %mm3,%mm3
incl %ebp
/* the sum is negated in the integer domain before the saturating pack */
pf2id %mm3,%mm3
movd %mm3,%eax
negl %eax
movd %eax,%mm3
packssdw %mm3,%mm3
movd %mm3,%eax
movw %ax,(%edi)
addl $4,%edi
decl %ecx
jnz .L46
femms
/* NOTE(review): the return value is the %ebp counter (15 plus one per iteration of the
   second loop), not a count of saturated samples - confirm what the C caller expects */
movl %ebp,%eax
popl %ebx
popl %esi
popl %edi
popl %ebp
addl $24,%esp
ret

View File

@@ -0,0 +1,4 @@
/*
	3DNowExt flavour of the synth routine: this file only sets two names and
	then pulls in decode_sse3d.h.
	NOTE(review): presumably decode_sse3d.h holds the shared assembly body that
	uses MPL_DCT64 and SYNTH_NAME - confirm against that header.
*/
#include "mangle.h"
#define MPL_DCT64 ASM_NAME(dct64_3dnowext)
#define SYNTH_NAME ASM_NAME(synth_1to1_3dnowext_asm)
#include "decode_sse3d.h"

257
src/libmpg123/decode_4to1.c Normal file
View File

@@ -0,0 +1,257 @@
/*
decode_4to1.c: ...with 4to1 downsampling / decoding of every 4th sample
copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
dunno why it sounds THIS annoying (maybe we should adapt the window?)
absolutely not optimized for this operation
*/
#include "mpg123lib_intern.h"
/*
	8bit output for one channel of an interleaved two-channel stream, 4to1 downsampling.
	The handle's output buffer is pointed at a local scratch array for the synth_4to1() call;
	afterwards the 8 samples of the given channel are translated via fr->conv16to8 and written
	to every second byte of the real output buffer (zeros are written when FLOATOUT is defined).
	bandPtr, channel: handed on to synth_4to1(); channel also selects the byte/sample offset
	final: if non-zero, the fill of the output buffer is advanced by 16 bytes
	returns the value synth_4to1() returned (its clip counter)
*/
int synth_4to1_8bit(real *bandPtr, int channel, mpg123_handle *fr, int final)
{
	sample_t scratch[16];
	unsigned char *real_out = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int k, clipped;

	/* let the plain synth write into the scratch space */
	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_4to1(bandPtr,channel, fr, 0);
	fr->buffer.data = real_out;

	dst = real_out + (channel + old_fill);
	for(k = 0; k < 8; ++k)
	{
#ifdef FLOATOUT
		dst[2*k] = 0;
#else
		dst[2*k] = fr->conv16to8[scratch[channel + 2*k]>>AUSHIFT];
#endif
	}

	fr->buffer.fill = old_fill + (final ? 16 : 0);
	return clipped;
}
/*
	8bit mono output with 4to1 downsampling.
	Runs synth_4to1() for channel 0 into a local scratch array and writes the 8 resulting
	samples, translated via fr->conv16to8, as consecutive bytes to the output buffer
	(zeros when FLOATOUT is defined). The fill of the output buffer grows by 8 bytes.
	returns the value synth_4to1() returned
*/
int synth_4to1_8bit_mono(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[16];
	unsigned char *real_out = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int k, clipped;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_4to1(bandPtr, 0, fr, 0);
	fr->buffer.data = real_out;

	dst = real_out + old_fill;
	for(k = 0; k < 8; ++k)
	{
		/* channel 0 occupies the even positions of the scratch array */
#ifdef FLOATOUT
		dst[k] = 0;
#else
		dst[k] = fr->conv16to8[scratch[2*k]>>AUSHIFT];
#endif
	}

	fr->buffer.fill = old_fill + 8;
	return clipped;
}
/*
	8bit output with 4to1 downsampling, one decoded channel duplicated to two output channels.
	Runs synth_4to1() for channel 0 into a local scratch array and writes each of the 8 samples,
	translated via fr->conv16to8, twice in a row to the output buffer (zeros when FLOATOUT is
	defined). The fill of the output buffer grows by 16 bytes.
	returns the value synth_4to1() returned
*/
int synth_4to1_8bit_mono2stereo(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[16];
	unsigned char *real_out = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int k, clipped;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_4to1(bandPtr, 0, fr, 0);
	fr->buffer.data = real_out;

	dst = real_out + old_fill;
	for(k = 0; k < 8; ++k)
	{
#ifdef FLOATOUT
		dst[2*k]   = 0;
		dst[2*k+1] = 0;
#else
		dst[2*k]   = fr->conv16to8[scratch[2*k]>>AUSHIFT];
		dst[2*k+1] = fr->conv16to8[scratch[2*k]>>AUSHIFT];
#endif
	}

	fr->buffer.fill = old_fill + 16;
	return clipped;
}
/*
	Mono output in the native sample format with 4to1 downsampling.
	Runs synth_4to1() for channel 0 into a local scratch array (which it fills with a
	stride of two) and copies the 8 samples densely into the output buffer.
	The fill of the output buffer grows by 8*sizeof(sample_t).
	returns the value synth_4to1() returned
*/
int synth_4to1_mono(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[16];
	unsigned char *real_out = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int k, clipped;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_4to1(bandPtr, 0, fr, 0);
	fr->buffer.data = real_out;

	dst = real_out + old_fill;
	for(k = 0; k < 8; ++k)
	*( (sample_t *)(dst + k*sizeof(sample_t)) ) = scratch[2*k];

	fr->buffer.fill = old_fill + 8*sizeof(sample_t);
	return clipped;
}
/*
	Native sample format with 4to1 downsampling, one decoded channel duplicated to two.
	synth_4to1() writes channel 0 directly into the output buffer and advances the fill
	(final = 1); afterwards the 16 samples just produced are revisited and every first
	sample of a pair is copied onto the second one.
	returns the value synth_4to1() returned
*/
int synth_4to1_mono2stereo(real *bandPtr, mpg123_handle *fr)
{
	unsigned char *base = fr->buffer.data;
	sample_t *pair;
	int k, clipped;

	clipped = synth_4to1(bandPtr, 0, fr, 1);

	/* step back to the start of the block that was just written */
	base += fr->buffer.fill - 16*sizeof(sample_t);
	for(k = 0; k < 8; ++k)
	{
		pair = (sample_t *)(base + k*2*sizeof(sample_t));
		pair[1] = pair[0];
	}
	return clipped;
}
/*
	Synthesis with 4to1 downsampling for one channel.
	Runs the dct64 selected by opt_dct64(fr) on bandPtr and then computes 8 output samples
	(4 + 1 + 3) as sums of window/buffer products, writing them with a stride of two samples
	starting at the current fill position of the output buffer (one sample later for channel != 0).
	For channel 0, fr->bo[0] is decremented modulo 16 first.
	final: if non-zero, the fill of the output buffer is advanced by 16*sizeof(sample_t)
	returns the clip counter that WRITE_SAMPLE maintained
*/
int synth_4to1(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
	const int stride = 2;
	sample_t *out = (sample_t *) (fr->buffer.data + fr->buffer.fill);
	real *b0, **buf; /* (*buf)[0x110]; */
	real *window;
	real sum;
	int clip = 0;
	int bo1;
	int j, k;

	if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);

	if(channel)
	{
		out++;
		buf = fr->real_buffs[1];
	}
	else
	{
		fr->bo[0] = (fr->bo[0] - 1) & 0xf;
		buf = fr->real_buffs[0];
	}

	if(fr->bo[0] & 0x1)
	{
		b0 = buf[0];
		bo1 = fr->bo[0];
		opt_dct64(fr)(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
	}
	else
	{
		b0 = buf[1];
		bo1 = fr->bo[0]+1;
		opt_dct64(fr)(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
	}

	window = opt_decwin(fr) + 16 - bo1;

	/* first 4 samples: 16 products with alternating sign, same order of summation as ever */
	for(j = 0; j < 4; ++j)
	{
		sum  = window[0x0] * b0[0x0];
		sum -= window[0x1] * b0[0x1];
		for(k = 0x2; k < 0x10; k += 2)
		{
			sum += window[k]   * b0[k];
			sum -= window[k+1] * b0[k+1];
		}
		WRITE_SAMPLE(out,sum,clip);
		out += stride;
		/* only every 4th line of buffer and window is used */
		b0 += 0x40;
		window += 0x80;
	}

	/* the sample in between: even positions only */
	sum = window[0x0] * b0[0x0];
	for(k = 0x2; k < 0x10; k += 2)
	sum += window[k] * b0[k];

	WRITE_SAMPLE(out,sum,clip);
	out += stride;
	b0 -= 0x40;
	window -= 0x80;

	window += bo1<<1;

	/* last 3 samples: window walked backwards, all products subtracted */
	for(j = 0; j < 3; ++j)
	{
		sum = -window[-0x1] * b0[0x0];
		for(k = 0x1; k < 0x10; ++k)
		sum -= window[-1-k] * b0[k];

		WRITE_SAMPLE(out,sum,clip);
		out += stride;
		b0 -= 0x40;
		window -= 0x80;
	}

	if(final) fr->buffer.fill += 16*sizeof(sample_t);

	return clip;
}

View File

@@ -0,0 +1,593 @@
/*
decode_altivec.c: decoding samples... (AltiVec version)
copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
altivec optimization by tmkk
*/
#include "mpg123lib_intern.h"
#ifndef __APPLE__
#include <altivec.h>
#endif
/*
	WRITE_SAMPLE(samples,sum,clip): store sum as 16 bit sample at *samples.
	Values above REAL_PLUS_32767 are stored as 0x7fff, values below REAL_MINUS_32768 as
	-0x8000; both cases increment clip. Anything else goes through REAL_TO_SHORT().
	The body is wrapped in do { } while(0) so that the macro behaves like a single statement
	(safe inside an unbraced if/else); the #undef avoids a clash in case an included header
	already defines the macro.
*/
#undef WRITE_SAMPLE
#define WRITE_SAMPLE(samples,sum,clip) \
do { \
	if( (sum) > REAL_PLUS_32767) { *(samples) = 0x7fff; (clip)++; } \
	else if( (sum) < REAL_MINUS_32768) { *(samples) = -0x8000; (clip)++; } \
	else { *(samples) = REAL_TO_SHORT(sum); } \
} while(0)
/*
	8bit output for one channel of an interleaved two-channel stream, AltiVec synth.
	The handle's output buffer is pointed at a local scratch array for the
	synth_1to1_altivec() call; afterwards the 32 samples of the given channel are translated
	via the handle's conv16to8 table and written to every second byte of the real output buffer.
	The table is taken from the handle (fr->conv16to8), like the other 8bit wrappers do.
	final: if non-zero, the fill of the output buffer is advanced by 64 bytes
	returns the value synth_1to1_altivec() returned
*/
int synth_1to1_8bit_altivec(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
	short samples_tmp[64];
	short *tmp1 = samples_tmp + channel;
	int i,ret;

	unsigned char *samples = fr->buffer.data;
	int pnt = fr->buffer.fill;

	/* let the 16bit synth write into the temporary buffer */
	fr->buffer.data = (unsigned char*) samples_tmp;
	fr->buffer.fill = 0;
	ret = synth_1to1_altivec(bandPtr, channel, fr, 0);
	fr->buffer.data = samples;

	samples += channel + pnt;
	for(i=0;i<32;i++) {
		*samples = fr->conv16to8[*tmp1>>AUSHIFT];
		samples += 2;
		tmp1 += 2;
	}
	fr->buffer.fill = pnt + (final ? 64 : 0 );

	return ret;
}
/*
	8bit mono output, AltiVec synth.
	Runs synth_1to1_altivec() for channel 0 into a local scratch array and writes the 32
	resulting samples, translated via the handle's conv16to8 table, as consecutive bytes to
	the output buffer. The fill of the output buffer grows by 32 bytes.
	The table is taken from the handle (fr->conv16to8), like the other 8bit wrappers do.
	returns the value synth_1to1_altivec() returned
*/
int synth_1to1_8bit_mono_altivec(real *bandPtr, mpg123_handle *fr)
{
	short samples_tmp[64];
	short *tmp1 = samples_tmp;
	int i,ret;

	unsigned char *samples = fr->buffer.data;
	int pnt = fr->buffer.fill;

	fr->buffer.data = (unsigned char*) samples_tmp;
	fr->buffer.fill = 0;
	ret = synth_1to1_altivec(bandPtr, 0, fr, 0);
	fr->buffer.data = samples;

	samples += pnt;
	for(i=0;i<32;i++) {
		*samples++ = fr->conv16to8[*tmp1>>AUSHIFT];
		/* channel 0 occupies every second slot of the temporary buffer */
		tmp1 += 2;
	}
	fr->buffer.fill = pnt + 32;

	return ret;
}
/*
	8bit output, one decoded channel duplicated to two output channels, AltiVec synth.
	Runs synth_1to1_altivec() for channel 0 into a local scratch array and writes each of the
	32 samples, translated via the handle's conv16to8 table, twice in a row to the output
	buffer. The fill of the output buffer grows by 64 bytes.
	The table is taken from the handle (fr->conv16to8), like the other 8bit wrappers do.
	returns the value synth_1to1_altivec() returned
*/
int synth_1to1_8bit_mono2stereo_altivec(real *bandPtr, mpg123_handle *fr)
{
	short samples_tmp[64];
	short *tmp1 = samples_tmp;
	int i,ret;

	unsigned char *samples = fr->buffer.data;
	int pnt = fr->buffer.fill;

	fr->buffer.data = (unsigned char*) samples_tmp;
	fr->buffer.fill = 0;
	ret = synth_1to1_altivec(bandPtr, 0, fr, 0);
	fr->buffer.data = samples;

	samples += pnt;
	for(i=0;i<32;i++) {
		*samples++ = fr->conv16to8[*tmp1>>AUSHIFT];
		*samples++ = fr->conv16to8[*tmp1>>AUSHIFT];
		tmp1 += 2;
	}
	fr->buffer.fill = pnt + 64;

	return ret;
}
/*
	16bit mono output, AltiVec synth.
	Runs synth_1to1_altivec() for channel 0 into a local scratch array (filled with a stride
	of two) and copies the 32 samples densely, two bytes apart, into the output buffer.
	The fill of the output buffer grows by 64 bytes.
	returns the value synth_1to1_altivec() returned
*/
int synth_1to1_mono_altivec(real *bandPtr, mpg123_handle *fr)
{
	short scratch[64];
	unsigned char *real_out = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int k, clipped;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_1to1_altivec(bandPtr, 0, fr, 0);
	fr->buffer.data = real_out;

	dst = real_out + old_fill;
	for(k = 0; k < 32; ++k)
	*( (short *)(dst + 2*k) ) = scratch[2*k];

	fr->buffer.fill = old_fill + 64;
	return clipped;
}
/*
	16bit output, one decoded channel duplicated to two, AltiVec synth.
	synth_1to1_altivec() writes channel 0 directly into the output buffer and advances the
	fill (final = 1); afterwards the 128 bytes just produced are revisited and the first
	short of every 4-byte pair is copied onto the second one.
	returns the value synth_1to1_altivec() returned
*/
int synth_1to1_mono2stereo_altivec(real *bandPtr, mpg123_handle *fr)
{
	unsigned char *base = fr->buffer.data;
	short *pair;
	int k, clipped;

	clipped = synth_1to1_altivec(bandPtr, 0, fr, 1);

	/* step back to the start of the block that was just written */
	base += fr->buffer.fill - 128;
	for(k = 0; k < 32; ++k)
	{
		pair = (short *)(base + 4*k);
		pair[1] = pair[0];
	}
	return clipped;
}
/*
	AltiVec synthesis for one channel.
	Runs dct64_altivec() on bandPtr and then writes 32 16-bit samples with a stride of two
	shorts, starting at the current fill position of the output buffer (one short later for
	channel != 0): 4 vector rounds of 4 samples, one scalar sample, 3 vector rounds of
	4 samples and a last vector round of 3 samples.
	For channel 0, fr->bo[0] is decremented modulo 16 first.
	final: if non-zero, the fill of the output buffer is advanced by 128 bytes
	returns the number of samples that exceeded the 16 bit range

	Changes against the previous revision of this function:
	- the buffer offset is accessed as fr->bo[0] and the window via opt_decwin(fr),
	  like synth_1to1_i386() and synth_4to1() do
	- the unused fourth lane of the last vector round is masked out of the clip count
	  (it used to be shifted by 32, which vec_sr treats as a shift by 0)
	NOTE(review): fr->areal_buffs is not used by any other synth in view (they use
	fr->real_buffs); left untouched here, confirm against the handle definition.
*/
int synth_1to1_altivec(real *bandPtr, int channel, mpg123_handle *fr, int final)
{
	static const int step = 2;
	short *samples = (short *) (fr->buffer.data + fr->buffer.fill);

	real *b0, **buf;
	int clip = 0;
	int bo1;

	if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);

	if(!channel) {
		fr->bo[0]--;
		fr->bo[0] &= 0xf;
		buf = fr->areal_buffs[0];
	}
	else {
		samples++;
		buf = fr->areal_buffs[1];
	}

	if(fr->bo[0] & 0x1) {
		b0 = buf[0];
		bo1 = fr->bo[0];
		dct64_altivec(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
	}
	else {
		b0 = buf[1];
		bo1 = fr->bo[0]+1;
		dct64_altivec(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
	}

	{
		register int j;
		real *window = opt_decwin(fr) + 16 - bo1;

		int __attribute__ ((aligned (16))) clip_tmp[4];
		vector float v1,v2,v3,v4,v5,v6,v7,v8,v9;
		vector unsigned char vperm1,vperm2,vperm3,vperm4,vperm5;
		vector float vsum,vsum2,vsum3,vsum4,vmin,vmax;
		vector signed int vclip;
		vector signed short vsample1,vsample2;
		/* xor with itself: all-zero vector */
		vclip = vec_xor(vclip,vclip);
#ifdef __APPLE__
		vmax = (vector float)(32767.0f);
		vmin = (vector float)(-32768.0f);
		vperm5 = (vector unsigned char)(0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31);
#else
		vmax = (vector float){32767.0f,32767.0f,32767.0f,32767.0f};
		vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
		vperm5 = (vector unsigned char){0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31};
#endif

		/* permute vectors for the unaligned window loads and the unaligned sample stores */
		vperm1 = vec_lvsl(0,window);
		vperm3 = vec_lvsl(0,samples);
		vperm4 = vec_lvsr(0,samples);
		/* first 16 samples, 4 per round: vsum..vsum4 each collect the products of one sample */
		for (j=4;j;j--)
		{
			vsum = vec_xor(vsum,vsum);
			vsum2 = vec_xor(vsum2,vsum2);
			vsum3 = vec_xor(vsum3,vsum3);
			vsum4 = vec_xor(vsum4,vsum4);
			v1 = vec_ld(0,window);
			v2 = vec_ld(16,window);
			v3 = vec_ld(32,window);
			v4 = vec_ld(48,window);
			v5 = vec_ld(64,window);
			v1 = vec_perm(v1,v2,vperm1);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v2,v3,vperm1);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v3,v4,vperm1);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v4,v5,vperm1);
			v9 = vec_ld(48,b0);

			vsum = vec_madd(v1,v6,vsum);
			vsum = vec_madd(v2,v7,vsum);
			vsum = vec_madd(v3,v8,vsum);
			vsum = vec_madd(v4,v9,vsum);

			window += 32;
			b0 += 16;

			v1 = vec_ld(0,window);
			v2 = vec_ld(16,window);
			v3 = vec_ld(32,window);
			v4 = vec_ld(48,window);
			v5 = vec_ld(64,window);
			v1 = vec_perm(v1,v2,vperm1);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v2,v3,vperm1);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v3,v4,vperm1);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v4,v5,vperm1);
			v9 = vec_ld(48,b0);

			vsum2 = vec_madd(v1,v6,vsum2);
			vsum2 = vec_madd(v2,v7,vsum2);
			vsum2 = vec_madd(v3,v8,vsum2);
			vsum2 = vec_madd(v4,v9,vsum2);

			window += 32;
			b0 += 16;

			v1 = vec_ld(0,window);
			v2 = vec_ld(16,window);
			v3 = vec_ld(32,window);
			v4 = vec_ld(48,window);
			v5 = vec_ld(64,window);
			v1 = vec_perm(v1,v2,vperm1);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v2,v3,vperm1);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v3,v4,vperm1);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v4,v5,vperm1);
			v9 = vec_ld(48,b0);

			vsum3 = vec_madd(v1,v6,vsum3);
			vsum3 = vec_madd(v2,v7,vsum3);
			vsum3 = vec_madd(v3,v8,vsum3);
			vsum3 = vec_madd(v4,v9,vsum3);

			window += 32;
			b0 += 16;

			v1 = vec_ld(0,window);
			v2 = vec_ld(16,window);
			v3 = vec_ld(32,window);
			v4 = vec_ld(48,window);
			v5 = vec_ld(64,window);
			v1 = vec_perm(v1,v2,vperm1);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v2,v3,vperm1);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v3,v4,vperm1);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v4,v5,vperm1);
			v9 = vec_ld(48,b0);

			vsum4 = vec_madd(v1,v6,vsum4);
			vsum4 = vec_madd(v2,v7,vsum4);
			vsum4 = vec_madd(v3,v8,vsum4);
			vsum4 = vec_madd(v4,v9,vsum4);

			window += 32;
			b0 += 16;

			/* transpose the four accumulators, then combine the columns with
			   alternating sign so that each lane holds one finished sample */
			v1 = vec_mergeh(vsum,vsum3);
			v2 = vec_mergeh(vsum2,vsum4);
			v3 = vec_mergel(vsum,vsum3);
			v4 = vec_mergel(vsum2,vsum4);
			v5 = vec_mergeh(v1,v2);
			v6 = vec_mergel(v1,v2);
			v7 = vec_mergeh(v3,v4);
			v8 = vec_mergel(v3,v4);

			vsum = vec_sub(v5,v6);
			v9 = vec_sub(v7,v8);
			vsum = vec_add(vsum,v9);

			/* convert to integer; remember which lanes are out of the 16 bit range */
			v3 = (vector float)vec_cts(vsum,0);
			v1 = (vector float)vec_cmpgt(vsum,vmax);
			v2 = (vector float)vec_cmplt(vsum,vmin);
			/* read-modify-write of the possibly unaligned output: the new samples replace
			   every second short, the other channel's shorts are kept (vperm5) */
			vsample1 = vec_ld(0,samples);
			vsample2 = vec_ld(15,samples);
			v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
			v4 = (vector float)vec_perm(vsample1,vsample2,vperm3);
			v5 = (vector float)vec_perm(v3,v4,vperm5);
			v6 = (vector float)vec_perm(vsample2,vsample1,vperm3);
			v7 = (vector float)vec_perm(v5,v6,vperm4);
			v8 = (vector float)vec_perm(v6,v5,vperm4);
			vec_st((vector signed short)v7,15,samples);
			vec_st((vector signed short)v8,0,samples);
			samples += 8;
			/* turn the compare masks into 0/1 per lane and add them to the clip count */
#ifdef __APPLE__
			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int)(31));
			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int)(31));
#else
			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int){31,31,31,31});
			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int){31,31,31,31});
#endif
			v5 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			vclip = vec_sums((vector signed int)v5,vclip);
		}

		/* the sample in between, scalar: even positions only */
		{
			real sum;
			sum = REAL_MUL(window[0x0], b0[0x0]);
			sum += REAL_MUL(window[0x2], b0[0x2]);
			sum += REAL_MUL(window[0x4], b0[0x4]);
			sum += REAL_MUL(window[0x6], b0[0x6]);
			sum += REAL_MUL(window[0x8], b0[0x8]);
			sum += REAL_MUL(window[0xA], b0[0xA]);
			sum += REAL_MUL(window[0xC], b0[0xC]);
			sum += REAL_MUL(window[0xE], b0[0xE]);
			WRITE_SAMPLE(samples,sum,clip);
			b0-=0x10,window-=0x20,samples+=step;
		}
		window += bo1<<1;

		/* the window is read backwards from here on: vperm2 is vperm1 with its 4-byte groups reversed */
		vperm1 = vec_lvsl(0,window);
#ifdef __APPLE__
		vperm2 = vec_perm(vperm1,vperm1,(vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3));
#else
		vperm2 = vec_perm(vperm1,vperm1,(vector unsigned char){12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3});
#endif
		vperm3 = vec_lvsl(0,samples);
		vperm4 = vec_lvsr(0,samples);
		/* next 12 samples, 4 per round; all products are subtracted (vec_nmsub) */
		for (j=3;j;j--)
		{
			vsum = vec_xor(vsum,vsum);
			vsum2 = vec_xor(vsum2,vsum2);
			vsum3 = vec_xor(vsum3,vsum3);
			vsum4 = vec_xor(vsum4,vsum4);
			v1 = vec_ld(-1,window);
			v2 = vec_ld(-16,window);
			v3 = vec_ld(-32,window);
			v4 = vec_ld(-48,window);
			v5 = vec_ld(-64,window);
			v1 = vec_perm(v2,v1,vperm2);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v3,v2,vperm2);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v4,v3,vperm2);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v5,v4,vperm2);
			v9 = vec_ld(48,b0);

			vsum = vec_nmsub(v1,v6,vsum);
			vsum = vec_nmsub(v2,v7,vsum);
			vsum = vec_nmsub(v3,v8,vsum);
			vsum = vec_nmsub(v4,v9,vsum);

			window -= 32;
			b0 -= 16;

			v1 = vec_ld(0,window);
			v2 = vec_ld(-16,window);
			v3 = vec_ld(-32,window);
			v4 = vec_ld(-48,window);
			v5 = vec_ld(-64,window);
			v1 = vec_perm(v2,v1,vperm2);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v3,v2,vperm2);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v4,v3,vperm2);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v5,v4,vperm2);
			v9 = vec_ld(48,b0);

			vsum2 = vec_nmsub(v1,v6,vsum2);
			vsum2 = vec_nmsub(v2,v7,vsum2);
			vsum2 = vec_nmsub(v3,v8,vsum2);
			vsum2 = vec_nmsub(v4,v9,vsum2);

			window -= 32;
			b0 -= 16;

			v1 = vec_ld(0,window);
			v2 = vec_ld(-16,window);
			v3 = vec_ld(-32,window);
			v4 = vec_ld(-48,window);
			v5 = vec_ld(-64,window);
			v1 = vec_perm(v2,v1,vperm2);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v3,v2,vperm2);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v4,v3,vperm2);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v5,v4,vperm2);
			v9 = vec_ld(48,b0);

			vsum3 = vec_nmsub(v1,v6,vsum3);
			vsum3 = vec_nmsub(v2,v7,vsum3);
			vsum3 = vec_nmsub(v3,v8,vsum3);
			vsum3 = vec_nmsub(v4,v9,vsum3);

			window -= 32;
			b0 -= 16;

			v1 = vec_ld(0,window);
			v2 = vec_ld(-16,window);
			v3 = vec_ld(-32,window);
			v4 = vec_ld(-48,window);
			v5 = vec_ld(-64,window);
			v1 = vec_perm(v2,v1,vperm2);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v3,v2,vperm2);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v4,v3,vperm2);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v5,v4,vperm2);
			v9 = vec_ld(48,b0);

			vsum4 = vec_nmsub(v1,v6,vsum4);
			vsum4 = vec_nmsub(v2,v7,vsum4);
			vsum4 = vec_nmsub(v3,v8,vsum4);
			vsum4 = vec_nmsub(v4,v9,vsum4);

			window -= 32;
			b0 -= 16;

			v1 = vec_mergeh(vsum,vsum3);
			v2 = vec_mergeh(vsum2,vsum4);
			v3 = vec_mergel(vsum,vsum3);
			v4 = vec_mergel(vsum2,vsum4);
			v5 = vec_mergeh(v1,v2);
			v6 = vec_mergel(v1,v2);
			v7 = vec_mergeh(v3,v4);
			v8 = vec_mergel(v3,v4);

			vsum = vec_add(v5,v6);
			v9 = vec_add(v7,v8);
			vsum = vec_add(vsum,v9);

			v3 = (vector float)vec_cts(vsum,0);
			v1 = (vector float)vec_cmpgt(vsum,vmax);
			v2 = (vector float)vec_cmplt(vsum,vmin);
			vsample1 = vec_ld(0,samples);
			vsample2 = vec_ld(15,samples);
			v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
			v4 = (vector float)vec_perm(vsample1,vsample2,vperm3);
			v5 = (vector float)vec_perm(v3,v4,vperm5);
			v6 = (vector float)vec_perm(vsample2,vsample1,vperm3);
			v7 = (vector float)vec_perm(v5,v6,vperm4);
			v8 = (vector float)vec_perm(v6,v5,vperm4);
			vec_st((vector signed short)v7,15,samples);
			vec_st((vector signed short)v8,0,samples);
			samples += 8;
#ifdef __APPLE__
			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int)(31));
			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int)(31));
#else
			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int){31,31,31,31});
			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int){31,31,31,31});
#endif
			v5 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			vclip = vec_sums((vector signed int)v5,vclip);
		}

		/* last round has 3 samples only: the final 4 bytes of the store keep the existing memory content */
#ifdef __APPLE__
		vperm5 = (vector unsigned char)(0,1,18,19,2,3,22,23,4,5,26,27,28,29,30,31);
#else
		vperm5 = (vector unsigned char){0,1,18,19,2,3,22,23,4,5,26,27,28,29,30,31};
#endif
		{
			vsum = vec_xor(vsum,vsum);
			vsum2 = vec_xor(vsum2,vsum2);
			vsum3 = vec_xor(vsum3,vsum3);
			vsum4 = vec_xor(vsum4,vsum4);
			v1 = vec_ld(-1,window);
			v2 = vec_ld(-16,window);
			v3 = vec_ld(-32,window);
			v4 = vec_ld(-48,window);
			v5 = vec_ld(-64,window);
			v1 = vec_perm(v2,v1,vperm2);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v3,v2,vperm2);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v4,v3,vperm2);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v5,v4,vperm2);
			v9 = vec_ld(48,b0);

			vsum = vec_nmsub(v1,v6,vsum);
			vsum = vec_nmsub(v2,v7,vsum);
			vsum = vec_nmsub(v3,v8,vsum);
			vsum = vec_nmsub(v4,v9,vsum);

			window -= 32;
			b0 -= 16;

			v1 = vec_ld(0,window);
			v2 = vec_ld(-16,window);
			v3 = vec_ld(-32,window);
			v4 = vec_ld(-48,window);
			v5 = vec_ld(-64,window);
			v1 = vec_perm(v2,v1,vperm2);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v3,v2,vperm2);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v4,v3,vperm2);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v5,v4,vperm2);
			v9 = vec_ld(48,b0);

			vsum2 = vec_nmsub(v1,v6,vsum2);
			vsum2 = vec_nmsub(v2,v7,vsum2);
			vsum2 = vec_nmsub(v3,v8,vsum2);
			vsum2 = vec_nmsub(v4,v9,vsum2);

			window -= 32;
			b0 -= 16;

			v1 = vec_ld(0,window);
			v2 = vec_ld(-16,window);
			v3 = vec_ld(-32,window);
			v4 = vec_ld(-48,window);
			v5 = vec_ld(-64,window);
			v1 = vec_perm(v2,v1,vperm2);
			v6 = vec_ld(0,b0);
			v2 = vec_perm(v3,v2,vperm2);
			v7 = vec_ld(16,b0);
			v3 = vec_perm(v4,v3,vperm2);
			v8 = vec_ld(32,b0);
			v4 = vec_perm(v5,v4,vperm2);
			v9 = vec_ld(48,b0);

			vsum3 = vec_nmsub(v1,v6,vsum3);
			vsum3 = vec_nmsub(v2,v7,vsum3);
			vsum3 = vec_nmsub(v3,v8,vsum3);
			vsum3 = vec_nmsub(v4,v9,vsum3);

			/* vsum2 stands in for the missing fourth accumulator, so lane 3 below
			   is a duplicate of lane 1 */
			v1 = vec_mergeh(vsum,vsum3);
			v2 = vec_mergeh(vsum2,vsum2);
			v3 = vec_mergel(vsum,vsum3);
			v4 = vec_mergel(vsum2,vsum2);
			v5 = vec_mergeh(v1,v2);
			v6 = vec_mergel(v1,v2);
			v7 = vec_mergeh(v3,v4);
			v8 = vec_mergel(v3,v4);

			vsum = vec_add(v5,v6);
			v9 = vec_add(v7,v8);
			vsum = vec_add(vsum,v9);

			v3 = (vector float)vec_cts(vsum,0);
			v1 = (vector float)vec_cmpgt(vsum,vmax);
			v2 = (vector float)vec_cmplt(vsum,vmin);
			vsample1 = vec_ld(0,samples);
			vsample2 = vec_ld(15,samples);
			v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
			v4 = (vector float)vec_perm(vsample1,vsample2,vperm3);
			v5 = (vector float)vec_perm(v3,v4,vperm5);
			v6 = (vector float)vec_perm(vsample2,vsample1,vperm3);
			v7 = (vector float)vec_perm(v5,v6,vperm4);
			v8 = (vector float)vec_perm(v6,v5,vperm4);
			vec_st((vector signed short)v7,15,samples);
			vec_st((vector signed short)v8,0,samples);
			samples += 6;
			/* 0/1 per lane as before, then drop the duplicate lane 3 from the count;
			   a shift count of 32 would be taken modulo 32 and leave that lane at all-ones */
#ifdef __APPLE__
			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int)(31));
			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int)(31));
#else
			v1 = (vector float)vec_sr((vector unsigned int)v1,(vector unsigned int){31,31,31,31});
			v2 = (vector float)vec_sr((vector unsigned int)v2,(vector unsigned int){31,31,31,31});
#endif
			v5 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
#ifdef __APPLE__
			v5 = (vector float)vec_and((vector unsigned int)v5,(vector unsigned int)(1,1,1,0));
#else
			v5 = (vector float)vec_and((vector unsigned int)v5,(vector unsigned int){1,1,1,0});
#endif
			vclip = vec_sums((vector signed int)v5,vclip);
			/* vec_sums leaves the total in the last element */
			vec_st(vclip,0,clip_tmp);
			clip += clip_tmp[3];
		}
	}
	if(final) fr->buffer.fill += 128;

	return clip;
}

295
src/libmpg123/decode_i386.c Normal file
View File

@@ -0,0 +1,295 @@
/*
decode_i386.c: decode for i386 (really faster?)
copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
slightly optimized for machines without autoincrement/decrement.
The performance is highly compiler dependent. The decode.c
version for 'normal' processors may be faster even on
Intel processors.
*/
#include "mpg123lib_intern.h"
/*
	8bit output for one channel of an interleaved two-channel stream.
	The handle's output buffer is pointed at a local scratch array while the synth selected
	by opt_synth_1to1(fr) runs; afterwards the 32 samples of the given channel are translated
	via fr->conv16to8 and written to every second byte of the real output buffer.
	final: if non-zero, the fill of the output buffer is advanced by 64 bytes
	returns the value the synth returned
*/
int synth_1to1_8bit_i386(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
	short scratch[64];
	unsigned char *real_out = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int k, clipped;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = opt_synth_1to1(fr)(bandPtr, channel, fr , 0);
	fr->buffer.data = real_out;

	dst = real_out + (channel + old_fill);
	for(k = 0; k < 32; ++k)
	dst[2*k] = fr->conv16to8[scratch[channel + 2*k]>>AUSHIFT];

	fr->buffer.fill = old_fill + (final ? 64 : 0 );
	return clipped;
}
/*
	8bit mono output.
	Runs the synth selected by opt_synth_1to1(fr) for channel 0 into a local scratch array
	and writes the 32 resulting samples, translated via fr->conv16to8, as consecutive bytes
	to the output buffer. The fill of the output buffer grows by 32 bytes.
	returns the value the synth returned
*/
int synth_1to1_8bit_mono_i386(real *bandPtr, mpg123_handle *fr)
{
	short scratch[64];
	unsigned char *real_out = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int k, clipped;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = opt_synth_1to1(fr)(bandPtr, 0, fr, 0);
	fr->buffer.data = real_out;

	dst = real_out + old_fill;
	/* channel 0 occupies the even positions of the scratch array */
	for(k = 0; k < 32; ++k)
	dst[k] = fr->conv16to8[scratch[2*k]>>AUSHIFT];

	fr->buffer.fill = old_fill + 32;
	return clipped;
}
/*
	8bit output, one decoded channel duplicated to two output channels.
	Runs the synth selected by opt_synth_1to1(fr) for channel 0 into a local scratch array
	and writes each of the 32 samples, translated via fr->conv16to8, twice in a row to the
	output buffer. The fill of the output buffer grows by 64 bytes.
	returns the value the synth returned
*/
int synth_1to1_8bit_mono2stereo_i386(real *bandPtr, mpg123_handle *fr)
{
	short scratch[64];
	unsigned char *real_out = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int k, clipped;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = opt_synth_1to1(fr)(bandPtr, 0, fr, 0);
	fr->buffer.data = real_out;

	dst = real_out + old_fill;
	for(k = 0; k < 32; ++k)
	{
		dst[2*k]   = fr->conv16to8[scratch[2*k]>>AUSHIFT];
		dst[2*k+1] = fr->conv16to8[scratch[2*k]>>AUSHIFT];
	}

	fr->buffer.fill = old_fill + 64;
	return clipped;
}
/*
	16bit mono output.
	Runs the synth selected by opt_synth_1to1(fr) for channel 0 into a local scratch array
	(filled with a stride of two) and copies the 32 samples densely, two bytes apart, into
	the output buffer. The fill of the output buffer grows by 64 bytes.
	returns the value the synth returned
*/
int synth_1to1_mono_i386(real *bandPtr, mpg123_handle *fr)
{
	short scratch[64];
	unsigned char *real_out = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int k, clipped;

	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = opt_synth_1to1(fr)(bandPtr, 0, fr, 0);
	fr->buffer.data = real_out;

	dst = real_out + old_fill;
	for(k = 0; k < 32; ++k)
	*( (short *)(dst + 2*k) ) = scratch[2*k];

	fr->buffer.fill = old_fill + 64;
	return clipped;
}
/*
	16bit output, one decoded channel duplicated to two.
	The synth selected by opt_synth_1to1(fr) writes channel 0 directly into the output buffer
	and advances the fill (final = 1); afterwards the 128 bytes just produced are revisited
	and the first short of every 4-byte pair is copied onto the second one.
	returns the value the synth returned
*/
int synth_1to1_mono2stereo_i386(real *bandPtr, mpg123_handle *fr)
{
	unsigned char *base = fr->buffer.data;
	short *pair;
	int k, clipped;

	clipped = opt_synth_1to1(fr)(bandPtr, 0, fr, 1);

	/* step back to the start of the block that was just written */
	base += fr->buffer.fill - 128;
	for(k = 0; k < 32; ++k)
	{
		pair = (short *)(base + 4*k);
		pair[1] = pair[0];
	}
	return clipped;
}
/* needed for i386, i486 */
#ifdef OPT_I386
/*
	Plain C synthesis for one channel, indexed access instead of pointer walking.
	Runs dct64_i386() on bandPtr and then writes 32 16-bit samples (16 + 1 + 15) with a
	stride of two shorts, starting at the current fill position of the output buffer
	(one short later for channel != 0).
	For channel 0, fr->bo[0] is decremented modulo 16 first.
	final: if non-zero, the fill of the output buffer is advanced by 128 bytes
	returns the clip counter that WRITE_SAMPLE maintained
*/
int synth_1to1_i386(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
	const int stride = 2;
	short *out = (short *) (fr->buffer.data + fr->buffer.fill);
	real *b0, **buf;
	real *window;
	real sum;
	int clip = 0;
	int bo1;
	int j, k;

	if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);

	if(channel)
	{
		out++;
		buf = fr->real_buffs[1];
	}
	else
	{
		fr->bo[0] = (fr->bo[0] - 1) & 0xf;
		buf = fr->real_buffs[0];
	}

	if(fr->bo[0] & 0x1)
	{
		b0 = buf[0];
		bo1 = fr->bo[0];
		dct64_i386(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
	}
	else
	{
		b0 = buf[1];
		bo1 = fr->bo[0]+1;
		dct64_i386(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
	}

	window = opt_decwin(fr) + 16 - bo1;

	/* first 16 samples: 16 products with alternating sign, summed in ascending order */
	for(j = 0; j < 16; ++j)
	{
		sum  = window[0x0] * b0[0x0];
		sum -= window[0x1] * b0[0x1];
		for(k = 0x2; k < 0x10; k += 2)
		{
			sum += window[k]   * b0[k];
			sum -= window[k+1] * b0[k+1];
		}
		WRITE_SAMPLE(out,sum,clip);
		b0 += 0x10;
		window += 0x20;
		out += stride;
	}

	/* the sample in between: even positions only */
	sum = window[0x0] * b0[0x0];
	for(k = 0x2; k < 0x10; k += 2)
	sum += window[k] * b0[k];

	WRITE_SAMPLE(out,sum,clip);
	b0 -= 0x10;
	window -= 0x20;
	out += stride;

	window += bo1<<1;

	/* last 15 samples: window read backwards, all products subtracted;
	   the final product pairs b0[0xF] with window[0] */
	for(j = 0; j < 15; ++j)
	{
		sum = -window[-0x1] * b0[0x0];
		for(k = 0x1; k < 0xF; ++k)
		sum -= window[-1-k] * b0[k];

		sum -= window[0x0] * b0[0xF];
		WRITE_SAMPLE(out,sum,clip);
		b0 -= 0x10;
		window -= 0x20;
		out += stride;
	}

	if(final) fr->buffer.fill += 128;

	return clip;
}
#endif
#ifdef OPT_PENTIUM
/*
	Wrapper around the i586 assembly synth selected by opt_synth_1to1_i586_asm(fr).
	Applies the equalizer if settings are present, hands the current write position of the
	output buffer plus the handle's rawbuffs, bo and decwin to the assembly routine and,
	if final is non-zero, advances the fill of the output buffer by 128 bytes.
	returns the assembly routine's return value, cast to int
*/
int synth_1to1_i586(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
	unsigned char *out;
	int clipped;

	if(fr->have_eq_settings)
	do_equalizer(bandPtr,channel,fr->equalizer);

	out = fr->buffer.data+fr->buffer.fill;
	/* the routine behind the pointer is assembly; depending on the selection it dithers or not */
	clipped = (int) opt_synth_1to1_i586_asm(fr)(bandPtr, channel, out, fr->rawbuffs, fr->bo, fr->decwin);

	if(final)
	fr->buffer.fill += 128;

	return clipped;
}
#endif
#ifdef OPT_3DNOW
/*
	Wrapper around the 3DNow! assembly synth.
	Applies the 3DNow! equalizer if settings are present, hands the current write position
	of the output buffer plus the handle's rawbuffs, bo and decwin to synth_1to1_3dnow_asm()
	and, if final is non-zero, advances the fill of the output buffer by 128 bytes.
	returns the assembly routine's return value, cast to int
*/
int synth_1to1_3dnow(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
	unsigned char *out;
	int clipped;

	if(fr->have_eq_settings)
	do_equalizer_3dnow(bandPtr,channel,fr->equalizer);

	out = fr->buffer.data+fr->buffer.fill;
	/* the real work happens in assembly */
	clipped = (int) synth_1to1_3dnow_asm(bandPtr, channel, out, fr->rawbuffs, fr->bo, fr->decwin);

	if(final)
	fr->buffer.fill += 128;

	return clipped;
}
#endif
#ifdef OPT_MMX
/*
	Wrapper giving the MMX assembly synth the common synth interface.
	Applies the equalizer if settings are present, calls synth_1to1_MMX() with the current
	write position of the output buffer plus the handle's rawbuffs, bo and decwins and,
	if final is non-zero, advances the fill of the output buffer by 128 bytes.
	Always returns 0; no clip count is taken from the assembly routine.
*/
int synth_1to1_mmx(real *bandPtr, int channel, mpg123_handle *fr, int final)
{
	short *out;

	if(fr->have_eq_settings)
	do_equalizer(bandPtr,channel,fr->equalizer);

	out = (short*) (fr->buffer.data+fr->buffer.fill);
	/* the real work happens in assembly */
	synth_1to1_MMX(bandPtr, channel, out, (short *) fr->rawbuffs, fr->bo, fr->decwins);

	if(final)
	fr->buffer.fill += 128;

	return 0;
}
#endif
#ifdef OPT_SSE
/*
	Wrapper giving the SSE assembly synth the common synth interface.
	Applies the equalizer if settings are present, calls synth_1to1_sse_asm() with the
	current write position of the output buffer plus the handle's rawbuffs, bo and decwins
	and, if final is non-zero, advances the fill of the output buffer by 128 bytes.
	Always returns 0; no clip count is taken from the assembly routine.
*/
int synth_1to1_sse(real *bandPtr, int channel, mpg123_handle *fr, int final)
{
	short *out;

	if(fr->have_eq_settings)
	do_equalizer(bandPtr,channel,fr->equalizer);

	out = (short*) (fr->buffer.data+fr->buffer.fill);
	synth_1to1_sse_asm(bandPtr, channel, out, (short *) fr->rawbuffs, fr->bo, fr->decwins);

	if(final)
	fr->buffer.fill += 128;

	return 0;
}
#endif
#ifdef OPT_3DNOWEXT
/*
	Wrapper giving the 3DNowExt assembly synth the common synth interface.
	Applies the equalizer if settings are present, calls synth_1to1_3dnowext_asm() with the
	current write position of the output buffer plus the handle's rawbuffs, bo and decwins
	and, if final is non-zero, advances the fill of the output buffer by 128 bytes.
	Always returns 0; no clip count is taken from the assembly routine.
*/
int synth_1to1_3dnowext(real *bandPtr, int channel, mpg123_handle *fr, int final)
{
	short *out;

	if(fr->have_eq_settings)
	do_equalizer(bandPtr,channel,fr->equalizer);

	out = (short*) (fr->buffer.data+fr->buffer.fill);
	synth_1to1_3dnowext_asm(bandPtr, channel, out, (short *) fr->rawbuffs, fr->bo, fr->decwins);

	if(final)
	fr->buffer.fill += 128;

	return 0;
}
#endif

252
src/libmpg123/decode_i486.c Normal file
View File

@@ -0,0 +1,252 @@
/*
decode_i486.c: i486 decode
copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Fabrice Bellard
One has to see if the modification for non-static memory kills this optimization (cache locality?).
*/
/*
* Subband Synthesis for MPEG Audio.
*
* Version optimized for 80486 by using integer arithmetic,
* multiplications by shift and add, and by increasing locality in
* order to fit the 8KB L1 cache. This code should be compiled with gcc
* 2.7.2 or higher.
*
* Note: this version does not guarantee good accuracy. The filter
* coefficients are quantized to 14 bits.
*
* (c) 1998 Fabrice Bellard
*/
#include "mpg123lib_intern.h"
/*
	FIR16_1: compute one output sample from 16 integer taps.
	Expects two names in scope at the expansion site: b0 (pointer to the 16 taps,
	advanced by FIR_BUFFER_SIZE afterwards) and samples (output array).
	The weighted sum c0*b0[0] + ... + c15*b0[15] is rounded by adding 1<<13,
	shifted right by 14, clamped to [-32768, 32767] and stored at samples[2*pos].
*/
#define FIR16_1(pos,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15) \
do {\
	int fir_acc;\
	fir_acc  = (c0)*b0[0];\
	fir_acc += (c1)*b0[1];\
	fir_acc += (c2)*b0[2];\
	fir_acc += (c3)*b0[3];\
	fir_acc += (c4)*b0[4];\
	fir_acc += (c5)*b0[5];\
	fir_acc += (c6)*b0[6];\
	fir_acc += (c7)*b0[7];\
	fir_acc += (c8)*b0[8];\
	fir_acc += (c9)*b0[9];\
	fir_acc += (c10)*b0[10];\
	fir_acc += (c11)*b0[11];\
	fir_acc += (c12)*b0[12];\
	fir_acc += (c13)*b0[13];\
	fir_acc += (c14)*b0[14];\
	fir_acc += (c15)*b0[15];\
	fir_acc = (fir_acc + (1 << 13)) >> 14;\
	fir_acc = (fir_acc < -32768) ? -32768 : ((fir_acc > 32767) ? 32767 : fir_acc);\
	samples[2*(pos)] = fir_acc;\
	b0 += FIR_BUFFER_SIZE;\
} while(0)
/*
	FIR16_2: compute two output samples from the same 16 integer taps.
	Like FIR16_1 it relies on b0 and samples being in scope at the expansion site.
	Each tap is loaded once and multiplied into two sums (coefficients c0..c15
	and d0..d15).  Both sums are rounded (+ 1<<13), shifted right by 14 and
	clamped to [-32768, 32767]; the first goes to samples[pos1*2], the second to
	samples[pos2*2].  b0 then moves on by FIR_BUFFER_SIZE.
*/
#define FIR16_2(pos1,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,\
pos2,d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15) \
do {\
	int fir_a, fir_b, fir_tap;\
	\
	fir_tap = b0[0];   fir_a  = (c0)*fir_tap;   fir_b  = (d0)*fir_tap;\
	fir_tap = b0[1];   fir_a += (c1)*fir_tap;   fir_b += (d1)*fir_tap;\
	fir_tap = b0[2];   fir_a += (c2)*fir_tap;   fir_b += (d2)*fir_tap;\
	fir_tap = b0[3];   fir_a += (c3)*fir_tap;   fir_b += (d3)*fir_tap;\
	fir_tap = b0[4];   fir_a += (c4)*fir_tap;   fir_b += (d4)*fir_tap;\
	fir_tap = b0[5];   fir_a += (c5)*fir_tap;   fir_b += (d5)*fir_tap;\
	fir_tap = b0[6];   fir_a += (c6)*fir_tap;   fir_b += (d6)*fir_tap;\
	fir_tap = b0[7];   fir_a += (c7)*fir_tap;   fir_b += (d7)*fir_tap;\
	fir_tap = b0[8];   fir_a += (c8)*fir_tap;   fir_b += (d8)*fir_tap;\
	fir_tap = b0[9];   fir_a += (c9)*fir_tap;   fir_b += (d9)*fir_tap;\
	fir_tap = b0[10];  fir_a += (c10)*fir_tap;  fir_b += (d10)*fir_tap;\
	fir_tap = b0[11];  fir_a += (c11)*fir_tap;  fir_b += (d11)*fir_tap;\
	fir_tap = b0[12];  fir_a += (c12)*fir_tap;  fir_b += (d12)*fir_tap;\
	fir_tap = b0[13];  fir_a += (c13)*fir_tap;  fir_b += (d13)*fir_tap;\
	fir_tap = b0[14];  fir_a += (c14)*fir_tap;  fir_b += (d14)*fir_tap;\
	fir_tap = b0[15];  fir_a += (c15)*fir_tap;  fir_b += (d15)*fir_tap;\
	\
	fir_a = (fir_a + (1<<13)) >> 14;\
	fir_b = (fir_b + (1<<13)) >> 14;\
	\
	fir_a = (fir_a < -32768) ? -32768 : ((fir_a > 32767) ? 32767 : fir_a);\
	samples[(pos1)*2] = fir_a;\
	\
	fir_b = (fir_b < -32768) ? -32768 : ((fir_b > 32767) ? 32767 : fir_b);\
	samples[(pos2)*2] = fir_b;\
	b0 += FIR_BUFFER_SIZE;\
} while(0)
/*
	Integer subband synthesis for the i486 decoder.

	bandPtr:   nb_blocks consecutive groups of 32 subband values (advanced by 32 per block).
	channel:   selects the output interleave offset, the int_buffs set and the bo entry.
	fr:        decoder handle; reads buffer.data/buffer.fill, updates bo[channel].
	nb_blocks: number of 32-sample blocks to process (note: a block count, not a "final" flag).

	Each block produces 32 output samples (positions 0..31) written with stride 2,
	i.e. 64 shorts per block.  The filter bank is run in four passes over all
	blocks, each pass handling a subset of the output positions.
	fr->buffer.fill is not modified here, and no equalizer is applied.
	Returns clip, which is never changed from its initial 0 in this function.
*/
int synth_1to1_486(real *bandPtr, int channel, mpg123_handle *fr, int nb_blocks)
{
short *samples = (short *) (fr->buffer.data+fr->buffer.fill);
int *b0,**buf;
int clip = 0;
int block,b,bo_start;
/* samples address */
samples+=channel;
bo_start=fr->bo[channel];
buf = fr->int_buffs[channel];
b=bo_start;
/* DCT pass: fill the FIR history buffers for every block */
for(block=0;block<nb_blocks;block++) {
/* FIR offset */
b++;
if (b >= FIR_BUFFER_SIZE) {
int *p,*q;
int c,i,j;
/* we shift the buffers */
/* for both buffers, copy the last FIR_SIZE-1 entries of each of the 17 rows back to index 1.. */
for(c=0;c<2;c++) {
p=&buf[c][0]+1;
q=p+(FIR_BUFFER_SIZE-FIR_SIZE);
for(i=0;i<17;i++) {
for(j=0;j<FIR_SIZE-1;j++) p[j]=q[j];
p+=FIR_BUFFER_SIZE;
q+=FIR_BUFFER_SIZE;
}
}
/* we update 'bo' accordingly */
b=fr->bo[channel]=FIR_SIZE;
}
/* the parity of b decides which of the two buffers is dct64's first output */
if(b & 1) {
dct64_i486(buf[1]+b,buf[0]+b,bandPtr);
} else {
dct64_i486(buf[0]+b,buf[1]+b,bandPtr);
}
bandPtr+=32;
}
/* remember the final offset for the next call */
fr->bo[channel]=b;
/* filter bank: part 1 */
/* output positions 0..3 and 29..31; b replays the same sequence as the DCT pass (wrap to FIR_SIZE) */
b=bo_start;
for(block=0;block<nb_blocks;block++) {
b++;
if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
if(b & 1) {
b0 = buf[0] + b - (FIR_SIZE-1);
} else {
b0 = buf[1] + b - (FIR_SIZE-1);
}
FIR16_1(0,-7,53,-114,509,-1288,1643,-9372,18759,9372,1643,1288,509,114,53,7,0);
FIR16_2(1,-6,52,-100,515,-1197,1783,-8910,18748,9834,1489,1379,500,129,54,7,0,
31,0,-7,54,-129,500,-1379,1489,-9834,18748,8910,1783,1197,515,100,52,6);
FIR16_2(2,-6,50,-86,520,-1106,1910,-8447,18714,10294,1322,1469,488,145,55,8,0,
30,0,-8,55,-145,488,-1469,1322,-10294,18714,8447,1910,1106,520,86,50,6);
FIR16_2(3,-5,49,-73,521,-1015,2023,-7986,18657,10751,1140,1559,473,161,56,9,0,
29,0,-9,56,-161,473,-1559,1140,-10751,18657,7986,2023,1015,521,73,49,5);
samples+=64;
}
/* rewind the output pointer to the first block for the next pass */
samples-=64*nb_blocks;
/* filter bank: part 2 */
/* output positions 4..7 and 25..28; taps start 4 rows further into the buffer */
b=bo_start;
for(block=0;block<nb_blocks;block++) {
b++;
if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
if(b & 1) {
b0 = buf[0] + b - (FIR_SIZE-1) + 4*FIR_BUFFER_SIZE;
} else {
b0 = buf[1] + b - (FIR_SIZE-1) + 4*FIR_BUFFER_SIZE;
}
FIR16_2(4,-4,47,-61,521,-926,2123,-7528,18578,11205,944,1647,455,177,56,10,0,
28,0,-10,56,-177,455,-1647,944,-11205,18578,7528,2123,926,521,61,47,4);
FIR16_2(5,-4,45,-49,518,-837,2210,-7072,18477,11654,733,1733,434,194,57,11,0,
27,0,-11,57,-194,434,-1733,733,-11654,18477,7072,2210,837,518,49,45,4);
FIR16_2(6,-4,44,-38,514,-751,2284,-6620,18353,12097,509,1817,411,212,57,12,0,
26,0,-12,57,-212,411,-1817,509,-12097,18353,6620,2284,751,514,38,44,4);
FIR16_2(7,-3,42,-27,508,-665,2347,-6173,18208,12534,270,1899,383,229,56,13,0,
25,0,-13,56,-229,383,-1899,270,-12534,18208,6173,2347,665,508,27,42,3);
samples+=64;
}
samples-=64*nb_blocks;
/* filter bank: part 3 */
/* output positions 8..11 and 21..24; taps start 8 rows into the buffer */
b=bo_start;
for(block=0;block<nb_blocks;block++) {
b++;
if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
if(b & 1) {
b0 = buf[0] + b - (FIR_SIZE-1) + 8*FIR_BUFFER_SIZE;
} else {
b0 = buf[1] + b - (FIR_SIZE-1) + 8*FIR_BUFFER_SIZE;
}
FIR16_2(8,-3,40,-18,500,-582,2398,-5732,18042,12963,17,1977,353,247,56,14,0,
24,0,-14,56,-247,353,-1977,17,-12963,18042,5732,2398,582,500,18,40,3);
FIR16_2(9,-2,38,-9,490,-501,2437,-5297,17855,13383,-249,2052,320,266,55,15,0,
23,0,-15,55,-266,320,-2052,-249,-13383,17855,5297,2437,501,490,9,38,2);
FIR16_2(10,-2,36,0,479,-423,2465,-4869,17647,13794,-530,2122,282,284,53,17,0,
22,0,-17,53,-284,282,-2122,-530,-13794,17647,4869,2465,423,479,0,36,2);
FIR16_2(11,-2,34,7,467,-347,2483,-4449,17419,14194,-825,2188,242,302,52,18,0,
21,0,-18,52,-302,242,-2188,-825,-14194,17419,4449,2483,347,467,-7,34,2);
samples+=64;
}
samples-=64*nb_blocks;
/* filter bank: part 4 */
/* output positions 12..16 and 17..20; taps start 12 rows into the buffer */
b=bo_start;
for(block=0;block<nb_blocks;block++) {
b++;
if (b >= FIR_BUFFER_SIZE) b=FIR_SIZE;
if(b & 1) {
b0 = buf[0] + b - (FIR_SIZE-1) + 12*FIR_BUFFER_SIZE;
} else {
b0 = buf[1] + b - (FIR_SIZE-1) + 12*FIR_BUFFER_SIZE;
}
FIR16_2(12,-2,33,14,454,-273,2491,-4038,17173,14583,-1133,2249,198,320,50,19,0,
20,0,-19,50,-320,198,-2249,-1133,-14583,17173,4038,2491,273,454,-14,33,2);
FIR16_2(13,-1,31,20,439,-203,2489,-3637,16907,14959,-1454,2304,151,339,47,21,-1,
19,-1,-21,47,-339,151,-2304,-1454,-14959,16907,3637,2489,203,439,-20,31,1);
FIR16_2(14,-1,29,26,424,-136,2479,-3245,16623,15322,-1788,2354,100,357,44,22,-1,
18,-1,-22,44,-357,100,-2354,-1788,-15322,16623,3245,2479,136,424,-26,29,1);
FIR16_2(15,-1,27,31,408,-72,2459,-2863,16322,15671,-2135,2396,46,374,40,24,-1,
17,-1,-24,40,-374,46,-2396,-2135,-15671,16322,2863,2459,72,408,-31,27,1);
FIR16_1(16,-1,0,36,0,-11,0,-2493,0,16004,0,2431,0,391,0,26,0);
samples+=64;
}
/* clip is never incremented above, so this is always 0 */
return clip;
}

335
src/libmpg123/decode_i586.S Normal file
View File

@@ -0,0 +1,335 @@
/*
decode_i586: asm synth
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Stefan Bieschewski
synth_1to1 works the same way as the c version of this
file. only two types of changes have been made:
- reordered floating point instructions to
prevent pipeline stalls
- made WRITE_SAMPLE use integer instead of
(slower) floating point
all kinds of x86 processors should benefit from these
modifications.
useful sources of information on optimizing x86 code include:
Intel Architecture Optimization Manual
http://www.intel.com/design/pentium/manuals/242816.htm
Cyrix 6x86 Instruction Set Summary
ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
AMD-K5 Processor Software Development
http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
Stefan Bieschewski <stb@acm.org>
$Id: decode_i586.s 1 2004-09-18 13:30:08Z thomas $
*/
#include "mangle.h"
.data
#ifndef __APPLE__
.section .rodata
#endif
ALIGN8
.LC0:
.long 0x0,0x40dfffc0
ALIGN8
.LC1:
.long 0x0,0xc0e00000
ALIGN8
.text
/* int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin); */
/*
	Entry and dct64 setup.
	Register roles established here and used by the output loops that follow:
	%esi = output pointer (advanced by 2 bytes for channel != 0),
	%ebx = start of the buffer half the window is multiplied with,
	%ecx = window pointer, %ebp = loop counter,
	%edi = clip counter (zeroed here, incremented at the "3:" labels, returned in %eax at the end).
*/
.globl ASM_NAME(synth_1to1_i586_asm)
ASM_NAME(synth_1to1_i586_asm):
/* reserve three local dwords, then save the callee-saved registers */
subl $12,%esp
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
/* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16,20,24=local, 28=back, 32=bandPtr, 36=channel, 40=out, 44=buffs, 48=bo, 52=decwin */
movl 32(%esp),%eax /* *bandPtr */
movl 40(%esp),%esi /* *out */
movl 48(%esp),%edi /* *bo */
movl (%edi),%ebp /* store bo value in ebp */
xorl %edi,%edi
cmpl %edi,36(%esp)
jne .L48 /* if(!channel) */
decl %ebp /* bo-- */
andl $15,%ebp /* bo &= 0xf */
movl 48(%esp), %edi /* *bo */
movl %ebp,(%edi) /* write back bo */
xorl %edi,%edi /* restore %edi to 0; it's used later */
movl 44(%esp),%ecx /* use buffs */
jmp .L49
.L48: /* if(channel) use buffs+2176 */
addl $2,%esi
movl 44(%esp),%ecx /* *buffs */
addl $2176,%ecx
.L49:
/* choose the dct64 targets by the parity of bo; arguments are pushed last-to-first */
testl $1,%ebp
je .L50
/* bo odd: %ebx = buffer base, local 16(%esp) = bo */
movl %ecx,%ebx
movl %ebp,16(%esp)
pushl %eax /* 3rd argument: bandPtr */
/* after one push the local written above sits at 20(%esp) */
movl 20(%esp),%edx
leal (%ebx,%edx,4),%eax
pushl %eax /* 2nd argument: base + bo*4 */
/* after two pushes the same local sits at 24(%esp) */
movl 24(%esp),%eax
incl %eax
andl $15,%eax
leal 1088(,%eax,4),%eax
addl %ebx,%eax /* 1st argument: base + 1088 + ((bo+1)&15)*4 */
jmp .L74
.L50:
/* bo even: %ebx = base + 1088, local 16(%esp) = bo+1 */
leal 1088(%ecx),%ebx
leal 1(%ebp),%edx
movl %edx,16(%esp)
pushl %eax /* 3rd argument: bandPtr */
leal 1092(%ecx,%ebp,4),%eax
pushl %eax /* 2nd argument: base + 1088 + (bo+1)*4 */
leal (%ecx,%ebp,4),%eax /* 1st argument: base + bo*4 */
.L74:
pushl %eax
call ASM_NAME(dct64_i386)
addl $12,%esp
/* stack now back on track */
/* %ecx = decwin + 64 - 4*local, i.e. the window start shifted by the value stored in the local above */
movl 16(%esp),%edx
leal 0(,%edx,4),%edx
movl 52(%esp),%eax /* decwin */
addl $64,%eax
movl %eax,%ecx
subl %edx,%ecx
/* 16 iterations of the first output loop (.L55) */
movl $16,%ebp
.L55:
flds (%ecx)
fmuls (%ebx)
flds 4(%ecx)
fmuls 4(%ebx)
fxch %st(1)
flds 8(%ecx)
fmuls 8(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 12(%ecx)
fmuls 12(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 16(%ecx)
fmuls 16(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 20(%ecx)
fmuls 20(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 24(%ecx)
fmuls 24(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 28(%ecx)
fmuls 28(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 32(%ecx)
fmuls 32(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 36(%ecx)
fmuls 36(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 40(%ecx)
fmuls 40(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 44(%ecx)
fmuls 44(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 48(%ecx)
fmuls 48(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 52(%ecx)
fmuls 52(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 56(%ecx)
fmuls 56(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 60(%ecx)
fmuls 60(%ebx)
fxch %st(2)
subl $4,%esp
faddp %st,%st(1)
fxch %st(1)
fsubrp %st,%st(1)
fistpl (%esp)
popl %eax
cmpl $32767,%eax
jg 1f
cmpl $-32768,%eax
jl 2f
movw %ax,(%esi)
jmp 4f
1: movw $32767,(%esi)
jmp 3f
2: movw $-32768,(%esi)
3: incl %edi
4:
.L54:
addl $64,%ebx
subl $-128,%ecx
addl $4,%esi
decl %ebp
jnz .L55
flds (%ecx)
fmuls (%ebx)
flds 8(%ecx)
fmuls 8(%ebx)
flds 16(%ecx)
fmuls 16(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 24(%ecx)
fmuls 24(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 32(%ecx)
fmuls 32(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 40(%ecx)
fmuls 40(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 48(%ecx)
fmuls 48(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 56(%ecx)
fmuls 56(%ebx)
fxch %st(2)
subl $4,%esp
faddp %st,%st(1)
fxch %st(1)
faddp %st,%st(1)
fistpl (%esp)
popl %eax
cmpl $32767,%eax
jg 1f
cmpl $-32768,%eax
jl 2f
movw %ax,(%esi)
jmp 4f
1: movw $32767,(%esi)
jmp 3f
2: movw $-32768,(%esi)
3: incl %edi
4:
.L62:
addl $-64,%ebx
addl $4,%esi
movl 16(%esp),%edx
leal -128(%ecx,%edx,8),%ecx
movl $15,%ebp
.L68:
flds -4(%ecx)
fchs
fmuls (%ebx)
flds -8(%ecx)
fmuls 4(%ebx)
fxch %st(1)
flds -12(%ecx)
fmuls 8(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -16(%ecx)
fmuls 12(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -20(%ecx)
fmuls 16(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -24(%ecx)
fmuls 20(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -28(%ecx)
fmuls 24(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -32(%ecx)
fmuls 28(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -36(%ecx)
fmuls 32(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -40(%ecx)
fmuls 36(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -44(%ecx)
fmuls 40(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -48(%ecx)
fmuls 44(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -52(%ecx)
fmuls 48(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -56(%ecx)
fmuls 52(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -60(%ecx)
fmuls 56(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds (%ecx)
fmuls 60(%ebx)
fxch %st(2)
subl $4,%esp
fsubrp %st,%st(1)
fxch %st(1)
fsubrp %st,%st(1)
fistpl (%esp)
popl %eax
cmpl $32767,%eax
jg 1f
cmpl $-32768,%eax
jl 2f
movw %ax,(%esi)
jmp 4f
1: movw $32767,(%esi)
jmp 3f
2: movw $-32768,(%esi)
3: incl %edi
4:
.L67:
addl $-64,%ebx
addl $-128,%ecx
addl $4,%esi
decl %ebp
jnz .L68
movl %edi,%eax
popl %ebx
popl %esi
popl %edi
popl %ebp
addl $12,%esp
ret

View File

@@ -0,0 +1,368 @@
/*
decode_i586_dither: asm synth with dither noise
copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Stefan Bieschewski as decode_i586.s without dither
This version uses "circular" 64k dither noise.
(Patch by Adrian <adrian.bacon@xs4all.nl>)
Thomas learned something about assembler and the stack while making this one thread safe (removing static data).
*/
#include "mangle.h"
.data
#ifndef __APPLE__
.section .rodata
#endif
ALIGN8
.LC0:
.long 0x0,0x40dfffc0
ALIGN8
.LC1:
.long 0x0,0xc0e00000
ALIGN8
.text
/* int synth_1to1_i586_asm_dither(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int bo_and_ditherindex[2], real *decwin); */
/*
	Entry and dct64 setup of the dithering synth.
	The fifth argument points at two ints: [0] is bo, [1] is the byte offset into
	dithernoise; the latter is copied into the local DITHERINDEX here and written
	back from DITHERINDEX at the end of the routine.
	%edi is zeroed below but, unlike in the non-dither routine, not used as clip
	counter (the "incl %edi" lines are commented out and %eax is set to 0 on return).

	NOTE(review): each of the three output sections further down executes
	"subl $4,%esp" before it updates and reads DITHERINDEX.  With %esp lowered
	by 4, 28(%esp) addresses the slot that is 24(%esp) (LOC2) everywhere else,
	not the slot initialised here and saved at the end.  Confirm whether the
	dither index really advances across calls as intended.
	NOTE(review): "fadd (%edi)" in those sections has no operand-size suffix;
	presumably it is meant as a single-precision add - confirm.
*/
.globl ASM_NAME(synth_1to1_i586_asm_dither)
ASM_NAME(synth_1to1_i586_asm_dither):
/* reserve four local dwords, then save the callee-saved registers */
subl $16,%esp
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
/* stack: 0(%esp)=%ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo 56=decwin */
/* All names below are %esp-relative, so they are only valid while %esp is at this level. */
#define BANDPTR 36(%esp)
#define CHANNEL 40(%esp)
#define OUT 44(%esp)
#define BUFFS 48(%esp)
#define BO 52(%esp)
#define DECWIN 56(%esp)
#define LOC0 16(%esp)
/* LOC1 and LOC2 are only used below right after one resp. two pushes, where they address the LOC0 slot. */
#define LOC1 20(%esp)
#define LOC2 24(%esp)
#define DITHERINDEX 28(%esp)
movl BANDPTR,%eax
movl OUT,%esi
movl BO, %ebx
movl (%ebx),%ebp /* get bo value */
movl 4(%ebx),%edi; /* get the ditherindex behind bo */
movl %edi,DITHERINDEX
xorl %edi,%edi
cmpl %edi,CHANNEL
jne .L48
/* channel 0: bo = (bo-1) & 15, stored back; use the first buffer set */
decl %ebp
andl $15,%ebp
movl %ebp,(%ebx) /* save bo back */
movl BUFFS,%ecx
jmp .L49
.L48:
/* In stereo mode , "rewind" dither pointer 32 samples , so 2nd channel */
/* has same dither values. Tested OK for mono and stereo MP2 and MP3 */
subl $128,DITHERINDEX /* better move to %edi for the two calculations? */
andl $0x0003fffc,DITHERINDEX
/* other channel: output shifted by one 16 bit sample, second buffer set at buffs+2176 */
addl $2,%esi
movl BUFFS,%ecx
addl $2176,%ecx
.L49:
/* now the call of dct64 is prepared, stuff pushed to the stack, but soon after it's removed again */
testl $1,%ebp
je .L50
/* bo odd: %ebx = buffer base, LOC0 = bo */
movl %ecx,%ebx
movl %ebp,LOC0
pushl %eax /* 3rd argument: bandPtr */
movl LOC1,%edx /* one push later: this is the LOC0 slot */
leal (%ebx,%edx,4),%eax
pushl %eax /* 2nd argument: base + bo*4 */
movl LOC2,%eax /* two pushes later: again the LOC0 slot */
incl %eax
andl $15,%eax
leal 1088(,%eax,4),%eax
addl %ebx,%eax /* 1st argument: base + 1088 + ((bo+1)&15)*4 */
jmp .L74
.L50:
/* bo even: %ebx = base + 1088, LOC0 = bo+1 */
leal 1088(%ecx),%ebx
leal 1(%ebp),%edx
movl %edx,LOC0
pushl %eax /* 3rd argument: bandPtr */
leal 1092(%ecx,%ebp,4),%eax
pushl %eax /* 2nd argument: base + 1088 + (bo+1)*4 */
leal (%ecx,%ebp,4),%eax /* 1st argument: base + bo*4 */
.L74:
pushl %eax
call ASM_NAME(dct64_i386)
addl $12,%esp
/* Now removed the parameters.
stack: 0(%esp)=%ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo */
/* %ecx = decwin + 64 - 4*LOC0: window start shifted by the value stored in LOC0 */
movl LOC0,%edx
leal 0(,%edx,4),%edx
/* movl $ASM_NAME(decwin)+64,%eax */
movl DECWIN,%eax
addl $64,%eax
movl %eax,%ecx
subl %edx,%ecx
/* 16 iterations of the first output loop (.L55) */
movl $16,%ebp
.L55:
flds (%ecx)
fmuls (%ebx)
flds 4(%ecx)
fmuls 4(%ebx)
fxch %st(1)
flds 8(%ecx)
fmuls 8(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 12(%ecx)
fmuls 12(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 16(%ecx)
fmuls 16(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 20(%ecx)
fmuls 20(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 24(%ecx)
fmuls 24(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 28(%ecx)
fmuls 28(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 32(%ecx)
fmuls 32(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 36(%ecx)
fmuls 36(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 40(%ecx)
fmuls 40(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 44(%ecx)
fmuls 44(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 48(%ecx)
fmuls 48(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 52(%ecx)
fmuls 52(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 56(%ecx)
fmuls 56(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 60(%ecx)
fmuls 60(%ebx)
fxch %st(2)
subl $4,%esp
faddp %st,%st(1)
fxch %st(1)
fsubrp %st,%st(1)
addl $4,DITHERINDEX
andl $0x0003fffc,DITHERINDEX
movl $ASM_NAME(dithernoise),%edi
addl DITHERINDEX,%edi
fadd (%edi)
/* fistpl and popl as a unit keep the stack unchanged */
fistpl (%esp)
popl %eax
cmpl $32767,%eax
jg 1f
cmpl $-32768,%eax
jl 2f
movw %ax,(%esi)
jmp 4f
1: movw $32767,(%esi)
jmp 3f
2: movw $-32768,(%esi)
3:
/* incl %edi */
4:
.L54:
addl $64,%ebx
subl $-128,%ecx
addl $4,%esi
decl %ebp
jnz .L55
flds (%ecx)
fmuls (%ebx)
flds 8(%ecx)
fmuls 8(%ebx)
flds 16(%ecx)
fmuls 16(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 24(%ecx)
fmuls 24(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 32(%ecx)
fmuls 32(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 40(%ecx)
fmuls 40(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 48(%ecx)
fmuls 48(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 56(%ecx)
fmuls 56(%ebx)
fxch %st(2)
subl $4,%esp
faddp %st,%st(1)
fxch %st(1)
faddp %st,%st(1)
addl $4,DITHERINDEX
andl $0x0003fffc,DITHERINDEX
movl $ASM_NAME(dithernoise),%edi
addl DITHERINDEX,%edi
fadd (%edi)
/* fistpl and popl as a unit keep the stack unchanged */
fistpl (%esp)
popl %eax
cmpl $32767,%eax
jg 1f
cmpl $-32768,%eax
jl 2f
movw %ax,(%esi)
jmp 4f
1: movw $32767,(%esi)
jmp 3f
2: movw $-32768,(%esi)
3:
/* incl %edi */
4:
.L62:
addl $-64,%ebx
addl $4,%esi
movl LOC0,%edx
leal -128(%ecx,%edx,8),%ecx
movl $15,%ebp
.L68:
flds -4(%ecx)
fchs
fmuls (%ebx)
flds -8(%ecx)
fmuls 4(%ebx)
fxch %st(1)
flds -12(%ecx)
fmuls 8(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -16(%ecx)
fmuls 12(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -20(%ecx)
fmuls 16(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -24(%ecx)
fmuls 20(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -28(%ecx)
fmuls 24(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -32(%ecx)
fmuls 28(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -36(%ecx)
fmuls 32(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -40(%ecx)
fmuls 36(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -44(%ecx)
fmuls 40(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -48(%ecx)
fmuls 44(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -52(%ecx)
fmuls 48(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -56(%ecx)
fmuls 52(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -60(%ecx)
fmuls 56(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds (%ecx)
fmuls 60(%ebx)
fxch %st(2)
subl $4,%esp
fsubrp %st,%st(1)
fxch %st(1)
fsubrp %st,%st(1)
addl $4,DITHERINDEX
andl $0x0003fffc,DITHERINDEX
movl $ASM_NAME(dithernoise),%edi
addl DITHERINDEX,%edi
fadd (%edi)
/* fistpl and popl as a unit keep the stack unchanged */
fistpl (%esp)
popl %eax
cmpl $32767,%eax
jg 1f
cmpl $-32768,%eax
jl 2f
movw %ax,(%esi)
jmp 4f
1: movw $32767,(%esi)
jmp 3f
2: movw $-32768,(%esi)
3:
/* incl %edi */
4:
.L67:
addl $-64,%ebx
addl $-128,%ecx
addl $4,%esi
decl %ebp
jnz .L68
/* return 0 in %eax instead of %edi (the clip counting is disabled in this variant) */
movl $0,%eax
/* save ditherindex */
movl BO,%ebx
movl DITHERINDEX,%esi
movl %esi,4(%ebx);
/* stack: 0=ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo */
popl %ebx
popl %esi
popl %edi
popl %ebp
addl $16,%esp
/* The stack must be now: 0=back 4=bandptr 8=channel 12=out 16=buffs 20=bo */
ret

125
src/libmpg123/decode_mmx.S Normal file
View File

@@ -0,0 +1,125 @@
/*
decode_MMX.s: MMX optimized synth
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by the mysterious higway (apparently)
Thomas' words about a note:
Initially, I found the note "this code comes under GPL" in this file.
After asking Michael about legal status of the MMX files, he said that he got them without any comment and thus I believe that the GPL comment was made by Michael, since he made mpg123 GPL at some time - and marked some files that way, but not all.
Based on that thought, I now consider this file along with the other parts of higway's MMX optimization to be licensed under LGPL 2.1 by Michael's decision.
*/
#include "mangle.h"
.text
.globl ASM_NAME(synth_1to1_MMX)
/* int synth_1to1_MMX(real *bandPtr, int channel, short *out, short *buffs, int *bo, float *decwins); */
/*
	MMX synth: one dct64_MMX call, then 17 + 15 = 32 output samples, each the
	pmaddwd dot product of 16 window words with 16 buffer words, shifted right
	by 13 and saturated to 16 bit by packssdw.  Samples are stored every 4 bytes
	starting at out + channel*2.
	The window (decwins) and buffers are accessed as packed 16 bit words here,
	whatever pointer type the prototype names.
	%eax is not set to a defined return value (it holds the last sample word on
	exit).
	%ebx, %ebp and %esi are used across the dct64_MMX call, so that routine is
	relied upon to preserve them.
*/
ASM_NAME(synth_1to1_MMX):
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
/* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16=back, 20=bandPtr, 24=channel, 28=out, 32=buffs, 36=bo, 40=decwins */
movl 24(%esp),%ecx
movl 28(%esp),%edi
movl $15,%ebx
movl 36(%esp),%edx
/* %edi = out + channel (in 16 bit units) */
leal (%edi,%ecx,2),%edi
/* %ecx = channel-1, so jecxz below is taken for channel 1 */
decl %ecx
movl 32(%esp),%esi
movl (%edx),%eax
jecxz .L1
/* channel != 1: bo = (bo-1) & 15, stored back; use the buffer set at buffs+1088 bytes */
decl %eax
andl %ebx,%eax
leal 1088(%esi),%esi
movl %eax,(%edx)
.L1:
/* %edx = set + bo*2, %ebp = bo, %ecx = set + 544 + ((bo+1)&15)*2 */
leal (%esi,%eax,2),%edx
movl %eax,%ebp
incl %eax
/* no push since the prologue yet, so 20(%esp) is bandPtr: 3rd argument for dct64_MMX */
pushl 20(%esp)
andl %ebx,%eax
leal 544(%esi,%eax,2),%ecx
/* %ebx = 16 from here on */
incl %ebx
testl $1, %eax
jnz .L2
/* (bo+1)&15 even: swap the two dct64 targets, count bo+1, and read from the half at +544 */
xchgl %edx,%ecx
incl %ebp
leal 544(%esi),%esi
.L2:
pushl %edx
pushl %ecx
call ASM_NAME(dct64_MMX)
addl $12,%esp
/* stack like before, pushed 3, incremented again */
/* %ecx = 17 iterations for .L3; %ebx = 16 - %ebp */
leal 1(%ebx), %ecx
subl %ebp,%ebx
/* %eax is borrowed to load decwins; the push shifts the argument from 40 to 44(%esp) */
pushl %eax
movl 44(%esp),%eax /* decwins */
/* %edx = decwins + (16 - %ebp)*2 bytes: window start */
leal (%eax,%ebx,2), %edx
popl %eax
.L3:
/* first 17 samples: 4 x 4 word products, summed */
movq (%edx),%mm0
pmaddwd (%esi),%mm0
movq 8(%edx),%mm1
pmaddwd 8(%esi),%mm1
movq 16(%edx),%mm2
pmaddwd 16(%esi),%mm2
movq 24(%edx),%mm3
pmaddwd 24(%esi),%mm3
paddd %mm1,%mm0
paddd %mm2,%mm0
paddd %mm3,%mm0
/* add the high dword to the low one: total of all 16 products in the low dword */
movq %mm0,%mm1
psrlq $32,%mm1
paddd %mm1,%mm0
/* scale down and saturate to signed 16 bit */
psrad $13,%mm0
packssdw %mm0,%mm0
movd %mm0,%eax
movw %ax, (%edi)
/* next buffer row (+32 bytes), next window row (+64 bytes), next output slot (+4 bytes) */
leal 32(%esi),%esi
leal 64(%edx),%edx
leal 4(%edi),%edi
loop .L3
/* second half: walk the buffer rows backwards, starting two rows before the current position */
subl $64,%esi
movl $15,%ecx
.L4:
/* remaining 15 samples: same dot product, but the result is negated before the store */
movq (%edx),%mm0
pmaddwd (%esi),%mm0
movq 8(%edx),%mm1
pmaddwd 8(%esi),%mm1
movq 16(%edx),%mm2
pmaddwd 16(%esi),%mm2
movq 24(%edx),%mm3
pmaddwd 24(%esi),%mm3
paddd %mm1,%mm0
paddd %mm2,%mm0
paddd %mm3,%mm0
movq %mm0,%mm1
psrlq $32,%mm1
paddd %mm0,%mm1
psrad $13,%mm1
packssdw %mm1,%mm1
/* %mm0 = 0, then 0 - sample with signed saturation */
psubd %mm0,%mm0
psubsw %mm1,%mm0
movd %mm0,%eax
movw %ax,(%edi)
subl $32,%esi
addl $64,%edx
leal 4(%edi),%edi
loop .L4
/* leave MMX state so that x87 code can run again */
emms
popl %ebx
popl %esi
popl %edi
popl %ebp
ret

View File

@@ -0,0 +1,278 @@
/*
decode_mmxsse: Synth for SSE and extended 3DNow (yeah, the name is a relic)
copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by the mysterious higway for MMX (apparently)
then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
Both have agreed to distribution under LGPL 2.1 .
Transformed back into standalone asm, with help of
gcc -S -DHAVE_CONFIG_H -I. -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c}
Original comment from MPlayer source follows:
*/
/*
* this code comes under GPL
* This code was taken from http://www.mpg123.org
* See ChangeLog of mpg123-0.59s-pre.1 for detail
* Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
*
* Local ChangeLog:
* - Partial loops unrolling and removing MOVW insn from loops
*/
#include "mangle.h"
.globl ASM_NAME(costab_mmxsse)
.data
ALIGN16
/* .type ASM_NAME(costab_mmxsse), @object
.size ASM_NAME(costab_mmxsse), 124 */
ASM_NAME(costab_mmxsse):
.long 1056974725
.long 1057056395
.long 1057223771
.long 1057485416
.long 1057855544
.long 1058356026
.long 1059019886
.long 1059897405
.long 1061067246
.long 1062657950
.long 1064892987
.long 1066774581
.long 1069414683
.long 1073984175
.long 1079645762
.long 1092815430
.long 1057005197
.long 1057342072
.long 1058087743
.long 1059427869
.long 1061799040
.long 1065862217
.long 1071413542
.long 1084439708
.long 1057128951
.long 1058664893
.long 1063675095
.long 1076102863
.long 1057655764
.long 1067924853
.long 1060439283
ALIGN8
/* .type one_null, @object
.size one_null, 8 */
one_null:
.long -65536
.long -65536
ALIGN8
/* .type null_one, @object
.size null_one, 8 */
null_one:
.long 65535
.long 65535
/* .local temp */
COMM(temp,4,4)
.text
ALIGN16,,15
/* void synth_1to1_sse_s(real *bandPtr, int channel, short *samples, short *buffs, int *bo) */
/*
	Entry and dct64 setup.  Same structure as the plain MMX synth, with these
	differences visible in the code below:
	- arguments are addressed via the %ebp frame (8=bandPtr, 12=channel, 16=samples, 20=buffs, 24=bo),
	- the adjusted bo is kept in the common symbol "temp" instead of a register,
	- the DCT is called indirectly through the pointer stored at mpl_dct64,
	- the window is the global symbol decwins, not an argument.
	NOTE(review): "temp" and "decwins" are shared symbols, so this routine is
	presumably not reentrant - confirm.
*/
.globl ASM_NAME(synth_1to1_sse_s)
/* .type ASM_NAME(synth_1to1_sse_s), @function */
ASM_NAME(synth_1to1_sse_s):
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
#APP
movl 12(%ebp),%ecx
movl 16(%ebp),%edi
movl $15,%ebx
movl 24(%ebp),%edx
/* %edi = samples + channel (in 16 bit units) */
leal (%edi,%ecx,2),%edi
/* %ecx = channel-1, so jecxz below is taken for channel 1 */
decl %ecx
movl 20(%ebp),%esi
movl (%edx),%eax
jecxz .L01
/* channel != 1: bo = (bo-1) & 15, stored back; use the buffer set at buffs+1088 bytes */
decl %eax
andl %ebx,%eax
leal 1088(%esi),%esi
movl %eax,(%edx)
.L01:
/* %edx = set + bo*2, temp = bo, %ecx = set + 544 + ((bo+1)&15)*2 */
leal (%esi,%eax,2),%edx
movl %eax,temp
incl %eax
andl %ebx,%eax
leal 544(%esi,%eax,2),%ecx
/* %ebx = 16 from here on */
incl %ebx
testl $1, %eax
jnz .L02
/* (bo+1)&15 even: swap the two DCT targets, count bo+1 in temp, read from the half at +544 */
xchgl %edx,%ecx
incl temp
leal 544(%esi),%esi
.L02:
emms
/* arguments pushed last-to-first: bandPtr, then the two buffer targets */
pushl 8(%ebp)
pushl %edx
pushl %ecx
call *ASM_NAME(mpl_dct64)
addl $12, %esp
/* %ecx = 17 samples for the first half; %ebx = 16 - temp */
leal 1(%ebx), %ecx
subl temp,%ebx
/* keep the full count on the stack; the unrolled loop below runs count/2 times */
pushl %ecx
/* %edx = decwins + 2*%ebx bytes: window start */
leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx
shrl $1, %ecx
ALIGN16
.L03:
movq (%edx),%mm0
movq 64(%edx),%mm4
pmaddwd (%esi),%mm0
pmaddwd 32(%esi),%mm4
movq 8(%edx),%mm1
movq 72(%edx),%mm5
pmaddwd 8(%esi),%mm1
pmaddwd 40(%esi),%mm5
movq 16(%edx),%mm2
movq 80(%edx),%mm6
pmaddwd 16(%esi),%mm2
pmaddwd 48(%esi),%mm6
movq 24(%edx),%mm3
movq 88(%edx),%mm7
pmaddwd 24(%esi),%mm3
pmaddwd 56(%esi),%mm7
paddd %mm1,%mm0
paddd %mm5,%mm4
paddd %mm2,%mm0
paddd %mm6,%mm4
paddd %mm3,%mm0
paddd %mm7,%mm4
movq %mm0,%mm1
movq %mm4,%mm5
psrlq $32,%mm1
psrlq $32,%mm5
paddd %mm1,%mm0
paddd %mm5,%mm4
psrad $13,%mm0
psrad $13,%mm4
packssdw %mm0,%mm0
packssdw %mm4,%mm4
movq (%edi), %mm1
punpckldq %mm4, %mm0
pand one_null, %mm1
pand null_one, %mm0
por %mm0, %mm1
movq %mm1,(%edi)
leal 64(%esi),%esi
leal 128(%edx),%edx
leal 8(%edi),%edi
decl %ecx
jnz .L03
popl %ecx
andl $1, %ecx
jecxz .next_loop
movq (%edx),%mm0
pmaddwd (%esi),%mm0
movq 8(%edx),%mm1
pmaddwd 8(%esi),%mm1
movq 16(%edx),%mm2
pmaddwd 16(%esi),%mm2
movq 24(%edx),%mm3
pmaddwd 24(%esi),%mm3
paddd %mm1,%mm0
paddd %mm2,%mm0
paddd %mm3,%mm0
movq %mm0,%mm1
psrlq $32,%mm1
paddd %mm1,%mm0
psrad $13,%mm0
packssdw %mm0,%mm0
movd %mm0,%eax
movw %ax, (%edi)
leal 32(%esi),%esi
leal 64(%edx),%edx
leal 4(%edi),%edi
.next_loop:
subl $64,%esi
movl $7,%ecx
ALIGN16
.L04:
movq (%edx),%mm0
movq 64(%edx),%mm4
pmaddwd (%esi),%mm0
pmaddwd -32(%esi),%mm4
movq 8(%edx),%mm1
movq 72(%edx),%mm5
pmaddwd 8(%esi),%mm1
pmaddwd -24(%esi),%mm5
movq 16(%edx),%mm2
movq 80(%edx),%mm6
pmaddwd 16(%esi),%mm2
pmaddwd -16(%esi),%mm6
movq 24(%edx),%mm3
movq 88(%edx),%mm7
pmaddwd 24(%esi),%mm3
pmaddwd -8(%esi),%mm7
paddd %mm1,%mm0
paddd %mm5,%mm4
paddd %mm2,%mm0
paddd %mm6,%mm4
paddd %mm3,%mm0
paddd %mm7,%mm4
movq %mm0,%mm1
movq %mm4,%mm5
psrlq $32,%mm1
psrlq $32,%mm5
paddd %mm0,%mm1
paddd %mm4,%mm5
psrad $13,%mm1
psrad $13,%mm5
packssdw %mm1,%mm1
packssdw %mm5,%mm5
psubd %mm0,%mm0
psubd %mm4,%mm4
psubsw %mm1,%mm0
psubsw %mm5,%mm4
movq (%edi), %mm1
punpckldq %mm4, %mm0
pand one_null, %mm1
pand null_one, %mm0
por %mm0, %mm1
movq %mm1,(%edi)
subl $64,%esi
addl $128,%edx
leal 8(%edi),%edi
decl %ecx
jnz .L04
movq (%edx),%mm0
pmaddwd (%esi),%mm0
movq 8(%edx),%mm1
pmaddwd 8(%esi),%mm1
movq 16(%edx),%mm2
pmaddwd 16(%esi),%mm2
movq 24(%edx),%mm3
pmaddwd 24(%esi),%mm3
paddd %mm1,%mm0
paddd %mm2,%mm0
paddd %mm3,%mm0
movq %mm0,%mm1
psrlq $32,%mm1
paddd %mm0,%mm1
psrad $13,%mm1
packssdw %mm1,%mm1
psubd %mm0,%mm0
psubsw %mm1,%mm0
movd %mm0,%eax
movw %ax,(%edi)
emms
#NO_APP
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
/* .size ASM_NAME(synth_1to1_sse_s), .-ASM_NAME(synth_1to1_sse_s) */

401
src/libmpg123/decode_ntom.c Normal file
View File

@@ -0,0 +1,401 @@
/*
decode_ntom.c: N->M down/up sampling. Not optimized for speed.
copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
#include "mpg123lib_intern.h"
/*
	Compute the fixed-point resampling step for the N-to-M synth.
	The step is output rate * NTOM_MUL / input rate (input rate from frame_freq(),
	output rate from fr->af.rate).
	Returns 0 on success.  Returns -1 and sets fr->err to MPG123_BAD_RATE when a
	rate is non-positive or above NTOM_MAX_FREQ, or when the resulting step
	exceeds NTOM_MAX*NTOM_MUL.
	On success both ntom_val entries are initialised for frame fr->num.
*/
int synth_ntom_set_step(mpg123_handle *fr)
{
	long m,n;
	m = frame_freq(fr);
	n = fr->af.rate;
	if(VERBOSE2)
		fprintf(stderr,"Init rate converter: %ld->%ld\n",m,n);
	if(n > NTOM_MAX_FREQ || m > NTOM_MAX_FREQ || m <= 0 || n <= 0) {
		if(NOQUIET) error("NtoM converter: illegal rates");
		fr->err = MPG123_BAD_RATE;
		return -1;
	}
	/* Both rates are positive here.  The product is formed in unsigned long so
	   that it cannot overflow a signed long before the division. */
	fr->ntom_step = ((unsigned long) n * NTOM_MUL) / (unsigned long) m;
	if(fr->ntom_step > (unsigned long)NTOM_MAX*NTOM_MUL) {
		/* Report the very limit that was checked above (was a hard-coded 8). */
		if(NOQUIET) error3("max. 1:%i conversion allowed (%lu vs %lu)!", NTOM_MAX, fr->ntom_step, (unsigned long)NTOM_MAX*NTOM_MUL);
		fr->err = MPG123_BAD_RATE;
		return -1;
	}
	fr->ntom_val[0] = fr->ntom_val[1] = ntom_val(fr, fr->num);
	return 0;
}
/*
The SAFE_NTOM does iterative loops instead of straight multiplication.
The safety is not just about the algorithm closely mimicking the decoder instead of applying some formula,
it is more about avoiding multiplication of possibly big sample offsets (a 32bit off_t could overflow too easily).
*/
unsigned long ntom_val(mpg123_handle *fr, off_t frame)
{
	off_t ntm;
#ifdef SAFE_NTOM /* Step through the frames one by one; no large product is formed. */
	off_t done = 0;
	ntm = NTOM_MUL>>1; /* value at frame 0 */
	while(done < frame) /* only entered for frame > 0 */
	{
		/* advance by one frame worth of samples, keep the remainder below NTOM_MUL */
		ntm += spf(fr)*fr->ntom_step;
		ntm -= (ntm/NTOM_MUL)*NTOM_MUL;
		++done;
	}
#else /* One closed-form computation from the overall sample offset. */
	ntm = (NTOM_MUL>>1) + spf(fr)*frame*fr->ntom_step;
	ntm -= (ntm/NTOM_MUL)*NTOM_MUL;
#endif
	return (unsigned long) ntm;
}
/* Set the ntom value for next expected frame to be decoded.
This is for keeping output consistent across seeks. */
void ntom_set_ntom(mpg123_handle *fr, off_t num)
{
	/* Both entries get the value computed for frame num. */
	fr->ntom_val[0] = ntom_val(fr, num);
	fr->ntom_val[1] = fr->ntom_val[0];
}
/* Convert frame offset to unadjusted output sample offset. */
off_t ntom_frmouts(mpg123_handle *fr, off_t frame)
{
	off_t soff = 0;
	off_t ntm = ntom_val(fr,0);
#ifdef SAFE_NTOM
	/* Frame counter for the loop below; it was used without a declaration
	   before, so this branch could not compile with SAFE_NTOM defined. */
	off_t f;
	if(frame <= 0) return 0;
	/* Accumulate the whole output samples of each frame, carrying the remainder. */
	for(f=0; f<frame; ++f)
	{
		ntm += spf(fr)*fr->ntom_step;
		soff += ntm/NTOM_MUL;
		ntm -= (ntm/NTOM_MUL)*NTOM_MUL;
	}
#else
	/* Closed form: one multiplication over the whole input sample offset. */
	soff = (ntm + frame*spf(fr)*fr->ntom_step)/NTOM_MUL;
#endif
	return soff;
}
/* Convert input samples to unadjusted output samples. */
off_t ntom_ins2outs(mpg123_handle *fr, off_t ins)
{
	off_t soff = 0;
	off_t ntm = ntom_val(fr,0);
#ifdef SAFE_NTOM
	{
		off_t block = spf(fr);
		if(ins <= 0) return 0;
		/* ins is positive here, so at least one chunk is processed. */
		while(ins > 0)
		{
			/* take at most one frame worth of input samples per step */
			off_t chunk;
			if(ins > block) chunk = block;
			else chunk = ins;

			ntm += chunk*fr->ntom_step;
			soff += ntm/NTOM_MUL;
			ntm -= (ntm/NTOM_MUL)*NTOM_MUL;
			ins -= chunk;
		}
	}
#else
	/* Closed form over the whole input sample count. */
	soff = (ntm + ins*fr->ntom_step)/NTOM_MUL;
#endif
	return soff;
}
/* Determine frame offset from unadjusted output sample offset. */
off_t ntom_frameoff(mpg123_handle *fr, off_t soff)
{
	off_t ioff = 0; /* frames or samples */
	off_t ntm = ntom_val(fr,0);
#ifdef SAFE_NTOM
	if(soff <= 0) return 0;
	/* Consume output samples frame by frame until a frame yields more than what is left. */
	ioff = 0;
	for(;;)
	{
		ntm += spf(fr)*fr->ntom_step;
		if(ntm/NTOM_MUL > soff) break;
		soff -= ntm/NTOM_MUL;
		ntm -= (ntm/NTOM_MUL)*NTOM_MUL;
		++ioff;
	}
	return ioff;
#else
	/* Closed form: input sample offset first, then converted to frames. */
	ioff = (soff*NTOM_MUL-ntm)/fr->ntom_step;
	return ioff/spf(fr);
#endif
}
/* Now to the actual decoding/synth functions... */
/*
	8 bit output for one channel of an interleaved stereo stream.
	synth_ntom() is run into a temporary buffer; every second temporary sample
	(starting at index channel) is then converted and written to every second
	output byte.  The fill is advanced (by half the temporary fill) only when
	final is non-zero.  Returns what synth_ntom() returned.
*/
int synth_ntom_8bit(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
	sample_t samples_tmp[8*64];
	sample_t *src = samples_tmp + channel;
	unsigned char *out_base = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int i,ret;

	/* Let the generic synth write into the temporary buffer. */
	fr->buffer.data = (unsigned char*) samples_tmp;
	fr->buffer.fill = 0;
	ret = synth_ntom(bandPtr, channel, fr, 1);
	fr->buffer.data = out_base;

	/* fr->buffer.fill now holds the byte count produced in the temporary buffer. */
	dst = out_base + channel + old_fill;
	for(i=0;i<(fr->buffer.fill>>2);i++)
	{
#ifdef FLOATOUT
		dst[2*i] = 0;
#else
		dst[2*i] = fr->conv16to8[src[2*i]>>AUSHIFT];
#endif
	}
	fr->buffer.fill = old_fill + (final ? fr->buffer.fill>>1 : 0);
	return ret;
}
/*
	8 bit mono output: synth_ntom() for channel 0 into a temporary buffer, then
	every second temporary sample is converted and written as one output byte.
	The fill grows by one byte per written sample.  Returns what synth_ntom() returned.
*/
int synth_ntom_8bit_mono(real *bandPtr, mpg123_handle *fr)
{
	sample_t samples_tmp[8*64];
	sample_t *src = samples_tmp;
	unsigned char *out_base = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int i,ret;

	/* Let the generic synth write into the temporary buffer. */
	fr->buffer.data = (unsigned char*) samples_tmp;
	fr->buffer.fill = 0;
	ret = synth_ntom(bandPtr, 0, fr, 1);
	fr->buffer.data = out_base;

	dst = out_base + old_fill;
	for(i=0;i<(fr->buffer.fill>>2);i++)
	{
#ifdef FLOATOUT
		dst[i] = 0;
#else
		dst[i] = fr->conv16to8[src[2*i]>>AUSHIFT];
#endif
	}
	fr->buffer.fill = old_fill + (fr->buffer.fill>>2);
	return ret;
}
/*
	8 bit mono-to-stereo output: synth_ntom() for channel 0 into a temporary
	buffer, then every second temporary sample is converted and written twice
	(left and right byte).  The fill grows by two bytes per source sample.
	Returns what synth_ntom() returned.
*/
int synth_ntom_8bit_mono2stereo(real *bandPtr, mpg123_handle *fr)
{
	sample_t samples_tmp[8*64];
	sample_t *src = samples_tmp;
	unsigned char *out_base = fr->buffer.data;
	unsigned char *dst;
	int old_fill = fr->buffer.fill;
	int i,ret;

	/* Let the generic synth write into the temporary buffer. */
	fr->buffer.data = (unsigned char*) samples_tmp;
	fr->buffer.fill = 0;
	ret = synth_ntom(bandPtr, 0, fr, 1);
	fr->buffer.data = out_base;

	dst = out_base + old_fill;
	for(i=0;i<(fr->buffer.fill>>2);i++)
	{
#ifdef FLOATOUT
		dst[2*i] = 0;
		dst[2*i+1] = 0;
#else
		dst[2*i] = fr->conv16to8[src[2*i]>>AUSHIFT];
		dst[2*i+1] = fr->conv16to8[src[2*i]>>AUSHIFT];
#endif
	}
	fr->buffer.fill = old_fill + (fr->buffer.fill>>1);
	return ret;
}
/*
	N-to-M resampling synth, mono output in the native sample type.
	Runs synth_ntom() for channel 0 into a scratch buffer and copies every
	second scratch sample to the real output buffer.
	Returns whatever synth_ntom() returned.
*/
int synth_ntom_mono(real *bandPtr, mpg123_handle *fr)
{
	sample_t scratch[8*64];
	sample_t *src = scratch;
	int n,clipped;
	int old_fill = fr->buffer.fill;
	unsigned char *out = fr->buffer.data;

	/* Temporarily redirect the output buffer to the scratch space. */
	fr->buffer.data = (unsigned char*) scratch;
	fr->buffer.fill = 0;
	clipped = synth_ntom(bandPtr, 0, fr, 1);
	fr->buffer.data = out;

	out += old_fill;
	for(n=0; n<(fr->buffer.fill>>2); n++, src += 2)
	{
		*( (sample_t *)out) = *src;
		out += sizeof(sample_t);
	}

	fr->buffer.fill = old_fill + (fr->buffer.fill>>2)*sizeof(sample_t);
	return clipped;
}
/*
	N-to-M resampling synth with the mono signal duplicated to both channels.
	synth_ntom() writes channel 0 straight into the output buffer; afterwards
	the first sample of each freshly written pair is copied to the second one.
	Returns whatever synth_ntom() returned.
*/
int synth_ntom_mono2stereo(real *bandPtr, mpg123_handle *fr)
{
	int n,clipped;
	int start = fr->buffer.fill;
	unsigned char *raw = fr->buffer.data + start;

	clipped = synth_ntom(bandPtr, 0, fr, 1);

	/* Walk over the sample pairs added by the call above. */
	for(n=0; n<((fr->buffer.fill-start)>>2); n++)
	{
		sample_t *pair = (sample_t *)raw;
		pair[1] = pair[0];
		raw += 2*sizeof(sample_t);
	}
	return clipped;
}
/*
	Synth for one channel with N-to-M resampling.
	bandPtr: subband samples, handed to the equalizer (if enabled) and to the dct64 routine
	channel: 0 selects fr->real_buffs[0], anything else selects fr->real_buffs[1] and shifts the output by one sample
	fr:      decoder handle; uses bo[0], ntom_val[], ntom_step and the output buffer
	final:   if non-zero, fr->buffer.fill is updated to the new end of the output
	Returns the clip value that WRITE_SAMPLE was handed (starts at 0).
	For every windowed input sample, fr->ntom_step is added to an accumulator;
	one output sample is written for each multiple of NTOM_MUL reached, so an
	input sample can be dropped, written once or written several times.
*/
int synth_ntom(real *bandPtr,int channel, mpg123_handle *fr, int final)
{
/* Output advances by two samples per write (the other slot belongs to the other channel). */
static const int step = 2;
sample_t *samples = (sample_t *) (fr->buffer.data + fr->buffer.fill);
real *b0, **buf; /* (*buf)[0x110]; */
int clip = 0;
int bo1;
int ntom;
if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
if(!channel) {
/* Channel 0 moves the buffer offset (modulo 16) and starts from ntom_val[0], which is also copied to ntom_val[1]. */
fr->bo[0]--;
fr->bo[0] &= 0xf;
buf = fr->real_buffs[0];
ntom = fr->ntom_val[1] = fr->ntom_val[0];
}
else {
/* The other channel writes one sample further into the output and resumes from ntom_val[1]. */
samples++;
buf = fr->real_buffs[1];
ntom = fr->ntom_val[1];
}
/* The parity of bo[0] decides which of the two buffers is windowed and where dct64 puts its outputs. */
if(fr->bo[0] & 0x1) {
b0 = buf[0];
bo1 = fr->bo[0];
opt_dct64(fr)(buf[1]+((fr->bo[0]+1)&0xf),buf[0]+fr->bo[0],bandPtr);
}
else {
b0 = buf[1];
bo1 = fr->bo[0]+1;
opt_dct64(fr)(buf[0]+fr->bo[0],buf[1]+fr->bo[0]+1,bandPtr);
}
{
register int j;
real *window = opt_decwin(fr) + 16 - bo1;
/* First 16 input samples: window ascending, alternating signs. */
for (j=16;j;j--,window+=0x10)
{
real sum;
ntom += fr->ntom_step;
if(ntom < NTOM_MUL) {
/* No output due for this input sample: skip its 16 taps. */
window += 16;
b0 += 16;
continue;
}
sum = *window++ * *b0++;
sum -= *window++ * *b0++;
sum += *window++ * *b0++;
sum -= *window++ * *b0++;
sum += *window++ * *b0++;
sum -= *window++ * *b0++;
sum += *window++ * *b0++;
sum -= *window++ * *b0++;
sum += *window++ * *b0++;
sum -= *window++ * *b0++;
sum += *window++ * *b0++;
sum -= *window++ * *b0++;
sum += *window++ * *b0++;
sum -= *window++ * *b0++;
sum += *window++ * *b0++;
sum -= *window++ * *b0++;
/* Write the same value once per NTOM_MUL contained in the accumulator. */
while(ntom >= NTOM_MUL) {
WRITE_SAMPLE(samples,sum,clip);
samples += step;
ntom -= NTOM_MUL;
}
}
/* Input sample 17: only the even taps are used; the pointers are not advanced here. */
ntom += fr->ntom_step;
if(ntom >= NTOM_MUL)
{
real sum;
sum = window[0x0] * b0[0x0];
sum += window[0x2] * b0[0x2];
sum += window[0x4] * b0[0x4];
sum += window[0x6] * b0[0x6];
sum += window[0x8] * b0[0x8];
sum += window[0xA] * b0[0xA];
sum += window[0xC] * b0[0xC];
sum += window[0xE] * b0[0xE];
while(ntom >= NTOM_MUL) {
WRITE_SAMPLE(samples,sum,clip);
samples += step;
ntom -= NTOM_MUL;
}
}
/* Remaining 15 input samples: b0 steps backwards block by block, window is read descending, all terms are subtracted. */
b0-=0x10,window-=0x20;
window += bo1<<1;
for (j=15;j;j--,b0-=0x20,window-=0x10)
{
real sum;
ntom += fr->ntom_step;
if(ntom < NTOM_MUL) {
/* Skip: together with the loop increment this leaves b0 16 entries lower and window 32 entries lower, like a computed pass. */
window -= 16;
b0 += 16;
continue;
}
sum = -*(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
sum -= *(--window) * *b0++;
while(ntom >= NTOM_MUL) {
WRITE_SAMPLE(samples,sum,clip);
samples += step;
ntom -= NTOM_MUL;
}
}
}
/* Remember the accumulator remainder for the next call on this channel. */
fr->ntom_val[channel] = ntom;
/* The channel offset is subtracted as 2 bytes. NOTE(review): that matches a 16 bit sample_t; confirm for other sample types. */
if(final) fr->buffer.fill = ((unsigned char *) samples - fr->buffer.data - (channel ? 2 : 0));
return clip;
}

View File

@@ -0,0 +1,4 @@
/* Instantiate the synth template from decode_sse3d.h: these macros choose the dct64 routine it calls and the symbol name it exports. */
#include "mangle.h"
#define MPL_DCT64 ASM_NAME(dct64_sse)
#define SYNTH_NAME ASM_NAME(synth_1to1_sse_asm)
#include "decode_sse3d.h"

View File

@@ -0,0 +1,247 @@
/*
decode_sse3d: Synth for SSE and extended 3DNow (yeah, the name is a relic)
copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by the mysterious higway for MMX (apparently)
then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
Both have agreed to distribution under LGPL 2.1 .
Transformed back into standalone asm, with help of
gcc -S -DHAVE_CONFIG_H -I. -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c}
The difference between SSE and 3DNowExt is the dct64 function and the synth function name.
This template here uses the SYNTH_NAME and MPL_DCT64 macros for this - see decode_sse.S and decode_3dnowext.S...
That's not memory efficient since there's doubled code, but it's easier than giving another function pointer.
Maybe I'll change it in future, but now I need something that works.
Original comment from MPlayer source follows:
*/
/*
* this code comes under GPL
* This code was taken from http://www.mpg123.org
* See ChangeLog of mpg123-0.59s-pre.1 for detail
* Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
*
* Local ChangeLog:
* - Partial loops unrolling and removing MOVW insn from loops
*/
#include "mangle.h"
/* Masks used by the synth below when merging new samples into the output. */
.data
ALIGN8
/* -65536 == 0xFFFF0000 in each 32 bit half: keeps the upper 16 bit word of each pair. */
one_null:
.long -65536
.long -65536
ALIGN8
/* 65535 == 0x0000FFFF in each 32 bit half: keeps the lower 16 bit word of each pair. */
null_one:
.long 65535
.long 65535
.text
ALIGN16,,15
/* void SYNTH_NAME(real *bandPtr, int channel, short *samples, short *buffs, int *bo, float *decwins) */
/*
	MMX synth for one channel. It calls MPL_DCT64 and then multiplies and
	accumulates 16 bit words from the window (via %edx) and the buffer (via %esi).
	Output goes to samples as 16 bit words in every second word slot; the
	slots in between are preserved by masking with one_null/null_one.
	Register use, as far as visible here:
	%edi output pointer, %esi buffer pointer, %edx window pointer, %ecx loop counter.
*/
.globl SYNTH_NAME
SYNTH_NAME:
pushl %ebp
/* stack:0=ebp 4=back 8=bandptr 12=channel 16=samples 20=buffs 24=bo 28=decwins */
movl %esp, %ebp
/* Now the old stack addresses are preserved via %ebp. */
subl $4,%esp /* What has been called temp before. */
pushl %edi
pushl %esi
pushl %ebx
/* With the three registers pushed, the temp slot is at 12(%esp). Only valid while nothing else is pushed. */
#define TEMP 12(%esp)
#APP
/* %ecx = channel, %edi = samples + 2*channel bytes, %edx = bo, %esi = buffs, %eax = *bo */
movl 12(%ebp),%ecx
movl 16(%ebp),%edi
movl $15,%ebx
movl 24(%ebp),%edx
leal (%edi,%ecx,2),%edi
decl %ecx
movl 20(%ebp),%esi
movl (%edx),%eax
/* channel == 1: skip the update of *bo and keep the buffer base. */
jecxz .L01
/* Otherwise: *bo = (*bo - 1) & 15 and the buffer base moves on by 1088 bytes. */
decl %eax
andl %ebx,%eax
leal 1088(%esi),%esi
movl %eax,(%edx)
.L01:
/* %edx = base + 2*bo, TEMP = bo, %ecx = base + 544 + 2*((bo+1)&15), %ebx becomes 16 */
leal (%esi,%eax,2),%edx
movl %eax,TEMP
incl %eax
andl %ebx,%eax
leal 544(%esi,%eax,2),%ecx
incl %ebx
/* If (bo+1)&15 is even: swap the two dct64 targets, bump TEMP and move the base by 544 bytes. */
testl $1, %eax
jnz .L02
xchgl %edx,%ecx
incl TEMP
leal 544(%esi),%esi
.L02:
emms
/* MPL_DCT64(%ecx, %edx, bandPtr); the three arguments are removed again afterwards. */
pushl 8(%ebp)
pushl %edx
pushl %ecx
call MPL_DCT64
addl $12, %esp
/* %ecx = 17 (kept on the stack), %ebx = 16 - TEMP, %edx = decwins + 2*%ebx bytes */
leal 1(%ebx), %ecx
subl TEMP,%ebx
pushl %ecx
/* leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx */
movl 28(%ebp),%ecx
leal (%ecx,%ebx,2), %edx
movl (%esp),%ecx /* restore, but leave value on stack */
/* 17 >> 1 == 8 iterations, each producing two output samples. */
shrl $1, %ecx
ALIGN16
.L03:
/* Two dot products at once: %mm0..%mm3 for the first sample, %mm4..%mm7 for the second. */
movq (%edx),%mm0
movq 64(%edx),%mm4
pmaddwd (%esi),%mm0
pmaddwd 32(%esi),%mm4
movq 8(%edx),%mm1
movq 72(%edx),%mm5
pmaddwd 8(%esi),%mm1
pmaddwd 40(%esi),%mm5
movq 16(%edx),%mm2
movq 80(%edx),%mm6
pmaddwd 16(%esi),%mm2
pmaddwd 48(%esi),%mm6
movq 24(%edx),%mm3
movq 88(%edx),%mm7
pmaddwd 24(%esi),%mm3
pmaddwd 56(%esi),%mm7
/* Sum up the partial products, fold the upper half onto the lower one. */
paddd %mm1,%mm0
paddd %mm5,%mm4
paddd %mm2,%mm0
paddd %mm6,%mm4
paddd %mm3,%mm0
paddd %mm7,%mm4
movq %mm0,%mm1
movq %mm4,%mm5
psrlq $32,%mm1
psrlq $32,%mm5
paddd %mm1,%mm0
paddd %mm5,%mm4
/* Shift right by 13 bits and pack to 16 bit words with saturation. */
psrad $13,%mm0
psrad $13,%mm4
packssdw %mm0,%mm0
packssdw %mm4,%mm4
/* Merge: keep the upper word of each existing output pair, insert the new samples as lower words. */
movq (%edi), %mm1
punpckldq %mm4, %mm0
pand one_null, %mm1
pand null_one, %mm0
por %mm0, %mm1
movq %mm1,(%edi)
leal 64(%esi),%esi
leal 128(%edx),%edx
leal 8(%edi),%edi
decl %ecx
jnz .L03
/* The saved count (17) is odd, so one single sample follows. */
popl %ecx
andl $1, %ecx
jecxz .next_loop
movq (%edx),%mm0
pmaddwd (%esi),%mm0
movq 8(%edx),%mm1
pmaddwd 8(%esi),%mm1
movq 16(%edx),%mm2
pmaddwd 16(%esi),%mm2
movq 24(%edx),%mm3
pmaddwd 24(%esi),%mm3
paddd %mm1,%mm0
paddd %mm2,%mm0
paddd %mm3,%mm0
movq %mm0,%mm1
psrlq $32,%mm1
paddd %mm1,%mm0
psrad $13,%mm0
packssdw %mm0,%mm0
movd %mm0,%eax
/* Only one 16 bit word is stored, the neighbouring word stays untouched. */
movw %ax, (%edi)
leal 32(%esi),%esi
leal 64(%edx),%edx
leal 4(%edi),%edi
.next_loop:
/* Second part: the buffer pointer now walks backwards; 7 iterations with two samples each. */
subl $64,%esi
movl $7,%ecx
ALIGN16
.L04:
movq (%edx),%mm0
movq 64(%edx),%mm4
pmaddwd (%esi),%mm0
pmaddwd -32(%esi),%mm4
movq 8(%edx),%mm1
movq 72(%edx),%mm5
pmaddwd 8(%esi),%mm1
pmaddwd -24(%esi),%mm5
movq 16(%edx),%mm2
movq 80(%edx),%mm6
pmaddwd 16(%esi),%mm2
pmaddwd -16(%esi),%mm6
movq 24(%edx),%mm3
movq 88(%edx),%mm7
pmaddwd 24(%esi),%mm3
pmaddwd -8(%esi),%mm7
paddd %mm1,%mm0
paddd %mm5,%mm4
paddd %mm2,%mm0
paddd %mm6,%mm4
paddd %mm3,%mm0
paddd %mm7,%mm4
movq %mm0,%mm1
movq %mm4,%mm5
psrlq $32,%mm1
psrlq $32,%mm5
paddd %mm0,%mm1
paddd %mm4,%mm5
psrad $13,%mm1
psrad $13,%mm5
packssdw %mm1,%mm1
packssdw %mm5,%mm5
/* Negate with saturation: clear %mm0/%mm4, then subtract the packed sums from zero. */
psubd %mm0,%mm0
psubd %mm4,%mm4
psubsw %mm1,%mm0
psubsw %mm5,%mm4
movq (%edi), %mm1
punpckldq %mm4, %mm0
pand one_null, %mm1
pand null_one, %mm0
por %mm0, %mm1
movq %mm1,(%edi)
subl $64,%esi
addl $128,%edx
leal 8(%edi),%edi
decl %ecx
jnz .L04
/* Last single sample, negated like the ones in the loop above. */
movq (%edx),%mm0
pmaddwd (%esi),%mm0
movq 8(%edx),%mm1
pmaddwd 8(%esi),%mm1
movq 16(%edx),%mm2
pmaddwd 16(%esi),%mm2
movq 24(%edx),%mm3
pmaddwd 24(%esi),%mm3
paddd %mm1,%mm0
paddd %mm2,%mm0
paddd %mm3,%mm0
movq %mm0,%mm1
psrlq $32,%mm1
paddd %mm0,%mm1
psrad $13,%mm1
packssdw %mm1,%mm1
psubd %mm0,%mm0
psubsw %mm1,%mm0
movd %mm0,%eax
movw %ax,(%edi)
/* Leave MMX state before returning. */
emms
#NO_APP
/* Restore the saved registers and drop the temp slot. */
popl %ebx
popl %esi
popl %edi
addl $4,%esp
popl %ebp
ret

65539
src/libmpg123/dnoise.c Normal file

File diff suppressed because it is too large Load Diff

17
src/libmpg123/equalizer.c Normal file
View File

@@ -0,0 +1,17 @@
/*
equalizer.c: equalizer settings
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
#include "mpg123lib_intern.h"
/*
	Scale each of the 32 subband values in bandPtr with the equalizer factor
	of the same index for the given channel. Works in place.
*/
void do_equalizer(real *bandPtr,int channel, real equalizer[2][32])
{
	int band = 0;
	while(band < 32)
	{
		bandPtr[band] = REAL_MUL(bandPtr[band], equalizer[channel][band]);
		++band;
	}
}

View File

@@ -0,0 +1,68 @@
/*
equalizer_3dnow: 3DNow! optimized do_equalizer()
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by KIMURA Takuhiro
*/
#include "mangle.h"
.text
ALIGN4
.globl ASM_NAME(do_equalizer_3dnow)
/* .type ASM_NAME(do_equalizer_3dnow),@function */
/* void do_equalizer(real *bandPtr,int channel, real equalizer[2][32]); */
/*
	Multiplies the values at bandPtr in place with equalizer values, using pfmul on 8 byte blocks.
	%ebx = bandPtr, %esi = equalizer, %edx = byte offset into bandPtr,
	%ecx = byte offset into equalizer, starting at channel*128.
*/
ASM_NAME(do_equalizer_3dnow):
pushl %esi
pushl %ebx
/* bandPtr */
movl 12(%esp),%ebx
/* channel */
movl 16(%esp),%ecx
xorl %edx,%edx
/* equalizer */
movl 20(%esp),%esi
/* channel << 7: 128 bytes per channel row */
sall $7,%ecx
ALIGN4
.L9:
/* One pass handles 64 bytes: eight load/pfmul/store groups, interleaved over %mm0 and %mm1. */
movq (%ebx,%edx),%mm0
pfmul (%esi,%ecx),%mm0
movq 8(%ebx,%edx),%mm1
pfmul 8(%esi,%ecx),%mm1
movq %mm0,(%ebx,%edx)
movq 16(%ebx,%edx),%mm0
pfmul 16(%esi,%ecx),%mm0
movq %mm1,8(%ebx,%edx)
movq 24(%ebx,%edx),%mm1
pfmul 24(%esi,%ecx),%mm1
movq %mm0,16(%ebx,%edx)
movq 32(%ebx,%edx),%mm0
pfmul 32(%esi,%ecx),%mm0
movq %mm1,24(%ebx,%edx)
movq 40(%ebx,%edx),%mm1
pfmul 40(%esi,%ecx),%mm1
movq %mm0,32(%ebx,%edx)
movq 48(%ebx,%edx),%mm0
pfmul 48(%esi,%ecx),%mm0
movq %mm1,40(%ebx,%edx)
movq 56(%ebx,%edx),%mm1
pfmul 56(%esi,%ecx),%mm1
movq %mm0,48(%ebx,%edx)
movq %mm1,56(%ebx,%edx)
/* The band offset advances by 64 and the loop runs while it is <= 124, i.e. two passes (128 bytes). */
/* NOTE(review): the equalizer offset advances by only 32 per pass, so the second pass reads equalizer bytes 32..95 of the row instead of 64..127 - looks inconsistent with the C do_equalizer(); confirm. */
addl $64,%edx
addl $32,%ecx
cmpl $124,%edx
jle .L9
ALIGN4
/* NOTE(review): no emms/femms is issued before returning; presumably the caller takes care of that - verify. */
popl %ebx
popl %esi
ret

195
src/libmpg123/format.c Normal file
View File

@@ -0,0 +1,195 @@
#include "mpg123lib_intern.h"
/* static int chans[NUM_CHANNELS] = { 1 , 2 }; */
/* Table of the standard sampling rates in Hz; rate2num() maps a rate to its index in here. */
const long mpg123_rates[MPG123_RATES] = /* only the standard rates */
{
8000, 11025, 12000,
16000, 22050, 24000,
32000, 44100, 48000,
};
/* Table of output encodings. The order matters: frame_output_format() treats indices 0 and 1 as the 16 bit encodings and indices from 2 on as the 8 bit ones. */
const int mpg123_encodings[MPG123_ENCODINGS] =
{
MPG123_ENC_SIGNED_16,
MPG123_ENC_UNSIGNED_16,
MPG123_ENC_UNSIGNED_8,
MPG123_ENC_SIGNED_8,
MPG123_ENC_ULAW_8,
MPG123_ENC_ALAW_8
};
/* char audio_caps[NUM_CHANNELS][MPG123_RATES+1][MPG123_ENCODINGS]; */
/*
	Map a sampling rate to its index in the capability table.
	Standard rates give their index in mpg123_rates; a rate equal to the
	non-zero forced rate gives the extra index MPG123_RATES; anything else -1.
*/
static int rate2num(mpg123_handle *fr, long r)
{
	int idx = 0;
	while(idx < MPG123_RATES)
	{
		if(mpg123_rates[idx] == r) return idx;
		++idx;
	}
	if(fr->p.force_rate != 0 && fr->p.force_rate == r) return MPG123_RATES;
	return -1;
}
/*
	Look for an enabled encoding for the rate and channel count in nf,
	checking encoding indices f0 (inclusive) to f2 (exclusive) in order.
	On success the encoding is stored in nf and 1 is returned, otherwise 0.
*/
static int cap_fit(mpg123_handle *fr, struct audioformat *nf, int f0, int f2)
{
	int enc;
	int chan_idx = nf->channels-1;
	int rate_idx = rate2num(fr, nf->rate);

	/* Unknown rate: nothing can match. */
	if(rate_idx < 0) return 0;

	for(enc=f0; enc<f2; enc++)
	{
		if(!fr->p.audio_caps[chan_idx][rate_idx][enc]) continue;
		nf->encoding = mpg123_encodings[enc];
		return 1;
	}
	return 0;
}
/*
	Try to find a supported format at the native rate (shifted by the
	configured down_sample), then at half and at a quarter of that.
	Returns 1 with nf filled on the first match, 0 if none of the three fits.
*/
static int freq_fit(mpg123_handle *fr, struct audioformat *nf, int f0, int f2)
{
	int attempt;
	nf->rate = frame_freq(fr)>>fr->p.down_sample;
	for(attempt=0; attempt<3; attempt++)
	{
		/* Every retry halves the rate of the previous attempt. */
		if(attempt > 0) nf->rate>>=1;
		if(cap_fit(fr,nf,f0,f2)) return 1;
	}
	return 0;
}
/* match constraints against supported audio formats, store possible setup in frame
return: -1: error; 0: no format change; 1: format change */
/*
	Pick rate, channel count and encoding for the output from the stream
	properties, the flags in fr->p and the enabled formats in fr->p.audio_caps.
	16 bit encodings are preferred over 8 bit ones, the wanted channel count
	over the alternative one.
	Returns 1 and updates fr->af if the format changed, 0 if it stayed the
	same, -1 (with fr->err = MPG123_BAD_OUTFORMAT) if nothing fits.
*/
int frame_output_format(mpg123_handle *fr)
{
struct audioformat nf;
/* First encoding index to consider; raised to 2 when 8 bit output is forced. */
int f0=0;
mpg123_pars *p = &fr->p;
/* initialize new format, encoding comes later */
nf.channels = fr->stereo;
if(p->flags & MPG123_FORCE_8BIT) f0 = 2; /* skip the 16bit encodings */
/* force stereo is stronger */
if(p->flags & MPG123_FORCE_MONO) nf.channels = 1;
if(p->flags & MPG123_FORCE_STEREO) nf.channels = 2;
/* A forced rate is tried as-is, without the half/quarter fallbacks. */
if(p->force_rate)
{
nf.rate = p->force_rate;
if(cap_fit(fr,&nf,f0,2)) goto end; /* 16bit encodings */
if(cap_fit(fr,&nf,2,MPG123_ENCODINGS)) goto end; /* 8bit encodings */
/* try again with different stereoness */
if(nf.channels == 2 && !(p->flags & MPG123_FORCE_STEREO)) nf.channels = 1;
else if(nf.channels == 1 && !(p->flags & MPG123_FORCE_MONO)) nf.channels = 2;
if(cap_fit(fr,&nf,f0,2)) goto end; /* 16bit encodings */
if(cap_fit(fr,&nf,2,MPG123_ENCODINGS)) goto end; /* 8bit encodings */
if(NOQUIET)
error3( "Unable to set up output format! Constraints: %s%s%liHz.",
( p->flags & MPG123_FORCE_STEREO ? "stereo, " :
(p->flags & MPG123_FORCE_MONO ? "mono, " : "") ),
(p->flags & MPG123_FORCE_8BIT ? "8bit, " : ""),
p->force_rate );
/* if(NOQUIET && p->verbose <= 1) print_capabilities(fr); */
fr->err = MPG123_BAD_OUTFORMAT;
return -1;
}
/* No forced rate: freq_fit() tries the native rate and its half and quarter. */
if(freq_fit(fr, &nf, f0, 2)) goto end; /* try rates with 16bit */
if(freq_fit(fr, &nf, 2, MPG123_ENCODINGS)) goto end; /* ... 8bit */
/* try again with different stereoness */
if(nf.channels == 2 && !(p->flags & MPG123_FORCE_STEREO)) nf.channels = 1;
else if(nf.channels == 1 && !(p->flags & MPG123_FORCE_MONO)) nf.channels = 2;
if(freq_fit(fr, &nf, f0, 2)) goto end; /* try rates with 16bit */
if(freq_fit(fr, &nf, 2, MPG123_ENCODINGS)) goto end; /* ... 8bit */
/* Here is the _bad_ end. */
if(NOQUIET)
error5( "Unable to set up output format! Constraints: %s%s%li, %li or %liHz.",
( p->flags & MPG123_FORCE_STEREO ? "stereo, " :
(p->flags & MPG123_FORCE_MONO ? "mono, " : "") ),
(p->flags & MPG123_FORCE_8BIT ? "8bit, " : ""),
frame_freq(fr), frame_freq(fr)>>1, frame_freq(fr)>>2 );
/* if(NOQUIET && p->verbose <= 1) print_capabilities(fr); */
fr->err = MPG123_BAD_OUTFORMAT;
return -1;
end: /* Here is the _good_ end. */
/* we had a successful match, now see if there's a change */
if(nf.rate == fr->af.rate && nf.channels == fr->af.channels && nf.encoding == fr->af.encoding)
return 0; /* the same format as before */
else /* a new format */
{
fr->af.rate = nf.rate;
fr->af.channels = nf.channels;
fr->af.encoding = nf.encoding;
return 1;
}
}
/*
	Hand out the current output format of the handle.
	Each of rate, channels and encoding may be NULL if the caller is not
	interested in that value.
	Returns MPG123_OK, or MPG123_ERR if mh is NULL.
*/
int mpg123_getformat(mpg123_handle *mh, long *rate, int *channels, int *encoding)
{
	if(mh == NULL) return MPG123_ERR;

	/* Only write through the pointers that were actually given. */
	if(rate != NULL) *rate = mh->af.rate;
	if(channels != NULL) *channels = mh->af.channels;
	if(encoding != NULL) *encoding = mh->af.encoding;
	return MPG123_OK;
}
/*
	Disable all output formats by clearing the whole capability table.
	Returns MPG123_OK, or MPG123_ERR if mh is NULL.
*/
int mpg123_format_none(mpg123_handle *mh)
{
	if(mh != NULL)
	{
		memset(mh->p.audio_caps,0,sizeof(mh->p.audio_caps));
		return MPG123_OK;
	}
	return MPG123_ERR;
}
/*
	Enable all output formats by setting every byte of the capability table to 1.
	Returns MPG123_OK, or MPG123_ERR if mh is NULL.
*/
int mpg123_format_all(mpg123_handle *mh)
{
	if(mh != NULL)
	{
		memset(mh->p.audio_caps,1,sizeof(mh->p.audio_caps));
		return MPG123_OK;
	}
	return MPG123_ERR;
}
/*
	Enable the given encodings for one rate and the given channel configurations.
	ratei:     index into mpg123_rates; a negative value selects the extra slot
	           at index MPG123_RATES
	channels:  combination of MPG123_MONO and MPG123_STEREO
	encodings: combination of encoding bits; every entry of mpg123_encodings
	           that shares a bit with it gets enabled
	Formats are only ever switched on here, never off.
	Returns MPG123_OK, or MPG123_ERR on a NULL handle, a channel argument
	without mono/stereo bit (mh->err = MPG123_BAD_CHANNEL) or a too large
	rate index (mh->err = MPG123_BAD_RATE).
*/
int mpg123_format(mpg123_handle *mh, int ratei, int channels, int encodings)
{
	int ie, ic;
	int ch[2] = {0, 1};

	/* Without a handle there is not even a place to store an error code. */
	if(mh == NULL) return MPG123_ERR;

	if(!(channels & (MPG123_MONO|MPG123_STEREO)))
	{
		mh->err = MPG123_BAD_CHANNEL;
		return MPG123_ERR;
	}
	if(!(channels & MPG123_STEREO)) ch[1] = 0; /* {0,0} */
	else if(!(channels & MPG123_MONO)) ch[0] = 1; /* {1,1} */
	if(ratei >= MPG123_RATES)
	{
		mh->err = MPG123_BAD_RATE;
		return MPG123_ERR;
	}
	if(ratei < 0) ratei = MPG123_RATES; /* the special one */

	/* now match the encodings */
	for(ic = 0; ic < 2; ++ic)
	{
		for(ie = 0; ie < MPG123_ENCODINGS; ++ie)
			if(mpg123_encodings[ie] & encodings) mh->p.audio_caps[ch[ic]][ratei][ie] = 1;

		if(ch[0] == ch[1]) break; /* no need to do it again */
	}
	return MPG123_OK;
}
/*
	Query which channel configurations are enabled for a rate index and an
	encoding index. A negative ratei selects the extra slot at MPG123_RATES.
	Returns a combination of MPG123_MONO and MPG123_STEREO, or 0 for no
	support and for invalid arguments.
*/
int mpg123_format_support(mpg123_handle *mh, int ratei, int enci)
{
	int supported = 0;

	if(mh == NULL) return 0;
	if(ratei >= MPG123_RATES) return 0;
	if(enci < 0 || enci >= MPG123_ENCODINGS) return 0;

	if(ratei < 0) ratei = MPG123_RATES; /* the special one */

	if(mh->p.audio_caps[0][ratei][enci]) supported |= MPG123_MONO;
	if(mh->p.audio_caps[1][ratei][enci]) supported |= MPG123_STEREO;
	return supported;
}

920
src/libmpg123/frame.c Normal file
View File

@@ -0,0 +1,920 @@
#include "mpg123lib_intern.h"
#include "getcpuflags.h"
#define IGNORESHIFT 2
/* that's doubled in decode_ntom.c */
#define NTOM_MUL (32768)
/*
	Round pointer p up to the next multiple of alignment (in bytes) and give
	it back as type*; an already aligned pointer is returned unchanged.
	The whole expansion is wrapped in parentheses so that the macro can be
	used inside larger expressions (comparisons, arithmetic) without the
	conditional operator swallowing the neighbouring operands.
	Note that p and alignment are evaluated more than once.
*/
#define aligned_pointer(p,type,alignment) \
	( (((char*)(p)-(char*)NULL) % (alignment)) \
	? (type*)((char*)(p) + (alignment) - (((char*)(p)-(char*)NULL) % (alignment))) \
	: (type*)(p) )
/* Fill a parameter set with the defaults: full output scale, everything else zero. */
void frame_default_pars(mpg123_pars *mp)
{
	/* Output scaling and volume adjustment. */
	mp->outscale = MAXOUTBURST;
	mp->rva = 0;

	/* No flags, no forced rate, no downsampling. */
	mp->flags = 0;
	mp->force_rate = 0;
	mp->down_sample = 0;

	/* Normal playback speed. */
	mp->halfspeed = 0;
	mp->doublespeed = 0;

	/* Quiet, and no ICY metadata interval. */
	mp->verbose = 0;
	mp->icy_interval = 0;
}
/* Initialize a handle with default parameters; same as frame_init_par() without a parameter set. */
void frame_init(mpg123_handle *fr)
{
	mpg123_pars *no_pars = NULL;
	frame_init_par(fr, no_pars);
}
/*
	Bring a handle into its initial state: no buffers allocated, default
	decoder choice, neutral equalizer, no output format, no pending error.
	mp: parameter set that is copied into the handle; NULL selects the defaults.
	The statement order matters: mpg123_format_all() enables all formats in
	fr->p first, then the parameters are put in place - a given mp overwrites
	fr->p as a whole, the defaults leave the format table alone.
*/
void frame_init_par(mpg123_handle *fr, mpg123_pars *mp)
{
fr->fresh = 1;
fr->new_format = 0;
/* Nothing allocated yet; the pointers are NULL so that later allocation/free code can test them. */
fr->own_buffer = FALSE;
fr->buffer.data = NULL;
fr->rawbuffs = NULL;
fr->rawdecwin = NULL;
fr->conv16to8_buf = NULL;
/* Start with the default decoder; mmx, sse and dreidnowext form the mmxsse class. */
fr->cpu_opts.type = defopt;
fr->cpu_opts.class = (defopt == mmx || defopt == sse || defopt == dreidnowext) ? mmxsse : normal;
/* these two look unnecessary, check guarantee for synth_ntom_set_step (in control_generic, even)! */
fr->ntom_val[0] = NTOM_MUL>>1;
fr->ntom_val[1] = NTOM_MUL>>1;
fr->ntom_step = NTOM_MUL;
/* unnecessary: fr->buffer.size = fr->buffer.fill = 0; */
fr->lastscale = -1;
mpg123_reset_eq(fr);
fr->rd = NULL;
init_icy(&fr->icy);
init_id3(fr);
/* frame_outbuffer is missing... */
/* frame_buffers is missing... that one needs cpu opt setting! */
/* after these... frame_reset is needed before starting full decode */
fr->af.encoding = 0;
fr->af.rate = 0;
fr->af.channels = 0;
/* These ICY fields are set here after init_icy() has already been called on the structure. */
fr->icy.data = NULL;
fr->icy.interval = 0;
fr->icy.next = 0;
fr->to_decode = FALSE;
fr->to_ignore = FALSE;
fr->decoder_change = 1;
fr->err = MPG123_OK;
mpg123_format_all(fr);
if(mp == NULL) frame_default_pars(&fr->p);
else memcpy(&fr->p, mp, sizeof(struct mpg123_pars_struct));
}
/*
	Allocate a parameter set and fill it with the defaults.
	error: optional; receives MPG123_OK or MPG123_OUT_OF_MEM.
	Returns the new parameter set or NULL if the allocation failed.
*/
mpg123_pars *mpg123_new_pars(int *error)
{
	int status = MPG123_OUT_OF_MEM;
	mpg123_pars *mp = malloc(sizeof(struct mpg123_pars_struct));

	if(mp != NULL)
	{
		frame_default_pars(mp);
		status = MPG123_OK;
	}
	if(error != NULL) *error = status;
	return mp;
}
/* Free a parameter set; NULL is accepted and ignored. */
void mpg123_delete_pars(mpg123_pars* mp)
{
	/* free() does nothing for a null pointer, so no separate check is needed. */
	free(mp);
}
/*
	Reset the equalizer: all 32 bands of both channels get factor 1.0 and
	the flag for active equalizer settings is cleared.
	Returns MPG123_OK, or MPG123_ERR if mh is NULL.
*/
int mpg123_reset_eq(mpg123_handle *mh)
{
	int i;

	if(mh == NULL) return MPG123_ERR;

	mh->have_eq_settings = 0;
	for(i=0; i < 32; ++i)
		mh->equalizer[0][i] = mh->equalizer[1][i] = DOUBLE_TO_REAL(1.0);

	return MPG123_OK;
}
/*
	Make sure the handle has an output buffer of its own with the size
	mpg123_safe_buffer()*AUDIOBUFSIZE. A buffer not owned by the handle is
	dropped (not freed); an own buffer of the right size is reused.
	Returns 0 on success, -1 with fr->err = MPG123_OUT_OF_MEM on failure.
*/
int frame_outbuffer(mpg123_handle *fr)
{
	size_t wanted = mpg123_safe_buffer()*AUDIOBUFSIZE;

	/* Forget about a foreign buffer. */
	if(!fr->own_buffer) fr->buffer.data = NULL;

	/* An own buffer of the wrong size has to go. */
	if(fr->buffer.data != NULL && fr->buffer.size != wanted)
	{
		free(fr->buffer.data);
		fr->buffer.data = NULL;
	}

	fr->buffer.size = wanted;
	if(fr->buffer.data == NULL)
	{
		fr->buffer.data = (unsigned char*) malloc(fr->buffer.size);
		if(fr->buffer.data == NULL)
		{
			fr->err = MPG123_OUT_OF_MEM;
			return -1;
		}
	}

	fr->own_buffer = TRUE;
	fr->buffer.fill = 0;
	return 0;
}
/*
	Use a buffer provided by the caller as output buffer.
	data: the buffer, must not be NULL
	size: its size in bytes, at least mpg123_safe_buffer()
	A buffer owned by the handle is freed; the new one is never freed by the handle.
	Returns MPG123_OK, or MPG123_ERR on a NULL handle or a bad buffer
	(then with mh->err = MPG123_BAD_BUFFER).
*/
int mpg123_replace_buffer(mpg123_handle *mh, unsigned char *data, size_t size)
{
	/* Check the handle first: the error code below is stored in it. */
	if(mh == NULL) return MPG123_ERR;

	if(data == NULL || size < mpg123_safe_buffer())
	{
		mh->err = MPG123_BAD_BUFFER;
		return MPG123_ERR;
	}

	if(mh->own_buffer && mh->buffer.data != NULL) free(mh->buffer.data);

	mh->own_buffer = FALSE;
	mh->buffer.data = data;
	mh->buffer.size = size;
	mh->buffer.fill = 0;
	return MPG123_OK;
}
/*
	Allocate the decoder-internal buffers of a handle: the synth buffer
	(fr->rawbuffs, with the typed views short_buffs/real_buffs/... pointing
	into it) and the decode window (fr->rawdecwin, fr->decwin, ...).
	The sizes depend on fr->cpu_opts, so the decoder has to be chosen before.
	Ends with frame_buffers_reset() to clear the buffers.
	Returns 0 on success, -1 if an allocation failed (fr->err is not set here).
*/
int frame_buffers(mpg123_handle *fr)
{
int buffssize = 0;
debug1("frame %p buffer", (void*)fr);
/*
the used-to-be-static buffer of the synth functions, has some subtly different types/sizes
2to1, 4to1, ntom, generic, i386: real[2][2][0x110]
mmx, sse: short[2][2][0x110]
i586(_dither): 4352 bytes; int/long[2][2][0x110]
i486: int[2][2][17*FIR_BUFFER_SIZE]
altivec: static real __attribute__ ((aligned (16))) buffs[4][4][0x110]
Huh, altivec looks like fun. Well, let it be large... then, the 16 byte alignment seems to be implicit on MacOSX malloc anyway.
Let's make a reasonable attempt to allocate enough memory...
Keep in mind: biggest ones are i486 and altivec (mutually exclusive!), then follows i586 and normal real.
mmx/sse use short but also real for resampling.
Thus, minimum is 2*2*0x110*sizeof(real).
*/
if(fr->cpu_opts.type == altivec) buffssize = 4*4*0x110*sizeof(real);
#ifdef OPT_I486
else if(fr->cpu_opts.type == ivier) buffssize = 2*2*17*FIR_BUFFER_SIZE*sizeof(int);
#endif
else if(fr->cpu_opts.type == ifuenf || fr->cpu_opts.type == ifuenf_dither || fr->cpu_opts.type == dreidnow)
buffssize = 2*2*0x110*4; /* don't rely on type real, we need 4352 bytes */
/* Never go below the size needed for real[2][2][0x110]. */
if(2*2*0x110*sizeof(real) > buffssize)
buffssize = 2*2*0x110*sizeof(real);
/* An existing buffer is only kept if it has exactly the wanted size. */
if(fr->rawbuffs != NULL && fr->rawbuffss != buffssize)
{
free(fr->rawbuffs);
fr->rawbuffs = NULL;
}
if(fr->rawbuffs == NULL) fr->rawbuffs = (unsigned char*) malloc(buffssize);
if(fr->rawbuffs == NULL) return -1;
fr->rawbuffss = buffssize;
/* The short and the real view both start at the beginning of rawbuffs, they overlay the same memory. */
fr->short_buffs[0][0] = (short*) fr->rawbuffs;
fr->short_buffs[0][1] = fr->short_buffs[0][0] + 0x110;
fr->short_buffs[1][0] = fr->short_buffs[0][1] + 0x110;
fr->short_buffs[1][1] = fr->short_buffs[1][0] + 0x110;
fr->real_buffs[0][0] = (real*) fr->rawbuffs;
fr->real_buffs[0][1] = fr->real_buffs[0][0] + 0x110;
fr->real_buffs[1][0] = fr->real_buffs[0][1] + 0x110;
fr->real_buffs[1][1] = fr->real_buffs[1][0] + 0x110;
#ifdef OPT_I486
if(fr->cpu_opts.type == ivier)
{
fr->int_buffs[0][0] = (int*) fr->rawbuffs;
fr->int_buffs[0][1] = fr->int_buffs[0][0] + 17*FIR_BUFFER_SIZE;
fr->int_buffs[1][0] = fr->int_buffs[0][1] + 17*FIR_BUFFER_SIZE;
fr->int_buffs[1][1] = fr->int_buffs[1][0] + 17*FIR_BUFFER_SIZE;
}
#endif
#ifdef OPT_ALTIVEC
if(fr->cpu_opts.type == altivec)
{
int i,j;
fr->areal_buffs[0][0] = (real*) fr->rawbuffs;
for(i=0; i<4; ++i) for(j=0; j<4; ++j)
fr->areal_buffs[i][j] = fr->areal_buffs[0][0] + (i*4+j)*0x110;
}
#endif
/* now the different decwins... all of the same size, actually */
/* The MMX ones want 32byte alignment, which I'll try to ensure manually */
{
int decwin_size = (512+32)*sizeof(real);
/* Unlike rawbuffs, the window memory is always freed and allocated anew. */
if(fr->rawdecwin != NULL) free(fr->rawdecwin);
#ifdef OPT_MMXORSSE
#ifdef OPT_MULTI
if(fr->cpu_opts.class == mmxsse)
{
#endif
/* decwin_mmx will share, decwins will be appended ... sizeof(float)==4 */
if(decwin_size < (512+32)*4) decwin_size = (512+32)*4;
decwin_size += (512+32)*4 + 32; /* the second window + alignment zone */
/* (512+32)*4/32 == 2176/32 == 68, so one decwin block retains alignment */
#ifdef OPT_MULTI
}
#endif
#endif
fr->rawdecwin = (unsigned char*) malloc(decwin_size);
if(fr->rawdecwin == NULL) return -1;
fr->decwin = (real*) fr->rawdecwin;
#ifdef OPT_MMXORSSE
#ifdef OPT_MULTI
if(fr->cpu_opts.class == mmxsse)
{
#endif
/* align decwin, assign that to decwin_mmx, append decwins */
/* I need to add to decwin what is missing to the next full 32 byte -- also I want to make gcc -pedantic happy... */
fr->decwin = aligned_pointer(fr->rawdecwin,real,32);
debug1("aligned decwin: %p", (void*)fr->decwin);
fr->decwin_mmx = (float*)fr->decwin;
fr->decwins = fr->decwin_mmx+512+32;
#ifdef OPT_MULTI
}
else debug("no decwins/decwin_mmx for that class");
#endif
#endif
}
frame_buffers_reset(fr);
debug1("frame %p buffer done", (void*)fr);
return 0;
}
/*
	Clear the decoder-internal buffers of a handle and reset the output
	fill and the bitstream buffer bookkeeping. The decode windows are left
	alone. Always returns 0.
*/
int frame_buffers_reset(mpg123_handle *fr)
{
	/* hm, reset buffer fill... did we do a flush? */
	fr->buffer.fill = 0;

	/* Bitstream buffers: start over with the second half as current and old buffer. */
	fr->bsnum = 0;
	/* Wondering: could it be actually _wanted_ to retain buffer contents over different files? (special gapless / cut stuff) */
	fr->bsbuf = fr->bsspace[1];
	fr->bsbufold = fr->bsbuf;
	memset(fr->bsspace, 0, 2*(MAXFRAMESIZE+512));
	memset(fr->ssave, 0, 34);

	/* Synth buffers in their allocated size. */
	memset(fr->rawbuffs, 0, fr->rawbuffss);

	/* Hybrid synthesis state. */
	fr->hybrid_blc[0] = fr->hybrid_blc[1] = 0;
	memset(fr->hybrid_block, 0, sizeof(real)*2*2*SBLIMIT*SSLIMIT);

	/* Not totally, but quite, sure that decwin(s) doesn't need cleaning. */
	return 0;
}
/* Drop stored ICY data and reset the ICY interval bookkeeping of the handle. */
void frame_icy_reset(mpg123_handle* fr)
{
	/* free() ignores a null pointer, so this also covers the case of no data. */
	free(fr->icy.data);
	fr->icy.data = NULL;
	fr->icy.interval = 0;
	fr->icy.next = 0;
}
/* Prepare the handle for a new track.
That includes (re)allocation or reuse of the output buffer */
/*
	Reset all per-track state of a handle so that decoding of a new track
	can start: buffers, metadata, stream statistics, RVA info, seek index
	and the frame range to decode. Always returns 0.
*/
int frame_reset(mpg123_handle* fr)
{
	/* Buffers and metadata. */
	frame_buffers_reset(fr);
	frame_icy_reset(fr);
	fr->metaflags = 0;
	fr->outblock = mpg123_safe_buffer();

	/* Stream position, header memory and statistics. */
	fr->num = -1;
	fr->clip = 0;
	fr->oldhead = 0;
	fr->firsthead = 0;
	fr->vbr = MPG123_CBR;
	fr->abr_rate = 0;
	fr->track_frames = 0;
	fr->mean_frames = 0;
	fr->mean_framesize = 0;
	fr->lastscale = -1;

	/* No RVA information known. */
	fr->rva.level[0] = -1;
	fr->rva.level[1] = -1;
	fr->rva.gain[0] = 0;
	fr->rva.gain[1] = 0;
	fr->rva.peak[0] = 0;
	fr->rva.peak[1] = 0;

	/* Empty seek index. */
	fr->index.fill = 0;
	fr->index.step = 1;

	fr->fsizeold = 0;
	fr->do_recover = 0;

	/* Decode from the first frame on, without a defined end. */
	fr->firstframe = 0;
	fr->ignoreframe = fr->firstframe-IGNORESHIFT;
	fr->lastframe = -1;
	fr->fresh = 1;
	fr->new_format = 0;
#ifdef GAPLESS
	frame_gapless_init(fr,0,0);
	fr->lastoff = 0;
	fr->firstoff = 0;
#endif

	/* Synth buffer offsets; the i486 decoder wants different start values. */
#ifdef OPT_I486
	fr->bo[0] = fr->bo[1] = FIR_SIZE-1;
#else
	fr->bo[0] = 1; /* the usual bo */
	fr->bo[1] = 0; /* ditherindex */
#endif

	reset_id3(fr);
	reset_icy(&fr->icy);
	fr->halfphase = 0; /* here or indeed only on first-time init? */
	fr->to_decode = FALSE;
	return 0;
}
/* Free the synth buffer, the decode window and the 16-to-8 bit conversion table, leaving NULL pointers behind. */
void frame_free_buffers(mpg123_handle *fr)
{
	/* free() ignores null pointers, so buffers that were never allocated are fine. */
	free(fr->rawbuffs);
	fr->rawbuffs = NULL;

	free(fr->rawdecwin);
	fr->rawdecwin = NULL;

	free(fr->conv16to8_buf);
	fr->conv16to8_buf = NULL;
}
/*
	Release everything the handle has allocated: the output buffer (only if
	owned by the handle), the decoder buffers and the ID3 and ICY data.
*/
void frame_exit(mpg123_handle *fr)
{
	/* A buffer provided from outside is only forgotten, not freed. */
	if(fr->own_buffer && fr->buffer.data != NULL)
	{
		free(fr->buffer.data);
	}
	fr->buffer.data = NULL;

	frame_free_buffers(fr);
	exit_id3(fr);
	clear_icy(&fr->icy);
}
/*
	Print the seek index of a handle to out, one line per entry:
	entry number, frame number, stored position and the difference to the
	previous entry (0 for the first one).
	Returns MPG123_OK, or MPG123_ERR if fr is NULL.
*/
int mpg123_print_index(mpg123_handle *fr, FILE* out)
{
	size_t c;

	if(fr == NULL) return MPG123_ERR;

	for(c=0; c < fr->index.fill; ++c)
	{
		long delta = (long) (c ? fr->index.data[c]-fr->index.data[c-1] : 0);
		fprintf(out, "[%lu] %lu: %li (+%li)\n", (unsigned long) c, (unsigned long) c*fr->index.step, (long)fr->index.data[c], delta);
	}
	return MPG123_OK;
}
/*
	Fill mi with the properties of the current frame as stored in the
	handle: MPEG version, layer, rate, channel mode, frame size including
	the 4 header bytes, flags, emphasis and bitrate information.
	Returns MPG123_OK, or MPG123_ERR on a NULL handle or a NULL mi
	(the latter with mh->err = MPG123_ERR_NULL).
*/
int mpg123_info(mpg123_handle *mh, struct mpg123_frameinfo *mi)
{
	if(mh == NULL) return MPG123_ERR;
	if(mi == NULL)
	{
		mh->err = MPG123_ERR_NULL;
		return MPG123_ERR;
	}

	/* MPEG 2.5 takes precedence over the lsf flag. */
	if(mh->mpeg25) mi->version = MPG123_2_5;
	else mi->version = (mh->lsf ? MPG123_2_0 : MPG123_1_0);

	mi->layer = mh->lay;
	mi->rate = frame_freq(mh);

	switch(mh->mode)
	{
		case 0: mi->mode = MPG123_M_STEREO; break;
		case 1: mi->mode = MPG123_M_JOINT; break;
		case 2: mi->mode = MPG123_M_DUAL; break;
		case 3: mi->mode = MPG123_M_MONO; break;
		default: error("That mode cannot be!");
	}
	mi->mode_ext = mh->mode_ext;
	mi->framesize = mh->framesize+4; /* Include header. */

	/* Collect the header bits into the flags field. */
	mi->flags = 0;
	if(mh->error_protection) mi->flags |= MPG123_CRC;
	if(mh->copyright) mi->flags |= MPG123_COPYRIGHT;
	if(mh->extension) mi->flags |= MPG123_PRIVATE;
	if(mh->original) mi->flags |= MPG123_ORIGINAL;

	mi->emphasis = mh->emphasis;
	mi->bitrate = frame_bitrate(mh);
	mi->abr_rate = mh->abr_rate;
	mi->vbr = mh->vbr;
	return MPG123_OK;
}
/*
find the best frame in index just before the wanted one, seek to there
then step to just before wanted one with read_frame
do not care about the stuff that was in buffer but not played back
everything that left the decoder is counted as played
Decide if you want low latency reaction and accurate timing info or stable long-time playback with buffer!
*/
/*
	Look up the index entry to start from for reaching want_frame.
	get_frame receives the frame number of that entry, the return value is
	the position stored for it. With an empty index both are 0; a wanted
	frame beyond the index yields the last entry.
*/
off_t frame_index_find(mpg123_handle *fr, off_t want_frame, off_t* get_frame)
{
	/* default is file start if no index position */
	off_t gopos = 0;
	*get_frame = 0;

	if(fr->index.fill)
	{
		/* at index slot there is frame step*slot... */
		size_t slot = want_frame/fr->index.step;
		if(slot >= fr->index.fill) slot = fr->index.fill - 1;

		*get_frame = slot*fr->index.step;
		gopos = fr->index.data[slot];
	}
	debug2("index: 0x%lx for frame %li", (unsigned long)gopos, (long) *get_frame);
	return gopos;
}
/*
	Convert a count of input samples into output samples for the current
	down_sample mode: modes 0 to 2 shift right by the mode, mode 3 asks
	ntom_ins2outs(). Any other mode is reported as error and yields 0.
*/
off_t frame_ins2outs(mpg123_handle *fr, off_t ins)
{
	switch(fr->down_sample)
	{
		case 0:
		case 1:
		case 2:
			return ins>>fr->down_sample;
		case 3:
			return ntom_ins2outs(fr, ins);
		default:
			error("Bad down_sample ... should not be possible!!");
			return 0;
	}
}
/*
	Count of output samples produced by num frames in the current
	down_sample mode: modes 0 to 2 use the samples per frame shifted right
	by the mode, mode 3 asks ntom_frmouts(). Any other mode is reported as
	error and yields 0.
*/
off_t frame_outs(mpg123_handle *fr, off_t num)
{
	switch(fr->down_sample)
	{
		case 0:
		case 1:
		case 2:
			return (spf(fr)>>fr->down_sample)*num;
		case 3:
			return ntom_frmouts(fr, num);
		default:
			error("Bad down_sample ... should not be possible!!");
			return 0;
	}
}
/*
	Frame number that contains the given output sample offset in the
	current down_sample mode: modes 0 to 2 divide by the shifted samples
	per frame, mode 3 asks ntom_frameoff(). Any other mode is reported as
	error and yields 0.
*/
off_t frame_offset(mpg123_handle *fr, off_t outs)
{
	switch(fr->down_sample)
	{
		case 0:
		case 1:
		case 2:
			return outs/(spf(fr)>>fr->down_sample);
		case 3:
			return ntom_frameoff(fr, outs);
		default:
			error("Bad down_sample ... should not be possible!!");
			return 0;
	}
}
#ifdef GAPLESS
/* input in _input_ samples */
/*
	Store the gapless range given in input samples: b is the begin, e the end.
	The corresponding output sample positions are reset to 0 here and
	computed later by frame_gapless_realinit().
*/
void frame_gapless_init(mpg123_handle *fr, off_t b, off_t e)
{
	fr->begin_s = b;
	fr->end_s = e;
	/* These will get proper values later, from above plus resampling info. */
	fr->begin_os = 0;
	fr->end_os = 0;
	/* off_t has no printf conversion of its own: cast to long for %li, as done for the other debug messages in this file. */
	debug2("frame_gapless_init: from %li to %li samples", (long) fr->begin_s, (long) fr->end_s);
}
/* Compute the gapless range in output samples from the stored input sample range, using the current resampling mode. */
void frame_gapless_realinit(mpg123_handle *fr)
{
	fr->begin_os = frame_ins2outs(fr, fr->begin_s);
	fr->end_os = frame_ins2outs(fr, fr->end_s);
	/* off_t has no printf conversion of its own: cast to long for %li, as done for the other debug messages in this file. */
	debug2("frame_gapless_realinit: from %li to %li samples", (long) fr->begin_os, (long) fr->end_os);
}
#endif
/* The frame seek... This is not simply the seek to fe*spf(fr) samples in output because we think of _input_ frames here.
Seek to frame offset 1 may be just seek to 200 samples offset in output since the beginning of first frame is delay/padding.
Hm, is that right? OK for the padding stuff, but actually, should the decoder delay be better totally hidden or not?
With gapless, even the whole frame position could be advanced further than requested (since Homey don't play dat). */
/*
	Prepare the decoder for starting at input frame fe.
	With gapless decoding active, the start is moved up to the frame that
	contains the gapless begin if fe lies at or before it, and the last
	frame/offset are derived from the gapless end (if one is set).
	For layer 3, decoding (with ignored output) starts IGNORESHIFT frames
	before the first wanted frame.
*/
void frame_set_frameseek(mpg123_handle *fr, off_t fe)
{
	fr->firstframe = fe;
#ifdef GAPLESS
	if(!(fr->p.flags & MPG123_GAPLESS))
	{
		/* Plain mode: no sample offsets, no defined last frame. */
		fr->firstoff = fr->lastoff = 0;
		fr->lastframe = -1;
	}
	else
	{
		/* Take care of the beginning... */
		off_t beg_f = frame_offset(fr, fr->begin_os);
		if(fe <= beg_f)
		{
			fr->firstframe = beg_f;
			fr->firstoff = fr->begin_os - frame_outs(fr, beg_f);
		}
		else fr->firstoff = 0;

		/* The end is set once for a track at least, on the frame_set_frameseek called in get_next_frame() */
		if(fr->end_os > 0)
		{
			fr->lastframe = frame_offset(fr,fr->end_os);
			fr->lastoff = fr->end_os - frame_outs(fr, fr->lastframe);
		}
		else fr->lastoff = 0;
	}
#endif
	if(fr->lay == 3) fr->ignoreframe = fr->firstframe-IGNORESHIFT;
	else fr->ignoreframe = fr->firstframe;
#ifdef GAPLESS
	debug5("frame_set_frameseek: begin at %li frames and %li samples, end at %li and %li; ignore from %li",
	       (long) fr->firstframe, (long) fr->firstoff,
	       (long) fr->lastframe, (long) fr->lastoff, (long) fr->ignoreframe);
#else
	debug3("frame_set_frameseek: begin at %li frames, end at %li; ignore from %li",
	       (long) fr->firstframe, (long) fr->lastframe, (long) fr->ignoreframe);
#endif
}
/* Sample accurate seek prepare for decoder. */
/* This gets unadjusted output samples and takes resampling into account */
/*
	Prepare the decoder for a seek to output sample sp (not adjusted for
	gapless decoding): determine the frame containing it and, if built with
	GAPLESS, the remaining sample offset into that frame. For layer 3,
	decoding starts IGNORESHIFT frames earlier.
*/
void frame_set_seek(mpg123_handle *fr, off_t sp)
{
	fr->firstframe = frame_offset(fr, sp);
	if(fr->lay == 3) fr->ignoreframe = fr->firstframe-IGNORESHIFT;
	else fr->ignoreframe = fr->firstframe;
#ifdef GAPLESS /* The sample offset is used for non-gapless mode, too! */
	fr->firstoff = sp - frame_outs(fr, fr->firstframe);
	debug5("frame_set_seek: begin at %li frames and %li samples, end at %li and %li; ignore from %li",
	       (long) fr->firstframe, (long) fr->firstoff,
	       (long) fr->lastframe, (long) fr->lastoff, (long) fr->ignoreframe);
#else
	debug3("frame_set_seek: begin at %li frames, end at %li; ignore from %li",
	       (long) fr->firstframe, (long) fr->lastframe, (long) fr->ignoreframe);
#endif
}
/* Unadjusted! */
/*
	Output sample position (unadjusted) that the decoder is set up to start
	at: the samples before the first frame plus, if built with GAPLESS, the
	sample offset into it.
*/
off_t frame_tell_seek(mpg123_handle *fr)
{
	off_t where = frame_outs(fr, fr->firstframe);
#ifdef GAPLESS
	where = where + fr->firstoff;
#endif
	return where;
}
/* to vanish */
/* Directly set the output format of the handle: encoding (format), channel count and rate. */
void frame_outformat(mpg123_handle *fr, int format, int channels, long rate)
{
	fr->af.rate = rate;
	fr->af.channels = channels;
	fr->af.encoding = format;
}
/* set synth functions for current frame, optimizations handled by opt_* macros */
/*
	Select the synth functions of the handle according to its down_sample
	mode, output encoding (8 bit or not) and channel count, and set up the
	16-to-8 bit conversion table if 8 bit output is wanted.
	Returns 0 on success, -1 if the conversion table could not be made.
*/
int set_synth_functions(mpg123_handle *fr)
{
	int ds = fr->down_sample;
	int p8=0;
	/*
		The lookup tables are automatic, not static: the 1to1 entries are
		filled in per handle below, and tables shared by all handles would
		be modified by every call.
		First index: 0 for 16 bit, 1 for 8 bit; second index: down_sample mode.
	*/
	func_synth funcs[2][4] = {
		{ NULL,
		  synth_2to1,
		  synth_4to1,
		  synth_ntom } ,
		{ NULL,
		  synth_2to1_8bit,
		  synth_4to1_8bit,
		  synth_ntom_8bit }
	};
	/* First index: 0 for stereo output (mono2stereo), 1 for mono; then 8 bit flag and down_sample mode. */
	func_synth_mono funcs_mono[2][2][4] = {
		{ { NULL ,
		    synth_2to1_mono2stereo ,
		    synth_4to1_mono2stereo ,
		    synth_ntom_mono2stereo } ,
		  { NULL ,
		    synth_2to1_8bit_mono2stereo ,
		    synth_4to1_8bit_mono2stereo ,
		    synth_ntom_8bit_mono2stereo } } ,
		{ { NULL ,
		    synth_2to1_mono ,
		    synth_4to1_mono ,
		    synth_ntom_mono } ,
		  { NULL ,
		    synth_2to1_8bit_mono ,
		    synth_4to1_8bit_mono ,
		    synth_ntom_8bit_mono } }
	};

	/* possibly non-constant entries filled here */
	funcs[0][0] = (func_synth) opt_synth_1to1(fr);
	funcs[1][0] = (func_synth) opt_synth_1to1_8bit(fr);
	funcs_mono[0][0][0] = (func_synth_mono) opt_synth_1to1_mono2stereo(fr);
	funcs_mono[0][1][0] = (func_synth_mono) opt_synth_1to1_8bit_mono2stereo(fr);
	funcs_mono[1][0][0] = (func_synth_mono) opt_synth_1to1_mono(fr);
	funcs_mono[1][1][0] = (func_synth_mono) opt_synth_1to1_8bit_mono(fr);

	if(MPG123_ENC_8(fr->af.encoding)) p8 = 1;

	fr->synth = funcs[p8][ds];
	fr->synth_mono = funcs_mono[fr->af.channels==2 ? 0 : 1][p8][ds];

	if(p8)
	{
		if(make_conv16to8_table(fr) != 0)
		{
			/* it's a bit more work to get proper error propagation up */
			return -1;
		}
	}
	return 0;
}
/*
	Change the volume relative to the current base volume
	(p.outscale expressed as a fraction of MAXOUTBURST).
	Returns MPG123_ERR for a NULL handle, otherwise the result of mpg123_volume().
*/
int mpg123_volume_change(mpg123_handle *mh, double change)
{
	if(mh != NULL)
	{
		return mpg123_volume(mh, change + (double) mh->p.outscale / MAXOUTBURST);
	}
	return MPG123_ERR;
}
/*
	Set the base volume as a factor of MAXOUTBURST and apply it via do_rva().
	A negative vol leaves the stored base volume untouched, but the
	scale is still re-evaluated.
	Returns MPG123_ERR for a NULL handle, MPG123_OK otherwise.
*/
int mpg123_volume(mpg123_handle *mh, double vol)
{
	if(mh != NULL)
	{
		if(vol >= 0)
		{
			mh->p.outscale = (double) MAXOUTBURST * vol;
		}
		do_rva(mh);
		return MPG123_OK;
	}
	return MPG123_ERR;
}
/*
	Fetch the RVA values that apply for the configured RVA mode (fr->p.rva).
	Slot 1 of the rva arrays is used only in mode 2 and only if its level is
	set (!= -1), slot 0 otherwise.
	peak / gain may be NULL; if given, they receive the chosen values or the
	defaults peak = -1 and gain = 0 when no RVA data applies.
	Returns 1 if RVA data was found, 0 if not.
*/
static int get_rva(mpg123_handle *fr, double *peak, double *gain)
{
	double found_peak = -1;
	double found_gain = 0;
	int have_rva = 0;
	/* Should one assume a zero RVA as no RVA? */
	const int slot = (fr->p.rva == 2 && fr->rva.level[1] != -1) ? 1 : 0;

	if(fr->p.rva && fr->rva.level[slot] != -1)
	{
		found_peak = fr->rva.peak[slot];
		found_gain = fr->rva.gain[slot];
		have_rva = 1; /* Success. */
	}
	if(peak != NULL) *peak = found_peak;
	if(gain != NULL) *gain = found_gain;
	return have_rva;
}
/*
	Adjust the volume, taking both fr->p.outscale and the RVA values into account.
	The decode tables are only rebuilt when the resulting scale differs from
	fr->lastscale.
	NOTE(review): the messages print scale_t values with %li; confirm that
	matches the scale_t definition for every build variant.
*/
void do_rva(mpg123_handle *fr)
{
	double peak = 0;
	double gain = 0;
	double rvafact = 1;
	scale_t newscale;

	if(get_rva(fr, &peak, &gain))
	{
		if(NOQUIET && fr->p.verbose > 1) fprintf(stderr, "Note: doing RVA with gain %f\n", gain);
		/* gain is in dB, convert to a linear amplitude factor */
		rvafact = pow(10,gain/20);
	}
	newscale = fr->p.outscale*rvafact;

	/* get_rva() hands back peak = -1 when nothing is known; with a non-negative scale this check won't hurt then */
	if((peak*newscale) > MAXOUTBURST)
	{
		newscale = (scale_t) ((double) MAXOUTBURST/peak);
		warning2("limiting scale value to %li to prevent clipping with indicated peak factor of %f", newscale, peak);
	}

	/* first rva setting is forced with fr->lastscale < 0 */
	if(newscale == fr->lastscale) return; /* nothing changed, keep the tables */

	debug3("changing scale value from %li to %li (peak estimated to %li)", fr->lastscale != -1 ? fr->lastscale : fr->p.outscale, newscale, (long) (newscale*peak));
	fr->lastscale = newscale;
	opt_make_decode_tables(fr); /* the actual work */
}
/*
	Query volume settings; every output pointer is optional (NULL to skip).
	base:   configured volume, p.outscale relative to MAXOUTBURST
	really: last applied scale (lastscale) relative to MAXOUTBURST
	rva_db: the gain reported by get_rva() (0 if no RVA data applies)
	Returns MPG123_ERR for a NULL handle, MPG123_OK otherwise.
*/
int mpg123_getvolume(mpg123_handle *mh, double *base, double *really, double *rva_db)
{
	if(mh == NULL)
	{
		return MPG123_ERR;
	}
	if(base != NULL)
	{
		*base = (double)mh->p.outscale/MAXOUTBURST;
	}
	if(really != NULL)
	{
		*really = (double)mh->lastscale/MAXOUTBURST;
	}
	get_rva(mh, NULL, rva_db);
	return MPG123_OK;
}
/*
	Choose the decoder optimization for this handle.

	cpu: name of the wanted decoder; NULL, "" or "auto" (case-insensitive)
	     request automatic selection.

	Without OPT_MULTI, the single decoder from mpg123_decoders() is accepted
	if it was asked for by name or via auto selection.
	With OPT_MULTI, candidates are tried in fixed order (3DNowExt, SSE, 3DNow,
	MMX, i586, i586_dither, i486, i386, AltiVec, generic), each only if
	compiled in and, for the x86 extensions, if the CPU flags allow it.
	The first match fills the function pointers in fr->cpu_opts.

	Returns 1 if a decoder was set, 0 if not.

	NOTE(review): only the MMX-family branches set cpu_opts.class; the other
	branches leave whatever value was there before - confirm that is intended.
*/
int frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
	const char* chosen = ""; /* the chosen decoder opt as string */
	int auto_choose = 0;
	int done = 0;
	if(   (cpu == NULL)
	   || (cpu[0] == 0)
	   || !strcasecmp(cpu, "auto") )
	auto_choose = 1;
#ifndef OPT_MULTI
	{
		char **sd = mpg123_decoders(); /* this contains _one_ decoder */
		if(!auto_choose && strcasecmp(cpu, sd[0])) done = 0;
		else
		{
			chosen = sd[0];
			done = 1;
		}
	}
#else
	/* covers any i386+ cpu; they actually differ only in the synth_1to1 function... */
#ifdef OPT_X86
	/* Start out with the plain C helpers, specific decoders override them below. */
#ifdef OPT_MMXORSSE
	fr->cpu_opts.make_decode_tables   = make_decode_tables;
	fr->cpu_opts.init_layer3_gainpow2 = init_layer3_gainpow2;
	fr->cpu_opts.init_layer2_table    = init_layer2_table;
#endif
#ifdef OPT_3DNOW
	fr->cpu_opts.dct36 = dct36;
#endif
#ifdef OPT_3DNOWEXT
	fr->cpu_opts.dct36 = dct36;
#endif
	if(cpu_i586(cpu_flags))
	{
		debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext);
#ifdef OPT_3DNOWEXT
		if(   !done && (auto_choose || !strcasecmp(cpu, "3dnowext"))
		   && cpu_3dnow(cpu_flags)
		   && cpu_3dnowext(cpu_flags)
		   && cpu_mmx(cpu_flags) )
		{
			int go = 1;
			if(fr->p.force_rate)
			{
#if defined(K6_FALLBACK) || defined(PENTIUM_FALLBACK)
				if(!auto_choose){ if(NOQUIET) error("I refuse to choose 3DNowExt as this will screw up with forced rate!"); }
				else if(VERBOSE) fprintf(stderr, "Note: Not choosing 3DNowExt because flexible rate not supported.\n");
				go = 0;
#else
				if(NOQUIET) error("You will hear some awful sound because of flexible rate being chosen with 3DNowExt decoder!");
#endif
			}
			if(go){ /* temporary hack for flexible rate bug, not going indent this - fix it instead! */
			chosen = "3DNowExt";
			fr->cpu_opts.type = dreidnowext;
			fr->cpu_opts.class = mmxsse;
			fr->cpu_opts.dct36 = dct36_3dnowext;
			fr->cpu_opts.synth_1to1 = synth_1to1_3dnowext;
			fr->cpu_opts.dct64 = dct64_mmx; /* only use the 3dnow version in the synth_1to1_sse */
			fr->cpu_opts.make_decode_tables   = make_decode_tables_mmx;
			fr->cpu_opts.init_layer3_gainpow2 = init_layer3_gainpow2_mmx;
			fr->cpu_opts.init_layer2_table    = init_layer2_table_mmx;
			fr->cpu_opts.mpl_dct64 = dct64_3dnowext;
			done = 1;
			}
		}
#endif
#ifdef OPT_SSE
		if(   !done && (auto_choose || !strcasecmp(cpu, "sse"))
		   && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) )
		{
			int go = 1;
			if(fr->p.force_rate)
			{
#ifdef PENTIUM_FALLBACK
				if(!auto_choose){ if(NOQUIET) error("I refuse to choose SSE as this will screw up with forced rate!"); }
				else if(VERBOSE) fprintf(stderr, "Note: Not choosing SSE because flexible rate not supported.\n");
				go = 0;
#else
				if(NOQUIET) error("You will hear some awful sound because of flexible rate being chosen with SSE decoder!");
#endif
			}
			if(go){ /* temporary hack for flexible rate bug, not going indent this - fix it instead! */
			chosen = "SSE";
			fr->cpu_opts.type = sse;
			fr->cpu_opts.class = mmxsse;
			fr->cpu_opts.synth_1to1 = synth_1to1_sse;
			fr->cpu_opts.dct64 = dct64_mmx; /* only use the sse version in the synth_1to1_sse */
			fr->cpu_opts.make_decode_tables   = make_decode_tables_mmx;
			fr->cpu_opts.init_layer3_gainpow2 = init_layer3_gainpow2_mmx;
			fr->cpu_opts.init_layer2_table    = init_layer2_table_mmx;
			fr->cpu_opts.mpl_dct64 = dct64_sse;
			done = 1;
			}
		}
#endif
#ifdef OPT_3DNOW
		/*
			dct36 already got its plain default before the CPU checks.
			It must not be reset at this point: that would undo the
			dct36_3dnowext assignment of a just chosen 3DNowExt decoder.
		*/
		/* TODO: make autodetection for _all_ x86 optimizations (maybe just for i586+ and keep separate 486 build?) */
		/* check cpuflags bit 31 (3DNow!) and 23 (MMX) */
		if(   !done && (auto_choose || !strcasecmp(cpu, "3dnow"))
		   && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) )
		{
			chosen = "3DNow";
			fr->cpu_opts.type = dreidnow;
			fr->cpu_opts.dct36 = dct36_3dnow; /* 3DNow! optimized dct36() */
			fr->cpu_opts.synth_1to1 = synth_1to1_3dnow;
			fr->cpu_opts.dct64 = dct64_i386; /* use the 3dnow one? */
			done = 1;
		}
#endif
#ifdef OPT_MMX
		if(   !done && (auto_choose || !strcasecmp(cpu, "mmx"))
		   && cpu_mmx(cpu_flags) )
		{
			int go = 1;
			if(fr->p.force_rate)
			{
#ifdef PENTIUM_FALLBACK
				if(!auto_choose){ if(NOQUIET) error("I refuse to choose MMX as this will screw up with forced rate!"); }
				else if(VERBOSE) fprintf(stderr, "Note: Not choosing MMX because flexible rate not supported.\n");
				go = 0;
#else
				/* Respect quiet mode like the SSE and 3DNowExt branches do. */
				if(NOQUIET) error("You will hear some awful sound because of flexible rate being chosen with MMX decoder!");
#endif
			}
			if(go){ /* temporary hack for flexible rate bug, not going indent this - fix it instead! */
			chosen = "MMX";
			fr->cpu_opts.type = mmx;
			fr->cpu_opts.class = mmxsse;
			fr->cpu_opts.synth_1to1 = synth_1to1_mmx;
			fr->cpu_opts.dct64 = dct64_mmx;
			fr->cpu_opts.make_decode_tables   = make_decode_tables_mmx;
			fr->cpu_opts.init_layer3_gainpow2 = init_layer3_gainpow2_mmx;
			fr->cpu_opts.init_layer2_table    = init_layer2_table_mmx;
			done = 1;
			}
		}
#endif
#ifdef OPT_I586
		if(!done && (auto_choose || !strcasecmp(cpu, "i586")))
		{
			chosen = "i586/pentium";
			fr->cpu_opts.type = ifuenf;
			fr->cpu_opts.synth_1to1 = synth_1to1_i586;
			fr->cpu_opts.synth_1to1_i586_asm = synth_1to1_i586_asm;
			fr->cpu_opts.dct64 = dct64_i386;
			done = 1;
		}
#endif
#ifdef OPT_I586_DITHER
		if(!done && (auto_choose || !strcasecmp(cpu, "i586_dither")))
		{
			chosen = "dithered i586/pentium";
			fr->cpu_opts.type = ifuenf_dither;
			fr->cpu_opts.synth_1to1 = synth_1to1_i586;
			fr->cpu_opts.dct64 = dct64_i386;
			fr->cpu_opts.synth_1to1_i586_asm = synth_1to1_i586_asm_dither;
			done = 1;
		}
#endif
	}
#ifdef OPT_I486 /* that won't cooperate nicely in multi opt mode - forcing i486 in layer3.c */
	if(!done && (auto_choose || !strcasecmp(cpu, "i486")))
	{
		chosen = "i486";
		fr->cpu_opts.type = ivier;
		fr->cpu_opts.synth_1to1 = synth_1to1_i386; /* i486 function is special */
		fr->cpu_opts.dct64 = dct64_i386;
		done = 1;
	}
#endif
#ifdef OPT_I386
	if(!done && (auto_choose || !strcasecmp(cpu, "i386")))
	{
		chosen = "i386";
		fr->cpu_opts.type = idrei;
		fr->cpu_opts.synth_1to1 = synth_1to1_i386;
		fr->cpu_opts.dct64 = dct64_i386;
		done = 1;
	}
#endif
	if(done) /* set common x86 functions */
	{
		fr->cpu_opts.synth_1to1_mono = synth_1to1_mono_i386;
		fr->cpu_opts.synth_1to1_mono2stereo = synth_1to1_mono2stereo_i386;
		fr->cpu_opts.synth_1to1_8bit = synth_1to1_8bit_i386;
		fr->cpu_opts.synth_1to1_8bit_mono = synth_1to1_8bit_mono_i386;
		fr->cpu_opts.synth_1to1_8bit_mono2stereo = synth_1to1_8bit_mono2stereo_i386;
	}
#endif /* OPT_X86 */
#ifdef OPT_ALTIVEC
	if(!done && (auto_choose || !strcasecmp(cpu, "altivec")))
	{
		chosen = "AltiVec";
		fr->cpu_opts.type = altivec;
		fr->cpu_opts.dct64 = dct64_altivec;
		fr->cpu_opts.synth_1to1 = synth_1to1_altivec;
		fr->cpu_opts.synth_1to1_mono = synth_1to1_mono_altivec;
		fr->cpu_opts.synth_1to1_mono2stereo = synth_1to1_mono2stereo_altivec;
		fr->cpu_opts.synth_1to1_8bit = synth_1to1_8bit_altivec;
		fr->cpu_opts.synth_1to1_8bit_mono = synth_1to1_8bit_mono_altivec;
		fr->cpu_opts.synth_1to1_8bit_mono2stereo = synth_1to1_8bit_mono2stereo_altivec;
		done = 1;
	}
#endif
#ifdef OPT_GENERIC
	if(!done && (auto_choose || !strcasecmp(cpu, "generic")))
	{
		chosen = "generic";
		fr->cpu_opts.type = generic;
		fr->cpu_opts.dct64 = dct64;
		fr->cpu_opts.synth_1to1 = synth_1to1;
		fr->cpu_opts.synth_1to1_mono = synth_1to1_mono;
		fr->cpu_opts.synth_1to1_mono2stereo = synth_1to1_mono2stereo;
		fr->cpu_opts.synth_1to1_8bit = synth_1to1_8bit;
		fr->cpu_opts.synth_1to1_8bit_mono = synth_1to1_8bit_mono;
		fr->cpu_opts.synth_1to1_8bit_mono2stereo = synth_1to1_8bit_mono2stereo;
		done = 1;
	}
#endif
#endif
	if(done)
	{
		if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);
		return 1;
	}
	else
	{
		if(NOQUIET) error("Could not set optimization!");
		return 0;
	}
}
enum optdec dectype(const char* decoder)
{
if(decoder == NULL) return nodec;
if(!strcasecmp(decoder, "3dnowext")) return dreidnowext;
if(!strcasecmp(decoder, "3dnow")) return dreidnow;
if(!strcasecmp(decoder, "sse")) return sse;
if(!strcasecmp(decoder, "mmx")) return mmx;
if(!strcasecmp(decoder, "generic")) return generic;
if(!strcasecmp(decoder, "altivec")) return altivec;
if(!strcasecmp(decoder, "i386")) return idrei;
if(!strcasecmp(decoder, "i486")) return ivier;
if(!strcasecmp(decoder, "i586")) return ifuenf;
if(!strcasecmp(decoder, "i586_dither")) return ifuenf_dither;
return nodec;
}

375
src/libmpg123/frame.h Normal file
View File

@@ -0,0 +1,375 @@
#ifndef MPG123_FRAME_H
#define MPG123_FRAME_H
#include "mpg123.h"
#include "id3.h"
#include "icy.h"
#include "reader.h"
#include <stdio.h>
/* max = 1728 */
#define MAXFRAMESIZE 3456
/* need the definite optimization flags here */
/*
	Each OPT_<decoder> build flag pulls in the derived helper flags:
	OPT_X86          any x86 decoder is built
	PENTIUM_FALLBACK set by the i386/i486/i586/i586_dither decoders
	K6_FALLBACK      set by the 3DNow decoder
	OPT_PENTIUM      one of the i586 decoders is built
	OPT_MMXORSSE     MMX, SSE or 3DNowExt decoder is built
	OPT_MPLAYER      SSE or 3DNowExt decoder is built (enables the mpl_dct64 pointer)
	OPT_MMX_ONLY     an MMX-class decoder is built without OPT_MULTI
*/
/* i486 implies i386 and brings its FIR buffer sizes */
#ifdef OPT_I486
#define OPT_I386
#define FIR_BUFFER_SIZE  128
#define FIR_SIZE 16
#endif
#ifdef OPT_I386
#define PENTIUM_FALLBACK
#define OPT_X86
#endif
#ifdef OPT_I586
#define PENTIUM_FALLBACK
#define OPT_PENTIUM
#define OPT_X86
#endif
#ifdef OPT_I586_DITHER
#define PENTIUM_FALLBACK
#define OPT_PENTIUM
#define OPT_X86
#endif
#ifdef OPT_MMX
#define OPT_MMXORSSE
#define OPT_X86
#ifndef OPT_MULTI
#define OPT_MMX_ONLY
#endif
#endif
#ifdef OPT_SSE
#define OPT_MMXORSSE
#define OPT_MPLAYER
#define OPT_X86
#ifndef OPT_MULTI
#define OPT_MMX_ONLY
#endif
#endif
#ifdef OPT_3DNOWEXT
#define OPT_MMXORSSE
#define OPT_MPLAYER
#define OPT_X86
#ifndef OPT_MULTI
#define OPT_MMX_ONLY
#endif
#endif
#ifdef OPT_3DNOW
#define K6_FALLBACK
#define OPT_X86
#endif
/* One table entry; the handle points to such a table via its alloc member. */
/* NOTE(review): presumably a bit allocation table entry for layer II - confirm against the layer code. */
struct al_table
{
short bits;
short d;
};
/* Index of offsets with a fixed capacity of INDEX_SIZE entries. */
/* NOTE(review): presumably stream byte offsets of frames, for seeking - confirm with frame_index_find(). */
struct frame_index
{
off_t data[INDEX_SIZE]; /* the stored offsets */
size_t fill; /* presumably the number of used entries - TODO confirm */
off_t step; /* presumably the frame distance between two entries - TODO confirm */
};
/* the output buffer, used to be pcm_sample, pcm_point and audiobufsize */
struct outbuffer
{
unsigned char *data; /* start of the buffer memory */
unsigned char *p; /* read pointer */
size_t fill; /* fill from read pointer */
size_t size; /* that's actually more like a safe size, after we have more than that, flush it */
};
/* Output audio format description (the handle carries one as member af). */
struct audioformat
{
int encoding; /* sample encoding; checked with MPG123_ENC_8() for 8bit output */
int channels; /* channel count; 2 means stereo output */
long rate; /* sampling rate */
};
/* Decoder identifiers; the German-looking names are the x86 variants: idrei = i386, ivier = i486, ifuenf = i586, dreidnow = 3DNow. */
enum optdec { nodec=0, generic, idrei, ivier, ifuenf, ifuenf_dither, mmx, dreidnow, dreidnowext, altivec, sse };
/* Decoder class; mmxsse is set for the MMX, SSE and 3DNowExt decoders. */
enum optcla { nocla=0, normal, mmxsse };
/* User-settable parameters; the handle embeds one of these as member p. */
struct mpg123_pars_struct
{
int verbose; /* verbose level */
long flags; /* combination of above */
long force_rate; /* non-zero: flexible/forced output rate requested */
int down_sample;
int rva; /* (which) rva to do: 0: nothing, 1: radio/mix/track 2: album/audiophile */
long halfspeed;
long doublespeed;
#define NUM_CHANNELS 2
/* indexed by channel configuration, rate and encoding */
char audio_caps[NUM_CHANNELS][MPG123_RATES+1][MPG123_ENCODINGS];
/* long start_frame; */ /* frame offset to begin with */
/* long frame_number;*/ /* number of frames to decode */
long icy_interval;
scale_t outscale; /* base volume scale; MAXOUTBURST corresponds to factor 1 */
};
/*
	The decoder handle: all state of one decoding instance.
	Holds decoder scratch buffers and tables, the chosen optimization
	(cpu_opts), header data of the current frame, bitstream reader state,
	RVA info, input and output buffers, seek/gapless positions, the reader,
	the parameters (p) and metadata.
*/
/* There is a lot to condense here... many ints can be merged as flags; though the main space is still consumed by buffers. */
struct mpg123_handle_struct
{
int fresh; /* to be moved into flags */
int new_format;
real hybrid_block[2][2][SBLIMIT*SSLIMIT];
int hybrid_blc[2];
/* the scratch vars for the decoders, sometimes real, sometimes short... sometimes int/long */
short *short_buffs[2][2];
real *real_buffs[2][2];
unsigned char *rawbuffs;
int rawbuffss;
int bo[2]; /* i486 and dither need a second value */
unsigned char* rawdecwin; /* the block with all decwins */
real *decwin; /* _the_ decode table */
#ifdef OPT_MMXORSSE
/* I am not really sure that I need both of them... used in assembler */
float *decwin_mmx;
float *decwins;
#endif
int have_eq_settings;
real equalizer[2][32];
/* for halfspeed mode */
unsigned char ssave[34];
int halfphase;
/* a raw buffer and a pointer into the middle for signed short conversion, only allocated on demand */
unsigned char *conv16to8_buf;
unsigned char *conv16to8;
/* There's some possible memory saving for stuff that is not _really_ dynamic. */
/* layer3 */
int longLimit[9][23];
int shortLimit[9][14];
real gainpow2[256+118+4]; /* not really dynamic, just different for mmx */
/* layer2 */
real muls[27][64]; /* also used by layer 1 */
/* decode_ntom */
unsigned long ntom_val[2];
unsigned long ntom_step;
/* special i486 fun */
#ifdef OPT_I486
int *int_buffs[2][2];
#endif
/* special altivec... */
#ifdef OPT_ALTIVEC
real *areal_buffs[4][4];
#endif
/* The chosen decoder optimization; the function pointers only exist in OPT_MULTI builds and are filled by frame_cpu_opt(). */
struct
{
#ifdef OPT_MULTI
int (*synth_1to1)(real *,int, mpg123_handle *,int );
int (*synth_1to1_mono)(real *, mpg123_handle *);
int (*synth_1to1_mono2stereo)(real *, mpg123_handle *);
int (*synth_1to1_8bit)(real *,int, mpg123_handle *,int );
int (*synth_1to1_8bit_mono)(real *, mpg123_handle *);
int (*synth_1to1_8bit_mono2stereo)(real *, mpg123_handle *);
#ifdef OPT_PENTIUM
int (*synth_1to1_i586_asm)(real *,int,unsigned char *, unsigned char *, int *, real *decwin);
#endif
#ifdef OPT_MMXORSSE
void (*make_decode_tables)(mpg123_handle *fr);
real (*init_layer3_gainpow2)(mpg123_handle*, int);
real* (*init_layer2_table)(mpg123_handle*, real*, double);
#endif
#ifdef OPT_3DNOW
void (*dct36)(real *,real *,real *,real *,real *);
#endif
void (*dct64)(real *,real *,real *);
#ifdef OPT_MPLAYER
void (*mpl_dct64)(real *,real *,real *);
#endif
#endif
enum optdec type;
enum optcla class;
} cpu_opts;
int verbose; /* 0: nothing, 1: just print chosen decoder, 2: be verbose */
/* mpg123_handle */
const struct al_table *alloc;
/* could use types from optimize.h */
/* the synth functions in use, chosen by set_synth_functions() */
int (*synth)(real *,int, mpg123_handle*, int);
int (*synth_mono)(real *, mpg123_handle*);
int stereo; /* I _think_ 1 for mono and 2 for stereo */
int jsbound;
#define SINGLE_STEREO -1
#define SINGLE_LEFT 0
#define SINGLE_RIGHT 1
#define SINGLE_MIX 3
int single;
int II_sblimit;
int down_sample_sblimit;
int lsf; /* 0: MPEG 1.0; 1: MPEG 2.0/2.5 -- both used as bool and array index! */
int mpeg25;
int down_sample; /* index into the synth tables: 0: 1to1, 1: 2to1, 2: 4to1, 3: ntom */
int header_change;
int lay; /* MPEG layer: 1, 2 or 3 */
int (*do_layer)(mpg123_handle *);
int error_protection;
int bitrate_index;
int sampling_frequency;
int padding;
int extension;
int mode;
int mode_ext;
int copyright;
int original;
int emphasis;
int framesize; /* computed framesize */
enum mpg123_vbr vbr; /* 1 if variable bitrate was detected */
off_t num; /* frame offset ... */
/* bitstream info; bsi */
int bitindex;
unsigned char *wordpointer;
/* temporary storage for getbits stuff */
unsigned long ultmp;
unsigned char uctmp;
/* rva data, used in common.c, set in id3.c */
scale_t lastscale; /* the scale last applied by do_rva() */
struct
{
int level[2]; /* -1 marks a slot without data */
float gain[2];
float peak[2];
} rva;
int do_recover;
/* input data */
off_t track_frames;
double mean_framesize;
off_t mean_frames;
int fsizeold;
int ssize;
unsigned char bsspace[2][MAXFRAMESIZE+512]; /* MAXFRAMESIZE */
unsigned char *bsbuf;
unsigned char *bsbufold;
int bsnum;
unsigned long oldhead;
unsigned long firsthead;
int abr_rate;
struct frame_index index;
/* output data */
struct outbuffer buffer;
struct audioformat af;
int own_buffer;
size_t outblock; /* number of bytes that this frame produces (upper bound) */
int to_decode; /* this frame holds data to be decoded */
int to_ignore; /* the same, somehow */
off_t firstframe; /* start decoding from here */
off_t lastframe; /* last frame to decode (for gapless or num_frames limit) */
off_t ignoreframe; /* frames to decode but discard before firstframe */
#ifdef GAPLESS
off_t firstoff; /* number of samples to ignore from firstframe */
off_t lastoff; /* number of samples to use from lastframe */
off_t begin_s; /* overall begin offset in samples */
off_t begin_os;
off_t end_s; /* overall end offset in samples */
off_t end_os;
#endif
unsigned int crc;
struct reader *rd; /* pointer to the reading functions */
struct reader_data rdat; /* reader data and state info */
struct mpg123_pars_struct p;
int err;
int decoder_change;
int delayed_change;
long clip;
/* the meta crap */
int metaflags;
unsigned char id3buf[128];
mpg123_id3v2 id3v2;
struct icy_meta icy;
};
/* generic init, does not include dynamic buffers */
void frame_init(mpg123_handle *fr);
void frame_init_par(mpg123_handle *fr, mpg123_pars *mp);
/* output buffer and format */
int frame_outbuffer(mpg123_handle *fr);
int frame_output_format(mpg123_handle *fr);
int frame_buffers(mpg123_handle *fr); /* various decoder buffers, needed once */
int frame_reset(mpg123_handle* fr); /* reset for next track */
int frame_buffers_reset(mpg123_handle *fr);
void frame_exit(mpg123_handle *fr); /* end, free all buffers */
int mpg123_print_index(mpg123_handle *fr, FILE* out);
off_t frame_index_find(mpg123_handle *fr, off_t want_frame, off_t* get_frame);
int frame_cpu_opt(mpg123_handle *fr, const char* cpu);
enum optdec dectype(const char* decoder);
int set_synth_functions(mpg123_handle *fr);
void do_volume(mpg123_handle *fr, double factor);
void do_rva(mpg123_handle *fr);
/* samples per frame ...
              Layer I   Layer II   Layer III
MPEG-1        384       1152       1152
MPEG-2 LSF    384       1152       576
MPEG 2.5      384       1152       576
*/
/* Evaluates fr more than once: do not pass an expression with side effects. */
#define spf(fr) ((fr)->lay == 1 ? 384 : ((fr)->lay==2 ? 1152 : ((fr)->lsf || (fr)->mpeg25 ? 576 : 1152)))
#ifdef GAPLESS
/* well, I take that one for granted... at least layer3 */
#define DECODER_DELAY 529
/* still fine-tuning the "real music" window... see read_frame */
#define GAP_SHIFT 0
void frame_gapless_init(mpg123_handle *fr, off_t b, off_t e);
void frame_gapless_realinit(mpg123_handle *fr);
/*void frame_gapless_position(mpg123_handle* fr);
void frame_gapless_bytify(mpg123_handle *fr);
void frame_gapless_ignore(mpg123_handle *fr, off_t frames);*/
/* void frame_gapless_buffercheck(mpg123_handle *fr); */
#endif
/*
Seeking core functions:
- convert input sample offset to output sample offset
- convert frame offset to output sample offset
- get leading frame offset for output sample offset
The offsets are "unadjusted"/internal; resampling is being taken care of.
*/
off_t frame_ins2outs(mpg123_handle *fr, off_t ins);
off_t frame_outs(mpg123_handle *fr, off_t num);
off_t frame_offset(mpg123_handle *fr, off_t outs);
void frame_set_frameseek(mpg123_handle *fr, off_t fe);
void frame_set_seek(mpg123_handle *fr, off_t sp);
off_t frame_tell_seek(mpg123_handle *fr);
/* adjust volume to current outscale and rva values if wanted */
void do_rva(mpg123_handle *fr);
#endif

135
src/libmpg123/getbits.c Normal file
View File

@@ -0,0 +1,135 @@
/*
getbits
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
#include "mpg123app.h"
#include "common.h"
#if 0
static void check_buffer_range(int size)
{
int pos = (bsi.wordpointer-bsbuf) + (size>>3);
if( pos >= fsizeold) {
fprintf(stderr,"Pointer out of range (%d,%d)!\n",pos,fsizeold);
}
}
#endif
/* Step back number_of_bits bits in the global bitstream state (bsi). */
void backbits(int number_of_bits)
{
	/* The bit index may go negative here... */
	bsi.bitindex = bsi.bitindex - number_of_bits;
	/* ...so that its whole-byte part moves the byte pointer backwards. */
	bsi.wordpointer = bsi.wordpointer + (bsi.bitindex>>3);
	bsi.bitindex = bsi.bitindex & 0x7;
}
/* Number of bits from the current position up to the next byte boundary (0 to 7). */
int getbitoffset(void)
{
	const int to_boundary = (-bsi.bitindex)&0x7;
	return to_boundary;
}
/*
	Return the byte at the byte pointer and advance the pointer by one.
	The bit index is not taken into account; DEBUG_GETBITS builds complain
	if it is non-zero.
*/
int getbyte(void)
{
	int value;
#ifdef DEBUG_GETBITS
	if(bsi.bitindex)
		fprintf(stderr,"getbyte called unsynched!\n");
#endif
	value = *bsi.wordpointer;
	bsi.wordpointer++;
	return value;
}
/*
	Read number_of_bits bits from the global bitstream state (bsi) and advance it.
	Works on a 24 bit window built from the three bytes at the byte pointer,
	so three bytes are always read, however few bits are requested.
	A request for zero bits returns 0 without touching the state.
*/
unsigned int getbits(int number_of_bits)
{
	unsigned long window;
#ifdef DEBUG_GETBITS
	fprintf(stderr,"g%d",number_of_bits);
#endif
	if(number_of_bits == 0)
		return 0;
#if 0
	check_buffer_range(number_of_bits+bsi.bitindex);
#endif
	/* Three bytes, first byte in the top position. */
	window = bsi.wordpointer[0];
	window = (window << 8) | bsi.wordpointer[1];
	window = (window << 8) | bsi.wordpointer[2];
	/* Drop the bits already consumed from the first byte, keep 24 bits. */
	window = (window << bsi.bitindex) & 0xffffff;
	/* The wanted bits are the topmost ones of the window. */
	window >>= (24-number_of_bits);
	/* Advance the position. */
	bsi.bitindex += number_of_bits;
	bsi.wordpointer += (bsi.bitindex>>3);
	bsi.bitindex &= 7;
#ifdef DEBUG_GETBITS
	fprintf(stderr,":%lx ",window);
#endif
	return window;
}
/*
	Like getbits(), but working on the two bytes at the byte pointer only.
	The position is advanced by number_of_bits.
*/
unsigned int getbits_fast(int number_of_bits)
{
	unsigned int first;
	unsigned int second;
	unsigned int bits;
#ifdef DEBUG_GETBITS
	fprintf(stderr,"g%d",number_of_bits);
#endif
#if 0
	check_buffer_range(number_of_bits+bsi.bitindex);
#endif
	/* Unread bits of the first byte, moved to the top of an 8 bit value. */
	first  = (unsigned char) (bsi.wordpointer[0] << bsi.bitindex);
	/* Leading bits of the second byte fill up from below. */
	second = ((unsigned int) bsi.wordpointer[1]<<bsi.bitindex)>>8;
	bits = first | second;
	bits <<= number_of_bits;
	bits >>= 8;
	bsi.bitindex += number_of_bits;
	bsi.wordpointer += (bsi.bitindex>>3);
	bsi.bitindex &= 7;
#ifdef DEBUG_GETBITS
	fprintf(stderr,":%x ",bits);
#endif
	return bits;
}
/* Read a single bit from the global bitstream state (bsi) and advance by one bit. */
unsigned int get1bit(void)
{
	unsigned char shifted;
#ifdef DEBUG_GETBITS
	fprintf(stderr,"g%d",1);
#endif
#if 0
	check_buffer_range(1+bsi.bitindex);
#endif
	/* Move the current bit into the top position of the byte. */
	shifted = *bsi.wordpointer << bsi.bitindex;
	bsi.bitindex++;
	bsi.wordpointer += (bsi.bitindex>>3);
	bsi.bitindex &= 7;
#ifdef DEBUG_GETBITS
	fprintf(stderr,":%d ",shifted>>7);
#endif
	return shifted>>7;
}

49
src/libmpg123/getbits.h Normal file
View File

@@ -0,0 +1,49 @@
/*
getbits
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
#ifndef _MPG123_GETBITS_H_
#define _MPG123_GETBITS_H_
/*
	That's the same code as getbits.c, but as macros to force inlining.
	The bitstream state lives in the handle fr: bitindex, wordpointer and
	the scratch variables ultmp / uctmp.

	Arguments are fully parenthesized, so expressions like a+b for nob or
	&handle for fr expand correctly. They are still evaluated several times:
	do not pass expressions with side effects.
*/
/* step back nob bits */
#define backbits(fr,nob) ((void)( \
  (fr)->bitindex    -= (nob), \
  (fr)->wordpointer += ((fr)->bitindex>>3), \
  (fr)->bitindex    &= 0x7 ))
/* number of bits up to the next byte boundary */
#define getbitoffset(fr) ((-(fr)->bitindex)&0x7)
/* fetch the byte at the byte pointer and advance by one byte; ignores bitindex */
#define getbyte(fr)      (*(fr)->wordpointer++)
/* read nob bits out of a 24 bit window; always accesses three bytes */
#define getbits(fr, nob) ( \
  (fr)->ultmp = (fr)->wordpointer[0], (fr)->ultmp <<= 8, (fr)->ultmp |= (fr)->wordpointer[1], \
  (fr)->ultmp <<= 8, (fr)->ultmp |= (fr)->wordpointer[2], (fr)->ultmp <<= (fr)->bitindex, \
  (fr)->ultmp &= 0xffffff, (fr)->bitindex += (nob), \
  (fr)->ultmp >>= (24-(nob)), (fr)->wordpointer += ((fr)->bitindex>>3), \
  (fr)->bitindex &= 7,(fr)->ultmp)
/* advance by nob bits; same work as getbits, ultmp ends up holding the new bitindex */
#define skipbits(fr, nob) ((fr)->ultmp = ( \
  (fr)->ultmp = (fr)->wordpointer[0], (fr)->ultmp <<= 8, (fr)->ultmp |= (fr)->wordpointer[1], \
  (fr)->ultmp <<= 8, (fr)->ultmp |= (fr)->wordpointer[2], (fr)->ultmp <<= (fr)->bitindex, \
  (fr)->ultmp &= 0xffffff, (fr)->bitindex += (nob), \
  (fr)->ultmp >>= (24-(nob)), (fr)->wordpointer += ((fr)->bitindex>>3), \
  (fr)->bitindex &= 7 ))
/* read nob bits using only the two bytes at the byte pointer */
#define getbits_fast(fr, nob) ( \
  (fr)->ultmp = (unsigned char) ((fr)->wordpointer[0] << (fr)->bitindex), \
  (fr)->ultmp |= ((unsigned long) (fr)->wordpointer[1]<<(fr)->bitindex)>>8, \
  (fr)->ultmp <<= (nob), (fr)->ultmp >>= 8, \
  (fr)->bitindex += (nob), (fr)->wordpointer += ((fr)->bitindex>>3), \
  (fr)->bitindex &= 7, (fr)->ultmp )
/* read a single bit */
#define get1bit(fr) ( \
  (fr)->uctmp = *(fr)->wordpointer << (fr)->bitindex, (fr)->bitindex++, \
  (fr)->wordpointer += ((fr)->bitindex>>3), (fr)->bitindex &= 7, (fr)->uctmp>>7 )
#endif

View File

@@ -0,0 +1,79 @@
/*
getcpuflags: get cpuflags for ia32
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by KIMURA Takuhiro (for 3DNow!)
extended for general use by Thomas Orgis
extern unsigned int getcpuflags(struct cpuflags*)
or just
extern unsigned int getcpuflags(unsigned int*)
where there is memory for 4 ints
-> the first set of idflags (basic cpu family info) is returned
and the idflags, stdflags, std2flags, extflags are written to the parameter
-> 0x00000000 is returned if the CPUID instruction is not supported
*/
#include "mangle.h"
.text
ALIGN4
.globl ASM_NAME(getcpuflags)
/* .type ASM_NAME(getcpuflags),@function */
/*
	In: pointer to four 32 bit values at 8(%ebp).
	Out: writes id (0), std (4), std2 (8) and ext (12) flags there,
	returns the id flags in %eax; everything is zero without CPUID.
*/
ASM_NAME(getcpuflags):
pushl %ebp
movl %esp,%ebp
pushl %edx
pushl %ecx
pushl %ebx
pushl %esi
/* get the int pointer for storing the flags */
movl 8(%ebp), %esi
/* now save the flags and do a check for cpuid availability */
pushfl
pushfl
popl %eax
movl %eax,%ebx
/* toggle the ID bit (bit 21) of the flags... */
xorl $0x00200000,%eax
pushl %eax
popfl
/* ...and read back the flags to see if it is understood */
pushfl
popl %eax
/* restore the original flags */
popfl
cmpl %ebx,%eax
je .Lnocpuid
/* extended flags stay zero unless the extended level is really there */
movl $0, 12(%esi)
/* ask for the highest supported extended level first */
movl $0x80000000,%eax
cpuid
/* a CPU without extended levels does not answer with a value of 0x80000001 or above */
cmpl $0x80000001,%eax
jb .Lnoextended
/* now get the extended info */
movl $0x80000001,%eax
cpuid
movl %edx,12(%esi)
.Lnoextended:
/* then the other ones, called last to get the id flags in %eax for ret */
movl $0x00000001,%eax
cpuid
movl %eax, (%esi)
movl %ecx, 4(%esi)
movl %edx, 8(%esi)
jmp .Lend
ALIGN4
.Lnocpuid:
/* error: set everything to zero */
movl $0, %eax
movl $0, (%esi)
movl $0, 4(%esi)
movl $0, 8(%esi)
movl $0, 12(%esi)
ALIGN4
.Lend:
/* return value are the id flags, still stored in %eax */
popl %esi
popl %ebx
popl %ecx
popl %edx
movl %ebp,%esp
popl %ebp
ret

View File

@@ -0,0 +1,39 @@
#ifndef MPG123_H_GETCPUFLAGS
#define MPG123_H_GETCPUFLAGS
/* standard level flags part 1 */
#define FLAG_SSE3      0x00000001
/* standard level flags part 2 */
#define FLAG2_MMX      0x00800000
#define FLAG2_SSE      0x02000000
#define FLAG2_SSE2     0x04000000
#define FLAG2_FPU      0x00000001
/* cpuid extended level 1 (AMD) */
#define XFLAG_MMX      0x00800000
#define XFLAG_3DNOW    0x80000000
#define XFLAG_3DNOWEXT 0x40000000
/* The four values filled in by getcpuflags(). */
struct cpuflags
{
	unsigned int id;   /* id flags, family in bits 8 to 11 */
	unsigned int std;  /* standard flags part 1, tested with the FLAG_ masks */
	unsigned int std2; /* standard flags part 2, tested with the FLAG2_ masks */
	unsigned int ext;  /* extended flags, tested with the XFLAG_ masks */
};
extern struct cpuflags cpu_flags;
/* Fills cf and returns the id flags. */
unsigned int getcpuflags(struct cpuflags* cf);
/*
	The checks below take a struct cpuflags (not a pointer).
	The argument is parenthesized so that expressions like *ptr work, too.
*/
/* checks the family */
/* NOTE(review): family 0 counts as i586 here, too - confirm this is wanted for an all-zero id. */
#define cpu_i586(s) ( (((s).id & 0xf00)>>8) == 0 || (((s).id & 0xf00)>>8) > 4 )
/* checking some flags... */
#define cpu_fpu(s) (FLAG2_FPU & (s).std2)
#define cpu_mmx(s) ((FLAG2_MMX & (s).std2) || (XFLAG_MMX & (s).ext))
#define cpu_3dnow(s) (XFLAG_3DNOW & (s).ext)
#define cpu_3dnowext(s) (XFLAG_3DNOWEXT & (s).ext)
#define cpu_sse(s) (FLAG2_SSE & (s).std2)
#define cpu_sse2(s) (FLAG2_SSE2 & (s).std2)
#define cpu_sse3(s) (FLAG_SSE3 & (s).std)
#endif

340
src/libmpg123/huffman.h Normal file
View File

@@ -0,0 +1,340 @@
/*
huffman.h: huffman tables ... recalculated to work with optimized decoder scheme (MH)
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
probably we could save a few bytes of memory, because the
smaller tables are often the part of a bigger table
*/
#ifndef _MPG123_HUFFMAN_H_
#define _MPG123_HUFFMAN_H_
/* Describes one huffman table: a linbits value plus a pointer to the table data. */
/* NOTE(review): presumably linbits is the count of extra bits for escaped values - confirm against the layer 3 decoder. */
struct newhuff
{
unsigned int linbits;
short *table; /* points to one of the static tab* arrays in this header (presumably - the users are not in view) */
};
static short tab0[] =
{
0
};
static short tab1[] =
{
-5, -3, -1, 17, 1, 16, 0
};
static short tab2[] =
{
-15, -11, -9, -5, -3, -1, 34, 2, 18, -1, 33, 32, 17, -1, 1,
16, 0
};
static short tab3[] =
{
-13, -11, -9, -5, -3, -1, 34, 2, 18, -1, 33, 32, 16, 17, -1,
1, 0
};
static short tab5[] =
{
-29, -25, -23, -15, -7, -5, -3, -1, 51, 35, 50, 49, -3, -1, 19,
3, -1, 48, 34, -3, -1, 18, 33, -1, 2, 32, 17, -1, 1, 16,
0
};
static short tab6[] =
{
-25, -19, -13, -9, -5, -3, -1, 51, 3, 35, -1, 50, 48, -1, 19,
49, -3, -1, 34, 2, 18, -3, -1, 33, 32, 1, -1, 17, -1, 16,
0
};
static short tab7[] =
{
-69, -65, -57, -39, -29, -17, -11, -7, -3, -1, 85, 69, -1, 84, 83,
-1, 53, 68, -3, -1, 37, 82, 21, -5, -1, 81, -1, 5, 52, -1,
80, -1, 67, 51, -5, -3, -1, 36, 66, 20, -1, 65, 64, -11, -7,
-3, -1, 4, 35, -1, 50, 3, -1, 19, 49, -3, -1, 48, 34, 18,
-5, -1, 33, -1, 2, 32, 17, -1, 1, 16, 0
};
static short tab8[] =
{
-65, -63, -59, -45, -31, -19, -13, -7, -5, -3, -1, 85, 84, 69, 83,
-3, -1, 53, 68, 37, -3, -1, 82, 5, 21, -5, -1, 81, -1, 52,
67, -3, -1, 80, 51, 36, -5, -3, -1, 66, 20, 65, -3, -1, 4,
64, -1, 35, 50, -9, -7, -3, -1, 19, 49, -1, 3, 48, 34, -1,
2, 32, -1, 18, 33, 17, -3, -1, 1, 16, 0
};
static short tab9[] =
{
-63, -53, -41, -29, -19, -11, -5, -3, -1, 85, 69, 53, -1, 83, -1,
84, 5, -3, -1, 68, 37, -1, 82, 21, -3, -1, 81, 52, -1, 67,
-1, 80, 4, -7, -3, -1, 36, 66, -1, 51, 64, -1, 20, 65, -5,
-3, -1, 35, 50, 19, -1, 49, -1, 3, 48, -5, -3, -1, 34, 2,
18, -1, 33, 32, -3, -1, 17, 1, -1, 16, 0
};
static short tab10[] =
{
-125,-121,-111, -83, -55, -35, -21, -13, -7, -3, -1, 119, 103, -1, 118,
87, -3, -1, 117, 102, 71, -3, -1, 116, 86, -1, 101, 55, -9, -3,
-1, 115, 70, -3, -1, 85, 84, 99, -1, 39, 114, -11, -5, -3, -1,
100, 7, 112, -1, 98, -1, 69, 53, -5, -1, 6, -1, 83, 68, 23,
-17, -5, -1, 113, -1, 54, 38, -5, -3, -1, 37, 82, 21, -1, 81,
-1, 52, 67, -3, -1, 22, 97, -1, 96, -1, 5, 80, -19, -11, -7,
-3, -1, 36, 66, -1, 51, 4, -1, 20, 65, -3, -1, 64, 35, -1,
50, 3, -3, -1, 19, 49, -1, 48, 34, -7, -3, -1, 18, 33, -1,
2, 32, 17, -1, 1, 16, 0
};
static short tab11[] =
{
-121,-113, -89, -59, -43, -27, -17, -7, -3, -1, 119, 103, -1, 118, 117,
-3, -1, 102, 71, -1, 116, -1, 87, 85, -5, -3, -1, 86, 101, 55,
-1, 115, 70, -9, -7, -3, -1, 69, 84, -1, 53, 83, 39, -1, 114,
-1, 100, 7, -5, -1, 113, -1, 23, 112, -3, -1, 54, 99, -1, 96,
-1, 68, 37, -13, -7, -5, -3, -1, 82, 5, 21, 98, -3, -1, 38,
6, 22, -5, -1, 97, -1, 81, 52, -5, -1, 80, -1, 67, 51, -1,
36, 66, -15, -11, -7, -3, -1, 20, 65, -1, 4, 64, -1, 35, 50,
-1, 19, 49, -5, -3, -1, 3, 48, 34, 33, -5, -1, 18, -1, 2,
32, 17, -3, -1, 1, 16, 0
};
static short tab12[] =
{
-115, -99, -73, -45, -27, -17, -9, -5, -3, -1, 119, 103, 118, -1, 87,
117, -3, -1, 102, 71, -1, 116, 101, -3, -1, 86, 55, -3, -1, 115,
85, 39, -7, -3, -1, 114, 70, -1, 100, 23, -5, -1, 113, -1, 7,
112, -1, 54, 99, -13, -9, -3, -1, 69, 84, -1, 68, -1, 6, 5,
-1, 38, 98, -5, -1, 97, -1, 22, 96, -3, -1, 53, 83, -1, 37,
82, -17, -7, -3, -1, 21, 81, -1, 52, 67, -5, -3, -1, 80, 4,
36, -1, 66, 20, -3, -1, 51, 65, -1, 35, 50, -11, -7, -5, -3,
-1, 64, 3, 48, 19, -1, 49, 34, -1, 18, 33, -7, -5, -3, -1,
2, 32, 0, 17, -1, 1, 16
};
/* Decode table used by ht[13]; presumably the Huffman tree of Layer III code table 13
   in flattened form (negative entries look like branch offsets, non-negative ones
   like packed value pairs) - TODO confirm against the decoder that walks it. */
static short tab13[] =
{
-509,-503,-475,-405,-333,-265,-205,-153,-115, -83, -53, -35, -21, -13, -9,
-7, -5, -3, -1, 254, 252, 253, 237, 255, -1, 239, 223, -3, -1, 238,
207, -1, 222, 191, -9, -3, -1, 251, 206, -1, 220, -1, 175, 233, -1,
236, 221, -9, -5, -3, -1, 250, 205, 190, -1, 235, 159, -3, -1, 249,
234, -1, 189, 219, -17, -9, -3, -1, 143, 248, -1, 204, -1, 174, 158,
-5, -1, 142, -1, 127, 126, 247, -5, -1, 218, -1, 173, 188, -3, -1,
203, 246, 111, -15, -7, -3, -1, 232, 95, -1, 157, 217, -3, -1, 245,
231, -1, 172, 187, -9, -3, -1, 79, 244, -3, -1, 202, 230, 243, -1,
63, -1, 141, 216, -21, -9, -3, -1, 47, 242, -3, -1, 110, 156, 15,
-5, -3, -1, 201, 94, 171, -3, -1, 125, 215, 78, -11, -5, -3, -1,
200, 214, 62, -1, 185, -1, 155, 170, -1, 31, 241, -23, -13, -5, -1,
240, -1, 186, 229, -3, -1, 228, 140, -1, 109, 227, -5, -1, 226, -1,
46, 14, -1, 30, 225, -15, -7, -3, -1, 224, 93, -1, 213, 124, -3,
-1, 199, 77, -1, 139, 184, -7, -3, -1, 212, 154, -1, 169, 108, -1,
198, 61, -37, -21, -9, -5, -3, -1, 211, 123, 45, -1, 210, 29, -5,
-1, 183, -1, 92, 197, -3, -1, 153, 122, 195, -7, -5, -3, -1, 167,
151, 75, 209, -3, -1, 13, 208, -1, 138, 168, -11, -7, -3, -1, 76,
196, -1, 107, 182, -1, 60, 44, -3, -1, 194, 91, -3, -1, 181, 137,
28, -43, -23, -11, -5, -1, 193, -1, 152, 12, -1, 192, -1, 180, 106,
-5, -3, -1, 166, 121, 59, -1, 179, -1, 136, 90, -11, -5, -1, 43,
-1, 165, 105, -1, 164, -1, 120, 135, -5, -1, 148, -1, 119, 118, 178,
-11, -3, -1, 27, 177, -3, -1, 11, 176, -1, 150, 74, -7, -3, -1,
58, 163, -1, 89, 149, -1, 42, 162, -47, -23, -9, -3, -1, 26, 161,
-3, -1, 10, 104, 160, -5, -3, -1, 134, 73, 147, -3, -1, 57, 88,
-1, 133, 103, -9, -3, -1, 41, 146, -3, -1, 87, 117, 56, -5, -1,
131, -1, 102, 71, -3, -1, 116, 86, -1, 101, 115, -11, -3, -1, 25,
145, -3, -1, 9, 144, -1, 72, 132, -7, -5, -1, 114, -1, 70, 100,
40, -1, 130, 24, -41, -27, -11, -5, -3, -1, 55, 39, 23, -1, 113,
-1, 85, 7, -7, -3, -1, 112, 54, -1, 99, 69, -3, -1, 84, 38,
-1, 98, 53, -5, -1, 129, -1, 8, 128, -3, -1, 22, 97, -1, 6,
96, -13, -9, -5, -3, -1, 83, 68, 37, -1, 82, 5, -1, 21, 81,
-7, -3, -1, 52, 67, -1, 80, 36, -3, -1, 66, 51, 20, -19, -11,
-5, -1, 65, -1, 4, 64, -3, -1, 35, 50, 19, -3, -1, 49, 3,
-1, 48, 34, -3, -1, 18, 33, -1, 2, 32, -3, -1, 17, 1, 16,
0
};
/* Decode table used by ht[15]; presumably the Huffman tree of Layer III code table 15
   in flattened form (negative entries look like branch offsets, non-negative ones
   like packed value pairs) - TODO confirm against the decoder that walks it. */
static short tab15[] =
{
-495,-445,-355,-263,-183,-115, -77, -43, -27, -13, -7, -3, -1, 255, 239,
-1, 254, 223, -1, 238, -1, 253, 207, -7, -3, -1, 252, 222, -1, 237,
191, -1, 251, -1, 206, 236, -7, -3, -1, 221, 175, -1, 250, 190, -3,
-1, 235, 205, -1, 220, 159, -15, -7, -3, -1, 249, 234, -1, 189, 219,
-3, -1, 143, 248, -1, 204, 158, -7, -3, -1, 233, 127, -1, 247, 173,
-3, -1, 218, 188, -1, 111, -1, 174, 15, -19, -11, -3, -1, 203, 246,
-3, -1, 142, 232, -1, 95, 157, -3, -1, 245, 126, -1, 231, 172, -9,
-3, -1, 202, 187, -3, -1, 217, 141, 79, -3, -1, 244, 63, -1, 243,
216, -33, -17, -9, -3, -1, 230, 47, -1, 242, -1, 110, 240, -3, -1,
31, 241, -1, 156, 201, -7, -3, -1, 94, 171, -1, 186, 229, -3, -1,
125, 215, -1, 78, 228, -15, -7, -3, -1, 140, 200, -1, 62, 109, -3,
-1, 214, 227, -1, 155, 185, -7, -3, -1, 46, 170, -1, 226, 30, -5,
-1, 225, -1, 14, 224, -1, 93, 213, -45, -25, -13, -7, -3, -1, 124,
199, -1, 77, 139, -1, 212, -1, 184, 154, -7, -3, -1, 169, 108, -1,
198, 61, -1, 211, 210, -9, -5, -3, -1, 45, 13, 29, -1, 123, 183,
-5, -1, 209, -1, 92, 208, -1, 197, 138, -17, -7, -3, -1, 168, 76,
-1, 196, 107, -5, -1, 182, -1, 153, 12, -1, 60, 195, -9, -3, -1,
122, 167, -1, 166, -1, 192, 11, -1, 194, -1, 44, 91, -55, -29, -15,
-7, -3, -1, 181, 28, -1, 137, 152, -3, -1, 193, 75, -1, 180, 106,
-5, -3, -1, 59, 121, 179, -3, -1, 151, 136, -1, 43, 90, -11, -5,
-1, 178, -1, 165, 27, -1, 177, -1, 176, 105, -7, -3, -1, 150, 74,
-1, 164, 120, -3, -1, 135, 58, 163, -17, -7, -3, -1, 89, 149, -1,
42, 162, -3, -1, 26, 161, -3, -1, 10, 160, 104, -7, -3, -1, 134,
73, -1, 148, 57, -5, -1, 147, -1, 119, 9, -1, 88, 133, -53, -29,
-13, -7, -3, -1, 41, 103, -1, 118, 146, -1, 145, -1, 25, 144, -7,
-3, -1, 72, 132, -1, 87, 117, -3, -1, 56, 131, -1, 102, 71, -7,
-3, -1, 40, 130, -1, 24, 129, -7, -3, -1, 116, 8, -1, 128, 86,
-3, -1, 101, 55, -1, 115, 70, -17, -7, -3, -1, 39, 114, -1, 100,
23, -3, -1, 85, 113, -3, -1, 7, 112, 54, -7, -3, -1, 99, 69,
-1, 84, 38, -3, -1, 98, 22, -3, -1, 6, 96, 53, -33, -19, -9,
-5, -1, 97, -1, 83, 68, -1, 37, 82, -3, -1, 21, 81, -3, -1,
5, 80, 52, -7, -3, -1, 67, 36, -1, 66, 51, -1, 65, -1, 20,
4, -9, -3, -1, 35, 50, -3, -1, 64, 3, 19, -3, -1, 49, 48,
34, -9, -7, -3, -1, 18, 33, -1, 2, 32, 17, -3, -1, 1, 16,
0
};
/* Decode table shared by ht[16] through ht[23], which differ only in their first
   field; presumably the Huffman tree of Layer III code tables 16-23 in flattened
   form (negative entries look like branch offsets, non-negative ones like packed
   value pairs) - TODO confirm against the decoder that walks it. */
static short tab16[] =
{
-509,-503,-461,-323,-103, -37, -27, -15, -7, -3, -1, 239, 254, -1, 223,
253, -3, -1, 207, 252, -1, 191, 251, -5, -1, 175, -1, 250, 159, -3,
-1, 249, 248, 143, -7, -3, -1, 127, 247, -1, 111, 246, 255, -9, -5,
-3, -1, 95, 245, 79, -1, 244, 243, -53, -1, 240, -1, 63, -29, -19,
-13, -7, -5, -1, 206, -1, 236, 221, 222, -1, 233, -1, 234, 217, -1,
238, -1, 237, 235, -3, -1, 190, 205, -3, -1, 220, 219, 174, -11, -5,
-1, 204, -1, 173, 218, -3, -1, 126, 172, 202, -5, -3, -1, 201, 125,
94, 189, 242, -93, -5, -3, -1, 47, 15, 31, -1, 241, -49, -25, -13,
-5, -1, 158, -1, 188, 203, -3, -1, 142, 232, -1, 157, 231, -7, -3,
-1, 187, 141, -1, 216, 110, -1, 230, 156, -13, -7, -3, -1, 171, 186,
-1, 229, 215, -1, 78, -1, 228, 140, -3, -1, 200, 62, -1, 109, -1,
214, 155, -19, -11, -5, -3, -1, 185, 170, 225, -1, 212, -1, 184, 169,
-5, -1, 123, -1, 183, 208, 227, -7, -3, -1, 14, 224, -1, 93, 213,
-3, -1, 124, 199, -1, 77, 139, -75, -45, -27, -13, -7, -3, -1, 154,
108, -1, 198, 61, -3, -1, 92, 197, 13, -7, -3, -1, 138, 168, -1,
153, 76, -3, -1, 182, 122, 60, -11, -5, -3, -1, 91, 137, 28, -1,
192, -1, 152, 121, -1, 226, -1, 46, 30, -15, -7, -3, -1, 211, 45,
-1, 210, 209, -5, -1, 59, -1, 151, 136, 29, -7, -3, -1, 196, 107,
-1, 195, 167, -1, 44, -1, 194, 181, -23, -13, -7, -3, -1, 193, 12,
-1, 75, 180, -3, -1, 106, 166, 179, -5, -3, -1, 90, 165, 43, -1,
178, 27, -13, -5, -1, 177, -1, 11, 176, -3, -1, 105, 150, -1, 74,
164, -5, -3, -1, 120, 135, 163, -3, -1, 58, 89, 42, -97, -57, -33,
-19, -11, -5, -3, -1, 149, 104, 161, -3, -1, 134, 119, 148, -5, -3,
-1, 73, 87, 103, 162, -5, -1, 26, -1, 10, 160, -3, -1, 57, 147,
-1, 88, 133, -9, -3, -1, 41, 146, -3, -1, 118, 9, 25, -5, -1,
145, -1, 144, 72, -3, -1, 132, 117, -1, 56, 131, -21, -11, -5, -3,
-1, 102, 40, 130, -3, -1, 71, 116, 24, -3, -1, 129, 128, -3, -1,
8, 86, 55, -9, -5, -1, 115, -1, 101, 70, -1, 39, 114, -5, -3,
-1, 100, 85, 7, 23, -23, -13, -5, -1, 113, -1, 112, 54, -3, -1,
99, 69, -1, 84, 38, -3, -1, 98, 22, -1, 97, -1, 6, 96, -9,
-5, -1, 83, -1, 53, 68, -1, 37, 82, -1, 81, -1, 21, 5, -33,
-23, -13, -7, -3, -1, 52, 67, -1, 80, 36, -3, -1, 66, 51, 20,
-5, -1, 65, -1, 4, 64, -1, 35, 50, -3, -1, 19, 49, -3, -1,
3, 48, 34, -3, -1, 18, 33, -1, 2, 32, -3, -1, 17, 1, 16,
0
};
/* Decode table shared by ht[24] through ht[31], which differ only in their first
   field; presumably the Huffman tree of Layer III code tables 24-31 in flattened
   form (negative entries look like branch offsets, non-negative ones like packed
   value pairs) - TODO confirm against the decoder that walks it. */
static short tab24[] =
{
-451,-117, -43, -25, -15, -7, -3, -1, 239, 254, -1, 223, 253, -3, -1,
207, 252, -1, 191, 251, -5, -1, 250, -1, 175, 159, -1, 249, 248, -9,
-5, -3, -1, 143, 127, 247, -1, 111, 246, -3, -1, 95, 245, -1, 79,
244, -71, -7, -3, -1, 63, 243, -1, 47, 242, -5, -1, 241, -1, 31,
240, -25, -9, -1, 15, -3, -1, 238, 222, -1, 237, 206, -7, -3, -1,
236, 221, -1, 190, 235, -3, -1, 205, 220, -1, 174, 234, -15, -7, -3,
-1, 189, 219, -1, 204, 158, -3, -1, 233, 173, -1, 218, 188, -7, -3,
-1, 203, 142, -1, 232, 157, -3, -1, 217, 126, -1, 231, 172, 255,-235,
-143, -77, -45, -25, -15, -7, -3, -1, 202, 187, -1, 141, 216, -5, -3,
-1, 14, 224, 13, 230, -5, -3, -1, 110, 156, 201, -1, 94, 186, -9,
-5, -1, 229, -1, 171, 125, -1, 215, 228, -3, -1, 140, 200, -3, -1,
78, 46, 62, -15, -7, -3, -1, 109, 214, -1, 227, 155, -3, -1, 185,
170, -1, 226, 30, -7, -3, -1, 225, 93, -1, 213, 124, -3, -1, 199,
77, -1, 139, 184, -31, -15, -7, -3, -1, 212, 154, -1, 169, 108, -3,
-1, 198, 61, -1, 211, 45, -7, -3, -1, 210, 29, -1, 123, 183, -3,
-1, 209, 92, -1, 197, 138, -17, -7, -3, -1, 168, 153, -1, 76, 196,
-3, -1, 107, 182, -3, -1, 208, 12, 60, -7, -3, -1, 195, 122, -1,
167, 44, -3, -1, 194, 91, -1, 181, 28, -57, -35, -19, -7, -3, -1,
137, 152, -1, 193, 75, -5, -3, -1, 192, 11, 59, -3, -1, 176, 10,
26, -5, -1, 180, -1, 106, 166, -3, -1, 121, 151, -3, -1, 160, 9,
144, -9, -3, -1, 179, 136, -3, -1, 43, 90, 178, -7, -3, -1, 165,
27, -1, 177, 105, -1, 150, 164, -17, -9, -5, -3, -1, 74, 120, 135,
-1, 58, 163, -3, -1, 89, 149, -1, 42, 162, -7, -3, -1, 161, 104,
-1, 134, 119, -3, -1, 73, 148, -1, 57, 147, -63, -31, -15, -7, -3,
-1, 88, 133, -1, 41, 103, -3, -1, 118, 146, -1, 25, 145, -7, -3,
-1, 72, 132, -1, 87, 117, -3, -1, 56, 131, -1, 102, 40, -17, -7,
-3, -1, 130, 24, -1, 71, 116, -5, -1, 129, -1, 8, 128, -1, 86,
101, -7, -5, -1, 23, -1, 7, 112, 115, -3, -1, 55, 39, 114, -15,
-7, -3, -1, 70, 100, -1, 85, 113, -3, -1, 54, 99, -1, 69, 84,
-7, -3, -1, 38, 98, -1, 22, 97, -5, -3, -1, 6, 96, 53, -1,
83, 68, -51, -37, -23, -15, -9, -3, -1, 37, 82, -1, 21, -1, 5,
80, -1, 81, -1, 52, 67, -3, -1, 36, 66, -1, 51, 20, -9, -5,
-1, 65, -1, 4, 64, -1, 35, 50, -1, 19, 49, -7, -5, -3, -1,
3, 48, 34, 18, -1, 33, -1, 2, 32, -3, -1, 17, 1, -1, 16,
0
};
/* Decode table used by htc[0]; presumably the first of the two Layer III "count1"
   (quadruple) Huffman trees, same flattened layout as the tabN tables - TODO confirm. */
static short tab_c0[] =
{
-29, -21, -13, -7, -3, -1, 11, 15, -1, 13, 14, -3, -1, 7, 5,
9, -3, -1, 6, 3, -1, 10, 12, -3, -1, 2, 1, -1, 4, 8,
0
};
/* Decode table used by htc[1]; presumably the second of the two Layer III "count1"
   (quadruple) Huffman trees, same flattened layout as the tabN tables - TODO confirm. */
static short tab_c1[] =
{
-15, -7, -3, -1, 15, 14, -1, 13, 12, -3, -1, 11, 10, -1, 9,
8, -7, -3, -1, 7, 6, -1, 5, 4, -3, -1, 3, 2, -1, 1,
0
};
/* The 32 table descriptors, indexed by table number: each pairs an integer with the
   decode table to use. Entries 4 and 14 fall back to tab0, entries 16-23 share tab16
   and 24-31 share tab24, differing only in the integer.
   NOTE(review): the integer presumably is the "linbits" count and the leading comment
   in each entry the table dimension - confirm against struct newhuff and the spec. */
static struct newhuff ht[] =
{
{ /* 0 */ 0 , tab0 } ,
{ /* 2 */ 0 , tab1 } ,
{ /* 3 */ 0 , tab2 } ,
{ /* 3 */ 0 , tab3 } ,
{ /* 0 */ 0 , tab0 } ,
{ /* 4 */ 0 , tab5 } ,
{ /* 4 */ 0 , tab6 } ,
{ /* 6 */ 0 , tab7 } ,
{ /* 6 */ 0 , tab8 } ,
{ /* 6 */ 0 , tab9 } ,
{ /* 8 */ 0 , tab10 } ,
{ /* 8 */ 0 , tab11 } ,
{ /* 8 */ 0 , tab12 } ,
{ /* 16 */ 0 , tab13 } ,
{ /* 0 */ 0 , tab0 } ,
{ /* 16 */ 0 , tab15 } ,
{ /* 16 */ 1 , tab16 } ,
{ /* 16 */ 2 , tab16 } ,
{ /* 16 */ 3 , tab16 } ,
{ /* 16 */ 4 , tab16 } ,
{ /* 16 */ 6 , tab16 } ,
{ /* 16 */ 8 , tab16 } ,
{ /* 16 */ 10, tab16 } ,
{ /* 16 */ 13, tab16 } ,
{ /* 16 */ 4 , tab24 } ,
{ /* 16 */ 5 , tab24 } ,
{ /* 16 */ 6 , tab24 } ,
{ /* 16 */ 7 , tab24 } ,
{ /* 16 */ 8 , tab24 } ,
{ /* 16 */ 9 , tab24 } ,
{ /* 16 */ 11, tab24 } ,
{ /* 16 */ 13, tab24 }
};
/* The two descriptors for tab_c0 and tab_c1, in the same form as ht[] and with the
   integer field always 0.
   NOTE(review): presumably selected by the count1table_select bit - confirm in the decoder. */
static struct newhuff htc[] =
{
{ /* 1 , 1 , */ 0 , tab_c0 } ,
{ /* 1 , 1 , */ 0 , tab_c1 }
};
#endif

25
src/libmpg123/icy.c Normal file
View File

@@ -0,0 +1,25 @@
#include "icy.h"
#include <stdlib.h>
/* Put an ICY metadata holder into its empty state: no data buffer attached.
   Only the data pointer is set; interval and next are not touched here.
   Must not be used on a holder that still owns a buffer (that would leak it), see clear_icy(). */
void init_icy(struct icy_meta *icy)
{
icy->data = NULL;
}
/* Release the metadata buffer (if any) and return the holder to its empty state. */
void clear_icy(struct icy_meta *icy)
{
	/* free() accepts NULL, so an empty holder needs no special case. */
	free(icy->data);
	init_icy(icy);
}
/* Drop any held metadata and start over with an empty holder.
   Currently the same effect as clear_icy(): the buffer is freed and data set to NULL. */
void reset_icy(struct icy_meta *icy)
{
clear_icy(icy);
init_icy(icy);
}
/*void set_icy(struct icy_meta *icy, char* new_data)
{
if(icy->data) free(icy->data);
icy->data = new_data;
icy->changed = 1;
}*/

25
src/libmpg123/icy.h Normal file
View File

@@ -0,0 +1,25 @@
/*
icy: support for SHOUTcast ICY meta info, an attempt to keep it organized
copyright 2006-7 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Thomas Orgis and modelled after patch by Honza
*/
#ifndef MPG123_ICY_H
#define MPG123_ICY_H
#include <sys/types.h>
#include "mpg123.h"
/* State for handling ICY metadata of one stream. */
struct icy_meta
{
char* data; /* metadata buffer; NULL when none is held, released by clear_icy() */
off_t interval; /* presumably the byte distance between metadata blocks in the stream - TODO confirm */
off_t next; /* presumably the bytes left until the next metadata block - TODO confirm */
};
/* Set the data pointer to NULL without freeing anything (for fresh structures). */
void init_icy(struct icy_meta *);
/* Free the data buffer, then set the pointer back to NULL. */
void clear_icy(struct icy_meta *);
/* Discard held data and leave an empty holder (clear followed by init). */
void reset_icy(struct icy_meta *);
#endif

601
src/libmpg123/id3.c Normal file
View File

@@ -0,0 +1,601 @@
#include "mpg123lib_intern.h"
#include "id3.h"
/* UTF support definitions */
/* A converter takes len bytes of raw text from source and stores the UTF-8 result in sb. */
typedef void (*text_converter)(mpg123_string *sb, unsigned char* source, size_t len);
static void convert_latin1 (mpg123_string *sb, unsigned char* source, size_t len);
/* Worker for both UTF-16 flavours; str_be selects big endian (non-zero) or little endian (0). */
static void convert_utf16 (mpg123_string *sb, unsigned char* source, size_t len, int str_be);
static void convert_utf16bom(mpg123_string *sb, unsigned char* source, size_t len);
static void convert_utf16be (mpg123_string *sb, unsigned char* source, size_t len);
static void convert_utf8 (mpg123_string *sb, unsigned char* source, size_t len);
/* Indexed by the encoding byte (0 to 3) that starts an ID3v2 text field, see store_id3_text(). */
static const text_converter text_converters[4] =
{
convert_latin1,
convert_utf16bom,
convert_utf16be,
convert_utf8
};
/* Bytes per code unit for each encoding, same index as text_converters;
   store_id3_text() uses it to trim data that is not a whole number of units. */
const int encoding_widths[4] = { 1, 2, 2, 1 };
/* the code starts here... */
/* Prepare the ID3v2 storage of a handle for first use: mark "no tag" (version 0)
   and initialize each of the six text fields via mpg123_init_string().
   For re-use of an already initialized handle see reset_id3() / exit_id3(). */
void init_id3(mpg123_handle *fr)
{
fr->id3v2.version = 0; /* nothing there */
mpg123_init_string(&fr->id3v2.title);
mpg123_init_string(&fr->id3v2.artist);
mpg123_init_string(&fr->id3v2.album);
mpg123_init_string(&fr->id3v2.year);
mpg123_init_string(&fr->id3v2.comment);
mpg123_init_string(&fr->id3v2.genre);
}
/* Counterpart to init_id3(): hand each of the six text fields to mpg123_free_string().
   The version field is left as it is. */
void exit_id3(mpg123_handle *fr)
{
mpg123_free_string(&fr->id3v2.title);
mpg123_free_string(&fr->id3v2.artist);
mpg123_free_string(&fr->id3v2.album);
mpg123_free_string(&fr->id3v2.year);
mpg123_free_string(&fr->id3v2.comment);
mpg123_free_string(&fr->id3v2.genre);
}
/* Forget the stored tag: version goes back to 0 ("no tag") and every text field
   is marked empty by zeroing its fill count. Only the fill counts are written;
   the string structures are not passed to any free or resize function here. */
void reset_id3(mpg123_handle *fr)
{
	fr->id3v2.version = 0;
	fr->id3v2.title.fill
		= fr->id3v2.artist.fill
		= fr->id3v2.album.fill
		= fr->id3v2.year.fill
		= fr->id3v2.comment.fill
		= fr->id3v2.genre.fill
		= 0;
}
/*
	Store any text in UTF8 encoding; preserve the zero string separator (I don't need strlen for the total size).
	ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values.
	So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though).

	sb:          destination, gets the converted text (left without storage when conversion fails)
	source:      raw text field, one encoding byte followed by the text itself
	source_size: number of bytes in source, encoding byte included; 0 means nothing is done
*/
void store_id3_text(mpg123_string *sb, char *source, size_t source_size)
{
	int encoding;
	int bwidth;
	if(!source_size)
	{
		debug("Empty id3 data!");
		return;
	}
	/* Read the byte as unsigned: plain char may be signed, and a negative value would
	   slip past the range check below and index the converter tables out of bounds. */
	encoding = (unsigned char) source[0];
	++source;
	--source_size;
	debug1("encoding: %i", encoding);
	/* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16.
	   UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */
	if(encoding > 3)
	{
		warning1("Unknown text encoding %d, assuming ISO8859-1 - I will probably screw a bit up!", encoding);
		encoding = 0;
	}
	bwidth = encoding_widths[encoding];
	if(source_size % bwidth)
	{
		/* Uh. (BTW, the -1 is for the encoding byte.) */
		warning2("Weird tag size %d for encoding %d - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding);
		source_size -= source_size % bwidth;
	}
	text_converters[encoding](sb, (unsigned char*)source, source_size);
	/* The converters leave the string without storage when they fail. */
	if(sb->size) debug1("UTF-8 string (the first one): %s", sb->p);
	else error("unable to convert string to UTF-8 (out of memory, junk input?)!");
}
/*
	trying to parse ID3v2.3 and ID3v2.4 tags...

	fr:          handle providing the reader (fr->rd) and receiving the results (fr->id3v2, fr->rva)
	first4bytes: the 4 bytes the caller already consumed; only the lowest byte (major version) is used here

	returns:  0 = read-error... or so... soft issue... ok... somehow...
	         ... = illegal ID3 header; maybe extended to mean unparseable (to new) header in future
	          1 = somehow ok...
	         ...or READER_MORE...
	Negative values from the reader functions are passed through, -1 is also used for non-synchsafe sizes.

	Every size found inside the tag is checked against the tag length before it is used;
	a frame that does not fit ends the parsing, the tag data is consumed from the stream nevertheless.
*/
int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes)
{
	#define UNSYNC_FLAG 128
	#define EXTHEAD_FLAG 64
	#define EXP_FLAG 32
	#define FOOTER_FLAG 16
	#define UNKNOWN_FLAGS 15 /* 00001111*/
	unsigned char buf[6];
	unsigned long length=0;
	unsigned char flags = 0;
	int ret = 1;
	int ret2;
	unsigned char* tagdata = NULL;
	unsigned char major = first4bytes & 0xff;
	debug1("ID3v2: major tag version: %i", major);
	if(major == 0xff) return 0; /* used to be -1 */
	if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */
		return ret2;
	if(buf[0] == 0xff) /* revision byte, will never be 0xff */
		return 0; /* used to be -1 */
	/* second new byte are some nice flags, if these are invalid skip the whole thing */
	flags = buf[1];
	debug1("ID3v2: flags 0x%08x", flags);
	/* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */
	#define synchsafe_to_long(buf,res) \
	( \
		(((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \
		(res =  (((unsigned long) (buf)[0]) << 21) \
		     | (((unsigned long) (buf)[1]) << 14) \
		     | (((unsigned long) (buf)[2]) << 7) \
		     |  ((unsigned long) (buf)[3]) \
		,1) \
	)
	/* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */
	#define bytes_to_long(buf,res) \
	( \
		major == 3 ? \
		(res =  (((unsigned long) (buf)[0]) << 24) \
		     | (((unsigned long) (buf)[1]) << 16) \
		     | (((unsigned long) (buf)[2]) << 8) \
		     |  ((unsigned long) (buf)[3]) \
		,1) : synchsafe_to_long(buf,res) \
	)
	/* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number  */
	/* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */
	if(!synchsafe_to_long(buf+2,length)) return -1;
	debug1("ID3v2: tag data length %lu", length);
	if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length);
	/* skip if unknown version/scary flags, parse otherwise */
	if((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 3))
	{
		/* going to skip because there are unknown flags set */
		warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags);
		if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! */
			ret = ret2;
	}
	else
	{
		fr->id3v2.version = major;
		/* try to interpret that beast */
		if((tagdata = (unsigned char*) malloc(length+1)) != NULL)
		{
			debug("ID3v2: analysing frames...");
			if((ret2 = fr->rd->read_frame_body(fr,tagdata,length)) > 0)
			{
				unsigned long tagpos = 0;
				debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6);
				/* going to apply strlen for strings inside frames, make sure that it doesn't overflow! */
				tagdata[length] = 0;
				if(flags & EXTHEAD_FLAG)
				{
					debug("ID3v2: skipping extended header");
					/* The size field itself needs 4 bytes of tag data. */
					if(length < 4 || !bytes_to_long(tagdata, tagpos)) ret = -1;
				}
				if(ret > 0)
				{
					char id[5];
					unsigned long framesize;
					unsigned long fflags; /* need 16 bits, actually */
					id[4] = 0;
					/* pos now advanced after ext head, now a frame has to follow */
					/* The explicit length check keeps length-10 from wrapping around for tiny tags. */
					while(length >= 10 && tagpos < length-10) /* I want to read at least a full header */
					{
						int i = 0;
						unsigned long pos = tagpos;
						/* level 1,2,3 - 0 is info from lame/info tag! */
						/* rva tags with ascending significance, then general frames */
						#define KNOWN_FRAMES 8
						const char frame_type[KNOWN_FRAMES][5] = { "COMM", "TXXX", "RVA2", "TPE1", "TALB", "TIT2", "TYER", "TCON" };
						enum { egal = -1, comment, extra, rva2, artist, album, title, year, genre } tt = egal;
						/* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */
						for(; i< 4; ++i) if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58))
						                     || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) )
						{
							debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]);
							ret = 0; /* used to be -1 */
							break;
						}
						if(ret > 0)
						{
							/* 4 bytes id, validated above to contain no zero byte; id[4] is already 0 */
							memcpy(id, tagdata+pos, 4);
							pos += 4;
							/* size as 32 bits */
							if(!bytes_to_long(tagdata+pos, framesize))
							{
								ret = -1;
								error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id);
								break;
							}
							if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize);
							/* The frame data has to fit into what is left of the tag after the 10 byte header
							   (the loop condition guarantees that length-tagpos-10 does not wrap). */
							if(framesize > length-tagpos-10)
							{
								error1("ID3v2: %s frame claims more data than the tag holds, skipping the remainder of tag", id);
								break;
							}
							tagpos += 10 + framesize; /* the important advancement in whole tag */
							pos += 4;
							fflags = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]);
							pos += 2;
							/* for sanity, after full parsing tagpos should be == pos */
							/* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */
							/* %0abc0000 %0h00kmnp */
							#define BAD_FFLAGS (unsigned long) 36784
							#define PRES_TAG_FFLAG 16384
							#define PRES_FILE_FFLAG 8192
							#define READ_ONLY_FFLAG 4096
							#define GROUP_FFLAG 64
							#define COMPR_FFLAG 8
							#define ENCR_FFLAG 4
							#define UNSYNC_FFLAG 2
							#define DATLEN_FFLAG 1
							/* shall not or want not handle these */
							if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG))
							{
								warning("ID3v2: skipping invalid/unsupported frame");
								continue;
							}
							for(i = 0; i < KNOWN_FRAMES; ++i)
								if(!strncmp(frame_type[i], id, 4)){ tt = i; break; }
							if(tt != egal)
							{
								int rva_mode = -1; /* mix / album */
								unsigned long realsize = framesize;
								unsigned char* realdata = tagdata+pos;
								if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG))
								{
									unsigned long ipos = 0;
									unsigned long opos = 0;
									debug("Id3v2: going to de-unsync the frame data");
									/* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */
									/* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */
									/* standard mandates that de-unsync should always be safe if flag is set */
									/* will need <= bytes, plus one for the terminating zero the string functions below rely on */
									realdata = (unsigned char*) malloc(framesize+1);
									if(realdata == NULL)
									{
										error("ID3v2: unable to allocate working buffer for de-unsync");
										continue;
									}
									/* now going byte per byte through the data... */
									if(framesize > 0)
									{
										realdata[0] = tagdata[pos];
										opos = 1;
										for(ipos = pos+1; ipos < pos+framesize; ++ipos)
										{
											if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff)))
											{
												realdata[opos++] = tagdata[ipos];
											}
										}
									}
									realdata[opos] = 0;
									realsize = opos;
									debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize);
								}
								pos = 0; /* now at the beginning again... */
								switch(tt)
								{
									case comment: /* a comment that perhaps is a RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one */
									{
										/* Text encoding          $xx */
										/* Language               $xx xx xx */
										/* policy about encodings: do not care for now here */
										/* Encoding and language plus at least one byte of description have to be there. */
										if(realsize > 4)
										{
											/* don't care about language */
											pos = 4;
											if(   !strcasecmp((char*)realdata+pos, "rva")
											   || !strcasecmp((char*)realdata+pos, "rva_mix")
											   || !strcasecmp((char*)realdata+pos, "rva_radio"))
												rva_mode = 0;
											else if(   !strcasecmp((char*)realdata+pos, "rva_album")
											        || !strcasecmp((char*)realdata+pos, "rva_audiophile")
											        || !strcasecmp((char*)realdata+pos, "rva_user"))
												rva_mode = 1;
											if((rva_mode > -1) && (fr->rva.level[rva_mode] <= tt+1))
											{
												char* comstr;
												/* description including its terminator, which may lie outside a broken frame */
												size_t desclen = strlen((char*)realdata+pos)+1;
												if(desclen > realsize-4) warning("ID3v2: RVA comment description runs past the frame, ignoring it");
												else
												{
													size_t comsize = realsize-4-desclen;
													if(VERBOSE3) fprintf(stderr, "Note: evaluating %s data for RVA\n", realdata+pos);
													if((comstr = (char*) malloc(comsize+1)) != NULL)
													{
														memcpy(comstr,realdata+realsize-comsize, comsize);
														comstr[comsize] = 0;
														/* hm, what about utf16 here? */
														fr->rva.gain[rva_mode] = atof(comstr);
														if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
														fr->rva.peak[rva_mode] = 0;
														fr->rva.level[rva_mode] = tt+1;
														free(comstr);
													}
													else error("could not allocate memory for rva comment interpretation");
												}
											}
											else
											{
												if(!strcasecmp((char*)realdata+pos, ""))
												{
													/* only add general comments */
													realdata[pos] = realdata[pos-4]; /* the encoding field copied */
													store_id3_text(&fr->id3v2.comment, (char*)realdata+pos, realsize-4);
												}
											}
										}
									}
									break;
									case extra: /* perhaps foobar2000's work */
									{
										/* Text encoding          $xx */
										/* unicode would hurt in string comparison... */
										/* Need the encoding byte plus at least one byte of description. */
										if(realsize > 1 && realdata[0] == 0)
										{
											int is_peak = 0;
											pos = 1;
											if(!strncasecmp((char*)realdata+pos, "replaygain_track_",17))
											{
												debug("ID3v2: track gain/peak");
												rva_mode = 0;
												if(!strcasecmp((char*)realdata+pos, "replaygain_track_peak")) is_peak = 1;
												else if(strcasecmp((char*)realdata+pos, "replaygain_track_gain")) rva_mode = -1;
											}
											else
											if(!strncasecmp((char*)realdata+pos, "replaygain_album_",17))
											{
												debug("ID3v2: album gain/peak");
												rva_mode = 1;
												if(!strcasecmp((char*)realdata+pos, "replaygain_album_peak")) is_peak = 1;
												else if(strcasecmp((char*)realdata+pos, "replaygain_album_gain")) rva_mode = -1;
											}
											if((rva_mode > -1) && (fr->rva.level[rva_mode] <= tt+1))
											{
												char* comstr;
												/* description including its terminator, which may lie outside a broken frame */
												size_t desclen = strlen((char*)realdata+pos)+1;
												if(desclen > realsize-1) warning("ID3v2: replaygain description runs past the frame, ignoring it");
												else
												{
													size_t comsize = realsize-1-desclen;
													if(VERBOSE3) fprintf(stderr, "Note: evaluating %s data for RVA\n", realdata+pos);
													if((comstr = (char*) malloc(comsize+1)) != NULL)
													{
														memcpy(comstr,realdata+realsize-comsize, comsize);
														comstr[comsize] = 0;
														if(is_peak)
														{
															fr->rva.peak[rva_mode] = atof(comstr);
															if(VERBOSE3) fprintf(stderr, "Note: RVA peak %fdB\n", fr->rva.peak[rva_mode]);
														}
														else
														{
															fr->rva.gain[rva_mode] = atof(comstr);
															if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]);
														}
														fr->rva.level[rva_mode] = tt+1;
														free(comstr);
													}
													else error("could not allocate memory for rva comment interpretation");
												}
											}
										}
									}
									break;
									case rva2: /* "the" RVA tag */
									{
										#ifdef HAVE_INTTYPES_H
										/* starts with null-terminated identification */
										if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata);
										/* default: some individual value, mix mode */
										rva_mode = 0;
										if(   !strncasecmp((char*)realdata, "album", 5)
										   || !strncasecmp((char*)realdata, "audiophile", 10)
										   || !strncasecmp((char*)realdata, "user", 4))
											rva_mode = 1;
										if(fr->rva.level[rva_mode] <= tt+1)
										{
											pos += strlen((char*) realdata) + 1;
											/* channel type and the two adjustment bytes have to be inside the frame */
											if(pos+3 <= realsize && realdata[pos] == 1)
											{
												long adjust;
												++pos;
												/* only handle master channel */
												debug("ID3v2: it is for the master channel");
												/* two bytes adjustment, one byte for bits representing peak - n bytes for peak */
												/* 16 bit signed integer = dB * 512 */
												/* Assemble the big endian value unsigned and fold it into the signed range by hand,
												   casting the single bytes to short never produces a negative number. */
												adjust = (((long) realdata[pos]) << 8) | ((long) realdata[pos+1]);
												if(adjust >= 0x8000) adjust -= 0x10000;
												fr->rva.gain[rva_mode] = (float) adjust / 512;
												pos += 2;
												if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
												/* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */
												fr->rva.peak[rva_mode] = 0;
												fr->rva.level[rva_mode] = tt+1;
											}
										}
										#else
										warning("ID3v2: Cannot parse RVA2 value because I don't have a guaranteed 16 bit signed integer type");
										#endif
									}
									break;
									/* non-rva metainfo, simply store... */
									case artist:
										debug("ID3v2: parsing artist info");
										store_id3_text(&fr->id3v2.artist, (char*) realdata, realsize);
									break;
									case album:
										debug("ID3v2: parsing album info");
										store_id3_text(&fr->id3v2.album, (char*) realdata, realsize);
									break;
									case title:
										debug("ID3v2: parsing title info");
										store_id3_text(&fr->id3v2.title, (char*) realdata, realsize);
									break;
									case year:
										debug("ID3v2: parsing year info");
										store_id3_text(&fr->id3v2.year, (char*) realdata, realsize);
									break;
									case genre:
										debug("ID3v2: parsing genre info");
										store_id3_text(&fr->id3v2.genre, (char*) realdata, realsize);
									break;
									default: error1("ID3v2: unknown frame type %i", tt);
								}
								if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) free(realdata);
							}
							#undef BAD_FFLAGS
							#undef PRES_TAG_FFLAG
							#undef PRES_FILE_FFLAG
							#undef READ_ONLY_FFLAG
							#undef GROUP_FFLAG
							#undef COMPR_FFLAG
							#undef ENCR_FFLAG
							#undef UNSYNC_FFLAG
							#undef DATLEN_FFLAG
						}
						else break;
						#undef KNOWN_FRAMES
					}
				}
			}
			else
			{
				error("ID3v2: Duh, not able to read ID3v2 tag data.");
				ret = ret2;
			}
			free(tagdata);
		}
		else
		{
			error1("ID3v2Arrg! Unable to allocate %lu bytes for interpreting ID3v2 data - trying to skip instead.", length);
			if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) ret = ret2; /* will not store data in backbuff! */
			else ret = 0;
		}
	}
	/* skip footer if present: it is a 10 byte copy of the header following the length bytes of tag data */
	if((ret > 0) && (flags & FOOTER_FLAG) && ((ret2 = fr->rd->skip_bytes(fr,10)) < 0)) ret = ret2;
	return ret;
	#undef synchsafe_to_long
	#undef bytes_to_long
	#undef UNSYNC_FLAG
	#undef EXTHEAD_FLAG
	#undef EXP_FLAG
	#undef FOOTER_FLAG
	#undef UNKNOWN_FLAGS
}
/*
	Convert l bytes of ISO-8859-1 text at s to UTF-8 and store them in sb, followed by a zero byte.
	Bytes below 0x80 are copied, everything else becomes a two-byte sequence.
	If the string cannot be resized, sb is freed and nothing is stored.
*/
static void convert_latin1(mpg123_string *sb, unsigned char* s, size_t l)
{
	size_t utf8len = l; /* grows by one for each byte that needs two */
	size_t in = 0;
	unsigned char *out;

	while(in < l)
	{
		if(s[in] >= 0x80) ++utf8len;
		++in;
	}
	debug1("UTF-8 length: %lu", (unsigned long)utf8len);
	/* one extra zero byte for paranoia */
	if(!mpg123_resize_string(sb, utf8len+1))
	{
		mpg123_free_string(sb);
		return;
	}
	out = (unsigned char*) sb->p;
	for(in = 0; in < l; ++in)
	{
		unsigned char c = s[in];
		if(c >= 0x80)
		{
			/* two-byte encoding: the upper two bits go into the lead byte, the lower six into the trail byte */
			out[0] = 0xc0 | (c>>6);
			out[1] = 0x80 | (c & 0x3f);
			out += 2;
		}
		else *out++ = c;
	}
	sb->p[utf8len] = 0;
	sb->fill = utf8len+1;
}
/* Combine a lead and a trail surrogate into the code point they stand for. */
#define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 )
/* Number of UTF-8 bytes for a code point; 4 bytes are enough for everything a surrogate pair can express. */
#define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4)))
/* Classification of a 16 bit code unit: any surrogate (0xd800-0xdfff), lead (0xd800-0xdbff), trail (0xdc00-0xdfff). */
#define IS_SURROGATE(x) (((x) & 0xf800) == 0xd800)
#define IS_LEAD(x)      (((x) & 0xfc00) == 0xd800)
#define IS_TRAIL(x)     (((x) & 0xfc00) == 0xdc00)
/*
	Convert l bytes of UTF-16 text at s (without BOM) to UTF-8 and store them in sb, followed by a zero byte.
	str_be selects the byte order: non-zero for big endian, 0 for little endian.
	An odd trailing byte is ignored. Conversion stops before the first code unit that is
	a surrogate without a proper partner; what came before is kept.
	If the string cannot be resized, sb is freed and nothing is stored.
*/
static void convert_utf16(mpg123_string *sb, unsigned char* s, size_t l, int str_be)
{
	size_t i;
	unsigned char *p;
	size_t length = 0; /* the resulting UTF-8 length */
	/* Determine real length... extreme case can be more than utf-16 length. */
	size_t high = 0;
	size_t low  = 1;
	if(!str_be) /* little-endian */
	{
		high = 1; /* The second byte is the high byte. */
		low  = 0; /* The first byte is the low byte. */
	}
	/* first: get length, check for errors -- stop at first one */
	/* i+1 < l instead of i < l-1: the latter wraps around for l == 0 */
	for(i=0; i+1 < l; i+=2)
	{
		unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low];
		if(IS_SURROGATE(point))
		{
			unsigned long second = (i+3 < l) ? ((unsigned long) s[i+2+high]<<8) + s[i+2+low] : 0;
			if(IS_LEAD(point) && IS_TRAIL(second)) /* good... */
			{
				point = FULLPOINT(point,second);
				length += UTF8LEN(point); /* possibly 4 bytes */
				i+=2; /* We overstepped one word. */
			}
			else /* if no valid pair, break here */
			{
				l = i; /* Forget the half pair, END! */
				break;
			}
		}
		else length += UTF8LEN(point); /* 1,2 or 3 bytes */
	}
	if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
	/* Now really convert, skip checks as these have been done just before. */
	p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
	for(i=0; i+1 < l; i+=2)
	{
		unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low];
		if(IS_SURROGATE(codepoint)) /* a lead with valid trail, anything else ended the data in the first pass */
		{
			unsigned long second = ((unsigned long) s[i+2+high]<<8) + s[i+2+low];
			codepoint = FULLPOINT(codepoint,second);
			i+=2; /* We overstepped one word. */
		}
		if(codepoint < 0x80) *p++ = (unsigned char) codepoint;
		else if(codepoint < 0x800)
		{
			*p++ = 0xc0 | (codepoint>>6);
			*p++ = 0x80 | (codepoint & 0x3f);
		}
		else if(codepoint < 0x10000)
		{
			*p++ = 0xe0 | (codepoint>>12);
			*p++ = 0x80 | ((codepoint>>6) & 0x3f);
			*p++ = 0x80 | (codepoint & 0x3f);
		}
		else if (codepoint < 0x200000)
		{
			*p++ = 0xf0 | codepoint>>18;
			*p++ = 0x80 | ((codepoint>>12) & 0x3f);
			*p++ = 0x80 | ((codepoint>>6) & 0x3f);
			*p++ = 0x80 | (codepoint & 0x3f);
		} /* ignore bigger ones (that are not possible here anyway) */
	}
	/* Terminate right after what has been written, like the other converters do. */
	sb->p[length] = 0;
	sb->fill = length+1;
}
#undef IS_TRAIL
#undef IS_LEAD
#undef IS_SURROGATE
#undef UTF8LEN
#undef FULLPOINT
/* Converter for encoding 2 in text_converters: UTF-16 without BOM, always big endian.
   Hands the data unchanged to convert_utf16() with the big endian switch set. */
static void convert_utf16be(mpg123_string *sb, unsigned char* source, size_t len)
{
convert_utf16(sb, source, len, 1);
}
/*
	Converter for encoding 1 in text_converters: UTF-16 whose byte order is given by a leading BOM.
	FF FE selects little endian, FE FF big endian; the BOM itself is not converted.
	Data without BOM is taken as big endian and converted from its first byte on
	(instead of losing the first character to a BOM that is not there).
	Less than 2 bytes of input cannot hold anything useful: sb is freed then.
*/
static void convert_utf16bom(mpg123_string *sb, unsigned char* source, size_t len)
{
	int big_endian = 1; /* the fallback when there is no BOM */
	if(len < 2){ mpg123_free_string(sb); return; }
	if(source[0] == 0xff && source[1] == 0xfe) /* Little-endian */
	{
		big_endian = 0;
		source += 2;
		len -= 2;
	}
	else if(source[0] == 0xfe && source[1] == 0xff) /* Big-endian */
	{
		source += 2;
		len -= 2;
	}
	convert_utf16(sb, source, len, big_endian);
}
/*
	Converter for encoding 3 in text_converters: the text is UTF-8 already, so the len bytes
	are copied as they are and a zero byte is appended.
	If the string cannot be resized, sb is freed and nothing is stored.
*/
static void convert_utf8(mpg123_string *sb, unsigned char* source, size_t len)
{
	if(!mpg123_resize_string(sb, len+1))
	{
		mpg123_free_string(sb);
		return;
	}
	memcpy(sb->p, source, len);
	sb->p[len] = 0;
	sb->fill = len+1;
}

12
src/libmpg123/id3.h Normal file
View File

@@ -0,0 +1,12 @@
#ifndef MPG123_ID3_H
#define MPG123_ID3_H
/* really need it _here_! */
#include "frame.h"
/* Set up the ID3v2 storage of a fresh handle (version 0, text fields initialized). */
void init_id3(mpg123_handle *fr);
/* Free the ID3v2 text fields of a handle. */
void exit_id3(mpg123_handle *fr);
/* Mark the stored ID3v2 data as empty (version 0, fill of all text fields 0). */
void reset_id3(mpg123_handle *fr);
/* Parse (or skip) an ID3v2 tag whose first 4 bytes have been read already; the lowest byte of
   first4bytes is the major version. Returns 1 on success, 0 for soft problems, negative values on error. */
int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes);
#endif

164
src/libmpg123/l2tables.h Normal file
View File

@@ -0,0 +1,164 @@
/*
l2tables.h: Layer 2 Alloc tables
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
most other tables are calculated on program start (which is (of course) not ISO-conform)
Layer-3 huffman table is in huffman.h
*/
#ifndef _MPG123_L2TABLES_H_
#define _MPG123_L2TABLES_H_
/*
Layer 2 allocation table 0; II_select_table() in layer2.c pairs it with a subband limit of 27.
Entries are pairs as read by the layer 2 decoder (fields bits and d), grouped per subband:
The first field of a group's first entry is the width of the bit allocation index, the group holds 1<<width entries.
A non-zero allocation index picks an entry of the group: first field is the number of bits to read,
second field is an offset added to the read value when negative, otherwise a selector (3, 5 or 9) for the grouping tables.
Here: 3 groups of 16, 8 groups of 16, 12 groups of 8 and 4 groups of 4 entries.
*/
const struct al_table alloc_0[] = {
{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767} };
/*
Layer 2 allocation table 1; II_select_table() in layer2.c pairs it with a subband limit of 30.
Same entry layout as the other allocation tables: groups of 1<<width entries per subband,
with the width given by the first field of the group's first entry.
Here: 3 groups of 16, 8 groups of 16, 12 groups of 8 and 7 groups of 4 entries.
*/
const struct al_table alloc_1[] = {
{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
{4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511},
{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{3,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767},
{2,0},{5,3},{7,5},{16,-32767} };
/*
Layer 2 allocation table 2; II_select_table() in layer2.c pairs it with a subband limit of 8.
Same entry layout as the other allocation tables: groups of 1<<width entries per subband,
with the width given by the first field of the group's first entry.
Here: 2 groups of 16 and 6 groups of 8 entries.
*/
const struct al_table alloc_2[] = {
{4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},
{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},
{4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},
{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63} };
/*
Layer 2 allocation table 3; II_select_table() in layer2.c pairs it with a subband limit of 12.
Same entry layout as the other allocation tables: groups of 1<<width entries per subband,
with the width given by the first field of the group's first entry.
Here: 2 groups of 16 and 10 groups of 8 entries.
*/
const struct al_table alloc_3[] = {
{4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},
{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},
{4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},
{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63} };
/*
Layer 2 allocation table 4; II_select_table() in layer2.c chooses it for sampling_frequency >= 3
and pairs it with a subband limit of 30.
Same entry layout as the other allocation tables: groups of 1<<width entries per subband,
with the width given by the first field of the group's first entry.
Here: 4 groups of 16, 7 groups of 8 and 19 groups of 4 entries.
*/
const struct al_table alloc_4[] = {
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},
{4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},
{9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9},
{2,0},{5,3},{7,5},{10,9} };
#endif

153
src/libmpg123/layer1.c Normal file
View File

@@ -0,0 +1,153 @@
/*
layer1.c: the layer 1 decoder
copyright 1995-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
may have a few bugs after last optimization ...
*/
#include "mpg123lib_intern.h"
#include "getbits.h"
/*
	Layer 1, first step: read the bit allocation (4 bits each) and then the
	scale factor indices (6 bits each, only where the allocation is non-zero).
	With two channels, subbands below fr->jsbound carry one allocation per channel,
	the ones from jsbound on a single shared allocation but still two scale factor indices.
	balloc receives the allocations in reading order, scale_index is filled as one flat list.
*/
void I_step_one(unsigned int balloc[], unsigned int scale_index[2][SBLIMIT],mpg123_handle *fr)
{
	unsigned int *scf = (unsigned int *) scale_index;
	int pos = 0; /* running position in balloc */
	int sb;

	if(fr->stereo != 2)
	{
		/* Single channel: one allocation and at most one scale factor index per subband. */
		for(sb=0; sb<SBLIMIT; ++sb)
			balloc[sb] = getbits(fr, 4);

		for(sb=0; sb<SBLIMIT; ++sb)
		{
			if(balloc[sb])
				*scf++ = getbits(fr, 6);
		}
		return;
	}
	else
	{
		const int bound = fr->jsbound;

		/* Allocations: pairs below the bound, single values from there on. */
		for(sb=0; sb<bound; ++sb)
		{
			balloc[pos++] = getbits(fr, 4);
			balloc[pos++] = getbits(fr, 4);
		}
		for(sb=bound; sb<SBLIMIT; ++sb)
			balloc[pos++] = getbits(fr, 4);

		/* Scale factor indices, walking the allocations in the same order again. */
		pos = 0;
		for(sb=0; sb<bound; ++sb)
		{
			if(balloc[pos++])
				*scf++ = getbits(fr, 6);
			if(balloc[pos++])
				*scf++ = getbits(fr, 6);
		}
		for(sb=bound; sb<SBLIMIT; ++sb)
		{
			if(balloc[pos++])
			{
				*scf++ = getbits(fr, 6);
				*scf++ = getbits(fr, 6);
			}
		}
	}
}
/*
	Layer 1, second step: read the samples of one block and dequantize them into fraction.
	For a subband with non-zero allocation n, a sample of n+1 bits is read and turned into
	(sample - 2^n + 1) * fr->muls[n+1][scale index], subbands without allocation get 0.
	With two channels, subbands from fr->jsbound on share one sample that is scaled
	separately for each channel.
	Subbands from fr->down_sample_sblimit on are cleared at the end.
	The offset is written as -(1<<n) because left-shifting the negative value -1 is undefined behaviour in C.
*/
void I_step_two(real fraction[2][SBLIMIT],unsigned int balloc[2*SBLIMIT],
	unsigned int scale_index[2][SBLIMIT],mpg123_handle *fr)
{
	int i,n;
	int smpb[2*SBLIMIT]; /* values: 0-65535 */
	int *sample;
	register unsigned int *ba;
	register unsigned int *sca = (unsigned int *) scale_index;

	if(fr->stereo == 2)
	{
		int jsbound = fr->jsbound;
		register real *f0 = fraction[0];
		register real *f1 = fraction[1];

		/* First pass: fetch the raw samples in stream order. */
		ba = balloc;
		for(sample=smpb,i=0;i<jsbound;i++)
		{
			if((n = *ba++))
				*sample++ = getbits(fr, n+1);
			if((n = *ba++))
				*sample++ = getbits(fr, n+1);
		}
		for(i=jsbound;i<SBLIMIT;i++)
		{
			if((n = *ba++))
				*sample++ = getbits(fr, n+1);
		}

		/* Second pass: dequantize, walking allocations and samples in the same order. */
		ba = balloc;
		for(sample=smpb,i=0;i<jsbound;i++)
		{
			if((n=*ba++))
				*f0++ = (real) ( (-(1<<n)) + (*sample++) + 1) * fr->muls[n+1][*sca++];
			else
				*f0++ = 0.0;
			if((n=*ba++))
				*f1++ = (real) ( (-(1<<n)) + (*sample++) + 1) * fr->muls[n+1][*sca++];
			else
				*f1++ = 0.0;
		}
		for(i=jsbound;i<SBLIMIT;i++)
		{
			if((n=*ba++))
			{
				/* One shared sample, but each channel has its own scale factor. */
				real samp = ( (-(1<<n)) + (*sample++) + 1);
				*f0++ = samp * fr->muls[n+1][*sca++];
				*f1++ = samp * fr->muls[n+1][*sca++];
			}
			else
				*f0++ = *f1++ = 0.0;
		}
		for(i=fr->down_sample_sblimit;i<32;i++)
			fraction[0][i] = fraction[1][i] = 0.0;
	}
	else
	{
		register real *f0 = fraction[0];

		ba = balloc;
		for(sample=smpb,i=0;i<SBLIMIT;i++)
		{
			if((n = *ba++))
				*sample++ = getbits(fr, n+1);
		}

		ba = balloc;
		for(sample=smpb,i=0;i<SBLIMIT;i++)
		{
			if((n=*ba++))
				*f0++ = (real) ( (-(1<<n)) + (*sample++) + 1) * fr->muls[n+1][*sca++];
			else
				*f0++ = 0.0;
		}
		for(i=fr->down_sample_sblimit;i<32;i++)
			fraction[0][i] = 0.0;
	}
}
/*
	Decode one layer 1 frame: set the joint stereo bound, read allocation and scale factors once,
	then read, dequantize and synthesize SCALE_BLOCK blocks of samples.
	Returns the sum of what the synth functions return (collected in clip).
*/
int do_layer1(mpg123_handle *fr)
{
	unsigned int balloc[2*SBLIMIT];
	unsigned int scale_index[2][SBLIMIT];
	real aligned(16) fraction[2][SBLIMIT];
	int clip = 0;
	int block;
	int single = fr->single;
	const int stereo = fr->stereo;

	if(fr->mode == MPG_MD_JOINT_STEREO)
		fr->jsbound = (fr->mode_ext<<2)+4;
	else
		fr->jsbound = 32;

	/* I don't see mixing handled here */
	if(stereo == 1 || single == SINGLE_MIX)
		single = SINGLE_LEFT;

	I_step_one(balloc,scale_index,fr);

	for(block=0; block<SCALE_BLOCK; ++block)
	{
		I_step_two(fraction,balloc,scale_index,fr);

		if(single == SINGLE_STEREO)
		{
			clip += (fr->synth)( (real *) fraction[0], 0, fr, 0);
			clip += (fr->synth)( (real *) fraction[1], 1, fr, 1);
		}
		else
		{
			clip += (fr->synth_mono)( (real *) fraction[single], fr);
		}
	}

	return clip;
}

335
src/libmpg123/layer2.c Normal file
View File

@@ -0,0 +1,335 @@
/*
layer2.c: the layer 2 decoder, root of mpg123
copyright 1994-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
mpg123 started as mp2 decoder a long time ago...
*/
#include "mpg123lib_intern.h"
#include "l2tables.h"
#include "getbits.h"
/*
Grouping tables, filled once by init_layer2(): each group of three consecutive ints is one triple,
II_step_two() addresses a triple by the index read from the stream and uses each value to select a row of fr->muls.
*/
static int grp_3tab[32 * 3] = { 0, }; /* used: 27 */
static int grp_5tab[128 * 3] = { 0, }; /* used: 125 */
static int grp_9tab[1024 * 3] = { 0, }; /* used: 729 */
/* Base factors: init_layer2_stuff() hands entry k to the table initializer that fills fr->muls[k]. */
static const double mulmul[27] =
{
0.0 , -2.0/3.0 , 2.0/3.0 ,
2.0/7.0 , 2.0/15.0 , 2.0/31.0, 2.0/63.0 , 2.0/127.0 , 2.0/255.0 ,
2.0/511.0 , 2.0/1023.0 , 2.0/2047.0 , 2.0/4095.0 , 2.0/8191.0 ,
2.0/16383.0 , 2.0/32767.0 , 2.0/65535.0 ,
-4.0/5.0 , -2.0/5.0 , 2.0/5.0, 4.0/5.0 ,
-8.0/9.0 , -4.0/9.0 , -2.0/9.0 , 2.0/9.0 , 4.0/9.0 , 8.0/9.0
};
/*
	Fill the three grouping tables (3, 5 and 9 steps).
	Table entry n holds the triple for the base-len digits of n, lowest digit first,
	each digit translated through the matching row of base[].
*/
void init_layer2(void)
{
	const int base[3][9] =
	{
		{ 1 , 0, 2 , } ,
		{ 17, 18, 0 , 19, 20 , } ,
		{ 21, 1, 22, 23, 0, 24, 25, 2, 26 }
	};
	const int tablen[3] = { 3 , 5 , 9 };
	int *tables[3] = { grp_3tab , grp_5tab , grp_9tab };
	int t;

	for(t=0; t<3; ++t)
	{
		const int len = tablen[t];
		const int count = len*len*len;
		int *triple = tables[t];
		int n;

		for(n=0; n<count; ++n, triple+=3)
		{
			triple[0] = base[t][n % len];
			triple[1] = base[t][(n / len) % len];
			triple[2] = base[t][n / (len*len)];
		}
	}
}
/*
	Set up the 27 rows of fr->muls using the table initializer selected for this handle:
	the initializer fills a row from the matching mulmul factor and returns the position after
	its last entry, which is set to zero here.
*/
void init_layer2_stuff(mpg123_handle *fr)
{
	int row;
	for(row=0; row<27; ++row)
	{
		real *end = opt_init_layer2_table(fr)(fr, fr->muls[row], mulmul[row]);
		*end = 0.0;
	}
}
/*
	Generic table initializer: write the 63 values m * 2^(j/3) for j = 3, 2, ..., -59 to table.
	Returns the position after the last written value; fr is not used.
*/
real* init_layer2_table(mpg123_handle *fr, real *table, double m)
{
	int step;
	for(step=0; step<63; ++step)
		table[step] = m * pow(2.0,(double) (3-step) / 3.0);

	return table+63;
}
#ifdef OPT_MMXORSSE
/*
	Table initializer for the MMX/SSE builds: same 63 values m * 2^(j/3), j = 3 ... -59,
	as the generic variant, but additionally scaled by 16384 when fr->p.down_sample is zero.
	Returns the position after the last written value.
*/
real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m)
{
	int step;
	for(step=0; step<63; ++step)
	{
		if(!fr->p.down_sample)
			table[step] = 16384 * m * pow(2.0,(double) (3-step) / 3.0);
		else
			table[step] = m * pow(2.0,(double) (3-step) / 3.0);
	}
	return table+63;
}
#endif
/*
	Layer 2, first step: read bit allocation, scale factor selection info and scale factors.
	bit_alloc receives the allocation indices (two per subband with two channels; subbands
	from fr->jsbound on get the same value for both channels).
	For every non-zero allocation, 2 bits of selection info decide how many of the three
	scale factors (6 bits each) are actually transmitted; scale always receives three values.
*/
void II_step_one(unsigned int *bit_alloc,int *scale,mpg123_handle *fr)
{
	const int two_channels = fr->stereo-1; /* 0 for one channel, 1 for two */
	const int sblimit = fr->II_sblimit;
	const int jsbound = fr->jsbound;
	const int entries = fr->II_sblimit<<two_channels; /* number of allocation values */
	const struct al_table *group = fr->alloc; /* start of the current subband's table group */
	/* static unsigned int scfsi_buf[64]; */
	unsigned int scfsi_buf[64];
	unsigned int *scfsi = scfsi_buf;
	unsigned int *bita = bit_alloc;
	int width, n;

	if(two_channels)
	{
		/* Separate allocation for each channel below the joint stereo bound... */
		for(n=jsbound; n!=0; --n)
		{
			width = group->bits;
			*bita++ = (char) getbits(fr, width);
			*bita++ = (char) getbits(fr, width);
			group += (1<<width);
		}
		/* ...and a shared one from there on, stored twice. */
		for(n=sblimit-jsbound; n!=0; --n)
		{
			width = group->bits;
			bita[0] = (char) getbits(fr, width);
			bita[1] = bita[0];
			bita += 2;
			group += (1<<width);
		}

		bita = bit_alloc;
		for(n=entries; n!=0; --n)
		{
			if(*bita++)
				*scfsi++ = (char) getbits_fast(fr, 2);
		}
	}
	else /* mono */
	{
		for(n=sblimit; n!=0; --n)
		{
			width = group->bits;
			*bita++ = (char) getbits(fr, width);
			group += (1<<width);
		}

		bita = bit_alloc;
		for(n=sblimit; n!=0; --n)
		{
			if(*bita++)
				*scfsi++ = (char) getbits_fast(fr, 2);
		}
	}

	/* Scale factors: always three per non-zero allocation, some possibly repeated. */
	bita = bit_alloc;
	scfsi = scfsi_buf;
	for(n=entries; n!=0; --n)
	{
		if(!*bita++) continue;

		switch(*scfsi++)
		{
			case 0: /* three individual values */
				scale[0] = getbits_fast(fr, 6);
				scale[1] = getbits_fast(fr, 6);
				scale[2] = getbits_fast(fr, 6);
			break;
			case 1: /* first value used twice */
				scale[0] = scale[1] = getbits_fast(fr, 6);
				scale[2] = getbits_fast(fr, 6);
			break;
			case 2: /* one value for all */
				scale[0] = scale[1] = scale[2] = getbits_fast(fr, 6);
			break;
			default: /* case 3: second value used twice */
				scale[0] = getbits_fast(fr, 6);
				scale[1] = scale[2] = getbits_fast(fr, 6);
			break;
		}
		scale += 3;
	}
}
/*
Layer 2, second step: read and dequantize three samples per subband and channel into fraction[channel][0..2][subband].
bit_alloc and scale are the lists filled by II_step_one(); x1 selects which of the three scale factors of a subband to use
(do_layer2() passes the block number shifted right by two).
*/
void II_step_two(unsigned int *bit_alloc,real fraction[2][4][SBLIMIT],int *scale,mpg123_handle *fr,int x1)
{
int i,j,k,ba;
int stereo = fr->stereo;
int sblimit = fr->II_sblimit;
int jsbound = fr->jsbound;
const struct al_table *alloc2,*alloc1 = fr->alloc;
unsigned int *bita=bit_alloc;
int d1,step;
/* Subbands below the joint stereo bound: every channel has its own allocation and samples. */
/* alloc1 points to the table group of the current subband and is advanced by the group size 1<<step. */
for (i=0;i<jsbound;i++,alloc1+=(1<<step))
{
step = alloc1->bits;
for (j=0;j<stereo;j++)
{
if ( (ba=*bita++) )
{
/* The allocation index selects the entry of the group: k bits per read, d1 decides on the coding. */
k=(alloc2 = alloc1+ba)->bits;
if( (d1=alloc2->d) < 0)
{
/* Negative d1: three separate reads of k bits, each with d1 added, then scaled. */
real cm=fr->muls[k][scale[x1]];
fraction[j][0][i] = ((real) ((int)getbits(fr, k) + d1)) * cm;
fraction[j][1][i] = ((real) ((int)getbits(fr, k) + d1)) * cm;
fraction[j][2][i] = ((real) ((int)getbits(fr, k) + d1)) * cm;
}
else
{
/* Otherwise d1 (3, 5 or 9) selects a grouping table: one read of k bits gives the index of a triple, */
/* each value of the triple selects the row of fr->muls to take the scaled value from. */
const int *table[] = { 0,0,0,grp_3tab,0,grp_5tab,0,0,0,grp_9tab };
unsigned int idx,*tab,m=scale[x1];
idx = (unsigned int) getbits(fr, k);
tab = (unsigned int *) (table[d1] + idx + idx + idx);
fraction[j][0][i] = fr->muls[*tab++][m];
fraction[j][1][i] = fr->muls[*tab++][m];
fraction[j][2][i] = fr->muls[*tab][m];
}
/* Three scale factors per allocated subband and channel. */
scale+=3;
}
else
fraction[j][0][i] = fraction[j][1][i] = fraction[j][2][i] = 0.0;
}
}
/* Subbands from the joint stereo bound on: one set of samples for both channels, scaled per channel. */
for (i=jsbound;i<sblimit;i++,alloc1+=(1<<step))
{
step = alloc1->bits;
bita++; /* channel 1 and channel 2 bitalloc are the same */
if ( (ba=*bita++) )
{
k=(alloc2 = alloc1+ba)->bits;
if( (d1=alloc2->d) < 0)
{
real cm;
/* The unscaled value is stored for channel 0 first and scaled for channel 1 with scale[x1+3]... */
cm=fr->muls[k][scale[x1+3]];
fraction[1][0][i] = (fraction[0][0][i] = (real) ((int)getbits(fr, k) + d1) ) * cm;
fraction[1][1][i] = (fraction[0][1][i] = (real) ((int)getbits(fr, k) + d1) ) * cm;
fraction[1][2][i] = (fraction[0][2][i] = (real) ((int)getbits(fr, k) + d1) ) * cm;
/* ...then channel 0 gets its own scaling with scale[x1]. */
cm=fr->muls[k][scale[x1]];
fraction[0][0][i] *= cm; fraction[0][1][i] *= cm; fraction[0][2][i] *= cm;
}
else
{
/* Grouped coding: same triple for both channels, looked up with each channel's scale factor. */
const int *table[] = { 0,0,0,grp_3tab,0,grp_5tab,0,0,0,grp_9tab };
unsigned int idx,*tab,m1,m2;
m1 = scale[x1]; m2 = scale[x1+3];
idx = (unsigned int) getbits(fr, k);
tab = (unsigned int *) (table[d1] + idx + idx + idx);
fraction[0][0][i] = fr->muls[*tab][m1]; fraction[1][0][i] = fr->muls[*tab++][m2];
fraction[0][1][i] = fr->muls[*tab][m1]; fraction[1][1][i] = fr->muls[*tab++][m2];
fraction[0][2][i] = fr->muls[*tab][m1]; fraction[1][2][i] = fr->muls[*tab][m2];
}
/* Scale factors of both channels are consumed. */
scale+=6;
}
else {
fraction[0][0][i] = fraction[0][1][i] = fraction[0][2][i] =
fraction[1][0][i] = fraction[1][1][i] = fraction[1][2][i] = 0.0;
}
/*
should we use individual scalefac for channel 2 or
is the current way the right one , where we just copy channel 1 to
channel 2 ??
The current 'strange' thing is, that we throw away the scalefac
values for the second channel ...!!
-> changed .. now we use the scalefac values of channel one !!
*/
}
/* Clear everything from the subband limit (or the smaller limit for downsampling) up to SBLIMIT. */
if(sblimit > (fr->down_sample_sblimit) )
sblimit = fr->down_sample_sblimit;
for(i=sblimit;i<SBLIMIT;i++)
for (j=0;j<stereo;j++)
fraction[j][0][i] = fraction[j][1][i] = fraction[j][2][i] = 0.0;
}
/*
	Choose allocation table and subband limit for the current frame and store them
	in fr->alloc and fr->II_sblimit.
	Table 4 is used for sampling_frequency >= 3, otherwise translate[] decides based on
	sampling frequency, channel count and bitrate index.
*/
static void II_select_table(mpg123_handle *fr)
{
	const int translate[3][2][16] =
	{
		{
			{ 0,2,2,2,2,2,2,0,0,0,1,1,1,1,1,0 } ,
			{ 0,2,2,0,0,0,1,1,1,1,1,1,1,1,1,0 }
		} ,
		{
			{ 0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0 } ,
			{ 0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0 }
		} ,
		{
			{ 0,3,3,3,3,3,3,0,0,0,1,1,1,1,1,0 } ,
			{ 0,3,3,0,0,0,1,1,1,1,1,1,1,1,1,0 }
		}
	};
	const struct al_table *tables[5] =
		{ alloc_0, alloc_1, alloc_2, alloc_3 , alloc_4 };
	const int sblims[5] = { 27 , 30 , 8, 12 , 30 };
	int choice = 4; /* For sampling_frequency >= 3. Or equivalent: (fr->lsf == 1) */

	if(fr->sampling_frequency < 3)
		choice = translate[fr->sampling_frequency][2-fr->stereo][fr->bitrate_index];

	fr->alloc = tables[choice];
	fr->II_sblimit = sblims[choice];
}
/*
	Decode one layer 2 frame: select the allocation table, fix the joint stereo bound
	(limited to the subband limit), read allocation and scale factors, then for each of the
	SCALE_BLOCK blocks dequantize three sample sets and synthesize them.
	Returns the sum of what the synth functions return (collected in clip).
*/
int do_layer2(mpg123_handle *fr)
{
	real aligned(16) fraction[2][4][SBLIMIT]; /* pick_table clears unused subbands */
	unsigned int bit_alloc[64];
	int scale[192];
	int clip = 0;
	int block, part;
	int single = fr->single;
	const int stereo = fr->stereo;

	II_select_table(fr);

	if(fr->mode == MPG_MD_JOINT_STEREO)
		fr->jsbound = (fr->mode_ext<<2)+4;
	else
		fr->jsbound = fr->II_sblimit;

	if(fr->jsbound > fr->II_sblimit)
	{
		fprintf(stderr, "Truncating stereo boundary to sideband limit.\n");
		fr->jsbound=fr->II_sblimit;
	}

	/* also, mix not really handled */
	if(stereo == 1 || single == SINGLE_MIX)
		single = SINGLE_LEFT;

	II_step_one(bit_alloc, scale, fr);

	for(block=0; block<SCALE_BLOCK; ++block)
	{
		/* A new scale factor out of the three every four blocks. */
		II_step_two(bit_alloc,fraction,scale,fr,block>>2);
		for(part=0; part<3; ++part)
		{
			if(single == SINGLE_STEREO)
			{
				clip += (fr->synth) (fraction[0][part], 0, fr, 0);
				clip += (fr->synth) (fraction[1][part], 1, fr, 1);
			}
			else
			{
				clip += (fr->synth_mono) (fraction[single][part], fr);
			}
		}
	}

	return clip;
}

1868
src/libmpg123/layer3.c Normal file

File diff suppressed because it is too large Load Diff

840
src/libmpg123/libmpg123.c Normal file
View File

@@ -0,0 +1,840 @@
#include "mpg123lib_intern.h"
#include "getbits.h"
/*
Sample position translation: with gapless support built in and the MPG123_GAPLESS flag set on the handle,
SAMPLE_ADJUST subtracts mh->begin_os from a position and SAMPLE_UNADJUST adds it again; otherwise both are the identity.
Both macros expect a handle named mh in scope.
*/
#ifdef GAPLESS
#define SAMPLE_ADJUST(x) ((x) - ((mh->p.flags & MPG123_GAPLESS) ? mh->begin_os : 0))
#define SAMPLE_UNADJUST(x) ((x) + ((mh->p.flags & MPG123_GAPLESS) ? mh->begin_os : 0))
#else
#define SAMPLE_ADJUST(x) (x)
#define SAMPLE_UNADJUST(x) (x)
#endif
/* The handle's ignoreframe, with negative values replaced by 0. */
#define SEEKFRAME(mh) ((mh)->ignoreframe < 0 ? 0 : (mh)->ignoreframe)
/* Set by mpg123_init(); mpg123_parnew() refuses to create handles while this is 0. */
static int initialized = 0;
#ifdef GAPLESS
/*
	Take the buffer after a frame decode (strictly: it is the data from frame fr->num!) and cut samples out.
	fr->buffer.fill may then be smaller than before...
	The end is trimmed before the beginning: fr->lastoff counts the leading samples of the undisturbed
	frame that are to be kept, so it has to be applied before the beginning is cut away.
	That order matters when fr->firstframe == fr->lastframe, otherwise the two cuts are independent.
*/
static void frame_buffercheck(mpg123_handle *fr)
{
	/* The last interesting (planned) frame: Only use some leading samples. */
	if(fr->lastoff && fr->num == fr->lastframe)
	{
		off_t byteoff = samples_to_bytes(fr, fr->lastoff);
		if(fr->buffer.fill > byteoff)
		{
			fr->buffer.fill = byteoff;
		}
		fr->lastoff = 0; /* Only enter here once... when you seek, lastoff should be reset. */
	}
	/* The first interesting frame: Skip some leading samples. */
	if(fr->firstoff && fr->num == fr->firstframe)
	{
		off_t byteoff = samples_to_bytes(fr, fr->firstoff);
		if(fr->buffer.fill > byteoff)
		{
			fr->buffer.fill -= byteoff;
			/* buffer.p != buffer.data only for own buffer */
			debug6("cutting %li samples/%li bytes on begin, own_buffer=%i at %p=%p, buf[1]=%i",
			       (long)fr->firstoff, (long)byteoff, fr->own_buffer, (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]);
			/* With the own buffer it is enough to move the read pointer, otherwise the data itself is shifted. */
			if(fr->own_buffer) fr->buffer.p = fr->buffer.data + byteoff;
			else memmove(fr->buffer.data, fr->buffer.data + byteoff, fr->buffer.fill);
			debug3("done cutting, buffer at %p =? %p, buf[1]=%i",
			       (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]);
		}
		else fr->buffer.fill = 0; /* Nothing is left of this frame. */
		fr->firstoff = 0; /* Only enter here once... when you seek, firstoff should be reset. */
	}
}
#endif
/*
	One-time library setup: verify the basic type sizes, compute the shared tables,
	check the decoders and mark the library as initialized.
	Returns MPG123_OK, or MPG123_BAD_TYPES when short is not 2 bytes or long is smaller than 4 bytes.
*/
int mpg123_init(void)
{
	const int short_fits = (sizeof(short) == 2);
	const int long_fits = (sizeof(long) >= 4);

	if(!short_fits || !long_fits) return MPG123_BAD_TYPES;

	init_layer2(); /* inits also shared tables with layer1 */
	init_layer3();
#ifndef OPT_MMX_ONLY
	prepare_decode_tables();
#endif
	check_decoders();

	initialized = 1;
	return MPG123_OK;
}
/* Counterpart to mpg123_init(); there is nothing to undo yet, but something later perhaps. */
void mpg123_exit(void)
{
	/* The state is looked at, but no action depends on it so far. */
	(void)initialized;
}
/* create a new handle with specified decoder, decoder can be "", "auto" or NULL for auto-detection */
mpg123_handle *mpg123_new(const char* decoder, int *error)
{
return mpg123_parnew(NULL, decoder, error);
}
/*
	...the full routine with optional initial parameters to override defaults.
	Returns the new handle or NULL on failure. If error is not NULL, it receives MPG123_OK or the reason:
	MPG123_NOT_INITIALIZED (mpg123_init() not called), MPG123_OUT_OF_MEM, MPG123_BAD_DECODER or MPG123_NO_BUFFERS.
*/
mpg123_handle *mpg123_parnew(mpg123_pars *mp, const char* decoder, int *error)
{
	mpg123_handle *fr = NULL;
	int err = MPG123_OK;

	if(!initialized) err = MPG123_NOT_INITIALIZED;
	else if((fr = malloc(sizeof *fr)) == NULL) err = MPG123_OUT_OF_MEM;
	else
	{
		frame_init_par(fr, mp);
		debug("cpu opt setting");
		if(frame_cpu_opt(fr, decoder) != 1) err = MPG123_BAD_DECODER;
		else if((frame_outbuffer(fr) != 0) || (frame_buffers(fr) != 0)) err = MPG123_NO_BUFFERS;
		else
		{
			opt_make_decode_tables(fr);
			fr->decoder_change = 1;
			/* happening on frame change instead:
			init_layer3_stuff(fr);
			init_layer2_stuff(fr); */
		}

		if(err != MPG123_OK)
		{
			/* Setup failed half-way: release what the handle holds and the handle itself. */
			frame_exit(fr);
			free(fr);
			fr = NULL;
		}
	}

	if(error != NULL) *error = err;

	return fr;
}
/*
	Switch an existing handle to the named decoder.
	Returns MPG123_OK (also when the decoder already is the active one) or MPG123_ERR,
	with mh->err set to MPG123_BAD_DECODER or MPG123_NO_BUFFERS.
	When the switch itself fails, frame_exit() is called on the handle.
*/
int mpg123_decoder(mpg123_handle *mh, const char* decoder)
{
	enum optdec wanted = dectype(decoder);
	int problem;

	if(mh == NULL) return MPG123_ERR;

	if(wanted == nodec)
	{
		mh->err = MPG123_BAD_DECODER;
		return MPG123_ERR;
	}
	if(wanted == mh->cpu_opts.type) return MPG123_OK;

	/* Now really change. */
	/* frame_exit(mh);
	frame_init(mh); */
	debug("cpu opt setting");
	if(frame_cpu_opt(mh, decoder) != 1) problem = MPG123_BAD_DECODER;
	/* New buffers for decoder are created in frame_buffers() */
	else if((frame_outbuffer(mh) != 0) || (frame_buffers(mh) != 0)) problem = MPG123_NO_BUFFERS;
	else
	{
		opt_make_decode_tables(mh);
		mh->decoder_change = 1;
		return MPG123_OK;
	}

	mh->err = problem;
	frame_exit(mh);
	return MPG123_ERR;
}
/*
	Set a parameter on a handle: mpg123_par() on the handle's parameter set.
	Returns MPG123_OK or MPG123_ERR, the specific error code being stored in mh->err.
*/
int mpg123_param(mpg123_handle *mh, int key, long val, double fval)
{
	int code;

	if(mh == NULL) return MPG123_ERR;

	code = mpg123_par(&mh->p, key, val, fval);
	if(code == MPG123_OK) return code;

	mh->err = code;
	return MPG123_ERR;
}
/*
	Set one parameter in a parameter set.
	key selects the parameter, val carries integer values and fval the floating point one
	(only used for MPG123_OUTSCALE in a FLOATOUT build).
	Returns MPG123_OK or the specific error code: MPG123_NO_GAPLESS, MPG123_BAD_RATE, MPG123_BAD_RVA
	or MPG123_BAD_PARAM for an unknown key. Nothing is changed when an error is returned.
*/
int mpg123_par(mpg123_pars *mp, int key, long val, double fval)
{
	int ret = MPG123_OK;
	switch(key)
	{
		case MPG123_VERBOSE:
			mp->verbose = val;
		break;
		case MPG123_FLAGS:
#ifndef GAPLESS
			if(val & MPG123_GAPLESS) ret = MPG123_NO_GAPLESS;
			else
#endif
			mp->flags = val;
			debug1("set flags to 0x%lx", (unsigned long) mp->flags);
		break;
		case MPG123_ADD_FLAGS:
#ifndef GAPLESS
			/* Same rule as for MPG123_FLAGS: gapless mode cannot be switched on when it is not built in. */
			if(val & MPG123_GAPLESS) ret = MPG123_NO_GAPLESS;
			else
#endif
			mp->flags |= val;
		break;
		case MPG123_FORCE_RATE: /* should this trigger something? */
			if(val > 96000) ret = MPG123_BAD_RATE;
			else mp->force_rate = val < 0 ? 0 : val; /* >0 means enable, 0 disable */
		break;
		case MPG123_DOWN_SAMPLE:
			if(val < 0 || val > 2) ret = MPG123_BAD_RATE;
			else mp->down_sample = (int)val;
		break;
		case MPG123_RVA:
			if(val < 0 || val > MPG123_RVA_MAX) ret = MPG123_BAD_RVA;
			else mp->rva = (int)val;
		break;
		case MPG123_DOWNSPEED:
			mp->halfspeed = val < 0 ? 0 : val; /* negative values count as 0 */
		break;
		case MPG123_UPSPEED:
			mp->doublespeed = val < 0 ? 0 : val; /* negative values count as 0 */
		break;
		case MPG123_ICY_INTERVAL:
			mp->icy_interval = val > 0 ? val : 0;
		break;
		case MPG123_OUTSCALE:
#ifdef FLOATOUT
			mp->outscale = fval;
#else
			mp->outscale = val;
#endif
		break;
		default:
			ret = MPG123_BAD_PARAM;
	}
	return ret;
}
/*
	Query a parameter of a handle: mpg123_getpar() on the handle's parameter set.
	Returns MPG123_OK or MPG123_ERR, the specific error code being stored in mh->err.
*/
int mpg123_getparam(mpg123_handle *mh, int key, long *val, double *fval)
{
	int code;

	if(mh == NULL) return MPG123_ERR;

	code = mpg123_getpar(&mh->p, key, val, fval);
	if(code == MPG123_OK) return code;

	mh->err = code;
	return MPG123_ERR;
}
/*
	Read one parameter from a parameter set.
	Integer values are stored to *val, the floating point MPG123_OUTSCALE of a FLOATOUT build to *fval;
	a NULL target pointer is skipped silently.
	Returns 0 on success, MPG123_BAD_PARAM for an unknown key.
*/
int mpg123_getpar(mpg123_pars *mp, int key, long *val, double *fval)
{
	switch(key)
	{
		case MPG123_VERBOSE:
			if(val != NULL) *val = mp->verbose;
			return 0;
		/* Both flag keys report the complete flag set. */
		case MPG123_FLAGS:
		case MPG123_ADD_FLAGS:
			if(val != NULL) *val = mp->flags;
			return 0;
		case MPG123_FORCE_RATE:
			if(val != NULL) *val = mp->force_rate;
			return 0;
		case MPG123_DOWN_SAMPLE:
			if(val != NULL) *val = mp->down_sample;
			return 0;
		case MPG123_RVA:
			if(val != NULL) *val = mp->rva;
			return 0;
		case MPG123_DOWNSPEED:
			if(val != NULL) *val = mp->halfspeed;
			return 0;
		case MPG123_UPSPEED:
			if(val != NULL) *val = mp->doublespeed;
			return 0;
		case MPG123_ICY_INTERVAL:
			if(val != NULL) *val = (long)mp->icy_interval;
			return 0;
		case MPG123_OUTSCALE:
#ifdef FLOATOUT
			if(fval != NULL) *fval = mp->outscale;
#else
			if(val != NULL) *val = mp->outscale;
#endif
			return 0;
		default:
			return MPG123_BAD_PARAM;
	}
}
/*
	Set the equalizer factor of one band (0 to 31) for the left, the right or both channels.
	Returns MPG123_OK, or MPG123_ERR with mh->err set to MPG123_BAD_BAND or MPG123_BAD_CHANNEL.
*/
int mpg123_eq(mpg123_handle *mh, int channel, int band, double val)
{
	int left, right;

	if(mh == NULL) return MPG123_ERR;

	if(band < 0 || band > 31)
	{
		mh->err = MPG123_BAD_BAND;
		return MPG123_ERR;
	}

	/* Exactly three channel values are accepted. */
	left  = (channel == (MPG123_LEFT|MPG123_RIGHT)) || (channel == MPG123_LEFT);
	right = (channel == (MPG123_LEFT|MPG123_RIGHT)) || (channel == MPG123_RIGHT);
	if(!left && !right)
	{
		mh->err=MPG123_BAD_CHANNEL;
		return MPG123_ERR;
	}

	if(right) mh->equalizer[1][band] = DOUBLE_TO_REAL(val);
	if(left)  mh->equalizer[0][band] = DOUBLE_TO_REAL(val);

	mh->have_eq_settings = TRUE;
	return MPG123_OK;
}
/*
	Open a stream from the given path: plain file access, no http!
	Anything open on the handle before is closed and the frame state reset.
	Returns MPG123_ERR for a NULL handle (like the other handle functions here do),
	otherwise the result of open_stream().
*/
int mpg123_open(mpg123_handle *mh, char *path)
{
	if(mh == NULL) return MPG123_ERR;

	mpg123_close(mh);
	frame_reset(mh);
	return open_stream(mh, path, -1); /* -1: no file descriptor given, the path counts */
}
/*
	Open a stream from an already opened file descriptor.
	Anything open on the handle before is closed and the frame state reset.
	Returns MPG123_ERR for a NULL handle (like the other handle functions here do),
	otherwise the result of open_stream().
*/
int mpg123_open_fd(mpg123_handle *mh, int fd)
{
	if(mh == NULL) return MPG123_ERR;

	mpg123_close(mh);
	frame_reset(mh);
	return open_stream(mh, NULL, fd); /* NULL: no path given, the descriptor counts */
}
/*
	Prepare the handle for input that is fed by the caller.
	Anything open on the handle before is closed and the frame state reset.
	Returns MPG123_ERR for a NULL handle (like the other handle functions here do),
	otherwise the result of open_feed().
*/
int mpg123_open_feed(mpg123_handle *mh)
{
	if(mh == NULL) return MPG123_ERR;

	mpg123_close(mh);
	frame_reset(mh);
	return open_feed(mh);
}
/*
	Bring the decoder setup in line with the current stream and output format:
	downsampling mode, subband limit, output block size, channel handling, synth functions,
	layer 2/3 tables and RVA.
	Returns 0 on success, -1 when the flexible rate setup or the synth function selection fails.
*/
int decode_update(mpg123_handle *mh)
{
	long native_rate = frame_freq(mh);
	int shift;
	debug("updating decoder structure");

	/* Output at full, half or quarter rate is mode 0, 1 or 2; anything else needs mode 3, the flexible (fixed) rate. */
	for(shift=0; shift<3; ++shift)
	{
		if(mh->af.rate == native_rate>>shift) break;
	}
	mh->down_sample = shift;

	if(mh->down_sample < 3)
	{
		mh->down_sample_sblimit = SBLIMIT>>(mh->down_sample);
		/* With downsampling I get less samples per frame */
		mh->outblock = sizeof(sample_t)*mh->af.channels*(spf(mh)>>mh->down_sample);
	}
	else
	{
		if(synth_ntom_set_step(mh) != 0) return -1;

		if(frame_freq(mh) > mh->af.rate)
		{
			mh->down_sample_sblimit = SBLIMIT * mh->af.rate;
			mh->down_sample_sblimit /= frame_freq(mh);
		}
		else mh->down_sample_sblimit = SBLIMIT;

		mh->outblock = sizeof(sample_t) * mh->af.channels *
		               ( ( NTOM_MUL-1+spf(mh)
		                   * (((size_t)NTOM_MUL*mh->af.rate)/frame_freq(mh))
		                 )/NTOM_MUL );
	}

	/* A forced mono mode from the flags wins over the choice by channel count. */
	if(mh->p.flags & MPG123_FORCE_MONO)
		mh->single = (mh->p.flags & MPG123_FORCE_MONO)-1;
	else if(mh->af.channels == 1)
		mh->single = SINGLE_MIX;
	else
		mh->single = SINGLE_STEREO;

	if(set_synth_functions(mh) != 0) return -1;

	init_layer3_stuff(mh);
	init_layer2_stuff(mh);
	do_rva(mh);

	return 0;
}
/* Buffer size in bytes computed as: sample size * 2 * 1152 * NTOM_MAX. */
size_t mpg123_safe_buffer()
{
	const size_t bytes = sizeof(sample_t)*2*1152*NTOM_MAX;
	return bytes;
}
/* The output block size of the handle; without a handle, the value of mpg123_safe_buffer(). */
size_t mpg123_outblock(mpg123_handle *mh)
{
	if(mh == NULL) return mpg123_safe_buffer();

	return mh->outblock;
}
/*
Read frames until one is reached that is to be decoded (frame number not below mh->firstframe).
Frames in the range from mh->ignoreframe up to mh->firstframe are decoded with the output thrown away.
When the decoder or the stream header changed, output format and decoder setup are updated before returning.
Returns MPG123_OK when a frame is ready, MPG123_NEED_MORE when the reader wants more data,
MPG123_DONE at the end of the stream or after mh->lastframe, MPG123_ERR on errors.
*/
static int get_next_frame(mpg123_handle *mh)
{
/* Start with a pending decoder change, a big header change seen while reading adds to that. */
int change = mh->decoder_change;
do
{
int b;
/* Decode & discard some frame(s) before beginning. */
if(mh->to_ignore && mh->num < mh->firstframe && mh->num >= mh->ignoreframe)
{
debug1("ignoring frame %li", (long)mh->num);
/* Decoder structure must be current! decode_update has been called before... */
(mh->do_layer)(mh); mh->buffer.fill = 0;
mh->to_ignore = mh->to_decode = FALSE;
}
/* Read new frame data; possibly breaking out here for MPG123_NEED_MORE. */
debug("read frame");
mh->to_decode = FALSE;
b = read_frame(mh); /* That sets to_decode only if a full frame was read. */
debug3("read of frame %li returned %i (to_decode=%i)", mh->num, b, mh->to_decode);
if(b == MPG123_NEED_MORE) return MPG123_NEED_MORE; /* need another call with data */
else if(b <= 0)
{
/* More sophisticated error control? */
if(b==0 || mh->rdat.filepos == mh->rdat.filelen)
{ /* We simply reached the end. */
/* The count of frames is known now: one more than the last frame number. */
mh->track_frames = mh->num + 1;
return MPG123_DONE;
}
else return MPG123_ERR; /* Some real error. */
}
/* Now, there should be new data to decode ... and also possibly new stream properties */
if(mh->header_change > 1)
{
debug("big header change");
change = 1;
}
} while(mh->num < mh->firstframe);
/* When we start actually using the CRC, this could move into the loop... */
/* A question of semantics ... should I fold start_frame and frame_number into firstframe/lastframe? */
/* A negative lastframe means that there is no planned end. */
if(mh->lastframe >= 0 && mh->num > mh->lastframe)
{
mh->to_decode = mh->to_ignore = FALSE;
return MPG123_DONE;
}
if(change)
{
int b = frame_output_format(mh); /* Select the new output format based on given constraints. */
if(b < 0) return MPG123_ERR; /* not nice to fail here... perhaps once should add possibility to repeat this step */
if(decode_update(mh) < 0) return MPG123_ERR; /* dito... */
mh->decoder_change = 0;
if(b == 1) mh->new_format = 1; /* Store for later... */
#ifdef GAPLESS
/* Only on the first pass after opening/reset, as long as mh->fresh is set. */
if(mh->fresh)
{
b=0;
/* Prepare offsets for gapless decoding. */
frame_gapless_realinit(mh);
frame_set_frameseek(mh, mh->num);
mh->fresh = 0;
/* Could this possibly happen? With a real big gapless offset... */
/* The recursive call reads on until the possibly changed firstframe is reached. */
if(mh->num < mh->firstframe) b = get_next_frame(mh);
if(b < 0) return b; /* Could be error, need for more, new format... */
}
#endif
}
return MPG123_OK;
}
/*
	Decode _one_ frame into the handle's own buffer. The location of the decoded data is stored in <audio>,
	the number of available bytes in <bytes>.
	The buffer contents will be lost on the next call to mpg123_decode_frame.
	MPG123_OK -- successfully decoded the frame, you get your output data
	MPG123_DONE -- This is it. End.
	MPG123_ERR -- some error occurred...
	MPG123_NEW_FORMAT -- new frame was read, it results in changed output format -> will be decoded on next call
	MPG123_NEED_MORE -- that should not happen as this function is intended for in-library stream reader but if you force it...
	MPG123_NO_SPACE -- not enough space in buffer for safe decoding, also should not happen
	num will be updated to the last decoded frame number (may possibly _not_ increase, p.ex. when format changed).
*/
int mpg123_decode_frame(mpg123_handle *mh, off_t *num, unsigned char **audio, size_t *bytes)
{
	if(mh == NULL) return MPG123_ERR;
	if(mh->buffer.size < mh->outblock) return MPG123_NO_SPACE;

	mh->buffer.fill = 0; /* always start fresh */
	*bytes = 0;

	while(TRUE)
	{
		/* Nothing to decode yet: fetch a frame and look again. */
		if(!mh->to_decode)
		{
			int b = get_next_frame(mh);
			if(b < 0) return b;
			debug1("got next frame, %i", mh->to_decode);
			continue;
		}

		/* A format change is reported first, the frame stays around for the next call. */
		if(mh->new_format)
		{
			mh->new_format = 0;
			return MPG123_NEW_FORMAT;
		}

		*num = mh->num;
		debug("decoding");
		mh->clip += (mh->do_layer)(mh);
		mh->to_decode = mh->to_ignore = FALSE;
		mh->buffer.p = mh->buffer.data;
#ifdef GAPLESS
		/* This checks for individual samples to skip, for gapless mode or sample-accurate seek. */
		frame_buffercheck(mh);
#endif
		*audio = mh->buffer.p;
		*bytes = mh->buffer.fill;
		return MPG123_OK;
	}

	return MPG123_ERR;
}
/* Decode into out without feeding any input: forwards to mpg123_decode() with a NULL, zero-sized input buffer. */
ssize_t mpg123_read(mpg123_handle *mh, unsigned char *out, size_t size, size_t *done)
{
	ssize_t status = mpg123_decode(mh, NULL, 0, out, size, done);
	return status;
}
/*
The old picture:
while(1) {
len = read(0,buf,16384);
if(len <= 0)
break;
ret = decodeMP3(&mp,buf,len,out,8192,&size);
while(ret == MP3_OK) {
write(1,out,size);
ret = decodeMP3(&mp,NULL,0,out,8192,&size);
}
}
*/
/*
	Optionally feed input bytes, then decode and copy output into the caller's buffer.
	mh:        decoder handle; NULL yields MPG123_ERR.
	inmemory:  input bytes, handed to feed_more() only when inmemsize > 0.
	outmemory: destination for decoded bytes, outmemsize bytes large.
	done:      receives the number of bytes written to outmemory (reset to 0 on entry).
	Returns MPG123_OK once the output buffer is full, MPG123_NEW_FORMAT / MPG123_NO_SPACE / MPG123_ERR
	as set below, or the negative code returned by get_next_frame().
*/
int mpg123_decode(mpg123_handle *mh,unsigned char *inmemory, size_t inmemsize, unsigned char *outmemory, size_t outmemsize, size_t *done)
{
int ret = MPG123_OK;
/* NOTE(review): done is dereferenced without a NULL check, and before mh is checked. */
*done = 0;
if(mh == NULL) return MPG123_ERR;
/* Hand new input to the feeder first; a result of -1 aborts the call. */
if(inmemsize > 0)
if(feed_more(mh, inmemory, inmemsize) == -1) return MPG123_ERR;
/* ret is never changed inside the loop; every exit is an explicit return. */
while(ret == MPG123_OK)
{
debug3("decode loop, fill %i (%li vs. %li)", mh->buffer.fill, (long)mh->num, (long)mh->firstframe);
/* Decode a frame that has been read before.
This only happens when buffer is empty! */
if(mh->to_decode)
{
/* Report a format change once, before decoding the frame that carries it. */
if(mh->new_format)
{
mh->new_format = 0;
return MPG123_NEW_FORMAT;
}
/* Refuse to decode when the internal buffer cannot take one more output block. */
if(mh->buffer.size - mh->buffer.fill < mh->outblock) return MPG123_NO_SPACE;
mh->clip += (mh->do_layer)(mh);
mh->to_decode = mh->to_ignore = FALSE;
/* Reading restarts at the beginning of the internal buffer. */
mh->buffer.p = mh->buffer.data;
debug2("decoded frame %li, got %li samples in buffer", mh->num, mh->buffer.fill / (samples_to_bytes(mh, 1)));
#ifdef GAPLESS
frame_buffercheck(mh); /* Seek & gapless. */
#endif
}
if(mh->buffer.fill) /* Copy (part of) the decoded data to the caller's buffer. */
{
/* get what is needed - or just what is there */
/* NOTE(review): a is an int while the operands are size_t; very large sizes would be truncated. */
int a = mh->buffer.fill > (outmemsize - *done) ? outmemsize - *done : mh->buffer.fill;
debug4("buffer fill: %i; copying %i (%i - %i)", mh->buffer.fill, a, outmemsize, *done);
memcpy(outmemory, mh->buffer.p, a);
/* less data in frame buffer, less needed, output pointer increase, more data given... */
mh->buffer.fill -= a;
outmemory += a;
*done += a;
mh->buffer.p += a;
/* Stop as soon as the caller's buffer is full (this also covers outmemsize == 0). */
if(!(outmemsize > *done)) return ret;
}
else /* If we didn't have data, get a new frame. */
{
int b = get_next_frame(mh);
/* Negative codes from get_next_frame() are passed to the caller unchanged. */
if(b < 0) return b;
}
}
return ret;
}
/* Return the clip counter accumulated in the handle and reset it to zero; a NULL handle gives 0. */
long mpg123_clip(mpg123_handle *mh)
{
	long count;
	if(mh == NULL) return 0;
	count = mh->clip;
	mh->clip = 0;
	return count;
}
/*
Now, where are we? We need to know the last decoded frame... and what's left of it in buffer.
The current frame number can mean the last decoded frame or the to-be-decoded frame.
If mh->to_decode, then mh->num frames have been decoded, the frame mh->num now coming next.
If not, we have the possibility of mh->num+1 frames being decoded or nothing at all.
Then, there is firstframe...when we didn't reach it yet, then the next data will come from there.
mh->num starts with -1
*/
/*
	Report the current output position in (adjusted) samples.
	Returns MPG123_ERR for a NULL handle, or the negative code from get_next_frame()
	when a fresh track's first frame cannot be read.
*/
off_t mpg123_tell(mpg123_handle *mh)
{
	off_t frame;
	if(mh == NULL) return MPG123_ERR;
	if(mh->fresh && !mh->to_decode)
	{
		/* A fresh track needs its first frame read to get the basic info. */
		int status = get_next_frame(mh);
		if(status < 0) return status;
	}
	/* All info is at hand from here on. */
	debug5("tell: %li/%i first %li firstoff %li buffer %lu", (long)mh->num, mh->to_decode, (long)mh->firstframe, (long)mh->firstoff, (unsigned long)mh->buffer.fill);
	/* Not yet at (or only just standing on) the first wanted frame: position comes from the seek state. */
	if(mh->num < mh->firstframe || (mh->num == mh->firstframe && mh->to_decode))
		return SAMPLE_ADJUST(frame_tell_seek(mh));
	/* With a frame waiting, mh->num frames are counted; otherwise mh->num+1. Undelivered buffer bytes are subtracted. */
	frame = mh->to_decode ? mh->num : mh->num+1;
	return SAMPLE_ADJUST(frame_outs(mh, frame) - mh->buffer.fill);
}
/* Report the frame number the next output data will come from; MPG123_ERR for a NULL handle. */
off_t mpg123_tellframe(mpg123_handle *mh)
{
	if(mh == NULL) return MPG123_ERR;
	/* Before the first wanted frame, output will start there. */
	if(mh->num < mh->firstframe) return mh->firstframe;
	/* A frame waiting for decoding, or with bytes still left in the buffer, is the current one. */
	/* Consider firstoff? */
	if(mh->to_decode || mh->buffer.fill) return mh->num;
	return mh->num + 1;
}
/*
	Move the stream to the frame selected by SEEKFRAME(mh), dropping buffered output.
	Returns MPG123_OK (0) on success or the negative code from the reader's seek_frame().
*/
static int do_the_seek(mpg123_handle *mh)
{
	off_t target = SEEKFRAME(mh);
	int status;

	mh->buffer.fill = 0;
	if(mh->num < mh->firstframe) mh->to_decode = FALSE;

	/* The wanted frame is already waiting to be decoded: nothing to do. */
	if(mh->to_decode && mh->num == target) return MPG123_OK;
	/* The wanted frame is the very next one to be read: just discard the pending frame. */
	if(mh->num == target-1)
	{
		mh->to_decode = FALSE;
		return MPG123_OK;
	}

	/* Otherwise the reader has to reposition the stream. */
	status = mh->rd->seek_frame(mh, target);
	if(status < 0) return status;
	/* Per the original note: only mh->to_ignore is TRUE at this point. */
	if(mh->num < mh->firstframe) mh->to_decode = FALSE;
	return 0;
}
/*
	Seek to a sample position, fseek()-style.
	sampleoff: offset in adjusted samples, interpreted according to whence.
	whence:    SEEK_SET, SEEK_CUR or SEEK_END; anything else sets MPG123_BAD_WHENCE.
	Returns the resulting position from mpg123_tell(), or a negative code on failure
	(with mh->err set for the whence-related errors).
*/
off_t mpg123_seek(mpg123_handle *mh, off_t sampleoff, int whence)
{
off_t pos = mpg123_tell(mh); /* adjusted samples */
debug1("pos=%li", (long)pos);
if(pos < 0) return pos; /* mh == NULL is covered in mpg123_tell() */
switch(whence)
{
case SEEK_CUR: pos += sampleoff; break;
case SEEK_SET: pos = sampleoff; break;
case SEEK_END:
/* The offset is subtracted from the known end; the else branch below belongs to whichever if the preprocessor keeps. */
#ifdef GAPLESS
if(mh->end_os >= 0) pos = SAMPLE_ADJUST(mh->end_os) - sampleoff;
#else
if(mh->track_frames > 0) pos = SAMPLE_ADJUST(frame_outs(mh, mh->track_frames)) - sampleoff;
#endif
else
{
mh->err = MPG123_NO_SEEK_FROM_END;
return MPG123_ERR;
}
break;
default: mh->err = MPG123_BAD_WHENCE; return MPG123_ERR;
}
/* Positions before the start are clamped to the start. */
if(pos < 0) pos = 0;
/* pos now holds the wanted sample offset in adjusted samples */
frame_set_seek(mh, SAMPLE_UNADJUST(pos));
/* pos is reused as a status variable here. */
pos = do_the_seek(mh);
if(pos < 0) return pos;
return mpg123_tell(mh);
}
/*
A bit more tricky... libmpg123 does not do the seeking itself.
All it can do is to ignore frames until the wanted one is there.
The caller doesn't know where a specific frame starts and mpg123 also only knows the general region after it scanned the file.
Well, it is tricky...
*/
/*
	Seek variant for feed mode: works out the target like mpg123_seek() and reports through
	*input_offset an input position for the caller's next feed.
	Returns the resulting position from mpg123_tell(), or a negative code on failure.
*/
off_t mpg123_feedseek(mpg123_handle *mh, off_t sampleoff, int whence, off_t *input_offset)
{
off_t pos = mpg123_tell(mh); /* adjusted samples */
debug3("seek from %li to %li (whence=%i)", (long)pos, (long)sampleoff, whence);
if(pos < 0) return pos; /* mh == NULL is covered in mpg123_tell() */
switch(whence)
{
case SEEK_CUR: pos += sampleoff; break;
case SEEK_SET: pos = sampleoff; break;
case SEEK_END:
/* The else branch below belongs to whichever if the preprocessor keeps. */
#ifdef GAPLESS
if(mh->end_os >= 0) pos = SAMPLE_ADJUST(mh->end_os) - sampleoff;
#else
if(mh->track_frames > 0) pos = SAMPLE_ADJUST(frame_outs(mh, mh->track_frames)) - sampleoff;
#endif
else
{
mh->err = MPG123_NO_SEEK_FROM_END;
return MPG123_ERR;
}
break;
default: mh->err = MPG123_BAD_WHENCE; return MPG123_ERR;
}
/* Positions before the start are clamped to the start. */
if(pos < 0) pos = 0;
frame_set_seek(mh, SAMPLE_UNADJUST(pos));
/* From here on pos holds a frame number, not a sample offset. */
pos = SEEKFRAME(mh);
mh->buffer.fill = 0;
/* Shortcuts without modifying input stream. */
/* NOTE(review): input_offset is dereferenced without a NULL check; firstpos + filelen is presumably the end of the data fed so far -- confirm. */
*input_offset = mh->rdat.firstpos + mh->rdat.filelen;
if(mh->num < mh->firstframe) mh->to_decode = FALSE;
if(mh->num == pos && mh->to_decode) goto feedseekend;
/* NOTE(review): unlike do_the_seek(), this shortcut leaves mh->to_decode untouched -- confirm that is intended. */
if(mh->num == pos-1) goto feedseekend;
/* Whole way. */
/* pos is passed by address, so frame_index_find() may replace it with the frame it actually found. */
*input_offset = feed_set_pos(mh, frame_index_find(mh, SEEKFRAME(mh), &pos));
/* NOTE(review): mh->num is overwritten before the error check on *input_offset below. */
mh->num = pos-1; /* The next read frame will have num = pos. */
if(*input_offset < 0) return MPG123_ERR;
feedseekend:
return mpg123_tell(mh);
}
/*
	Seek in units of MPEG frames, fseek()-style.
	offset: frame offset, interpreted according to whence (SEEK_SET, SEEK_CUR, SEEK_END).
	Returns the resulting frame from mpg123_tellframe(), or a negative code on failure
	(mh->err is set for the whence-related errors).
*/
off_t mpg123_seek_frame(mpg123_handle *mh, off_t offset, int whence)
{
	off_t target = 0;
	if(mh == NULL) return MPG123_ERR;
	if(mh->fresh && !mh->to_decode)
	{
		/* A fresh track needs its first frame read to get the basic info. */
		int status = get_next_frame(mh);
		if(status < 0) return status;
	}

	/* Could play games here with to_decode... */
	if(whence == SEEK_CUR) target = mh->num + offset;
	else if(whence == SEEK_SET) target = offset;
	else if(whence == SEEK_END)
	{
		/* Seeking from the end needs a known track length. */
		if(!(mh->track_frames > 0))
		{
			mh->err = MPG123_NO_SEEK_FROM_END;
			return MPG123_ERR;
		}
		target = mh->track_frames - offset;
	}
	else
	{
		mh->err = MPG123_BAD_WHENCE;
		return MPG123_ERR;
	}

	/* Clamp to the start, and to the track length when that is known. */
	if(target < 0) target = 0;
	/* Hm, do we need to seek right past the end? */
	else if(mh->track_frames > 0 && target >= mh->track_frames) target = mh->track_frames;

	frame_set_frameseek(mh, target);
	/* target doubles as the status of the actual seek. */
	target = do_the_seek(mh);
	if(target < 0) return target;
	return mpg123_tellframe(mh);
}
/* Return the handle's metadata flags, or 0 when there is no handle. */
int mpg123_meta_check(mpg123_handle *mh)
{
	return (mh != NULL) ? mh->metaflags : 0;
}
/*
	Point *v1 / *v2 at the handle's ID3 data.
	Either output pointer may be NULL; those given are reset to NULL first.
	Returns MPG123_ERR for a NULL handle, MPG123_OK otherwise (also when there is no ID3 data).
*/
int mpg123_id3(mpg123_handle *mh, mpg123_id3v1 **v1, mpg123_id3v2 **v2)
{
	const int want_v1 = (v1 != NULL);
	const int want_v2 = (v2 != NULL);

	if(want_v1) *v1 = NULL;
	if(want_v2) *v2 = NULL;
	if(mh == NULL) return MPG123_ERR;

	/* Nothing to hand out without any ID3 flag set. */
	if(!(mh->metaflags & MPG123_ID3)) return MPG123_OK;

	/* ID3v1 is only handed out when the reader flagged an ID3 tag. */
	if(want_v1 && (mh->rdat.flags & READER_ID3TAG)) *v1 = (mpg123_id3v1*) mh->id3buf;
	if(want_v2) *v2 = &mh->id3v2;
	/* Keep the "there is ID3" bits, clear the "new since last query" bit. */
	mh->metaflags |= MPG123_ID3;
	mh->metaflags &= ~MPG123_NEW_ID3;
	return MPG123_OK;
}
/*
	Point *icy_meta at the handle's ICY metadata string.
	icy_meta: storage for the result; it is reset to NULL first and stays NULL when there is no ICY data.
	Returns MPG123_ERR for a NULL handle, MPG123_ERR with mh->err = MPG123_ERR_NULL when
	icy_meta itself is NULL, MPG123_OK otherwise.
*/
int mpg123_icy(mpg123_handle *mh, char **icy_meta)
{
	/* Reset the output before any early return, but never write through a NULL pointer. */
	if(icy_meta != NULL) *icy_meta = NULL;
	if(mh == NULL) return MPG123_ERR;
	if(icy_meta == NULL)
	{
		mh->err = MPG123_ERR_NULL;
		return MPG123_ERR;
	}
	if(mh->metaflags & MPG123_ICY)
	{
		*icy_meta = mh->icy.data;
		/* Keep the "there is ICY" bits, clear the "new since last query" bit. */
		mh->metaflags |= MPG123_ICY;
		mh->metaflags &= ~MPG123_NEW_ICY;
	}
	return MPG123_OK;
}
/* Close the handle's reader (if it has one with a close routine) and detach it; MPG123_ERR for a NULL handle. */
int mpg123_close(mpg123_handle *mh)
{
	if(mh == NULL) return MPG123_ERR;
	if(mh->rd != NULL)
	{
		if(mh->rd->close != NULL) mh->rd->close(mh);
		mh->rd = NULL;
	}
	return MPG123_OK;
}
/* Close and destroy a handle; a NULL handle is ignored. */
void mpg123_delete(mpg123_handle *mh)
{
	if(mh == NULL) return;
	mpg123_close(mh);
	frame_exit(mh); /* free buffers in frame */
	free(mh); /* free struct; cast? */
}
/*
	Human-readable messages, indexed by error code (see mpg123_plain_strerror()).
	Every entry must end with a comma: adjacent string literals are concatenated by the
	compiler, which would silently shift all following messages by one.
*/
static const char *mpg123_error[] =
{
	"No error... (code 0)",
	"Unable to set up output format! (code 1)",
	"Invalid channel number specified. (code 2)",
	"Invalid sample rate specified. (code 3)",
	"Unable to allocate memory for 16 to 8 converter table! (code 4)",
	"Bad parameter id! (code 5)",
	"Bad buffer given -- invalid pointer or too small size. (code 6)",
	"Out of memory -- some malloc() failed, (code 7)",
	"You didn't initialize the library! (code 8)",
	"Invalid decoder choice. (code 9)",
	"Invalid mpg123 handle. (code 10)",
	"Unable to initialize frame buffers (out of memory?)! (code 11)",
	"Invalid RVA mode. (code 12)",
	"This build doesn't support gapless decoding. (code 13)",
	"Not enough buffer space. (code 14)",
	"Incompatible numeric data types. (code 15)",
	"Bad equalizer band. (code 16)",
	"Null pointer given where valid storage address needed. (code 17)",
	"Some problem reading the stream. (code 18)",
	"Cannot seek from end (end is not known). (code 19)",
	"Invalid \"whence\" for seek function. (code 20)"
};
/* Map an error code to its message; codes outside the mpg123_error table get a generic text. */
const char* mpg123_plain_strerror(int errcode)
{
	const size_t known = sizeof(mpg123_error)/sizeof(char*);
	if(errcode < 0 || (size_t)errcode >= known)
		return "I have no idea - an unknown error code!";
	return mpg123_error[errcode];
}
/* Return the error code stored in the handle, or MPG123_BAD_HANDLE when there is no handle. */
int mpg123_errcode(mpg123_handle *mh)
{
	return (mh != NULL) ? mh->err : MPG123_BAD_HANDLE;
}
/* Message for the handle's current error code (a NULL handle yields the MPG123_BAD_HANDLE text). */
const char* mpg123_strerror(mpg123_handle *mh)
{
	int code = mpg123_errcode(mh);
	return mpg123_plain_strerror(code);
}

View File

@@ -0,0 +1,58 @@
mpg123_init
mpg123_exit
mpg123_new
mpg123_parnew
mpg123_delete
mpg123_decoders
mpg123_supported_decoders
mpg123_decoder
mpg123_plain_strerror
mpg123_strerror
mpg123_errcode
mpg123_rates
mpg123_encodings
mpg123_format_none
mpg123_format_all
mpg123_format
mpg123_format_support
mpg123_getformat
mpg123_param
mpg123_getparam
mpg123_new_pars
mpg123_delete_pars
mpg123_par
mpg123_getpar
mpg123_eq
mpg123_reset_eq
mpg123_volume
mpg123_volume_change
mpg123_getvolume
mpg123_position
mpg123_tpf
mpg123_open
mpg123_open_feed
mpg123_open_fd
mpg123_read
mpg123_decode
mpg123_decode_frame
mpg123_clip
mpg123_close
mpg123_seek_frame
mpg123_timeframe
mpg123_print_index
mpg123_seek
mpg123_info
mpg123_safe_buffer
mpg123_outblock
mpg123_replace_buffer
mpg123_init_string
mpg123_free_string
mpg123_resize_string
mpg123_copy_string
mpg123_add_string
mpg123_set_string
mpg123_meta_check
mpg123_id3
mpg123_icy
mpg123_tell
mpg123_feedseek

57
src/libmpg123/mangle.h Normal file
View File

@@ -0,0 +1,57 @@
/* mangle.h - This file has some CPP macros to deal with different symbol
* mangling across binary formats.
* (c)2002 by Felix Buenemann <atmosfear at users.sourceforge.net>
* File licensed under the GPL, see http://www.fsf.org/ for more info.
*/
/* ThOr: added the plain ASM_NAME
Also this is getting more generic with the align stuff. */
#ifndef __MANGLE_H
#define __MANGLE_H
#include "config.h"
/* MOVUAPS: the aligned SSE move (movaps) when CCALIGN is set, the unaligned one (movups) otherwise. */
#ifdef CCALIGN
#define MOVUAPS movaps
#else
#define MOVUAPS movups
#endif
/* ALIGNn: request n-byte alignment. With ASMALIGN_EXP the .align argument is the exponent (2^k bytes),
otherwise it is the byte count itself. */
#ifdef ASMALIGN_EXP
#define ALIGN4 .align 2
#define ALIGN8 .align 3
#define ALIGN16 .align 4
#define ALIGN32 .align 5
#else
#define ALIGN4 .align 4
#define ALIGN8 .align 8
#define ALIGN16 .align 16
#define ALIGN32 .align 32
#endif
/* Feel free to add more to the list, eg. a.out IMO */
/* Symbol names get a leading underscore on the platforms listed here and are used as-is elsewhere.
MANGLE yields a string literal, ASM_NAME a bare token. */
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OS2__) || \
(defined(__OpenBSD__) && !defined(__ELF__)) || defined(__APPLE__)
#define MANGLE(a) "_" #a
#define ASM_NAME(a) _##a
/* NOTE(review): ASM_VALUE is a bare token here but a string literal in the #else branch -- confirm the asymmetry is intended. */
#define ASM_VALUE(a) $_##a
#else
#define MANGLE(a) #a
#define ASM_NAME(a) a
#define ASM_VALUE(a) "$" #a
#endif
/* COMM: the third argument is dropped on Cygwin, MinGW and Apple, where .comm is emitted with two operands. */
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__APPLE__)
#define COMM(a,b,c) .comm a,b
#else
#define COMM(a,b,c) .comm a,b,c
#endif
/* more hacks for macosx; no .bss ... */
#ifdef __APPLE__
#define BSS .data
#else
#define BSS .bss
#endif
#endif /* !__MANGLE_H */

326
src/libmpg123/mpg123.h Normal file
View File

@@ -0,0 +1,326 @@
#ifndef MPG123_LIB_H
#define MPG123_LIB_H
/* These aren't actually in use... seems to work without using libtool. */
#ifdef BUILD_MPG123_DLL
/* The dll exports. */
#define EXPORT __declspec(dllexport)
#else
#ifdef LINK_MPG123_DLL
/* The exe imports. */
#define EXPORT __declspec(dllimport)
#else
/* Nothing on normal/UNIX builds */
#define EXPORT
#endif
#endif
#include <stdlib.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/* not decided... how anonymous should the handle be? */
struct mpg123_handle_struct;
struct mpg123_pars_struct;
typedef struct mpg123_handle_struct mpg123_handle;
typedef struct mpg123_pars_struct mpg123_pars;
/* non-threadsafe init/exit, call _once_ */
EXPORT int mpg123_init(void);
EXPORT void mpg123_exit(void);
/* Create a handle with optional choice of decoder (named by a string).
and optional retrieval of an error code to feed to mpg123_plain_strerror().
Optional means: Any of or both the parameters may be NULL.
The handle creation is successful when a non-NULL pointer is returned. */
EXPORT mpg123_handle *mpg123_new(const char* decoder, int *error);
/* Create a handle with preset parameters. */
EXPORT mpg123_handle *mpg123_parnew(mpg123_pars *mp, const char* decoder, int *error);
/* Delete handle, mh is either a valid mpg123 handle or NULL. */
EXPORT void mpg123_delete(mpg123_handle *mh);
/* Return NULL-terminated array of generally available decoder names... */
EXPORT char **mpg123_decoders();
/* ...or just the actually supported (by CPU) decoders. */
EXPORT char **mpg123_supported_decoders();
EXPORT int mpg123_decoder(mpg123_handle *mh, const char* decoder);
/* Error codes, counting up from MPG123_OK=0.
mpg123_plain_strerror() uses the code as an index into its message table, so the order here
and the order of the messages must stay in sync (MPG123_BAD_WHENCE is code 20). */
enum mpg123_errors
{
MPG123_OK=0, MPG123_BAD_OUTFORMAT, MPG123_BAD_CHANNEL, MPG123_BAD_RATE,
MPG123_ERR_16TO8TABLE, MPG123_BAD_PARAM, MPG123_BAD_BUFFER,
MPG123_OUT_OF_MEM, MPG123_NOT_INITIALIZED, MPG123_BAD_DECODER, MPG123_BAD_HANDLE,
MPG123_NO_BUFFERS, MPG123_BAD_RVA, MPG123_NO_GAPLESS, MPG123_NO_SPACE,
MPG123_BAD_TYPES, MPG123_BAD_BAND, MPG123_ERR_NULL, MPG123_ERR_READER,
MPG123_NO_SEEK_FROM_END, MPG123_BAD_WHENCE
};
/* Give string describing what error errcode means. */
EXPORT const char* mpg123_plain_strerror(int errcode);
/* Give string describing what error has occurred in the context of handle mh.
When a function operating on an mpg123 handle returns MPG123_ERR, you should check for the actual reason via
char *errmsg = mpg123_strerror(mh)
This function will catch mh == NULL and return the message for MPG123_BAD_HANDLE. */
EXPORT const char* mpg123_strerror(mpg123_handle *mh);
/* Return the plain errcode instead of a string. */
EXPORT int mpg123_errcode(mpg123_handle *mh);
/* 16 or 8 bits, signed or unsigned... all flags fit into 8 bits, float/double are not yet standard and special anyway */
#define MPG123_ENC_16 0x40 /* 0100 0000 */
#define MPG123_ENC_SIGNED 0x80 /* 1000 0000 */
/* True for an 8bit encoding, i.e. if the 16bit flag is not set; this changes in case float output will be integrated in the normal library. */
#define MPG123_ENC_8(f) (!((f) & MPG123_ENC_16))
#define MPG123_ENC_SIGNED_16 (MPG123_ENC_16|MPG123_ENC_SIGNED|0x10) /* 1101 0000 */
#define MPG123_ENC_UNSIGNED_16 (MPG123_ENC_16|0x20) /* 0110 0000 */
#define MPG123_ENC_UNSIGNED_8 0x01 /* 0000 0001 */
#define MPG123_ENC_SIGNED_8 (MPG123_ENC_SIGNED|0x02) /* 1000 0010 */
#define MPG123_ENC_ULAW_8 0x04 /* 0000 0100 */
#define MPG123_ENC_ALAW_8 0x08 /* 0000 1000 */
/* Union of all encodings above (0xff). The macro must not mention itself:
a self-reference is not expanded again and would be left over as an undeclared identifier. */
#define MPG123_ENC_ANY ( MPG123_ENC_SIGNED_16 | MPG123_ENC_UNSIGNED_16 | \
MPG123_ENC_UNSIGNED_8 | MPG123_ENC_SIGNED_8 | \
MPG123_ENC_ULAW_8 | MPG123_ENC_ALAW_8 )
/* They can be combined into one number to indicate mono and stereo... */
#define MPG123_MONO 1
#define MPG123_STEREO 2
/* 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000 or _one_ custom rate <=96000 */
#define MPG123_RATES 9 /* A future library version may not have less! */
EXPORT extern const long mpg123_rates[MPG123_RATES];
#define MPG123_ENCODINGS 6 /* A future library version may not have less! */
EXPORT extern const int mpg123_encodings[MPG123_ENCODINGS];
/* Accept no output format at all, use before specifying supported formats with mpg123_format */
EXPORT int mpg123_format_none(mpg123_handle *mh);
/* Accept all formats (also any custom rate you may set) -- this is default. */
EXPORT int mpg123_format_all(mpg123_handle *mh);
/*
Setting audio format support in detail:
rateindex: Index in rates list...
Negative rate index chooses the custom one.
channels: combination of MPG123_STEREO and MPG123_MONO
encodings: combination of accepted encodings for rate and channels, p.ex MPG123_ENC_SIGNED_16|MPG123_ENC_ULAW_8
*/
EXPORT int mpg123_format(mpg123_handle *mh, int rateindex, int channels, int encodings); /* 0 is good, -1 is error */
/* Check if a specific format at a specific rate is supported.
Returns 0 for no support (includes invalid parameters), MPG123_STEREO, MPG123_MONO or MPG123_STEREO|MPG123_MONO. */
EXPORT int mpg123_format_support(mpg123_handle *mh, int ratei, int enci); /* Indices of rate and encoding! */
/* Get the current output format. */
EXPORT int mpg123_getformat(mpg123_handle *mh, long *rate, int *channels, int *encoding);
/* various flags */
#define MPG123_FORCE_MONO 0x7 /* 0111 */
#define MPG123_MONO_LEFT 0x1 /* 0001 */
#define MPG123_MONO_RIGHT 0x2 /* 0010 */
#define MPG123_MONO_MIX 0x4 /* 0100 */
#define MPG123_FORCE_STEREO 0x8 /* 1000 */
#define MPG123_FORCE_8BIT 0x10 /* 00010000 */
#define MPG123_QUIET 0x20 /* 00100000 suppress any printouts (overrules verbose) */
#define MPG123_GAPLESS 0x40 /* 01000000 flag always defined... */
#define MPG123_NO_RESYNC 0x80 /* 10000000 disable resync stream after error */
/* RVA choices */
#define MPG123_RVA_OFF 0
#define MPG123_RVA_MIX 1
#define MPG123_RVA_ALBUM 2
#define MPG123_RVA_MAX MPG123_RVA_ALBUM
enum mpg123_parms
{
MPG123_VERBOSE, /* set verbosity value for enabling messages to stderr, >= 0 makes sense */
MPG123_FLAGS, /* set all flags, p.ex val = MPG123_GAPLESS|MPG123_MONO_MIX */
MPG123_ADD_FLAGS, /* add some flags */
MPG123_FORCE_RATE, /* when value > 0, force output rate to that value */
MPG123_DOWN_SAMPLE, /* 0=native rate, 1=half rate, 2=quarter rate */
MPG123_RVA, /* one of the RVA choices above */
MPG123_DOWNSPEED, /* play a frame <n> times */
MPG123_UPSPEED, /* play every <n>th frame */
MPG123_START_FRAME, /* start with this frame (skip frames before that) */
MPG123_DECODE_FRAMES, /* decode only this number of frames */
MPG123_ICY_INTERVAL, /* stream contains ICY metadata with this interval */
MPG123_OUTSCALE /* the scale for output samples (amplitude) */
};
/* This sets, for a specific handle, a specific parameter (key chosen from the above list), to the specified value.
TODO: Assess the possibilities and troubles of changing parameters during playback. */
EXPORT int mpg123_param (mpg123_handle *mh, int key, long value, double fvalue);
EXPORT int mpg123_getparam(mpg123_handle *mh, int key, long *val, double *fval);
/* Direct access to a parameter set without full handle around it. */
EXPORT mpg123_pars *mpg123_new_pars(int *error);
EXPORT void mpg123_delete_pars(mpg123_pars* mp);
EXPORT int mpg123_par (mpg123_pars *mp, int key, long value, double fvalue);
EXPORT int mpg123_getpar(mpg123_pars *mp, int key, long *val, double *fval);
#define MPG123_LEFT 1
#define MPG123_RIGHT 2
/* Channel can be MPG123_LEFT, MPG123_RIGHT or MPG123_LEFT|MPG123_RIGHT for both.
Band is an eq band from 0 to 31, val the (linear) factor. */
EXPORT int mpg123_eq(mpg123_handle *mh, int channel, int band, double val);
EXPORT int mpg123_reset_eq(mpg123_handle *mh); /* all back to 1 */
/* Change output volume including the RVA setting, vol<0 just applies (a possibly changed) RVA setting. */
EXPORT int mpg123_volume(mpg123_handle *mh, double vol);
EXPORT int mpg123_volume_change(mpg123_handle *mh, double change);
/* Return current volume setting, the actual value due to RVA, the RVA adjustment itself.
It's all as double float value to abstract the sample format.
Oh, and the volume values are linear factors / amplitudes (not percent) and the RVA value is in decibel. */
EXPORT int mpg123_getvolume(mpg123_handle *mh, double *base, double *really, double *rva_db);
/* The current position in samples. On the next read, you'd get that sample. */
EXPORT off_t mpg123_tell(mpg123_handle *mh);
/* The next read will give you data from this frame. */
EXPORT off_t mpg123_tellframe(mpg123_handle *mh);
/* If possible, tell the full (expected) length of current track in samples. */
EXPORT off_t mpg123_length(mpg123_handle *mh);
/* Info about current and remaining frames/seconds.
You provide an offset (in frames) from now and a number of output bytes served by mpg123 but not yet played.
You get the projected current frame and seconds, as well as the remaining frames/seconds.
This does _not_ care about skipped samples due to gapless playback. */
EXPORT int mpg123_position( mpg123_handle *mh, off_t frame_offset, off_t buffered_bytes,
off_t *current_frame, off_t *frames_left,
/* off_t *current_samples, off_t *samples_left ); */
double *current_seconds, double *seconds_left);
/* Time (seconds) per frame; <0 is error. */
EXPORT double mpg123_tpf(mpg123_handle *mh);
/* The open functions reset stuff and make a new, different stream possible - even if there isn't actually a resource involved like with open_feed. */
EXPORT int mpg123_open (mpg123_handle *mh, char *url); /* a file or http url */
EXPORT int mpg123_open_feed(mpg123_handle *mh); /* prepare for direct feeding */
EXPORT int mpg123_open_fd (mpg123_handle *mh, int fd); /* use an already opened file descriptor */
/* reading samples / triggering decoding, possible return values: */
/* MPG123_OK on success */
#define MPG123_ERR -1 /* in general, functions return that on error */
/* special status values */
#define MPG123_NEED_MORE -10 /* For feed: "Feed me more!" */
#define MPG123_NEW_FORMAT -11 /* Output format will be different on next call. */
#define MPG123_DONE -12 /* Track ended. */
/* Read from stream and decode up to outmemsize bytes. Returns a code from above and the number of decoded bytes in *done. */
EXPORT ssize_t mpg123_read(mpg123_handle *mh, unsigned char *outmemory, size_t outmemsize, size_t *done);
/* Same as above but with feeding input data (when inmemory != NULL).
This is very close to a drop-in replacement for old mpglib.
When you give zero-sized output buffer the input will be parsed until decoded data is available.
That enables you to get NEW_FORMAT (and query it) without taking decoded data. */
EXPORT int mpg123_decode(mpg123_handle *mh, unsigned char *inmemory, size_t inmemsize, unsigned char *outmemory, size_t outmemsize, size_t *done);
/* Decode only one frame (or read a frame and return after setting a new format), update num to latest decoded frame index. */
EXPORT int mpg123_decode_frame(mpg123_handle *mh, off_t *num, unsigned char **audio, size_t *bytes);
/* Get and reset the clip count. */
EXPORT long mpg123_clip(mpg123_handle *mh);
/* Well, what do you think? Closes the resource, if libmpg123 opened it. */
EXPORT int mpg123_close(mpg123_handle *mh);
/* The seek stuff needs more thought; it's going to be sample-accurate and I need a way for feeding.
So: SEEK STUFF WILL CHANGE! */
EXPORT off_t mpg123_timeframe(mpg123_handle *mh, double sec);
EXPORT int mpg123_print_index(mpg123_handle *fr, FILE* out);
/*
Seeking in MPEG files/streams: modelled after the standard fseek (or fseeko).
- set whence to SEEK_SET, SEEK_CUR or SEEK_END (not guaranteed to work for all streams, of course)
- returning resulting offset >= 0 or MPG123_ERR (-1)
mpg123_feedseek() gives also an input data offset that it expects to be present the next time data is fed to mpg123_decode().
Still wondering: long or off_t ??
Trying to code it so that no decoding happens during seek (but some pre-decoding may be needed after seek).
Sample-accurate seek depends on the gapless code being in effect.
Without that, we only get frame-accurate.
*/
EXPORT off_t mpg123_seek (mpg123_handle *mh, off_t sampleoff, int whence);
EXPORT off_t mpg123_feedseek (mpg123_handle *mh, off_t sampleoff, int whence, off_t *input_offset);
/* in/output offset in MPEG frames instead of samples */
EXPORT off_t mpg123_seek_frame(mpg123_handle *mh, off_t frameoff, int whence);
enum mpg123_vbr { MPG123_CBR=0, MPG123_VBR, MPG123_ABR };
/* Properties of an MPEG frame, filled in by mpg123_info(). */
struct mpg123_frameinfo
{
enum {MPG123_1_0 = 0, MPG123_2_0, MPG123_2_5 } version;
int layer; /* Well... 1, 2 or 3 */
long rate; /* The sampling rate. */
/* "Stereo", "Joint-Stereo", "Dual-Channel", "Single-Channel" ... so mode != MPG123_M_MONO means two channels. */
enum { MPG123_M_STEREO=0, MPG123_M_JOINT, MPG123_M_DUAL, MPG123_M_MONO } mode;
int mode_ext;
int framesize;
/* Values for the flags member below -- presumably combined bitwise (they are distinct powers of two); confirm against mpg123_info(). */
#define MPG123_CRC 1
#define MPG123_COPYRIGHT 2
#define MPG123_PRIVATE 4
#define MPG123_ORIGINAL 8
int flags;
int emphasis;
int bitrate;
int abr_rate;
enum mpg123_vbr vbr; /* MPG123_CBR, MPG123_VBR or MPG123_ABR */
};
EXPORT int mpg123_info(mpg123_handle *mh, struct mpg123_frameinfo *mi);
/* Scan through file (if seekable) or just the first frame (without decoding, for non-seekable) and return various information.
That could include format, length, padding, ID3, ... */
/* int mpg123_scan(mpg123_handle *mh, struct mpg123_info *mi); */
EXPORT size_t mpg123_safe_buffer(); /* Get the safe output buffer size for all cases (when you want to replace the internal buffer) */
EXPORT size_t mpg123_outblock(mpg123_handle *mh); /* The max size of one frame's decoded output with current settings. */
EXPORT int mpg123_replace_buffer(mpg123_handle *mh, unsigned char *data, size_t size);
/* 128 bytes of ID3v1 - Don't take anything for granted (like string termination)! */
typedef struct
{
char tag[3]; /* "TAG", the classic intro */
char title[30]; /* title string */
char artist[30]; /* artist string */
char album[30]; /* album string */
char year[4]; /* year string */
char comment[30]; /* comment string */
unsigned char genre; /* genre code */
} mpg123_id3v1;
/* A safer string, also can hold a number of null-terminated strings. */
typedef struct
{
char* p; /* pointer to the string data */
size_t size; /* raw number of bytes allocated */
size_t fill; /* number of used bytes (including closing zero byte) */
} mpg123_string;
/* A little string library, it's not strictly mpeg decoding, but the functions are there. */
EXPORT void mpg123_init_string (mpg123_string* sb);
EXPORT void mpg123_free_string (mpg123_string* sb);
/* returning 0 on error, 1 on success */
EXPORT int mpg123_resize_string(mpg123_string* sb, size_t news);
EXPORT int mpg123_copy_string (mpg123_string* from, mpg123_string* to);
EXPORT int mpg123_add_string (mpg123_string* sb, char* stuff);
EXPORT int mpg123_set_string (mpg123_string* sb, char* stuff);
typedef struct
{
unsigned char version; /* 3 or 4 for ID3v2.3 or ID3v2.4 */
/* The ID3v2 text frames are allowed to contain multiple strings.
So check for null bytes until you reach the mpg123_string fill.
All text is encoded in UTF-8 */
mpg123_string title;
mpg123_string artist;
mpg123_string album;
mpg123_string year; /* be ready for 20570! */
mpg123_string comment;
mpg123_string genre; /* The genre string(s) may very well need postprocessing, esp. for ID3v2.3 . */
} mpg123_id3v2;
/* Query if there is (new) meta info, be it ID3 or ICY (or something new in future).
The check function returns a combination of these flags: */
#define MPG123_ID3 0x3 /* 0011 There is some ID3 info. Also matches 0010 or NEW_ID3. */
#define MPG123_NEW_ID3 0x1 /* 0001 There is ID3 info that changed since last call to mpg123_id3. */
#define MPG123_ICY 0xc /* 1100 There is some ICY info. Also matches 0100 or NEW_ICY.*/
#define MPG123_NEW_ICY 0x4 /* 0100 There is ICY info that changed since last call to mpg123_icy. */
EXPORT int mpg123_meta_check(mpg123_handle *mh); /* On error (no valid handle) just 0 is returned. */
/* Point v1 and v2 to existing data structures which may change on any next read/decode function call.
Return value is MPG123_OK or MPG123_ERR, v1 and/or v2 can be set to NULL when there is no corresponding data. */
EXPORT int mpg123_id3(mpg123_handle *mh, mpg123_id3v1 **v1, mpg123_id3v2 **v2);
EXPORT int mpg123_icy(mpg123_handle *mh, char **icy_meta); /* same for ICY meta string */
/* missing various functions to change properties: RVA, equalizer */
/* also: functions to access properties: RVA, equalizer... */
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,149 @@
#ifndef MPG123_H_INTERN
#define MPG123_H_INTERN
#include "mpg123.h"
#include "config.h"
#include "debug.h"
/* Seek code relies on GAPLESS, at least for now. */
#ifndef GAPLESS
#define GAPLESS
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#define SKIP_JUNK 1
/* should these really be here? */
#ifdef _WIN32 /* Win32 Additions By Tony Million */
# undef MPG123_WIN32
# define MPG122_WIN32
# define M_PI 3.14159265358979323846
# define M_SQRT2 1.41421356237309504880
# ifndef REAL_IS_FLOAT
# define REAL_IS_FLOAT
# endif
# define NEW_DCT9
#endif
#ifdef SUNOS
#define memmove(dst,src,size) bcopy(src,dst,size)
#endif
/* some stuff has to go back to mpg123.h */
/* Select the "real" number type used by the decoder, plus matching scanf/printf formats.
Macros not defined by the chosen branch get their defaults right after this block. */
#ifdef REAL_IS_FLOAT
# define real float
# define REAL_SCANF "%f"
# define REAL_PRINTF "%f"
#elif defined(REAL_IS_LONG_DOUBLE)
# define real long double
# define REAL_SCANF "%Lf"
# define REAL_PRINTF "%Lf"
#elif defined(REAL_IS_FIXED)
/* Fixed point: REAL_RADIX fractional bits, REAL_FACTOR == 2^REAL_RADIX. */
# define real long
# define REAL_RADIX 15
# define REAL_FACTOR (32.0 * 1024.0)
# define REAL_PLUS_32767 ( 32767 << REAL_RADIX )
/* Shift the positive value and negate afterwards: left-shifting a negative number is undefined behaviour in C. */
# define REAL_MINUS_32768 ( -( 32768 << REAL_RADIX ) )
# define DOUBLE_TO_REAL(x) ((int)((x) * REAL_FACTOR))
# define REAL_TO_SHORT(x) ((x) >> REAL_RADIX)
# define REAL_MUL(x, y) (((long long)(x) * (long long)(y)) >> REAL_RADIX)
# define REAL_SCANF "%ld"
# define REAL_PRINTF "%ld"
#else
# define real double
# define REAL_SCANF "%lf"
# define REAL_PRINTF "%f"
#endif
#ifndef DOUBLE_TO_REAL
# define DOUBLE_TO_REAL(x) (x)
#endif
#ifndef REAL_TO_SHORT
# define REAL_TO_SHORT(x) (x)
#endif
#ifndef REAL_PLUS_32767
# define REAL_PLUS_32767 32767.0
#endif
#ifndef REAL_MINUS_32768
# define REAL_MINUS_32768 -32768.0
#endif
#ifndef REAL_MUL
# define REAL_MUL(x, y) ((x) * (y))
#endif
/* used to be: AUDIOBUFSIZE = n*64 with n=1,2,3 ...
now: factor on minimum frame buffer size (which takes upsampling into account) */
#define AUDIOBUFSIZE 2
#include "true.h"
#define MAX_NAME_SIZE 81
#define SBLIMIT 32
#define SCALE_BLOCK 12
#define SSLIMIT 18
/* Same as MPG_M_* */
#define MPG_MD_STEREO 0
#define MPG_MD_JOINT_STEREO 1
#define MPG_MD_DUAL_CHANNEL 2
#define MPG_MD_MONO 3
/* float output only for generic decoder! */
#ifdef FLOATOUT
#define MAXOUTBURST 1.0
#define scale_t double
#else
/* I suspect that 32767 would be a better idea here, but Michael put this in... */
#define MAXOUTBURST 32768
#define scale_t long
#endif
/* Pre Shift for 16 to 8 bit converter table */
#define AUSHIFT (3)
/* stuff that should be moved... */
#include <stdio.h>
#include <string.h>
#include <signal.h>
#ifndef WIN32
#include <sys/signal.h>
#include <unistd.h>
#endif
/* want to support large files in future */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifndef off_t
#define off_t long
#endif
#include <math.h>
typedef unsigned char byte;
#ifdef OS2
#include <float.h>
#endif
#include "decode.h"
#include "parse.h"
#include "optimize.h"
#include "frame.h"
/* fr is a mpg123_handle* by convention here... */
#define NOQUIET (!(fr->p.flags & MPG123_QUIET))
#define VERBOSE (NOQUIET && fr->p.verbose)
#define VERBOSE2 (NOQUIET && fr->p.verbose > 1)
#define VERBOSE3 (NOQUIET && fr->p.verbose > 2)
int decode_update(mpg123_handle *mh);
#endif

144
src/libmpg123/optimize.c Normal file
View File

@@ -0,0 +1,144 @@
/*
optimize: get a grip on the different optimizations
copyright 2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
*/
#include "mpg123lib_intern.h" /* includes optimize.h */
#ifdef OPT_MULTI
#include "getcpuflags.h"
struct cpuflags cpu_flags;
/* same number of entries as full list, but empty at beginning */
/* Only compiled with OPT_MULTI. check_decoders() fills the slots from the front
   with the names of the decoders usable on the running CPU; the slots it does
   not reach stay NULL, so the list is always NULL-terminated. One slot is
   reserved per compiled-in optimization, plus one for "generic" and one for
   the terminator. */
static char *mpg123_supported_decoder_list[] =
{
#ifdef OPT_3DNOWEXT
NULL,
#endif
#ifdef OPT_SSE
NULL,
#endif
#ifdef OPT_3DNOW
NULL,
#endif
#ifdef OPT_MMX
NULL,
#endif
#ifdef OPT_I586
NULL,
#endif
#ifdef OPT_I586_DITHER
NULL,
#endif
#ifdef OPT_I486
NULL,
#endif
#ifdef OPT_I386
NULL,
#endif
#ifdef OPT_ALTIVEC
NULL,
#endif
NULL, /* generic */
NULL
};
#endif
/* NULL-terminated list with the names of all decoders compiled into this
   build, one entry per defined OPT_* macro. Returned by mpg123_decoders() and,
   in builds without OPT_MULTI, also by mpg123_supported_decoders(). */
static char *mpg123_decoder_list[] =
{
#ifdef OPT_3DNOWEXT
"3DNowExt",
#endif
#ifdef OPT_SSE
"SSE",
#endif
#ifdef OPT_3DNOW
"3DNow",
#endif
#ifdef OPT_MMX
"MMX",
#endif
#ifdef OPT_I586
"i586",
#endif
#ifdef OPT_I586_DITHER
"i586_dither",
#endif
#ifdef OPT_I486
"i486",
#endif
#ifdef OPT_I386
"i386",
#endif
#ifdef OPT_ALTIVEC
"AltiVec",
#endif
#ifdef OPT_GENERIC
"generic",
#endif
NULL
};
/*
	Fill mpg123_supported_decoder_list with the decoders that can run here.
	Without OPT_MULTI there is nothing to probe and the function does nothing.
	With OPT_MULTI the x86 CPU flags are queried (when x86 code is compiled in)
	and the names are appended in order of preference.
*/
void check_decoders(void )
{
#ifdef OPT_MULTI
	char **slot = mpg123_supported_decoder_list;

#ifdef OPT_X86
	getcpuflags(&cpu_flags);
	if(cpu_i586(cpu_flags))
	{
		/* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
		if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
#ifdef OPT_3DNOWEXT
		if(cpu_3dnowext(cpu_flags)) { *slot = "3DNowExt"; ++slot; }
#endif
#ifdef OPT_SSE
		if(cpu_sse(cpu_flags)) { *slot = "SSE"; ++slot; }
#endif
#ifdef OPT_3DNOW
		if(cpu_3dnow(cpu_flags)) { *slot = "3DNow"; ++slot; }
#endif
#ifdef OPT_MMX
		if(cpu_mmx(cpu_flags)) { *slot = "MMX"; ++slot; }
#endif
		/* The plain Pentium variants need no feature flag beyond i586. */
#ifdef OPT_I586
		*slot = "i586"; ++slot;
#endif
#ifdef OPT_I586_DITHER
		*slot = "i586_dither"; ++slot;
#endif
	}
#endif /* OPT_X86 */

	/* just assume that the i486 built is run on a i486 cpu... */
#ifdef OPT_I486
	*slot = "i486"; ++slot;
#endif
#ifdef OPT_ALTIVEC
	*slot = "AltiVec"; ++slot;
#endif
	/* every supported x86 can do i386, any cpu can do generic */
#ifdef OPT_I386
	*slot = "i386"; ++slot;
#endif
#ifdef OPT_GENERIC
	*slot = "generic"; ++slot;
#endif
#endif /* OPT_MULTI */
}
/* Return the NULL-terminated list of all decoder names compiled into this build. */
char **mpg123_decoders()
{
	char **all = mpg123_decoder_list;
	return all;
}
/*
	Return the NULL-terminated list of decoders usable on this machine.
	With OPT_MULTI that is the list filled in by check_decoders(); otherwise
	the compiled-in list is returned as is.
*/
char **mpg123_supported_decoders()
{
#ifndef OPT_MULTI
	return mpg123_decoder_list;
#else
	return mpg123_supported_decoder_list;
#endif
}

334
src/libmpg123/optimize.h Normal file
View File

@@ -0,0 +1,334 @@
/*
optimize: get a grip on the different optimizations
copyright 2007 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Thomas Orgis, taking from mpg123.[hc]
for building mpg123 with one optimization only, you have to choose exclusively between
OPT_GENERIC (generic C code for everyone)
OPT_I386 (Intel i386)
OPT_I486 (...)
OPT_I586 (Intel Pentium)
OPT_I586_DITHER (Intel Pentium with dithering/noise shaping for enhanced quality)
OPT_MMX (Intel Pentium and compatibles with MMX, fast, but not the best accuracy)
OPT_3DNOW (AMD 3DNow!, K6-2/3, Athlon, compatibles...)
OPT_ALTIVEC (Motorola/IBM PPC with AltiVec under MacOSX)
or you define OPT_MULTI and give a combination which makes sense (do not include i486, do not mix altivec and x86).
I still have to examine the dynamics of this here together with REAL_IS_FIXED.
*/
/* this is included in mpg123.h, which includes config.h */
/* aligned(a) expands to the GCC alignment attribute when CCALIGN is defined
   and to nothing otherwise. */
#ifdef CCALIGN
#define aligned(a) __attribute__((aligned(a)))
#else
#define aligned(a)
#endif
/* the optimizations only cover the synth1to1 mode and the dct36 function */
/* the first two types are needed in set_synth_functions regardless of optimizations */
typedef int (*func_synth)(real *,int, mpg123_handle *,int );
typedef int (*func_synth_mono)(real *, mpg123_handle *);
typedef void (*func_dct36)(real *,real *,real *,real *,real *);
typedef void (*func_dct64)(real *,real *,real *);
typedef void (*func_make_decode_tables)(mpg123_handle*);
typedef real (*func_init_layer3_gainpow2)(mpg123_handle*, int);
typedef real* (*func_init_layer2_table)(mpg123_handle*, real*, double);
typedef int (*func_synth_pent)(real *,int,unsigned char *);
/* last headaches about getting mmx hardcode out */
real init_layer3_gainpow2(mpg123_handle *fr, int i);
real* init_layer2_table(mpg123_handle *fr, real *table, double m);
void make_decode_tables(mpg123_handle *fr);
void prepare_decode_tables(void); /* perhaps not best place here */
/* only 3dnow replaces that one, it's internal to layer3.c otherwise */
void dct36(real *,real *,real *,real *,real *);
/* Default opt_* mappings to the plain C routines. The decoder specific
   sections below #undef and redefine individual entries. Note that
   opt_make_decode_tables(fr) expands to a call while the other opt_* macros
   expand to a function (pointer) or lvalue. */
#define opt_dct36(fr) dct36
/* only mmx replaces those */
#define opt_make_decode_tables(fr) make_decode_tables(fr)
#define opt_decwin(fr) (fr)->decwin
#define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2
#define opt_init_layer2_table(fr) init_layer2_table
/* Generic C decoder: declares the portable dct64 and synth_1to1 family. In a
   single-decoder build (no OPT_MULTI) the opt_* macros resolve directly to
   these functions and defopt is set to generic. */
#ifdef OPT_GENERIC
#define PENTIUM_FALLBACK
void dct64(real *,real *,real *);
int synth_1to1(real *bandPtr,int channel, mpg123_handle *fr, int final);
int synth_1to1_8bit(real *bandPtr,int channel, mpg123_handle *fr, int final);
int synth_1to1_mono(real *, mpg123_handle *fr);
int synth_1to1_mono2stereo (real *, mpg123_handle *fr);
int synth_1to1_8bit_mono (real *, mpg123_handle *fr);
int synth_1to1_8bit_mono2stereo (real *, mpg123_handle *fr);
#ifndef OPT_MULTI
#define defopt generic
#define opt_dct64(fr) dct64
#define opt_synth_1to1(fr) synth_1to1
#define opt_synth_1to1_mono(fr) synth_1to1_mono
#define opt_synth_1to1_mono2stereo(fr) synth_1to1_mono2stereo
#define opt_synth_1to1_8bit(fr) synth_1to1_8bit
#define opt_synth_1to1_8bit_mono(fr) synth_1to1_8bit_mono
#define opt_synth_1to1_8bit_mono2stereo(fr) synth_1to1_8bit_mono2stereo
#endif
#endif
/* i486 is special */
/* OPT_I486 switches on OPT_I386 as well and claims defopt (ivier) first, so
   the i386 section below only sets defopt when it is still undefined. */
#ifdef OPT_I486
#define OPT_I386
#define defopt ivier
int synth_1to1_486(real *bandPtr, int channel, mpg123_handle *fr, int nb_blocks);
void dct64_i486(int *a,int *b,real *c); /* not used generally */
#endif
/* i386: enables the common x86 declarations (OPT_X86) further down. */
#ifdef OPT_I386
#define PENTIUM_FALLBACK
#define OPT_X86
int synth_1to1_i386(real *bandPtr, int channel, mpg123_handle *fr, int final);
#ifndef OPT_MULTI
#ifndef defopt
#define defopt idrei
#endif
#define opt_synth_1to1(fr) synth_1to1_i386
#endif
#endif
/* Pentium decoder, plain and dithering variant. Both use the same C level
   synth_1to1_i586 and differ only in the assembly routine that
   opt_synth_1to1_i586_asm selects in a single-decoder build. */
#ifdef OPT_I586
#define PENTIUM_FALLBACK
#define OPT_PENTIUM
#define OPT_X86
int synth_1to1_i586(real *bandPtr, int channel, mpg123_handle *fr, int final);
int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
#ifndef OPT_MULTI
#define defopt ifuenf
#define opt_synth_1to1(fr) synth_1to1_i586
#define opt_synth_1to1_i586_asm(fr) synth_1to1_i586_asm
#endif
#endif
#ifdef OPT_I586_DITHER
#define PENTIUM_FALLBACK
#define OPT_PENTIUM
#define OPT_X86
int synth_1to1_i586(real *bandPtr, int channel, mpg123_handle *fr, int final);
int synth_1to1_i586_asm_dither(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
#ifndef OPT_MULTI
#define defopt ifuenf_dither
#define opt_synth_1to1(fr) synth_1to1_i586
#define opt_synth_1to1_i586_asm(fr) synth_1to1_i586_asm_dither
#endif
#endif
/* That one has by far the most ugly hacks to make it cooperative. */
/* MMX decoder: brings its own table setup (make_decode_tables_mmx,
   init_layer3_gainpow2_mmx, init_layer2_table_mmx) and window (decwin_mmx).
   In a single-decoder build the default opt_* table macros are replaced by the
   MMX variants and OPT_MMX_ONLY is set, which hides the generic pnts/decwin
   declarations further down. */
#ifdef OPT_MMX
#define OPT_MMXORSSE
#define OPT_X86
real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i);
real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m);
/* I think one can optimize storage here with the normal decwin */
extern real decwin_mmx[512+32];
void dct64_mmx(real *,real *,real *);
int synth_1to1_mmx(real *bandPtr, int channel, mpg123_handle *fr, int final);
void make_decode_tables_mmx(mpg123_handle *fr); /* tabinit_mmx.s */
void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); /* tabinit_mmx.s */
/* these are in asm, dct64 called directly there */
void dct64_MMX(short *a,short *b,real *c);
int synth_1to1_MMX(real *bandPtr, int channel, short *out, short *buffs, int *bo, float *decwins);
#ifndef OPT_MULTI
#define defopt mmx
/* #undef opt_decwin
#define opt_decwin(fr) decwin_mmx */
#define opt_dct64(fr) dct64_mmx
#define opt_synth_1to1(fr) synth_1to1_mmx
/* (a stray, incomplete "#define opt_" that only created an empty junk macro was removed here) */
#undef opt_make_decode_tables
#define opt_make_decode_tables(fr) make_decode_tables_mmx(fr)
#undef opt_init_layer3_gainpow2
#define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2_mmx
#undef opt_init_layer2_table
#define opt_init_layer2_table(fr) init_layer2_table_mmx
#define OPT_MMX_ONLY
#endif
#endif
/* first crude hack into our source */
/* SSE decoder: shares the MMX table setup and window with the MMX decoder and
   adds dct64_sse / synth_1to1_sse. A single-decoder build keeps dct64_mmx as
   opt_dct64 and routes opt_mpl_dct64 to dct64_sse. */
#ifdef OPT_SSE
#define OPT_MMXORSSE
#define OPT_MPLAYER
#define OPT_X86
real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i);
real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m);
/* I think one can optimize storage here with the normal decwin */
extern real decwin_mmx[512+32];
void dct64_mmx(real *,real *,real *);
void dct64_sse(real *,real *,real *);
int synth_1to1_sse(real *bandPtr, int channel, mpg123_handle *fr, int final);
void synth_1to1_sse_asm(real *bandPtr, int channel, short *samples, short *buffs, int *bo, real *decwin);
void make_decode_tables_mmx(mpg123_handle *fr); /* tabinit_mmx.s */
void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); /* tabinit_mmx.s */
/* ugly! */
extern func_dct64 mpl_dct64;
#ifndef OPT_MULTI
#define defopt sse
#define opt_mpl_dct64(fr) dct64_sse
/* #undef opt_decwin
#define opt_decwin(fr) decwin_mmx */
#define opt_dct64(fr) dct64_mmx /* dct64_sse is silent in downsampling modes */
#define opt_synth_1to1(fr) synth_1to1_sse /* that will use dct64_sse */
#undef opt_make_decode_tables
#define opt_make_decode_tables(fr) make_decode_tables_mmx(fr)
#undef opt_init_layer3_gainpow2
#define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2_mmx
#undef opt_init_layer2_table
#define opt_init_layer2_table(fr) init_layer2_table_mmx
#define OPT_MMX_ONLY /* watch out! */
#endif
#endif
/* first crude hack into our source */
/* 3DNowExt decoder: same layout as the SSE section (MMX tables and window),
   with dct64_3dnowext / synth_1to1_3dnowext and additionally its own dct36
   replacement (dct36_3dnowext). */
#ifdef OPT_3DNOWEXT
#define OPT_MMXORSSE
#define OPT_MPLAYER
#define OPT_X86
real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i);
real* init_layer2_table_mmx(mpg123_handle *fr, real *table, double m);
/* I think one can optimize storage here with the normal decwin */
extern real decwin_mmx[512+32];
void dct64_mmx(real *,real *,real *);
void dct64_3dnowext(real *,real *,real *);
void dct36_3dnowext(real *,real *,real *,real *,real *);
int synth_1to1_3dnowext(real *bandPtr, int channel, mpg123_handle *fr, int final);
void synth_1to1_3dnowext_asm(real *bandPtr, int channel, short *samples, short *buffs, int *bo, real *decwin);
void make_decode_tables_mmx(mpg123_handle *fr); /* tabinit_mmx.s */
void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); /* tabinit_mmx.s */
/* ugly! */
extern func_dct64 mpl_dct64;
#ifndef OPT_MULTI
#define defopt dreidnowext
#define opt_mpl_dct64(fr) dct64_3dnowext
#undef opt_dct36
#define opt_dct36(fr) dct36_3dnowext
/* #undef opt_decwin
#define opt_decwin(fr) decwin_mmx */
#define opt_dct64(fr) dct64_mmx /* dct64_sse is silent in downsampling modes */
#define opt_synth_1to1(fr) synth_1to1_3dnowext /* that will use dct64_3dnowext */
#undef opt_make_decode_tables
#define opt_make_decode_tables(fr) make_decode_tables_mmx(fr)
#undef opt_init_layer3_gainpow2
#define opt_init_layer3_gainpow2(fr) init_layer3_gainpow2_mmx
#undef opt_init_layer2_table
#define opt_init_layer2_table(fr) init_layer2_table_mmx
#define OPT_MMX_ONLY /* watch out! */
#endif
#endif
/* The generic tables are only declared when no single-decoder MMX/SSE/3DNowExt
   build has set OPT_MMX_ONLY above. */
#ifndef OPT_MMX_ONLY
extern real *pnts[5];
extern real decwin[512+32];
#endif
/* OPT_MPLAYER is set by the SSE and 3DNowExt sections. */
#ifdef OPT_MPLAYER
extern const int costab_mmxsse[];
#endif
/* 3dnow used to use synth_1to1_i586 for mono / 8bit conversion - was that intentional? */
/* I'm trying to skip the pentium code here ... until I see that that is indeed a bad idea */
/* Plain 3DNow decoder: replaces dct36 and synth_1to1 in a single-decoder build. */
#ifdef OPT_3DNOW
#define K6_FALLBACK /* a fallback for 3DNowExt */
#define OPT_X86
void dct36_3dnow(real *,real *,real *,real *,real *);
void do_equalizer_3dnow(real *bandPtr,int channel, real equalizer[2][32]);
int synth_1to1_3dnow(real *bandPtr, int channel, mpg123_handle *fr, int final);
int synth_1to1_3dnow_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin);
#ifndef OPT_MULTI
#define defopt dreidnow
#undef opt_dct36
#define opt_dct36(fr) dct36_3dnow
#define opt_synth_1to1(fr) synth_1to1_3dnow
#endif
#endif
/* Declarations shared by every x86 decoder (OPT_X86 is set by each x86 section
   above). In a single-decoder build the i386 routines serve as mono / 8 bit
   helpers, and dct64_i386 becomes opt_dct64 unless a decoder section already
   defined it. */
#ifdef OPT_X86
/* these have to be merged back into one! */
unsigned int getcpuid();
unsigned int getextcpuflags();
unsigned int getstdcpuflags();
unsigned int getstd2cpuflags();
void dct64_i386(real *,real *,real *);
int synth_1to1_mono_i386(real *, mpg123_handle *fr);
int synth_1to1_mono2stereo_i386(real *, mpg123_handle *fr);
int synth_1to1_8bit_i386(real *,int, mpg123_handle *fr, int final);
int synth_1to1_8bit_mono_i386(real *, mpg123_handle *fr);
int synth_1to1_8bit_mono2stereo_i386(real *, mpg123_handle *fr);
#ifndef OPT_MULTI
#ifndef opt_dct64
#define opt_dct64(fr) dct64_i386 /* default one even for 3dnow and i486 in decode_2to1, decode_ntom */
#endif
#define opt_synth_1to1_mono(fr) synth_1to1_mono_i386
#define opt_synth_1to1_mono2stereo(fr) synth_1to1_mono2stereo_i386
#define opt_synth_1to1_8bit(fr) synth_1to1_8bit_i386
#define opt_synth_1to1_8bit_mono(fr) synth_1to1_8bit_mono_i386
#define opt_synth_1to1_8bit_mono2stereo(fr) synth_1to1_8bit_mono2stereo_i386
#endif
#endif
/* AltiVec decoder.
   NOTE(review): these prototypes take (real *, [int,] unsigned char *, int *)
   while func_synth / func_synth_mono above expect an mpg123_handle* - they look
   like they were not yet adapted to the handle based interface; confirm before
   enabling this decoder. */
#ifdef OPT_ALTIVEC
void dct64_altivec(real *out0,real *out1,real *samples);
int synth_1to1_altivec(real *,int,unsigned char *,int *);
int synth_1to1_mono_altivec(real *,unsigned char *,int *);
int synth_1to1_mono2stereo_altivec(real *,unsigned char *,int *);
int synth_1to1_8bit_altivec(real *,int,unsigned char *,int *);
int synth_1to1_8bit_mono_altivec(real *,unsigned char *,int *);
int synth_1to1_8bit_mono2stereo_altivec(real *,unsigned char *,int *);
#ifndef OPT_MULTI
#define defopt altivec
#define opt_dct64(fr) dct64_altivec
#define opt_synth_1to1(fr) synth_1to1_altivec
#define opt_synth_1to1_mono(fr) synth_1to1_mono_altivec
#define opt_synth_1to1_mono2stereo(fr) synth_1to1_mono2stereo_altivec
#define opt_synth_1to1_8bit(fr) synth_1to1_8bit_altivec
#define opt_synth_1to1_8bit_mono(fr) synth_1to1_8bit_mono_altivec
#define opt_synth_1to1_8bit_mono2stereo(fr) synth_1to1_8bit_mono2stereo_altivec
#endif
#endif
/* used for multi opt mode and the single 3dnow mode to have the old 3dnow test flag still working */
void check_decoders(void);
/* Multi-decoder build: every opt_* macro dereferences a function pointer
   stored in the handle's cpu_opts member instead of naming a fixed function. */
#ifdef OPT_MULTI
#ifdef OPT_X86
/* NOTE(review): optimize.c defines this object as "cpu_flags", not "cf" -
   confirm which name is meant. */
extern struct cpuflags cf;
#endif
#define defopt nodec
/* a simple global struct to hold the decoding function pointers, could be localized later if really wanted */
#define opt_synth_1to1(fr) ((fr)->cpu_opts.synth_1to1)
#define opt_synth_1to1_mono(fr) ((fr)->cpu_opts.synth_1to1_mono)
#define opt_synth_1to1_mono2stereo(fr) ((fr)->cpu_opts.synth_1to1_mono2stereo)
#define opt_synth_1to1_8bit(fr) ((fr)->cpu_opts.synth_1to1_8bit)
#define opt_synth_1to1_8bit_mono(fr) ((fr)->cpu_opts.synth_1to1_8bit_mono)
#define opt_synth_1to1_8bit_mono2stereo(fr) ((fr)->cpu_opts.synth_1to1_8bit_mono2stereo)
#ifdef OPT_PENTIUM
#define opt_synth_1to1_i586_asm(fr) ((fr)->cpu_opts.synth_1to1_i586_asm)
#endif
#ifdef OPT_MMXORSSE
#undef opt_make_decode_tables
#define opt_make_decode_tables(fr) ((fr)->cpu_opts.make_decode_tables)(fr)
/* #undef opt_decwin
#define opt_decwin(fr) (fr)->cpu_opts.decwin */
#undef opt_init_layer3_gainpow2
#define opt_init_layer3_gainpow2(fr) ((fr)->cpu_opts.init_layer3_gainpow2)
#undef opt_init_layer2_table
#define opt_init_layer2_table(fr) ((fr)->cpu_opts.init_layer2_table)
#endif
#ifdef OPT_3DNOW
#undef opt_dct36
#define opt_dct36(fr) ((fr)->cpu_opts.dct36)
#endif
#define opt_dct64(fr) ((fr)->cpu_opts.dct64)
#ifdef OPT_MPLAYER
#define opt_mpl_dct64(fr) ((fr)->cpu_opts.mpl_dct64)
#endif
#endif

1013
src/libmpg123/parse.c Normal file

File diff suppressed because it is too large Load Diff

19
src/libmpg123/parse.h Normal file
View File

@@ -0,0 +1,19 @@
/* Declarations of the frame reading and position helper functions; all of
   them operate on an mpg123_handle. */
#ifndef MPG123_PARSE_H
#define MPG123_PARSE_H
#include "frame.h"
int read_frame_init(mpg123_handle* fr);
int frame_bitrate(mpg123_handle *fr);
long frame_freq(mpg123_handle *fr);
int read_frame_recover(mpg123_handle* fr); /* dead? */
/* The stream reader's seek loop treats a zero return value as "stop reading". */
int read_frame(mpg123_handle *fr);
void set_pointer(mpg123_handle *fr, long backstep);
int position_info(mpg123_handle* fr, unsigned long no, long buffsize, unsigned long* frames_left, double* current_seconds, double* seconds_left);
double compute_bpf(mpg123_handle *fr);
long time_to_frame(mpg123_handle *fr, double seconds);
int get_songlen(mpg123_handle *fr,int no);
/* Conversion between sample and byte counts, in off_t. */
off_t samples_to_bytes(mpg123_handle *fr , off_t s);
off_t bytes_to_samples(mpg123_handle *fr , off_t b);
#endif

72
src/libmpg123/reader.h Normal file
View File

@@ -0,0 +1,72 @@
#ifndef MPG123_READER_H
#define MPG123_READER_H
#include "config.h"
#include "mpg123.h"
/* One node of the feed reader's singly linked buffer chain. feed_more()
   appends a node holding a private copy of the fed bytes. */
struct buffy
{
unsigned char *data; /* malloc()ed copy of the input, freed together with the node */
off_t size; /* number of bytes in data */
struct buffy *next; /* following node, NULL for the last one */
};
/* Per-handle state of the active reader (file/stream or feed). */
struct reader_data
{
off_t filelen; /* total file length or total buffer size */
off_t filepos; /* position in file or position in buffer chain */
int filept; /* file descriptor used by the stream readers */
int flags; /* combination of the READER_* flag bits defined below */
/* variables specific to feed reader */
off_t firstpos; /* the point of return on non-forget() */
struct buffy *buf; /* first in buffer chain */
};
/* start to use off_t to properly do LFS in future ... used to be long */
/* Table of operations that make up one reader implementation; the instances
   live in readers[] in readers.c. */
struct reader
{
int (*init) (mpg123_handle *);
void (*close) (mpg123_handle *);
ssize_t (*fullread) (mpg123_handle *, unsigned char *, ssize_t);
int (*head_read) (mpg123_handle *, unsigned long *newhead); /* succ: TRUE, else <= 0 (FALSE or READER_MORE) */
int (*head_shift) (mpg123_handle *, unsigned long *head); /* succ: TRUE, else <= 0 (FALSE or READER_MORE) */
off_t (*skip_bytes) (mpg123_handle *, off_t len); /* succ: >=0, else error or READER_MORE */
int (*read_frame_body)(mpg123_handle *, unsigned char *, int size);
int (*back_bytes) (mpg123_handle *, off_t bytes);
int (*seek_frame) (mpg123_handle *, off_t num);
off_t (*tell) (mpg123_handle *);
void (*rewind) (mpg123_handle *);
void (*forget) (mpg123_handle *); /* NULL for the stream readers, only the feed reader provides it */
};
/* Open a file by path or use an opened file descriptor. */
/* A NULL path selects the descriptor fd, which is then not closed by the reader. */
int open_stream(mpg123_handle *, char *path, int fd);
/* feed based operation has some specials */
int open_feed(mpg123_handle *);
/* externally called function, returns 0 on success, -1 on error */
/* The input is copied, so the caller keeps ownership of in. */
int feed_more(mpg123_handle *fr, unsigned char *in, long count);
void feed_forget(mpg123_handle *fr); /* forget the data that has been read (free some buffers) */
off_t feed_set_pos(mpg123_handle *fr, off_t pos); /* Set position (inside available data if possible), return wanted byte offset of next feed. */
/* Bits for reader_data.flags */
#define READER_FD_OPENED 0x1
#define READER_ID3TAG 0x2
#define READER_SEEKABLE 0x4
#define READER_BUFFERED 0x8
#define READER_MICROSEEK 0x10
/* Indices into the readers[] table in readers.c; READERS is the table size. */
#define READER_STREAM 0
#define READER_ICY_STREAM 1
#define READER_FEED 2
#ifdef READ_SYSTEM
#define READER_SYSTEM 3
#define READERS 4
#else
#define READERS 3
#endif
/* Return codes of reader functions */
#define READER_ERROR -1
#define READER_MORE MPG123_NEED_MORE
#endif

566
src/libmpg123/readers.c Normal file
View File

@@ -0,0 +1,566 @@
/*
readers.c: reading input data
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "mpg123lib_intern.h"
static off_t get_fileinfo(mpg123_handle *);
/* stream based operation with icy meta data */
/*
	Read up to count bytes of audio data into buf, picking out the ICY metadata
	blocks that are interleaved with the audio every fr->icy.interval bytes.
	A complete metadata block replaces fr->icy.data and raises MPG123_NEW_ICY.
	Note that rdat.filepos counts every byte taken from the descriptor, audio
	and metadata alike; icy.next is kept in the same unit.

	Returns the number of audio bytes stored (less than count on end of input)
	or READER_ERROR when read() fails.

	Fixes over the previous version:
	- data before a metadata boundary goes to buf+cnt, not to the start of buf
	- the boundary test uses the bytes still to be read, not the total count
	- a short read before the boundary is retried instead of mistaking audio
	  for the metadata length byte
	- metadata is read completely before it is published; the buffer is freed
	  on error or premature end of input
	- if the metadata buffer cannot be allocated, the bytes are read and
	  dropped right here instead of re-entering the reader via skip_bytes()
*/
static ssize_t icy_fullread(mpg123_handle *fr, unsigned char *buf, ssize_t count)
{
	ssize_t ret,cnt;
	cnt = 0;

	/*
		We check against READER_ID3TAG instead of rds->filelen >= 0 because if we got the ID3 TAG we know we have the end of the file.
		If we don't have an ID3 TAG, then it is possible the file has grown since we started playing, so we want to keep reading from it if possible.
	*/
	if((fr->rdat.flags & READER_ID3TAG) && fr->rdat.filepos + count > fr->rdat.filelen) count = fr->rdat.filelen - fr->rdat.filepos;

	while(cnt < count)
	{
		/* all icy code is inside this if block, everything else is the plain fullread we know */
		/* debug1("read: %li left", (long) count-cnt); */
		if(fr->icy.interval && (fr->rdat.filepos+(count-cnt) > fr->icy.next))
		{
			unsigned char temp_buff;
			size_t meta_size;
			size_t meta_got = 0;
			ssize_t cut_pos;

			/* we are near icy-metaint boundary, read up to the boundary */
			cut_pos = fr->icy.next - fr->rdat.filepos;
			if(cut_pos > 0)
			{
				ret = read(fr->rdat.filept,buf+cnt,cut_pos);
				if(ret < 0) return READER_ERROR;
				if(ret == 0) break;
				fr->rdat.filepos += ret;
				cnt += ret;
				/* Short read: the boundary is not reached yet, try again. */
				if(ret < cut_pos) continue;
			}
			/* now off to read icy data */
			/* one byte icy-meta size (must be multiplied by 16 to get icy-meta length) */
			ret = read(fr->rdat.filept,&temp_buff,1);
			if(ret < 0) return READER_ERROR;
			if(ret == 0) break;

			debug2("got meta-size byte: %u, at filepos %li", temp_buff, (long)fr->rdat.filepos );
			fr->rdat.filepos += ret; /* 1... */

			if((meta_size = ((size_t) temp_buff) * 16))
			{
				/* we have got some metadata */
				char *meta_buff;
				meta_buff = (char*) malloc(meta_size+1);
				if(meta_buff != NULL)
				{
					/* read() may deliver the block in pieces */
					while(meta_got < meta_size)
					{
						ret = read(fr->rdat.filept,meta_buff+meta_got,meta_size-meta_got);
						if(ret <= 0) break;
						meta_got += ret;
						fr->rdat.filepos += ret;
					}
					if(meta_got < meta_size)
					{
						/* error or end of input inside the metadata: do not publish a partial block */
						free(meta_buff);
						if(ret < 0) return READER_ERROR;
						break;
					}
					meta_buff[meta_size] = 0; /* string paranoia */

					if(fr->icy.data) free(fr->icy.data);
					fr->icy.data = meta_buff;
					fr->metaflags |= MPG123_NEW_ICY;
					debug2("icy-meta: %s size: %d bytes", fr->icy.data, (int)meta_size);
				}
				else
				{
					unsigned char junk[256];
					error1("cannot allocate memory for meta_buff (%lu bytes) ... trying to skip the metadata!", (unsigned long)meta_size);
					/* Consume and drop the metadata so that the audio stays in sync. */
					while(meta_got < meta_size)
					{
						size_t want = meta_size - meta_got;
						if(want > sizeof(junk)) want = sizeof(junk);
						ret = read(fr->rdat.filept,junk,want);
						if(ret < 0) return READER_ERROR;
						if(ret == 0) break;
						meta_got += ret;
						fr->rdat.filepos += ret;
					}
					if(meta_got < meta_size) break; /* end of input */
				}
			}
			fr->icy.next = fr->rdat.filepos+fr->icy.interval;
			/* Check again: the remaining request may reach the next boundary, too. */
			continue;
		}

		ret = read(fr->rdat.filept,buf+cnt,count-cnt);
		if(ret < 0) return READER_ERROR;
		if(ret == 0) break;

		fr->rdat.filepos += ret;
		cnt += ret;
	}
	/* debug1("done reading, got %li", (long)cnt); */
	return cnt;
}
/* stream based operation */
/*
	Read count bytes from the file descriptor into buf, repeating read() until
	the request is satisfied or the input ends.
	Returns the number of bytes stored or READER_ERROR if read() fails.
*/
static ssize_t plain_fullread(mpg123_handle *fr,unsigned char *buf, ssize_t count)
{
	ssize_t done = 0;

	/*
		We check against READER_ID3TAG instead of rds->filelen >= 0 because if we got the ID3 TAG we know we have the end of the file.
		If we don't have an ID3 TAG, then it is possible the file has grown since we started playing, so we want to keep reading from it if possible.
	*/
	if(fr->rdat.flags & READER_ID3TAG)
	{
		if(fr->rdat.filepos + count > fr->rdat.filelen)
			count = fr->rdat.filelen - fr->rdat.filepos;
	}

	for(;;)
	{
		ssize_t got;
		if(done >= count) break;

		got = read(fr->rdat.filept, buf+done, count-done);
		if(got < 0) return READER_ERROR;
		if(got == 0) break; /* end of input */

		fr->rdat.filepos += got;
		done += got;
	}
	return done;
}
/*
	lseek() wrapper that keeps rds->filepos in sync.
	Returns the new absolute position, or READER_ERROR (not the original
	lseek() value) on failure; filepos is untouched in that case.
*/
static off_t stream_lseek(struct reader_data *rds, off_t pos, int whence)
{
	off_t reached = lseek(rds->filept, pos, whence);

	if(reached < 0) return READER_ERROR;

	rds->filepos = reached;
	return reached;
}
/*
	Initialise the reader state for a descriptor based stream: determine the
	length via get_fileinfo() and derive the SEEKABLE / ID3TAG flags from it.
	Always returns 0.
*/
static int default_init(mpg123_handle *fr)
{
	fr->rdat.filelen = get_fileinfo(fr);
	fr->rdat.filepos = 0;

	/* No length means no seeking and no tail inspection. */
	if(fr->rdat.filelen < 0) return 0;

	fr->rdat.flags |= READER_SEEKABLE;
	/* get_fileinfo() left the last 128 bytes of the file in id3buf */
	if(strncmp((char*)fr->id3buf,"TAG",3) == 0)
	{
		fr->rdat.flags |= READER_ID3TAG;
		fr->metaflags |= MPG123_NEW_ID3;
	}
	return 0;
}
/* Close the descriptor, but only if the reader opened it itself. */
void stream_close(mpg123_handle *fr)
{
	if(!(fr->rdat.flags & READER_FD_OPENED)) return;

	close(fr->rdat.filept);
}
/*
	HACK,HACK,HACK: step back the given number of bytes.
	Can only work if the 'stream' isn't a real stream but a file.
	Returns 0 on success, READER_ERROR if the seek fails.
*/
static int stream_back_bytes(mpg123_handle *fr, off_t bytes)
{
	off_t reached = stream_lseek(&fr->rdat, -bytes, SEEK_CUR);

	return (reached < 0) ? READER_ERROR : 0;
}
/*
	Position the stream so that frame newframe is the one ready for decoding:
	seek to the nearest preceding position known to the frame index and read
	frames from there until newframe is reached.
	Returns MPG123_OK, or READER_ERROR if the stream is not seekable or the
	seek itself fails.

	The debug calls now cast their arguments to unsigned long; off_t values
	were passed straight to %lu before, which is undefined when off_t is not
	the size of long.
*/
static int stream_seek_frame(mpg123_handle *fr, off_t newframe)
{
	off_t preframe;

	if(!(fr->rdat.flags & READER_SEEKABLE)) return READER_ERROR; /* invalid, no seek happened */

	/* two leading frames? hm, doesn't seem to be really needed... */
	/*if(newframe > 1) newframe -= 2;
	else newframe = 0;*/

	/* now seek to nearest leading index position and read from there until newframe is reached */
	if(stream_lseek(&fr->rdat,frame_index_find(fr, newframe, &preframe),SEEK_SET) < 0)
	return READER_ERROR;

	debug2("going to %lu; just got %lu", (unsigned long)newframe, (unsigned long)preframe);
	fr->num = preframe-1; /* Watch out! I am going to read preframe... fr->num should indicate the frame before! */
	while(fr->num < newframe)
	{
		/* try to be non-fatal now... frameNum only gets advanced on success anyway */
		/* NOTE(review): only a zero return stops the loop; if read_frame() can
		   return a negative code without advancing fr->num this would spin - confirm. */
		if(!read_frame(fr)) break;
	}
	/* Now the wanted frame should be ready for decoding. */

	/* I think, I don't want this...
	if(fr->lay == 3) set_pointer(fr, 512); */

	debug1("arrived at %lu", (unsigned long)fr->num);
	return MPG123_OK;
}
/* return FALSE on error, TRUE on success, READER_MORE on occasion */
/* Fetch four bytes and store them in *newhead as a big endian 32 bit value. */
static int generic_head_read(mpg123_handle *fr,unsigned long *newhead)
{
	unsigned char raw[4];
	unsigned long value = 0;
	int i;
	int got = fr->rd->fullread(fr,raw,4);

	if(got == READER_MORE) return got;
	if(got != 4) return FALSE;

	/* most significant byte first */
	for(i = 0; i < 4; ++i) value = (value << 8) | (unsigned long) raw[i];

	*newhead = value;
	return TRUE;
}
/* return FALSE on error, TRUE on success, READER_MORE on occasion */
/* Shift one more byte from the stream into the low end of the 32 bit header word. */
static int generic_head_shift(mpg123_handle *fr,unsigned long *head)
{
	unsigned char next_byte;
	int got = fr->rd->fullread(fr,&next_byte,1);

	if(got == READER_MORE) return got;
	if(got != 1) return FALSE;

	/* keep only 32 bits, unsigned long may be wider */
	*head = ((*head << 8) | next_byte) & 0xffffffff;
	return TRUE;
}
/* returns reached position... negative ones are bad... */
/*
	Skip len bytes. Seekable input is simply repositioned (len may be negative
	there); other input is read and thrown away, which only works forwards.
	If the input ends before len bytes were skipped, the position reached so
	far is returned. Previously that case looped forever because fullread()
	kept returning 0 while len stayed positive.
*/
static off_t stream_skip_bytes(mpg123_handle *fr,off_t len)
{
	if((fr->rdat.flags & READER_SEEKABLE) && (fr->rdat.filelen >= 0))
	{
		off_t ret = stream_lseek(&fr->rdat, len, SEEK_CUR);
		return ret<0 ? READER_ERROR : ret;
	}
	else if(len >= 0)
	{
		unsigned char buf[1024]; /* ThOr: Compaq cxx complained and it makes sense to me... or should one do a cast? What for? */
		ssize_t ret;
		while (len > 0)
		{
			ssize_t num = len < (off_t)sizeof(buf) ? (ssize_t)len : (ssize_t)sizeof(buf);
			ret = fr->rd->fullread(fr, buf, num);
			if (ret < 0) return ret;
			if (ret == 0) break; /* end of input, nothing more to skip */
			len -= ret;
		}
		return fr->rdat.filepos;
	}
	else return READER_ERROR;
}
/* returns size on success... */
/*
	Read size bytes of frame data into buf. Whatever could not be read is
	filled with zeros; the raw fullread() result (possibly short or negative)
	is handed back to the caller.
*/
static int generic_read_frame_body(mpg123_handle *fr,unsigned char *buf, int size)
{
	long got = fr->rd->fullread(fr,buf,size);

	if(got != size)
	{
		long valid = (got > 0) ? got : 0;
		/* This allows partial frames at the end... do we really want to pad and decode these?! */
		memset(buf+valid,0,size-valid);
	}
	return got;
}
/* Report the current read position as tracked in rdat.filepos. */
static off_t generic_tell(mpg123_handle *fr)
{
	off_t here = fr->rdat.filepos;
	return here;
}
/* Seek back to the beginning of the file; a failing seek is silently ignored. */
static void stream_rewind(mpg123_handle *fr)
{
	(void) stream_lseek(&fr->rdat, 0, SEEK_SET);
}
/*
	Returns the length of a file (if filept points to a seekable file), not
	counting a trailing 128 byte block that starts with "TAG". The last 128
	bytes of the file are read into fr->id3buf on the way.
	Returns -1 if the descriptor cannot seek, the tail cannot be read or the
	remaining length is not positive.
	... that is not totally safe...

	Once the first seek succeeded the descriptor is always moved back to the
	start, also on failure. Before, a failure after the seek to the end (for
	example a file shorter than 128 bytes) left the position at or near the
	end of the file, so that subsequent streaming reads got nothing.
*/
static off_t get_fileinfo(mpg123_handle *fr)
{
	off_t len;
	int have_tail;

	/* Not seekable: the position has not been touched. */
	if((len=lseek(fr->rdat.filept,0,SEEK_END)) < 0) return -1;

	have_tail = lseek(fr->rdat.filept,-128,SEEK_END) >= 0
	         && fr->rd->fullread(fr,(unsigned char *)fr->id3buf,128) == 128;

	/* Back to the beginning, whatever happened to the tail. */
	if(lseek(fr->rdat.filept,0,SEEK_SET) < 0) return -1;

	if(!have_tail) return -1;

	if(!strncmp((char*)fr->id3buf,"TAG",3)) len -= 128;

	if(len <= 0) return -1;

	return len;
}
/* reader for input via manually provided buffers */
/* Start with an empty buffer chain and zeroed positions. Always returns 0. */
static int feed_init(mpg123_handle *fr)
{
	fr->rdat.flags |= READER_BUFFERED | READER_MICROSEEK;

	fr->rdat.firstpos = 0;
	fr->rdat.filepos = 0;
	fr->rdat.filelen = 0;
	fr->rdat.buf = NULL;

	return 0;
}
/* Release every buffer of the chain and reset the feed reader to its initial state. */
static void feed_close(mpg123_handle *fr)
{
	struct buffy *node;
	struct buffy *following;

	/* free the buffer chain */
	for(node = fr->rdat.buf; node != NULL; node = following)
	{
		following = node->next; /* fetch before the node is gone */
		free(node->data);
		free(node);
	}
	feed_init(fr);
}
/* externally called function, returns 0 on success, -1 on error */
/*
	Append a copy of count bytes from in to the end of the buffer chain and
	extend rdat.filelen accordingly. The caller keeps ownership of in.
	Errors (-1): negative count, NULL input with a positive count, out of memory.
	A count of zero is accepted and adds nothing. Before, a negative count went
	unchecked into malloc()/memcpy() and a zero count failed or succeeded
	depending on what malloc(0) returns on the platform.
*/
int feed_more(mpg123_handle *fr, unsigned char *in, long count)
{
	/* the pointer to the pointer for the buffy after the end... */
	struct buffy **b = &fr->rdat.buf;
	debug("feed_more");
	if(count < 0 || (in == NULL && count > 0)) return -1;
	if(count == 0) return 0; /* nothing to store */

	while(*b != NULL){ b = &(*b)->next; }

	*b = (struct buffy*)malloc(sizeof(struct buffy));
	if(*b == NULL) return -1;

	(*b)->data = (unsigned char*)malloc(count);
	/* undo the node allocation so that the chain stays terminated */
	if((*b)->data == NULL){ free(*b); *b = NULL; return -1; }

	memcpy((*b)->data, in, count);
	(*b)->size = count;
	(*b)->next = NULL; /* Hurray, the new last buffer! */
	fr->rdat.filelen += count;
	debug3("feed_more: %p %luB filelen=%lu", (*b)->data, (unsigned long)(*b)->size, (unsigned long)fr->rdat.filelen);
	return 0;
}
/*
	Copy count bytes from the buffer chain, starting at rdat.filepos, to out.
	If fewer than count bytes are available, nothing is copied, filepos is put
	back to firstpos and MPG123_NEED_MORE is returned. Otherwise the result is
	count, or -1 if the chain unexpectedly ran out.
*/
static ssize_t feed_read(mpg123_handle *fr, unsigned char *out, ssize_t count)
{
	struct buffy *cur;
	ssize_t copied = 0;
	ssize_t base = 0; /* chain offset at which cur begins */

	if(fr->rdat.filelen - fr->rdat.filepos < count)
	{
		debug3("hit end, back to beginning (%li - %li < %li)", (long)fr->rdat.filelen, (long)fr->rdat.filepos, (long)count);
		/* go back to firstpos, undo the previous reads */
		fr->rdat.filepos = fr->rdat.firstpos;
		return MPG123_NEED_MORE;
	}

	/* find the current buffer */
	for(cur = fr->rdat.buf; cur != NULL && (base + cur->size) <= fr->rdat.filepos; cur = cur->next)
	{
		base += cur->size;
	}

	/* now start copying from there */
	for(; copied < count && (cur != NULL); cur = cur->next)
	{
		ssize_t loff = fr->rdat.filepos - base; /* start inside this buffer */
		ssize_t chunk = count - copied; /* amount of bytes to get from here... */
		if(chunk > cur->size - loff) chunk = cur->size - loff;

		debug3("copying %liB from %p+%li",(long)chunk, cur->data, (long)loff);
		memcpy(out+copied, cur->data+loff, chunk);

		copied += chunk;
		fr->rdat.filepos += chunk;
		base += cur->size;
	}
	debug2("got %li bytes, pos advanced to %li", (long)copied, (long)fr->rdat.filepos);

	if(copied != count) return -1; /* That must be an error. */

	return copied;
}
/* returns reached position... negative ones are bad... */
/* Forward only: READER_ERROR for a negative len, READER_MORE if not enough data is buffered. */
static off_t feed_skip_bytes(mpg123_handle *fr,off_t len)
{
	if(len < 0) return READER_ERROR;

	if(fr->rdat.filelen - fr->rdat.filepos < len) return READER_MORE;

	fr->rdat.filepos += len;
	return fr->rdat.filepos;
}
/*
	Move the read position back by bytes inside the buffered data; a negative
	value is handled as a forward skip.
	Returns 0 on success, READER_ERROR when stepping before the start, or the
	(non-positive) code from feed_skip_bytes().
*/
static int feed_back_bytes(mpg123_handle *fr, off_t bytes)
{
	if(bytes < 0)
	{
		off_t ret = feed_skip_bytes(fr, -bytes);
		/* a reached position means success here */
		return ret > 0 ? 0 : ret; /* could be 0, could be error code */
	}

	if(bytes > fr->rdat.filepos) return READER_ERROR;

	fr->rdat.filepos -= bytes;
	return 0;
}
/* Frame seeking is not available on fed data: always READER_ERROR. */
static int feed_seek_frame(mpg123_handle *fr, off_t num)
{
	return READER_ERROR;
}
/* Go back to the start of the buffered data; the buffers themselves are kept. */
void feed_rewind(mpg123_handle *fr)
{
	fr->rdat.firstpos = 0;
	fr->rdat.filepos = 0;
}
/*
	Drop all buffers that lie completely before the read position. filepos and
	filelen are reduced by the dropped sizes, and firstpos (the position a
	failed read returns to) is set to the resulting filepos.
*/
void feed_forget(mpg123_handle *fr)
{
	struct buffy *head = fr->rdat.buf;

	/* free all buffers that are def'n'tly outdated */
	/* we have buffers until filepos... delete all buffers fully below it */
	if(head == NULL)
	{
		debug("forget with nothing there!");
	}
	else
	{
		debug2("feed_forget: block %lu pos %lu", (unsigned long)head->size, (unsigned long)fr->rdat.filepos);
	}

	while(head != NULL && fr->rdat.filepos >= head->size)
	{
		struct buffy *rest = head->next; /* != NULL or this is indeed the end and the last cycle anyway */

		fr->rdat.filepos -= head->size;
		fr->rdat.filelen -= head->size;
		debug4("feed_forget: forgot %p with %lu, filepos=%lu, filelen=%lu", head->data, (unsigned long)head->size, (unsigned long)fr->rdat.filepos, (unsigned long)fr->rdat.filelen);

		free(head->data);
		free(head);
		head = rest;
	}

	fr->rdat.buf = head;
	fr->rdat.firstpos = fr->rdat.filepos;
}
/*
	Set the read position to pos. If pos lies inside the range covered by
	firstpos and filelen only filepos is adjusted; otherwise all buffers are
	dropped and the next feed is expected to start at pos.
	Returns the byte offset wanted for the next feed.
*/
off_t feed_set_pos(mpg123_handle *fr, off_t pos)
{
	if(pos < fr->rdat.firstpos || pos >= fr->rdat.firstpos + fr->rdat.filelen)
	{
		/* I expect to get the specific position on next feed. Forget what I have now. */
		feed_close(fr);
		fr->rdat.filepos = pos;
		fr->rdat.firstpos = pos;
		return pos;
	}

	/* We have the position! */
	fr->rdat.filepos = pos - fr->rdat.firstpos;
	return fr->rdat.firstpos + fr->rdat.filelen;
}
/*****************************************************************
* read frame helper
*/
/* Table of the available reader implementations, indexed by READER_STREAM,
   READER_ICY_STREAM, READER_FEED (and READER_SYSTEM with READ_SYSTEM). The
   initializers follow the member order of struct reader: init, close,
   fullread, head_read, head_shift, skip_bytes, read_frame_body, back_bytes,
   seek_frame, tell, rewind, forget. */
struct reader readers[] =
{
{ /* READER_STREAM: plain file descriptor */
default_init,
stream_close,
plain_fullread,
generic_head_read,
generic_head_shift,
stream_skip_bytes,
generic_read_frame_body,
stream_back_bytes,
stream_seek_frame,
generic_tell,
stream_rewind,
NULL
} ,
{ /* READER_ICY_STREAM: like the plain stream, only fullread differs */
default_init,
stream_close,
icy_fullread,
generic_head_read,
generic_head_shift,
stream_skip_bytes,
generic_read_frame_body,
stream_back_bytes,
stream_seek_frame,
generic_tell,
stream_rewind,
NULL
},
{ /* READER_FEED: data handed in via feed_more() */
feed_init,
feed_close,
feed_read,
generic_head_read,
generic_head_shift,
feed_skip_bytes,
generic_read_frame_body,
feed_back_bytes,
feed_seek_frame,
generic_tell,
feed_rewind,
feed_forget
}
/* buffer readers... can also be icy? nah, drop it... plain mpeg audio buffer reader */
#ifdef READ_SYSTEM
,{
system_init,
NULL, /* filled in by system_init() */
fullread,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
}
#endif
};
/* Attach the feed reader to the handle. Returns 0 on success, -1 if the reader init fails. */
int open_feed(mpg123_handle *fr)
{
	debug("feed reader");
	clear_icy(&fr->icy);

	fr->rd = &readers[READER_FEED];
	fr->rdat.flags = 0;

	return (fr->rd->init(fr) < 0) ? -1 : 0;
}
#ifndef O_BINARY
#define O_BINARY (0)
#endif
/*
	Attach a descriptor based reader to the handle.
	With a file name the file is opened read-only and closed again by the
	reader later; with a NULL name the given descriptor fd is used and left
	open on close. A positive fr->p.icy_interval selects the ICY reader.
	Returns MPG123_OK on success, the negative open() result if the file
	cannot be opened (after perror()), or -1 if the reader init fails.
*/
int open_stream(mpg123_handle *fr, char *bs_filenam, int fd)
{
	int filept = fd; /* descriptor of opened file/stream; default: the one handed in (stdin) */
	int filept_opened = 0; /* ... which we must not try to close */

	clear_icy(&fr->icy); /* can be done inside frame_clear ...? */

	if(bs_filenam)
	{
		/* a plain old file to open... */
		filept = open(bs_filenam, O_RDONLY|O_BINARY);
		if(filept < 0)
		{
			perror(bs_filenam);
			return filept; /* error... */
		}
		filept_opened = 1;
	}

	/* now we have something behind filept and can init the reader */
	fr->rdat.filelen = -1;
	fr->rdat.filept = filept;
	fr->rdat.flags = filept_opened ? READER_FD_OPENED : 0;

	if(fr->p.icy_interval > 0)
	{
		debug("ICY reader");
		fr->icy.interval = fr->p.icy_interval;
		fr->icy.next = fr->icy.interval;
		fr->rd = &readers[READER_ICY_STREAM];
	}
	else
	{
		fr->rd = &readers[READER_STREAM];
		debug("stream reader");
	}

	if(fr->rd->init(fr) < 0) return -1;

	return MPG123_OK;
}

94
src/libmpg123/stringbuf.c Normal file
View File

@@ -0,0 +1,94 @@
/*
stringbuf: mimicking a bit of C++ to more safely handle strings
copyright 2006-7 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Thomas Orgis
*/
#include "config.h"
#include "debug.h"
#include "mpg123.h"
#include <stdlib.h>
#include <string.h>
/*
	Put a string structure into the empty state: no buffer, zero size, zero fill.
	Nothing is freed here, so this is only for fresh or already freed structures.
*/
void mpg123_init_string(mpg123_string* sb)
{
	sb->fill = 0;
	sb->size = 0;
	sb->p = NULL;
}
/* Release the buffer of a string (if there is one) and reset the structure to the empty state. */
void mpg123_free_string(mpg123_string* sb)
{
	free(sb->p); /* free(NULL) is a no-op, no check needed */
	mpg123_init_string(sb);
}
/*
	Change the allocated size of a string buffer to exactly new bytes.
	A size of 0 frees the buffer and resets the structure.
	If the buffer is shrunk below the current fill, the content is cut short and
	terminated with a zero byte so that fill never exceeds size (fill counts the
	terminating zero, see mpg123_add_string()).
	Returns 1 on success, 0 if the reallocation failed (the string is untouched then).
*/
int mpg123_resize_string(mpg123_string* sb, size_t new)
{
	char* t;
	debug3("resizing string pointer %p from %lu to %lu", (void*) sb->p, (unsigned long)sb->size, (unsigned long)new);
	if(new == 0)
	{
		if(sb->size && sb->p != NULL) free(sb->p);
		mpg123_init_string(sb);
		return 1;
	}
	if(sb->size == new) return 1; /* success, nothing to do */

	debug("really!");
	t = (char*) realloc(sb->p, new*sizeof(char));
	debug1("realloc returned %p", (void*) t);
	if(t == NULL) return 0;

	sb->p = t;
	sb->size = new;
	if(sb->fill > sb->size)
	{ /* The buffer got smaller than the text: truncate properly. */
		sb->fill = sb->size;
		sb->p[sb->fill-1] = 0;
	}
	return 1;
}
/*
	Make to an exact copy of from: to is resized to from->fill bytes and filled
	with the content of from.
	An empty source leaves to empty; no memcpy() is attempted then, since the
	buffer pointers may be NULL. Copying a string onto itself is harmless.
	Returns 1 on success, 0 if the resize of to failed.
*/
int mpg123_copy_string(mpg123_string* from, mpg123_string* to)
{
	size_t fill = from->fill;
	if(!mpg123_resize_string(to, fill)) return 0;

	/* memcpy() needs valid, non-overlapping pointers, even for zero bytes. */
	if(fill > 0 && to->p != from->p) memcpy(to->p, from->p, fill);
	to->fill = fill;
	return 1;
}
/*
	Append the zero-terminated text stuff to the string.
	fill counts the terminating zero, so appending overwrites the old terminator.
	An empty string (fill == 0) is resized to hold exactly the new text; a filled
	one only grows when the present buffer is too small.
	Returns 1 on success, 0 if the needed memory could not be obtained.
*/
int mpg123_add_string(mpg123_string* sb, char* stuff)
{
	size_t addl = strlen(stuff)+1; /* text plus terminating zero */
	debug1("adding %s", stuff);

	if(sb->fill == 0)
	{ /* Nothing there yet: the new text becomes the whole content. */
		if(!mpg123_resize_string(sb, addl)) return 0;
		memcpy(sb->p, stuff, addl);
		sb->fill = addl;
		return 1;
	}

	/* Existing text: make room behind it if needed, then write over its terminator. */
	if(sb->size < sb->fill-1+addl && !mpg123_resize_string(sb, sb->fill-1+addl)) return 0;
	memcpy(sb->p+sb->fill-1, stuff, addl);
	sb->fill += addl-1;
	return 1;
}
/*
	Replace the content of the string with the zero-terminated text stuff.
	The fill is reset to 0 first, so mpg123_add_string() treats the string as empty.
	Returns the result of mpg123_add_string(): 1 on success, 0 on failure
	(the fill stays 0 in that case).
*/
int mpg123_set_string(mpg123_string* sb, char* stuff)
{
sb->fill = 0;
return mpg123_add_string(sb, stuff);
}

494
src/libmpg123/system.c Normal file
View File

@@ -0,0 +1,494 @@
/*
system.c: system stream decoder (standalone)
copyright 1997-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
grabs an audio stream inside a video/audio system stream
This Program outputs only the first audio stream to STDOUT
currently this is an external program. You must pipe
your streams file to this program and the output to
the mpg123 player. e.g:
./system < my_system_stream.mpg | mpg123 -
*/
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include "mpg123app.h"
/* Descriptor the system stream is read from; main() sets it to 0 (stdin). */
static int filept;
/* Amount of diagnostics on stderr: messages are guarded by tests for verbose and verbose > 1. */
static int verbose = 1;
/* 32 bit start codes that system_init() compares the stream heads against. */
#define PACKET_START 0x000001ba
#define STREAM_END 0x000001b9
#define SYSTEM_STREAM 0x000001bb
/* the following two types are not supported */
/* NOTE(review): VIDEO_STREAM has the same value as STREAM_END; both are only used in the disabled (#if 0) part of system_init(). */
#define AUDIO_STREAM 0x000001b8
#define VIDEO_STREAM 0x000001b9
#define PADDING_STREAM 0x000001be
#define RESERVED_STREAM 0x000001bc
#define PRIVATE_STREAM_1 0x000001bd
#define PRIVATE_STREAM_2 0x000001bf
/* Forward declarations of the reader handlers; all of them are stubs defined at the end of the file. */
static int system_back_frame(mpg123_handle *fr,int num);
static int system_head_read(unsigned char *hbuf,unsigned long *newhead);
static int system_head_shift(unsigned char *hbuf,unsigned long *head);
static int system_skip_bytes(int len);
static int system_read_frame_body(int size);
static long system_tell(void);
/* Content of a system header as decoded by system_raw_read_system_header(). */
struct system_info {
/* rate field from the first three header bytes, marker bits removed */
unsigned long rate;
/* stream counts taken from header bytes 3 and 4 */
int num_audio;
int num_video;
/* flag bits of header byte 3; nonzero when set */
int fixed;
int csps;
/* flag bits of header byte 4; nonzero when set */
int audio_lock;
int video_lock;
};
/* Per-stream entry of a system header, filled by system_raw_read_system_header(). */
struct stream_info {
/* stream ID byte as found in the header */
int id;
/* stream number: the ID with the type bits masked off */
int id1;
/* 'A' (audio), 'V' (video) or 'R' (anything else), derived from the ID */
int type;
/* 13 bit size value of the entry */
int size;
/* scale flag of the entry; nonzero when set */
int scale;
};
/* Optional packet header fields as parsed by system_raw_read_packet_data(); fields absent from a packet are left untouched. */
struct packet_info {
int scale;
int size;
/* time stamps as decoded by system_raw_timer_value() */
unsigned long dts;
unsigned long pts;
};
/* The most recently parsed system header and its stream entries. */
struct system_info sys_info;
struct stream_info str_info[64];
/*
	Read exactly len bytes from descriptor f into buf, repeating read() as needed.
	Returns len when everything was read, the smaller number of bytes actually
	read when the input ended early, or -1 on a read error.
	(All callers treat any result other than len as failure.)
*/
static int my_read(int f,char *buf,int len)
{
	int got = 0;
	while(got < len) {
		int ret = read(f,buf+got,len-got);
		if(ret < 0)
			return -1;
		if(ret == 0)
			break; /* end of input: do not spin forever waiting for more */
		got += ret;
	}
	return (got < len) ? got : len;
}
/*
	Read a 32 bit big-endian head (start code) from descriptor f into *head.
	Returns 0 on success, -1 if the four bytes could not be read.
*/
static int system_raw_read_head(int f,unsigned long *head)
{
	unsigned char buf[4];
	if(my_read(f,(char *)buf,4) != 4) {
		perror("read_head");
		return -1;
	}
	/* Widen before shifting: buf[0]<<24 would shift into the sign bit of int. */
	*head = ((unsigned long)buf[0]<<24) | ((unsigned long)buf[1]<<16)
	      | ((unsigned long)buf[2]<<8)  |  (unsigned long)buf[3];
	if(verbose > 1)
		fprintf(stderr,"head: %08lx\n",*head);
	return 0;
}
/*
	Read a 16 bit big-endian word from descriptor f into *word.
	Returns 0 on success, -1 if the two bytes could not be read.
*/
static int system_raw_read_word(int f,int *word)
{
	unsigned char raw[2];
	if(my_read(f,(char *)raw,2) == 2) {
		*word = (raw[0]<<8) + raw[1];
		return 0;
	}
	perror("read_word");
	return -1;
}
/* Read exactly len bytes from descriptor f into buf; returns 0 on success, -1 otherwise. */
static int system_raw_read(int f,int len,unsigned char *buf)
{
	return (my_read(f,(char *)buf,len) == len) ? 0 : -1;
}
/*
	Skip len bytes of input on descriptor f.
	Seekable input is skipped with lseek(); if the descriptor cannot seek
	(ESPIPE, e.g. a pipe), the bytes are read and thrown away instead.
	Returns len on success and -1 on failure: negative len, seek/read error,
	or input that ends before len bytes were skipped.
*/
static int system_raw_skip(int f,int len)
{
	char scratch[1024];
	int left = len;

	if(len < 0)
		return -1;
	/* Do not return the new offset itself: it may not fit into an int. */
	if(lseek(f,len,SEEK_CUR) >= 0)
		return len;
	if(errno != ESPIPE)
		return -1;

	while(left > 0) {
		int want = (left > (int) sizeof(scratch)) ? (int) sizeof(scratch) : left;
		int got = read(f,scratch,want);
		if(got <= 0)
			return -1; /* error or premature end of input */
		left -= got;
	}
	return len;
}
/*
	Decode a time stamp from the five bytes at buf.
	A warning is printed (if verbose) when one of the marker bits, the lowest
	bits of bytes 0, 2 and 4, is missing; the value is returned regardless.
	NOTE(review): the shifts for bytes 0..2 place the fields one bit lower than
	those for bytes 3 and 4 would require (bit 14 is written by both byte 2 and
	byte 3). The layout is kept as it was; confirm the intended scaling before
	relying on the value.
*/
static unsigned long system_raw_timer_value(unsigned char *buf)
{
	unsigned long val;
	if(!(buf[0] & 0x1) || !(buf[2] & 0x1) || !(buf[4] & 0x1)) {
		if(verbose)
			fprintf(stderr,"Warning: missing marker in time stamp!\n");
	}
	/* Widen before shifting: 0xe << 28 does not fit into a signed int. */
	val  = (unsigned long)(buf[0] & 0xe) << (29-1);
	val |= (unsigned long)buf[1] << 21;
	val |= (unsigned long)(buf[2] & 0xfe) << (14-1);
	val |= (unsigned long)buf[3] << 7;
	val |= (unsigned long)(buf[4] >> 1);
	return val;
}
/* Check that count bytes starting at pos lie inside a packet of len bytes; complain if they do not. */
static int system_raw_packet_has(int pos,int count,int len)
{
	if(pos + count <= len)
		return 1;
	fprintf(stderr,"Ouch ... packet too short!\n");
	return 0;
}
/*
	Read one packet from fd: a 16 bit length followed by that many bytes.
	Stuffing bytes are skipped, the optional buffer scale/size and time stamp
	fields are stored in pi (fields not present stay untouched), and the
	remaining payload is written to stdout (descriptor 1).
	Returns 0 on success, -1 on I/O errors or a malformed/truncated packet.
*/
static int system_raw_read_packet_data(int fd,struct packet_info *pi)
{
	/* The length is a 16 bit word, so a packet never has more than 65535 bytes. */
	static unsigned char buf[65536];
	int len;
	int pos = 0;

	if(system_raw_read_word(fd,&len) < 0)
		return -1;
	if(verbose > 1)
		fprintf(stderr,"Stream video/audio len: %d\n",len);
	if(system_raw_read(fd,len,buf) < 0)
		return -1;

	/* Up to 15 stuffing bytes (0xff) may come first. */
	while(pos < 16 && pos < len && buf[pos] == 0xff)
		pos++;
	if(pos == 16) {
		fprintf(stderr,"Ouch ... too much stuffing bytes!\n");
		return -1;
	}
	if(!system_raw_packet_has(pos,1,len))
		return -1;

	/* Optional two bytes with buffer scale and size; a code byte must still follow them. */
	if( (buf[pos] & 0xc0) == 0x40 ) {
		if(!system_raw_packet_has(pos,3,len))
			return -1;
		pi->scale = (buf[pos] >> 5) & 0x1;
		pi->size = (buf[pos] & 0x1f) << 8;
		pi->size |= buf[pos+1];
		pos += 2;
	}

	switch( buf[pos] & 0xf0) {
	case 0x00: /* no time stamps, just the code 0x0f */
		if(buf[pos] != 0x0f) {
			fprintf(stderr,"Ouch ... illegal timer code!\n");
			return -1;
		}
		pos++;
		break;
	case 0x20: /* PTS only */
		if(!system_raw_packet_has(pos,5,len))
			return -1;
		pi->pts = system_raw_timer_value(buf+pos);
		pos += 5;
		break;
	case 0x30: /* PTS followed by DTS */
		if(!system_raw_packet_has(pos,10,len))
			return -1;
		pi->pts = system_raw_timer_value(buf+pos);
		pos += 5;
		/* The check is about the upper nibble; masking with 0xf could never yield 0x10. */
		if( (buf[pos] & 0xf0) != 0x10) {
			if(verbose)
				fprintf(stderr,"DTS should start with 0x1x!\n");
		}
		pi->dts = system_raw_timer_value(buf+pos);
		pos += 5;
		break;
	default:
		if(verbose)
			fprintf(stderr,"Ouch ... illegal timer code!\n");
		return -1;
	}
#if 1
	/* Hand the payload on to stdout; write() may take it in several pieces. */
	while(pos < len) {
		int put = (int) write(1,buf+pos,len-pos);
		if(put <= 0) {
			if(put < 0)
				perror("write");
			return -1;
		}
		pos += put;
	}
#endif
	return 0;
}
/*
	Read the eight bytes that follow a PACKET_START code from descriptor f.
	*clock gets the first five bytes combined into one number, with byte 4 as
	the most significant and byte 0 as the least significant one; *rate gets
	bytes 5..7 as a 24 bit big-endian value. No marker bits are removed.
	Returns 0 on success, -1 if the bytes could not be read (outputs untouched).
*/
static int system_raw_read_packet_info(int f,double *clock,unsigned long *rate)
{
	unsigned char raw[8];
	double acc = 0.0;
	int idx;

	if(my_read(f,(char *)raw,8) != 8) {
		perror("read_packet_info");
		return -1;
	}
	for(idx=4;idx>=0;idx--)
		acc = acc * 256.0 + (double) raw[idx];
	*clock = acc;
	*rate = (raw[5]<<16) + (raw[6]<<8) + raw[7];
	return 0;
}
/*
	Read and decode a system header from descriptor f: a 16 bit length, six
	bytes of global information (stored in ssi) and a list of three-byte stream
	entries (stored in consecutive elements of str_info[]).
	At most 6+48*3 bytes are decoded; anything beyond that is skipped.
	Returns 0 on success, -1 on I/O errors or when the header is too short or
	lacks expected marker bits.
*/
static int system_raw_read_system_header(int f,struct system_info *ssi)
{
	int rlen,len;
	unsigned char buf[6+48*3];
	int i,cnt;

	if(system_raw_read_word(f,&len) < 0)
		return -1;
	if(verbose > 1)
		fprintf(stderr,"system len: %d\n",len);
	rlen = len;
	if(len > (int) sizeof(buf)) {
		if(verbose)
			fprintf(stderr,"Oops .. large System header!\n");
		rlen = (int) sizeof(buf);
	}
	/* The fixed part alone has six bytes; do not decode what was never read. */
	if(rlen < 6) {
		if(verbose)
			fprintf(stderr,"Oops .. short System header!\n");
		return -1;
	}
	if(my_read(f,(char *)buf,rlen) != rlen) {
		perror("raw_read_system_header");
		return -1;
	}
	if(len - rlen) {
		if(system_raw_skip(f,len-rlen) < 0)
			return -1;
	}
	if(buf[5] != 0xff) {
		if(verbose)
			fprintf(stderr,"Warning: buf[5] !=0xff \n");
	}
	ssi->rate = (buf[0]<<16)+(buf[1]<<8)+buf[2];
	if( (ssi->rate & 0x800001) != 0x800001) {
		if(verbose)
			fprintf(stderr,"System Header Byte 0: Missing bits\n");
		return -1;
	}
	/* Strip the marker bits at both ends of the rate field. */
	ssi->rate >>= 1;
	ssi->rate &= 0x7fffff;
	ssi->num_audio = buf[3] >> 2;
	ssi->num_video = buf[4] & 0x1f;
	ssi->fixed = buf[3] & 0x2;
	ssi->csps = buf[3] & 0x1;
	ssi->audio_lock = buf[4] & 0x80;
	ssi->video_lock = buf[4] & 0x40;
	if(verbose)
		fprintf(stderr,"Audio: %d Video: %d, Lock: %d/%d, fixed: %d, csps: %d\n",
			ssi->num_audio,ssi->num_video,ssi->audio_lock?1:0,ssi->video_lock?1:0,
			ssi->fixed?1:0,ssi->csps?1:0);

	/*
		Stream entries of three bytes each. Only complete entries are decoded;
		the buffer limits their number to 48, which fits into str_info[64].
	*/
	cnt = 0;
	for(i = 6; i+2 < rlen; i += 3) {
		if( !(buf[i] & 0x80) || ((buf[i+1] & 0xc0) != 0xc0) ) {
			fprintf(stderr,"system_raw_read_system_header byte %d,%d: bits not set!\n",i,i+1);
			return -1;
		}
		str_info[cnt].id = buf[i];
		if( (str_info[cnt].id & 0xe0) == 0xc0 ) {
			str_info[cnt].type = 'A';
			str_info[cnt].id1 = str_info[cnt].id & 0x1f;
		}
		else if((str_info[cnt].id & 0xf0) == 0xe0 ) {
			str_info[cnt].type = 'V';
			str_info[cnt].id1 = str_info[cnt].id & 0x0f;
		}
		else {
			str_info[cnt].type = 'R';
			str_info[cnt].id1 = str_info[cnt].id & 0x3f;
		}
		str_info[cnt].scale = buf[i+1] & 0x20;
		str_info[cnt].size = ((buf[i+1] & 0x1f)<<8)+buf[i+2];
		if(verbose)
			fprintf(stderr,"ID: %#02x=%c%d, scale: %d, size %d\n",
				str_info[cnt].id,str_info[cnt].type,str_info[cnt].id1,str_info[cnt].scale?1:0,str_info[cnt].size);
		/* Next stream goes into the next slot instead of overwriting this one. */
		cnt++;
	}
	return 0;
}
/***************************************************
* init system layer read functions
*/
/*
	Install the system stream handlers into r and then walk through the whole
	system stream on filept: pack and system headers are decoded, packets of
	the first audio stream (ID 0x1c0) are passed to
	system_raw_read_packet_data(), all other packets are skipped.
	Returns 0 when the stream end code was reached, -1 on read errors or when
	the data does not look like a system stream.
*/
int system_init(struct reader *r)
{
	unsigned long head;
	double clk;
	unsigned long rate;
	int len;
	int done = 0;

	r->back_frame = NULL;
	r->head_read = system_head_read;
	r->head_shift = system_head_shift;
	r->skip_bytes = system_skip_bytes;
	r->read_frame_body = system_read_frame_body;
	r->tell = system_tell;

	/* The stream has to begin with a pack header. */
	if(system_raw_read_head(filept,&head) < 0)
		return -1;
	if(head != PACKET_START) {
		fprintf(stderr,"No PACKET_START found!\n");
		return -1;
	}
	if(system_raw_read_packet_info(filept,&clk,&rate) < 0)
		return -1;

	while(!done) {
		if(system_raw_read_head(filept,&head) < 0)
			return -1;
		/* Every start code has the prefix 0x000001. */
		if((head & 0xffffff00) != 0x00000100)
			return -1;
		switch(head) {
		case PACKET_START:
			if(system_raw_read_packet_info(filept,&clk,&rate) < 0)
				return -1;
			if(verbose > 1)
				fprintf(stderr,"Packet Start\n");
			break;
		case STREAM_END:
			if(verbose)
				fprintf(stderr,"Stream End\n");
			/* Nothing follows the end code: finish successfully instead of reading on. */
			done = 1;
			break;
		case SYSTEM_STREAM:
			if(system_raw_read_system_header(filept,&sys_info) < 0)
				return -1;
			break;
/* Disabled; VIDEO_STREAM has the same value as STREAM_END, so this cannot be enabled as it is. */
#if 0
		case AUDIO_STREAM:
			if(system_raw_read_word(filept,&len) < 0)
				return -1;
			if(verbose > 1)
				fprintf(stderr,"STD audio len: %d\n",len);
			if(system_raw_skip(filept,len) < 0)
				return -1;
			break;
		case VIDEO_STREAM:
			if(system_raw_read_word(filept,&len) < 0)
				return -1;
			if(verbose > 1)
				fprintf(stderr,"STD video len: %d\n",len);
			if(system_raw_skip(filept,len) < 0)
				return -1;
			break;
#endif
		default:
			if(head >= 0x000001c0 && head < 0x000001f0) {
				if(verbose > 1)
					fprintf(stderr,"Stream ID %lu\n",head - 0x000001c0);
				if( (head - 0x000001c0) == 0x0) {
					/* The one stream that gets extracted. */
					struct packet_info pi;
					if(system_raw_read_packet_data(filept,&pi) < 0 )
						return -1;
				}
				else {
					if(system_raw_read_word(filept,&len) < 0)
						return -1;
					if(system_raw_skip(filept,len) < 0)
						return -1;
				}
			}
			else if(head >= 0x000001bd && head < 0x000001c0) {
				/* private and padding streams: silently skipped */
				if(system_raw_read_word(filept,&len) < 0)
					return -1;
				if(system_raw_skip(filept,len) < 0)
					return -1;
			}
			else {
				if(verbose)
					fprintf(stderr,"unsupported head %8lx\n",head);
				if(system_raw_read_word(filept,&len) < 0)
					return -1;
				if(verbose)
					fprintf(stderr,"Skipping: %d bytes\n",len);
				if(system_raw_skip(filept,len) < 0)
					return -1;
			}
			break;
		}
	}
	return 0;
}
/*
	Reader handlers for the system stream. All of them are placeholders that
	do nothing and return 0. system_init() installs them into struct reader,
	except for system_back_frame(): the back_frame slot is set to NULL there.
*/
/* placeholder: stepping back num frames is not implemented */
static int system_back_frame(mpg123_handle *fr,int num)
{
return 0;
}
/* placeholder: reads nothing, leaves hbuf and *newhead untouched */
static int system_head_read(unsigned char *hbuf,unsigned long *newhead)
{
return 0;
}
/* placeholder: leaves hbuf and *head untouched */
static int system_head_shift(unsigned char *hbuf,unsigned long *head)
{
return 0;
}
/* placeholder: skips nothing */
static int system_skip_bytes(int len)
{
return 0;
}
/* placeholder: reads nothing */
static int system_read_frame_body(int size)
{
return 0;
}
/* placeholder: always reports position 0 */
static long system_tell(void)
{
return 0;
}
/* Reader structure that receives the system stream handlers. */
struct reader rd1;
/*
	Standalone entry point: processes the system stream on stdin via
	system_init() and reports its result on stderr.
	Exit status is EXIT_SUCCESS if system_init() returned 0, EXIT_FAILURE otherwise.
*/
int main(void)
{
	int ret;
	filept = 0; /* stdin */
	ret = system_init(&rd1);
	fprintf(stderr,"ret: %d\n",ret);
	return (ret < 0) ? EXIT_FAILURE : EXIT_SUCCESS;
}

181
src/libmpg123/tabinit.c Normal file
View File

@@ -0,0 +1,181 @@
/*
tabinit.c: initialize tables...
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by Michael Hipp
*/
#include <stdlib.h>
#include "mpg123lib_intern.h"
#ifdef OPT_MMXORSSE
/* 32 bit integer; i.e. "long" on x86, but int on x86_64... */
const int aligned(32) costab_mmxsse[] =
{
1056974725, 1057056395, 1057223771, 1057485416, 1057855544,
1058356026, 1059019886, 1059897405, 1061067246, 1062657950,
1064892987, 1066774581, 1069414683, 1073984175, 1079645762,
1092815430, 1057005197, 1057342072, 1058087743, 1059427869,
1061799040, 1065862217, 1071413542, 1084439708, 1057128951,
1058664893, 1063675095, 1076102863, 1057655764, 1067924853,
1060439283
};
#endif
/* All optimizations share this code - with the exception of MMX */
#ifndef OPT_MMX_ONLY
/* that altivec alignment part here should not hurt generic code, I hope */
/*
	Cosine tables with 16, 8, 4, 2 and 1 entries, filled at run time by
	prepare_decode_tables(). The AltiVec build only adds 16 byte alignment.
*/
#ifdef OPT_ALTIVEC
static real __attribute__ ((aligned (16))) cos64[16];
static real __attribute__ ((aligned (16))) cos32[8];
static real __attribute__ ((aligned (16))) cos16[4];
static real __attribute__ ((aligned (16))) cos8[2];
static real __attribute__ ((aligned (16))) cos4[1];
#else
static real cos64[16],cos32[8],cos16[4],cos8[2],cos4[1];
#endif
/* Access to the five tables by index, largest table first. */
real *pnts[] = { cos64,cos32,cos16,cos8,cos4 };
/*
	Integer base values of the decode window, 257 entries.
	make_decode_tables() walks the table from entry 0 up to entry 256 and back
	down again, divides each value by 65536.0 and multiplies it with the
	(sign-alternating) output scale to fill fr->decwin.
*/
static long intwinbase[] = {
0, -1, -1, -1, -1, -1, -1, -2, -2, -2,
-2, -3, -3, -4, -4, -5, -5, -6, -7, -7,
-8, -9, -10, -11, -13, -14, -16, -17, -19, -21,
-24, -26, -29, -31, -35, -38, -41, -45, -49, -53,
-58, -63, -68, -73, -79, -85, -91, -97, -104, -111,
-117, -125, -132, -139, -147, -154, -161, -169, -176, -183,
-190, -196, -202, -208, -213, -218, -222, -225, -227, -228,
-228, -227, -224, -221, -215, -208, -200, -189, -177, -163,
-146, -127, -106, -83, -57, -29, 2, 36, 72, 111,
153, 197, 244, 294, 347, 401, 459, 519, 581, 645,
711, 779, 848, 919, 991, 1064, 1137, 1210, 1283, 1356,
1428, 1498, 1567, 1634, 1698, 1759, 1817, 1870, 1919, 1962,
2001, 2032, 2057, 2075, 2085, 2087, 2080, 2063, 2037, 2000,
1952, 1893, 1822, 1739, 1644, 1535, 1414, 1280, 1131, 970,
794, 605, 402, 185, -45, -288, -545, -814, -1095, -1388,
-1692, -2006, -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788,
-5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597, -7910, -8209,
-8491, -8755, -8998, -9219, -9416, -9585, -9727, -9838, -9916, -9959,
-9966, -9935, -9863, -9750, -9592, -9389, -9139, -8840, -8492, -8092,
-7640, -7134, -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082,
-70, 998, 2122, 3300, 4533, 5818, 7154, 8540, 9975, 11455,
12980, 14548, 16155, 17799, 19478, 21189, 22929, 24694, 26482, 28289,
30112, 31947, 33791, 35640, 37489, 39336, 41176, 43006, 44821, 46617,
48390, 50137, 51853, 53534, 55178, 56778, 58333, 59838, 61289, 62684,
64019, 65290, 66494, 67629, 68692, 69679, 70590, 71420, 72169, 72835,
73415, 73908, 74313, 74630, 74856, 74992, 75038 };
/*
	Fill the five cosine tables reachable through pnts[].
	Table number t has 16>>t entries; entry k is 1 / (2 * cos(pi * (2k+1) / (64>>t))),
	converted with DOUBLE_TO_REAL().
*/
void prepare_decode_tables()
{
	int tab;
	for(tab=0; tab<5; tab++)
	{
		real *costab = pnts[tab];
		const int entries = 0x10>>tab;
		const int divv = 0x40>>tab;
		int k;
		for(k=0; k<entries; k++)
			costab[k] = DOUBLE_TO_REAL(1.0 / (2.0 * cos(M_PI * ((double) k * 2.0 + 1.0) / (double) divv)));
	}
}
#endif
#ifdef OPT_MMXORSSE
/*
	Build the decode window tables for the MMX/SSE decoders.
	The work is done by the assembly routine make_decode_tables_mmx_asm(), which
	gets the scale and the two tables of the handle (decwin_mmx and decwins).
	The scale is fr->lastscale, or fr->p.outscale while lastscale is negative.
*/
void make_decode_tables_mmx(mpg123_handle *fr)
{
debug("MMX decode tables");
make_decode_tables_mmx_asm((fr->lastscale < 0 ? fr->p.outscale : fr->lastscale), fr->decwin_mmx, fr->decwins);
debug("MMX decode tables done");
}
#endif
#ifndef OPT_MMX_ONLY
/*
	Build the generic decode window fr->decwin from intwinbase[].
	The scale is fr->lastscale, or fr->p.outscale while lastscale is negative.
	The base table is walked upwards for the first 256 steps and downwards for
	the following 256; every value is divided by 65536 and multiplied by the
	scale, whose sign flips after each 64 steps. Each value is stored twice,
	16 entries apart.
*/
void make_decode_tables(mpg123_handle *fr)
{
	int i,j;
	int idx = 0;
	scale_t scaleval = -(fr->lastscale < 0 ? fr->p.outscale : fr->lastscale);
	debug("decode tables");
	for(i=0,j=0;i<512;i++,idx+=32)
	{
		if(idx < 512+16)
			fr->decwin[idx+16] = fr->decwin[idx] = DOUBLE_TO_REAL((double) intwinbase[j] / 65536.0 * (double) scaleval);
		if(i % 32 == 31)
			idx -= 1023;
		if(i % 64 == 63)
			scaleval = - scaleval;
		/* up through the base table in the first half, back down in the second */
		if(i < 256) j++;
		else j--;
	}
	debug("decode tables done");
}
#endif
/*
	Build the table that converts 16 bit samples to 8 bit output.
	The table has 8192 bytes and is allocated on first use; fr->conv16to8
	points to its middle, so it is indexed with -4096..4095.
	The content depends on fr->af.encoding: a logarithmic curve for
	MPG123_ENC_ULAW_8, i>>5 for MPG123_ENC_SIGNED_8, (i>>5)+128 for
	MPG123_ENC_UNSIGNED_8 and all zero for anything else.
	Returns 0 on success; -1 with fr->err = MPG123_ERR_16TO8TABLE if the
	allocation failed.
*/
int make_conv16to8_table(mpg123_handle *fr)
{
	/*
	 * ????: 8.0 is right but on SB cards '2.0' is a better value ???
	 */
	const double mul = 8.0;
	const int mode = fr->af.encoding;
	int i;

	if(fr->conv16to8_buf == NULL)
	{
		fr->conv16to8_buf = (unsigned char *) malloc(8192);
		if(fr->conv16to8_buf == NULL)
		{
			fr->err = MPG123_ERR_16TO8TABLE;
			if(NOQUIET) error("Can't allocate 16 to 8 converter table!");
			return -1;
		}
		fr->conv16to8 = fr->conv16to8_buf + 4096;
	}

	if(fr->af.encoding == MPG123_ENC_ULAW_8)
	{
		const double m = 127.0 / log(256.0);
		for(i=-4096; i<4096; i++)
		{
			int c1;
			/* dunno whether this is a valid transformation rule ?!?!? */
			if(i >= 0)
				c1 = 255 - (int) (log( 1.0 + 255.0 * (double) i*mul / 32768.0 ) * m);
			else
				c1 = 127 - (int) (log( 1.0 - 255.0 * (double) i*mul / 32768.0 ) * m);
			if(c1 < 0 || c1 > 255)
				fprintf(stderr,"Converror %d %d\n",i,c1);
			if(c1 == 0)
				c1 = 2;
			fr->conv16to8[i] = (unsigned char) c1;
		}
		return 0;
	}

	/* The linear encodings (and the fallback) only differ in the value stored. */
	for(i=-4096; i<4096; i++)
	{
		if(mode == MPG123_ENC_SIGNED_8)
			fr->conv16to8[i] = i>>5;
		else if(mode == MPG123_ENC_UNSIGNED_8)
			fr->conv16to8[i] = (i>>5)+128;
		else
			fr->conv16to8[i] = 0;
	}
	return 0;
}

175
src/libmpg123/tabinit_mmx.S Normal file
View File

@@ -0,0 +1,175 @@
/*
tabinit_mmx: make_decode_tables_mmx
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
see COPYING and AUTHORS files in distribution or http://mpg123.org
initially written by the mysterious higway (apparently)
*/
#include "mangle.h"
.data
ALIGN32
/*
	Base values of the decode window as 16 bit words (.value), 257 entries.
	The values from byte offset 444 (entry 222) on are too large for a signed
	16 bit word and are stored reduced by 60000; the code below adds 60000
	again for every entry at or beyond intwinbase+444.
*/
intwinbase:
.value 0, -1, -1, -1, -1, -1, -1, -2
.value -2, -2, -2, -3, -3, -4, -4, -5
.value -5, -6, -7, -7, -8, -9, -10, -11
.value -13, -14, -16, -17, -19, -21, -24, -26
.value -29, -31, -35, -38, -41, -45, -49, -53
.value -58, -63, -68, -73, -79, -85, -91, -97
.value -104, -111, -117, -125, -132, -139, -147, -154
.value -161, -169, -176, -183, -190, -196, -202, -208
.value -213, -218, -222, -225, -227, -228, -228, -227
.value -224, -221, -215, -208, -200, -189, -177, -163
.value -146, -127, -106, -83, -57, -29, 2, 36
.value 72, 111, 153, 197, 244, 294, 347, 401
.value 459, 519, 581, 645, 711, 779, 848, 919
.value 991, 1064, 1137, 1210, 1283, 1356, 1428, 1498
.value 1567, 1634, 1698, 1759, 1817, 1870, 1919, 1962
.value 2001, 2032, 2057, 2075, 2085, 2087, 2080, 2063
.value 2037, 2000, 1952, 1893, 1822, 1739, 1644, 1535
.value 1414, 1280, 1131, 970, 794, 605, 402, 185
.value -45, -288, -545, -814, -1095, -1388, -1692, -2006
.value -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788
.value -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597
.value -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585
.value -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750
.value -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134
.value -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082
.value -70, 998, 2122, 3300, 4533, 5818, 7154, 8540
.value 9975, 11455, 12980, 14548, 16155, 17799, 19478, 21189
.value 22929, 24694, 26482, 28289, 30112, 31947,-26209,-24360
.value -22511,-20664,-18824,-16994,-15179,-13383,-11610, -9863
.value -8147, -6466, -4822, -3222, -1667, -162, 1289, 2684
.value 4019, 5290, 6494, 7629, 8692, 9679, 10590, 11420
.value 12169, 12835, 13415, 13908, 14313, 14630, 14856, 14992
.value 15038
/* Divisor for the window values: the single precision float 65536.0 as bit pattern. */
intwindiv:
.long 0x47800000 # 65536.0
.text
ALIGN32
/* void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); */
/*
	Builds two window tables from intwinbase in two passes of 512 steps each.
	Pass 1 (.L00 to .L03) writes floats to decwin_mmx, pass 2 (.L05 to .L12)
	writes 16 bit words (movw) to decwins.
	Register use in both passes:
	  ebx  step counter
	  ecx  index into the output table
	  esi  constant 32 (index step, also used as bit mask)
	  edi  pointer into intwinbase
	  (%esp)  pointer step in bytes: +2 for the first 256 steps, -2 afterwards
	The scaleval argument on the stack is negated in place: once at the start
	and again after every 64 steps.
	A pass ends when edi has come back to the start of intwinbase.
*/
.globl ASM_NAME(make_decode_tables_mmx_asm)
ASM_NAME(make_decode_tables_mmx_asm):
pushl %edi
pushl %esi
pushl %ebx
/* stack: 0=ebx, 4=esi, 8=edi, 12=back, 16=scaleval, 20=decwin_mmx, 24=decwins */
xorl %ecx,%ecx
xorl %ebx,%ebx
movl $32,%esi
movl $intwinbase,%edi
negl 16(%esp) /* scaleval */
pushl $2 /* intwinbase step */
/* stack: 20=scaleval 24=decwin_mmx, 28=decwins */
/* pass 1: float table */
.L00:
/* nothing is stored for indices of 528 and above */
cmpl $528,%ecx
jnc .L02
movswl (%edi),%eax
/* entries from intwinbase+444 on are stored reduced by 60000 */
cmpl $intwinbase+444,%edi
jc .L01
addl $60000,%eax
.L01:
pushl %eax
/* stack: 24=scaleval 28=decwin_mmx, 32=decwins */
/* value / 65536.0 * scaleval, computed on the FPU */
fildl (%esp)
fdivs intwindiv
fimull 24(%esp) /* scaleval */
/* eax used to be popped the line before... I'll just use it here a bit */
movl 28(%esp),%eax /* decwin_mmx */
/* store the result twice, 16 floats (64 bytes) apart */
fsts (%eax,%ecx,4)
fstps 64(%eax,%ecx,4)
popl %eax
/* stack: 20=scaleval 24=decwin_mmx, 28=decwins */
.L02:
/* edx = counter & 31; on every 32nd step the index is taken back by 1023 */
leal -1(%esi),%edx
and %ebx,%edx
cmp $31,%edx
jnz .L03
addl $-1023,%ecx
/* if bit 5 of the counter is set as well (every 64th step), flip the sign of scaleval */
test %esi,%ebx
jz .L03
negl 20(%esp)
.L03:
addl %esi,%ecx
addl (%esp),%edi
incl %ebx
/* back at the start of the table: pass finished */
cmpl $intwinbase,%edi
jz .L04
/* after 256 steps turn around and walk the table downwards */
cmp $256,%ebx
jnz .L00
negl (%esp)
jmp .L00
.L04:
popl %eax
xorl %ecx,%ecx
xorl %ebx,%ebx
pushl $2 /* paired with popl above */
/* pass 2: 16 bit integer table */
.L05:
cmpl $528,%ecx
jnc .L11
movswl (%edi),%eax
cmpl $intwinbase+444,%edi
jc .L06
addl $60000,%eax
.L06:
/* 64 bit product value * scaleval in edx:eax, then shifted right by 17 bits */
cltd
imull 20(%esp)
shrdl $17,%edx,%eax
/* limit the result to -32767..32767; the movl does not change the flags of the cmpl */
cmpl $32767,%eax
movl $1055,%edx
jle .L07
movl $32767,%eax
jmp .L08
.L07:
cmpl $-32767,%eax
jge .L08
movl $-32767,%eax
.L08:
/* going to use ebx for decwins, watch the jumps */
pushl %ebx
/* stack: 24=scaleval 28=decwin_mmx, 32=decwins */
movl 32(%esp),%ebx
/* for indices below 512 the value also goes to word 1055-index and 16 words before that */
cmpl $512,%ecx
jnc .L09
subl %ecx,%edx
movw %ax,(%ebx,%edx,2) /* decwins */
movw %ax,-32(%ebx,%edx,2)
.L09:
/* the value is negated for even indices before it is stored at the index itself */
testl $1,%ecx
jnz .L10
negl %eax
.L10:
/* store twice, 16 words (32 bytes) apart */
movw %ax,(%ebx,%ecx,2)
movw %ax,32(%ebx,%ecx,2)
popl %ebx /* that has to match the pushl before */
.L11:
/* same index, sign and direction handling as in pass 1 */
leal -1(%esi),%edx
and %ebx,%edx
cmp $31,%edx
jnz .L12
addl $-1023,%ecx
test %esi,%ebx
jz .L12
negl 20(%esp)
.L12:
addl %esi,%ecx
addl (%esp),%edi
incl %ebx
cmpl $intwinbase,%edi
jz .L13
cmp $256,%ebx
jnz .L05
negl (%esp)
jmp .L05
.L13:
/* drop the step value and restore the saved registers */
popl %eax
popl %ebx
popl %esi
popl %edi
ret

27
src/libmpg123/testcpu.c Normal file
View File

@@ -0,0 +1,27 @@
#include <stdio.h>
#include "getcpuflags.h"
/*
	Print what getcpuflags() reports about the CPU: the family, the three raw
	flag words and the names of the instruction set extensions found.
	Always returns 0.
*/
int main()
{
	struct cpuflags flags;
	int family;

	if(!getcpuflags(&flags))
	{
		printf("CPU won't do cpuid (some old i386 or i486)\n");
		return 0;
	}

	/* the family number sits in bits 8..11 of the id word */
	family = (flags.id & 0xf00)>>8;
	printf("family: %i\n", family);
	printf("stdcpuflags: 0x%08x\n", flags.std);
	printf("std2cpuflags: 0x%08x\n", flags.std2);
	printf("extcpuflags: 0x%08x\n", flags.ext);

	if(!cpu_i586(flags))
	{
		printf("I guess you have some i486\n");
		return 0;
	}

	printf("A i586 or better cpu with:");
	if(cpu_mmx(flags)) printf(" mmx");
	if(cpu_3dnow(flags)) printf(" 3dnow");
	if(cpu_3dnowext(flags)) printf(" 3dnowext");
	if(cpu_sse(flags)) printf(" sse");
	if(cpu_sse2(flags)) printf(" sse2");
	if(cpu_sse3(flags)) printf(" sse3");
	printf("\n");
	return 0;
}

7
src/libmpg123/true.h Normal file
View File

@@ -0,0 +1,7 @@
/*
	Boolean constants for code that wants TRUE and FALSE.
	Definitions already provided by some other header are left alone, to avoid
	macro redefinition conflicts.
*/
#ifndef MPG123_H_TRUE
#define MPG123_H_TRUE
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#endif