1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-29 11:41:21 +03:00
2004-12-29  Jakub Jelinek  <jakub@redhat.com>

	* sysdeps/ia64/fpu/libm_support.h (__libm_error_support): Use
	libc_hidden_proto instead of HIDDEN_PROTO.
	* sysdeps/ia64/fpu/libm-symbols.h (HIDDEN_PROTO): Remove.
	(__libm_error_support): If ASSEMBLER and in libc, define to
	HIDDEN_JUMPTARGET(__libm_error_support).

2004-12-28  David Mosberger  <davidm@hpl.hp.com>

	* sysdeps/ia64/fpu/Makefile (duplicated-routines): New macro.
	(sysdep_routines): Replace libm_ldexp{,f,l} and libm_scalbn{,f,l}
	with $(duplicated-routines).
	(libm-sysdep_routines): Likewise, but substitute "m_" prefix for
	"s_" prefix.

2004-12-27  David Mosberger  <davidm@hpl.hp.com>

	* sysdeps/ia64/fpu/libm-symbols.h: Add include of <sysdep.h> and
	undefine "ret" macro.  Add __libm_error_support hidden definitions.

	* sysdeps/ia64/fpu/e_lgamma_r.c: Remove CVS-id comment.  Add
	missing portion of copyright statement.
	* sysdeps/ia64/fpu/e_lgammaf_r.c: Likewise.
	* sysdeps/ia64/fpu/e_lgammal_r.c: Likewise.

	* sysdeps/ia64/fpu/w_lgamma.c: Remove CVS-id comment.  Add
	missing portion of copyright statement.
	(__ieee754_lgamma): Rename from lgamma().  Make lgamma() a weak alias.
	(__ieee754_gamma): Likewise.
	* sysdeps/ia64/fpu/w_lgammaf.c: Likewise.
	* sysdeps/ia64/fpu/w_lgammal.c: Likewise.

2004-12-09  H. J. Lu <hjl@lucon.org>

	* sysdeps/ia64/fpu/s_nextafterl.c: Remove.
	* sysdeps/ia64/fpu/s_nexttoward.c: Likewise.
	* sysdeps/ia64/fpu/s_nexttowardf.c: Likewise.
	* sysdeps/ia64/fpu/e_atan2l.S: Remove (duplicate of e_atan2l.c).
	* sysdeps/ia64/fpu/e_expl.S: Likewise.
	* sysdeps/ia64/fpu/e_logl.c: Remove (conflicts with e_logl.S).

2004-11-18  David Mosberger  <davidm@hpl.hp.com>

	* sysdeps/ia64/fpu/README: New file.
	* sysdeps/ia64/fpu/gen_import_file_list: New file.
	* sysdeps/ia64/fpu/import_check: Likewise.
	* sysdeps/ia64/fpu/import_diffs: Likewise.
	* sysdeps/ia64/fpu/import_file.awk: Likewise.
	* sysdeps/ia64/fpu/import_intel_libm: Likewise.
	* sysdeps/ia64/fpu/libm-symbols.h: Likewise.

	* sysdeps/ia64/fpu/e_acos.S: Update from Intel libm v2.1+.
	* sysdeps/ia64/fpu/e_acosf.S: Likewise.
	* sysdeps/ia64/fpu/e_acosl.S: Likewise.
	* sysdeps/ia64/fpu/e_asin.S: Likewise.
	* sysdeps/ia64/fpu/e_asinf.S: Likewise.
	* sysdeps/ia64/fpu/e_asinl.S: Likewise.
	* sysdeps/ia64/fpu/e_atan2.S: Likewise.
	* sysdeps/ia64/fpu/e_atan2f.S: Likewise.
	* sysdeps/ia64/fpu/e_cosh.S: Likewise.
	* sysdeps/ia64/fpu/e_coshf.S: Likewise.
	* sysdeps/ia64/fpu/e_coshl.S: Likewise.
	* sysdeps/ia64/fpu/e_exp.S: Likewise.
	* sysdeps/ia64/fpu/e_expf.S: Likewise.
	* sysdeps/ia64/fpu/e_fmod.S: Likewise.
	* sysdeps/ia64/fpu/e_fmodf.S: Likewise.
	* sysdeps/ia64/fpu/e_fmodl.S: Likewise.
	* sysdeps/ia64/fpu/e_hypot.S: Likewise.
	* sysdeps/ia64/fpu/e_hypotf.S: Likewise.
	* sysdeps/ia64/fpu/e_hypotl.S: Likewise.
	* sysdeps/ia64/fpu/e_log.S: Likewise.
	* sysdeps/ia64/fpu/e_log2.S: Likewise.
	* sysdeps/ia64/fpu/e_log2f.S: Likewise.
	* sysdeps/ia64/fpu/e_log2l.S: Likewise.
	* sysdeps/ia64/fpu/e_logf.S: Likewise.
	* sysdeps/ia64/fpu/e_pow.S: Likewise.
	* sysdeps/ia64/fpu/e_powf.S: Likewise.
	* sysdeps/ia64/fpu/e_powl.S: Likewise.
	* sysdeps/ia64/fpu/e_remainder.S: Likewise.
	* sysdeps/ia64/fpu/e_remainderf.S: Likewise.
	* sysdeps/ia64/fpu/e_remainderl.S: Likewise.
	* sysdeps/ia64/fpu/e_scalb.S: Likewise.
	* sysdeps/ia64/fpu/e_scalbf.S: Likewise.
	* sysdeps/ia64/fpu/e_scalbl.S: Likewise.
	* sysdeps/ia64/fpu/e_sinh.S: Likewise.
	* sysdeps/ia64/fpu/e_sinhf.S: Likewise.
	* sysdeps/ia64/fpu/e_sinhl.S: Likewise.
	* sysdeps/ia64/fpu/e_sqrt.S: Likewise.
	* sysdeps/ia64/fpu/e_sqrtf.S: Likewise.
	* sysdeps/ia64/fpu/e_sqrtl.S: Likewise.
	* sysdeps/ia64/fpu/libm_error.c: Likewise.
	* sysdeps/ia64/fpu/libm_reduce.c: Likewise.
	* sysdeps/ia64/fpu/libm_support.h: Likewise.
	* sysdeps/ia64/fpu/s_atan.S: Likewise.
	* sysdeps/ia64/fpu/s_atanf.S: Likewise.
	* sysdeps/ia64/fpu/s_atanl.S: Likewise.
	* sysdeps/ia64/fpu/s_cbrt.S: Likewise.
	* sysdeps/ia64/fpu/s_cbrtf.S: Likewise.
	* sysdeps/ia64/fpu/s_cbrtl.S: Likewise.
	* sysdeps/ia64/fpu/s_ceil.S: Likewise.
	* sysdeps/ia64/fpu/s_ceilf.S: Likewise.
	* sysdeps/ia64/fpu/s_ceill.S: Likewise.
	* sysdeps/ia64/fpu/s_cos.S: Likewise.
	* sysdeps/ia64/fpu/s_cosf.S: Likewise.
	* sysdeps/ia64/fpu/s_cosl.S: Likewise.
	* sysdeps/ia64/fpu/s_expm1.S: Likewise.
	* sysdeps/ia64/fpu/s_expm1f.S: Likewise.
	* sysdeps/ia64/fpu/s_expm1l.S: Likewise.
	* sysdeps/ia64/fpu/s_fabs.S: Likewise.
	* sysdeps/ia64/fpu/s_fabsf.S: Likewise.
	* sysdeps/ia64/fpu/s_fabsl.S: Likewise.
	* sysdeps/ia64/fpu/s_floor.S: Likewise.
	* sysdeps/ia64/fpu/s_floorf.S: Likewise.
	* sysdeps/ia64/fpu/s_floorl.S: Likewise.
	* sysdeps/ia64/fpu/s_frexp.c: Likewise.
	* sysdeps/ia64/fpu/s_frexpf.c: Likewise.
	* sysdeps/ia64/fpu/s_frexpl.c: Likewise.
	* sysdeps/ia64/fpu/s_ilogb.S: Likewise.
	* sysdeps/ia64/fpu/s_ilogbf.S: Likewise.
	* sysdeps/ia64/fpu/s_ilogbl.S: Likewise.
	* sysdeps/ia64/fpu/s_log1p.S: Likewise.
	* sysdeps/ia64/fpu/s_log1pf.S: Likewise.
	* sysdeps/ia64/fpu/s_log1pl.S: Likewise.
	* sysdeps/ia64/fpu/s_logb.S: Likewise.
	* sysdeps/ia64/fpu/s_logbf.S: Likewise.
	* sysdeps/ia64/fpu/s_logbl.S: Likewise.
	* sysdeps/ia64/fpu/s_modf.S: Likewise.
	* sysdeps/ia64/fpu/s_modff.S: Likewise.
	* sysdeps/ia64/fpu/s_modfl.S: Likewise.
	* sysdeps/ia64/fpu/s_nearbyint.S: Likewise.
	* sysdeps/ia64/fpu/s_nearbyintf.S: Likewise.
	* sysdeps/ia64/fpu/s_nearbyintl.S: Likewise.
	* sysdeps/ia64/fpu/s_rint.S: Likewise.
	* sysdeps/ia64/fpu/s_rintf.S: Likewise.
	* sysdeps/ia64/fpu/s_rintl.S: Likewise.
	* sysdeps/ia64/fpu/s_round.S: Likewise.
	* sysdeps/ia64/fpu/s_roundf.S: Likewise.
	* sysdeps/ia64/fpu/s_roundl.S: Likewise.
	* sysdeps/ia64/fpu/s_significand.S: Likewise.
	* sysdeps/ia64/fpu/s_significandf.S: Likewise.
	* sysdeps/ia64/fpu/s_significandl.S: Likewise.
	* sysdeps/ia64/fpu/s_tan.S: Likewise.
	* sysdeps/ia64/fpu/s_tanf.S: Likewise.
	* sysdeps/ia64/fpu/s_tanl.S: Likewise.
	* sysdeps/ia64/fpu/s_trunc.S: Likewise.
	* sysdeps/ia64/fpu/s_truncf.S: Likewise.
	* sysdeps/ia64/fpu/s_truncl.S: Likewise.

	* sysdeps/ia64/fpu/e_acosh.S: New file from Intel libm v2.1+.
	* sysdeps/ia64/fpu/e_acoshf.S: Likewise.
	* sysdeps/ia64/fpu/e_acoshl.S: Likewise.
	* sysdeps/ia64/fpu/e_atanh.S: Likewise.
	* sysdeps/ia64/fpu/e_atanhf.S: Likewise.
	* sysdeps/ia64/fpu/e_atanhl.S: Likewise.
	* sysdeps/ia64/fpu/e_exp10.S: Likewise.
	* sysdeps/ia64/fpu/e_exp10f.S: Likewise.
	* sysdeps/ia64/fpu/e_exp10l.S: Likewise.
	* sysdeps/ia64/fpu/e_exp2.S: Likewise.
	* sysdeps/ia64/fpu/e_exp2f.S: Likewise.
	* sysdeps/ia64/fpu/e_exp2l.S: Likewise.
	* sysdeps/ia64/fpu/e_lgamma_r.S: Likewise.
	* sysdeps/ia64/fpu/e_lgammaf_r.S: Likewise.
	* sysdeps/ia64/fpu/e_lgammal_r.S: Likewise.
	* sysdeps/ia64/fpu/e_logl.S: Likewise.
	* sysdeps/ia64/fpu/libm_frexp.S: Likewise.
	* sysdeps/ia64/fpu/libm_frexpf.S: Likewise.
	* sysdeps/ia64/fpu/libm_frexpl.S: Likewise.
	* sysdeps/ia64/fpu/s_libm_ldexp.S: Likewise.
	* sysdeps/ia64/fpu/s_libm_ldexpf.S: Likewise.
	* sysdeps/ia64/fpu/s_libm_ldexpl.S: Likewise.
	* sysdeps/ia64/fpu/s_libm_scalbn.S: Likewise.
	* sysdeps/ia64/fpu/s_libm_scalbnf.S: Likewise.
	* sysdeps/ia64/fpu/s_libm_scalbnl.S: Likewise.
	* sysdeps/ia64/fpu/libm_lgamma.S: Likewise.
	* sysdeps/ia64/fpu/libm_lgammaf.S: Likewise.
	* sysdeps/ia64/fpu/libm_lgammal.S: Likewise.
	* sysdeps/ia64/fpu/libm_sincos.S: Likewise.
	* sysdeps/ia64/fpu/libm_sincos_large.S: Likewise.
	* sysdeps/ia64/fpu/libm_sincosf.S: Likewise.
	* sysdeps/ia64/fpu/libm_sincosl.S: Likewise.
	* sysdeps/ia64/fpu/libm_scalblnf.S: Likewise.
	* sysdeps/ia64/fpu/s_asinh.S: Likewise.
	* sysdeps/ia64/fpu/s_asinhf.S: Likewise.
	* sysdeps/ia64/fpu/s_asinhl.S: Likewise.
	* sysdeps/ia64/fpu/s_erf.S: Likewise.
	* sysdeps/ia64/fpu/s_erfc.S: Likewise.
	* sysdeps/ia64/fpu/s_erfcf.S: Likewise.
	* sysdeps/ia64/fpu/s_erfcl.S: Likewise.
	* sysdeps/ia64/fpu/s_erff.S: Likewise.
	* sysdeps/ia64/fpu/s_erfl.S: Likewise.
	* sysdeps/ia64/fpu/s_fdim.S: Likewise.
	* sysdeps/ia64/fpu/s_fdimf.S: Likewise.
	* sysdeps/ia64/fpu/s_fdiml.S: Likewise.
	* sysdeps/ia64/fpu/s_fma.S: Likewise.
	* sysdeps/ia64/fpu/s_fmaf.S: Likewise.
	* sysdeps/ia64/fpu/s_fmal.S: Likewise.
	* sysdeps/ia64/fpu/s_fmax.S: Likewise.
	* sysdeps/ia64/fpu/s_fmaxf.S: Likewise.
	* sysdeps/ia64/fpu/s_fmaxl.S: Likewise.
	* sysdeps/ia64/fpu/s_ldexp.c: Likewise.
	* sysdeps/ia64/fpu/s_ldexpf.c: Likewise.
	* sysdeps/ia64/fpu/s_ldexpl.c: Likewise.
	* sysdeps/ia64/fpu/s_nextafter.S: Likewise.
	* sysdeps/ia64/fpu/s_nextafterf.S: Likewise.
	* sysdeps/ia64/fpu/s_nextafterl.S: Likewise.
	* sysdeps/ia64/fpu/s_nexttoward.S: Likewise.
	* sysdeps/ia64/fpu/s_nexttowardf.S: Likewise.
	* sysdeps/ia64/fpu/s_nexttowardl.S: Likewise.
	* sysdeps/ia64/fpu/s_tanh.S: Likewise.
	* sysdeps/ia64/fpu/s_tanhf.S: Likewise.
	* sysdeps/ia64/fpu/s_tanhl.S: Likewise.
	* sysdeps/ia64/fpu/s_scalblnf.c: Likewise.
	* sysdeps/ia64/fpu/w_lgamma.c: Likewise.
	* sysdeps/ia64/fpu/w_lgammaf.c: Likewise.
	* sysdeps/ia64/fpu/w_lgammal.c: Likewise.
	* sysdeps/ia64/fpu/w_tgamma.S: Likewise.
	* sysdeps/ia64/fpu/w_tgammaf.S: Likewise.
	* sysdeps/ia64/fpu/w_tgammal.S: Likewise.

	* sysdeps/ia64/fpu/e_gamma_r.c: New empty dummy-file.
	* sysdeps/ia64/fpu/e_gammaf_r.c: Likewise.
	* sysdeps/ia64/fpu/e_gammal_r.c: Likewise.
	* sysdeps/ia64/fpu/w_acosh.c: Likewise.
	* sysdeps/ia64/fpu/w_acoshf.c: Likewise.
	* sysdeps/ia64/fpu/w_acoshl.c: Likewise.
	* sysdeps/ia64/fpu/w_atanh.c: Likewise.
	* sysdeps/ia64/fpu/w_atanhf.c: Likewise.
	* sysdeps/ia64/fpu/w_atanhl.c: Likewise.
	* sysdeps/ia64/fpu/w_exp10.c: Likewise.
	* sysdeps/ia64/fpu/w_exp10f.c: Likewise.
	* sysdeps/ia64/fpu/w_exp10l.c: Likewise.
	* sysdeps/ia64/fpu/w_exp2.c: Likewise.
	* sysdeps/ia64/fpu/w_exp2f.c: Likewise.
	* sysdeps/ia64/fpu/w_exp2l.c: Likewise.
	* sysdeps/ia64/fpu/w_expl.c: Likewise.
	* sysdeps/ia64/fpu/e_expl.S: Likewise.
	* sysdeps/ia64/fpu/w_lgamma_r.c: Likewise.
	* sysdeps/ia64/fpu/w_lgammaf_r.c: Likewise.
	* sysdeps/ia64/fpu/w_lgammal_r.c: Likewise.
	* sysdeps/ia64/fpu/w_log2.c: Likewise.
	* sysdeps/ia64/fpu/w_log2f.c: Likewise.
	* sysdeps/ia64/fpu/w_log2l.c: Likewise.
	* sysdeps/ia64/fpu/w_sinh.c: Likewise.
	* sysdeps/ia64/fpu/w_sinhf.c: Likewise.
	* sysdeps/ia64/fpu/w_sinhl.c: Likewise.

	* sysdeps/ia64/fpu/libm_atan2_reg.S: Remove.
	* sysdeps/ia64/fpu/s_ldexp.S: Likewise.
	* sysdeps/ia64/fpu/s_ldexpf.S: Likewise.
	* sysdeps/ia64/fpu/s_ldexpl.S: Likewise.
	* sysdeps/ia64/fpu/s_scalbn.S: Likewise.
	* sysdeps/ia64/fpu/s_scalbnf.S: Likewise.
	* sysdeps/ia64/fpu/s_scalbnl.S: Likewise.

	* sysdeps/ia64/fpu/s_sincos.c: Make it an empty dummy-file.
	* sysdeps/ia64/fpu/s_sincosf.c: Likewise.
	* sysdeps/ia64/fpu/s_sincosl.c: Likewise.

	* sysdeps/ia64/fpu/e_atan2l.S: Add "Not needed" comment.

	* sysdeps/ia64/fpu/s_copysign.S: Add __libm_copysign{,f,l}
	alias for use by libm_error.c

	* sysdeps/ia64/fpu/Makefile (libm-sysdep_routines): Remove
	libm_atan2_reg, libm_tan, libm_frexp4{f,l}.
	Mention s_erfc{,f,l}, libm_frexp{,f,l}, libm_ldexp{,f,l},
	libm_sincos{,f,l}, libm_sincos_large, libm_lgamma{,f,l},
	libm_scalbn{,f,l}, libm_scalblnf.
	(sysdep_routines): Remove libm_frexp4{,f,l}.
	Mention libm_frexp{,f,l}, libm_ldexp{,f,l}, and libm_scalbn{,f,l}.
	(sysdep-CPPFLAGS): Add -include libm-symbols.h, -D__POSIX__,
	-D_LIB_VERSIONIMF=_LIB_VERSION, -DSIZE_LONG_INT_64, and
	-DSIZE_LONG_LONG_INT_64.
This commit is contained in:
Ulrich Drepper
2005-01-06 11:32:24 +00:00
parent ef07fd10d9
commit bb803bff5c
213 changed files with 101180 additions and 45395 deletions

275
ChangeLog
View File

@ -1,3 +1,278 @@
2004-12-29 Jakub Jelinek <jakub@redhat.com>
* sysdeps/ia64/fpu/libm_support.h (__libm_error_support): Use
libc_hidden_proto instead of HIDDEN_PROTO.
* sysdeps/ia64/fpu/libm-symbols.h (HIDDEN_PROTO): Remove.
(__libm_error_support): If ASSEMBLER and in libc, define to
HIDDEN_JUMPTARGET(__libm_error_support).
2004-12-28 David Mosberger <davidm@hpl.hp.com>
* sysdeps/ia64/fpu/Makefile (duplicated-routines): New macro.
(sysdep_routines): Replace libm_ldexp{,f,l} and libm_scalbn{,f,l}
with $(duplicated-routines).
(libm-sysdep_routines): Likewise, but substitute "m_" prefix for
"s_" prefix.
2004-12-27 David Mosberger <davidm@hpl.hp.com>
* sysdeps/ia64/fpu/libm-symbols.h: Add include of <sysdep.h> and
undefine "ret" macro. Add __libm_error_support hidden definitions.
* sysdeps/ia64/fpu/e_lgamma_r.c: Remove CVS-id comment. Add
missing portion of copyright statement.
* sysdeps/ia64/fpu/e_lgammaf_r.c: Likewise.
* sysdeps/ia64/fpu/e_lgammal_r.c: Likewise.
* sysdeps/ia64/fpu/w_lgamma.c: Remove CVS-id comment. Add
missing portion of copyright statement.
(__ieee754_lgamma): Rename from lgamma(). Make lgamma() a weak alias.
(__ieee754_gamma): Likewise.
* sysdeps/ia64/fpu/w_lgammaf.c: Likewise.
* sysdeps/ia64/fpu/w_lgammal.c: Likewise.
2004-12-09 H. J. Lu <hjl@lucon.org>
* sysdeps/ia64/fpu/s_nextafterl.c: Remove.
* sysdeps/ia64/fpu/s_nexttoward.c: Likewise.
* sysdeps/ia64/fpu/s_nexttowardf.c: Likewise.
* sysdeps/ia64/fpu/e_atan2l.S: Remove (duplicate of e_atan2l.c).
* sysdeps/ia64/fpu/e_expl.S: Likewise.
* sysdeps/ia64/fpu/e_logl.c: Remove (conflicts with e_logl.S).
2004-11-18 David Mosberger <davidm@hpl.hp.com>
* sysdeps/ia64/fpu/README: New file.
* sysdeps/ia64/fpu/gen_import_file_list: New file.
* sysdeps/ia64/fpu/import_check: Likewise.
* sysdeps/ia64/fpu/import_diffs: Likewise.
* sysdeps/ia64/fpu/import_file.awk: Likewise.
* sysdeps/ia64/fpu/import_intel_libm: Likewise.
* sysdeps/ia64/fpu/libm-symbols.h: Likewise.
* sysdeps/ia64/fpu/e_acos.S: Update from Intel libm v2.1+.
* sysdeps/ia64/fpu/e_acosf.S: Likewise.
* sysdeps/ia64/fpu/e_acosl.S: Likewise.
* sysdeps/ia64/fpu/e_asin.S: Likewise.
* sysdeps/ia64/fpu/e_asinf.S: Likewise.
* sysdeps/ia64/fpu/e_asinl.S: Likewise.
* sysdeps/ia64/fpu/e_atan2.S: Likewise.
* sysdeps/ia64/fpu/e_atan2f.S: Likewise.
* sysdeps/ia64/fpu/e_cosh.S: Likewise.
* sysdeps/ia64/fpu/e_coshf.S: Likewise.
* sysdeps/ia64/fpu/e_coshl.S: Likewise.
* sysdeps/ia64/fpu/e_exp.S: Likewise.
* sysdeps/ia64/fpu/e_expf.S: Likewise.
* sysdeps/ia64/fpu/e_fmod.S: Likewise.
* sysdeps/ia64/fpu/e_fmodf.S: Likewise.
* sysdeps/ia64/fpu/e_fmodl.S: Likewise.
* sysdeps/ia64/fpu/e_hypot.S: Likewise.
* sysdeps/ia64/fpu/e_hypotf.S: Likewise.
* sysdeps/ia64/fpu/e_hypotl.S: Likewise.
* sysdeps/ia64/fpu/e_log.S: Likewise.
* sysdeps/ia64/fpu/e_log2.S: Likewise.
* sysdeps/ia64/fpu/e_log2f.S: Likewise.
* sysdeps/ia64/fpu/e_log2l.S: Likewise.
* sysdeps/ia64/fpu/e_logf.S: Likewise.
* sysdeps/ia64/fpu/e_pow.S: Likewise.
* sysdeps/ia64/fpu/e_powf.S: Likewise.
* sysdeps/ia64/fpu/e_powl.S: Likewise.
* sysdeps/ia64/fpu/e_remainder.S: Likewise.
* sysdeps/ia64/fpu/e_remainderf.S: Likewise.
* sysdeps/ia64/fpu/e_remainderl.S: Likewise.
* sysdeps/ia64/fpu/e_scalb.S: Likewise.
* sysdeps/ia64/fpu/e_scalbf.S: Likewise.
* sysdeps/ia64/fpu/e_scalbl.S: Likewise.
* sysdeps/ia64/fpu/e_sinh.S: Likewise.
* sysdeps/ia64/fpu/e_sinhf.S: Likewise.
* sysdeps/ia64/fpu/e_sinhl.S: Likewise.
* sysdeps/ia64/fpu/e_sqrt.S: Likewise.
* sysdeps/ia64/fpu/e_sqrtf.S: Likewise.
* sysdeps/ia64/fpu/e_sqrtl.S: Likewise.
* sysdeps/ia64/fpu/libm_error.c: Likewise.
* sysdeps/ia64/fpu/libm_reduce.c: Likewise.
* sysdeps/ia64/fpu/libm_support.h: Likewise.
* sysdeps/ia64/fpu/s_atan.S: Likewise.
* sysdeps/ia64/fpu/s_atanf.S: Likewise.
* sysdeps/ia64/fpu/s_atanl.S: Likewise.
* sysdeps/ia64/fpu/s_cbrt.S: Likewise.
* sysdeps/ia64/fpu/s_cbrtf.S: Likewise.
* sysdeps/ia64/fpu/s_cbrtl.S: Likewise.
* sysdeps/ia64/fpu/s_ceil.S: Likewise.
* sysdeps/ia64/fpu/s_ceilf.S: Likewise.
* sysdeps/ia64/fpu/s_ceill.S: Likewise.
* sysdeps/ia64/fpu/s_cos.S: Likewise.
* sysdeps/ia64/fpu/s_cosf.S: Likewise.
* sysdeps/ia64/fpu/s_cosl.S: Likewise.
* sysdeps/ia64/fpu/s_expm1.S: Likewise.
* sysdeps/ia64/fpu/s_expm1f.S: Likewise.
* sysdeps/ia64/fpu/s_expm1l.S: Likewise.
* sysdeps/ia64/fpu/s_fabs.S: Likewise.
* sysdeps/ia64/fpu/s_fabsf.S: Likewise.
* sysdeps/ia64/fpu/s_fabsl.S: Likewise.
* sysdeps/ia64/fpu/s_floor.S: Likewise.
* sysdeps/ia64/fpu/s_floorf.S: Likewise.
* sysdeps/ia64/fpu/s_floorl.S: Likewise.
* sysdeps/ia64/fpu/s_frexp.c: Likewise.
* sysdeps/ia64/fpu/s_frexpf.c: Likewise.
* sysdeps/ia64/fpu/s_frexpl.c: Likewise.
* sysdeps/ia64/fpu/s_ilogb.S: Likewise.
* sysdeps/ia64/fpu/s_ilogbf.S: Likewise.
* sysdeps/ia64/fpu/s_ilogbl.S: Likewise.
* sysdeps/ia64/fpu/s_log1p.S: Likewise.
* sysdeps/ia64/fpu/s_log1pf.S: Likewise.
* sysdeps/ia64/fpu/s_log1pl.S: Likewise.
* sysdeps/ia64/fpu/s_logb.S: Likewise.
* sysdeps/ia64/fpu/s_logbf.S: Likewise.
* sysdeps/ia64/fpu/s_logbl.S: Likewise.
* sysdeps/ia64/fpu/s_modf.S: Likewise.
* sysdeps/ia64/fpu/s_modff.S: Likewise.
* sysdeps/ia64/fpu/s_modfl.S: Likewise.
* sysdeps/ia64/fpu/s_nearbyint.S: Likewise.
* sysdeps/ia64/fpu/s_nearbyintf.S: Likewise.
* sysdeps/ia64/fpu/s_nearbyintl.S: Likewise.
* sysdeps/ia64/fpu/s_rint.S: Likewise.
* sysdeps/ia64/fpu/s_rintf.S: Likewise.
* sysdeps/ia64/fpu/s_rintl.S: Likewise.
* sysdeps/ia64/fpu/s_round.S: Likewise.
* sysdeps/ia64/fpu/s_roundf.S: Likewise.
* sysdeps/ia64/fpu/s_roundl.S: Likewise.
* sysdeps/ia64/fpu/s_significand.S: Likewise.
* sysdeps/ia64/fpu/s_significandf.S: Likewise.
* sysdeps/ia64/fpu/s_significandl.S: Likewise.
* sysdeps/ia64/fpu/s_tan.S: Likewise.
* sysdeps/ia64/fpu/s_tanf.S: Likewise.
* sysdeps/ia64/fpu/s_tanl.S: Likewise.
* sysdeps/ia64/fpu/s_trunc.S: Likewise.
* sysdeps/ia64/fpu/s_truncf.S: Likewise.
* sysdeps/ia64/fpu/s_truncl.S: Likewise.
* sysdeps/ia64/fpu/e_acosh.S: New file from Intel libm v2.1+.
* sysdeps/ia64/fpu/e_acoshf.S: Likewise.
* sysdeps/ia64/fpu/e_acoshl.S: Likewise.
* sysdeps/ia64/fpu/e_atanh.S: Likewise.
* sysdeps/ia64/fpu/e_atanhf.S: Likewise.
* sysdeps/ia64/fpu/e_atanhl.S: Likewise.
* sysdeps/ia64/fpu/e_exp10.S: Likewise.
* sysdeps/ia64/fpu/e_exp10f.S: Likewise.
* sysdeps/ia64/fpu/e_exp10l.S: Likewise.
* sysdeps/ia64/fpu/e_exp2.S: Likewise.
* sysdeps/ia64/fpu/e_exp2f.S: Likewise.
* sysdeps/ia64/fpu/e_exp2l.S: Likewise.
* sysdeps/ia64/fpu/e_lgamma_r.S: Likewise.
* sysdeps/ia64/fpu/e_lgammaf_r.S: Likewise.
* sysdeps/ia64/fpu/e_lgammal_r.S: Likewise.
* sysdeps/ia64/fpu/e_logl.S: Likewise.
* sysdeps/ia64/fpu/libm_frexp.S: Likewise.
* sysdeps/ia64/fpu/libm_frexpf.S: Likewise.
* sysdeps/ia64/fpu/libm_frexpl.S: Likewise.
* sysdeps/ia64/fpu/s_libm_ldexp.S: Likewise.
* sysdeps/ia64/fpu/s_libm_ldexpf.S: Likewise.
* sysdeps/ia64/fpu/s_libm_ldexpl.S: Likewise.
* sysdeps/ia64/fpu/s_libm_scalbn.S: Likewise.
* sysdeps/ia64/fpu/s_libm_scalbnf.S: Likewise.
* sysdeps/ia64/fpu/s_libm_scalbnl.S: Likewise.
* sysdeps/ia64/fpu/libm_lgamma.S: Likewise.
* sysdeps/ia64/fpu/libm_lgammaf.S: Likewise.
* sysdeps/ia64/fpu/libm_lgammal.S: Likewise.
* sysdeps/ia64/fpu/libm_sincos.S: Likewise.
* sysdeps/ia64/fpu/libm_sincos_large.S: Likewise.
* sysdeps/ia64/fpu/libm_sincosf.S: Likewise.
* sysdeps/ia64/fpu/libm_sincosl.S: Likewise.
* sysdeps/ia64/fpu/libm_scalblnf.S: Likewise.
* sysdeps/ia64/fpu/s_asinh.S: Likewise.
* sysdeps/ia64/fpu/s_asinhf.S: Likewise.
* sysdeps/ia64/fpu/s_asinhl.S: Likewise.
* sysdeps/ia64/fpu/s_erf.S: Likewise.
* sysdeps/ia64/fpu/s_erfc.S: Likewise.
* sysdeps/ia64/fpu/s_erfcf.S: Likewise.
* sysdeps/ia64/fpu/s_erfcl.S: Likewise.
* sysdeps/ia64/fpu/s_erff.S: Likewise.
* sysdeps/ia64/fpu/s_erfl.S: Likewise.
* sysdeps/ia64/fpu/s_fdim.S: Likewise.
* sysdeps/ia64/fpu/s_fdimf.S: Likewise.
* sysdeps/ia64/fpu/s_fdiml.S: Likewise.
* sysdeps/ia64/fpu/s_fma.S: Likewise.
* sysdeps/ia64/fpu/s_fmaf.S: Likewise.
* sysdeps/ia64/fpu/s_fmal.S: Likewise.
* sysdeps/ia64/fpu/s_fmax.S: Likewise.
* sysdeps/ia64/fpu/s_fmaxf.S: Likewise.
* sysdeps/ia64/fpu/s_fmaxl.S: Likewise.
* sysdeps/ia64/fpu/s_ldexp.c: Likewise.
* sysdeps/ia64/fpu/s_ldexpf.c: Likewise.
* sysdeps/ia64/fpu/s_ldexpl.c: Likewise.
* sysdeps/ia64/fpu/s_nextafter.S: Likewise.
* sysdeps/ia64/fpu/s_nextafterf.S: Likewise.
* sysdeps/ia64/fpu/s_nextafterl.S: Likewise.
* sysdeps/ia64/fpu/s_nexttoward.S: Likewise.
* sysdeps/ia64/fpu/s_nexttowardf.S: Likewise.
* sysdeps/ia64/fpu/s_nexttowardl.S: Likewise.
* sysdeps/ia64/fpu/s_tanh.S: Likewise.
* sysdeps/ia64/fpu/s_tanhf.S: Likewise.
* sysdeps/ia64/fpu/s_tanhl.S: Likewise.
* sysdeps/ia64/fpu/s_scalblnf.c: Likewise.
* sysdeps/ia64/fpu/w_lgamma.c: Likewise.
* sysdeps/ia64/fpu/w_lgammaf.c: Likewise.
* sysdeps/ia64/fpu/w_lgammal.c: Likewise.
* sysdeps/ia64/fpu/w_tgamma.S: Likewise.
* sysdeps/ia64/fpu/w_tgammaf.S: Likewise.
* sysdeps/ia64/fpu/w_tgammal.S: Likewise.
* sysdeps/ia64/fpu/e_gamma_r.c: New empty dummy-file.
* sysdeps/ia64/fpu/e_gammaf_r.c: Likewise.
* sysdeps/ia64/fpu/e_gammal_r.c: Likewise.
* sysdeps/ia64/fpu/w_acosh.c: Likewise.
* sysdeps/ia64/fpu/w_acoshf.c: Likewise.
* sysdeps/ia64/fpu/w_acoshl.c: Likewise.
* sysdeps/ia64/fpu/w_atanh.c: Likewise.
* sysdeps/ia64/fpu/w_atanhf.c: Likewise.
* sysdeps/ia64/fpu/w_atanhl.c: Likewise.
* sysdeps/ia64/fpu/w_exp10.c: Likewise.
* sysdeps/ia64/fpu/w_exp10f.c: Likewise.
* sysdeps/ia64/fpu/w_exp10l.c: Likewise.
* sysdeps/ia64/fpu/w_exp2.c: Likewise.
* sysdeps/ia64/fpu/w_exp2f.c: Likewise.
* sysdeps/ia64/fpu/w_exp2l.c: Likewise.
* sysdeps/ia64/fpu/w_expl.c: Likewise.
* sysdeps/ia64/fpu/e_expl.S: Likewise.
* sysdeps/ia64/fpu/w_lgamma_r.c: Likewise.
* sysdeps/ia64/fpu/w_lgammaf_r.c: Likewise.
* sysdeps/ia64/fpu/w_lgammal_r.c: Likewise.
* sysdeps/ia64/fpu/w_log2.c: Likewise.
* sysdeps/ia64/fpu/w_log2f.c: Likewise.
* sysdeps/ia64/fpu/w_log2l.c: Likewise.
* sysdeps/ia64/fpu/w_sinh.c: Likewise.
* sysdeps/ia64/fpu/w_sinhf.c: Likewise.
* sysdeps/ia64/fpu/w_sinhl.c: Likewise.
* sysdeps/ia64/fpu/libm_atan2_reg.S: Remove.
* sysdeps/ia64/fpu/s_ldexp.S: Likewise.
* sysdeps/ia64/fpu/s_ldexpf.S: Likewise.
* sysdeps/ia64/fpu/s_ldexpl.S: Likewise.
* sysdeps/ia64/fpu/s_scalbn.S: Likewise.
* sysdeps/ia64/fpu/s_scalbnf.S: Likewise.
* sysdeps/ia64/fpu/s_scalbnl.S: Likewise.
* sysdeps/ia64/fpu/s_sincos.c: Make it an empty dummy-file.
* sysdeps/ia64/fpu/s_sincosf.c: Likewise.
* sysdeps/ia64/fpu/s_sincosl.c: Likewise.
* sysdeps/ia64/fpu/e_atan2l.S: Add "Not needed" comment.
* sysdeps/ia64/fpu/s_copysign.S: Add __libm_copysign{,f,l}
alias for use by libm_error.c
* sysdeps/ia64/fpu/Makefile (libm-sysdep_routines): Remove
libm_atan2_reg, libm_tan, libm_frexp4{f,l}.
Mention s_erfc{,f,l}, libm_frexp{,f,l}, libm_ldexp{,f,l},
libm_sincos{,f,l}, libm_sincos_large, libm_lgamma{,f,l},
libm_scalbn{,f,l}, libm_scalblnf.
(sysdep_routines): Remove libm_frexp4{,f,l}.
Mention libm_frexp{,f,l}, libm_ldexp{,f,l}, and libm_scalbn{,f,l}.
(sysdep-CPPFLAGS): Add -include libm-symbols.h, -D__POSIX__,
-D_LIB_VERSIONIMF=_LIB_VERSION, -DSIZE_LONG_INT_64, and
-DSIZE_LONG_LONG_INT_64.
2005-01-05 Steven Munroe <sjmunroe@us.ibm.com>
* elf/rtld.c (dl_main) [NEED_DL_SYSINFO_DSO]: Insure l_map_end and

View File

@ -1,8 +1,33 @@
ifeq ($(subdir),math)
libm-sysdep_routines += libm_atan2_reg s_matherrf s_matherrl libm_reduce \
libm_tan libm_error \
libm_frexp4 libm_frexp4f libm_frexp4l
#
# Some files which need to go both into libc and libm have external
# dependencies which need to be resolved differently for libc
# vs. libm. For example, inside libc, __libm_error_support needs to
# resolve to HIDDEN_JUMPTARGET(__libm_error_support) whereas within
# libm it always resolves to __libm_error_support. Such files need to
# be compiled twice. Fortunately, math/Makefile already has logic to
# support this: if a file starts with "s_", make will automatically
# generate a matching file whose name starts with "m_" which simply
# includes the corresponding "s_" file.
#
duplicated-routines = s_libm_ldexp s_libm_ldexpf s_libm_ldexpl \
s_libm_scalbn s_libm_scalbnf s_libm_scalbnl
sysdep_routines += libm_frexp4 libm_frexp4f libm_frexp4l libc_libm_error
sysdep-CPPFLAGS += -DSIZE_INT_32
libm-sysdep_routines += s_erfc s_erfcf s_erfcl \
s_matherrf s_matherrl libm_reduce \
libm_error \
libm_frexp libm_frexpf libm_frexpl \
libm_sincos libm_sincosf libm_sincosl \
libm_sincos_large \
libm_lgamma libm_lgammaf libm_lgammal \
libm_scalblnf \
$(duplicated-routines:s_%=m_%)
sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \
$(duplicated-routines)
sysdep-CPPFLAGS += -include libm-symbols.h \
-D__POSIX__ \
-D_LIB_VERSIONIMF=_LIB_VERSION \
-DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64
endif

50
sysdeps/ia64/fpu/README Normal file
View File

@ -0,0 +1,50 @@
----------------------------------------------------------
Notes on how to update libm based on Intel's libm releases
----------------------------------------------------------
The source code in this directory is currently based on Intel libm
v2.1 as available from:
http://www.intel.com/software/products/opensource/libraries/num.htm
To ease importing, fix some bugs, and simplify integration into libc,
it is also necessary to apply the patch at:
ftp://ftp.hpl.hp.com/pub/linux-ia64/intel-libm-041228.diff.gz
The expectation is that Intel will integrate most if not all of these
changes into future releases of libm, so this patching step can
hopefully be omitted in the future.
Once the patched libm sources are extracted in a directory $LIBM, they
can be imported into the libc source tree at $LIBC with the following
step:
$ cd $LIBC/src/sysdeps/ia64/fpu
$ ./import_intel_libm $LIBM
This should produce a number of "Importing..." messages, without
showing any errors.
At this point, you should be able to build glibc in the usual fashion.
We assume you do this in directory $OBJ. Once the build has
completed, run "make check" to verify that all (math) checks succeed.
If these checks succeed, you should also run the following commands to
verify that the new libm doesn't pollute the name-space and has proper
size-info for the data objects:
$ cd $LIBC/src/sysdeps/ia64/fpu
$ ./import_check $OBJ/math/
There should be no (unexpected) errors reported by this script.
As an optional step, you may also want to confirm that the new libm
exports the exact same global symbols as the old one.
If you want to see the changes introduced by the "import_intel_libm"
script, you can run the commands:
$ cd $LIBC/src/sysdeps/ia64/fpu
$ ./import_diffs
That's it.

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
.file "acosf.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,19 +35,23 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
// 2/02/00 Initial revision
// 6/28/00 Improved speed
// 6/31/00 Changed register allocation because of some duplicate macros
// 02/02/00 Initial version
// 06/28/00 Improved speed
// 06/31/00 Changed register allocation because of some duplicate macros
// moved nan exit bundle up to gain a cycle.
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 8/17/00 Changed predicate register macro-usage to direct predicate
// 08/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
// 03/13/01 Corrected sign of imm1 value in dep instruction.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 04/17/03 Moved mutex after label
// Description
@ -115,7 +119,6 @@
// answer2 = sign(x) z P(t) if x>0
// = sign(x) z P(t) + pi if x<0
#include "libm_support.h"
//
// Assembly macros
@ -222,42 +225,30 @@ acosf_poly_p1a = f90
// Data tables
//==============================================================
#ifdef _LIBC
.rodata
#else
.data
#endif
RODATA
.align 16
acosf_coeff_1_table:
ASM_TYPE_DIRECTIVE(acosf_coeff_1_table,@object)
LOCAL_OBJECT_START(acosf_coeff_1_table)
data8 0x3FC5555607DCF816 // P1
data8 0x3F9CF81AD9BAB2C6 // P4
data8 0x3FC59E0975074DF3 // P7
data8 0xBFA6F4CC2780AA1D // P6
data8 0x3FC2DD45292E93CB // P9
data8 0x3fe6a09e667f3bcd // sqrt(2)/2
ASM_SIZE_DIRECTIVE(acosf_coeff_1_table)
LOCAL_OBJECT_END(acosf_coeff_1_table)
acosf_coeff_2_table:
ASM_TYPE_DIRECTIVE(acosf_coeff_2_table,@object)
LOCAL_OBJECT_START(acosf_coeff_2_table)
data8 0x3FA6F108E31EFBA6 // P3
data8 0xBFCA31BF175D82A0 // P8
data8 0x3FA30C0337F6418B // P5
data8 0x3FB332C9266CB1F9 // P2
data8 0x3ff921fb54442d18 // pi_by_2
ASM_SIZE_DIRECTIVE(acosf_coeff_2_table)
LOCAL_OBJECT_END(acosf_coeff_2_table)
.align 32
.global acosf
ASM_TYPE_DIRECTIVE(acosf,@function)
.section .text
.proc acosf
.align 32
acosf:
GLOBAL_LIBM_ENTRY(acosf)
// Load the addresses of the two tables.
// Then, load the coefficients and other constants.
@ -342,7 +333,7 @@ acosf:
}
{ .mfb
nop.m 999
(p8) fma.s f8 = f8,f1,f0
(p8) fma.s.s0 f8 = f8,f1,f0
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@ -350,7 +341,7 @@ acosf:
{ .mfb
nop.m 999
fcmp.eq.s1 p6,p0 = acosf_abs_x,f1
(p10) br.cond.spnt L(ACOSF_ZERO) ;; // Branch if x=0
(p10) br.cond.spnt ACOSF_ZERO ;; // Branch if x=0
}
{ .mfi
@ -367,7 +358,7 @@ acosf:
{ .mfb
nop.m 999
fma.s1 acosf_t4 = acosf_t2,acosf_t2,f0
(p6) br.cond.spnt L(ACOSF_ABS_ONE) ;; // Branch if |x|=1
(p6) br.cond.spnt ACOSF_ABS_ONE ;; // Branch if |x|=1
}
{ .mfi
@ -575,42 +566,40 @@ acosf:
.pred.rel "mutex",p8,p7 //acosf_pred_GTsqrt2by2,acosf_pred_LEsqrt2by2
{ .mfi
nop.m 999
(p8) fma.s f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2
(p8) fma.s.s0 f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2
nop.i 999
}
{ .mfb
nop.m 999
(p7) fms.s f8 = acosf_const_piby2,f1,acosf_sinf1
(p7) fms.s.s0 f8 = acosf_const_piby2,f1,acosf_sinf1
br.ret.sptk b0 ;;
}
L(ACOSF_ZERO):
ACOSF_ZERO:
// Here if x=0
{ .mfb
nop.m 999
fma.s f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2
fma.s.s0 f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2
br.ret.sptk b0 ;;
}
L(ACOSF_ABS_ONE):
ACOSF_ABS_ONE:
.pred.rel "mutex",p11,p12
// Here if |x|=1
{ .mfi
nop.m 999
(p11) fma.s f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
(p11) fma.s.s0 f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
nop.i 999
}
{ .mfb
nop.m 999
(p12) fma.s f8 = f1,f0,f0 // acosf(1)=0
(p12) fma.s.s0 f8 = f1,f0,f0 // acosf(1)=0
br.ret.sptk b0 ;;
}
.endp acosf
ASM_SIZE_DIRECTIVE(acosf)
GLOBAL_LIBM_END(acosf)
// Stack operations when calling error support.
// (1) (2)
@ -642,8 +631,7 @@ ASM_SIZE_DIRECTIVE(acosf)
// restore ar.pfs
.proc __libm_error_region
__libm_error_region:
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -699,8 +687,7 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

1200
sysdeps/ia64/fpu/e_acosh.S Normal file

File diff suppressed because it is too large Load Diff

1029
sysdeps/ia64/fpu/e_acoshf.S Normal file

File diff suppressed because it is too large Load Diff

1713
sysdeps/ia64/fpu/e_acoshl.S Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
.file "asinf.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/02/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,20 +35,24 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
// 2/02/00 Initial revision
// 6/28/00 Improved speed
// 6/31/00 Changed register allocation because of some duplicate macros
// 02/02/00 Initial version
// 06/28/00 Improved speed
// 06/31/00 Changed register allocation because of some duplicate macros
// moved nan exit bundle up to gain a cycle.
// 8/08/00 Improved speed by avoiding SIR flush.
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 08/08/00 Improved speed by avoiding SIR flush.
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 8/17/00 Changed predicate register macro-usage to direct predicate
// 08/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
// 03/13/01 Corrected sign of imm1 value in dep instruction.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
// Description
//=========================================
@ -119,7 +123,6 @@
// answer2 = - sign(x) z P(t) + (sign(x) pi/2)
//
#include "libm_support.h"
// Assembly macros
//=========================================
@ -225,42 +228,30 @@ asinf_poly_p1a = f90
// Data tables
//==============================================================
#ifdef _LIBC
.rodata
#else
.data
#endif
RODATA
.align 16
asinf_coeff_1_table:
ASM_TYPE_DIRECTIVE(asinf_coeff_1_table,@object)
LOCAL_OBJECT_START(asinf_coeff_1_table)
data8 0x3FC5555607DCF816 // P1
data8 0x3F9CF81AD9BAB2C6 // P4
data8 0x3FC59E0975074DF3 // P7
data8 0xBFA6F4CC2780AA1D // P6
data8 0x3FC2DD45292E93CB // P9
data8 0x3fe6a09e667f3bcd // sqrt(2)/2
ASM_SIZE_DIRECTIVE(asinf_coeff_1_table)
LOCAL_OBJECT_END(asinf_coeff_1_table)
asinf_coeff_2_table:
ASM_TYPE_DIRECTIVE(asinf_coeff_2_table,@object)
LOCAL_OBJECT_START(asinf_coeff_2_table)
data8 0x3FA6F108E31EFBA6 // P3
data8 0xBFCA31BF175D82A0 // P8
data8 0x3FA30C0337F6418B // P5
data8 0x3FB332C9266CB1F9 // P2
data8 0x3ff921fb54442d18 // pi_by_2
ASM_SIZE_DIRECTIVE(asinf_coeff_2_table)
LOCAL_OBJECT_END(asinf_coeff_2_table)
.align 32
.global asinf
.section .text
.proc asinf
.align 32
asinf:
GLOBAL_LIBM_ENTRY(asinf)
// Load the addresses of the two tables.
// Then, load the coefficients and other constants.
@ -345,7 +336,7 @@ asinf:
}
{ .mfb
nop.m 999
(p8) fma.s f8 = f8,f1,f0
(p8) fma.s.s0 f8 = f8,f1,f0
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@ -370,7 +361,7 @@ asinf:
{ .mfb
nop.m 999
fma.s1 asinf_t4 = asinf_t2,asinf_t2,f0
(p6) br.cond.spnt L(ASINF_ABS_ONE) ;; // Branch if |x|=1
(p6) br.cond.spnt ASINF_ABS_ONE ;; // Branch if |x|=1
}
{ .mfi
@ -572,28 +563,26 @@ asinf:
.pred.rel "mutex",p8,p7 //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2
{ .mfi
nop.m 999
(p8) fnma.s f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2
(p8) fnma.s.s0 f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2
nop.i 999
}
{ .mfb
nop.m 999
(p7) fma.s f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax
(p7) fma.s.s0 f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax
br.ret.sptk b0 ;;
}
L(ASINF_ABS_ONE):
ASINF_ABS_ONE:
// Here for short exit if |x|=1
{ .mfb
nop.m 999
fma.s f8 = asinf_sgn_x,asinf_const_piby2,f0
fma.s.s0 f8 = asinf_sgn_x,asinf_const_piby2,f0
br.ret.sptk b0
}
;;
.endp asinf
ASM_SIZE_DIRECTIVE(asinf)
GLOBAL_LIBM_END(asinf)
// Stack operations when calling error support.
// (1) (2)
// sp -> + psp -> +
@ -623,8 +612,7 @@ ASM_SIZE_DIRECTIVE(asinf)
// restore gp
// restore ar.pfs
.proc __libm_error_region
__libm_error_region:
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -680,8 +668,7 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
.file "atan2f.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 6/1/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,18 +35,21 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
// 6/01/00 Initial version
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 06/01/00 Initial version
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 8/17/00 Changed predicate register macro-usage to direct predicate
// 08/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
// 1/05/01 Fixed flag settings for denormal input.
// 1/19/01 Added documentation
// 1/30/01 Improved speed
// 01/05/01 Fixed flag settings for denormal input.
// 01/19/01 Added documentation
// 01/30/01 Improved speed
// 02/06/02 Corrected .section statement
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
// Description
//=========================================
@ -226,7 +229,6 @@
// atan2f(+-0/+-0) sets single error tag to 38
// These are domain errors.
#include "libm_support.h"
//
// Assembly macros
@ -324,22 +326,20 @@ atan2f_poly_atan_U = f88
//atan2f_Pred_Xneg = p9 // x < 0
.data
RODATA
.align 16
atan2f_coef_table1:
ASM_TYPE_DIRECTIVE(atan2f_coef_table1,@object)
LOCAL_OBJECT_START(atan2f_coef_table1)
data8 0xBFD5555512191621 // p1
data8 0x3F522E5D33BC9BAA // p10
data8 0xBFA6E10BA401393F // p7
data8 0x3FB142A73D7C54E3 // p6
data8 0xBFC2473C5145EE38 // p3
data8 0x3FC9997E7AFBFF4E // p2
ASM_SIZE_DIRECTIVE(atan2f_coef_table1)
LOCAL_OBJECT_END(atan2f_coef_table1)
atan2f_coef_table2:
ASM_TYPE_DIRECTIVE(atan2f_coef_table2,@object)
LOCAL_OBJECT_START(atan2f_coef_table2)
data8 0xBF7DEAADAA336451 // p9
data8 0x3F97105B4160F86B // p8
data8 0xBFB68EED6A8CFA32 // p5
@ -348,29 +348,12 @@ data8 0x3ff921fb54442d18 // pi/2
data8 0x400921fb54442d18 // pi
data8 0x3fe921fb54442d18 // pi/4
data8 0x4002d97c7f3321d2 // 3pi/4
ASM_SIZE_DIRECTIVE(atan2f_coef_table2)
LOCAL_OBJECT_END(atan2f_coef_table2)
.global atan2f
#ifdef _LIBC
.global __atan2f
.global __ieee754_atan2f
#endif
.text
.align 32
atan2f:
.proc atan2f
#ifdef _LIBC
.proc __atan2f
__atan2f:
.proc __ieee754_atan2f
__ieee754_atan2f:
#endif
.section .text
GLOBAL_IEEE754_ENTRY(atan2f)
{ .mfi
alloc r32 = ar.pfs,1,5,4,0
@ -724,7 +707,7 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
(p10) fma.s f8 = f9,f8,f0 // Result quietized y if y is nan
(p10) fma.s.s0 f8 = f9,f8,f0 // Result quietized y if y is nan
(p10) br.ret.spnt b0 // Exit if y is nan
}
;;
@ -737,7 +720,7 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
(p12) fnorm.s f8 = f9 // Result quietized x if x is nan, y not nan
(p12) fnorm.s.s0 f8 = f9 // Result quietized x if x is nan, y not nan
(p12) br.ret.spnt b0 // Exit if x is nan, y not nan
}
;;
@ -757,7 +740,7 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
(p7) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
(p7) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
(p7) br.ret.spnt b0 // Exit if x +inf and y inf
}
;;
@ -790,19 +773,19 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
(p13) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
(p13) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
(p13) br.ret.spnt b0 // Exit if x not -inf and y inf
}
;;
{ .mfi
nop.m 999
(p14) fma.s f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
(p14) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
nop.i 999
}
{ .mfb
nop.m 999
(p15) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
(p15) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
(p11) br.ret.spnt b0 // Exit if x -inf
}
;;
@ -829,31 +812,28 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
(p9) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
(p9) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
(p10) br.cond.spnt __libm_error_region // Branch if x zero and y zero
}
;;
{ .mfb
nop.m 999
(p11) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
(p11) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
br.ret.sptk b0 // Final special case exit
}
;;
.endp atan2f
ASM_SIZE_DIRECTIVE(atan2f)
GLOBAL_IEEE754_END(atan2f)
.proc __libm_error_region
__libm_error_region:
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
mov GR_Parameter_TAG = 38
fclass.m p10,p11 = f9,0x5 // @zero | @pos
;;
(p10) fmerge.s f10 = f8, f0
(p11) fma.s f10 = atan2f_sgn_Y, atan2f_const_pi,f0
(p11) fma.s.s0 f10 = atan2f_sgn_Y, atan2f_const_pi,f0
;;
{ .mfi
@ -913,8 +893,7 @@ __libm_error_region:
}
;;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

1069
sysdeps/ia64/fpu/e_atanh.S Normal file

File diff suppressed because it is too large Load Diff

844
sysdeps/ia64/fpu/e_atanhf.S Normal file
View File

@ -0,0 +1,844 @@
.file "atanhf.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 05/22/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 08/06/02 Improved Itanium 2 performance
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 05/26/03 Improved performance, fixed to handle unorms
//
// API
//==============================================================
// float atanhf(float)
//
// Overview of operation
//==============================================================
// Background
//
//
// There are 7 paths:
// 1. x = +/-0.0
// Return atanhf(x) = +/-0.0
//
// 2. 0.0 < |x| <= MAX_DENORMAL_ABS
// Return atanhf(x) = x + sign(x)*x^2
//
// 3. MAX_DENORMAL_ABS < |x| < 2^(-20)
// Return atanhf(x) = Pol3(x), where Pol3(x) = x + x^3
//
// 4. 2^(-20) <= |x| < 1
// Return atanhf(x) = 0.5 * (log(1 + x) - log(1 - x))
// Algorithm description for log function see below.
//
// 5. |x| = 1
// Return atanhf(x) = sign(x) * +INF
//
// 6. 1 < |x| <= +INF
// Return atanhf(x) = QNaN
//
// 7. x = [S,Q]NaN
// Return atanhf(x) = QNaN
//
//==============================================================
// Algorithm Description for log(x) function
//
// Consider x = 2^N * 1.f1 f2 f3 f4...f63
// log(x) = log(x * frcpa(x) / frcpa(x))
// = log(x * frcpa(x)) + log(1/frcpa(x))
// = log(x * frcpa(x)) - log(frcpa(x))
//
// frcpa(x) = 2^(-N) * frcpa(1.f1 f2 ... f63)
//
// -log(frcpa(x)) = -log(C)
// = -log(2^(-N)) - log(frcpa(1.f1 f2 ... f63))
//
// -log(frcpa(x)) = -log(C)
// = N*log2 - log(frcpa(1.f1 f2 ... f63))
//
//
// log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
//
// log(x) = N*log2 + log(1./frcpa(1.f1 f2 ... f63)) + log(x * frcpa(x))
// log(x) = N*log2 + T + log(frcpa(x) x)
//
// Log(x) = N*log2 + T + log(C * x)
//
// C * x = 1 + r
//
// log(x) = N*log2 + T + log(1 + r)
// log(x) = N*log2 + T + Series(r)
//
// 1.f1 f2 ... f8 has 256 entries.
// They are 1 + k/2^8, k = 0 ... 255
// These 256 values are the table entries.
//
// Implementation
//==============================================================
// C = frcpa(x)
// r = C * x - 1
//
// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4
//
// x = f * 2^N where f is 1.f_1f_2f_3...f_63
// Nfloat = float(n) where n is the true unbiased exponent
// pre_index = f_1f_2....f_8
// index = pre_index * 16
// get the dxt table entry at index + offset = T
//
// result = (T + Nfloat * log(2)) + rseries
//
// The T table is calculated as follows
// Form x_k = 1 + k/2^8 where k goes from 0... 255
// y_k = frcpa(x_k)
// log(1/y_k) in quad and round to double-extended
// Registers used
//==============================================================
// Floating Point registers used:
// f8, input
// f32 -> f59
// General registers used:
// r14 -> r29, r32 -> r39
// Predicate registers used:
// p6 -> p9
// p6 to filter out case when |x| >= 1
// p7 to filter out case when x = [Q,S]NaN or +/-0
// p8 to filter out case when |x| < 2^(-20)
// p9 to filter out case when x = denormal
// Assembly macros
//==============================================================
// Symbolic names for the general and floating-point registers used by atanhf.
// Suffix "m" / "M" refers to the (1 - x) operand, "p" / "P" to the (1 + x)
// operand; both go through the same log() evaluation.
DataPtr = r14                // address of atanhf_data, advanced by post-increment loads
RcpTablePtrM = r15           // address of the log table entry selected by rIndm
RcpTablePtrP = r16           // address of the log table entry selected by rIndp
rExpbMask = r17              // 0x1ffff: ANDed with rArgSExpb to get the biased exponent
rBias = r18                  // 0xffff: compared against / subtracted from biased exponents
rNearZeroBound = r19         // 0xffeb: biased-exponent bound for |x| < 2^(-20)
rArgSExpb = r20              // getf.exp of the input x
rArgExpb = r21               // rArgSExpb & rExpbMask
rExpbm = r22                 // getf.exp of (1 - x)
rExpbp = r23                 // getf.exp of (1 + x)
rSigm = r24                  // getf.sig (significand) of (1 - x)
rSigp = r25                  // getf.sig (significand) of (1 + x)
rNm = r26                    // rExpbm - rBias
rNp = r27                    // rExpbp - rBias
rIndm = r28                  // 8-bit field extracted from rSigm (bits 55..62), table index
rIndp = r29                  // 8-bit field extracted from rSigp (bits 55..62), table index
// NOTE(review): the GR_* and atanh_GR_tag names below are not referenced in the
// part of the routine visible here; presumably they serve the
// __libm_error_support call sequence -- confirm against the rest of the file.
GR_SAVE_B0 = r33
GR_SAVE_GP = r34
GR_SAVE_PFS = r35
GR_Parameter_X = r36
GR_Parameter_Y = r37
GR_Parameter_RESULT = r38
atanh_GR_tag = r39
//==============================================================
fOneMx = f33                 // 1 - x
fOnePx = f34                 // 1 + x
fRm2 = f35                   // rm^2
fRm3 = f36                   // NOTE(review): no use visible in this part of the file
fRp2 = f37                   // rp^2
fRp3 = f38                   // NOTE(review): no use visible in this part of the file
fRcpM = f39                  // frcpa(1 - x)
fRcpP = f40                  // frcpa(1 + x)
fRp = f41                    // rp = rcpp * (1 + x) - 1
fRm = f42                    // rm = rcpm * (1 - x) - 1
fN4CvtM = f43                // rNm moved to an FP register (setf.sig) before fcvt.xf
fN4CvtP = f44                // rNp moved to an FP register (setf.sig) before fcvt.xf
fNm = f45                    // fcvt.xf of fN4CvtM
fNp = f46                    // fcvt.xf of fN4CvtP
fLogTm = f47                 // table entry Tm/2, then Nm*ln(2)/2 + Tm/2
fLogTp = f48                 // table entry Tp/2, then Np*ln(2)/2 + Tp/2
fLog2 = f49                  // 0.5*ln(2), loaded from atanhf_data
// NOTE(review): fArgAbs and fNormX both name f50, so they cannot be live at the
// same time; only fNormX (normalized x) is written in the code visible here.
fArgAbs = f50
fNormX = f50
fP32m = f51                  // P3*rm + P2, later reused as the polynomial accumulator
fP32p = f52                  // P3*rp + P2, later reused as the polynomial accumulator
fP10m = f53                  // P1*rm + fHalf
fP10p = f54                  // P1*rp + fHalf
fX2 = f55                    // x^2
fP3 = f56                    // P3*0.5, loaded from atanhf_data
fP2 = f57                    // P2*0.5, loaded from atanhf_data
fP1 = f58                    // P1*0.5, loaded from atanhf_data
fHalf = f59                  // 0.5, loaded from atanhf_data
// Data tables
//==============================================================
// Polynomial coefficients and constants for atanhf, stored as six 8-byte
// double-precision values (48 bytes).
// Layout is significant: the routine reads this object with three loads that
// each post-increment DataPtr by 16 (ldfpd P3,P2 / ldfpd P1,0.5 / ldfd 0.5*ln2).
// The trailing pad entry fills the second half of the last 16-byte step, so
// after the third load DataPtr is 48 bytes past the start of this object.
// The routine then uses DataPtr as the base address when indexing the log
// table, so atanhf_data2 must be placed directly after this object with no gap.
RODATA
.align 16
LOCAL_OBJECT_START(atanhf_data)
data8 0xbfc0001008f39d59 // P3*0.5
data8 0x3fc5556073e0c45a // P2*0.5
data8 0xbfcffffffffaea15 // P1*0.5
data8 0x3fe0000000000000 // 0.5
data8 0x3fd62e42fefa39ef // 0.5*ln(2)
data8 0x0000000000000000 // pad
LOCAL_OBJECT_END(atanhf_data)
LOCAL_OBJECT_START(atanhf_data2)
data8 0x3f50040155d5889e //log(1/frcpa(1+0/256))/2
data8 0x3f68121214586b54 //log(1/frcpa(1+1/256))/2
data8 0x3f741929f96832f0 //log(1/frcpa(1+2/256))/2
data8 0x3f7c317384c75f06 //log(1/frcpa(1+3/256))/2
data8 0x3f81a6b91ac73386 //log(1/frcpa(1+4/256))/2
data8 0x3f85ba9a5d9ac039 //log(1/frcpa(1+5/256))/2
data8 0x3f89d2a8074325f4 //log(1/frcpa(1+6/256))/2
data8 0x3f8d6b2725979802 //log(1/frcpa(1+7/256))/2
data8 0x3f90c58fa19dfaaa //log(1/frcpa(1+8/256))/2
data8 0x3f92954c78cbce1b //log(1/frcpa(1+9/256))/2
data8 0x3f94a94d2da96c56 //log(1/frcpa(1+10/256))/2
data8 0x3f967c94f2d4bb58 //log(1/frcpa(1+11/256))/2
data8 0x3f985188b630f068 //log(1/frcpa(1+12/256))/2
data8 0x3f9a6b8abe73af4c //log(1/frcpa(1+13/256))/2
data8 0x3f9c441e06f72a9e //log(1/frcpa(1+14/256))/2
data8 0x3f9e1e6713606d07 //log(1/frcpa(1+15/256))/2
data8 0x3f9ffa6911ab9301 //log(1/frcpa(1+16/256))/2
data8 0x3fa0ec139c5da601 //log(1/frcpa(1+17/256))/2
data8 0x3fa1dbd2643d190b //log(1/frcpa(1+18/256))/2
data8 0x3fa2cc7284fe5f1c //log(1/frcpa(1+19/256))/2
data8 0x3fa3bdf5a7d1ee64 //log(1/frcpa(1+20/256))/2
data8 0x3fa4b05d7aa012e0 //log(1/frcpa(1+21/256))/2
data8 0x3fa580db7ceb5702 //log(1/frcpa(1+22/256))/2
data8 0x3fa674f089365a7a //log(1/frcpa(1+23/256))/2
data8 0x3fa769ef2c6b568d //log(1/frcpa(1+24/256))/2
data8 0x3fa85fd927506a48 //log(1/frcpa(1+25/256))/2
data8 0x3fa9335e5d594989 //log(1/frcpa(1+26/256))/2
data8 0x3faa2b0220c8e5f5 //log(1/frcpa(1+27/256))/2
data8 0x3fab0004ac1a86ac //log(1/frcpa(1+28/256))/2
data8 0x3fabf968769fca11 //log(1/frcpa(1+29/256))/2
data8 0x3faccfedbfee13a8 //log(1/frcpa(1+30/256))/2
data8 0x3fada727638446a2 //log(1/frcpa(1+31/256))/2
data8 0x3faea3257fe10f7a //log(1/frcpa(1+32/256))/2
data8 0x3faf7be9fedbfde6 //log(1/frcpa(1+33/256))/2
data8 0x3fb02ab352ff25f4 //log(1/frcpa(1+34/256))/2
data8 0x3fb097ce579d204d //log(1/frcpa(1+35/256))/2
data8 0x3fb1178e8227e47c //log(1/frcpa(1+36/256))/2
data8 0x3fb185747dbecf34 //log(1/frcpa(1+37/256))/2
data8 0x3fb1f3b925f25d41 //log(1/frcpa(1+38/256))/2
data8 0x3fb2625d1e6ddf57 //log(1/frcpa(1+39/256))/2
data8 0x3fb2d1610c86813a //log(1/frcpa(1+40/256))/2
data8 0x3fb340c59741142e //log(1/frcpa(1+41/256))/2
data8 0x3fb3b08b6757f2a9 //log(1/frcpa(1+42/256))/2
data8 0x3fb40dfb08378003 //log(1/frcpa(1+43/256))/2
data8 0x3fb47e74e8ca5f7c //log(1/frcpa(1+44/256))/2
data8 0x3fb4ef51f6466de4 //log(1/frcpa(1+45/256))/2
data8 0x3fb56092e02ba516 //log(1/frcpa(1+46/256))/2
data8 0x3fb5d23857cd74d5 //log(1/frcpa(1+47/256))/2
data8 0x3fb6313a37335d76 //log(1/frcpa(1+48/256))/2
data8 0x3fb6a399dabbd383 //log(1/frcpa(1+49/256))/2
data8 0x3fb70337dd3ce41b //log(1/frcpa(1+50/256))/2
data8 0x3fb77654128f6127 //log(1/frcpa(1+51/256))/2
data8 0x3fb7e9d82a0b022d //log(1/frcpa(1+52/256))/2
data8 0x3fb84a6b759f512f //log(1/frcpa(1+53/256))/2
data8 0x3fb8ab47d5f5a310 //log(1/frcpa(1+54/256))/2
data8 0x3fb91fe49096581b //log(1/frcpa(1+55/256))/2
data8 0x3fb981634011aa75 //log(1/frcpa(1+56/256))/2
data8 0x3fb9f6c407089664 //log(1/frcpa(1+57/256))/2
data8 0x3fba58e729348f43 //log(1/frcpa(1+58/256))/2
data8 0x3fbabb55c31693ad //log(1/frcpa(1+59/256))/2
data8 0x3fbb1e104919efd0 //log(1/frcpa(1+60/256))/2
data8 0x3fbb94ee93e367cb //log(1/frcpa(1+61/256))/2
data8 0x3fbbf851c067555f //log(1/frcpa(1+62/256))/2
data8 0x3fbc5c0254bf23a6 //log(1/frcpa(1+63/256))/2
data8 0x3fbcc000c9db3c52 //log(1/frcpa(1+64/256))/2
data8 0x3fbd244d99c85674 //log(1/frcpa(1+65/256))/2
data8 0x3fbd88e93fb2f450 //log(1/frcpa(1+66/256))/2
data8 0x3fbdedd437eaef01 //log(1/frcpa(1+67/256))/2
data8 0x3fbe530effe71012 //log(1/frcpa(1+68/256))/2
data8 0x3fbeb89a1648b971 //log(1/frcpa(1+69/256))/2
data8 0x3fbf1e75fadf9bde //log(1/frcpa(1+70/256))/2
data8 0x3fbf84a32ead7c35 //log(1/frcpa(1+71/256))/2
data8 0x3fbfeb2233ea07cd //log(1/frcpa(1+72/256))/2
data8 0x3fc028f9c7035c1c //log(1/frcpa(1+73/256))/2
data8 0x3fc05c8be0d9635a //log(1/frcpa(1+74/256))/2
data8 0x3fc085eb8f8ae797 //log(1/frcpa(1+75/256))/2
data8 0x3fc0b9c8e32d1911 //log(1/frcpa(1+76/256))/2
data8 0x3fc0edd060b78081 //log(1/frcpa(1+77/256))/2
data8 0x3fc122024cf0063f //log(1/frcpa(1+78/256))/2
data8 0x3fc14be2927aecd4 //log(1/frcpa(1+79/256))/2
data8 0x3fc180618ef18adf //log(1/frcpa(1+80/256))/2
data8 0x3fc1b50bbe2fc63b //log(1/frcpa(1+81/256))/2
data8 0x3fc1df4cc7cf242d //log(1/frcpa(1+82/256))/2
data8 0x3fc214456d0eb8d4 //log(1/frcpa(1+83/256))/2
data8 0x3fc23ec5991eba49 //log(1/frcpa(1+84/256))/2
data8 0x3fc2740d9f870afb //log(1/frcpa(1+85/256))/2
data8 0x3fc29ecdabcdfa04 //log(1/frcpa(1+86/256))/2
data8 0x3fc2d46602adccee //log(1/frcpa(1+87/256))/2
data8 0x3fc2ff66b04ea9d4 //log(1/frcpa(1+88/256))/2
data8 0x3fc335504b355a37 //log(1/frcpa(1+89/256))/2
data8 0x3fc360925ec44f5d //log(1/frcpa(1+90/256))/2
data8 0x3fc38bf1c3337e75 //log(1/frcpa(1+91/256))/2
data8 0x3fc3c25277333184 //log(1/frcpa(1+92/256))/2
data8 0x3fc3edf463c1683e //log(1/frcpa(1+93/256))/2
data8 0x3fc419b423d5e8c7 //log(1/frcpa(1+94/256))/2
data8 0x3fc44591e0539f49 //log(1/frcpa(1+95/256))/2
data8 0x3fc47c9175b6f0ad //log(1/frcpa(1+96/256))/2
data8 0x3fc4a8b341552b09 //log(1/frcpa(1+97/256))/2
data8 0x3fc4d4f3908901a0 //log(1/frcpa(1+98/256))/2
data8 0x3fc501528da1f968 //log(1/frcpa(1+99/256))/2
data8 0x3fc52dd06347d4f6 //log(1/frcpa(1+100/256))/2
data8 0x3fc55a6d3c7b8a8a //log(1/frcpa(1+101/256))/2
data8 0x3fc5925d2b112a59 //log(1/frcpa(1+102/256))/2
data8 0x3fc5bf406b543db2 //log(1/frcpa(1+103/256))/2
data8 0x3fc5ec433d5c35ae //log(1/frcpa(1+104/256))/2
data8 0x3fc61965cdb02c1f //log(1/frcpa(1+105/256))/2
data8 0x3fc646a84935b2a2 //log(1/frcpa(1+106/256))/2
data8 0x3fc6740add31de94 //log(1/frcpa(1+107/256))/2
data8 0x3fc6a18db74a58c5 //log(1/frcpa(1+108/256))/2
data8 0x3fc6cf31058670ec //log(1/frcpa(1+109/256))/2
data8 0x3fc6f180e852f0ba //log(1/frcpa(1+110/256))/2
data8 0x3fc71f5d71b894f0 //log(1/frcpa(1+111/256))/2
data8 0x3fc74d5aefd66d5c //log(1/frcpa(1+112/256))/2
data8 0x3fc77b79922bd37e //log(1/frcpa(1+113/256))/2
data8 0x3fc7a9b9889f19e2 //log(1/frcpa(1+114/256))/2
data8 0x3fc7d81b037eb6a6 //log(1/frcpa(1+115/256))/2
data8 0x3fc8069e33827231 //log(1/frcpa(1+116/256))/2
data8 0x3fc82996d3ef8bcb //log(1/frcpa(1+117/256))/2
data8 0x3fc85855776dcbfb //log(1/frcpa(1+118/256))/2
data8 0x3fc8873658327ccf //log(1/frcpa(1+119/256))/2
data8 0x3fc8aa75973ab8cf //log(1/frcpa(1+120/256))/2
data8 0x3fc8d992dc8824e5 //log(1/frcpa(1+121/256))/2
data8 0x3fc908d2ea7d9512 //log(1/frcpa(1+122/256))/2
data8 0x3fc92c59e79c0e56 //log(1/frcpa(1+123/256))/2
data8 0x3fc95bd750ee3ed3 //log(1/frcpa(1+124/256))/2
data8 0x3fc98b7811a3ee5b //log(1/frcpa(1+125/256))/2
data8 0x3fc9af47f33d406c //log(1/frcpa(1+126/256))/2
data8 0x3fc9df270c1914a8 //log(1/frcpa(1+127/256))/2
data8 0x3fca0325ed14fda4 //log(1/frcpa(1+128/256))/2
data8 0x3fca33440224fa79 //log(1/frcpa(1+129/256))/2
data8 0x3fca57725e80c383 //log(1/frcpa(1+130/256))/2
data8 0x3fca87d0165dd199 //log(1/frcpa(1+131/256))/2
data8 0x3fcaac2e6c03f896 //log(1/frcpa(1+132/256))/2
data8 0x3fcadccc6fdf6a81 //log(1/frcpa(1+133/256))/2
data8 0x3fcb015b3eb1e790 //log(1/frcpa(1+134/256))/2
data8 0x3fcb323a3a635948 //log(1/frcpa(1+135/256))/2
data8 0x3fcb56fa04462909 //log(1/frcpa(1+136/256))/2
data8 0x3fcb881aa659bc93 //log(1/frcpa(1+137/256))/2
data8 0x3fcbad0bef3db165 //log(1/frcpa(1+138/256))/2
data8 0x3fcbd21297781c2f //log(1/frcpa(1+139/256))/2
data8 0x3fcc039236f08819 //log(1/frcpa(1+140/256))/2
data8 0x3fcc28cb1e4d32fd //log(1/frcpa(1+141/256))/2
data8 0x3fcc4e19b84723c2 //log(1/frcpa(1+142/256))/2
data8 0x3fcc7ff9c74554c9 //log(1/frcpa(1+143/256))/2
data8 0x3fcca57b64e9db05 //log(1/frcpa(1+144/256))/2
data8 0x3fcccb130a5cebb0 //log(1/frcpa(1+145/256))/2
data8 0x3fccf0c0d18f326f //log(1/frcpa(1+146/256))/2
data8 0x3fcd232075b5a201 //log(1/frcpa(1+147/256))/2
data8 0x3fcd490246defa6b //log(1/frcpa(1+148/256))/2
data8 0x3fcd6efa918d25cd //log(1/frcpa(1+149/256))/2
data8 0x3fcd9509707ae52f //log(1/frcpa(1+150/256))/2
data8 0x3fcdbb2efe92c554 //log(1/frcpa(1+151/256))/2
data8 0x3fcdee2f3445e4af //log(1/frcpa(1+152/256))/2
data8 0x3fce148a1a2726ce //log(1/frcpa(1+153/256))/2
data8 0x3fce3afc0a49ff40 //log(1/frcpa(1+154/256))/2
data8 0x3fce6185206d516e //log(1/frcpa(1+155/256))/2
data8 0x3fce882578823d52 //log(1/frcpa(1+156/256))/2
data8 0x3fceaedd2eac990c //log(1/frcpa(1+157/256))/2
data8 0x3fced5ac5f436be3 //log(1/frcpa(1+158/256))/2
data8 0x3fcefc9326d16ab9 //log(1/frcpa(1+159/256))/2
data8 0x3fcf2391a2157600 //log(1/frcpa(1+160/256))/2
data8 0x3fcf4aa7ee03192d //log(1/frcpa(1+161/256))/2
data8 0x3fcf71d627c30bb0 //log(1/frcpa(1+162/256))/2
data8 0x3fcf991c6cb3b379 //log(1/frcpa(1+163/256))/2
data8 0x3fcfc07ada69a910 //log(1/frcpa(1+164/256))/2
data8 0x3fcfe7f18eb03d3e //log(1/frcpa(1+165/256))/2
data8 0x3fd007c053c5002e //log(1/frcpa(1+166/256))/2
data8 0x3fd01b942198a5a1 //log(1/frcpa(1+167/256))/2
data8 0x3fd02f74400c64eb //log(1/frcpa(1+168/256))/2
data8 0x3fd04360be7603ad //log(1/frcpa(1+169/256))/2
data8 0x3fd05759ac47fe34 //log(1/frcpa(1+170/256))/2
data8 0x3fd06b5f1911cf52 //log(1/frcpa(1+171/256))/2
data8 0x3fd078bf0533c568 //log(1/frcpa(1+172/256))/2
data8 0x3fd08cd9687e7b0e //log(1/frcpa(1+173/256))/2
data8 0x3fd0a10074cf9019 //log(1/frcpa(1+174/256))/2
data8 0x3fd0b5343a234477 //log(1/frcpa(1+175/256))/2
data8 0x3fd0c974c89431ce //log(1/frcpa(1+176/256))/2
data8 0x3fd0ddc2305b9886 //log(1/frcpa(1+177/256))/2
data8 0x3fd0eb524bafc918 //log(1/frcpa(1+178/256))/2
data8 0x3fd0ffb54213a476 //log(1/frcpa(1+179/256))/2
data8 0x3fd114253da97d9f //log(1/frcpa(1+180/256))/2
data8 0x3fd128a24f1d9aff //log(1/frcpa(1+181/256))/2
data8 0x3fd1365252bf0865 //log(1/frcpa(1+182/256))/2
data8 0x3fd14ae558b4a92d //log(1/frcpa(1+183/256))/2
data8 0x3fd15f85a19c765b //log(1/frcpa(1+184/256))/2
data8 0x3fd16d4d38c119fa //log(1/frcpa(1+185/256))/2
data8 0x3fd18203c20dd133 //log(1/frcpa(1+186/256))/2
data8 0x3fd196c7bc4b1f3b //log(1/frcpa(1+187/256))/2
data8 0x3fd1a4a738b7a33c //log(1/frcpa(1+188/256))/2
data8 0x3fd1b981c0c9653d //log(1/frcpa(1+189/256))/2
data8 0x3fd1ce69e8bb106b //log(1/frcpa(1+190/256))/2
data8 0x3fd1dc619de06944 //log(1/frcpa(1+191/256))/2
data8 0x3fd1f160a2ad0da4 //log(1/frcpa(1+192/256))/2
data8 0x3fd2066d7740737e //log(1/frcpa(1+193/256))/2
data8 0x3fd2147dba47a394 //log(1/frcpa(1+194/256))/2
data8 0x3fd229a1bc5ebac3 //log(1/frcpa(1+195/256))/2
data8 0x3fd237c1841a502e //log(1/frcpa(1+196/256))/2
data8 0x3fd24cfce6f80d9a //log(1/frcpa(1+197/256))/2
data8 0x3fd25b2c55cd5762 //log(1/frcpa(1+198/256))/2
data8 0x3fd2707f4d5f7c41 //log(1/frcpa(1+199/256))/2
data8 0x3fd285e0842ca384 //log(1/frcpa(1+200/256))/2
data8 0x3fd294294708b773 //log(1/frcpa(1+201/256))/2
data8 0x3fd2a9a2670aff0c //log(1/frcpa(1+202/256))/2
data8 0x3fd2b7fb2c8d1cc1 //log(1/frcpa(1+203/256))/2
data8 0x3fd2c65a6395f5f5 //log(1/frcpa(1+204/256))/2
data8 0x3fd2dbf557b0df43 //log(1/frcpa(1+205/256))/2
data8 0x3fd2ea64c3f97655 //log(1/frcpa(1+206/256))/2
data8 0x3fd3001823684d73 //log(1/frcpa(1+207/256))/2
data8 0x3fd30e97e9a8b5cd //log(1/frcpa(1+208/256))/2
data8 0x3fd32463ebdd34ea //log(1/frcpa(1+209/256))/2
data8 0x3fd332f4314ad796 //log(1/frcpa(1+210/256))/2
data8 0x3fd348d90e7464d0 //log(1/frcpa(1+211/256))/2
data8 0x3fd35779f8c43d6e //log(1/frcpa(1+212/256))/2
data8 0x3fd36621961a6a99 //log(1/frcpa(1+213/256))/2
data8 0x3fd37c299f3c366a //log(1/frcpa(1+214/256))/2
data8 0x3fd38ae2171976e7 //log(1/frcpa(1+215/256))/2
data8 0x3fd399a157a603e7 //log(1/frcpa(1+216/256))/2
data8 0x3fd3afccfe77b9d1 //log(1/frcpa(1+217/256))/2
data8 0x3fd3be9d503533b5 //log(1/frcpa(1+218/256))/2
data8 0x3fd3cd7480b4a8a3 //log(1/frcpa(1+219/256))/2
data8 0x3fd3e3c43918f76c //log(1/frcpa(1+220/256))/2
data8 0x3fd3f2acb27ed6c7 //log(1/frcpa(1+221/256))/2
data8 0x3fd4019c2125ca93 //log(1/frcpa(1+222/256))/2
data8 0x3fd4181061389722 //log(1/frcpa(1+223/256))/2
data8 0x3fd42711518df545 //log(1/frcpa(1+224/256))/2
data8 0x3fd436194e12b6bf //log(1/frcpa(1+225/256))/2
data8 0x3fd445285d68ea69 //log(1/frcpa(1+226/256))/2
data8 0x3fd45bcc464c893a //log(1/frcpa(1+227/256))/2
data8 0x3fd46aed21f117fc //log(1/frcpa(1+228/256))/2
data8 0x3fd47a1527e8a2d3 //log(1/frcpa(1+229/256))/2
data8 0x3fd489445efffccc //log(1/frcpa(1+230/256))/2
data8 0x3fd4a018bcb69835 //log(1/frcpa(1+231/256))/2
data8 0x3fd4af5a0c9d65d7 //log(1/frcpa(1+232/256))/2
data8 0x3fd4bea2a5bdbe87 //log(1/frcpa(1+233/256))/2
data8 0x3fd4cdf28f10ac46 //log(1/frcpa(1+234/256))/2
data8 0x3fd4dd49cf994058 //log(1/frcpa(1+235/256))/2
data8 0x3fd4eca86e64a684 //log(1/frcpa(1+236/256))/2
data8 0x3fd503c43cd8eb68 //log(1/frcpa(1+237/256))/2
data8 0x3fd513356667fc57 //log(1/frcpa(1+238/256))/2
data8 0x3fd522ae0738a3d8 //log(1/frcpa(1+239/256))/2
data8 0x3fd5322e26867857 //log(1/frcpa(1+240/256))/2
data8 0x3fd541b5cb979809 //log(1/frcpa(1+241/256))/2
data8 0x3fd55144fdbcbd62 //log(1/frcpa(1+242/256))/2
data8 0x3fd560dbc45153c7 //log(1/frcpa(1+243/256))/2
data8 0x3fd5707a26bb8c66 //log(1/frcpa(1+244/256))/2
data8 0x3fd587f60ed5b900 //log(1/frcpa(1+245/256))/2
data8 0x3fd597a7977c8f31 //log(1/frcpa(1+246/256))/2
data8 0x3fd5a760d634bb8b //log(1/frcpa(1+247/256))/2
data8 0x3fd5b721d295f10f //log(1/frcpa(1+248/256))/2
data8 0x3fd5c6ea94431ef9 //log(1/frcpa(1+249/256))/2
data8 0x3fd5d6bb22ea86f6 //log(1/frcpa(1+250/256))/2
data8 0x3fd5e6938645d390 //log(1/frcpa(1+251/256))/2
data8 0x3fd5f673c61a2ed2 //log(1/frcpa(1+252/256))/2
data8 0x3fd6065bea385926 //log(1/frcpa(1+253/256))/2
data8 0x3fd6164bfa7cc06b //log(1/frcpa(1+254/256))/2
data8 0x3fd62643fecf9743 //log(1/frcpa(1+255/256))/2
LOCAL_OBJECT_END(atanhf_data2)
.section .text
// atanhf: single precision inverse hyperbolic tangent of the value in f8;
// the result is returned in f8.
//
// The main path (2^-20 <= |x| < 1) evaluates
//     0.5 * (log(1 + x) - log(1 - x))
// where each log is built from the unbiased exponent, a table value indexed
// by the top significand bits, and a short polynomial in the frcpa remainder.
// Inputs that leave the main path early:
//     unnormal x       -> ATANH_UNORM
//     NaN or +/-0      -> returns x*1 + x
//     |x| < 2^-20      -> returns x + x^3
//     |x| >= 1         -> atanhf_ge_one (error path)
GLOBAL_LIBM_ENTRY(atanhf)
{ .mfi
getf.exp rArgSExpb = f8
fclass.m p9,p0 = f8, 0x0b // is arg denormal ?
mov rExpbMask = 0x1ffff
}
{ .mfi
addl DataPtr = @ltoff(atanhf_data), gp
fnma.s1 fOneMx = f8, f1, f1 // 1 - x
mov rBias = 0xffff
}
;;
{ .mfi
nop.m 0
fclass.m p7,p0 = f8, 0xc7 // is arg NaN or +/-0 ?
mov rNearZeroBound = 0xffeb // 2^(-20)
}
{ .mfi
ld8 DataPtr = [DataPtr]
fma.s1 fOnePx = f8, f1, f1 // 1 + x
nop.i 0
}
;;
{ .mfb
nop.m 0
fnorm.s1 fNormX = f8 // Normalize x
(p9) br.cond.spnt ATANH_UNORM // Branch if x=unorm
}
;;
ATANH_COMMON:
// Return here if x=unorm and not denorm
// (in that case rArgSExpb has been reloaded from the normalized value)
{ .mfi
ldfpd fP3, fP2 = [DataPtr], 16
fma.s1 fX2 = f8, f8, f0 // x^2
nop.i 0
}
{ .mfb
nop.m 0
(p7) fma.s.s0 f8 = f8,f1,f8 // NaN or +/-0
(p7) br.ret.spnt b0
}
;;
{ .mfi
ldfpd fP1, fHalf = [DataPtr], 16
frcpa.s1 fRcpM, p9 = f1, fOneMx // rcpm = frcpa(1 - x)
nop.i 0
}
;;
{ .mfi
getf.exp rExpbm = fOneMx
frcpa.s1 fRcpP, p0 = f1, fOnePx // rcpp = frcpa(1 + x)
// biased exponent
and rArgExpb = rArgSExpb, rExpbMask
}
;;
{ .mmi
getf.exp rExpbp = fOnePx
// is |x| < 2^(-20) ?
cmp.gt p8,p0 = rNearZeroBound, rArgExpb
cmp.ge p6,p0 = rArgExpb, rBias // is |x| >= 1 ?
}
;;
{ .mmb
getf.sig rSigm = fOneMx
nop.m 0
(p6) br.cond.spnt atanhf_ge_one
}
;;
{ .mfb
getf.sig rSigp = fOnePx
(p8) fma.s.s0 f8 = fX2, f8, f8 // x + x^3
(p8) br.ret.spnt b0 // Exit for MAX_DENORM_ABS < |x| < 2^-20
}
;;
{ .mfi
ldfd fLog2 = [DataPtr], 16
fms.s1 fRm = fRcpM, fOneMx, f1 // rm = rcpm * (1 - x) - 1
nop.i 0
}
;;
{ .mmf
// (1 - x) is always positive here and we need not mask sign bit
sub rNm = rExpbm, rBias
// (1 + x) is always positive here and we need not mask sign bit
sub rNp = rExpbp, rBias
fms.s1 fRp = fRcpP, fOnePx, f1 // rp = rcpp * (1 + x) - 1
}
;;
// Move the unbiased exponents to FP registers and form the table indices
// from significand bits 55..62 (the 8 bits just below the leading bit).
{ .mmi
setf.sig fN4CvtM = rNm
setf.sig fN4CvtP = rNp
extr.u rIndm = rSigm,55,8 // Extract 8 bits
}
;;
// Table entries are 8 bytes wide: address = DataPtr + 8*index.
{ .mmi
shladd RcpTablePtrM = rIndm, 3, DataPtr
nop.m 0
extr.u rIndp = rSigp,55,8 // Extract 8 bits
}
;;
{ .mmi
ldfd fLogTm = [RcpTablePtrM]
shladd RcpTablePtrP = rIndp, 3, DataPtr
nop.i 0
}
;;
{ .mfi
ldfd fLogTp = [RcpTablePtrP]
fma.s1 fRm2 = fRm, fRm, f0 // rm^2
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fP32m = fP3, fRm, fP2 // P3*rm + P2
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRp2 = fRp, fRp, f0 // rp^2
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fP10m = fP1, fRm, fHalf // P1*rm + 1
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fP32p = fP3, fRp, fP2 // P3*rp + P2
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fP10p = fP1, fRp, fHalf // P1*rp + 1
nop.i 0
}
;;
// Convert the integer exponents Nm, Np to floating point.
{ .mfi
nop.m 0
fcvt.xf fNm = fN4CvtM
nop.i 0
}
{ .mfi
nop.m 0
fcvt.xf fNp = fN4CvtP
nop.i 0
}
;;
{ .mfi
nop.m 0
// (P3*rm + P2)*rm^2 + (P1*rm + 1)
fma.s1 fP32m = fP32m, fRm2, fP10m
nop.i 0
}
{ .mfi
nop.m 0
// (P3*rp + P2)*rp^2 + (P1*rp + 1)
fma.s1 fP32p = fP32p, fRp2, fP10p
nop.i 0
}
;;
{ .mfi
nop.m 0
// Nm*ln(2)/2 + Tm/2
fma.s1 fLogTm = fNm, fLog2, fLogTm
nop.i 0
}
{ .mfi
nop.m 0
// Np*ln(2)/2 + Tp/2
fma.s1 fLogTp = fNp, fLog2, fLogTp
nop.i 0
}
;;
{ .mfi
nop.m 0
// ((P3*rm + P2)*rm^2 + (P3*rm + 1))*0.5*rm + (Nm*ln(2)/2 + Tm/2)
fma.d.s1 fP32m = fP32m, fRm, fLogTm
nop.i 0
}
{ .mfi
nop.m 0
// ((P3*rp + P2)*rp^2 + (P3*rp + 1))*0.5*rp + (Np*ln(2)/2 + Tp/2)
fma.d.s1 fP32p = fP32p, fRp, fLogTp
nop.i 0
}
;;
// Final subtraction is the only step done in status field s0 with single
// precision rounding; everything above used s1.
{ .mfb
nop.m 0
// atanhf(x) = 0.5 * (log(1 + x) - log(1 - x))
fnma.s.s0 f8 = fP32m, f1, fP32p
br.ret.sptk b0 // Exit for 2^(-20) <= |x| < 1.0
}
;;
ATANH_UNORM:
// Here if x=unorm
//
// Reloads the sign/exponent from the normalized copy of x, then separates
// two cases:
//   - x is unnormal but its normalized form is not denormal (p9 = 1):
//     rejoin the main path at ATANH_COMMON;
//   - x is a true denormal: return x - x^2 (x < 0) or x + x^2 (x >= 0).
//
// The sign test writes p10/p11, so the denormal results must be predicated
// on p10/p11 as well.  p6 has not been written yet on this path and p7 is
// known to be 0 here (x is neither NaN nor zero), so they cannot be used to
// select the result.
{ .mfi
getf.exp rArgSExpb = fNormX // Recompute if x unorm
fclass.m p0,p9 = fNormX, 0x0b // Test x denorm
nop.i 0
}
;;
{ .mfb
nop.m 0
fcmp.lt.s0 p10,p11 = f8, f0 // Set denormal flag; p10: x<0, p11: x>=0
(p9) br.cond.sptk ATANH_COMMON // Continue if x unorm and not denorm
}
;;
.pred.rel "mutex",p10,p11
{ .mfi
nop.m 0
(p10) fnma.s.s0 f8 = f8,f8,f8 // Result x-x^2 if x=-denorm
nop.i 0
}
{ .mfb
nop.m 0
(p11) fma.s.s0 f8 = f8,f8,f8 // Result x+x^2 if x=+denorm
br.ret.spnt b0 // Exit if denorm
}
;;
// Here if |x| >= 1.0
//
// Error path.  Builds the default result in f8, keeps the original input in
// f10, selects an error tag, and branches to __libm_error_region:
//   |x| >  1.0 : f8 = frcpa(0/0) (QNaN, invalid raised), tag 133
//   |x| == 1.0 : f8 = infinity with the sign of x (divide-by-zero raised),
//                tag 134
atanhf_ge_one:
{ .mfi
alloc r32 = ar.pfs,1,3,4,0
fmerge.s fArgAbs = f0, f8 // Form |x|
nop.i 0
}
;;
{ .mfi
nop.m 0
fmerge.s f10 = f8, f8 // Save input for error call
nop.i 0
}
;;
// p6: |x| == 1.0, p7: |x| != 1.0 (i.e. |x| > 1.0 on this path)
{ .mfi
nop.m 0
fcmp.eq.s1 p6,p7 = fArgAbs, f1 // Test for |x| = 1.0
nop.i 0
}
;;
// Set error tag and result, and raise invalid flag if |x| > 1.0
{ .mfi
(p7) mov atanh_GR_tag = 133
(p7) frcpa.s0 f8, p0 = f0, f0 // Get QNaN, and raise invalid
nop.i 0
}
;;
// Set error tag and result, and raise Z flag if |x| = 1.0
{ .mfi
nop.m 0
(p6) frcpa.s0 fRm, p0 = f1, f0 // Get inf, and raise Z flag
nop.i 0
}
;;
// fmerge.s takes the sign from f8 (the input) and the magnitude from fRm.
{ .mfb
(p6) mov atanh_GR_tag = 134
(p6) fmerge.s f8 = f8, fRm // result is +-inf
br.cond.sptk __libm_error_region // Exit if |x| >= 1.0
}
;;
GLOBAL_LIBM_END(atanhf)
// __libm_error_region: common tail for the atanhf error path.
//
// On entry f10 holds the original input and f8 the default result (both set
// by atanhf_ge_one, which also selects the error tag).  The routine opens a
// 64-byte frame, stores the values as single precision at
//     sp+16 : parameter 1 (f10)
//     sp+32 : parameter 2 (f1, placeholder)
//     sp+48 : result      (f8)
// leaves the three addresses in GR_Parameter_X / _Y / _RESULT, calls
// __libm_error_support, reloads f8 from sp+48, and returns to the caller of
// atanhf.
// NOTE(review): the GR_Parameter_* aliases are defined outside this excerpt;
// presumably they map onto the output registers reserved by the alloc in
// atanhf_ge_one -- confirm.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// GR_Parameter_Y was computed from the old sp, so it equals new sp+32;
// the post-increment leaves it at new sp+48.
{ .mmi
stfs [GR_Parameter_Y] = f1,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
// Parameter 3 address
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
// Store the default result at sp+48, then step GR_Parameter_Y back to sp+32.
{ .mib
stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call rather than relying on the
// value set up before it.
{ .mmi
add GR_Parameter_RESULT = 48,sp
nop.m 0
nop.i 0
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

1155
sysdeps/ia64/fpu/e_atanhl.S Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

602
sysdeps/ia64/fpu/e_exp10.S Normal file
View File

@ -0,0 +1,602 @@
.file "exp10.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/25/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/06/02 Improved performance; no inexact flags on exact cases
// 01/29/03 Added missing } to bundle templates
//
// API
//==============================================================
// double exp10(double)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let x= (K + fh + fl + r)/log2(10), where
// K is an integer, fh= 0.b1 b2 b3 b4 b5,
// fl= 2^{-5}* 0.b6 b7 b8 b9 b10 (fh, fl >= 0),
// and |r|<2^{-11}
// Th is a table that stores 2^fh (32 entries) rounded to
// double extended precision (only mantissa is stored)
// Tl is a table that stores 2^fl (32 entries) rounded to
// double extended precision (only mantissa is stored)
//
// 10^x is approximated as
// 2^K * Th [ f ] * Tl [ f ] * (1+c1*e+c1*r+c2*r^2+c3*r^3+c4*r^4),
// where e= (x*log2(10)_hi-RN(x*log2(10)_hi))+log2(10)_lo*x
// Note there are only 22 non-zero values that produce an exact result:
// 1.0, 2.0, ... 22.0.
// We test for these cases and use s1 to avoid setting the inexact flag.
// Special values
//==============================================================
// exp10(0)= 1
// exp10(+inf)= inf
// exp10(-inf)= 0
//
// Registers used
//==============================================================
// r2-r3, r14-r40
// f6-f15, f32-f51
// p6-p9, p12
//
// Symbolic register names used by exp10 and its error region.
//
// General registers: r2-r3 and r14-r31 are scratch; r33-r36 hold values
// saved across the call to __libm_error_support; r37-r40 carry the
// error-handler parameters (the alloc at function entry requests
// 1 input, 4 locals and 4 outputs starting at r32).
GR_TBL_START = r2
GR_LOG_TBL = r3
GR_OF_LIMIT = r14
GR_UF_LIMIT = r15
GR_EXP_CORR = r16
GR_F_low = r17
GR_F_high = r18
GR_K = r19
GR_Flow_ADDR = r20
GR_BIAS = r21
GR_Fh = r22
GR_Fh_ADDR = r23
GR_EXPMAX = r24
GR_BIAS53 = r25
GR_ROUNDVAL = r26
GR_MASK = r27
GR_KF0 = r28
GR_MASK_low = r29
GR_COEFF_START = r30
GR_exact_limit = r31
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
GR_SAVE_SP = r36
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// Floating-point registers.
// NOTE(review): FR_X and FR_LOG2_10 both name f10, so FR_X only holds the
// input if code copies it there after FR_LOG2_10 is no longer needed.
FR_X = f10
FR_Y = f1
FR_RESULT = f8
FR_COEFF1 = f6
FR_COEFF2 = f7
FR_R = f9
FR_LOG2_10 = f10
FR_2P53 = f11
FR_KF0 = f12
FR_COEFF3 = f13
FR_COEFF4 = f14
FR_UF_LIMIT = f15
FR_OF_LIMIT = f32
FR_DX_L210 = f33
FR_ROUNDVAL = f34
FR_KF = f35
FR_2_TO_K = f36
FR_T_low = f37
FR_T_high = f38
FR_P34 = f39
FR_R2 = f40
FR_P12 = f41
FR_T_low_K = f42
FR_P14 = f43
FR_T = f44
FR_P = f45
FR_L2_10_low = f46
FR_L2_10_high = f47
FR_E0 = f48
FR_E = f49
FR_exact_limit = f50
FR_int_x = f51
// Data tables
//==============================================================
RODATA
.align 16
// poly_coeffs: five 16-byte entries read in order by exp10 with
// post-incrementing loads (ldfe, ldfe, ldfpd, ldfe, ldfe), so the order
// and size of the entries must match the load sequence in the code.
LOCAL_OBJECT_START(poly_coeffs)
data8 0xd49a784bcd1b8afe, 0x00003fcb // log2(10)*2^(10-63)
data8 0x9257edfe9b5fb698, 0x3fbf // log2(10)_low (bits 64...127)
data8 0x3fac6b08d704a0c0, 0x3f83b2ab6fba4e77 // C_3 and C_4
data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1
data8 0xf5fdeffc162c7541, 0x00003ffc // C_2
LOCAL_OBJECT_END(poly_coeffs)
// T_table: two consecutive 32-entry tables of 8-byte significands.
// Entries are loaded with ldf8, i.e. as raw 64-bit significands; the code
// compensates for the resulting scale with the 2^{K-126} factor.
// The first table (offset 0) is indexed by f_low, the second (offset 256)
// by f_high; exp10 forms the second base as GR_COEFF_START + 256.
LOCAL_OBJECT_START(T_table)
// 2^{0.00000 b6 b7 b8 b9 b10}
data8 0x8000000000000000, 0x8016302f17467628
data8 0x802c6436d0e04f50, 0x80429c17d77c18ed
data8 0x8058d7d2d5e5f6b0, 0x806f17687707a7af
data8 0x80855ad965e88b83, 0x809ba2264dada76a
data8 0x80b1ed4fd999ab6c, 0x80c83c56b50cf77f
data8 0x80de8f3b8b85a0af, 0x80f4e5ff089f763e
data8 0x810b40a1d81406d4, 0x81219f24a5baa59d
data8 0x813801881d886f7b, 0x814e67cceb90502c
data8 0x8164d1f3bc030773, 0x817b3ffd3b2f2e47
data8 0x8191b1ea15813bfd, 0x81a827baf7838b78
data8 0x81bea1708dde6055, 0x81d51f0b8557ec1c
data8 0x81eba08c8ad4536f, 0x820225f44b55b33b
data8 0x8218af4373fc25eb, 0x822f3c7ab205c89a
data8 0x8245cd9ab2cec048, 0x825c62a423d13f0c
data8 0x8272fb97b2a5894c, 0x828998760d01faf3
data8 0x82a0393fe0bb0ca8, 0x82b6ddf5dbc35906
//
//
// 2^{0.b1 b2 b3 b4 b5}
data8 0x8000000000000000, 0x82cd8698ac2ba1d7
data8 0x85aac367cc487b14, 0x88980e8092da8527
data8 0x8b95c1e3ea8bd6e6, 0x8ea4398b45cd53c0
data8 0x91c3d373ab11c336, 0x94f4efa8fef70961
data8 0x9837f0518db8a96f, 0x9b8d39b9d54e5538
data8 0x9ef5326091a111ad, 0xa27043030c496818
data8 0xa5fed6a9b15138ea, 0xa9a15ab4ea7c0ef8
data8 0xad583eea42a14ac6, 0xb123f581d2ac258f
data8 0xb504f333f9de6484, 0xb8fbaf4762fb9ee9
data8 0xbd08a39f580c36be, 0xc12c4cca66709456
data8 0xc5672a115506dadd, 0xc9b9bd866e2f27a2
data8 0xce248c151f8480e3, 0xd2a81d91f12ae45a
data8 0xd744fccad69d6af4, 0xdbfbb797daf23755
data8 0xe0ccdeec2a94e111, 0xe5b906e77c8348a8
data8 0xeac0c6e7dd24392e, 0xefe4b99bdcdaf5cb
data8 0xf5257d152486cc2c, 0xfa83b2db722a033a
LOCAL_OBJECT_END(T_table)
.section .text
// exp10: double precision 10^x for the value in f8; result returned in f8.
//
// Main path, for finite non-zero x within the overflow/underflow limits:
//   1. y = x*log2(10) is rounded to K + f (f has 10 fractional bits) by
//      adding and subtracting the constant 1.5 with the scaled log2(10).
//   2. r = x*log2(10)_hi - (K + f), with the low-order correction e
//      computed separately from log2(10)_lo.
//   3. result = T + T*P, where T = 2^K * T_high[f_high] * T_low[f_low]
//      and P is the degree-4 polynomial in r plus C_1*e.
// Early exits:
//   NaN, infinity or zero           -> SPECIAL_exp10
//   x above/below the range limits  -> OUT_RANGE_exp10
// Predicates: p6 = x < 0, p8 = x >= 0, p12 = special or out of range,
// p7/p9 = inexact/exact final result.
GLOBAL_IEEE754_ENTRY(exp10)
{.mfi
alloc r32= ar.pfs, 1, 4, 4, 0
// will continue only for non-zero normal/denormal numbers
fclass.nm.unc p12, p7= f8, 0x1b
mov GR_BIAS53= 0xffff+63-10
}
{.mlx
// GR_TBL_START= pointer to log2(10), C_1...C_4 followed by T_table
addl GR_TBL_START= @ltoff(poly_coeffs), gp
movl GR_ROUNDVAL= 0x3fc00000 // 1.5 (SP)
}
;;
{.mfi
ld8 GR_COEFF_START= [ GR_TBL_START ] // Load pointer to coeff table
fcmp.lt.s1 p6, p8= f8, f0 // X<0 ?
nop.i 0
}
;;
{.mlx
setf.exp FR_2P53= GR_BIAS53 // 2^{63-10}
movl GR_UF_LIMIT= 0xc07439b746e36b52 // (-2^10-51) / log2(10)
}
{.mlx
setf.s FR_ROUNDVAL= GR_ROUNDVAL
movl GR_OF_LIMIT= 0x40734413509f79fe // Overflow threshold
}
;;
{.mib
ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63)
nop.i 0
(p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero
}
;;
{.mmf
ldfe FR_L2_10_low= [ GR_COEFF_START ], 16 // load log2(10)_low
setf.d FR_OF_LIMIT= GR_OF_LIMIT // Set overflow limit
fma.s0 f8= f8, f1, f0 // normalize x
}
;;
// FR_int_x is only computed for x >= 0 (p8); it feeds the exact-result test.
{.mfi
ldfpd FR_COEFF3, FR_COEFF4= [ GR_COEFF_START ], 16 // load C_3, C_4
(p8) fcvt.fx.s1 FR_int_x = f8 // Convert x to integer
nop.i 0
}
{.mfi
setf.d FR_UF_LIMIT= GR_UF_LIMIT // Set underflow limit
fma.s1 FR_KF0= f8, FR_LOG2_10, FR_ROUNDVAL // y= (x*log2(10)*2^10 +
// 1.5*2^63) * 2^(-63)
mov GR_EXP_CORR= 0xffff-126
}
;;
{.mfi
nop.m 0
fma.s1 FR_L2_10_high= FR_LOG2_10, FR_2P53, f0 // FR_L2_10_high= log2(10)_hi
nop.i 0
}
;;
{.mfi
ldfe FR_COEFF1= [ GR_COEFF_START ], 16 // load C_1
fms.s1 FR_KF= FR_KF0, f1, FR_ROUNDVAL // (K+f)*2^(10-63)
mov GR_MASK= 1023
}
;;
// From here on FR_LOG2_10 (f10) is reused to hold y0, not the constant.
{.mfi
ldfe FR_COEFF2= [ GR_COEFF_START ], 16 // load C_2
fma.s1 FR_LOG2_10= f8, FR_L2_10_high, f0 // y0= x*log2(10)_hi
mov GR_MASK_low= 31
}
;;
{.mlx
getf.sig GR_KF0= FR_KF0 // (K+f)*2^10= round_to_int(y)
(p8) movl GR_exact_limit= 0x41b00000 // Largest x for exact result,
// +22.0
}
;;
// After the five coefficient loads GR_COEFF_START has advanced by 80 bytes;
// the code treats it as the base of the f_low table, with the f_high table
// 256 bytes further on.
{.mfi
add GR_LOG_TBL= 256, GR_COEFF_START // Pointer to high T_table
fcmp.gt.s1 p12, p7= f8, FR_OF_LIMIT // x>overflow threshold ?
nop.i 0
}
;;
{.mfi
(p8) setf.s FR_exact_limit = GR_exact_limit // Largest x for exact result
(p8) fcvt.xf FR_int_x = FR_int_x // Integral part of x
shr GR_K= GR_KF0, 10 // K
}
{.mfi
and GR_F_high= GR_MASK, GR_KF0 // f_high*32
fnma.s1 FR_R= FR_KF, FR_2P53, FR_LOG2_10 // r= x*log2(10)-2^{63-10}*
// [ (K+f)*2^{10-63} ]
and GR_F_low= GR_KF0, GR_MASK_low // f_low
}
;;
{.mmi
shladd GR_Flow_ADDR= GR_F_low, 3, GR_COEFF_START // address of 2^{f_low}
add GR_BIAS= GR_K, GR_EXP_CORR // biased exponent of 2^{K-126}
shr GR_Fh= GR_F_high, 5 // f_high
}
;;
// The underflow test only runs if the overflow test did not fire (p7).
{.mfi
setf.exp FR_2_TO_K= GR_BIAS // 2^{K-126}
(p7) fcmp.lt.s1 p12, p7= f8, FR_UF_LIMIT // x<underflow threshold ?
shladd GR_Fh_ADDR= GR_Fh, 3, GR_LOG_TBL // address of 2^{f_high}
}
{.mfi
ldf8 FR_T_low= [ GR_Flow_ADDR ] // load T_low= 2^{f_low}
fms.s1 FR_DX_L210= f8, FR_L2_10_high, FR_LOG2_10 // x*log2(10)_hi-
// RN(x*log2(10)_hi)
nop.i 0
}
;;
{.mfi
ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high}
fma.s1 FR_P34= FR_COEFF4, FR_R, FR_COEFF3 // P34= C_3+C_4*r
nop.i 0
}
{.mfb
nop.m 0
fma.s1 FR_R2= FR_R, FR_R, f0 // r*r
(p12) br.cond.spnt OUT_RANGE_exp10
}
;;
{.mfi
nop.m 0
// e= (x*log2(10)_hi-RN(x*log2(10)_hi))+log2(10)_lo*x
fma.s1 FR_E0= f8, FR_L2_10_low, FR_DX_L210
cmp.eq p7,p9= r0,r0 // Assume inexact result
}
{.mfi
nop.m 0
fma.s1 FR_P12= FR_COEFF2, FR_R, FR_COEFF1 // P12= C_1+C_2*r
nop.i 0
}
;;
{.mfi
nop.m 0
(p8) fcmp.eq.s1 p9,p7= FR_int_x, f8 // Test x positive integer
nop.i 0
}
{.mfi
nop.m 0
fma.s1 FR_T_low_K= FR_T_low, FR_2_TO_K, f0 // T= 2^{K-126}*T_low
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_E= FR_E0, FR_COEFF1, f0 // E= C_1*e
nop.i 0
}
{.mfi
nop.m 0
fma.s1 FR_P14= FR_R2, FR_P34, FR_P12 // P14= P12+r2*P34
nop.i 0
}
;;
// If x a positive integer, will it produce an exact result?
// p7 result will be inexact
// p9 result will be exact
{.mfi
nop.m 0
(p9) fcmp.le.s1 p9,p7= f8, FR_exact_limit // Test x gives exact result
nop.i 0
}
{.mfi
nop.m 0
fma.s1 FR_T= FR_T_low_K, FR_T_high, f0 // T= T*T_high
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_P= FR_P14, FR_R, FR_E // P= P14*r+E
nop.i 0
}
;;
// Same final operation in either case; only the status field differs, so
// that exact results do not set the inexact flag in s0.
.pred.rel "mutex",p7,p9
{.mfi
nop.m 0
(p7) fma.d.s0 f8= FR_P, FR_T, FR_T // result= T+T*P, inexact set
nop.i 0
}
{.mfb
nop.m 0
(p9) fma.d.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1
br.ret.sptk b0 // return
}
;;
// SPECIAL_exp10: reached when x is not a normal/denormal finite non-zero
// value.  Classifies x and returns directly:
//   -infinity -> 0
//   +infinity -> x unchanged
//   +/-0      -> 1
//   otherwise (NaN) -> x*1 + 0 computed in s0 with double rounding
SPECIAL_exp10:
{.mfi
nop.m 0
fclass.m p6, p0= f8, 0x22 // x= -Infinity ?
nop.i 0
}
;;
{.mfi
nop.m 0
fclass.m p7, p0= f8, 0x21 // x= +Infinity ?
nop.i 0
}
;;
{.mfi
nop.m 0
fclass.m p8, p0= f8, 0x7 // x= +/-Zero ?
nop.i 0
}
{.mfb
nop.m 0
(p6) mov f8= f0 // exp10(-Infinity)= 0
(p6) br.ret.spnt b0
}
;;
{.mfb
nop.m 0
nop.f 0
(p7) br.ret.spnt b0 // exp10(+Infinity)= +Infinity
}
;;
{.mfb
nop.m 0
(p8) mov f8= f1 // exp10(+/-0)= 1
(p8) br.ret.spnt b0
}
;;
{.mfb
nop.m 0
fma.d.s0 f8= f8, f1, f0 // Remaining cases: NaNs
br.ret.sptk b0
}
;;
// OUT_RANGE_exp10: reached when x is beyond the overflow or underflow
// limit.  p8 (x >= 0) selects the overflow sequence, p6 (x < 0) the
// underflow sequence; the two are mutually exclusive.
//   overflow : f8 = (largest exponent)^2 rounded to double in s0, tag 166,
//              then falls through into __libm_error_region.
//   underflow: f8 = (smallest exponent)^2 rounded to double in s0, then
//              returns without calling the error handler.
OUT_RANGE_exp10:
// overflow: p8= 1
// FR_X shares f10 with FR_LOG2_10, which the main path has overwritten with
// x*log2(10)_hi.  Copy the (normalized) input into FR_X here, before f8 is
// replaced by the overflow result, so that __libm_error_region hands the
// real argument to __libm_error_support as parameter 1.
{.mfi
(p8) mov GR_EXPMAX= 0x1fffe
(p8) mov FR_X= f8 // Save input for error call
nop.i 0
}
;;
{.mmb
(p8) mov GR_Parameter_TAG= 166
(p8) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfi
nop.m 999
(p8) fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow
nop.i 999
}
// underflow: p6= 1
{.mii
nop.m 0
(p6) mov GR_EXPMAX= 1
nop.i 0
}
;;
{.mmb
nop.m 0
(p6) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfb
nop.m 999
(p6) fma.d.s0 f8= FR_R, FR_R, f0 // Create underflow
(p6) br.ret.sptk b0 // will not call libm_error for underflow
}
;;
GLOBAL_IEEE754_END(exp10)
weak_alias (exp10, pow10)
// __libm_error_region: error tail for exp10, entered by falling through
// from the overflow sequence of OUT_RANGE_exp10 (GR_Parameter_TAG already
// set).  Opens a 64-byte frame and stores, as doubles,
//     sp+16 : parameter 1 (FR_X)
//     sp+32 : parameter 2 (FR_Y)
//     sp+48 : result      (FR_RESULT)
// leaving their addresses in GR_Parameter_X / _Y / _RESULT (r37-r39, the
// output registers reserved by the alloc in exp10), then calls
// __libm_error_support, reloads f8 from sp+48 and returns to exp10's caller.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi
add GR_Parameter_Y= -32, sp // Parameter 2 value
nop.f 0
.save ar.pfs, GR_SAVE_PFS
mov GR_SAVE_PFS= ar.pfs // Save ar.pfs
}
{.mfi
.fframe 64
add sp= -64, sp // Create new stack
nop.f 0
mov GR_SAVE_GP= gp // Save gp
}
;;
// GR_Parameter_Y was computed from the old sp, so it equals new sp+32;
// the post-increment leaves it at new sp+48.
{.mmi
stfd [ GR_Parameter_Y ]= FR_Y, 16 // STORE Parameter 2 on stack
add GR_Parameter_X= 16, sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0= b0 // Save b0
}
;;
.body
{.mib
stfd [ GR_Parameter_X ]= FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT= 0, GR_Parameter_Y // Parameter 3 address
nop.b 0
}
// Store the default result at sp+48, then step GR_Parameter_Y back to sp+32.
{.mib
stfd [ GR_Parameter_Y ]= FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y= -16, GR_Parameter_Y
br.call.sptk b0= __libm_error_support# // Call error handling function
}
;;
// Recompute the result address after the call rather than relying on the
// value set up before it.
{.mmi
add GR_Parameter_RESULT= 48, sp
nop.m 0
nop.i 0
}
;;
{.mmi
ldfd f8= [ GR_Parameter_RESULT ] // Get return result off stack
.restore sp
add sp= 64, sp // Restore stack pointer
mov b0= GR_SAVE_B0 // Restore return address
}
;;
{.mib
mov gp= GR_SAVE_GP // Restore gp
mov ar.pfs= GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
}
;;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#, @function
.global __libm_error_support#

561
sysdeps/ia64/fpu/e_exp10f.S Normal file
View File

@ -0,0 +1,561 @@
.file "exp10f.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/25/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/06/02 Improved performance and accuracy; no inexact flags on exact cases
// 01/29/03 Added missing } to bundle templates
//
// API
//==============================================================
// float exp10f(float)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let x= (K + fh + fl + r)/log2(10), where
// K is an integer, fh= 0.b1 b2 b3 b4 b5,
// fl= 2^{-5}* 0.b6 b7 b8 b9 b10 (fh, fl >= 0),
// and |r|<2^{-11}
// Th is a table that stores 2^fh (32 entries) rounded to
// double extended precision (only mantissa is stored)
// Tl is a table that stores 2^fl (32 entries) rounded to
// double extended precision (only mantissa is stored)
//
// 10^x is approximated as
// 2^K * Th [ f ] * Tl [ f ] * (1+c1*r+c2*r^2)
// Note there are only 10 non-zero values that produce an exact result:
// 1.0, 2.0, ... 10.0.
// We test for these cases and use s1 to avoid setting the inexact flag.
// Special values
//==============================================================
// exp10(0)= 1
// exp10(+inf)= inf
// exp10(-inf)= 0
//
// Registers used
//==============================================================
// r2-r3, r14-r40
// f6-f15, f32-f51
// p6-p9, p12
//
// Symbolic register names used by exp10f and its error region.
//
// General registers: r2-r3 and r14-r31 are scratch; r33-r36 hold values
// saved across the call to __libm_error_support; r37-r40 carry the
// error-handler parameters (the alloc at function entry requests
// 1 input, 4 locals and 4 outputs starting at r32).
GR_TBL_START = r2
GR_LOG_TBL = r3
GR_OF_LIMIT = r14
GR_UF_LIMIT = r15
GR_EXP_CORR = r16
GR_F_low = r17
GR_F_high = r18
GR_K = r19
GR_Flow_ADDR = r20
GR_BIAS = r21
GR_Fh = r22
GR_Fh_ADDR = r23
GR_EXPMAX = r24
GR_ROUNDVAL = r26
GR_MASK = r27
GR_KF0 = r28
GR_MASK_low = r29
GR_COEFF_START = r30
GR_exact_limit = r31
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
GR_SAVE_SP = r36
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// Floating-point registers.
// NOTE(review): FR_X and FR_LOG2_10 both name f10, so FR_X only holds the
// input if code copies it there after FR_LOG2_10 is no longer needed.
// NOTE(review): FR_2P53, FR_COEFF3, FR_COEFF4, FR_DX_L210 and FR_E are not
// referenced by the exp10f code in this file; presumably left over from the
// double precision version.
FR_X = f10
FR_Y = f1
FR_RESULT = f8
FR_COEFF1 = f6
FR_COEFF2 = f7
FR_R = f9
FR_LOG2_10 = f10
FR_2P53 = f11
FR_KF0 = f12
FR_COEFF3 = f13
FR_COEFF4 = f14
FR_UF_LIMIT = f15
FR_OF_LIMIT = f32
FR_DX_L210 = f33
FR_ROUNDVAL = f34
FR_KF = f35
FR_2_TO_K = f36
FR_T_low = f37
FR_T_high = f38
FR_P12 = f41
FR_T_low_K = f42
FR_T = f44
FR_P = f45
FR_E = f49
FR_exact_limit = f50
FR_int_x = f51
// Data tables
//==============================================================
RODATA
.align 16
// poly_coeffs: three 16-byte entries read in order by exp10f with
// post-incrementing ldfe loads.  C_1 and C_2 are stored pre-scaled by 2^53
// and 2^106 because the reduced argument is kept as r*2^(-53).
LOCAL_OBJECT_START(poly_coeffs)
data8 0xd49a784bcd1b8afe, 0x00003fcb // log2(10)*2^(10-63)
data8 0xb17217f7d1cf79ab, 0x00004033 // C_1 * 2^53
data8 0xf5fdeffc162c7541, 0x00004066 // C_2 * 2^106
LOCAL_OBJECT_END(poly_coeffs)
// T_table: two consecutive 32-entry tables of 8-byte significands.
// Entries are loaded with ldf8, i.e. as raw 64-bit significands; the code
// compensates for the resulting scale with the 2^{K-126} factor.
// The first table (offset 0) is indexed by f_low, the second (offset 256)
// by f_high; exp10f forms the second base as GR_COEFF_START + 256.
LOCAL_OBJECT_START(T_table)
// 2^{0.00000 b6 b7 b8 b9 b10}
data8 0x8000000000000000, 0x8016302f17467628
data8 0x802c6436d0e04f50, 0x80429c17d77c18ed
data8 0x8058d7d2d5e5f6b0, 0x806f17687707a7af
data8 0x80855ad965e88b83, 0x809ba2264dada76a
data8 0x80b1ed4fd999ab6c, 0x80c83c56b50cf77f
data8 0x80de8f3b8b85a0af, 0x80f4e5ff089f763e
data8 0x810b40a1d81406d4, 0x81219f24a5baa59d
data8 0x813801881d886f7b, 0x814e67cceb90502c
data8 0x8164d1f3bc030773, 0x817b3ffd3b2f2e47
data8 0x8191b1ea15813bfd, 0x81a827baf7838b78
data8 0x81bea1708dde6055, 0x81d51f0b8557ec1c
data8 0x81eba08c8ad4536f, 0x820225f44b55b33b
data8 0x8218af4373fc25eb, 0x822f3c7ab205c89a
data8 0x8245cd9ab2cec048, 0x825c62a423d13f0c
data8 0x8272fb97b2a5894c, 0x828998760d01faf3
data8 0x82a0393fe0bb0ca8, 0x82b6ddf5dbc35906
//
//
// 2^{0.b1 b2 b3 b4 b5}
data8 0x8000000000000000, 0x82cd8698ac2ba1d7
data8 0x85aac367cc487b14, 0x88980e8092da8527
data8 0x8b95c1e3ea8bd6e6, 0x8ea4398b45cd53c0
data8 0x91c3d373ab11c336, 0x94f4efa8fef70961
data8 0x9837f0518db8a96f, 0x9b8d39b9d54e5538
data8 0x9ef5326091a111ad, 0xa27043030c496818
data8 0xa5fed6a9b15138ea, 0xa9a15ab4ea7c0ef8
data8 0xad583eea42a14ac6, 0xb123f581d2ac258f
data8 0xb504f333f9de6484, 0xb8fbaf4762fb9ee9
data8 0xbd08a39f580c36be, 0xc12c4cca66709456
data8 0xc5672a115506dadd, 0xc9b9bd866e2f27a2
data8 0xce248c151f8480e3, 0xd2a81d91f12ae45a
data8 0xd744fccad69d6af4, 0xdbfbb797daf23755
data8 0xe0ccdeec2a94e111, 0xe5b906e77c8348a8
data8 0xeac0c6e7dd24392e, 0xefe4b99bdcdaf5cb
data8 0xf5257d152486cc2c, 0xfa83b2db722a033a
LOCAL_OBJECT_END(T_table)
.section .text
// exp10f: single precision 10^x for the value in f8; result returned in f8.
//
// Main path, for finite non-zero x within the overflow/underflow limits:
//   1. y = x*log2(10) is rounded to K + f (f has 10 fractional bits) by
//      adding and subtracting the constant 1.5 with the scaled log2(10).
//   2. The reduced argument is kept scaled: FR_R = r*2^(-53).
//   3. result = T + T*P, where T = 2^K * T_high[f_high] * T_low[f_low]
//      and P = (C_1 + C_2*r)*r, using the pre-scaled coefficients.
// Early exits:
//   NaN, infinity or zero           -> SPECIAL_exp10
//   x above/below the range limits  -> OUT_RANGE_exp10
// Predicates: p6 = x < 0, p8 = x >= 0, p12 = special or out of range,
// p7/p9 = inexact/exact final result.
GLOBAL_IEEE754_ENTRY(exp10f)
{.mfi
alloc r32= ar.pfs, 1, 4, 4, 0
// will continue only for non-zero normal/denormal numbers
fclass.nm.unc p12, p7= f8, 0x1b
nop.i 0
}
{.mlx
// GR_TBL_START= pointer to log2(10), C_1, C_2 followed by T_table
addl GR_TBL_START= @ltoff(poly_coeffs), gp
movl GR_ROUNDVAL= 0x3fc00000 // 1.5 (SP)
}
;;
{.mfi
ld8 GR_COEFF_START= [ GR_TBL_START ] // Load pointer to coeff table
fcmp.lt.s1 p6, p8= f8, f0 // X<0 ?
nop.i 0
}
;;
{.mlx
nop.m 0
movl GR_UF_LIMIT= 0xc2349e35 // (-2^7-22) / log2(10)
}
{.mlx
setf.s FR_ROUNDVAL= GR_ROUNDVAL
movl GR_OF_LIMIT= 0x421a209a // Overflow threshold
}
;;
{.mib
ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63)
nop.i 0
(p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero
}
;;
{.mfi
setf.s FR_OF_LIMIT= GR_OF_LIMIT // Set overflow limit
fma.s0 f8= f8, f1, f0 // normalize x
nop.i 0
}
;;
// FR_int_x is only computed for x >= 0 (p8); it feeds the exact-result test.
{.mfi
nop.m 0
(p8) fcvt.fx.s1 FR_int_x = f8 // Convert x to integer
nop.i 0
}
{.mfi
setf.s FR_UF_LIMIT= GR_UF_LIMIT // Set underflow limit
fma.s1 FR_KF0= f8, FR_LOG2_10, FR_ROUNDVAL // y= (x*log2(10)*2^10 +
// 1.5*2^63) * 2^(-63)
mov GR_EXP_CORR= 0xffff-126
}
;;
{.mfi
ldfe FR_COEFF1= [ GR_COEFF_START ], 16 // load C_1
fms.s1 FR_KF= FR_KF0, f1, FR_ROUNDVAL // (K+f)*2^(10-63)
mov GR_MASK= 1023
}
;;
{.mfi
ldfe FR_COEFF2= [ GR_COEFF_START ], 16 // load C_2
nop.f 0
mov GR_MASK_low= 31
}
;;
{.mlx
getf.sig GR_KF0= FR_KF0 // (K+f)*2^10= round_to_int(y)
(p8) movl GR_exact_limit= 0x41200000 // Largest x for exact result,
// +10.0
}
;;
// After the three coefficient loads GR_COEFF_START has advanced by 48
// bytes; the code treats it as the base of the f_low table, with the
// f_high table 256 bytes further on.
{.mfi
add GR_LOG_TBL= 256, GR_COEFF_START // Pointer to high T_table
fcmp.gt.s1 p12, p7= f8, FR_OF_LIMIT // x>overflow threshold ?
nop.i 0
}
;;
{.mfi
(p8) setf.s FR_exact_limit = GR_exact_limit // Largest x for exact result
(p8) fcvt.xf FR_int_x = FR_int_x // Integral part of x
shr GR_K= GR_KF0, 10 // K
}
{.mfi
and GR_F_high= GR_MASK, GR_KF0 // f_high*32
fms.s1 FR_R= f8, FR_LOG2_10, FR_KF // r*2^(-53)= [ x*log2(10)-
// (K+f) ] *2^{10-63}
and GR_F_low= GR_KF0, GR_MASK_low // f_low
}
;;
{.mmi
shladd GR_Flow_ADDR= GR_F_low, 3, GR_COEFF_START // address of 2^{f_low}
add GR_BIAS= GR_K, GR_EXP_CORR // biased exponent of 2^{K-126}
shr GR_Fh= GR_F_high, 5 // f_high
}
;;
// The underflow test only runs if the overflow test did not fire (p7).
{.mfi
setf.exp FR_2_TO_K= GR_BIAS // 2^{K-126}
(p7) fcmp.lt.s1 p12, p7= f8, FR_UF_LIMIT // x<underflow threshold ?
shladd GR_Fh_ADDR= GR_Fh, 3, GR_LOG_TBL // address of 2^{f_high}
}
{.mfi
ldf8 FR_T_low= [ GR_Flow_ADDR ] // load T_low= 2^{f_low}
nop.f 0
nop.i 0
}
;;
{.mfb
ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high}
nop.f 0
(p12) br.cond.spnt OUT_RANGE_exp10
}
;;
{.mfi
nop.m 0
fma.s1 FR_P12= FR_COEFF2, FR_R, FR_COEFF1 // P12= C_1+C_2*r
cmp.eq p7,p9= r0,r0 // Assume inexact result
}
;;
{.mfi
nop.m 0
(p8) fcmp.eq.s1 p9,p7= FR_int_x, f8 // Test x positive integer
nop.i 0
}
{.mfi
nop.m 0
fma.s1 FR_T_low_K= FR_T_low, FR_2_TO_K, f0 // T= 2^{K-126}*T_low
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_P= FR_P12, FR_R, f0 // P= P12*r
nop.i 0
}
;;
// If x a positive integer, will it produce an exact result?
// p7 result will be inexact
// p9 result will be exact
{.mfi
nop.m 0
(p9) fcmp.le.s1 p9,p7= f8, FR_exact_limit // Test x gives exact result
nop.i 0
}
{.mfi
nop.m 0
fma.s1 FR_T= FR_T_low_K, FR_T_high, f0 // T= T*T_high
nop.i 0
}
;;
// Same final operation in either case; only the status field differs, so
// that exact results do not set the inexact flag in s0.
.pred.rel "mutex",p7,p9
{.mfi
nop.m 0
(p7) fma.s.s0 f8= FR_P, FR_T, FR_T // result= T+T*P, inexact set
nop.i 0
}
{.mfb
nop.m 0
(p9) fma.s.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1
br.ret.sptk b0 // return
}
;;
// SPECIAL_exp10: reached when x is not a normal/denormal finite non-zero
// value.  Classifies x and returns directly:
//   -infinity -> 0
//   +infinity -> x unchanged
//   +/-0      -> 1
//   otherwise (NaN) -> x*1 + 0 computed in s0 with single rounding
SPECIAL_exp10:
{.mfi
nop.m 0
fclass.m p6, p0= f8, 0x22 // x= -Infinity ?
nop.i 0
}
;;
{.mfi
nop.m 0
fclass.m p7, p0= f8, 0x21 // x= +Infinity ?
nop.i 0
}
;;
{.mfi
nop.m 0
fclass.m p8, p0= f8, 0x7 // x= +/-Zero ?
nop.i 0
}
{.mfb
nop.m 0
(p6) mov f8= f0 // exp10(-Infinity)= 0
(p6) br.ret.spnt b0
}
;;
{.mfb
nop.m 0
nop.f 0
(p7) br.ret.spnt b0 // exp10(+Infinity)= +Infinity
}
;;
{.mfb
nop.m 0
(p8) mov f8= f1 // exp10(+/-0)= 1
(p8) br.ret.spnt b0
}
;;
{.mfb
nop.m 0
fma.s.s0 f8= f8, f1, f0 // Remaining cases: NaNs
br.ret.sptk b0
}
;;
// OUT_RANGE_exp10: reached when x is beyond the overflow or underflow
// limit.  p8 (x >= 0) selects the overflow sequence, p6 (x < 0) the
// underflow sequence; the two are mutually exclusive.
//   overflow : f8 = (largest exponent)^2 rounded to single in s0, tag 167,
//              then falls through into __libm_error_region.
//   underflow: f8 = (smallest exponent)^2 rounded to single in s0, then
//              returns without calling the error handler.
OUT_RANGE_exp10:
// overflow: p8= 1
// FR_X shares f10 with FR_LOG2_10, which still holds the scaled log2(10)
// constant at this point.  Copy the (normalized) input into FR_X here,
// before f8 is replaced by the overflow result, so that __libm_error_region
// hands the real argument to __libm_error_support as parameter 1.
{.mfi
(p8) mov GR_EXPMAX= 0x1fffe
(p8) mov FR_X= f8 // Save input for error call
nop.i 0
}
;;
{.mmb
(p8) mov GR_Parameter_TAG= 167
(p8) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfi
nop.m 999
(p8) fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow
nop.i 999
}
// underflow: p6= 1
{.mii
nop.m 0
(p6) mov GR_EXPMAX= 1
nop.i 0
}
;;
{.mmb
nop.m 0
(p6) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfb
nop.m 999
(p6) fma.s.s0 f8= FR_R, FR_R, f0 // Create underflow
(p6) br.ret.sptk b0 // will not call libm_error for underflow
}
;;
GLOBAL_IEEE754_END(exp10f)
weak_alias (exp10f, pow10f)
// __libm_error_region: error tail for exp10f, entered by falling through
// from the overflow sequence of OUT_RANGE_exp10 (GR_Parameter_TAG already
// set).  Opens a 64-byte frame and stores, as singles,
//     sp+16 : parameter 1 (FR_X)
//     sp+32 : parameter 2 (FR_Y)
//     sp+48 : result      (FR_RESULT)
// leaving their addresses in GR_Parameter_X / _Y / _RESULT (r37-r39, the
// output registers reserved by the alloc in exp10f), then calls
// __libm_error_support, reloads f8 from sp+48 and returns to exp10f's caller.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi
add GR_Parameter_Y= -32, sp // Parameter 2 value
nop.f 0
.save ar.pfs, GR_SAVE_PFS
mov GR_SAVE_PFS= ar.pfs // Save ar.pfs
}
{.mfi
.fframe 64
add sp= -64, sp // Create new stack
nop.f 0
mov GR_SAVE_GP= gp // Save gp
}
;;
// GR_Parameter_Y was computed from the old sp, so it equals new sp+32;
// the post-increment leaves it at new sp+48.
{.mmi
stfs [ GR_Parameter_Y ]= FR_Y, 16 // STORE Parameter 2 on stack
add GR_Parameter_X= 16, sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0= b0 // Save b0
}
;;
.body
{.mib
stfs [ GR_Parameter_X ]= FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT= 0, GR_Parameter_Y // Parameter 3 address
nop.b 0
}
// Store the default result at sp+48, then step GR_Parameter_Y back to sp+32.
{.mib
stfs [ GR_Parameter_Y ]= FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y= -16, GR_Parameter_Y
br.call.sptk b0= __libm_error_support# // Call error handling function
}
;;
// Recompute the result address after the call rather than relying on the
// value set up before it.
{.mmi
add GR_Parameter_RESULT= 48, sp
nop.m 0
nop.i 0
}
;;
{.mmi
ldfs f8= [ GR_Parameter_RESULT ] // Get return result off stack
.restore sp
add sp= 64, sp // Restore stack pointer
mov b0= GR_SAVE_B0 // Restore return address
}
;;
{.mib
mov gp= GR_SAVE_GP // Restore gp
mov ar.pfs= GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
}
;;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#, @function
.global __libm_error_support#

805
sysdeps/ia64/fpu/e_exp10l.S Normal file
View File

@ -0,0 +1,805 @@
.file "exp10l.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/25/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 05/08/03 Reformatted assembly source; corrected overflow result for round to
// -inf and round to zero; exact results now don't set inexact flag
//
// API
//==============================================================
// long double exp10l(long double)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let x= (K + f + r)/log2(10), where
// K is an integer, f= 0.b1 b2... b8 (f>= 0),
// and |r|<2^{-9}
// T is a table that stores 2^f (256 entries) rounded to
// double extended precision (only mantissa is stored)
// D stores (2^f/T [ f ] - 1), rounded to single precision
//
// 10^x is approximated as
// 2^K * T [ f ] * ((1+c1*r+c2*r^2+...+c6*r^6)*(1+c1*e)+D [ f ] ),
// where e= log2(10)_lo*x+(log2(10)_hi*x-RN(log2(10)_hi*x))
//
// Special values
//==============================================================
// exp10(0)= 1
// exp10(+inf)= inf
// exp10(-inf)= 0
//
// Registers used
//==============================================================
// f6-f15, f32-f62
// r14-r30, r32-r40
// p6-p8, p12-p14
//
// Symbolic register names used by exp10l and by its __libm_error_region.
//
// Floating-point registers.  FR_X, FR_Y and FR_RESULT are the values that
// __libm_error_region stores on the stack for __libm_error_support.
// Note that FR_X and FR_LOG10 both name f10.
FR_X             = f10
FR_Y             = f1
FR_RESULT        = f8

FR_COEFF1        = f6
// FR_COEFF2 used to be bound twice in this list (first to f7, later to f47).
// With "=" the later binding replaces the earlier one, so the f7 binding was
// dead; only the effective binding is kept to avoid misleading readers.
FR_COEFF2        = f47
FR_KF0           = f9
FR_LOG10         = f10
FR_CONST1        = f11
FR_XL10          = f12
FR_COEFF3        = f13
FR_COEFF4        = f14
FR_UF_TEST       = f15
FR_OF_TEST       = f32
FR_L10_LOW       = f33
FR_COEFF5        = f34
FR_COEFF6        = f35
FR_L10           = f36
FR_C_L10         = f37
FR_XL10_H        = f38
FR_XL10_L        = f39
FR_KF            = f40
FR_E             = f41
FR_T             = f42
FR_D             = f43
FR_EXP_M_63      = f44
FR_R             = f45
FR_E1            = f46
FR_P34           = f48
FR_P56           = f49
FR_R2            = f50
FR_RE            = f51
FR_D1            = f52
FR_P36           = f53
FR_R3E           = f54
FR_P1            = f55
FR_P             = f56
FR_T1            = f57
FR_XINT          = f58
FR_XINTF         = f59
FR_4             = f60
FR_28            = f61
FR_32            = f62

// General registers used as scratch by the main path.
GR_ADDR0         = r14
GR_D_ADDR        = r15
GR_ADDR          = r16
GR_B63           = r17
GR_KBITS         = r18
GR_F             = r19
GR_K             = r20
GR_D             = r21
GR_BM63          = r22
GR_T             = r23
GR_CONST1        = r24
GR_EMIN          = r25
GR_CONST2        = r26
GR_BM8           = r27
GR_SREG          = r28
GR_4_BIAS        = r29
GR_32_BIAS       = r30

// Stacked registers: save slots used by __libm_error_region ...
GR_SAVE_B0       = r33
GR_SAVE_PFS      = r34
GR_SAVE_GP       = r35
GR_SAVE_SP       = r36

// ... and the four values set up for the call to __libm_error_support.
GR_Parameter_X   = r37
GR_Parameter_Y   = r38
GR_Parameter_RESULT= r39
GR_Parameter_TAG = r40
// Data tables
//==============================================================
RODATA
.align 16
// poly_coeffs: scalar constants for exp10l.
// The code walks this object front to back through GR_ADDR0 using
// post-incremented loads (16 bytes per step), so the order and the size of
// every entry below are part of the contract with the code.
// Two-word entries (significand, then sign/exponent word) are read with ldfe;
// C_3..C_6 are single data8 words read two at a time with ldfpd.
// T_table and D_table must follow immediately: the code derives their
// addresses from GR_ADDR0 by fixed offsets.
LOCAL_OBJECT_START(poly_coeffs)
data8 0xd49a784bcd1b8afe, 0x00004008 // log2(10)*2^8
data8 0x9a209a84fbcff798, 0x0000400b // overflow threshold
data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1
data8 0xf5fdeffc162c7541, 0x00003ffc // C_2
data8 0x3fac6b08d704a0c0 // C_3
data8 0x3f83b2ab6fba4e77 // C_4
data8 0x3f55d87fe78a6731 // C_5
data8 0x3f2430912f86c787 // C_6
data8 0x9257edfe9b5fb698, 0x00003fbf // log2(10)_low (bits 64...127)
LOCAL_OBJECT_END(poly_coeffs)
// T_table: 256 entries of 8 bytes each, indexed by the 8 fraction bits
// f = 0.b1...b8.  Only the 64-bit significand of 2^f is stored; the code
// loads an entry with ldf8 at address T_table + 8*index and supplies the
// exponent separately by multiplying with 2^{K-63}.
LOCAL_OBJECT_START(T_table)
// 2^{0.b1 b2 b3 b4 b5 b6 b7 b8}
data8 0x8000000000000000, 0x8058d7d2d5e5f6b1
data8 0x80b1ed4fd999ab6c, 0x810b40a1d81406d4
data8 0x8164d1f3bc030773, 0x81bea1708dde6056
data8 0x8218af4373fc25ec, 0x8272fb97b2a5894c
data8 0x82cd8698ac2ba1d7, 0x83285071e0fc4547
data8 0x8383594eefb6ee37, 0x83dea15b9541b132
data8 0x843a28c3acde4046, 0x8495efb3303efd30
data8 0x84f1f656379c1a29, 0x854e3cd8f9c8c95d
data8 0x85aac367cc487b15, 0x86078a2f23642a9f
data8 0x8664915b923fba04, 0x86c1d919caef5c88
data8 0x871f61969e8d1010, 0x877d2afefd4e256c
data8 0x87db357ff698d792, 0x88398146b919f1d4
data8 0x88980e8092da8527, 0x88f6dd5af155ac6b
data8 0x8955ee03618e5fdd, 0x89b540a7902557a4
data8 0x8a14d575496efd9a, 0x8a74ac9a79896e47
data8 0x8ad4c6452c728924, 0x8b3522a38e1e1032
data8 0x8b95c1e3ea8bd6e7, 0x8bf6a434adde0085
data8 0x8c57c9c4646f4dde, 0x8cb932c1bae97a95
data8 0x8d1adf5b7e5ba9e6, 0x8d7ccfc09c50e2f8
data8 0x8ddf042022e69cd6, 0x8e417ca940e35a01
data8 0x8ea4398b45cd53c0, 0x8f073af5a2013520
data8 0x8f6a8117e6c8e5c4, 0x8fce0c21c6726481
data8 0x9031dc431466b1dc, 0x9095f1abc540ca6b
data8 0x90fa4c8beee4b12b, 0x915eed13c89689d3
data8 0x91c3d373ab11c336, 0x9228ffdc10a051ad
data8 0x928e727d9531f9ac, 0x92f42b88f673aa7c
data8 0x935a2b2f13e6e92c, 0x93c071a0eef94bc1
data8 0x9426ff0fab1c04b6, 0x948dd3ac8ddb7ed3
data8 0x94f4efa8fef70961, 0x955c5336887894d5
data8 0x95c3fe86d6cc7fef, 0x962bf1cbb8d97560
data8 0x96942d3720185a00, 0x96fcb0fb20ac4ba3
data8 0x97657d49f17ab08e, 0x97ce9255ec4357ab
data8 0x9837f0518db8a96f, 0x98a1976f7597e996
data8 0x990b87e266c189aa, 0x9975c1dd47518c77
data8 0x99e0459320b7fa65, 0x9a4b13371fd166ca
data8 0x9ab62afc94ff864a, 0x9b218d16f441d63d
data8 0x9b8d39b9d54e5539, 0x9bf93118f3aa4cc1
data8 0x9c6573682ec32c2d, 0x9cd200db8a0774cb
data8 0x9d3ed9a72cffb751, 0x9dabfdff6367a2aa
data8 0x9e196e189d472420, 0x9e872a276f0b98ff
data8 0x9ef5326091a111ae, 0x9f6386f8e28ba651
data8 0x9fd228256400dd06, 0xa041161b3d0121be
data8 0xa0b0510fb9714fc2, 0xa11fd9384a344cf7
data8 0xa18faeca8544b6e4, 0xa1ffd1fc25cea188
data8 0xa27043030c496819, 0xa2e102153e918f9e
data8 0xa3520f68e802bb93, 0xa3c36b345991b47c
data8 0xa43515ae09e6809e, 0xa4a70f0c95768ec5
data8 0xa5195786be9ef339, 0xa58bef536dbeb6ee
data8 0xa5fed6a9b15138ea, 0xa6720dc0be08a20c
data8 0xa6e594cfeee86b1e, 0xa7596c0ec55ff55b
data8 0xa7cd93b4e965356a, 0xa8420bfa298f70d1
data8 0xa8b6d5167b320e09, 0xa92bef41fa77771b
data8 0xa9a15ab4ea7c0ef8, 0xaa1717a7b5693979
data8 0xaa8d2652ec907629, 0xab0386ef48868de1
data8 0xab7a39b5a93ed337, 0xabf13edf162675e9
data8 0xac6896a4be3fe929, 0xace0413ff83e5d04
data8 0xad583eea42a14ac6, 0xadd08fdd43d01491
data8 0xae493452ca35b80e, 0xaec22c84cc5c9465
data8 0xaf3b78ad690a4375, 0xafb51906e75b8661
data8 0xb02f0dcbb6e04584, 0xb0a957366fb7a3c9
data8 0xb123f581d2ac2590, 0xb19ee8e8c94feb09
data8 0xb21a31a66618fe3b, 0xb295cff5e47db4a4
data8 0xb311c412a9112489, 0xb38e0e38419fae18
data8 0xb40aaea2654b9841, 0xb487a58cf4a9c180
data8 0xb504f333f9de6484, 0xb58297d3a8b9f0d2
data8 0xb60093a85ed5f76c, 0xb67ee6eea3b22b8f
data8 0xb6fd91e328d17791, 0xb77c94c2c9d725e9
data8 0xb7fbefca8ca41e7c, 0xb87ba337a1743834
data8 0xb8fbaf4762fb9ee9, 0xb97c143756844dbf
data8 0xb9fcd2452c0b9deb, 0xba7de9aebe5fea09
data8 0xbaff5ab2133e45fb, 0xbb81258d5b704b6f
data8 0xbc034a7ef2e9fb0d, 0xbc85c9c560e7b269
data8 0xbd08a39f580c36bf, 0xbd8bd84bb67ed483
data8 0xbe0f6809860993e2, 0xbe935317fc378238
data8 0xbf1799b67a731083, 0xbf9c3c248e2486f8
data8 0xc0213aa1f0d08db0, 0xc0a6956e8836ca8d
data8 0xc12c4cca66709456, 0xc1b260f5ca0fbb33
data8 0xc238d2311e3d6673, 0xc2bfa0bcfad907c9
data8 0xc346ccda24976407, 0xc3ce56c98d21b15d
data8 0xc4563ecc5334cb33, 0xc4de8523c2c07baa
data8 0xc5672a115506dadd, 0xc5f02dd6b0bbc3d9
data8 0xc67990b5aa245f79, 0xc70352f04336c51e
data8 0xc78d74c8abb9b15d, 0xc817f681416452b2
data8 0xc8a2d85c8ffe2c45, 0xc92e1a9d517f0ecc
data8 0xc9b9bd866e2f27a3, 0xca45c15afcc72624
data8 0xcad2265e4290774e, 0xcb5eecd3b38597c9
data8 0xcbec14fef2727c5d, 0xcc799f23d11510e5
data8 0xcd078b86503dcdd2, 0xcd95da6a9ff06445
data8 0xce248c151f8480e4, 0xceb3a0ca5dc6a55d
data8 0xcf4318cf191918c1, 0xcfd2f4683f94eeb5
data8 0xd06333daef2b2595, 0xd0f3d76c75c5db8d
data8 0xd184df6251699ac6, 0xd2164c023056bcab
data8 0xd2a81d91f12ae45a, 0xd33a5457a3029054
data8 0xd3ccf099859ac379, 0xd45ff29e0972c561
data8 0xd4f35aabcfedfa1f, 0xd5872909ab75d18a
data8 0xd61b5dfe9f9bce07, 0xd6aff9d1e13ba2fe
data8 0xd744fccad69d6af4, 0xd7da67311797f56a
data8 0xd870394c6db32c84, 0xd9067364d44a929c
data8 0xd99d15c278afd7b6, 0xda3420adba4d8704
data8 0xdacb946f2ac9cc72, 0xdb63714f8e295255
data8 0xdbfbb797daf23755, 0xdc9467913a4f1c92
data8 0xdd2d818508324c20, 0xddc705bcd378f7f0
data8 0xde60f4825e0e9124, 0xdefb4e1f9d1037f2
data8 0xdf9612deb8f04420, 0xe031430a0d99e627
data8 0xe0ccdeec2a94e111, 0xe168e6cfd3295d23
data8 0xe2055afffe83d369, 0xe2a23bc7d7d91226
data8 0xe33f8972be8a5a51, 0xe3dd444c46499619
data8 0xe47b6ca0373da88d, 0xe51a02ba8e26d681
data8 0xe5b906e77c8348a8, 0xe658797368b3a717
data8 0xe6f85aaaee1fce22, 0xe798aadadd5b9cbf
data8 0xe8396a503c4bdc68, 0xe8da9958464b42ab
data8 0xe97c38406c4f8c57, 0xea1e4756550eb27b
data8 0xeac0c6e7dd24392f, 0xeb63b74317369840
data8 0xec0718b64c1cbddc, 0xecaaeb8ffb03ab41
data8 0xed4f301ed9942b84, 0xedf3e6b1d418a491
data8 0xee990f980da3025b, 0xef3eab20e032bc6b
data8 0xefe4b99bdcdaf5cb, 0xf08b3b58cbe8b76a
data8 0xf13230a7ad094509, 0xf1d999d8b7708cc1
data8 0xf281773c59ffb13a, 0xf329c9233b6bae9c
data8 0xf3d28fde3a641a5b, 0xf47bcbbe6db9fddf
data8 0xf5257d152486cc2c, 0xf5cfa433e6537290
data8 0xf67a416c733f846e, 0xf7255510c4288239
data8 0xf7d0df730ad13bb9, 0xf87ce0e5b2094d9c
data8 0xf92959bb5dd4ba74, 0xf9d64a46eb939f35
data8 0xfa83b2db722a033a, 0xfb3193cc4227c3f4
data8 0xfbdfed6ce5f09c49, 0xfc8ec01121e447bb
data8 0xfd3e0c0cf486c175, 0xfdedd1b496a89f35
data8 0xfe9e115c7b8f884c, 0xff4ecb59511ec8a5
LOCAL_OBJECT_END(T_table)
// D_table: 256 single-precision (data4) correction terms, one per T_table
// entry, described in the file header as (2^f/T[f] - 1) rounded to single
// precision.  The code loads an entry with ldfs at D_table + 4*index, using
// the same 8-bit index as for T_table.
LOCAL_OBJECT_START(D_table)
data4 0x00000000, 0x9f55c08f, 0x1e93ffa3, 0x1dcd43a8
data4 0x1f751f79, 0x9f3cdd88, 0x9f43d155, 0x1eda222c
data4 0x1ef35513, 0x9f597895, 0x9e698881, 0x1ec71073
data4 0x1e50e371, 0x9dc01e19, 0x1de74133, 0x1e2f028c
data4 0x9edefb47, 0x1ebbac48, 0x9e8b0330, 0x9e9e9314
data4 0x1edc1d11, 0x1f098529, 0x9f52827c, 0x1f50050d
data4 0x1f301e8e, 0x1f5b64d1, 0x9f45e3ee, 0x9ef64d6d
data4 0x1d6ec5e8, 0x9e61ad9a, 0x1d44ccbb, 0x9e4a8bbb
data4 0x9cf11576, 0x9dcce7e7, 0x9d02ac90, 0x1f26ccf0
data4 0x9f0877c6, 0x9ddd62ae, 0x9f4b7fc3, 0x1ea8ef6b
data4 0x1ea4378d, 0x1ef6fc38, 0x1db99fd9, 0x1f22bf6f
data4 0x1f53e172, 0x1e85504a, 0x9f37cc75, 0x1f0c5e17
data4 0x1dde8aac, 0x9cb42bb2, 0x1e153cd7, 0x1eb62bba
data4 0x9e9b941b, 0x9ea80e3c, 0x1f508823, 0x1ec3fd36
data4 0x1e9ffaa1, 0x1e21e2eb, 0x9d948b1d, 0x9e8ac93a
data4 0x1ef7ee6f, 0x9e80dda3, 0x1f0814be, 0x1dc5ddfe
data4 0x1eedb9d1, 0x9f2aaa26, 0x9ea5b0fc, 0x1edf702e
data4 0x9e391201, 0x1f1316bb, 0x1ea27fb7, 0x9e05ed18
data4 0x9f199ed2, 0x1ee7fd7c, 0x1f003db6, 0x9eac3793
data4 0x9e5b8c10, 0x9f3af17c, 0x1bc9a8be, 0x1ee3c004
data4 0x9f19b1b2, 0x9f242ce9, 0x9ce67dd1, 0x9e4f6275
data4 0x1e20742c, 0x1eb9328a, 0x9f477153, 0x1d969718
data4 0x9f1e6c43, 0x1f2f67f4, 0x9f39c7e4, 0x9e3c4feb
data4 0x1da3956b, 0x9e7c685d, 0x1f280911, 0x9f0d8afb
data4 0x1e314b40, 0x9eb4f250, 0x9f1a34ad, 0x1ef5d5e7
data4 0x9f145496, 0x1e604827, 0x9f1e5195, 0x1e9c1fc0
data4 0x1efde521, 0x1e69b385, 0x1f316830, 0x9f244eae
data4 0x1f1787ec, 0x9e939971, 0x1f0bb393, 0x9f0511d6
data4 0x1ed919de, 0x1d8b7b28, 0x1e5ca4a9, 0x1e7c357b
data4 0x9e3ff8e8, 0x1eef53b5, 0x9ed22ed7, 0x1f16659b
data4 0x9f2db102, 0x9e2c6a78, 0x1f328d7d, 0x9f2fec3c
data4 0x1eb395bd, 0x9f242b84, 0x9e2683e6, 0x1ed71e68
data4 0x1efd1df5, 0x9e9eeafd, 0x9ed2249c, 0x1eef129a
data4 0x1d1ea44c, 0x9e81f7ff, 0x1eaf77c9, 0x9ee7a285
data4 0x1e1864ed, 0x9ee7edbb, 0x9e15a27d, 0x9ae61655
data4 0x1f1ff1a2, 0x1da29755, 0x9e5f46fb, 0x1e901236
data4 0x9eecfb9b, 0x9f204d2f, 0x1ec64685, 0x9eb809bd
data4 0x9e0026c5, 0x1d9f1da1, 0x1f142b49, 0x9f20f22e
data4 0x1f24b067, 0x1f185a4c, 0x9f09765c, 0x9ece902f
data4 0x1e2ca5db, 0x1e6de464, 0x9f071f67, 0x1f1518c3
data4 0x1ea13ded, 0x1f0b8414, 0x1edb6ad4, 0x9e548740
data4 0x9ea10efb, 0x1ee48a60, 0x1e7954c5, 0x9edad013
data4 0x9f21517d, 0x9e9b6e0c, 0x9ee7f9a6, 0x9ebd4298
data4 0x9d65b24e, 0x1eed751f, 0x9f1573ea, 0x9d430377
data4 0x9e13fc0c, 0x1e47008a, 0x1e3d5c1d, 0x1ef41a91
data4 0x9e4a4ef7, 0x9e952f18, 0x1d620566, 0x1d9b8d33
data4 0x1db06247, 0x1e94b31e, 0x1f0730ad, 0x9d79ffb4
data4 0x1ed64d51, 0x9e91fd11, 0x9e28d35a, 0x9dea0ed9
data4 0x1e891def, 0x9ee28ac0, 0x1e1db99b, 0x9ee1ce38
data4 0x9bdd9bca, 0x1eb72cb9, 0x9e8c53c6, 0x1e0df6ca
data4 0x1e8f2ccd, 0x9e9b0886, 0x1eeb3bc7, 0x1ec7e772
data4 0x9e210776, 0x9daf246c, 0x1ea1f151, 0x1ece4dc6
data4 0x1ce741c8, 0x1ed3c88f, 0x9ec9a4fd, 0x9e0c8d30
data4 0x1d2fbb26, 0x9ef212a7, 0x1ee44f1c, 0x9e445550
data4 0x1e075f77, 0x9d9291a3, 0x1f09c2ee, 0x9e012c88
data4 0x1f057d62, 0x9e7bb0dc, 0x9d8758ee, 0x1ee8d6c1
data4 0x9e509a57, 0x9e4ca7b7, 0x1e2cb341, 0x9ec35106
data4 0x1ecf3baf, 0x1e11781c, 0x1ea0cc78, 0x1eb75ca6
data4 0x1e961e1a, 0x1eb88853, 0x1e7abf50, 0x1ee38704
data4 0x9dc5ab0f, 0x1afe197b, 0x9ec07523, 0x9d9b7f78
data4 0x1f011618, 0x1ed43b0b, 0x9f035945, 0x9e3fd014
data4 0x9bbda5cd, 0x9e83f8ab, 0x1e58a928, 0x1e392d61
data4 0x1efdbb52, 0x1ee310a8, 0x9ec7ecc1, 0x1e8c9ed6
data4 0x9ef82dee, 0x9e70545b, 0x9ea53fc4, 0x1e40f419
LOCAL_OBJECT_END(D_table)
.section .text
// exp10l main path: long double exp10l(long double x), argument and result
// in f8.
//
// Outline:
//   1. Classify x; zero/inf/NaN go to SPECIAL_EXP10.
//   2. y = x*log2(10)*2^8 is rounded to an integer (K+f)*2^8; its low 8 bits
//      index T_table/D_table, the remaining bits are K.
//   3. Out-of-range x (p12 set) goes to OUT_RANGE_EXP10.
//   4. The reduced argument r and correction e feed a degree-6 polynomial,
//      and the result is T + T*P with T = 2^{K-63} * T_table[f].
//
// Several registers are reused under their original alias once the first
// value is dead (e.g. FR_OF_TEST later holds D, FR_UF_TEST later holds T,
// GR_BM8 later holds K, GR_EMIN later holds bias-63); the comments below
// name the value actually held.
GLOBAL_IEEE754_ENTRY(exp10l)
{.mfi
alloc GR_SREG = ar.pfs, 1, 4, 4, 0
// will continue only for normal/denormal numbers
// (p12 is set when x is NOT in one of the classes selected by mask 0x1b)
fclass.nm.unc p12, p7 = f8, 0x1b
// GR_ADDR0 = linkage-table slot holding the address of poly_coeffs
// (log2(10), C_1...C_6, followed by T_table)
addl GR_ADDR0 = @ltoff(poly_coeffs), gp ;;
}
{.mfi
// load start address for C_1...C_6 followed by T_table
ld8 GR_ADDR0 = [ GR_ADDR0 ]
// X<0 ?  (p6 = negative, p8 = non-negative; consumed by OUT_RANGE_EXP10)
fcmp.lt.s1 p6, p8 = f8, f0
// GR_BM8 = bias-8
mov GR_BM8 = 0xffff-8
}
{.mlx
nop.m 0
// GR_EMIN = (-2^14-62)*2^{8}, as a single-precision bit pattern
movl GR_EMIN = 0xca807c00 ;;
}
{.mmb
// FR_CONST1 = 2^{-8}
setf.exp FR_CONST1 = GR_BM8
// load log2(10)*2^8
ldfe FR_LOG10 = [ GR_ADDR0 ], 16
(p12) br.cond.spnt SPECIAL_EXP10 ;;
}
{.mmf
// FR_UF_TEST = underflow threshold, scaled by 2^8 like y below
setf.s FR_UF_TEST = GR_EMIN
// load overflow threshold
ldfe FR_OF_TEST = [ GR_ADDR0 ], 16
// normalize x
fma.s0 f8 = f8, f1, f0 ;;
}
{.mmi
// load C_1
ldfe FR_COEFF1 = [ GR_ADDR0 ], 16 ;;
// load C_2
ldfe FR_COEFF2 = [ GR_ADDR0 ], 16
nop.i 0 ;;
}
{.mmf
// GR_D_ADDR = pointer to D table
// GR_ADDR0 is 64 bytes into poly_coeffs here; the remaining 96+16-64 bytes
// of poly_coeffs plus the 2048 bytes of T_table lie before D_table.
add GR_D_ADDR = 2048-64+96+16, GR_ADDR0
// load C_3, C_4
ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR0 ], 16
// y = x*log2(10)*2^8
fma.s1 FR_XL10 = f8, FR_LOG10, f0 ;;
}
{.mfi
// load C_5, C_6
ldfpd FR_COEFF5, FR_COEFF6 = [ GR_ADDR0 ], 16
// get int(x)
fcvt.fx.trunc.s1 FR_XINT = f8
nop.i 0
}
{.mfi
nop.m 0
// FR_L10 = log2(10) = (log2(10)*2^8) * 2^{-8}
fma.s1 FR_L10 = FR_LOG10, FR_CONST1, f0
nop.i 0 ;;
}
{.mfi
// load log2(10)_low; afterwards GR_ADDR0 points at T_table
ldfe FR_L10_LOW = [ GR_ADDR0 ], 16
// y0 = x*log2(10) = x*log2(10)_hi  (FR_LOG10 is reused for y0)
fma.s1 FR_LOG10 = f8, FR_L10, f0
// GR_EMIN is reused: bias-63
mov GR_EMIN = 0xffff-63
}
{.mfi
// biased exponent of 2^5 = 32
mov GR_32_BIAS = 0xffff + 5
// (K+f)*2^8 = round_to_int(y)
fcvt.fx.s1 FR_KF0 = FR_XL10
// biased exponent of 2^2 = 4
mov GR_4_BIAS = 0xffff + 2;;
}
{.mfi
nop.m 0
// x>overflow threshold ?
fcmp.gt.s1 p12, p7 = f8, FR_OF_TEST
nop.i 0 ;;
}
{.mfi
setf.exp FR_32 = GR_32_BIAS
// x<underflow threshold ?  (tested on the scaled value y = x*log2(10)*2^8)
(p7) fcmp.lt.s1 p12, p7 = FR_XL10, FR_UF_TEST
nop.i 0 ;;
}
{.mfi
setf.exp FR_4 = GR_4_BIAS
// FR_XINTF = int(x) converted back to floating point
fcvt.xf FR_XINTF = FR_XINT
nop.i 0
}
{.mfi
nop.m 0
// FR_L10 = log2(10)_h*x-RN(log2(10)_h*x)
fms.s1 FR_L10 = f8, FR_L10, FR_LOG10
nop.i 0 ;;
}
{.mfi
// GR_BM8 is reused: integer (K+f)*2^8
getf.sig GR_BM8 = FR_KF0
// FR_KF0 = (K+f)*2^8 as a floating-point value
fcvt.xf FR_KF0 = FR_KF0
// mask for the 8 fraction bits
mov GR_CONST2 = 255 ;;
}
{.mfi
// GR_CONST2 = f (table index, 0..255)
and GR_CONST2 = GR_CONST2, GR_BM8
// FR_L10_LOW = e = log2(10)_l*x+(log2(10)_h*x-RN(log2(10)_h*x))
fma.s1 FR_L10_LOW = FR_L10_LOW, f8, FR_L10
// GR_BM8 = K
shr GR_BM8 = GR_BM8, 8 ;;
}
{.mmi
// address of D  (4 bytes per entry)
shladd GR_D_ADDR = GR_CONST2, 2, GR_D_ADDR
// K += bias-63
add GR_BM8 = GR_BM8, GR_EMIN
// address of T  (8 bytes per entry)
shladd GR_ADDR0 = GR_CONST2, 3, GR_ADDR0 ;;
}
{.mfb
// load D  (FR_OF_TEST is reused for D)
ldfs FR_OF_TEST = [ GR_D_ADDR ]
// is input an integer ?
fcmp.eq.s1 p13, p14 = f8, FR_XINTF
(p12) br.cond.spnt OUT_RANGE_EXP10 ;;
}
{.mmf
// load T  (FR_UF_TEST is reused for T)
ldf8 FR_UF_TEST = [ GR_ADDR0 ]
// FR_XL10 = 2^{K-63}
setf.exp FR_XL10 = GR_BM8
// r = x*log2(10)_hi-2^{-8}* [ (K+f)*2^{8} ]
fnma.s1 FR_KF0 = FR_KF0, FR_CONST1, FR_LOG10 ;;
}
{.mfi
nop.m 0
// get 28.0  (= 32 - 4)
fms.s1 FR_28 = FR_32, f1, FR_4
nop.i 0
}
{.mfi
nop.m 0
// E = 1+C_1*e
fma.s1 FR_L10 = FR_L10_LOW, FR_COEFF1, f1
nop.i 0 ;;
}
{.mfi
nop.m 0
// P12 = C_1+C_2*r
fma.s1 FR_COEFF2 = FR_COEFF2, FR_KF0, FR_COEFF1
nop.i 0
}
{.mfi
nop.m 0
// P34 = C_3+C_4*r
fma.s1 FR_COEFF4 = FR_COEFF4, FR_KF0, FR_COEFF3
nop.i 0 ;;
}
{.mfi
nop.m 0
// P56 = C_5+C_6*r
fma.s1 FR_COEFF5 = FR_COEFF6, FR_KF0, FR_COEFF5
nop.i 0
}
{.mfi
nop.m 0
// FR_COEFF3 = r2 = r*r
fma.s1 FR_COEFF3 = FR_KF0, FR_KF0, f0
nop.i 0 ;;
}
{.mfi
nop.m 0
// if input is integer, is it positive ?
(p13) fcmp.ge.s1 p13, p14 = f8, f0
nop.i 0
}
{.mfi
nop.m 0
// r' = r*E
fma.s1 FR_KF0 = FR_KF0, FR_L10, f0
nop.i 0 ;;
}
{.mfi
nop.m 0
// D' = D+C_1*e
fma.s1 FR_OF_TEST = FR_L10_LOW, FR_COEFF1, FR_OF_TEST
nop.i 0 ;;
}
{.mfi
nop.m 0
// P36 = P34+r2*P56
fma.s1 FR_COEFF4 = FR_COEFF5, FR_COEFF3, FR_COEFF4
nop.i 0
}
{.mfi
nop.m 0
// FR_COEFF3 = r3 = r'*r2
fma.s1 FR_COEFF3 = FR_COEFF3, FR_KF0, f0
nop.i 0 ;;
}
{.mfi
nop.m 0
// is input below 28.0 ?
(p13) fcmp.lt.s1 p13, p14 = f8, FR_28
nop.i 0
}
{.mfi
nop.m 0
// P' = P12*r'+D'
fma.s1 FR_COEFF2 = FR_COEFF2, FR_KF0, FR_OF_TEST
nop.i 0 ;;
}
{.mfi
nop.m 0
// P = P'+r3*P36
fma.s1 FR_COEFF3 = FR_COEFF3, FR_COEFF4, FR_COEFF2
nop.i 0
}
{.mfi
nop.m 0
// T = 2^{K-63}*T
fma.s1 FR_UF_TEST = FR_UF_TEST, FR_XL10, f0
nop.i 0 ;;
}
// p13: x is a non-negative integer below 28; p14: every other in-range x.
.pred.rel "mutex",p13,p14
{.mfi
nop.m 0
// result = T+T*P, computed with status field s1 instead of s0
// (presumably so that these exact results do not raise inexact, per the
// 05/08/03 history note -- confirm)
(p13) fma.s1 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST
nop.i 0
}
{.mfb
nop.m 0
// result = T+T*P
(p14) fma.s0 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST
// return
br.ret.sptk b0 ;;
}
// SPECIAL_EXP10: reached from the entry bundle group when x is not a
// normal/denormal number.  Each case is tested with fclass.m and returns
// directly; whatever matches none of the tests is handled as a NaN.
SPECIAL_EXP10:
{.mfi
nop.m 0
// x = -Infinity ?
fclass.m p6, p0 = f8, 0x22
nop.i 0 ;;
}
{.mfi
nop.m 0
// x = +Infinity ?
fclass.m p7, p0 = f8, 0x21
nop.i 0 ;;
}
{.mfi
nop.m 0
// x = +/-Zero ?
fclass.m p8, p0 = f8, 0x7
nop.i 0
}
{.mfb
nop.m 0
// exp10(-Infinity) = 0
(p6) mov f8 = f0
(p6) br.ret.spnt b0 ;;
}
{.mfb
nop.m 0
// exp10(+Infinity) = +Infinity  (f8 already holds the result)
nop.f 0
(p7) br.ret.spnt b0 ;;
}
{.mfb
nop.m 0
// exp10(+/-0) = 1
(p8) mov f8 = f1
(p8) br.ret.spnt b0 ;;
}
{.mfb
nop.m 0
// Remaining cases: NaNs  (x*1+0 through status field s0)
fma.s0 f8 = f8, f1, f0
br.ret.sptk b0 ;;
}
// OUT_RANGE_EXP10: reached when x is above the overflow threshold or below
// the underflow threshold.  p6/p8 still hold the sign test made at entry
// (p6: x<0, i.e. underflow; p8: x>=0, i.e. overflow).
//
// Overflow squares a value with biased exponent 0x1fffe, sets error tag 165
// and then falls through, past the end of exp10l, into __libm_error_region.
// Underflow squares a value with biased exponent 1 and returns directly.
//
// NOTE(review): __libm_error_region stores FR_X (f10) as the input argument,
// but f10 is also FR_LOG10 and has been overwritten by the main path before
// control arrives here -- confirm whether the error handler needs the
// original x for this tag.
OUT_RANGE_EXP10:
{.mii
// overflow: p8 = 1
(p8) mov GR_CONST1 = 0x1fffe
nop.i 0
nop.i 0 ;;
}
{.mmb
(p8) mov GR_Parameter_TAG = 165
(p8) setf.exp FR_KF0 = GR_CONST1
nop.b 999 ;;
}
{.mfi
nop.m 999
// create the overflowed result (and the matching status flags via s0)
(p8) fma.s0 f8 = FR_KF0, FR_KF0, f0
nop.i 999
}
{.mii
nop.m 0
// underflow: p6 = 1
(p6) mov GR_CONST1 = 1
nop.i 0 ;;
}
{.mmb
nop.m 0
(p6) setf.exp FR_KF0 = GR_CONST1
nop.b 999 ;;
}
{.mfb
nop.m 999
// create the underflowed result (and the matching status flags via s0)
(p6) fma.s0 f8 = FR_KF0, FR_KF0, f0
// will not call libm_error for underflow
(p6) br.ret.sptk b0 ;;
}
GLOBAL_IEEE754_END(exp10l)
weak_alias (exp10l, pow10l)
// __libm_error_region: common tail that reports an error through
// __libm_error_support and returns the (possibly replaced) result in f8.
//
// On entry GR_Parameter_TAG must already hold the error tag.
// A 64-byte frame is created and three 16-byte slots are filled with stfe:
//   sp+16 : FR_X       (address passed in GR_Parameter_X)
//   sp+32 : FR_Y       (address passed in GR_Parameter_Y)
//   sp+48 : FR_RESULT  (address passed in GR_Parameter_RESULT)
// After the call the result is reloaded from sp+48, so the callee can change
// it.  ar.pfs, gp and b0 are kept in GR_SAVE_PFS, GR_SAVE_GP and GR_SAVE_B0
// across the call; the .save/.fframe/.restore directives describe this for
// the unwinder.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi
add GR_Parameter_Y = -32, sp // Parameter 2 value
nop.f 0
.save ar.pfs, GR_SAVE_PFS
mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
}
{.mfi
.fframe 64
add sp = -64, sp // Create new stack
nop.f 0
mov GR_SAVE_GP = gp ;; // Save gp
}
{.mmi
stfe [ GR_Parameter_Y ] = FR_Y, 16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16, sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0 = b0 ;; // Save b0
}
.body
{.mib
stfe [ GR_Parameter_X ] = FR_X // STORE Parameter 1 on stack
// GR_Parameter_Y was post-incremented by 16 above and now addresses sp+48
add GR_Parameter_RESULT = 0, GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{.mib
stfe [ GR_Parameter_Y ] = FR_RESULT // STORE Parameter 3 on stack
// step GR_Parameter_Y back to the slot holding FR_Y (sp+32)
add GR_Parameter_Y = -16, GR_Parameter_Y
br.call.sptk b0 = __libm_error_support# ;; // Call error handling function
}
{.mmi
// recompute the result address from sp rather than reusing the register
add GR_Parameter_RESULT = 48, sp
nop.m 0
nop.i 0 ;;
}
{.mmi
ldfe f8 = [ GR_Parameter_RESULT ] // Get return result off stack
.restore sp
add sp = 64, sp // Restore stack pointer
mov b0 = GR_SAVE_B0 ;; // Restore return address
}
{.mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 ;; // Return
}
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#, @function
.global __libm_error_support#

563
sysdeps/ia64/fpu/e_exp2.S Normal file
View File

@ -0,0 +1,563 @@
.file "exp2.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/25/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/05/02 Improved performance
// 01/17/03 Fixed to call error support when x=1024.0
//
// API
//==============================================================
// double exp2(double)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let x= (K + fh + fl + r), where
// K is an integer, fh= 0.b1 b2 b3 b4 b5,
// fl= 2^{-5}* 0.b6 b7 b8 b9 b10 (fh, fl >= 0),
// and |r|<2^{-11}
// Th is a table that stores 2^fh (32 entries) rounded to
// double extended precision (only mantissa is stored)
// Tl is a table that stores 2^fl (32 entries) rounded to
// double extended precision (only mantissa is stored)
//
// 2^x is approximated as
// 2^K * Th [ fh ] * Tl [ fl ] * (1+c1*r+c2*r^2+c3*r^3+c4*r^4)
// Note: We use the following trick to speed up conversion from FP to integer:
//
// Let x = K + r, where K is an integer, and |r| <= 0.5
// Let N be the number of significand bits for the FP format used
// ( N=64 for double-extended, N=53 for double)
//
// Then let y = 1.5 * 2^(N-1) + x for RN mode
// K = y - 1.5 * 2^(N-1)
// r = x - K
//
// If we want to obtain the integer part and the first m fractional bits of x,
// we can use the same trick, but with a constant of 1.5 * 2^(N-1-m):
//
// Let x = K + f + r
// f = 0.b_1 b_2 ... b_m
// |r| <= 2^(-m-1)
//
// Then let y = 1.5 * 2^(N-1-m) + x for RN mode
// (K+f) = y - 1.5 * 2^(N-1-m)
// r = x - (K+f)
// Special values
//==============================================================
// exp2(0)= 1
// exp2(+inf)= inf
// exp2(-inf)= 0
//
// Registers used
//==============================================================
// r2-r3, r14-r40
// f6-f15, f32-f45
// p6-p8, p12
//
// Symbolic register names used by exp2 and by its __libm_error_region.

// Floating-point values handed to __libm_error_support.
FR_X                = f10
FR_Y                = f1
FR_RESULT           = f8

// Floating-point working registers.
FR_COEFF1           = f6
FR_COEFF2           = f7
FR_R                = f9
FR_KF0              = f12
FR_COEFF3           = f13
FR_COEFF4           = f14
FR_UF_LIMIT         = f15
FR_OF_LIMIT         = f32
FR_EXPMIN           = f33
FR_ROUNDVAL         = f34
FR_KF               = f35
FR_2_TO_K           = f36
FR_T_low            = f37
FR_T_high           = f38
FR_P34              = f39
FR_R2               = f40
FR_P12              = f41
FR_T_low_K          = f42
FR_P14              = f43
FR_T                = f44
FR_P                = f45

// General working registers.
GR_TBL_START        = r2
GR_LOG_TBL          = r3
GR_OF_LIMIT         = r14
GR_UF_LIMIT         = r15
GR_EXP_CORR         = r16
GR_F_low            = r17
GR_F_high           = r18
GR_K                = r19
GR_Flow_ADDR        = r20
GR_BIAS             = r21
GR_Fh               = r22
GR_Fh_ADDR          = r23
GR_EXPMAX           = r24
GR_EMIN             = r25
GR_ROUNDVAL         = r26
GR_MASK             = r27
GR_KF0              = r28
GR_MASK_low         = r29
GR_COEFF_START      = r30

// Stacked registers: save slots used around the error-support call.
GR_SAVE_B0          = r33
GR_SAVE_PFS         = r34
GR_SAVE_GP          = r35
GR_SAVE_SP          = r36

// Stacked registers: values set up for __libm_error_support.
GR_Parameter_X      = r37
GR_Parameter_Y      = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG    = r40
// Data tables
//==============================================================
RODATA
.align 16
// poly_coeffs: polynomial coefficients for exp2.
// The code reads this object front to back through GR_COEFF_START with three
// post-incremented loads of 16 bytes each (ldfpd for C_3/C_4, then ldfe for
// C_1 and for C_2), so the order below must be kept.  T_table must follow
// immediately: after the three loads GR_COEFF_START is used as its address.
LOCAL_OBJECT_START(poly_coeffs)
data8 0x3fac6b08d704a0c0, 0x3f83b2ab6fba4e77 // C_3 and C_4
data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1
data8 0xf5fdeffc162c7541, 0x00003ffc // C_2
LOCAL_OBJECT_END(poly_coeffs)
// T_table: two consecutive sub-tables of 32 significands (8 bytes each),
// loaded with ldf8.
//   bytes   0..255 : 2^{f_low},  indexed by the low 5 fraction bits
//   bytes 256..511 : 2^{f_high}, indexed by the high 5 fraction bits
// The code reaches the second sub-table by adding 256 to the table address.
LOCAL_OBJECT_START(T_table)
// 2^{0.00000 b6 b7 b8 b9 b10}
data8 0x8000000000000000, 0x8016302f17467628
data8 0x802c6436d0e04f50, 0x80429c17d77c18ed
data8 0x8058d7d2d5e5f6b0, 0x806f17687707a7af
data8 0x80855ad965e88b83, 0x809ba2264dada76a
data8 0x80b1ed4fd999ab6c, 0x80c83c56b50cf77f
data8 0x80de8f3b8b85a0af, 0x80f4e5ff089f763e
data8 0x810b40a1d81406d4, 0x81219f24a5baa59d
data8 0x813801881d886f7b, 0x814e67cceb90502c
data8 0x8164d1f3bc030773, 0x817b3ffd3b2f2e47
data8 0x8191b1ea15813bfd, 0x81a827baf7838b78
data8 0x81bea1708dde6055, 0x81d51f0b8557ec1c
data8 0x81eba08c8ad4536f, 0x820225f44b55b33b
data8 0x8218af4373fc25eb, 0x822f3c7ab205c89a
data8 0x8245cd9ab2cec048, 0x825c62a423d13f0c
data8 0x8272fb97b2a5894c, 0x828998760d01faf3
data8 0x82a0393fe0bb0ca8, 0x82b6ddf5dbc35906
//
//
// 2^{0.b1 b2 b3 b4 b5}
data8 0x8000000000000000, 0x82cd8698ac2ba1d7
data8 0x85aac367cc487b14, 0x88980e8092da8527
data8 0x8b95c1e3ea8bd6e6, 0x8ea4398b45cd53c0
data8 0x91c3d373ab11c336, 0x94f4efa8fef70961
data8 0x9837f0518db8a96f, 0x9b8d39b9d54e5538
data8 0x9ef5326091a111ad, 0xa27043030c496818
data8 0xa5fed6a9b15138ea, 0xa9a15ab4ea7c0ef8
data8 0xad583eea42a14ac6, 0xb123f581d2ac258f
data8 0xb504f333f9de6484, 0xb8fbaf4762fb9ee9
data8 0xbd08a39f580c36be, 0xc12c4cca66709456
data8 0xc5672a115506dadd, 0xc9b9bd866e2f27a2
data8 0xce248c151f8480e3, 0xd2a81d91f12ae45a
data8 0xd744fccad69d6af4, 0xdbfbb797daf23755
data8 0xe0ccdeec2a94e111, 0xe5b906e77c8348a8
data8 0xeac0c6e7dd24392e, 0xefe4b99bdcdaf5cb
data8 0xf5257d152486cc2c, 0xfa83b2db722a033a
LOCAL_OBJECT_END(T_table)
.section .text
// exp2 main path: double exp2(double x), argument and result in f8.
//
// Outline:
//   1. Classify x; zero/inf/NaN go to SPECIAL_exp2.
//   2. Adding and subtracting FR_ROUNDVAL = 1.5*2^(63-10) yields (K+f) with
//      10 fraction bits; the significand of the intermediate sum provides
//      the integer (K+f)*2^10.
//   3. x >= 1024 or x < -1075 (p12 set) goes to OUT_RANGE_exp2.
//   4. result = T + T*P, with T = 2^{K-126} * T_low * T_high and
//      P = r*(C_1 + C_2*r + r^2*(C_3 + C_4*r)).
//   5. If x < -1022 the result has already been computed; error tag 162 is
//      set and __libm_error_region is entered.
GLOBAL_LIBM_ENTRY(exp2)
{.mfi
alloc r32= ar.pfs, 1, 4, 4, 0
// will continue only for non-zero normal/denormal numbers
fclass.nm p12, p0= f8, 0x1b
// GR_TBL_START= linkage-table slot holding the address of poly_coeffs
// (C_1...C_4 followed by T_table)
addl GR_TBL_START= @ltoff(poly_coeffs), gp
}
{.mlx
mov GR_OF_LIMIT= 0xffff + 10 // Exponent of overflow limit
movl GR_ROUNDVAL= 0x5a400000 // 1.5*2^(63-10) (SP)
}
;;
// Form special constant 1.5*2^(63-10) to give integer part and first 10
// fractional bits of x
{.mfi
setf.s FR_ROUNDVAL= GR_ROUNDVAL // Form special constant
fcmp.lt.s1 p6, p8= f8, f0 // X<0 ?
nop.i 0
}
{.mfb
ld8 GR_COEFF_START= [ GR_TBL_START ] // Load pointer to coeff table
nop.f 0
(p12) br.cond.spnt SPECIAL_exp2 // Branch if nan, inf, zero
}
;;
{.mlx
setf.exp FR_OF_LIMIT= GR_OF_LIMIT // Set overflow limit
movl GR_UF_LIMIT= 0xc4866000 // (-2^10-51) = -1075
}
;;
{.mfi
ldfpd FR_COEFF3, FR_COEFF4= [ GR_COEFF_START ], 16 // load C_3, C_4
fma.s0 f8= f8, f1, f0 // normalize x
nop.i 0
}
;;
{.mmi
setf.s FR_UF_LIMIT= GR_UF_LIMIT // Set underflow limit
ldfe FR_COEFF1= [ GR_COEFF_START ], 16 // load C_1
mov GR_EXP_CORR= 0xffff-126 // bias-2*63
}
;;
{.mfi
// after this load GR_COEFF_START points at T_table
ldfe FR_COEFF2= [ GR_COEFF_START ], 16 // load C_2
fma.s1 FR_KF0= f8, f1, FR_ROUNDVAL // y= x + 1.5*2^(63-10)
nop.i 0
}
;;
{.mfi
mov GR_MASK= 1023 // all 10 fraction bits
fms.s1 FR_KF= FR_KF0, f1, FR_ROUNDVAL // (K+f)
mov GR_MASK_low= 31 // low 5 fraction bits
}
;;
{.mfi
getf.sig GR_KF0= FR_KF0 // (K+f)*2^10= round_to_int(y)
fcmp.ge.s1 p12, p7= f8, FR_OF_LIMIT // x >= overflow threshold ?
add GR_LOG_TBL= 256, GR_COEFF_START // Pointer to high T_table
}
;;
{.mmi
and GR_F_low= GR_KF0, GR_MASK_low // f_low
and GR_F_high= GR_MASK, GR_KF0 // f_high*32
shr GR_K= GR_KF0, 10 // K
}
;;
{.mmi
shladd GR_Flow_ADDR= GR_F_low, 3, GR_COEFF_START // address of 2^{f_low}
add GR_BIAS= GR_K, GR_EXP_CORR // K += bias-2*63
shr GR_Fh= GR_F_high, 5 // f_high
}
;;
{.mfi
setf.exp FR_2_TO_K= GR_BIAS // 2^{K-126}
fnma.s1 FR_R= FR_KF, f1, f8 // r= x - (K+f)
shladd GR_Fh_ADDR= GR_Fh, 3, GR_LOG_TBL // address of 2^{f_high}
}
{.mlx
ldf8 FR_T_low= [ GR_Flow_ADDR ] // load T_low= 2^{f_low}
movl GR_EMIN= 0xc47f8000 // EMIN= -1022
}
;;
{.mfi
ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high}
(p7) fcmp.lt.s1 p12, p7= f8, FR_UF_LIMIT // x<underflow threshold ?
nop.i 0
}
;;
{.mfi
setf.s FR_EXPMIN= GR_EMIN // FR_EXPMIN= EMIN
fma.s1 FR_P34= FR_COEFF4, FR_R, FR_COEFF3 // P34= C_3+C_4*r
nop.i 0
}
{.mfb
nop.m 0
fma.s1 FR_R2= FR_R, FR_R, f0 // r*r
(p12) br.cond.spnt OUT_RANGE_exp2
}
;;
{.mfi
nop.m 0
fma.s1 FR_P12= FR_COEFF2, FR_R, FR_COEFF1 // P12= C_1+C_2*r
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_T_low_K= FR_T_low, FR_2_TO_K, f0 // T= 2^{K-126}*T_low
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_P14= FR_R2, FR_P34, FR_P12 // P14= P12+r2*P34
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_T= FR_T_low_K, FR_T_high, f0 // T= T*T_high
nop.i 0
}
;;
{.mfi
nop.m 0
// p6: x < -1022, report underflow after computing the result; p8: otherwise
fcmp.lt.s0 p6, p8= f8, FR_EXPMIN // underflow (x<EMIN) ?
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_P= FR_P14, FR_R, f0 // P= P14*r
nop.i 0
}
;;
{.mfb
nop.m 0
fma.d.s0 f8= FR_P, FR_T, FR_T // result= T+T*P
(p8) br.ret.sptk b0 // return
}
;;
{.mfb
// underflow error tag for __libm_error_support
(p6) mov GR_Parameter_TAG= 162
nop.f 0
(p6) br.cond.sptk __libm_error_region
}
;;
// SPECIAL_exp2: reached from the entry code when x is zero, infinite or NaN.
// Each case is tested with fclass.m and returns directly; whatever matches
// none of the tests is handled as a NaN.
SPECIAL_exp2:
{.mfi
nop.m 0
fclass.m p6, p0= f8, 0x22 // x= -Infinity ?
nop.i 0
}
;;
{.mfi
nop.m 0
fclass.m p7, p0= f8, 0x21 // x= +Infinity ?
nop.i 0
}
;;
{.mfi
nop.m 0
fclass.m p8, p0= f8, 0x7 // x= +/-Zero ?
nop.i 0
}
{.mfb
nop.m 0
(p6) mov f8= f0 // exp2(-Infinity)= 0
(p6) br.ret.spnt b0
}
;;
{.mfb
nop.m 0
nop.f 0
// f8 already holds +Infinity, which is the result
(p7) br.ret.spnt b0 // exp2(+Infinity)= +Infinity
}
;;
{.mfb
nop.m 0
(p8) mov f8= f1 // exp2(+/-0)= 1
(p8) br.ret.spnt b0
}
;;
{.mfb
nop.m 0
fma.d.s0 f8= f8, f1, f0 // Remaining cases: NaNs
br.ret.sptk b0
}
;;
// OUT_RANGE_exp2: reached when x >= 1024 or x < -1075.  p6/p8 still hold the
// sign test made at entry (p6: x<0, i.e. underflow; p8: x>=0, i.e. overflow).
// A value with biased exponent 0x1fffe (overflow) or 1 (underflow) is
// squared in double precision to produce the result, the matching error tag
// (161 or 162) is set, and control falls through, past the end of exp2,
// into __libm_error_region.
//
// NOTE(review): __libm_error_region stores FR_X (f10) as the input argument,
// but no instruction in exp2 writes f10 -- confirm whether the error handler
// needs the original x for these tags.
OUT_RANGE_exp2:
// overflow: p8= 1
{.mii
(p8) mov GR_EXPMAX= 0x1fffe
nop.i 0
nop.i 0
}
;;
{.mmb
(p8) mov GR_Parameter_TAG= 161
(p8) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfi
nop.m 999
(p8) fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow
nop.i 999
}
// underflow: p6= 1
{.mii
(p6) mov GR_Parameter_TAG= 162
// GR_EXPMAX is reused for the smallest biased exponent
(p6) mov GR_EXPMAX= 1
nop.i 0
}
;;
{.mmb
nop.m 0
(p6) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfb
nop.m 999
(p6) fma.d.s0 f8= FR_R, FR_R, f0 // Create underflow
nop.b 0
}
;;
GLOBAL_LIBM_END(exp2)
// __libm_error_region: common tail that reports an error through
// __libm_error_support and returns the (possibly replaced) result in f8.
//
// On entry GR_Parameter_TAG must already hold the error tag.
// A 64-byte frame is created and three slots are filled with stfd:
//   sp+16 : FR_X       (address passed in GR_Parameter_X)
//   sp+32 : FR_Y       (address passed in GR_Parameter_Y)
//   sp+48 : FR_RESULT  (address passed in GR_Parameter_RESULT)
// After the call the result is reloaded from sp+48, so the callee can change
// it.  ar.pfs, gp and b0 are kept in GR_SAVE_PFS, GR_SAVE_GP and GR_SAVE_B0
// across the call; the .save/.fframe/.restore directives describe this for
// the unwinder.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi
add GR_Parameter_Y= -32, sp // Parameter 2 value
nop.f 0
.save ar.pfs, GR_SAVE_PFS
mov GR_SAVE_PFS= ar.pfs // Save ar.pfs
}
{.mfi
.fframe 64
add sp= -64, sp // Create new stack
nop.f 0
mov GR_SAVE_GP= gp // Save gp
}
;;
{.mmi
stfd [ GR_Parameter_Y ]= FR_Y, 16 // STORE Parameter 2 on stack
add GR_Parameter_X= 16, sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0= b0 // Save b0
}
;;
.body
{.mib
stfd [ GR_Parameter_X ]= FR_X // STORE Parameter 1 on stack
// GR_Parameter_Y was post-incremented by 16 above and now addresses sp+48
add GR_Parameter_RESULT= 0, GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{.mib
stfd [ GR_Parameter_Y ]= FR_RESULT // STORE Parameter 3 on stack
// step GR_Parameter_Y back to the slot holding FR_Y (sp+32)
add GR_Parameter_Y= -16, GR_Parameter_Y
br.call.sptk b0= __libm_error_support# // Call error handling function
}
;;
{.mmi
// recompute the result address from sp rather than reusing the register
add GR_Parameter_RESULT= 48, sp
nop.m 0
nop.i 0
}
;;
{.mmi
ldfd f8= [ GR_Parameter_RESULT ] // Get return result off stack
.restore sp
add sp= 64, sp // Restore stack pointer
mov b0= GR_SAVE_B0 // Restore return address
}
;;
{.mib
mov gp= GR_SAVE_GP // Restore gp
mov ar.pfs= GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
}
;;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#, @function
.global __libm_error_support#

538
sysdeps/ia64/fpu/e_exp2f.S Normal file
View File

@ -0,0 +1,538 @@
.file "exp2f.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/25/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/05/02 Improved performance and accuracy
// 01/17/03 Fixed to call error support when x=128.0
//
// API
//==============================================================
// float exp2f(float)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let x= (K + fh + fl + r), where
// K is an integer, fh= 0.b1 b2 b3 b4 b5,
// fl= 2^{-5}* 0.b6 b7 b8 b9 b10 (fh, fl >= 0),
// and |r|<2^{-11}
// Th is a table that stores 2^fh (32 entries) rounded to
// double extended precision (only mantissa is stored)
// Tl is a table that stores 2^fl (32 entries) rounded to
// double extended precision (only mantissa is stored)
//
// 2^x is approximated as
// 2^K * Th [ fh ] * Tl [ fl ] * (1+c1*r+c2*r^2)
// Note: We use the following trick to speed up conversion from FP to integer:
//
// Let x = K + r, where K is an integer, and |r| <= 0.5
// Let N be the number of significand bits for the FP format used
// ( N=64 for double-extended, N=53 for double)
//
// Then let y = 1.5 * 2^(N-1) + x for RN mode
// K = y - 1.5 * 2^(N-1)
// r = x - K
//
// If we want to obtain the integer part and the first m fractional bits of x,
// we can use the same trick, but with a constant of 1.5 * 2^(N-1-m):
//
// Let x = K + f + r
// f = 0.b_1 b_2 ... b_m
// |r| <= 2^(-m-1)
//
// Then let y = 1.5 * 2^(N-1-m) + x for RN mode
// (K+f) = y - 1.5 * 2^(N-1-m)
// r = x - (K+f)
// Special values
//==============================================================
// exp2(0)= 1
// exp2(+inf)= inf
// exp2(-inf)= 0
//
// Registers used
//==============================================================
// r2-r3, r14-r40
// f6-f15, f32-f45
// p6-p8, p12
//
// Symbolic register names for exp2f, grouped by role.

// --- Table and coefficient pointers
GR_TBL_START        = r2
GR_LOG_TBL          = r3
GR_COEFF_START      = r30
GR_Flow_ADDR        = r20
GR_Fh_ADDR          = r23

// --- Integer images of range limits and special constants
GR_OF_LIMIT         = r14
GR_UF_LIMIT         = r15
GR_EXP_CORR         = r16
GR_EXPMAX           = r24
GR_EMIN             = r25
GR_ROUNDVAL         = r26

// --- Integer pieces of the decomposed argument
GR_KF0              = r28
GR_MASK             = r27
GR_MASK_low         = r29
GR_F_low            = r17
GR_F_high           = r18
GR_Fh               = r22
GR_K                = r19
GR_BIAS             = r21

// --- Locals (r33-r36) and outputs (r37-r40) of the register frame,
// --- used by __libm_error_region
GR_SAVE_B0          = r33
GR_SAVE_PFS         = r34
GR_SAVE_GP          = r35
GR_SAVE_SP          = r36
GR_Parameter_X      = r37
GR_Parameter_Y      = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG    = r40

// --- Floating-point values stored for the error handler
FR_X                = f10
FR_Y                = f1
FR_RESULT           = f8

// --- Floating-point constants
FR_COEFF1           = f6
FR_COEFF2           = f7
FR_UF_LIMIT         = f15
FR_OF_LIMIT         = f32
FR_EXPMIN           = f33
FR_ROUNDVAL         = f34

// --- Floating-point working values
FR_KF0              = f12
FR_KF               = f35
FR_R                = f9
FR_2_TO_K           = f36
FR_T_low            = f37
FR_T_high           = f38
FR_T_low_K          = f42
FR_T                = f44
FR_P12              = f41
FR_P                = f45
// Data tables
//==============================================================
RODATA
.align 16
// Polynomial coefficients for 2^r ~ 1 + C_1*r + C_2*r^2.
// Each entry is 16 bytes (64-bit significand word followed by the
// sign/exponent word) and is read with ldfe and a post-increment of 16.
// The numeric values are approximately ln(2) and ln(2)^2/2.
LOCAL_OBJECT_START(poly_coeffs)
data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1
data8 0xf5fdeffc162c7541, 0x00003ffc // C_2
LOCAL_OBJECT_END(poly_coeffs)
// Two consecutive 32-entry tables of 64-bit significands (8 bytes each,
// 256 bytes per table), read with ldf8.
// Layout dependency: exp2f reaches this table by post-incrementing the
// poly_coeffs pointer past C_1 and C_2, so T_table must directly follow
// poly_coeffs; the second table is addressed as (start of T_table) + 256.
LOCAL_OBJECT_START(T_table)
// 2^{0.00000 b6 b7 b8 b9 b10}
// (low table: indexed by the 5 low fraction bits, f_low)
data8 0x8000000000000000, 0x8016302f17467628
data8 0x802c6436d0e04f50, 0x80429c17d77c18ed
data8 0x8058d7d2d5e5f6b0, 0x806f17687707a7af
data8 0x80855ad965e88b83, 0x809ba2264dada76a
data8 0x80b1ed4fd999ab6c, 0x80c83c56b50cf77f
data8 0x80de8f3b8b85a0af, 0x80f4e5ff089f763e
data8 0x810b40a1d81406d4, 0x81219f24a5baa59d
data8 0x813801881d886f7b, 0x814e67cceb90502c
data8 0x8164d1f3bc030773, 0x817b3ffd3b2f2e47
data8 0x8191b1ea15813bfd, 0x81a827baf7838b78
data8 0x81bea1708dde6055, 0x81d51f0b8557ec1c
data8 0x81eba08c8ad4536f, 0x820225f44b55b33b
data8 0x8218af4373fc25eb, 0x822f3c7ab205c89a
data8 0x8245cd9ab2cec048, 0x825c62a423d13f0c
data8 0x8272fb97b2a5894c, 0x828998760d01faf3
data8 0x82a0393fe0bb0ca8, 0x82b6ddf5dbc35906
//
//
// 2^{0.b1 b2 b3 b4 b5}
// (high table: indexed by the 5 high fraction bits, f_high)
data8 0x8000000000000000, 0x82cd8698ac2ba1d7
data8 0x85aac367cc487b14, 0x88980e8092da8527
data8 0x8b95c1e3ea8bd6e6, 0x8ea4398b45cd53c0
data8 0x91c3d373ab11c336, 0x94f4efa8fef70961
data8 0x9837f0518db8a96f, 0x9b8d39b9d54e5538
data8 0x9ef5326091a111ad, 0xa27043030c496818
data8 0xa5fed6a9b15138ea, 0xa9a15ab4ea7c0ef8
data8 0xad583eea42a14ac6, 0xb123f581d2ac258f
data8 0xb504f333f9de6484, 0xb8fbaf4762fb9ee9
data8 0xbd08a39f580c36be, 0xc12c4cca66709456
data8 0xc5672a115506dadd, 0xc9b9bd866e2f27a2
data8 0xce248c151f8480e3, 0xd2a81d91f12ae45a
data8 0xd744fccad69d6af4, 0xdbfbb797daf23755
data8 0xe0ccdeec2a94e111, 0xe5b906e77c8348a8
data8 0xeac0c6e7dd24392e, 0xefe4b99bdcdaf5cb
data8 0xf5257d152486cc2c, 0xfa83b2db722a033a
LOCAL_OBJECT_END(T_table)
.section .text
// float exp2f(float x)
//   In:  f8 = x
//   Out: f8 = 2^x, rounded to single precision
//
// Main path:
//   y = x + 1.5*2^53 leaves K and the first ten fraction bits of x in the
//   low bits of the significand of y.  Those bits index two 32-entry
//   tables (T_low, T_high); the remainder r = x - (K+f) feeds the
//   polynomial P = C_1*r + C_2*r^2, and the result is T + T*P with
//   T = 2^K * T_low * T_high.
// Side exits:
//   SPECIAL_exp2    x is zero, infinity or NaN
//   OUT_RANGE_exp2  x >= 128 (overflow) or x < -150 (underflow)
//   __libm_error_region  entered with tag 164 when x < -126, and reached
//                   by falling off the end of OUT_RANGE_exp2
GLOBAL_LIBM_ENTRY(exp2f)
{.mfi
// Register frame: 1 input (r32), 4 locals (r33-r36), 4 outputs (r37-r40)
alloc r32= ar.pfs, 1, 4, 4, 0
// will continue only for non-zero normal/denormal numbers
fclass.nm p12, p0= f8, 0x1b
// GR_TBL_START= pointer to C_1...C_2 followed by T_table
addl GR_TBL_START= @ltoff(poly_coeffs), gp
}
{.mlx
mov GR_OF_LIMIT= 0xffff + 7 // Exponent of overflow limit
movl GR_ROUNDVAL= 0x5a400000 // 1.5*2^(63-10) (SP)
}
;;
// Form special constant 1.5*2^(63-10) to give integer part and first 10
// fractional bits of x
{.mfi
setf.s FR_ROUNDVAL= GR_ROUNDVAL // Form special constant
// p6/p8 record the sign of x; OUT_RANGE_exp2 uses them to choose
// underflow (p6) or overflow (p8)
fcmp.lt.s1 p6, p8= f8, f0 // X<0 ?
nop.i 0
}
{.mfb
ld8 GR_COEFF_START= [ GR_TBL_START ] // Load pointer to coeff table
nop.f 0
(p12) br.cond.spnt SPECIAL_exp2 // Branch if nan, inf, zero
}
;;
{.mlx
setf.exp FR_OF_LIMIT= GR_OF_LIMIT // Set overflow limit
movl GR_UF_LIMIT= 0xc3160000 // (-2^7-22) = -150
}
;;
{.mfi
ldfe FR_COEFF1= [ GR_COEFF_START ], 16 // load C_1
fma.s0 f8= f8, f1, f0 // normalize x
nop.i 0
}
;;
{.mmi
// After this load GR_COEFF_START points at T_table
ldfe FR_COEFF2= [ GR_COEFF_START ], 16 // load C_2
setf.s FR_UF_LIMIT= GR_UF_LIMIT // Set underflow limit
mov GR_EXP_CORR= 0xffff-126
}
;;
{.mfi
nop.m 0
fma.s1 FR_KF0= f8, f1, FR_ROUNDVAL // y= x + 1.5*2^(63-10)
nop.i 0
}
;;
{.mfi
mov GR_MASK= 1023
fms.s1 FR_KF= FR_KF0, f1, FR_ROUNDVAL // (K+f)
mov GR_MASK_low= 31
}
;;
{.mfi
getf.sig GR_KF0= FR_KF0 // (K+f)*2^10= round_to_int(y)
// p12 = out of range; p7 = still a candidate for the main path
fcmp.ge.s1 p12, p7= f8, FR_OF_LIMIT // x >= overflow threshold ?
// The low T_table half is 32 entries * 8 bytes = 256 bytes
add GR_LOG_TBL= 256, GR_COEFF_START // Pointer to high T_table
}
;;
{.mmi
and GR_F_low= GR_KF0, GR_MASK_low // f_low
and GR_F_high= GR_MASK, GR_KF0 // all 10 fraction bits (f_high*32 + f_low)
shr GR_K= GR_KF0, 10 // K
}
;;
{.mmi
shladd GR_Flow_ADDR= GR_F_low, 3, GR_COEFF_START // address of 2^{f_low}
// -126 = -2*63: compensates the two table significands multiplied below
add GR_BIAS= GR_K, GR_EXP_CORR // K + bias - 2*63
shr GR_Fh= GR_F_high, 5 // f_high
}
;;
{.mfi
setf.exp FR_2_TO_K= GR_BIAS // 2^{K-126}
fnma.s1 FR_R= FR_KF, f1, f8 // r= x - (K+f)
shladd GR_Fh_ADDR= GR_Fh, 3, GR_LOG_TBL // address of 2^{f_high}
}
{.mlx
ldf8 FR_T_low= [ GR_Flow_ADDR ] // load T_low= 2^{f_low}
movl GR_EMIN= 0xc2fc0000 // EMIN= -126
}
;;
{.mfi
ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high}
// Only evaluated when the overflow test above did not already set p12
(p7) fcmp.lt.s1 p12, p7= f8, FR_UF_LIMIT // x<underflow threshold ?
nop.i 0
}
;;
{.mfb
setf.s FR_EXPMIN= GR_EMIN // FR_EXPMIN= EMIN
fma.s1 FR_P12= FR_COEFF2, FR_R, FR_COEFF1 // P12= C_1+C_2*r
(p12) br.cond.spnt OUT_RANGE_exp2
}
;;
{.mfi
nop.m 0
fma.s1 FR_T_low_K= FR_T_low, FR_2_TO_K, f0 // T= 2^{K-126}*T_low
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_P= FR_R, FR_P12, f0 // P= r*P12 = C_1*r+C_2*r^2
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_T= FR_T_low_K, FR_T_high, f0 // T= T*T_high
nop.i 0
}
;;
{.mfi
nop.m 0
// p8 = result cannot underflow, return directly; p6 = report underflow
fcmp.lt.s0 p6, p8= f8, FR_EXPMIN // underflow (x<EMIN) ?
nop.i 0
}
;;
{.mfb
nop.m 0
fma.s.s0 f8= FR_P, FR_T, FR_T // result= T+T*P
(p8) br.ret.sptk b0 // return
}
;;
{.mfb
(p6) mov GR_Parameter_TAG= 164
nop.f 0
(p6) br.cond.sptk __libm_error_region
}
;;
// Zero, infinity and NaN inputs
SPECIAL_exp2:
{.mfi
nop.m 0
fclass.m p6, p0= f8, 0x22 // x= -Infinity ?
nop.i 0
}
;;
{.mfi
nop.m 0
fclass.m p7, p0= f8, 0x21 // x= +Infinity ?
nop.i 0
}
;;
{.mfi
nop.m 0
fclass.m p8, p0= f8, 0x7 // x= +/-Zero ?
nop.i 0
}
{.mfb
nop.m 0
(p6) mov f8= f0 // exp2(-Infinity)= 0
(p6) br.ret.spnt b0
}
;;
{.mfb
nop.m 0
nop.f 0
// f8 already holds +Infinity, so it is returned unchanged
(p7) br.ret.spnt b0 // exp2(+Infinity)= +Infinity
}
;;
{.mfb
nop.m 0
(p8) mov f8= f1 // exp2(+/-0)= 1
(p8) br.ret.spnt b0
}
;;
{.mfb
nop.m 0
fma.s.s0 f8= f8, f1, f0 // Remaining cases: NaNs
br.ret.sptk b0
}
;;
// x >= 128 or x < -150.  p6/p8 still hold the sign test made at entry.
// The result is manufactured by squaring a huge or tiny power of two so
// that the final single-precision rounding produces the overflow or
// underflow result; control then runs off the end of this routine into
// __libm_error_region (no branch is taken here).
OUT_RANGE_exp2:
// overflow: p8= 1
{.mii
(p8) mov GR_EXPMAX= 0x1fffe
nop.i 0
nop.i 0
}
;;
{.mmb
(p8) mov GR_Parameter_TAG= 163
(p8) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfi
nop.m 999
(p8) fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow
nop.i 999
}
// underflow: p6= 1
{.mii
(p6) mov GR_Parameter_TAG= 164
(p6) mov GR_EXPMAX= 1
nop.i 0
}
;;
{.mmb
nop.m 0
(p6) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfb
nop.m 999
(p6) fma.s.s0 f8= FR_R, FR_R, f0 // Create underflow
nop.b 0
}
;;
GLOBAL_LIBM_END(exp2f)
// Error tail for exp2f: calls __libm_error_support and returns its result.
//   In:  f8 (FR_RESULT) = result computed by exp2f
//        GR_Parameter_TAG (r40) = error tag set by the caller path
//   Out: f8 = single-precision value read back from the result slot
//
// A 64-byte frame is created; relative to the new sp the slots are
//   sp+16  parameter 1 (FR_X)       -> GR_Parameter_X
//   sp+32  parameter 2 (FR_Y)       -> GR_Parameter_Y
//   sp+48  parameter 3 (FR_RESULT)  -> GR_Parameter_RESULT
// and the three addresses plus the tag are passed in the output registers.
//
// NOTE(review): FR_X is f10, which the exp2f code never writes, so the
// value stored as parameter 1 is whatever f10 held on entry to exp2f.
// Confirm that __libm_error_support does not depend on it for tags 163/164.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi
// Computed from the old sp: old sp - 32 == new sp + 32
add GR_Parameter_Y= -32, sp // Parameter 2 value
nop.f 0
.save ar.pfs, GR_SAVE_PFS
mov GR_SAVE_PFS= ar.pfs // Save ar.pfs
}
{.mfi
.fframe 64
add sp= -64, sp // Create new stack
nop.f 0
mov GR_SAVE_GP= gp // Save gp
}
;;
{.mmi
// The post-increment leaves GR_Parameter_Y at new sp + 48 (result slot)
stfs [ GR_Parameter_Y ]= FR_Y, 16 // STORE Parameter 2 on stack
add GR_Parameter_X= 16, sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0= b0 // Save b0
}
;;
.body
{.mib
stfs [ GR_Parameter_X ]= FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT= 0, GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{.mib
stfs [ GR_Parameter_Y ]= FR_RESULT // STORE Parameter 3 on stack
// Step back from the result slot to the parameter 2 slot (new sp + 32)
add GR_Parameter_Y= -16, GR_Parameter_Y
br.call.sptk b0= __libm_error_support# // Call error handling function
}
;;
{.mmi
// Recompute the result address after the call instead of reusing the
// output register (see the 08/15/00 history entry in the file header)
add GR_Parameter_RESULT= 48, sp
nop.m 0
nop.i 0
}
;;
{.mmi
ldfs f8= [ GR_Parameter_RESULT ] // Get return result off stack
.restore sp
add sp= 64, sp // Restore stack pointer
mov b0= GR_SAVE_B0 // Restore return address
}
;;
{.mib
mov gp= GR_SAVE_GP // Restore gp
mov ar.pfs= GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
}
;;
LOCAL_LIBM_END(__libm_error_region)
// Declare the external error handler called above
.type __libm_error_support#, @function
.global __libm_error_support#

806
sysdeps/ia64/fpu/e_exp2l.S Normal file
View File

@ -0,0 +1,806 @@
.file "exp2l.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 07/27/00 Initial version
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [ the previously overwritten ] GR_Parameter_RESULT.
// 02/02/01 Added libm_error_support calls for underflow
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 05/07/03 Reformatted assembly source
//
// API
//==============================================================
// long double exp2l(long double)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let x= K + f + r, where
// K is an integer, f= 0.b1 b2... b8 (f>= 0),
// and |r|<2^{-8}
// T is a table that stores 2^f (256 entries) rounded to
// double extended precision (only mantissa is stored)
// D stores (2^f/T [ f ] - 1), rounded to single precision
//
// 2^x is approximated as
// 2^K * T [ f ] * (1+D [ f ] +c1*r+c2*r^2+...+c6*r^6)
//
// Special values
//==============================================================
// exp2(0)= 1
// exp2(+inf)= inf
// exp2(-inf)= 0
//
// Registers used
//==============================================================
// f6-f15, f32-f46
// r2-r3, r8-r11, r14-r40
// p6, p7, p8, p12
// Symbolic register names for exp2l, grouped by role.
// Several names deliberately share a register (f10, r2, r3, r19).

// --- Floating-point values stored for the error handler
FR_X                = f10
FR_Y                = f1
FR_RESULT           = f8

// --- Polynomial coefficients
FR_COEFF1           = f14
FR_COEFF2           = f15
FR_COEFF3           = f10
FR_COEFF4           = f11
FR_COEFF5           = f12
FR_COEFF6           = f13

// --- Floating-point range limits
FR_2P14             = f32
FR_UF_TEST          = f33
FR_EMIN             = f37

// --- Argument reduction and table values
FR_KF0              = f6
FR_EXP63            = f7
FR_R                = f35
FR_R2               = f40
FR_R3               = f45
FR_T                = f9
FR_D                = f34
FR_2EXP             = f36
FR_TS               = f42

// --- Polynomial partial sums
FR_P12              = f41
FR_P34              = f38
FR_P56              = f39
FR_P36              = f43
FR_P02              = f44
FR_P06              = f46

// --- Table pointers and indexed addresses
GR_ADDR0            = r2
GR_ADDR             = r2
GR_D_ADDR0          = r3
GR_D_ADDR           = r3
GR_IT               = r21
GR_ID               = r22

// --- Small integer constants
GR_63               = r23
GR_255              = r11
GR_256              = r9
GR_CONST1           = r24
GR_CONST2           = r26
GR_CONST3           = r27
GR_BM63             = r15

// --- Integer images of floating-point limits
GR_2P14             = r19
GR_UF_TEST          = r16
GR_EMIN             = r20
GR_EMIN1            = r31

// --- Fields extracted from the argument
GR_EBIAS            = r25
GR_SIGNIF           = r28
GR_ARGEXP           = r29
GR_SGN              = r30
GR_EXPON            = r14
GR_EM63             = r10
GR_LEADBITS         = r8
GR_INDEX            = r17
GR_K                = r18
GR_KF               = r19

// --- Register frame: alloc destination, locals (r33-r36) and
// --- outputs (r37-r40) used by __libm_error_region
GR_SREG             = r32
GR_SAVE_B0          = r33
GR_SAVE_PFS         = r34
GR_SAVE_GP          = r35
GR_SAVE_SP          = r36
GR_Parameter_X      = r37
GR_Parameter_Y      = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG    = r40
// Data tables
//==============================================================
RODATA
.align 16
// Polynomial coefficients for exp2l, 64 bytes in total.
// C_3..C_6 are 8-byte double-precision images, read in pairs with ldfpd;
// C_1 and C_2 are 16-byte extended-precision images (significand word, then
// sign/exponent word), read with ldfe.  The storage order C_3, C_4, C_5,
// C_6, C_1, C_2 matches the order in which exp2l walks the pointer.
LOCAL_OBJECT_START(poly_coeffs)
data8 0x3fac6b08d704a0c0 // C_3
data8 0x3f83b2ab6fba4e77 // C_4
data8 0x3f55d87fe78a6731 // C_5
data8 0x3f2430912f86c787 // C_6
data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1
data8 0xf5fdeffc162c7541, 0x00003ffc // C_2
LOCAL_OBJECT_END(poly_coeffs)
// T [ f ]: 256 entries, one per 8-bit fraction f = 0.b1 b2 ... b8, each the
// 64-bit significand of 2^f (8 bytes per entry, read with ldf8).
// Layout dependency: exp2l reaches this table by post-incrementing the
// poly_coeffs pointer past all six coefficients, so T_table must directly
// follow poly_coeffs.
LOCAL_OBJECT_START(T_table)
data8 0x8000000000000000, 0x8058d7d2d5e5f6b1
data8 0x80b1ed4fd999ab6c, 0x810b40a1d81406d4
data8 0x8164d1f3bc030773, 0x81bea1708dde6056
data8 0x8218af4373fc25ec, 0x8272fb97b2a5894c
data8 0x82cd8698ac2ba1d7, 0x83285071e0fc4547
data8 0x8383594eefb6ee37, 0x83dea15b9541b132
data8 0x843a28c3acde4046, 0x8495efb3303efd30
data8 0x84f1f656379c1a29, 0x854e3cd8f9c8c95d
data8 0x85aac367cc487b15, 0x86078a2f23642a9f
data8 0x8664915b923fba04, 0x86c1d919caef5c88
data8 0x871f61969e8d1010, 0x877d2afefd4e256c
data8 0x87db357ff698d792, 0x88398146b919f1d4
data8 0x88980e8092da8527, 0x88f6dd5af155ac6b
data8 0x8955ee03618e5fdd, 0x89b540a7902557a4
data8 0x8a14d575496efd9a, 0x8a74ac9a79896e47
data8 0x8ad4c6452c728924, 0x8b3522a38e1e1032
data8 0x8b95c1e3ea8bd6e7, 0x8bf6a434adde0085
data8 0x8c57c9c4646f4dde, 0x8cb932c1bae97a95
data8 0x8d1adf5b7e5ba9e6, 0x8d7ccfc09c50e2f8
data8 0x8ddf042022e69cd6, 0x8e417ca940e35a01
data8 0x8ea4398b45cd53c0, 0x8f073af5a2013520
data8 0x8f6a8117e6c8e5c4, 0x8fce0c21c6726481
data8 0x9031dc431466b1dc, 0x9095f1abc540ca6b
data8 0x90fa4c8beee4b12b, 0x915eed13c89689d3
data8 0x91c3d373ab11c336, 0x9228ffdc10a051ad
data8 0x928e727d9531f9ac, 0x92f42b88f673aa7c
data8 0x935a2b2f13e6e92c, 0x93c071a0eef94bc1
data8 0x9426ff0fab1c04b6, 0x948dd3ac8ddb7ed3
data8 0x94f4efa8fef70961, 0x955c5336887894d5
data8 0x95c3fe86d6cc7fef, 0x962bf1cbb8d97560
data8 0x96942d3720185a00, 0x96fcb0fb20ac4ba3
data8 0x97657d49f17ab08e, 0x97ce9255ec4357ab
data8 0x9837f0518db8a96f, 0x98a1976f7597e996
data8 0x990b87e266c189aa, 0x9975c1dd47518c77
data8 0x99e0459320b7fa65, 0x9a4b13371fd166ca
data8 0x9ab62afc94ff864a, 0x9b218d16f441d63d
data8 0x9b8d39b9d54e5539, 0x9bf93118f3aa4cc1
data8 0x9c6573682ec32c2d, 0x9cd200db8a0774cb
data8 0x9d3ed9a72cffb751, 0x9dabfdff6367a2aa
data8 0x9e196e189d472420, 0x9e872a276f0b98ff
data8 0x9ef5326091a111ae, 0x9f6386f8e28ba651
data8 0x9fd228256400dd06, 0xa041161b3d0121be
data8 0xa0b0510fb9714fc2, 0xa11fd9384a344cf7
data8 0xa18faeca8544b6e4, 0xa1ffd1fc25cea188
data8 0xa27043030c496819, 0xa2e102153e918f9e
data8 0xa3520f68e802bb93, 0xa3c36b345991b47c
data8 0xa43515ae09e6809e, 0xa4a70f0c95768ec5
data8 0xa5195786be9ef339, 0xa58bef536dbeb6ee
data8 0xa5fed6a9b15138ea, 0xa6720dc0be08a20c
data8 0xa6e594cfeee86b1e, 0xa7596c0ec55ff55b
data8 0xa7cd93b4e965356a, 0xa8420bfa298f70d1
data8 0xa8b6d5167b320e09, 0xa92bef41fa77771b
data8 0xa9a15ab4ea7c0ef8, 0xaa1717a7b5693979
data8 0xaa8d2652ec907629, 0xab0386ef48868de1
data8 0xab7a39b5a93ed337, 0xabf13edf162675e9
data8 0xac6896a4be3fe929, 0xace0413ff83e5d04
data8 0xad583eea42a14ac6, 0xadd08fdd43d01491
data8 0xae493452ca35b80e, 0xaec22c84cc5c9465
data8 0xaf3b78ad690a4375, 0xafb51906e75b8661
data8 0xb02f0dcbb6e04584, 0xb0a957366fb7a3c9
data8 0xb123f581d2ac2590, 0xb19ee8e8c94feb09
data8 0xb21a31a66618fe3b, 0xb295cff5e47db4a4
data8 0xb311c412a9112489, 0xb38e0e38419fae18
data8 0xb40aaea2654b9841, 0xb487a58cf4a9c180
data8 0xb504f333f9de6484, 0xb58297d3a8b9f0d2
data8 0xb60093a85ed5f76c, 0xb67ee6eea3b22b8f
data8 0xb6fd91e328d17791, 0xb77c94c2c9d725e9
data8 0xb7fbefca8ca41e7c, 0xb87ba337a1743834
data8 0xb8fbaf4762fb9ee9, 0xb97c143756844dbf
data8 0xb9fcd2452c0b9deb, 0xba7de9aebe5fea09
data8 0xbaff5ab2133e45fb, 0xbb81258d5b704b6f
data8 0xbc034a7ef2e9fb0d, 0xbc85c9c560e7b269
data8 0xbd08a39f580c36bf, 0xbd8bd84bb67ed483
data8 0xbe0f6809860993e2, 0xbe935317fc378238
data8 0xbf1799b67a731083, 0xbf9c3c248e2486f8
data8 0xc0213aa1f0d08db0, 0xc0a6956e8836ca8d
data8 0xc12c4cca66709456, 0xc1b260f5ca0fbb33
data8 0xc238d2311e3d6673, 0xc2bfa0bcfad907c9
data8 0xc346ccda24976407, 0xc3ce56c98d21b15d
data8 0xc4563ecc5334cb33, 0xc4de8523c2c07baa
data8 0xc5672a115506dadd, 0xc5f02dd6b0bbc3d9
data8 0xc67990b5aa245f79, 0xc70352f04336c51e
data8 0xc78d74c8abb9b15d, 0xc817f681416452b2
data8 0xc8a2d85c8ffe2c45, 0xc92e1a9d517f0ecc
data8 0xc9b9bd866e2f27a3, 0xca45c15afcc72624
data8 0xcad2265e4290774e, 0xcb5eecd3b38597c9
data8 0xcbec14fef2727c5d, 0xcc799f23d11510e5
data8 0xcd078b86503dcdd2, 0xcd95da6a9ff06445
data8 0xce248c151f8480e4, 0xceb3a0ca5dc6a55d
data8 0xcf4318cf191918c1, 0xcfd2f4683f94eeb5
data8 0xd06333daef2b2595, 0xd0f3d76c75c5db8d
data8 0xd184df6251699ac6, 0xd2164c023056bcab
data8 0xd2a81d91f12ae45a, 0xd33a5457a3029054
data8 0xd3ccf099859ac379, 0xd45ff29e0972c561
data8 0xd4f35aabcfedfa1f, 0xd5872909ab75d18a
data8 0xd61b5dfe9f9bce07, 0xd6aff9d1e13ba2fe
data8 0xd744fccad69d6af4, 0xd7da67311797f56a
data8 0xd870394c6db32c84, 0xd9067364d44a929c
data8 0xd99d15c278afd7b6, 0xda3420adba4d8704
data8 0xdacb946f2ac9cc72, 0xdb63714f8e295255
data8 0xdbfbb797daf23755, 0xdc9467913a4f1c92
data8 0xdd2d818508324c20, 0xddc705bcd378f7f0
data8 0xde60f4825e0e9124, 0xdefb4e1f9d1037f2
data8 0xdf9612deb8f04420, 0xe031430a0d99e627
data8 0xe0ccdeec2a94e111, 0xe168e6cfd3295d23
data8 0xe2055afffe83d369, 0xe2a23bc7d7d91226
data8 0xe33f8972be8a5a51, 0xe3dd444c46499619
data8 0xe47b6ca0373da88d, 0xe51a02ba8e26d681
data8 0xe5b906e77c8348a8, 0xe658797368b3a717
data8 0xe6f85aaaee1fce22, 0xe798aadadd5b9cbf
data8 0xe8396a503c4bdc68, 0xe8da9958464b42ab
data8 0xe97c38406c4f8c57, 0xea1e4756550eb27b
data8 0xeac0c6e7dd24392f, 0xeb63b74317369840
data8 0xec0718b64c1cbddc, 0xecaaeb8ffb03ab41
data8 0xed4f301ed9942b84, 0xedf3e6b1d418a491
data8 0xee990f980da3025b, 0xef3eab20e032bc6b
data8 0xefe4b99bdcdaf5cb, 0xf08b3b58cbe8b76a
data8 0xf13230a7ad094509, 0xf1d999d8b7708cc1
data8 0xf281773c59ffb13a, 0xf329c9233b6bae9c
data8 0xf3d28fde3a641a5b, 0xf47bcbbe6db9fddf
data8 0xf5257d152486cc2c, 0xf5cfa433e6537290
data8 0xf67a416c733f846e, 0xf7255510c4288239
data8 0xf7d0df730ad13bb9, 0xf87ce0e5b2094d9c
data8 0xf92959bb5dd4ba74, 0xf9d64a46eb939f35
data8 0xfa83b2db722a033a, 0xfb3193cc4227c3f4
data8 0xfbdfed6ce5f09c49, 0xfc8ec01121e447bb
data8 0xfd3e0c0cf486c175, 0xfdedd1b496a89f35
data8 0xfe9e115c7b8f884c, 0xff4ecb59511ec8a5
LOCAL_OBJECT_END(T_table)
// D [ f ]: 256 single-precision corrections (4 bytes each, read with ldfs),
// indexed by the same 8-bit fraction f as T_table.  Per the file header,
// D [ f ] = 2^f/T [ f ] - 1, i.e. the relative error left by rounding T [ f ];
// it is added as the constant term of the polynomial.
LOCAL_OBJECT_START(D_table)
data4 0x00000000, 0x9f55c08f, 0x1e93ffa3, 0x1dcd43a8
data4 0x1f751f79, 0x9f3cdd88, 0x9f43d155, 0x1eda222c
data4 0x1ef35513, 0x9f597895, 0x9e698881, 0x1ec71073
data4 0x1e50e371, 0x9dc01e19, 0x1de74133, 0x1e2f028c
data4 0x9edefb47, 0x1ebbac48, 0x9e8b0330, 0x9e9e9314
data4 0x1edc1d11, 0x1f098529, 0x9f52827c, 0x1f50050d
data4 0x1f301e8e, 0x1f5b64d1, 0x9f45e3ee, 0x9ef64d6d
data4 0x1d6ec5e8, 0x9e61ad9a, 0x1d44ccbb, 0x9e4a8bbb
data4 0x9cf11576, 0x9dcce7e7, 0x9d02ac90, 0x1f26ccf0
data4 0x9f0877c6, 0x9ddd62ae, 0x9f4b7fc3, 0x1ea8ef6b
data4 0x1ea4378d, 0x1ef6fc38, 0x1db99fd9, 0x1f22bf6f
data4 0x1f53e172, 0x1e85504a, 0x9f37cc75, 0x1f0c5e17
data4 0x1dde8aac, 0x9cb42bb2, 0x1e153cd7, 0x1eb62bba
data4 0x9e9b941b, 0x9ea80e3c, 0x1f508823, 0x1ec3fd36
data4 0x1e9ffaa1, 0x1e21e2eb, 0x9d948b1d, 0x9e8ac93a
data4 0x1ef7ee6f, 0x9e80dda3, 0x1f0814be, 0x1dc5ddfe
data4 0x1eedb9d1, 0x9f2aaa26, 0x9ea5b0fc, 0x1edf702e
data4 0x9e391201, 0x1f1316bb, 0x1ea27fb7, 0x9e05ed18
data4 0x9f199ed2, 0x1ee7fd7c, 0x1f003db6, 0x9eac3793
data4 0x9e5b8c10, 0x9f3af17c, 0x1bc9a8be, 0x1ee3c004
data4 0x9f19b1b2, 0x9f242ce9, 0x9ce67dd1, 0x9e4f6275
data4 0x1e20742c, 0x1eb9328a, 0x9f477153, 0x1d969718
data4 0x9f1e6c43, 0x1f2f67f4, 0x9f39c7e4, 0x9e3c4feb
data4 0x1da3956b, 0x9e7c685d, 0x1f280911, 0x9f0d8afb
data4 0x1e314b40, 0x9eb4f250, 0x9f1a34ad, 0x1ef5d5e7
data4 0x9f145496, 0x1e604827, 0x9f1e5195, 0x1e9c1fc0
data4 0x1efde521, 0x1e69b385, 0x1f316830, 0x9f244eae
data4 0x1f1787ec, 0x9e939971, 0x1f0bb393, 0x9f0511d6
data4 0x1ed919de, 0x1d8b7b28, 0x1e5ca4a9, 0x1e7c357b
data4 0x9e3ff8e8, 0x1eef53b5, 0x9ed22ed7, 0x1f16659b
data4 0x9f2db102, 0x9e2c6a78, 0x1f328d7d, 0x9f2fec3c
data4 0x1eb395bd, 0x9f242b84, 0x9e2683e6, 0x1ed71e68
data4 0x1efd1df5, 0x9e9eeafd, 0x9ed2249c, 0x1eef129a
data4 0x1d1ea44c, 0x9e81f7ff, 0x1eaf77c9, 0x9ee7a285
data4 0x1e1864ed, 0x9ee7edbb, 0x9e15a27d, 0x9ae61655
data4 0x1f1ff1a2, 0x1da29755, 0x9e5f46fb, 0x1e901236
data4 0x9eecfb9b, 0x9f204d2f, 0x1ec64685, 0x9eb809bd
data4 0x9e0026c5, 0x1d9f1da1, 0x1f142b49, 0x9f20f22e
data4 0x1f24b067, 0x1f185a4c, 0x9f09765c, 0x9ece902f
data4 0x1e2ca5db, 0x1e6de464, 0x9f071f67, 0x1f1518c3
data4 0x1ea13ded, 0x1f0b8414, 0x1edb6ad4, 0x9e548740
data4 0x9ea10efb, 0x1ee48a60, 0x1e7954c5, 0x9edad013
data4 0x9f21517d, 0x9e9b6e0c, 0x9ee7f9a6, 0x9ebd4298
data4 0x9d65b24e, 0x1eed751f, 0x9f1573ea, 0x9d430377
data4 0x9e13fc0c, 0x1e47008a, 0x1e3d5c1d, 0x1ef41a91
data4 0x9e4a4ef7, 0x9e952f18, 0x1d620566, 0x1d9b8d33
data4 0x1db06247, 0x1e94b31e, 0x1f0730ad, 0x9d79ffb4
data4 0x1ed64d51, 0x9e91fd11, 0x9e28d35a, 0x9dea0ed9
data4 0x1e891def, 0x9ee28ac0, 0x1e1db99b, 0x9ee1ce38
data4 0x9bdd9bca, 0x1eb72cb9, 0x9e8c53c6, 0x1e0df6ca
data4 0x1e8f2ccd, 0x9e9b0886, 0x1eeb3bc7, 0x1ec7e772
data4 0x9e210776, 0x9daf246c, 0x1ea1f151, 0x1ece4dc6
data4 0x1ce741c8, 0x1ed3c88f, 0x9ec9a4fd, 0x9e0c8d30
data4 0x1d2fbb26, 0x9ef212a7, 0x1ee44f1c, 0x9e445550
data4 0x1e075f77, 0x9d9291a3, 0x1f09c2ee, 0x9e012c88
data4 0x1f057d62, 0x9e7bb0dc, 0x9d8758ee, 0x1ee8d6c1
data4 0x9e509a57, 0x9e4ca7b7, 0x1e2cb341, 0x9ec35106
data4 0x1ecf3baf, 0x1e11781c, 0x1ea0cc78, 0x1eb75ca6
data4 0x1e961e1a, 0x1eb88853, 0x1e7abf50, 0x1ee38704
data4 0x9dc5ab0f, 0x1afe197b, 0x9ec07523, 0x9d9b7f78
data4 0x1f011618, 0x1ed43b0b, 0x9f035945, 0x9e3fd014
data4 0x9bbda5cd, 0x9e83f8ab, 0x1e58a928, 0x1e392d61
data4 0x1efdbb52, 0x1ee310a8, 0x9ec7ecc1, 0x1e8c9ed6
data4 0x9ef82dee, 0x9e70545b, 0x9ea53fc4, 0x1e40f419
LOCAL_OBJECT_END(D_table)
.section .text
// long double exp2l(long double x)
//   In:  f8 = x
//   Out: f8 = 2^x
//
// Main path:
//   1. Copy the sign/exponent field and the significand of x into integer
//      registers and split off the sign.
//   2. Shift the significand right so that it holds the integer part K and
//      the first 8 fraction bits f of |x|; f indexes T_table and D_table.
//   3. For x < 0 with f != 0 use index 256-f and K+1; the scale exponent
//      is then formed from -K instead of +K.
//   4. r = x - (x truncated to 8 fraction bits); the result is
//      TS + TS*(D + C_1*r + ... + C_6*r^6) with TS = T [ f ] * 2^(+/-K).
// Side exits:
//   SPECIAL_exp2l    x is zero, infinity or NaN
//   OUT_RANGE_exp2l  exponent of x >= 15, x >= 2^14, or x < -2^14-62
//   __libm_error_region  entered with tag 160 when x < 2-2^14, and reached
//                    by falling off the end of OUT_RANGE_exp2l
GLOBAL_LIBM_ENTRY(exp2l)
{.mii
// get exponent
getf.exp GR_EBIAS = f8
// GR_D_ADDR0 = pointer to D_table
addl GR_D_ADDR0 = @ltoff(D_table), gp
// GR_ADDR0 = pointer to C_1...C_6 followed by T_table
addl GR_ADDR0 = @ltoff(poly_coeffs), gp ;;
}
{.mfi
// get significand
getf.sig GR_SIGNIF = f8
// will continue only for normal/denormal numbers
fclass.nm.unc p12, p7 = f8, 0x1b
mov GR_63 = 63 ;;
}
{.mfi
nop.m 0
nop.f 0
// GR_CONST2 = bias+63-8
mov GR_CONST2 = 0xffff+55
}
{.mfi
// GR_CONST1 = bias+15
mov GR_CONST1 = 0xffff+15
nop.f 0
// mask covering the 17-bit exponent field of the getf.exp result
mov GR_CONST3 = 0x1ffff ;;
}
{.mfi
// load start address for C_1...C_6 followed by T_table
ld8 GR_ADDR = [ GR_ADDR0 ]
nop.f 0
// get sign of argument
andcm GR_SGN = GR_EBIAS, GR_CONST3
}
{.mfi
// GR_D_ADDR = pointer to D_table
ld8 GR_D_ADDR = [ GR_D_ADDR0 ]
nop.f 0
// get argument exponent
and GR_ARGEXP = GR_CONST3, GR_EBIAS ;;
}
{.mfi
// Register frame: 1 input (r32), 4 locals (r33-r36), 4 outputs (r37-r40)
alloc GR_SREG = ar.pfs, 1, 4, 4, 0
nop.f 0
// p6 = 1 if sign = 1
cmp.ne p6, p8 = GR_SGN, r0
}
{.mfi
// p7 = 1 if exponent> = 15 (argument out of range)
cmp.ge p7, p0 = GR_ARGEXP, GR_CONST1
nop.f 0
// shift count that leaves the integer part plus 8 fraction bits
sub GR_EXPON = GR_CONST2, GR_ARGEXP ;;
}
{.mib
// load C_3, C_4
ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR ], 16
// get first exponent+8 bits
shr.u GR_LEADBITS = GR_SIGNIF, GR_EXPON
(p12) br.cond.spnt SPECIAL_exp2l
}
{.mib
mov GR_256 = 256
// exponent- = 63
sub GR_EM63 = GR_EBIAS, GR_63
(p7) br.cond.spnt OUT_RANGE_exp2l ;;
}
{.mlx
// load C_5, C_6
ldfpd FR_COEFF5, FR_COEFF6 = [ GR_ADDR ], 16
// GR_2P14 = 2^14
movl GR_2P14 = 0x46800000 ;;
}
{.mfi
// load C_1
ldfe FR_COEFF1 = [ GR_ADDR ], 16
// normalize x
fma.s0 f8 = f8, f1, f0
// GR_BM63 = bias-63
mov GR_BM63 = 0xffff-63 ;;
}
{.mlx
setf.s FR_2P14 = GR_2P14
// GR_UF_TEST = -2^14-62
movl GR_UF_TEST = 0xc6807c00
}
{.mfi
// load C_2
// (after this load GR_ADDR points at T_table)
ldfe FR_COEFF2 = [ GR_ADDR ], 16
nop.f 0
mov GR_255 = 255 ;;
}
{.mib
// get 8-bit index
and GR_INDEX = GR_255, GR_LEADBITS
// get K = integer part
shr.u GR_K = GR_LEADBITS, 8
nop.b 0 ;;
}
{.mmi
// if sign = 1 && f>0, set p7 = 1
(p6) cmp.gt.unc p7, p0 = GR_INDEX, r0
setf.s FR_UF_TEST = GR_UF_TEST
// significand of x with everything below the 8th fraction bit cleared
shl GR_KF = GR_LEADBITS, GR_EXPON ;;
}
{.mfi
// if sign = 1 && f>0, set f = 1-f
(p7) sub GR_INDEX = GR_256, GR_INDEX
nop.f 0
// if sign = 1 && f>0, set K = K+1
(p7) add GR_K = GR_K, r0, 1 ;;
}
{.mfi
// FR_EXP63 = 2^{expon-63}
setf.exp FR_EXP63 = GR_EM63
nop.f 0
nop.i 0 ;;
}
.pred.rel "mutex", p6, p8
{.mfi
// if sign = 0, set scale factor exponent S = K+bias-63
(p8) add GR_K = GR_K, GR_BM63
nop.f 0
// if sign = 1, set scale factor exponent S = -K+bias-63
(p6) sub GR_K = GR_BM63, GR_K ;;
}
{.mmi
// FR_KF0 = 2^{63-expon}*(K+f)
setf.sig FR_KF0 = GR_KF
nop.m 0
// GR_EMIN << 11 is the single-precision image of EMIN = 2-2^14
mov GR_EMIN = 0x18cfff ;;
}
{.mfi
// get T_table index
shladd GR_IT = GR_INDEX, 3, GR_ADDR
// p7 = 1 if x> = 2^14
fcmp.ge.s1 p7, p12 = f8, FR_2P14
// get D_table index
shladd GR_ID = GR_INDEX, 2, GR_D_ADDR ;;
}
{.mfi
// load T_table value
ldf8 FR_T = [ GR_IT ]
// p7 = 1 if x<-2^14-62
(p12) fcmp.lt.s1 p7, p0 = f8, FR_UF_TEST
// GR_EMIN1 = single-precision image of EMIN = 2-2^14
shl GR_EMIN1 = GR_EMIN, 11 ;;
}
{.mmb
// FR_2EXP = scale factor = 2^{K-63}
setf.exp FR_2EXP = GR_K
// load D_table value
ldfs FR_D = [ GR_ID ]
(p7) br.cond.spnt OUT_RANGE_exp2l ;;
}
{.mfi
nop.m 0
// get r = x-(K+f)
fnma.s1 FR_R = FR_KF0, FR_EXP63, f8
nop.i 0 ;;
}
{.mfi
// FR_EMIN = EMIN
setf.s FR_EMIN = GR_EMIN1
// P34 = C_4*r+C_3
fma.s1 FR_P34 = FR_COEFF4, FR_R, FR_COEFF3
nop.i 0
}
{.mfi
nop.m 0
// P56 = C_6*r+C_5
fma.s1 FR_P56 = FR_COEFF6, FR_R, FR_COEFF5
nop.i 0 ;;
}
{.mfi
nop.m 0
// r*r
fma.s1 FR_R2 = FR_R, FR_R, f0
nop.i 0
}
{.mfi
nop.m 0
// P12 = C_2*r+C_1
fma.s1 FR_P12 = FR_COEFF2, FR_R, FR_COEFF1
nop.i 0 ;;
}
{.mfi
nop.m 0
// TS = T * scale factor
fma.s1 FR_TS = FR_T, FR_2EXP, f0
nop.i 0
}
{.mfi
nop.m 0
// P36 = P34+r2*P56
fma.s1 FR_P36 = FR_P56, FR_R2, FR_P34
nop.i 0 ;;
}
{.mfi
nop.m 0
// P02 = D+r*P12
fma.s1 FR_P02 = FR_P12, FR_R, FR_D
nop.i 0
}
{.mfi
nop.m 0
// FR_R3 = r*r2
fma.s1 FR_R3 = FR_R2, FR_R, f0
nop.i 0 ;;
}
{.mfi
nop.m 0
// P06 = P02+r3*P36
fma.s1 FR_P06 = FR_P36, FR_R3, FR_P02
nop.i 0 ;;
}
{.mfi
nop.m 0
// underflow (x<EMIN) ?
// p8 = return directly; p6 = report underflow through the error region
fcmp.lt.s0 p6, p8 = f8, FR_EMIN
nop.i 0 ;;
}
{.mfb
nop.m 0
// result = T+T*P06
fma.s0 f8 = FR_TS, FR_P06, FR_TS
// return
(p8) br.ret.sptk b0
}
{.mfb
(p6) mov GR_Parameter_TAG = 160
nop.f 0
(p6) br.cond.sptk __libm_error_region ;;
}
// Zero, infinity and NaN inputs
SPECIAL_exp2l:
{.mfi
nop.m 0
// x = -Infinity ?
fclass.m p6, p0 = f8, 0x22
nop.i 0 ;;
}
{.mfi
nop.m 0
// x = +Infinity ?
fclass.m p7, p0 = f8, 0x21
nop.i 0 ;;
}
{.mfi
nop.m 0
// x = +/-Zero ?
fclass.m p8, p0 = f8, 0x7
nop.i 0
}
{.mfb
nop.m 0
// exp2l(-Infinity) = 0
(p6) mov f8 = f0
(p6) br.ret.spnt b0 ;;
}
{.mfb
nop.m 0
// exp2l(+Infinity) = +Infinity
// (f8 already holds +Infinity, so it is returned unchanged)
nop.f 0
(p7) br.ret.spnt b0 ;;
}
{.mfb
nop.m 0
// exp2l(+/-0) = 1
(p8) mov f8 = f1
(p8) br.ret.spnt b0 ;;
}
{.mfb
nop.m 0
// Remaining cases: NaNs
fma.s0 f8 = f8, f1, f0
br.ret.sptk b0 ;;
}
// Argument too large in magnitude.  p6/p8 still hold the sign test made
// at entry (p6: x < 0 -> underflow, p8: x >= 0 -> overflow).  The result
// is manufactured by squaring a huge or tiny power of two; control then
// runs off the end of this routine into __libm_error_region (no branch
// is taken here).
OUT_RANGE_exp2l:
{.mfi
// overflow: p8 = 1
(p8) mov GR_EM63 = 0x1fffe
// normalize input, to detect pseudo-zeroes
fma.s0 f8 = f8, f1, f0
nop.i 0 ;;
}
{.mfi
nop.m 0
// f8 = 0?
fcmp.eq.s1 p7, p0 = f8, f0
nop.i 0 ;;
}
{.mmb
(p8) mov GR_Parameter_TAG = 159
(p8) setf.exp FR_TS = GR_EM63
nop.b 999 ;;
}
{.mfb
nop.m 0
// pseudo-zero
// (return 1 without calling the error handler)
(p7) mov f8 = f1
(p7) br.ret.sptk b0 ;;
}
{.mfi
nop.m 999
// create overflow
(p8) fma.s0 f8 = FR_TS, FR_TS, f0
nop.i 999
}
{.mii
nop.m 0
// underflow: p6 = 1
(p6) mov GR_EM63 = 1
nop.i 0 ;;
}
{.mmb
(p6) mov GR_Parameter_TAG = 160
(p6) setf.exp FR_TS = GR_EM63
nop.b 999 ;;
}
{.mfb
nop.m 999
// create underflow
(p6) fma.s0 f8 = FR_TS, FR_TS, f0
nop.b 0 ;;
}
GLOBAL_LIBM_END(exp2l)
// Error tail for exp2l: calls __libm_error_support and returns its result.
//   In:  f8 (FR_RESULT) = result computed by exp2l
//        GR_Parameter_TAG (r40) = error tag set by the caller path
//   Out: f8 = extended-precision value read back from the result slot
//
// A 64-byte frame is created; relative to the new sp the slots are
//   sp+16  parameter 1 (FR_X)       -> GR_Parameter_X
//   sp+32  parameter 2 (FR_Y)       -> GR_Parameter_Y
//   sp+48  parameter 3 (FR_RESULT)  -> GR_Parameter_RESULT
// and the three addresses plus the tag are passed in the output registers.
//
// NOTE(review): FR_X is f10, the same register as FR_COEFF3, so on the
// paths that loaded C_3 the value stored as parameter 1 is that
// coefficient rather than x.  Confirm that __libm_error_support does not
// depend on it for tags 159/160.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi
// Computed from the old sp: old sp - 32 == new sp + 32
add GR_Parameter_Y = -32, sp // Parameter 2 value
nop.f 0
.save ar.pfs, GR_SAVE_PFS
mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
}
{.mfi
.fframe 64
add sp = -64, sp // Create new stack
nop.f 0
mov GR_SAVE_GP = gp ;; // Save gp
}
{.mmi
// The post-increment leaves GR_Parameter_Y at new sp + 48 (result slot)
stfe [ GR_Parameter_Y ] = FR_Y, 16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16, sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0 = b0 ;; // Save b0
}
.body
{.mib
stfe [ GR_Parameter_X ] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0, GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{.mib
stfe [ GR_Parameter_Y ] = FR_RESULT // STORE Parameter 3 on stack
// Step back from the result slot to the parameter 2 slot (new sp + 32)
add GR_Parameter_Y = -16, GR_Parameter_Y
br.call.sptk b0 = __libm_error_support# ;; // Call error handling function
}
{.mmi
// Recompute the result address after the call instead of reusing the
// output register (see the 08/15/00 history entry in the file header)
add GR_Parameter_RESULT = 48, sp
nop.m 0
nop.i 0 ;;
}
{.mmi
ldfe f8 = [ GR_Parameter_RESULT ] // Get return result off stack
.restore sp
add sp = 64, sp // Restore stack pointer
mov b0 = GR_SAVE_B0 ;; // Restore return address
}
{.mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 ;; // Return
}
LOCAL_LIBM_END(__libm_error_region)
// Declare the external error handler called above
.type __libm_error_support#, @function
.global __libm_error_support#

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +1,10 @@
.file "fmod.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -36,16 +35,20 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New Algorithm
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 Initial version
// 03/02/00 New Algorithm
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/28/00 Set FR_Y to f9
// 11/28/00 Set FR_Y to f9
// 03/11/02 Fixed flags for fmod(qnan,zero)
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
//
// API
//====================================================================
@ -81,11 +84,6 @@
// General registers: r2,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15
#include "libm_support.h"
.section .text
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
@ -101,17 +99,9 @@ FR_Y = f9
FR_RESULT = f8
.proc fmod#
.align 32
.global fmod#
.align 32
.section .text
GLOBAL_IEEE754_ENTRY(fmod)
fmod:
#ifdef _LIBC
.global __ieee754_fmod
.type __ieee754_fmod,@function
__ieee754_fmod:
#endif
// inputs in f8, f9
// result in f8
@ -138,7 +128,7 @@ __ieee754_fmod:
// Y +-NAN, +-inf, +-0? p7
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0xe7
fclass.m.unc p7,p0 = f9, 0xe7
nop.i 999;;
}
@ -149,14 +139,14 @@ __ieee754_fmod:
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f8, 0xe3
fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999
}
// |x| < |y|? Return x p8
{ .mfi
nop.m 999
(p0) fcmp.lt.unc.s1 p8,p0 = f6,f7
fcmp.lt.unc.s1 p8,p0 = f6,f7
nop.i 999 ;;
}
@ -187,8 +177,8 @@ __ieee754_fmod:
nop.i 0
}
{.bbb
(p9) br.cond.spnt L(FMOD_X_NAN_INF)
(p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
(p9) br.cond.spnt FMOD_X_NAN_INF
(p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
@ -250,7 +240,7 @@ __ieee754_fmod:
}
.align 32
L(loop53):
loop53:
{.mfi
nop.m 0
// compare q2, 2^32
@ -354,59 +344,76 @@ L(loop53):
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
br.cond.sptk L(loop53);;
br.cond.sptk loop53;;
}
L(FMOD_X_NAN_INF):
FMOD_X_NAN_INF:
// Y zero ?
{.mfi
nop.m 0
fclass.m p10,p0=f8,0xc3 // Test x=nan
nop.i 0
}
{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
fma.s0 f8=f8,f1,f0
nop.i 0
}
{.mfi
nop.m 0
(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
nop.i 0;;
}
{.mfb
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
nop.i 0;;
(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
}
{.mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FMOD_Y_ZERO);;
(p11) br.cond.spnt FMOD_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p9 = f8, 0x23
fclass.m.unc p8,p9 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
{.mfi
nop.m 999
nop.m 999
(p8) fclass.m p9,p8=f9,0xc3
nop.i 0;;
nop.i 0;;
}
{.mfi
nop.m 999
nop.m 999
(p8) frcpa.s0 f8,p0 = f8,f8
nop.i 0
}
{ .mfi
nop.m 999
// also set Denormal flag if necessary
// also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p8) fma.d f8=f8,f1,f0
nop.b 999 ;;
(p8) fma.d.s0 f8=f8,f1,f0
nop.b 999 ;;
}
{ .mfb
@ -416,35 +423,35 @@ L(FMOD_X_NAN_INF):
}
L(FMOD_Y_NAN_INF_ZERO):
FMOD_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma.d f8=f8,f1,f0
(p7) fma.d.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0xc3
fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p9) fma.d f8=f9,f1,f0
(p9) fma.d.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
L(FMOD_Y_ZERO):
FMOD_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@ -452,13 +459,13 @@ L(FMOD_Y_ZERO):
{.mfi
nop.m 0
// set Invalid
frcpa f12,p0=f0,f0
frcpa.s0 f12,p0=f0,f0
nop.i 0
}
// X NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
@ -469,34 +476,30 @@ L(FMOD_Y_ZERO):
{.mfi
nop.m 999
(p9) frcpa f11,p7=f8,f0
(p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
(p10) frcpa f11,p7 = f9,f9
(p0) mov GR_Parameter_TAG = 121 ;;
(p10) frcpa.s0 f11,p7 = f9,f9
mov GR_Parameter_TAG = 121 ;;
}
{ .mfi
nop.m 999
(p0) fmerge.s f10 = f8, f8
fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfb
nop.m 999
(p0) fma.d f8=f11,f1,f0
(p0) br.sptk __libm_error_region;;
fma.d.s0 f8=f11,f1,f0
br.sptk __libm_error_region;;
}
.endp fmod
ASM_SIZE_DIRECTIVE(fmod)
ASM_SIZE_DIRECTIVE(__ieee754_fmod)
.proc __libm_error_region
__libm_error_region:
GLOBAL_IEEE754_END(fmod)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -520,7 +523,7 @@ __libm_error_region:
{ .mib
stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
nop.b 0 // Parameter 3 address
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
@ -544,8 +547,12 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -1,10 +1,10 @@
.file "fmodf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,9 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -37,16 +35,20 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New Algorithm
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 Initial version
// 03/02/00 New Algorithm
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/28/00 Set FR_Y to f9
// 11/28/00 Set FR_Y to f9
// 03/11/02 Fixed flags for fmodf(qnan,zero)
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
//
// API
//====================================================================
@ -82,10 +84,6 @@
// General registers: r2,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15
#include "libm_support.h"
.section .text
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
@ -101,18 +99,9 @@ FR_Y = f9
FR_RESULT = f8
.section .text
GLOBAL_IEEE754_ENTRY(fmodf)
.proc fmodf#
.align 32
.global fmodf#
.align 32
fmodf:
#ifdef _LIBC
.global __ieee754_fmodf
.type __ieee754_fmodf,@function
__ieee754_fmodf:
#endif
// inputs in f8, f9
// result in f8
@ -140,7 +129,7 @@ __ieee754_fmodf:
// Y +-NAN, +-inf, +-0? p7
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0xe7
fclass.m.unc p7,p0 = f9, 0xe7
nop.i 999;;
}
@ -151,14 +140,14 @@ __ieee754_fmodf:
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f8, 0xe3
fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999
}
// |x| < |y|? Return x p8
{ .mfi
nop.m 999
(p0) fcmp.lt.unc.s1 p8,p0 = f6,f7
fcmp.lt.unc.s1 p8,p0 = f6,f7
nop.i 999 ;;
}
@ -189,8 +178,8 @@ __ieee754_fmodf:
nop.i 0
}
{.bbb
(p9) br.cond.spnt L(FMOD_X_NAN_INF)
(p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
(p9) br.cond.spnt FMOD_X_NAN_INF
(p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
@ -253,7 +242,7 @@ __ieee754_fmodf:
}
.align 32
L(loop24):
loop24:
{.mfi
nop.m 0
// compare q2, 2^32
@ -357,64 +346,80 @@ L(loop24):
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
br.cond.sptk L(loop24);;
br.cond.sptk loop24;;
}
{ .mmb
nop.m 0
nop.m 0
br.ret.sptk b0;;
nop.m 0
nop.m 0
br.ret.sptk b0;;
}
L(FMOD_X_NAN_INF):
FMOD_X_NAN_INF:
// Y zero ?
{.mfi
nop.m 0
fclass.m p10,p0=f8,0xc3 // Test x=nan
nop.i 0
}
{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
fma.s0 f8=f8,f1,f0
nop.i 0
}
{.mfi
nop.m 0
(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
nop.i 0;;
}
{.mfb
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
nop.i 0;;
(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
}
{.mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FMOD_Y_ZERO);;
(p11) br.cond.spnt FMOD_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p9 = f8, 0x23
fclass.m.unc p8,p9 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
{.mfi
nop.m 999
nop.m 999
(p8) fclass.m p9,p8=f9,0xc3
nop.i 0;;
nop.i 0;;
}
{.mfi
nop.m 999
nop.m 999
(p8) frcpa.s0 f8,p0 = f8,f8
nop.i 0
}
{ .mfi
nop.m 999
// also set Denormal flag if necessary
// also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p8) fma.s f8=f8,f1,f0
nop.b 999 ;;
(p8) fma.s.s0 f8=f8,f1,f0
nop.b 999 ;;
}
{ .mfb
@ -424,35 +429,35 @@ L(FMOD_X_NAN_INF):
}
L(FMOD_Y_NAN_INF_ZERO):
FMOD_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma.s f8=f8,f1,f0
(p7) fma.s.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0xc3
fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p9) fma.s f8=f9,f1,f0
(p9) fma.s.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
L(FMOD_Y_ZERO):
FMOD_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@ -460,13 +465,13 @@ L(FMOD_Y_ZERO):
{.mfi
nop.m 0
// set Invalid
frcpa f12,p0=f0,f0
frcpa.s0 f12,p0=f0,f0
nop.i 999
}
// X NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
@ -477,43 +482,39 @@ L(FMOD_Y_ZERO):
{.mfi
nop.m 999
(p9) frcpa f11,p7=f8,f0
(p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
(p10) frcpa f11,p7 = f0,f0
(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
(p0) fmerge.s f10 = f8, f8
fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
(p0) fma.s f8=f11,f1,f0
fma.s.s0 f8=f11,f1,f0
nop.i 999;;
}
L(EXP_ERROR_RETURN):
EXP_ERROR_RETURN:
{ .mib
nop.m 0
(p0) mov GR_Parameter_TAG=122
(p0) br.sptk __libm_error_region;;
mov GR_Parameter_TAG=122
br.sptk __libm_error_region;;
}
.endp fmodf
ASM_SIZE_DIRECTIVE(fmodf)
ASM_SIZE_DIRECTIVE(__ieee754_fmodf)
.proc __libm_error_region
__libm_error_region:
GLOBAL_IEEE754_END(fmodf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -537,7 +538,7 @@ __libm_error_region:
{ .mib
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
nop.b 0 // Parameter 3 address
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
@ -561,8 +562,9 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -1,11 +1,10 @@
.file "fmodl.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -36,16 +35,20 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New Algorithm
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 Initial version
// 03/02/00 New Algorithm
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/28/00 Set FR_Y to f9
// 11/28/00 Set FR_Y to f9
// 03/11/02 Fixed flags for fmodl(qnan,zero)
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
//
// API
//====================================================================
@ -76,10 +79,6 @@
// General registers: r2,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15
#include "libm_support.h"
.section .text
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
@ -95,18 +94,9 @@ FR_Y = f9
FR_RESULT = f8
.section .text
GLOBAL_IEEE754_ENTRY(fmodl)
.proc fmodl#
.align 32
.global fmodl#
.align 32
fmodl:
#ifdef _LIBC
.global __ieee754_fmodl
.type __ieee754_fmodl,@function
__ieee754_fmodl:
#endif
// inputs in f8, f9
// result in f8
@ -152,7 +142,7 @@ cmp.eq p7,p10=r29,r0;;
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p11 = f8, 0xe3
fclass.m.unc p9,p11 = f8, 0xe3
nop.i 999
}
@ -178,7 +168,7 @@ cmp.eq p7,p10=r29,r0;;
// Y +-NAN, +-inf, +-0? p7
{ .mfi
nop.m 999
// pseudo-NaN ?
// pseudo-NaN ?
(p10) fclass.nm p7,p0 = f9, 0xff
nop.i 999
}
@ -209,8 +199,8 @@ cmp.eq p7,p10=r29,r0;;
nop.i 0
}
{.bbb
(p9) br.cond.spnt L(FMOD_X_NAN_INF)
(p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
(p9) br.cond.spnt FMOD_X_NAN_INF
(p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
@ -273,7 +263,7 @@ cmp.eq p7,p10=r29,r0;;
.align 32
L(loop64):
loop64:
{.mfi
nop.m 0
// compare q2, 2^32
@ -378,43 +368,59 @@ L(loop64):
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
br.cond.sptk L(loop64);;
br.cond.sptk loop64;;
}
L(FMOD_X_NAN_INF):
FMOD_X_NAN_INF:
// Y zero ?
{.mfi
nop.m 0
fclass.m p10,p0=f8,0xc3 // Test x=nan
nop.i 0
}
{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
fma.s0 f8=f8,f1,f0
nop.i 0
}
{.mfi
nop.m 0
(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
nop.i 0;;
}
{.mfb
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
nop.i 0;;
(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
}
{.mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FMOD_Y_ZERO);;
(p11) br.cond.spnt FMOD_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
// set p7 to 0
cmp.ne p7,p0=r0,r0
(p0) fclass.m.unc p8,p9 = f8, 0x23
// set p7 to 0
cmp.ne p7,p0=r0,r0
fclass.m.unc p8,p9 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
{.mfi
nop.m 999
(p8) fclass.m p9,p8=f9,0xc3
nop.i 0;;
nop.i 0;;
}
// Y not pseudo-zero ? (r29 holds significand)
{.mii
@ -423,13 +429,13 @@ L(FMOD_X_NAN_INF):
nop.i 0;;
}
{.mfi
nop.m 999
nop.m 999
(p8) frcpa.s0 f8,p0 = f8,f8
nop.i 0
}
{ .mfi
nop.m 999
// also set Denormal flag if necessary
// also set Denormal flag if necessary
(p7) fnma.s0 f9=f9,f1,f9
nop.i 999 ;;
}
@ -437,7 +443,7 @@ L(FMOD_X_NAN_INF):
{ .mfb
nop.m 999
(p8) fma.s0 f8=f8,f1,f0
nop.b 999 ;;
nop.b 999 ;;
}
{ .mfb
@ -447,24 +453,24 @@ L(FMOD_X_NAN_INF):
}
L(FMOD_Y_NAN_INF_ZERO):
FMOD_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma f8=f8,f1,f0
(p7) fma.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f9, 0xc3
fclass.m.unc p9,p10 = f9, 0xc3
nop.i 999 ;;
}
{ .mfi
@ -475,11 +481,11 @@ L(FMOD_Y_NAN_INF_ZERO):
{ .mfb
nop.m 999
(p9) fma f8=f9,f1,f0
(p9) fma.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
L(FMOD_Y_ZERO):
FMOD_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@ -487,13 +493,13 @@ L(FMOD_Y_ZERO):
{.mfi
nop.m 0
// set Invalid
frcpa f12,p0=f0,f0
frcpa.s0 f12,p0=f0,f0
nop.i 0
}
// X NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
@ -504,36 +510,33 @@ L(FMOD_Y_ZERO):
{.mfi
nop.m 999
(p9) frcpa f11,p7=f8,f0
(p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
(p10) frcpa f11,p7 = f9,f9
(p0) mov GR_Parameter_TAG = 120 ;;
(p10) frcpa.s0 f11,p7 = f9,f9
mov GR_Parameter_TAG = 120 ;;
}
{ .mfi
nop.m 999
(p0) fmerge.s f10 = f8, f8
fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfb
nop.m 999
(p0) fma f8=f11,f1,f0
(p0) br.sptk __libm_error_region;;
fma.s0 f8=f11,f1,f0
br.sptk __libm_error_region;;
}
.endp fmodl
ASM_SIZE_DIRECTIVE(fmodl)
ASM_SIZE_DIRECTIVE(__ieee754_fmodl)
GLOBAL_IEEE754_END(fmodl)
.proc __libm_error_region
__libm_error_region:
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -557,7 +560,7 @@ __libm_error_region:
{ .mib
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
nop.b 0 // Parameter 3 address
}
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
@ -581,10 +584,12 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -1,11 +1,10 @@
.file "hypot.asm"
.file "hypot.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
// Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -36,24 +35,27 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// *********************************************************************
//*********************************************************************
//
// History:
// 2/02/00 hand-optimized
// 4/04/00 Unwind support added
// 6/20/00 new version
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 hand-optimized
// 04/04/00 Unwind support added
// 06/20/00 new version
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 04/17/03 Added missing mutex directive
//
// *********************************************************************
//*********************************************************************
// Function: hypot(x,y) = sqrt(x^2 + y^2) for double precision values
// x and y
// Also provides cabs functionality.
//
// *********************************************************************
//*********************************************************************
//
// Resources Used:
//
@ -68,7 +70,7 @@
//
// Predicate Registers: p6 - p10
//
// *********************************************************************
//*********************************************************************
//
// IEEE Special Conditions:
//
@ -78,7 +80,7 @@
// hypot(QNaN and anything) = QNaN
// hypot(SNaN and anything ) = QNaN
//
// *********************************************************************
//*********************************************************************
//
// Implementation:
// x2 = x * x in double-extended
@ -86,9 +88,7 @@
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to double
//
// *********************************************************************
#include "libm_support.h"
//*********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@ -103,23 +103,11 @@ FR_Y = f33
FR_RESULT = f8
.section .text
#ifndef _LIBC
.proc cabs#
.global cabs#
cabs:
.endp cabs
#endif
.proc hypot#
.global hypot#
.align 64
hypot:
#ifdef _LIBC
.global __hypot
__hypot:
.global __ieee754_hypot
__ieee754_hypot:
#endif
LOCAL_LIBM_ENTRY(cabs)
LOCAL_LIBM_END(cabs)
GLOBAL_IEEE754_ENTRY(hypot)
{.mfi
alloc r32= ar.pfs,0,4,4,0
// Compute x*x
@ -221,6 +209,7 @@ __ieee754_hypot:
mov r2=0x107fb;;
}
.pred.rel "mutex",p7,p8
{.mfb
nop.m 0
// if f8=Infinity or f9=Zero, return |f8|
@ -394,11 +383,8 @@ __ieee754_hypot:
// No overflow
(p9) br.ret.sptk b0;;
}
.endp hypot
ASM_SIZE_DIRECTIVE(hypot)
.proc __libm_error_region
__libm_error_region:
GLOBAL_IEEE754_END(hypot)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -445,7 +431,8 @@ __libm_error_region:
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -1,11 +1,10 @@
.file "hypotf.asm"
.file "hypotf.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
// Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -36,24 +35,27 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// *********************************************************************
//*********************************************************************
//
// History:
// 2/02/00 hand-optimized
// 4/04/00 Unwind support added
// 6/26/00 new version
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 hand-optimized
// 04/04/00 Unwind support added
// 06/26/00 new version
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 04/17/03 Added missing mutex directive
//
// *********************************************************************
//*********************************************************************
// Function: hypotf(x,y) = sqrt(x^2 + y^2) for single precision values
// x and y
// Also provides cabsf functionality.
//
// *********************************************************************
//*********************************************************************
//
// Resources Used:
//
@ -68,7 +70,7 @@
//
// Predicate Registers: p6 - p10
//
// *********************************************************************
//*********************************************************************
//
// IEEE Special Conditions:
//
@ -78,7 +80,7 @@
// hypotf(QNaN and anything) = QNaN
// hypotf(SNaN and anything ) = QNaN
//
// *********************************************************************
//*********************************************************************
//
// Implementation:
// x2 = x * x in double-extended
@ -86,9 +88,7 @@
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to single precision
//
// *********************************************************************
#include "libm_support.h"
//*********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@ -103,23 +103,10 @@ FR_Y = f15
FR_RESULT = f8
.section .text
#ifndef _LIBC
.proc cabsf#
.global cabsf#
cabsf:
.endp cabsf
#endif
.proc hypotf#
.global hypotf#
.align 64
hypotf:
#ifdef _LIBC
.global __hypotf
__hypotf:
.global __ieee754_hypotf
__ieee754_hypotf:
#endif
LOCAL_LIBM_ENTRY(cabsf)
LOCAL_LIBM_END(cabsf)
GLOBAL_IEEE754_ENTRY(hypotf)
{.mfi
alloc r32= ar.pfs,0,4,4,0
// Compute x*x
@ -207,6 +194,7 @@ __ieee754_hypotf:
nop.i 0;;
}
.pred.rel "mutex",p7,p8
{.mfb
nop.m 0
// if f8=Infinity or f9=Zero, return |f8|
@ -348,15 +336,12 @@ __ieee754_hypotf:
// No overflow
(p9) br.ret.sptk b0;;
}
.endp hypotf
ASM_SIZE_DIRECTIVE(hypotf)
.proc __libm_error_region
__libm_error_region:
GLOBAL_IEEE754_END(hypotf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii
add GR_Parameter_Y=-32,sp // Parameter 2 value
(p0) mov GR_Parameter_TAG = 47
mov GR_Parameter_TAG = 47
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
@ -400,8 +385,9 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -1,11 +1,10 @@
.file "hypotl.asm"
.file "hypotl.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
// Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -36,24 +35,26 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// *********************************************************************
//*********************************************************************
//
// History:
// 2/02/00 hand-optimized
// 4/04/00 Unwind support added
// 6/20/00 new version
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 hand-optimized
// 04/04/00 Unwind support added
// 06/20/00 new version
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// *********************************************************************
//*********************************************************************
// Function: hypotl(x,y) = sqrt(x^2 + y^2) for double extended values
// x and y
// Also provides cabsl functionality.
//
// *********************************************************************
//*********************************************************************
//
// Resources Used:
//
@ -68,7 +69,7 @@
//
// Predicate Registers: p6 - p10
//
// *********************************************************************
//*********************************************************************
//
// IEEE Special Conditions:
//
@ -78,7 +79,7 @@
// hypotl(QNaN and anything) = QNaN
// hypotl(SNaN and anything ) = QNaN
//
// *********************************************************************
//*********************************************************************
//
// Implementation:
// x2 = x * x in double-extended
@ -86,9 +87,7 @@
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to double extended
//
// *********************************************************************
#include "libm_support.h"
//*********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@ -103,23 +102,10 @@ FR_Y = f33
FR_RESULT = f8
.section .text
#ifndef _LIBC
.proc cabsl#
.global cabsl#
cabsl:
.endp cabsl
#endif
.proc hypotl#
.global hypotl#
.align 64
hypotl:
#ifdef _LIBC
.global __hypotl
__hypotl:
.global __ieee754_hypotl
__ieee754_hypotl:
#endif
LOCAL_LIBM_ENTRY(cabsl)
LOCAL_LIBM_END(cabsl)
GLOBAL_IEEE754_ENTRY(hypotl)
{.mfi
alloc r32= ar.pfs,0,4,4,0
// Compute x*x
@ -434,11 +420,8 @@ __ieee754_hypotl:
// No overflow
(p9) br.ret.sptk b0;;
}
.endp hypotl
ASM_SIZE_DIRECTIVE(hypotl)
.proc __libm_error_region
__libm_error_region:
GLOBAL_IEEE754_END(hypotl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -485,7 +468,9 @@ __libm_error_region:
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1,71 @@
/* file: lgamma_r.c */
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
// Contributed 2002 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//
// History
//==============================================================
// 02/04/02: Initial version
// 02/22/02: Removed lgammaf_r, gammaf_r
/*
// FUNCTIONS: double lgamma_r(double x, int* signgam)
// double gamma_r(double x, int* signgam)
// Natural logarithm of GAMMA function
*/
#include "libm_support.h"
/* Double-precision log-gamma kernel, defined outside this file.  It is
   handed the argument, the address of the caller's sign variable and
   the size in bytes of that variable.  */
extern double __libm_lgamma(double x, int *signgam, int signgamsz);

/* Reentrant natural logarithm of the gamma function.  Forwards X and
   SIGNGAM to __libm_lgamma, passing sizeof(int) as the size of the sign
   variable, and returns the kernel's result.  Also exported under the
   name lgamma_r as a weak alias.  */
double
__ieee754_lgamma_r(double x, int *signgam)
{
    const int sign_bytes = sizeof(*signgam);

    return __libm_lgamma(x, signgam, sign_bytes);
}
weak_alias(__ieee754_lgamma_r, lgamma_r)
#ifndef _LIBC
/* Only compiled when _LIBC is not defined.  gamma_r is provided as a
   second entry point with exactly the same behaviour as lgamma_r: it
   forwards to __libm_lgamma with the size of the sign variable.  */
double
__ieee754_gamma_r(double x, int *signgam)
{
    const int sign_bytes = sizeof(*signgam);

    return __libm_lgamma(x, signgam, sign_bytes);
}
weak_alias(__ieee754_gamma_r, gamma_r)
#endif

View File

@ -0,0 +1,71 @@
/* file: lgammaf_r.c */
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
// Contributed 2002 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//
// History
//==============================================================
// 02/04/02: Initial version
// 02/22/02: Removed lgamma_r, gamma_r
/*
// FUNCTIONS: float lgammaf_r(float x, int* signgam)
// float gammaf_r(float x, int* signgam)
// Natural logarithm of GAMMA function
*/
#include "libm_support.h"
/* Single-precision log-gamma kernel, defined outside this file.  It is
   handed the argument, the address of the caller's sign variable and
   the size in bytes of that variable.  */
extern float __libm_lgammaf(float x, int *signgam, int signgamsz);

/* Reentrant natural logarithm of the gamma function (float).  Forwards
   X and SIGNGAM to __libm_lgammaf, passing sizeof(int) as the size of
   the sign variable, and returns the kernel's result.  Also exported
   under the name lgammaf_r as a weak alias.  */
float
__ieee754_lgammaf_r(float x, int *signgam)
{
    const int sign_bytes = sizeof(*signgam);

    return __libm_lgammaf(x, signgam, sign_bytes);
}
weak_alias(__ieee754_lgammaf_r, lgammaf_r)
#ifndef _LIBC
/* Only compiled when _LIBC is not defined.  gammaf_r is provided as a
   second entry point with exactly the same behaviour as lgammaf_r: it
   forwards to __libm_lgammaf with the size of the sign variable.  */
float
__ieee754_gammaf_r(float x, int *signgam)
{
    const int sign_bytes = sizeof(*signgam);

    return __libm_lgammaf(x, signgam, sign_bytes);
}
weak_alias(__ieee754_gammaf_r, gammaf_r)
#endif

View File

@ -0,0 +1,70 @@
/* file: lgammal_r.c */
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
// Contributed 2002 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//
// History
//==============================================================
// 08/15/02: Initial version
/*
// FUNCTIONS: long double lgammal_r(long double x, int* signgam)
// long double gammal_r(long double x, int* signgam)
// Natural logarithm of GAMMA function
*/
#include "libm_support.h"
/* Long-double log-gamma kernel, defined outside this file.  It is
   handed the argument, the address of the caller's sign variable and
   the size in bytes of that variable.
   The return type is declared as long double to match the long double
   wrappers in this file; declaring it as double mismatched the type of
   the value being returned and formally narrowed the result.  */
extern long double __libm_lgammal(long double /*x*/, int* /*signgam*/, int /*signgamsz*/);

/* Reentrant natural logarithm of the gamma function (long double).
   Forwards X and SIGNGAM to __libm_lgammal, passing sizeof(int) as the
   size of the sign variable, and returns the kernel's result.  Also
   exported under the name lgammal_r as a weak alias.  */
long double __ieee754_lgammal_r(long double x, int* signgam)
{
    return __libm_lgammal(x, signgam, sizeof(*signgam));
}
weak_alias(__ieee754_lgammal_r, lgammal_r)
#ifndef _LIBC
/* Only compiled when _LIBC is not defined.  gammal_r is provided as a
   second entry point with exactly the same behaviour as lgammal_r: it
   forwards to __libm_lgammal with the size of the sign variable.  */
long double
__ieee754_gammal_r(long double x, int *signgam)
{
    const int sign_bytes = sizeof(*signgam);

    return __libm_lgammal(x, signgam, sign_bytes);
}
weak_alias(__ieee754_gammal_r, gammal_r)
#endif

File diff suppressed because it is too large Load Diff

710
sysdeps/ia64/fpu/e_log2.S Normal file
View File

@ -0,0 +1,710 @@
.file "log2.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//=================================================================
// 09/11/00 Initial version
// 03/19/01 Added one polynomial coefficient, to improve accuracy
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 04/18/03 Reformatted T[255]
//
// API
//=================================================================
// double log2(double)
//
// Overview of operation
//=================================================================
// Background
//
// Implementation
//
// Let x = 2^l * m, where m=1.b1 b2 ... b8 b9 ... b52
// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8 (table index)
// j=0 if f<128; j=1 if f>=128
// T is a table that stores log2(1/y) (in entries 1..255) rounded to
// double extended precision; f is used as an index; T[255]=0
//
// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m),
// and 0 is used instead of T[0]
// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used
// for m=2(1-r'), 0<=r'<2^{-9})
//
// log2(x) is approximated as
// (l-j) + T[f] + (c1*r+c2*r^2+...+c7*r^7), if f>0
//
// Special values
//=================================================================
// log2(0)=-inf, raises Divide by Zero
// log2(+inf)=inf
// log2(x)=NaN, raises Invalid if x<0
//
// Registers used
//==============================================================
// f6-f15, f32-f33
// r2-r3, r23-r30
// p6,p7,p8,p10,p12
//
// Symbolic register names used by log2 and its error path.
// log2 allocates its frame with "alloc r32=ar.pfs,1,4,4,0", i.e. one
// input (r32), four locals (r33-r36) and four outputs (r37-r40).
// Locals: values preserved across the call to __libm_error_support.
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35 // This reg. can safely be used
// GR_SAVE_SP is defined but not referenced by the log2 code in this file
GR_SAVE_SP = r36
// Outputs: the four arguments handed to __libm_error_support
// (addresses of x, y and the result on the stack, plus the error tag).
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// Floating-point values stored to the stack by __libm_error_region.
// f10 is also loaded with the C_4 coefficient on the log2 fast path.
FR_X = f10
// f1 is the constant +1.0 register; log2 has no second argument
FR_Y = f1
FR_RESULT = f8
// Data tables
//==============================================================
RODATA
.align 16
// Polynomial coefficients for log2.
// Byte layout relied upon by the code in log2:
//   +0  C_4, C_5   two IEEE doubles (-1/4, 1/5)       loaded with ldfpd
//   +16 C_6, C_7   two IEEE doubles (-1/6, 1/7)       loaded with ldfpd
//   +32 C_1        80-bit extended, log2(e)=1.4427... loaded with ldfe
//   +48 C_3        80-bit extended, 1/3               loaded with ldfe
// Extended values are written as (significand, sign/exponent) words.
// log2 steps its pointer 64 bytes past the start of this object and then
// uses it as the base of T_table, so T_table must follow immediately.
LOCAL_OBJECT_START(poly_coeffs)
data8 0xbfd0000000000000, 0x3fc999999999999a //C_4, C_5
data8 0xbfc5555555555555, 0x3fc2492492492492 //C_6, C_7
data8 0xb8aa3b295c17f0bc, 0x00003fff // C_1
data8 0xaaaaaaaaaaaaaaab, 0x00003ffd // C_3=1/3
LOCAL_OBJECT_END(poly_coeffs)
// T_table: 256 entries of 16 bytes, indexed by f (the 8 significand bits
// after the leading 1) as T_table + 16*f.
// Each entry is an 80-bit extended value written as
// (64-bit significand, sign/exponent word); per the file header it holds
// log2(1/y) with y=frcpa(m).
// Entries 0..127 are positive; entries 128..254 are negative, matching
// the exponent adjustment (j=1) applied in the code when f>=128.
// Entry 255 is zero.
LOCAL_OBJECT_START(T_table)
data8 0xb8d8752172fed131, 0x00003ff6
data8 0x8ae7f475764180a3, 0x00003ff8
data8 0xe7f73862e72ee35d, 0x00003ff8
data8 0xa2b25310c941a2f2, 0x00003ff9
data8 0xcbb91d671abb2e85, 0x00003ff9
data8 0xfac91e34daa50483, 0x00003ff9
data8 0x9504a5042eb495c5, 0x00003ffa
data8 0xa9c4a0bbb580ee02, 0x00003ffa
data8 0xc19264dc8a5e3bf9, 0x00003ffa
data8 0xd67aa6703ebf4a77, 0x00003ffa
data8 0xee76cac6d6e08ce7, 0x00003ffa
data8 0x81c3f7de5434ed04, 0x00003ffb
data8 0x8c563033a3ce01e4, 0x00003ffb
data8 0x9876e9f09a98661c, 0x00003ffb
data8 0xa31e0ac9b2326ce2, 0x00003ffb
data8 0xadcf09e1fd10e4a5, 0x00003ffb
data8 0xb889f992cf03cdb6, 0x00003ffb
data8 0xc34eec68d901a714, 0x00003ffb
data8 0xce1df524e9909ed9, 0x00003ffb
data8 0xd8f726bcb0b80ad0, 0x00003ffb
data8 0xe3da945b878e27d1, 0x00003ffb
data8 0xeec851633b76a320, 0x00003ffb
data8 0xf82ea4bb6101421a, 0x00003ffb
data8 0x8197ddd7736b2864, 0x00003ffc
data8 0x871dad4f994253f0, 0x00003ffc
data8 0x8ca8cae3e892d549, 0x00003ffc
data8 0x916d6e1559a4b697, 0x00003ffc
data8 0x97028118efabeb7d, 0x00003ffc
data8 0x9bcfbce1592ad5d5, 0x00003ffc
data8 0xa16ee95d0da54a91, 0x00003ffc
data8 0xa644dcf3403fa5d0, 0x00003ffc
data8 0xab1ee14ffd659064, 0x00003ffc
data8 0xb0cd12faebcc6757, 0x00003ffc
data8 0xb5affdf9b3b221e0, 0x00003ffc
data8 0xba970fb307c6ade1, 0x00003ffc
data8 0xbf824f3a9f3e7561, 0x00003ffc
data8 0xc544c055fde99333, 0x00003ffc
data8 0xca39266532bdf26c, 0x00003ffc
data8 0xcf31d124b8fa2f56, 0x00003ffc
data8 0xd42ec7f59017b6ab, 0x00003ffc
data8 0xd930124bea9a2c67, 0x00003ffc
data8 0xde35b7af70e4dab3, 0x00003ffc
data8 0xe33fbfbb8533ef03, 0x00003ffc
data8 0xe77625911a7dcef3, 0x00003ffc
data8 0xec884bd689cc12e3, 0x00003ffc
data8 0xf19eeabf9e99a40a, 0x00003ffc
data8 0xf6ba0a35e3d88051, 0x00003ffc
data8 0xfbd9b237f7b4192b, 0x00003ffc
data8 0x80111d4a1ee0c79e, 0x00003ffd
data8 0x82a523a5f875bbfc, 0x00003ffd
data8 0x84ccecdc92cd0815, 0x00003ffd
data8 0x87653369d92c057a, 0x00003ffd
data8 0x89ffd1742da3aa21, 0x00003ffd
data8 0x8c2d2227d053d9b6, 0x00003ffd
data8 0x8e5c189793f7f798, 0x00003ffd
data8 0x90fd0a20e72f3c96, 0x00003ffd
data8 0x932fa937301e59ae, 0x00003ffd
data8 0x95d5061a5f0f5f7f, 0x00003ffd
data8 0x980b5a2ef10e7023, 0x00003ffd
data8 0x9a4361c5514d3c27, 0x00003ffd
data8 0x9c7d1f7d541313fd, 0x00003ffd
data8 0x9f2b16040b500d04, 0x00003ffd
data8 0xa168a0fa9db22c98, 0x00003ffd
data8 0xa3a7eaa1f9116293, 0x00003ffd
data8 0xa5e8f5b4072a3d44, 0x00003ffd
data8 0xa82bc4f11a5e88aa, 0x00003ffd
data8 0xaa705b2001db8317, 0x00003ffd
data8 0xacb6bb0e1e0f8005, 0x00003ffd
data8 0xaefee78f75707221, 0x00003ffd
data8 0xb148e37ec994dd99, 0x00003ffd
data8 0xb394b1bdaca0bc17, 0x00003ffd
data8 0xb5e255349707e496, 0x00003ffd
data8 0xb831d0d2fda791cc, 0x00003ffd
data8 0xba83278f6838ab20, 0x00003ffd
data8 0xbcd65c67881c7d47, 0x00003ffd
data8 0xbeb3e0f21d72dc92, 0x00003ffd
data8 0xc10a7a03457d35dc, 0x00003ffd
data8 0xc362f9b6f51eddd3, 0x00003ffd
data8 0xc5bd6326ebfce656, 0x00003ffd
data8 0xc7a0b3d0637c8f97, 0x00003ffd
data8 0xc9fe96af0df8e4b5, 0x00003ffd
data8 0xcc5e6c214b4a2cd7, 0x00003ffd
data8 0xce46199f374d29cf, 0x00003ffd
data8 0xd0a978a14c0d9ebe, 0x00003ffd
data8 0xd293fecafec7f9b5, 0x00003ffd
data8 0xd4faf1f6f5cf32e6, 0x00003ffd
data8 0xd6e8595abaad34d1, 0x00003ffd
data8 0xd952eb7a8ffc1593, 0x00003ffd
data8 0xdb433ccd805f171e, 0x00003ffd
data8 0xddb178dc43e6bd84, 0x00003ffd
data8 0xdfa4bcfb333342a4, 0x00003ffd
data8 0xe19953741ccea015, 0x00003ffd
data8 0xe40cee16a2ff21c5, 0x00003ffd
data8 0xe6048470cdbde8ea, 0x00003ffd
data8 0xe7fd7308d6895b14, 0x00003ffd
data8 0xe9f7bbb6a1ff9f87, 0x00003ffd
data8 0xec7280138809433d, 0x00003ffd
data8 0xee6fda4365cd051f, 0x00003ffd
data8 0xf06e94a122ff1f12, 0x00003ffd
data8 0xf26eb1151441fce5, 0x00003ffd
data8 0xf470318b88a77e2f, 0x00003ffd
data8 0xf67317f4d4c8aa58, 0x00003ffd
data8 0xf8f8b250a9c4cde6, 0x00003ffd
data8 0xfafec54831f1a484, 0x00003ffd
data8 0xfd06449bf3eaea1e, 0x00003ffd
data8 0xff0f324ddb19ab67, 0x00003ffd
data8 0x808cc8320a9acf15, 0x00003ffe
data8 0x8192b0748f2cef06, 0x00003ffe
data8 0x829952f5e6a24ee5, 0x00003ffe
data8 0x83a0b0bfafe1424e, 0x00003ffe
data8 0x8466b29f9c41caea, 0x00003ffe
data8 0x856f5aae0881d857, 0x00003ffe
data8 0x8678c0eae8ee8190, 0x00003ffe
data8 0x8782e6685676b9d7, 0x00003ffe
data8 0x888dcc3abc4554ec, 0x00003ffe
data8 0x89997378de7b98b8, 0x00003ffe
data8 0x8aa5dd3be1044279, 0x00003ffe
data8 0x8b6facdfd0360ab8, 0x00003ffe
data8 0x8c7d6db7169e0cdb, 0x00003ffe
data8 0x8d8bf424d6e130b2, 0x00003ffe
data8 0x8e575b506f409fa6, 0x00003ffe
data8 0x8f673e418776492c, 0x00003ffe
data8 0x9077e9ed700ef9ba, 0x00003ffe
data8 0x9144ef1baec80b20, 0x00003ffe
data8 0x9256fcdb537f035f, 0x00003ffe
data8 0x9369d68d75e7e1d6, 0x00003ffe
data8 0x943880613b8f9f1e, 0x00003ffe
data8 0x954cc1d9e0d94206, 0x00003ffe
// index 128: values are negative from here on
data8 0xd3c70a37bdf7a294, 0x0000bffd
data8 0xd19bb053fb0284ec, 0x0000bffd
data8 0xcffa1a3b7dafb8bf, 0x0000bffd
data8 0xcdcbe1e2776479ee, 0x0000bffd
data8 0xcc282218b8bfdda2, 0x0000bffd
data8 0xc9f703a9afcb38ac, 0x0000bffd
data8 0xc851146ab89593c6, 0x0000bffd
data8 0xc61d08265927a860, 0x0000bffd
data8 0xc474e39705912d26, 0x0000bffd
data8 0xc23de19ec30c6e3e, 0x0000bffd
data8 0xc09381cc45db45b4, 0x0000bffd
data8 0xbee82b4e025ff90c, 0x0000bffd
data8 0xbcace101149788ec, 0x0000bffd
data8 0xbaff46962ea47964, 0x0000bffd
data8 0xb950b1be5e0c14a2, 0x0000bffd
data8 0xb7110e6ce866f2bc, 0x0000bffd
data8 0xb5602ccc2a81db52, 0x0000bffd
data8 0xb3ae4ce740fc8ef1, 0x0000bffd
data8 0xb1fb6d92c8240ccc, 0x0000bffd
data8 0xafb609c09b244abc, 0x0000bffd
data8 0xae00d1cfdeb43cfd, 0x0000bffd
data8 0xac4a967a8c8c9bd0, 0x0000bffd
data8 0xaa93568c249e6c52, 0x0000bffd
data8 0xa8db10cdff375343, 0x0000bffd
data8 0xa68e6fc5a42376e3, 0x0000bffd
data8 0xa4d3c25e68dc57f2, 0x0000bffd
data8 0xa3180b0c192a3816, 0x0000bffd
data8 0xa15b488e7aa329a0, 0x0000bffd
data8 0x9f9d79a30f0e1d5f, 0x0000bffd
data8 0x9dde9d050ee7d4ac, 0x0000bffd
data8 0x9c1eb16d63d7356c, 0x0000bffd
data8 0x9a5db592a310c36a, 0x0000bffd
data8 0x989ba82907a9016f, 0x0000bffd
data8 0x96d887e26cd57b79, 0x0000bffd
data8 0x9514536e481c3a4f, 0x0000bffd
data8 0x934f0979a3715fc9, 0x0000bffd
data8 0x9188a8af1742a9d5, 0x0000bffd
data8 0x8fc12fb6c470995f, 0x0000bffd
data8 0x8df89d364e34f8f1, 0x0000bffd
data8 0x8c2eefd0d3f67dd6, 0x0000bffd
data8 0x8a642626eb093d54, 0x0000bffd
data8 0x88983ed6985bae58, 0x0000bffd
data8 0x86cb387b4a0feec6, 0x0000bffd
data8 0x84fd11add101024b, 0x0000bffd
data8 0x83c856dd81804b78, 0x0000bffd
data8 0x81f84c2c62afd6f1, 0x0000bffd
data8 0x80271d3e4be5ea5a, 0x0000bffd
data8 0xfca991447e7b485d, 0x0000bffc
data8 0xf90299c904793a3c, 0x0000bffc
data8 0xf559511d2dc1ed69, 0x0000bffc
data8 0xf2e72afee9bd2aee, 0x0000bffc
data8 0xef39ff1d8a40770e, 0x0000bffc
data8 0xeb8a7a2311c935dc, 0x0000bffc
data8 0xe7d8990dc620012f, 0x0000bffc
data8 0xe560b1e3b86e44b6, 0x0000bffc
data8 0xe1aadb38caee80c4, 0x0000bffc
data8 0xddf2a051f81b76a4, 0x0000bffc
data8 0xdb7678bafcaf4b5f, 0x0000bffc
data8 0xd7ba3a8f0df19bfc, 0x0000bffc
data8 0xd3fb8fdbdd5cebdb, 0x0000bffc
data8 0xd17b191905c35652, 0x0000bffc
data8 0xcdb85d29cefd7121, 0x0000bffc
data8 0xc9f32c3c88221ef6, 0x0000bffc
data8 0xc76e5741a95b5dae, 0x0000bffc
data8 0xc3a506d80d38c718, 0x0000bffc
data8 0xbfd938ccef8b68c1, 0x0000bffc
data8 0xbd4ff63e82eef78c, 0x0000bffc
data8 0xb97ffa2b563865bd, 0x0000bffc
data8 0xb6f3eb3011eddcea, 0x0000bffc
data8 0xb31fb7d64898b3e6, 0x0000bffc
data8 0xb090d63a409e7880, 0x0000bffc
data8 0xacb8623c7ffa4f39, 0x0000bffc
data8 0xa8dd5c83d2e45246, 0x0000bffc
data8 0xa649e998a8d91f2e, 0x0000bffc
data8 0xa26a93fed6faa94f, 0x0000bffc
data8 0x9fd43df079d0db1f, 0x0000bffc
data8 0x9d3cbe69aecac4c2, 0x0000bffc
data8 0x99574f13c570d0fb, 0x0000bffc
data8 0x96bce349bf7ee6c7, 0x0000bffc
data8 0x92d30c9b86cee18e, 0x0000bffc
data8 0x9035adef17c5bd5c, 0x0000bffc
data8 0x8c4765e8e8b5f251, 0x0000bffc
data8 0x89a70da448316ffa, 0x0000bffc
data8 0x85b44a24474af78a, 0x0000bffc
data8 0x8310f17aab5adf70, 0x0000bffc
data8 0x806c6388d0965f29, 0x0000bffc
data8 0xf8e69092bf0c5ead, 0x0000bffb
data8 0xf397608bfd2d90e6, 0x0000bffb
data8 0xee45be24d0eedbc4, 0x0000bffb
data8 0xe646af233db881e9, 0x0000bffb
data8 0xe0eee4e1ce3d06fb, 0x0000bffb
data8 0xdb94a049e6e87a4f, 0x0000bffb
data8 0xd3888ef9a4249f5a, 0x0000bffb
data8 0xce280e6fbac39194, 0x0000bffb
data8 0xc8c50b72319ad574, 0x0000bffb
data8 0xc0abcd39f41e329b, 0x0000bffb
data8 0xbb4279cfa7f9667b, 0x0000bffb
data8 0xb5d69bac77ec398a, 0x0000bffb
data8 0xb068306bf20d6233, 0x0000bffb
data8 0xa83dc1b019ddb6a8, 0x0000bffb
data8 0xa2c8eb1886c2d024, 0x0000bffb
data8 0x9d517ee93f8e16c0, 0x0000bffb
data8 0x97d77aae659b92fb, 0x0000bffb
data8 0x8f9b91da5736d415, 0x0000bffb
data8 0x8a1b06b09b7fd1d1, 0x0000bffb
data8 0x8497daca0a2e077a, 0x0000bffb
data8 0xfe241745a453f10c, 0x0000bffa
data8 0xf3132d6708d723c5, 0x0000bffa
data8 0xe7fcf2e21a0e7d77, 0x0000bffa
data8 0xd75198b04afb8da9, 0x0000bffa
data8 0xcc2dfe1a4a8ca305, 0x0000bffa
data8 0xc10500d63aa65882, 0x0000bffa
data8 0xb5d69bac77ec398a, 0x0000bffa
data8 0xaaa2c95dc66abcde, 0x0000bffa
data8 0x9f6984a342d13101, 0x0000bffa
data8 0x942ac82e5387ac51, 0x0000bffa
data8 0x88e68ea899a0976c, 0x0000bffa
data8 0xefebc4409ccf872e, 0x0000bff9
data8 0xd947b0c6642ef69e, 0x0000bff9
data8 0xc2987d51e043d407, 0x0000bff9
data8 0xabde1eeee6bfd257, 0x0000bff9
data8 0x95188a9917cf2e01, 0x0000bff9
data8 0xfc8f6a777c1b7f1e, 0x0000bff8
data8 0xced727635c59725c, 0x0000bff8
data8 0xa108358a4c904615, 0x0000bff8
data8 0xe644fcbeb3ac9c90, 0x0000bff7
data8 0x8a4bd667bf08e7de, 0x0000bff7
data8 0x0000000000000000 // T[255] Low
data8 0x0000000000000000 // T[255] High
LOCAL_OBJECT_END(T_table)
.section .text
// double log2(double x)
// In:  f8 = x          Out: f8 = log2(x), rounded to double
// This is the fast path for positive normal/denormal x; any other input
// branches to SPECIAL_LOG2.
// Register roles on the fast path:
//   f6  = y=frcpa(x), then r        f7  = normalized x, m, r^2, r^3
//   f8  = x, then l, then result    f10-f13 = C_4..C_7
//   f14 = C_1 (then C_1*r)          f15 = C_3 (then P13)
//   f32 = 0.5 (then 1-0.5*r)        f33 = T
//   r25 = significand of x          r28 = T index (f)
//   r29 = exponent (then l)         r2/r3 = table pointers
GLOBAL_LIBM_ENTRY(log2)
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// y=frcpa(x)
frcpa.s1 f6,p0=f1,f8
// will form significand of 1.5 (to test whether the index is 128 or above)
mov r24=0xc
}
{.mfi
nop.m 0
// normalize x
fma.s1 f7=f8,f1,f0
// r2 = address of the linkage-table slot holding &poly_coeffs
addl r2 = @ltoff(poly_coeffs), gp;;
}
{.mfi
// get significand
getf.sig r25=f8
// f8 denormal ?
fclass.m p8,p10=f8,0x9
// will form significand of 1.5 (to test whether the index is 128 or above)
shl r24=r24,60
}
{.mfi
mov r26=0x804
nop.f 0
// r23=bias-1
mov r23=0xfffe;;
}
{.mmf
getf.exp r29=f8
// load start address of poly_coeffs (C_4..C_7, C_1, C_3), followed by T_table
ld8 r2=[r2]
// will continue only for positive normal/denormal numbers
fclass.nm.unc p12,p7 = f8, 0x19 ;;
}
.pred.rel "mutex",p8,p10
{.mfi
// denormal input, repeat get significand (after normalization)
(p8) getf.sig r25=f7
// x=1 ?
fcmp.eq.s0 p6,p0=f8,f1
// get T_index
(p10) shr.u r28=r25,63-8
}
{.mfi
// f32=0.5
setf.exp f32=r23
nop.f 0
// r27=bias
mov r27=0xffff;;
}
{.mmi
// denormal input, repeat get exponent (after normalization)
(p8) getf.exp r29=f7
mov r23=0xff
// r26=0x80400...0 (threshold for using polynomial approximation)
shl r26=r26,64-12;;
}
{.mfb
// r3 = address of C_3 (offset 48 in poly_coeffs)
add r3=48,r2
// r=x*y-1
fms.s1 f6=f6,f8,f1
// zero, negative, infinity or NaN: handled out of line
(p12) br.cond.spnt SPECIAL_LOG2
}
{.mfi
// load C_4, C_5
ldfpd f10,f11=[r2],16
nop.f 0
// p12=1 if significand >= 1.5, i.e. index >= 128
cmp.geu p12,p0=r25,r24;;
}
{.mmi
// load C_6, C_7
ldfpd f12,f13=[r2],16
// r27=bias-1 (if index >=128, will add exponent+1)
(p12) mov r27=0xfffe
// denormal input: get T_index from the normalized significand
(p8) shr.u r28=r25,63-8;;
}
{.mfi
// load C_1; the post-increment leaves r2 at the start of T_table
ldfe f14=[r2],32
// f7 = m (significand of x with the sign and exponent of 1.0)
fmerge.se f7=f1,f7
// if first 9 bits after leading 1 are all zero, then p8=1
cmp.ltu p8,p12=r25,r26
}
{.mfi
// load C_3
ldfe f15=[r3]
nop.f 0
// get T_index
and r28=r28,r23;;
}
{.mfi
// r29=exponent-bias
sub r29=r29,r27
// x=1, return 0
(p6) fma.d.s0 f8=f0,f0,f0
// get T address
shladd r2=r28,4,r2
}
{.mfb
// first 8 bits after leading 1 are all ones ?
cmp.eq p10,p0=r23,r28
// if first 9 bits after leading bit are 0, use polynomial approx. only (r=m-1)
(p8) fms.s1 f6=f7,f1,f1
// x=1, return
(p6) br.ret.spnt b0;;
}
{.mfi
// r26=1
mov r26=1
// if first 8 bits after leading 1 are all ones, use polynomial approx. only
// (r=m*0.5-1; the table entry for this index is zero)
(p10) fms.s1 f6=f7,f32,f1
nop.i 0;;
}
.pred.rel "mutex",p8,p12
{.mmf
// load T (unless first 9 bits after leading 1 are 0)
(p12) ldfe f33=[r2]
// f8=expon - bias
setf.sig f8=r29
// set T=0 (if first 9 bits after leading 1 are 0)
(p8) fma.s1 f33=f0,f0,f0;;
}
{.mfi
nop.m 0
// P12=1-0.5*r
fnma.s1 f32=f32,f6,f1
// r26=2^{63}
shl r26=r26,63
}
{.mfi
nop.m 0
// r2=r*r
fma.s1 f7=f6,f6,f0
nop.i 0;;
}
{.mfi
// significand(x)=1 ?  (p6=1 when it is NOT exactly 1)
cmp.eq p0,p6=r26,r25
// P67=C_6+C_7*r
fma.s1 f13=f13,f6,f12
nop.i 0
}
{.mfi
nop.m 0
// P45=C_4+C_5*r
fma.s1 f10=f11,f6,f10
nop.i 0;;
}
{.mfi
nop.m 0
// C_1*r
(p6) fma.s1 f14=f14,f6,f0
nop.i 0;;
}
{.mfi
nop.m 0
// normalize additive term (l=exponent of x)
fcvt.xf f8=f8
nop.i 0
}
{.mfi
nop.m 0
// P13=1-0.5*r+C_3*r^2
(p6) fma.s1 f15=f15,f7,f32
nop.i 0;;
}
{.mfi
nop.m 0
// P47=P45+r2*P67
(p6) fma.s1 f13=f13,f7,f10
// if significand(x)=1, the p6-guarded steps are skipped and f8 stays l
nop.i 0
}
{.mfi
nop.m 0
// r3=r^3
(p6) fma.s1 f7=f7,f6,f0
nop.i 0;;
}
{.mfi
nop.m 0
// add T+l
(p6) fma.s1 f8=f8,f1,f33
nop.i 0
}
{.mfi
nop.m 0
// P17=P13+r3*P47
(p6) fma.s1 f13=f13,f7,f15
nop.i 0;;
}
{.mfb
nop.m 0
// result=T+l+(C_1*r)*P17
(p6) fma.d.s0 f8=f13,f14,f8
// return
br.ret.sptk b0;;
}
// Special-input tail of log2, entered from the fast path when x is not a
// positive normal/denormal number.  f8 still holds the original x here.
//   x = +Inf  -> return +Inf unchanged
//   x = +/-0  -> -Inf (frcpa raises Divide by Zero), error tag 170
//   x < 0     -> NaN from 0/0 (frcpa raises Invalid), error tag 171
//   otherwise -> NaN input, returned as x*1+0 rounded to double
// The original argument is copied to FR_X before f8 is overwritten,
// because __libm_error_region stores FR_X as parameter 1 of
// __libm_error_support and nothing else on this path initializes it.
SPECIAL_LOG2:
{.mfi
nop.m 0
// x=+Infinity ?
fclass.m p7,p0=f8,0x21
nop.i 0;;
}
{.mfi
nop.m 0
// x=+/-Zero ?
fclass.m p8,p0=f8,0x7
nop.i 0;;
}
{.mfi
nop.m 0
// x=-Infinity, -normal, -denormal ?
fclass.m p6,p0=f8,0x3a
nop.i 0;;
}
{.mfb
nop.m 0
// save the original argument for the error handler (uses the F slot
// that was previously a nop, so no bundle is added)
fmerge.s FR_X=f8,f8
// log2(+Infinity)=+Infinity
(p7) br.ret.spnt b0;;
}
{.mfi
(p8) mov GR_Parameter_TAG = 170
// log2(+/-0)=-infinity, raises Divide by Zero
// set f8=-0
(p8) fmerge.ns f8=f0,f8
nop.i 0;;
}
{.mfb
nop.m 0
// f8=1/(-0)=-infinity
(p8) frcpa.s0 f8,p0=f1,f8
(p8) br.cond.sptk __libm_error_region;;
}
{.mfb
(p6) mov GR_Parameter_TAG = 171
// x<0: return NaN, raise Invalid
(p6) frcpa.s0 f8,p0=f0,f0
(p6) br.cond.sptk __libm_error_region;;
}
{.mfb
nop.m 0
// Remaining cases: NaNs
fma.d.s0 f8=f8,f1,f0
br.ret.sptk b0;;
}
GLOBAL_LIBM_END(log2)
// Error exit shared by the special cases of log2.
// In:  GR_Parameter_TAG = error tag, FR_X / FR_Y / FR_RESULT as set up
//      by the caller (FR_RESULT = f8 = default result).
// Builds a 64-byte stack frame, stores the three FP values into it and
// calls __libm_error_support with their addresses plus the tag.
// Frame layout relative to the new sp:
//   sp+16 = FR_X (parameter 1), sp+32 = FR_Y (parameter 2),
//   sp+48 = FR_RESULT (parameter 3)
// Out: f8 = result reloaded from sp+48 after the call; sp, b0, gp and
//      ar.pfs are restored before returning to the caller of log2.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp-32 = new sp+32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack; pointer advances to sp+48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
};;
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
LOCAL_LIBM_END(__libm_error_region)
// External error handler called above
.type __libm_error_support#,@function
.global __libm_error_support#

550
sysdeps/ia64/fpu/e_log2f.S Normal file
View File

@ -0,0 +1,550 @@
.file "log2f.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 09/11/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// float log2f(float)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let x = 2^l * m, where m=1.b1 b2 ... b8 b9 ... b23
// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8 (table index)
// j=0 if f<128; j=1 if f>=128
// T is a table that stores log2(1/y) (in entries 1..255) rounded to
// double precision; f is used as an index; T[255]=0
//
// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m),
// and 0 is used instead of T[0]
// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used
// for m=2(1-r'), 0<=r'<2^{-9})
//
// log2f(x) is approximated as
// (l-j) + T[f] + (c1*r+c2*r^2+...+c6*r^6), if f>0
//
// Special values
//==============================================================
// log2f(0)=-inf, raises Divide by Zero
// log2f(+inf)=inf
// log2f(x)=NaN, raises Invalid if x<0
//
// Registers used
//==============================================================
// f6-f14
// r2-r3, r23-r30
// p6,p7,p8,p12
//
// Symbolic register names for log2f and its error path.
// NOTE(review): the code that uses these names lies outside this view;
// the grouping below follows the names only - confirm against the body.
// Save slots for b0, ar.pfs, gp and sp
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35 // This reg. can safely be used
GR_SAVE_SP = r36
// Argument registers for the error-handling call
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// Floating-point argument, second argument (f1 is the constant +1.0
// register) and result
FR_X = f10
FR_Y = f1
FR_RESULT = f8
// Data tables
//==============================================================
RODATA
.align 16
// Polynomial coefficients for log2f.
// log2f addresses this object by fixed byte offsets, so the order and size
// of the entries must not change:
//   +0   C_3, C_4  (two doubles, loaded with ldfpd)
//   +16  C_1       (double extended, loaded with ldfe)
//   +32  C_2       (double extended, loaded with ldfe)
//   +48  start of T_table, which must follow immediately
LOCAL_OBJECT_START(poly_coeffs)
data8 0x3fdec709dc3a03fd, 0xbfd71547652b82fe //C_3 and C_4 (approx. log2(e)/3, -log2(e)/4)
data8 0xb8aa3b295c17f0bc, 0x00003fff // C_1 (significand, sign/exponent; approx. log2(e))
data8 0xb8aa3b295c17f0bc, 0x0000bffe // C_2 (significand, sign/exponent; approx. -log2(e)/2)
LOCAL_OBJECT_END(poly_coeffs)
// T_table: 256 double-precision entries, indexed by the 8 significand bits
// that follow the leading 1 (log2f loads entry f with ldfd from
// T_table + 8*f).  Entries 0..127 are positive; entries 128..254 are
// negative; entry 255 is 0.
LOCAL_OBJECT_START(T_table)
data8 0x3f671b0ea42e5fda, 0x3f815cfe8eaec830
data8 0x3f8cfee70c5ce5dc, 0x3f94564a62192834
data8 0x3f997723ace35766, 0x3f9f5923c69b54a1
data8 0x3fa2a094a085d693, 0x3fa538941776b01e
data8 0x3fa8324c9b914bc7, 0x3faacf54ce07d7e9
data8 0x3fadced958dadc12, 0x3fb0387efbca869e
data8 0x3fb18ac6067479c0, 0x3fb30edd3e13530d
data8 0x3fb463c15936464e, 0x3fb5b9e13c3fa21d
data8 0x3fb7113f3259e07a, 0x3fb869dd8d1b2035
data8 0x3fb9c3bea49d3214, 0x3fbb1ee4d7961701
data8 0x3fbc7b528b70f1c5, 0x3fbdd90a2c676ed4
data8 0x3fbf05d4976c2028, 0x3fc032fbbaee6d65
data8 0x3fc0e3b5a9f3284a, 0x3fc195195c7d125b
data8 0x3fc22dadc2ab3497, 0x3fc2e050231df57d
data8 0x3fc379f79c2b255b, 0x3fc42ddd2ba1b4a9
data8 0x3fc4c89b9e6807f5, 0x3fc563dc29ffacb2
data8 0x3fc619a25f5d798d, 0x3fc6b5ffbf367644
data8 0x3fc752e1f660f8d6, 0x3fc7f049e753e7cf
data8 0x3fc8a8980abfbd32, 0x3fc94724cca657be
data8 0x3fc9e63a24971f46, 0x3fca85d8feb202f7
data8 0x3fcb2602497d5346, 0x3fcbc6b6f5ee1c9b
data8 0x3fcc67f7f770a67e, 0x3fcceec4b2234fba
data8 0x3fcd91097ad13982, 0x3fce33dd57f3d335
data8 0x3fced74146bc7b10, 0x3fcf7b3646fef683
data8 0x3fd00223a943dc19, 0x3fd054a474bf0eb7
data8 0x3fd0999d9b9259a1, 0x3fd0eca66d3b2581
data8 0x3fd13ffa2e85b475, 0x3fd185a444fa0a7b
data8 0x3fd1cb8312f27eff, 0x3fd21fa1441ce5e8
data8 0x3fd265f526e603cb, 0x3fd2baa0c34be1ec
data8 0x3fd3016b45de21ce, 0x3fd3486c38aa29a8
data8 0x3fd38fa3efaa8262, 0x3fd3e562c0816a02
data8 0x3fd42d141f53b646, 0x3fd474fd543f222c
data8 0x3fd4bd1eb680e548, 0x3fd505789e234bd1
data8 0x3fd54e0b64003b70, 0x3fd596d761c3c1f0
data8 0x3fd5dfdcf1eeae0e, 0x3fd6291c6fd9329c
data8 0x3fd6729637b59418, 0x3fd6bc4aa692e0fd
data8 0x3fd7063a1a5fb4f2, 0x3fd75064f1ed0715
data8 0x3fd79acb8cf10390, 0x3fd7d67c1e43ae5c
data8 0x3fd8214f4068afa7, 0x3fd86c5f36dea3dc
data8 0x3fd8b7ac64dd7f9d, 0x3fd8f4167a0c6f92
data8 0x3fd93fd2d5e1bf1d, 0x3fd98bcd84296946
data8 0x3fd9c8c333e6e9a5, 0x3fda152f142981b4
data8 0x3fda527fd95fd8ff, 0x3fda9f5e3edeb9e6
data8 0x3fdadd0b2b5755a7, 0x3fdb2a5d6f51ff83
data8 0x3fdb686799b00be3, 0x3fdbb62f1b887cd8
data8 0x3fdbf4979f666668, 0x3fdc332a6e8399d4
data8 0x3fdc819dc2d45fe4, 0x3fdcc0908e19b7bd
data8 0x3fdcffae611ad12b, 0x3fdd3ef776d43ff4
data8 0x3fdd8e5002710128, 0x3fddcdfb486cb9a1
data8 0x3fde0dd294245fe4, 0x3fde4dd622a28840
data8 0x3fde8e06317114f0, 0x3fdece62fe9a9915
data8 0x3fdf1f164a15389a, 0x3fdf5fd8a9063e35
data8 0x3fdfa0c8937e7d5d, 0x3fdfe1e649bb6335
data8 0x3fe011990641535a, 0x3fe032560e91e59e
data8 0x3fe0532a5ebcd44a, 0x3fe0741617f5fc28
data8 0x3fe08cd653f38839, 0x3fe0adeb55c1103b
data8 0x3fe0cf181d5d1dd0, 0x3fe0f05ccd0aced7
data8 0x3fe111b9875788ab, 0x3fe1332e6f1bcf73
data8 0x3fe154bba77c2088, 0x3fe16df59bfa06c1
data8 0x3fe18fadb6e2d3c2, 0x3fe1b17e849adc26
data8 0x3fe1caeb6a0de814, 0x3fe1ece7c830eec9
data8 0x3fe20efd3dae01df, 0x3fe2289de375d901
data8 0x3fe24adf9b6a6fe0, 0x3fe26d3ad1aebcfc
data8 0x3fe287100c2771f4, 0x3fe2a9983b3c1b28
// entries 128..255 start here
data8 0xbfda78e146f7bef4, 0xbfda33760a7f6051
data8 0xbfd9ff43476fb5f7, 0xbfd9b97c3c4eec8f
data8 0xbfd98504431717fc, 0xbfd93ee07535f967
data8 0xbfd90a228d5712b2, 0xbfd8c3a104cb24f5
data8 0xbfd88e9c72e0b226, 0xbfd847bc33d8618e
data8 0xbfd812703988bb69, 0xbfd7dd0569c04bff
data8 0xbfd7959c202292f1, 0xbfd75fe8d2c5d48f
data8 0xbfd72a1637cbc183, 0xbfd6e221cd9d0cde
data8 0xbfd6ac059985503b, 0xbfd675c99ce81f92
data8 0xbfd63f6db2590482, 0xbfd5f6c138136489
data8 0xbfd5c01a39fbd688, 0xbfd58952cf519193
data8 0xbfd5526ad18493ce, 0xbfd51b6219bfe6ea
data8 0xbfd4d1cdf8b4846f, 0xbfd49a784bcd1b8b
data8 0xbfd4630161832547, 0xbfd42b6911cf5465
data8 0xbfd3f3af3461e1c4, 0xbfd3bbd3a0a1dcfb
data8 0xbfd383d62dac7ae7, 0xbfd34bb6b2546218
data8 0xbfd313750520f520, 0xbfd2db10fc4d9aaf
data8 0xbfd2a28a6dc90387, 0xbfd269e12f346e2c
data8 0xbfd2311515e2e855, 0xbfd1f825f6d88e13
data8 0xbfd1bf13a6c9c69f, 0xbfd185ddfa1a7ed0
data8 0xbfd14c84c4dd6128, 0xbfd11307dad30b76
data8 0xbfd0d9670f6941fe, 0xbfd09fa235ba2020
data8 0xbfd0790adbb03009, 0xbfd03f09858c55fb
data8 0xbfd004e3a7c97cbd, 0xbfcf9532288fcf69
data8 0xbfcf205339208f27, 0xbfceab2a23a5b83e
data8 0xbfce5ce55fdd37a5, 0xbfcde73fe3b1480f
data8 0xbfcd714f44623927, 0xbfccfb1321b8c400
data8 0xbfccac163c770dc9, 0xbfcc355b67195dd0
data8 0xbfcbbe540a3f036f, 0xbfcb6ecf175f95e9
data8 0xbfcaf74751e1be33, 0xbfca7f71fb7bab9d
data8 0xbfca2f632320b86b, 0xbfc9b70ba539dfae
data8 0xbfc93e6587910444, 0xbfc8edcae8352b6c
data8 0xbfc874a0db01a719, 0xbfc7fb27199df16d
data8 0xbfc7a9fec7d05ddf, 0xbfc72fff456ac70d
data8 0xbfc6de7d66023dbc, 0xbfc663f6fac91316
data8 0xbfc6121ac74813cf, 0xbfc5970c478fff4a
data8 0xbfc51bab907a5c8a, 0xbfc4c93d33151b24
data8 0xbfc44d527fdadf55, 0xbfc3fa87be0f3a1b
data8 0xbfc3a797cd35d959, 0xbfc32ae9e278ae1a
data8 0xbfc2d79c6937efdd, 0xbfc25a619370d9dc
data8 0xbfc206b5bde2f8b8, 0xbfc188ecbd1d16be
data8 0xbfc134e1b489062e, 0xbfc0b6894488e95f
data8 0xbfc0621e2f556b5c, 0xbfc00d8c711a12cc
data8 0xbfbf1cd21257e18c, 0xbfbe72ec117fa5b2
data8 0xbfbdc8b7c49a1ddb, 0xbfbcc8d5e467b710
data8 0xbfbc1ddc9c39c7a1, 0xbfbb7294093cdd0f
data8 0xbfba7111df348494, 0xbfb9c501cdf75872
data8 0xbfb918a16e46335b, 0xbfb81579a73e83c6
data8 0xbfb7684f39f4ff2d, 0xbfb6bad3758efd87
data8 0xbfb60d060d7e41ac, 0xbfb507b836033bb7
data8 0xbfb4591d6310d85a, 0xbfb3aa2fdd27f1c3
data8 0xbfb2faef55ccb372, 0xbfb1f3723b4ae6db
data8 0xbfb14360d6136ffa, 0xbfb092fb594145c1
data8 0xbfafc482e8b48a7e, 0xbfae6265ace11ae4
data8 0xbfacff9e5c4341d0, 0xbfaaea3316095f72
data8 0xbfa985bfc3495194, 0xbfa820a01ac754cb
data8 0xbfa6bad3758efd87, 0xbfa554592bb8cd58
data8 0xbfa3ed3094685a26, 0xbfa2855905ca70f6
data8 0xbfa11cd1d5133413, 0xbf9dfd78881399f1
data8 0xbf9b28f618cc85df, 0xbf98530faa3c087b
data8 0xbf957bc3dddcd7fa, 0xbf92a3115322f9e6
data8 0xbf8f91ed4eef8370, 0xbf89dae4ec6b8b2e
data8 0xbf842106b1499209, 0xbf7cc89f97d67594
data8 0xbf71497accf7e11d, 0x0000000000000000
LOCAL_OBJECT_END(T_table)
.section .text
// float log2f(float x)
//
// In:   f8 = x
// Out:  f8 = log2(x), rounded to single precision
//
// Positive normal and denormal inputs are handled inline (table lookup
// plus a degree-4 polynomial in r).  Every other input (zero, negative,
// infinity, NaN) branches to SPECIAL_log2f, which returns directly for
// +Infinity and NaN and enters __libm_error_region for x=+/-0 (tag 172)
// and x<0 (tag 173).
GLOBAL_LIBM_ENTRY(log2f)
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// y=frcpa(x)
frcpa.s1 f6,p0=f1,f8
// will form significand of 1.5 (to test whether the index is 128 or above)
mov r24=0xc
}
{.mfi
nop.m 0
// normalize x
fma.s1 f7=f8,f1,f0
// r2 = linkage-table slot for poly_coeffs (coefficients followed by T_table)
addl r2 = @ltoff(poly_coeffs), gp;;
}
{.mfi
// get significand
getf.sig r25=f8
// f8 denormal ?
fclass.m p8,p10=f8,0x9
// r24 = significand of 1.5 (to test whether the index is 128 or above)
shl r24=r24,60
}
{.mfi
// r26=0x804 (shifted below into the "near 1" threshold)
mov r26=0x804
nop.f 0
// r23=bias-1
mov r23=0xfffe;;
}
{.mmf
getf.exp r29=f8
// load start address of poly_coeffs (coefficients followed by T_table)
ld8 r2=[r2]
// will continue only for positive normal/denormal numbers
fclass.nm.unc p12,p7 = f8, 0x19 ;;
}
.pred.rel "mutex",p8,p10
{.mfi
// denormal input, repeat get significand (after normalization)
(p8) getf.sig r25=f7
// x=1 ?
fcmp.eq.s0 p6,p0=f8,f1
// get T_index
(p10) shr.u r28=r25,63-8
}
{.mfi
// f12=0.5
setf.exp f12=r23
nop.f 0
// r27=bias
mov r27=0xffff;;
}
{.mfb
// denormal input, repeat get exponent (after normalization)
(p8) getf.exp r29=f7
nop.f 0
// zero, negative, infinity or NaN: handle out of line
(p12) br.cond.spnt SPECIAL_log2f
}
{.mfi
// p12=1 if significand >= 1.5 (index is 128 or above)
cmp.geu p12,p0=r25,r24
nop.f 0
// r23=0xff (index mask)
mov r23=0xff;;
}
{.mfi
// r3 = address of C_2
add r3=32,r2
// r=x*y-1
fms.s1 f6=f6,f8,f1
// r26=0x80400...0 (threshold for using polynomial approximation)
shl r26=r26,64-12
}
{.mfi
// load C_3, C_4
ldfpd f10,f11=[r2],16
nop.f 0
// r27=bias-1 (if index >=128, will add exponent+1)
(p12) mov r27=0xfffe;;
}
{.mfi
// load C_1; r2 then points to the start of T_table
ldfe f14=[r2],32
// x=1, return 0
(p6) fma.s.s0 f8=f0,f0,f0
// denormal input: get T_index from the normalized significand
(p8) shr.u r28=r25,63-8
}
{.mib
// load C_2
ldfe f13=[r3]
// r29=exponent-bias
sub r29=r29,r27
// x=1, return
(p6) br.ret.spnt b0;;
}
{.mfi
// get T_index
and r28=r28,r23
// f7 = significand of x with exponent 0 (m, 1<=m<2)
fmerge.se f7=f1,f7
// if first 9 bits after leading 1 are all zero, then p8=1
cmp.ltu p8,p12=r25,r26;;
}
{.mfi
// f8=expon - bias
setf.sig f8=r29
nop.f 0
// get T address
shladd r2=r28,3,r2
}
{.mfi
// first 8 bits after leading 1 are all ones ?
cmp.eq p10,p0=r23,r28
// if first 9 bits after leading bit are 0, use polynomial approx. only
(p8) fms.s1 f6=f7,f1,f1
nop.i 0;;
}
{.mfi
//r26=1
mov r26=1
// if first 8 bits after leading 1 are all ones, use polynomial approx. only
(p10) fms.s1 f6=f7,f12,f1
nop.i 0;;
}
.pred.rel "mutex",p8,p12
{.mmf
// load T (unless first 9 bits after leading 1 are 0)
(p12) ldfd f12=[r2]
nop.m 0
// set T=0 (if first 9 bits after leading 1 are 0)
(p8) fma.s1 f12=f0,f0,f0;;
}
{.mfi
nop.m 0
// P34=C_3+C_4*r
fma.s1 f10=f11,f6,f10
// r26=2^{63}
shl r26=r26,63
}
{.mfi
nop.m 0
// r2=r*r
fma.s1 f11=f6,f6,f0
nop.i 0;;
}
{.mfi
// significand of x is 1 ? (p6=1 if it is not)
cmp.eq p0,p6=r25,r26
// P12=C_1+C_2*r
fma.s1 f14=f13,f6,f14
nop.i 0;;
}
{.mfi
nop.m 0
// normalize additive term (l=exponent of x)
fcvt.xf f8=f8
// if significand(x)=1, return exponent (l)
nop.i 0;;
}
{.mfi
nop.m 0
// add T+l
(p6) fma.s1 f8=f8,f1,f12
nop.i 0
}
{.mfi
nop.m 0
// P14=P12+r2*P34
(p6) fma.s1 f13=f10,f11,f14
nop.i 0;;
}
{.mfb
nop.m 0
// result=T+l+r*P14
(p6) fma.s.s0 f8=f13,f6,f8
// return
br.ret.sptk b0;;
}
SPECIAL_log2f:
{.mfi
nop.m 0
// Save the original argument: __libm_error_region stores FR_X as
// parameter 1, and f8 is overwritten below before the region is entered
mov FR_X=f8
nop.i 0
}
{.mfi
nop.m 0
// x=+Infinity ?
fclass.m p7,p0=f8,0x21
nop.i 0;;
}
{.mfi
nop.m 0
// x=+/-Zero ?
fclass.m p8,p0=f8,0x7
nop.i 0;;
}
{.mfi
nop.m 0
// x=-Infinity, -normal, -denormal ?
fclass.m p6,p0=f8,0x3a
nop.i 0;;
}
{.mfb
nop.m 0
// log2f(+Infinity)=+Infinity
nop.f 0
(p7) br.ret.spnt b0;;
}
{.mfi
(p8) mov GR_Parameter_TAG = 172
// log2f(+/-0)=-infinity, raises Divide by Zero
// set f8=-0
(p8) fmerge.ns f8=f0,f8
nop.i 0;;
}
{.mfb
nop.m 0
(p8) frcpa.s0 f8,p0=f1,f8
(p8) br.cond.sptk __libm_error_region;;
}
{.mfb
(p6) mov GR_Parameter_TAG = 173
// x<0: return NaN, raise Invalid
(p6) frcpa.s0 f8,p0=f0,f0
(p6) br.cond.sptk __libm_error_region;;
}
{.mfb
nop.m 0
// Remaining cases: NaNs
fma.s.s0 f8=f8,f1,f0
br.ret.sptk b0;;
}
GLOBAL_LIBM_END(log2f)
// Error path for log2f: entered by a branch from SPECIAL_log2f with
// GR_Parameter_TAG already set and FR_RESULT (f8) holding the default result.
// Allocates a 64-byte frame, stores FR_X, FR_Y and FR_RESULT as single
// precision values into it, calls __libm_error_support, then reloads the
// result slot into f8 and returns to log2f's caller.
// Frame layout relative to the new sp: +16 parameter 1 (FR_X),
// +32 parameter 2 (FR_Y), +48 result (FR_RESULT).
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (new sp + 32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack; pointer advances to new sp + 48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // back to the Parameter 2 address
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp // recompute the result address after the call
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
LOCAL_LIBM_END(__libm_error_region)
// __libm_error_support is defined outside this file
.type __libm_error_support#,@function
.global __libm_error_support#

816
sysdeps/ia64/fpu/e_log2l.S Normal file
View File

@ -0,0 +1,816 @@
.file "log2l.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 09/25/00 Initial version
// 11/22/00 Fixed accuracy bug (for mantissas near 1, 2)
// 12/07/00 Fixed C_1l constant, eliminated rounding errors in
// reduced argument (x*frcpa(x)-1)
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// long double log2l(long double)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let x = 2^l * m, where m=1.b1 b2 ... b8 b9 ... b52
// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8
// T_hi is a table that stores the 24 most significant bits of log2(1/y)
// (in entries 1..255) in single precision format
// T_low is a table that stores (log2(1/y)-T_high), rounded to double
// precision
//
// f is used as an index; T_high[255]=T_low[255]=0
//
// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m),
// and 0 is used instead of T_high[0], T_low[0]
// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used
// for m=2(1-r'), 0<=r'<2^{-9})
//
// If 2^{-9}<=m<2-2^{-8} or (input not near 1), let C1r=(2^{16}+C1*r)-2^{16}
// and let E=((RN(m*y)-1)-r)+(m*y-RN(m*y))
// Else let C1r=C1*r (rounded to 64 significant bits) and let E=0
//
// Let D=C1*r-C1r
//
//
// log2l(x) is approximated as
// (l+T_high[f]+C1r) + (D+r*(c1+c2*r+c3*r^2...+c8*r^7)+(T_low[f]+C_1*E))
//
// Special values
//==============================================================
// log2l(0)=-inf, raises Divide by Zero
// log2l(+inf)=inf
// log2l(x)=NaN, raises Invalid if x<0
//
// Registers used
//==============================================================
// f6-f15, f32-f36, f41-f43
// r2-r3, r23-r29
// p6,p7,p8,p10,p12,p13
//
// Register aliases for the error path: general registers r33-r36 hold
// saved state, r37-r40 carry the arguments for __libm_error_support.
GR_SAVE_B0          = r33    // saved b0 (return address)
GR_SAVE_PFS         = r34    // saved ar.pfs
GR_SAVE_GP          = r35    // saved gp -- this reg. can safely be used
GR_SAVE_SP          = r36
GR_Parameter_X      = r37    // address of parameter 1
GR_Parameter_Y      = r38    // address of parameter 2
GR_Parameter_RESULT = r39    // address of the result slot
GR_Parameter_TAG    = r40    // error tag
// Floating-point registers stored into the argument slots
FR_X                = f10
FR_Y                = f1
FR_RESULT           = f8
// Data tables
//==============================================================
RODATA
.align 16
// Polynomial coefficients for log2l.
// log2l addresses this object by fixed byte offsets, so the order and size
// of the entries must not change:
//   +0   C_1        (double extended, ldfe)
//   +16  C_7, C_8   (two doubles, ldfpd)
//   +32  C_5, C_6   (two doubles, ldfpd)
//   +48  C_3, C_4   (two doubles, ldfpd)
//   +64  C_1l       (double extended, ldfe)
//   +80  C_2        (double extended, ldfe)
//   +96  start of T_table, which must follow immediately
LOCAL_OBJECT_START(poly_coeffs)
data8 0xb8aa3b295c17f0bc, 0x00003fff // C_1 (significand, sign/exponent)
data8 0x3fca61762a7aded9, 0xbfc71547652b82fe // C_7, C_8
data8 0x3fd2776c50ef9bfe, 0xbfcec709dc3a03fd // C_5, C_6
data8 0x3fdec709dc3a03fd, 0xbfd71547652b82fe // C_3, C_4
// Previous value of C_1l, kept for reference (the history note dated
// 12/07/00 mentions a fix to this constant):
//data8 0xd871319ff0342580, 0x0000bfbd // C_1l (low part of C1)
data8 0x82f0025f2dc582ee, 0x0000bfbe // C_1l (low part of C1)
data8 0xb8aa3b295c17f0bc, 0x0000bffe // C_2 (significand, sign/exponent)
LOCAL_OBJECT_END(poly_coeffs)
// T_table (T_high): 256 single-precision entries, indexed by the 8
// significand bits that follow the leading 1 (log2l loads entry f with ldfs
// from T_table + 4*f).  The last entry (index 255) is 0.
// The object is 1024 bytes long; log2l relies on T_low following it
// immediately.
LOCAL_OBJECT_START(T_table)
data4 0x3b38d875, 0x3c0ae7f4, 0x3c67f738, 0x3ca2b253
data4 0x3ccbb91d, 0x3cfac91e, 0x3d1504a5, 0x3d29c4a0
data4 0x3d419264, 0x3d567aa6, 0x3d6e76ca, 0x3d81c3f7
data4 0x3d8c5630, 0x3d9876e9, 0x3da31e0a, 0x3dadcf09
data4 0x3db889f9, 0x3dc34eec, 0x3dce1df5, 0x3dd8f726
data4 0x3de3da94, 0x3deec851, 0x3df82ea4, 0x3e0197dd
data4 0x3e071dad, 0x3e0ca8ca, 0x3e116d6e, 0x3e170281
data4 0x3e1bcfbc, 0x3e216ee9, 0x3e2644dc, 0x3e2b1ee1
data4 0x3e30cd12, 0x3e35affd, 0x3e3a970f, 0x3e3f824f
data4 0x3e4544c0, 0x3e4a3926, 0x3e4f31d1, 0x3e542ec7
data4 0x3e593012, 0x3e5e35b7, 0x3e633fbf, 0x3e677625
data4 0x3e6c884b, 0x3e719eea, 0x3e76ba0a, 0x3e7bd9b2
data4 0x3e80111d, 0x3e82a523, 0x3e84ccec, 0x3e876533
data4 0x3e89ffd1, 0x3e8c2d22, 0x3e8e5c18, 0x3e90fd0a
data4 0x3e932fa9, 0x3e95d506, 0x3e980b5a, 0x3e9a4361
data4 0x3e9c7d1f, 0x3e9f2b16, 0x3ea168a0, 0x3ea3a7ea
data4 0x3ea5e8f5, 0x3ea82bc4, 0x3eaa705b, 0x3eacb6bb
data4 0x3eaefee7, 0x3eb148e3, 0x3eb394b1, 0x3eb5e255
data4 0x3eb831d0, 0x3eba8327, 0x3ebcd65c, 0x3ebeb3e0
data4 0x3ec10a7a, 0x3ec362f9, 0x3ec5bd63, 0x3ec7a0b3
data4 0x3ec9fe96, 0x3ecc5e6c, 0x3ece4619, 0x3ed0a978
data4 0x3ed293fe, 0x3ed4faf1, 0x3ed6e859, 0x3ed952eb
data4 0x3edb433c, 0x3eddb178, 0x3edfa4bc, 0x3ee19953
data4 0x3ee40cee, 0x3ee60484, 0x3ee7fd73, 0x3ee9f7bb
data4 0x3eec7280, 0x3eee6fda, 0x3ef06e94, 0x3ef26eb1
data4 0x3ef47031, 0x3ef67317, 0x3ef8f8b2, 0x3efafec5
data4 0x3efd0644, 0x3eff0f32, 0x3f008cc8, 0x3f0192b0
data4 0x3f029952, 0x3f03a0b0, 0x3f0466b2, 0x3f056f5a
data4 0x3f0678c0, 0x3f0782e6, 0x3f088dcc, 0x3f099973
data4 0x3f0aa5dd, 0x3f0b6fac, 0x3f0c7d6d, 0x3f0d8bf4
data4 0x3f0e575b, 0x3f0f673e, 0x3f1077e9, 0x3f1144ef
data4 0x3f1256fc, 0x3f1369d6, 0x3f143880, 0x3f154cc1
data4 0x3f161c7a, 0x3f173227, 0x3f1802f2, 0x3f191a0f
data4 0x3f19ebee, 0x3f1b047e, 0x3f1bd775, 0x3f1cf17b
data4 0x3f1dc58e, 0x3f1ee10f, 0x3f1fb63f, 0x3f208bea
data4 0x3f21a98f, 0x3f22805c, 0x3f2357a7, 0x3f247778
data4 0x3f254fe9, 0x3f2628d9, 0x3f270249, 0x3f2824fb
data4 0x3f28ff97, 0x3f29dab4, 0x3f2ab654, 0x3f2b9277
data4 0x3f2cb8c8, 0x3f2d961e, 0x3f2e73fa, 0x3f2f525b
data4 0x3f303143, 0x3f3110b1, 0x3f31f0a7, 0x3f32d125
data4 0x3f33b22b, 0x3f3493bc, 0x3f3575d6, 0x3f36587b
data4 0x3f373bab, 0x3f381f68, 0x3f3903b1, 0x3f39e888
data4 0x3f3acdec, 0x3f3bb3e0, 0x3f3c9a63, 0x3f3d8177
data4 0x3f3e1bd4, 0x3f3f03d9, 0x3f3fec71, 0x3f40d59b
data4 0x3f41bf59, 0x3f42a9ab, 0x3f434635, 0x3f443180
data4 0x3f451d61, 0x3f4609d9, 0x3f46a7d3, 0x3f479549
data4 0x3f488357, 0x3f492261, 0x3f4a1171, 0x3f4b011c
data4 0x3f4ba139, 0x3f4c91e8, 0x3f4d8334, 0x3f4e246a
data4 0x3f4f16be, 0x3f5009b1, 0x3f50ac02, 0x3f51a001
data4 0x3f524305, 0x3f533812, 0x3f53dbca, 0x3f54d1e7
data4 0x3f55c8a8, 0x3f566d85, 0x3f57655b, 0x3f580af0
data4 0x3f58b0d0, 0x3f59aa2c, 0x3f5a50c7, 0x3f5b4b3c
data4 0x3f5bf294, 0x3f5cee26, 0x3f5d963c, 0x3f5e92ed
data4 0x3f5f3bc3, 0x3f5fe4e7, 0x3f60e32d, 0x3f618d13
data4 0x3f623748, 0x3f63372a, 0x3f63e223, 0x3f648d6b
data4 0x3f658eee, 0x3f663afe, 0x3f66e75e, 0x3f67ea86
data4 0x3f6897b0, 0x3f69452c, 0x3f69f2f9, 0x3f6af847
data4 0x3f6ba6e2, 0x3f6c55d0, 0x3f6d0510, 0x3f6e0c8d
data4 0x3f6ebc9f, 0x3f6f6d04, 0x3f701dbe, 0x3f70cecd
data4 0x3f718030, 0x3f728ae6, 0x3f733d20, 0x3f73efaf
data4 0x3f74a296, 0x3f7555d3, 0x3f760967, 0x3f76bd53
data4 0x3f777197, 0x3f7880a1, 0x3f7935c2, 0x3f79eb3c
data4 0x3f7aa10f, 0x3f7b573b, 0x3f7c0dc2, 0x3f7cc4a3
data4 0x3f7d7bdf, 0x3f7e3376, 0x3f7eeb68, 0x00000000
LOCAL_OBJECT_END(T_table)
// T_low: 256 double-precision entries, indexed like T_table (log2l loads
// entry f with ldfd from T_low + 8*f).  Per the file header, each entry is
// the remainder log2(1/y)-T_high rounded to double precision; the last
// entry (index 255) is 0.
LOCAL_OBJECT_START(T_low)
data8 0x3dc0b97f689876ef, 0x3dfd5d906028ac01
data8 0x3df8b9cbb8d7240b, 0x3de0c941a2f220cd
data8 0x3e09c6aecba15936, 0x3dfa6d528241827c
data8 0x3dd0bad25714903c, 0x3e2776b01dc036a2
data8 0x3e2b914bc77f158b, 0x3e1c0fafd29dc74a
data8 0x3e28dadc119cd3de, 0x3e3bca869da085be
data8 0x3e19d1e700f2200a, 0x3e3e13530cc37504
data8 0x3e3936464d9c41ee, 0x3e3c3fa21c9499d0
data8 0x3e3259e079b6c6e8, 0x3e2a364069c4f7f3
data8 0x3e1274c84f6c6364, 0x3e3796170159f454
data8 0x3e26e1e389f4364e, 0x3e28cedda8c7f658
data8 0x3e376c2028433268, 0x3e4aee6d650c82e1
data8 0x3e33e65094fbeeb4, 0x3e4c7d125aa92c5d
data8 0x3e1559a4b69691d8, 0x3e18efabeb7d7221
data8 0x3e4c2b255abaa8de, 0x3e37436952a4538b
data8 0x3e4e6807f4ba00b8, 0x3e33ff5964190e42
data8 0x3e4f5d798cead43c, 0x3e4f3676443bf453
data8 0x3e4660f8d5bc1bf5, 0x3e2d4f9f3ab04f36
data8 0x3e357f7a64ccd537, 0x3e394caf7c9b05af
data8 0x3e225c7d17ab29b0, 0x3e4eb202f6d55a12
data8 0x3e32faa68b19bcd2, 0x3e45ee1c9b566a8b
data8 0x3e4770a67de054ff, 0x3e42234fb9de6d6b
data8 0x3e4ad139825c6e19, 0x3e47f3d334814a93
data8 0x3e2af1ec402867b6, 0x3e2bfbda0c956e3d
data8 0x3e4287b831e77ff2, 0x3e54bf0eb77f7b89
data8 0x3e5b9259a1029607, 0x3e4a764b015e699d
data8 0x3e4d0b68ea883ab5, 0x3e33e829ecdadf46
data8 0x3e52f27efef3031b, 0x3e3073979e4af89e
data8 0x3e3b980f2cd6c253, 0x3e2a5f0f5f7f66a9
data8 0x3e37788738117b02, 0x3e58aa29a784d52f
data8 0x3e4f5504c4ff2466, 0x3e002d40340fa647
data8 0x3e5f53b64592f4c3, 0x3e543f222c526802
data8 0x3e5680e547a872fa, 0x3e5e234bd1154450
data8 0x3e3000edc18b6d21, 0x3e1c3c1f000942a8
data8 0x3e51eeae0e442d6e, 0x3e4fb265376623f2
data8 0x3e57b5941782d830, 0x3e3a4b83f24ae52c
data8 0x3e5a5fb4f23978de, 0x3e51ed071563fb02
data8 0x3e49e2071f51a7a8, 0x3e5e43ae5b924234
data8 0x3dfa2be9aedf374a, 0x3e56dea3dbba67d5
data8 0x3e3375fe732b3c3e, 0x3e5a0c6f91f2e77e
data8 0x3e55e1bf1c969e41, 0x3e30a5a5166b8eee
data8 0x3e53e6e9a539d46c, 0x3e542981b3d7b0e6
data8 0x3e595fd8ff36ad64, 0x3e5edeb9e65cbbb4
data8 0x3e46aeab4d3434c1, 0x3e4ea3ff0564b010
data8 0x3e59b00be2e3c25a, 0x3e5b887cd7b0821f
data8 0x3e5f666668547b4d, 0x3e4d0733a805273f
data8 0x3e26a2ff21c4aec5, 0x3e4c336f7a3a78f3
data8 0x3e11ad12b628e2d0, 0x3e56d43ff3f0ea64
data8 0x3e238809433cccd2, 0x3e40d9734147d40f
data8 0x3e54245fe3e24e06, 0x3e251441fce4d48c
data8 0x3e517114efc5d1f9, 0x3e5e9a99154b0d82
data8 0x3e442a71337970f8, 0x3e420c7c69211fdf
data8 0x3e537e7d5d43c6a7, 0x3e4376c66ad9ad8b
data8 0x3e49054d678a4f1c, 0x3e5d23cb3bc19f18
data8 0x3e6ebcd449dcab2b, 0x3e67f5fc2849c88a
data8 0x3e63f388395d3e84, 0x3e65c1103b0ad7e9
data8 0x3e6d5d1dd031f353, 0x3e5a159dae75c4d0
data8 0x3e4d5e22aa75f71d, 0x3e5e379ee62e1e35
data8 0x3e4df082213cb2dc, 0x3e6bfa06c156f521
data8 0x3e66e2d3c19b517b, 0x3e426b7098590071
data8 0x3e541bd027e9854e, 0x3e5061dd924b0ac0
data8 0x3e6dae01df373a03, 0x3e3baec80b207b0b
data8 0x3e6b6a6fe06bebac, 0x3e61aebcfc3ab5d1
data8 0x3e584ee3e7c79d83, 0x3e6b3c1b2840cb40
data8 0x3e6c842085d6befd, 0x3e6ac04fd7b141e0
data8 0x3e6c48250474141d, 0x3e2d889b86125f69
data8 0x3e6e74740225dad0, 0x3e45940d31d50a7c
data8 0x3e695476a6c39ddc, 0x3e6d9a6d857a060a
data8 0x3e4a3e9bb4b69337, 0x3e484f3ce4707ed6
data8 0x3e39dd125d25fc27, 0x3e563fb400de8732
data8 0x3e5fdd6d0ee28b48, 0x3e669d15b869bb07
data8 0x3e40687cfad7964d, 0x3e69317990d43957
data8 0x3e633d57e24ae1bd, 0x3e618bf03710eabb
data8 0x3e4b4df6fccd1160, 0x3e3fb26ddaa1ec45
data8 0x3e3810a5e1817fd4, 0x3e6857373642fa5c
data8 0x3e673db6193add31, 0x3e63200c8acbc9c3
data8 0x3e3d2dee448ebb62, 0x3e6a19723a80db6a
data8 0x3e5e7cdab8fd3e6a, 0x3e671855cd660672
data8 0x3e473c3c78a85ecd, 0x3e5f5e23056a7cf2
data8 0x3e52538519527367, 0x3e4b573bcf2580e9
data8 0x3e6d6f856fe90c60, 0x3e2d932a8487642e
data8 0x3e5236fc78b6174c, 0x3e50cb91d406db50
data8 0x3e650e8bd562aa57, 0x3e424ee3d9a82f2e
data8 0x3e59363960e1e3d9, 0x3e379604c1150a3e
data8 0x3e6d914f6c2ac258, 0x3e62967a451a7b48
data8 0x3e684b5f01139cb2, 0x3e448bbfbf6d292c
data8 0x3e6227e7fb487e73, 0x3e6d39d50290f458
data8 0x3e58368342b4b668, 0x3e65dc0c25bd1763
data8 0x3e61b7dc362e22b5, 0x3e671691f094bb80
data8 0x3e5011642d5123f2, 0x3e4c4eb7f11e41be
data8 0x3e5dcee36ca242cf, 0x3e6791cefff688f1
data8 0x3e60e23c8dda4ecd, 0x3e48e6a22fe78cfe
data8 0x3e6d703f244adc86, 0x3e6a281a85a5049d
data8 0x3e570f20e6403d9e, 0x3e2211518a12956f
data8 0x3e6737d1e54d71df, 0x3e66b1881476f5e9
data8 0x3e6e1bbeef085376, 0x3e47cad4944a32be
data8 0x3e527f2c738e7ee9, 0x3e699883a4b9fb29
data8 0x3e5c17d1108740d9, 0x3e5d4a9c79a43389
data8 0x3e49fdc24462ba3b, 0x3e24dbb3a60cceb2
data8 0x3e5c5bf618780748, 0x3e5c38005b0c778c
data8 0x3e6be168dd6dd3fe, 0x3e633ab9370693b0
data8 0x3dd290556b0ae339, 0x3e607c317927096a
data8 0x3e59651353b3d90e, 0x3e4d8751e5e0ae0d
data8 0x3e46c81023272a85, 0x3e6b23c988f391b2
data8 0x3e608741d215209c, 0x3e60b8ba506d758f
data8 0x3e62ddbe74803297, 0x3e5dbb8b5087587d
data8 0x3e642aa529048131, 0x3e3dcbda6835dcf4
data8 0x3e6db503ce854d2a, 0x3e6dd00b49bc6849
data8 0x3e4db2f11243bc84, 0x3e3b9848efc2ea97
data8 0x3e58f18e17c82609, 0x3e6ed8645e16c312
data8 0x3e4065bdb60a5dd4, 0x3e490453c6e6c30a
data8 0x3e62373994aa31ba, 0x3e56305f0e6b2a95
data8 0x3e68c1601a6614ee, 0x3e614e204f19d93f
data8 0x3e6e5037ca773299, 0x3e693f98892561a6
data8 0x3e639de4f4bf700d, 0x3e416c071e93fd97
data8 0x3e65466991b415ef, 0x3e6896a324afac9d
data8 0x3e44f64802e2f11c, 0x3e64d7d747e2191a
data8 0x3e6174b7581de84c, 0x3e44c7b946e1d43c
data8 0x3e6a3bcbe30512ec, 0x3e5d3ed411c95ce4
data8 0x3e3e5b5735cfaf8e, 0x3e6e538ab34efb51
data8 0x3e514e204f19d93f, 0x3e5a88e6550c89a4
data8 0x3e66b97a5d9dfd8b, 0x3e5f46b1e14ebaf3
data8 0x3e357665f6893f5d, 0x3e6bbf633078d1d5
data8 0x3e5e7337a212c417, 0x3e3570fde15fc8cc
data8 0x3e21119402da92b4, 0x3e6566e830d1ff3b
data8 0x3e558883e480e220, 0x3e589ca3a68da411
data8 0x3e44eb66df73d648, 0x3e1a0a629b1b7e68
data8 0x3e54cc207b8c1116, 0x0000000000000000
LOCAL_OBJECT_END(T_low)
.section .text
// long double log2l(long double x)
//
// In:   f8 = x
// Out:  f8 = log2(x)
//
// Positive normal/unnormal inputs are handled inline (T_high/T_low table
// lookup plus a degree-8 polynomial in r).  Zero, negative, infinity and
// NaN inputs branch to SPECIAL_log2l; a positive input whose normalized
// value compares equal to 0 branches to LOG2_PSEUDO_ZERO.  Both paths save
// x in FR_X before f8 is overwritten and enter __libm_error_region with
// tag 168 (zero) or 169 (x<0).
GLOBAL_IEEE754_ENTRY(log2l)
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// y=frcpa(x)
frcpa.s1 f41,p0=f1,f8
// r26=bias-1
mov r26=0xfffe
}
{.mfi
// r23=bias+16
mov r23=0xffff+16
// normalize x
fma.s1 f7=f8,f1,f0
// r2 = linkage-table slot for poly_coeffs (coefficients followed by T_table)
addl r2 = @ltoff(poly_coeffs), gp;;
}
{.mfi
// get significand
getf.sig r25=f8
// f8 denormal ?
fclass.m p8,p10=f8,0x9
// r24=bias-8
mov r24=0xffff-8;;
}
{.mfi
// f36=0.5
setf.exp f36=r26
nop.f 0
// r27=bias
mov r27=0xffff;;
}
{.mmf
getf.exp r29=f8
// load start address of poly_coeffs (coefficients followed by T_table)
ld8 r2=[r2]
// will continue only for positive normal/unnormal numbers
fclass.m.unc p0,p12 = f8, 0x19;;
}
.pred.rel "mutex",p8,p10
{.mfi
// denormal input, repeat get significand (after normalization)
(p8) getf.sig r25=f7
// x=1 ?
fcmp.eq.s0 p6,p0=f8,f1
// get T_index
(p10) shr.u r28=r25,63-8
}
{.mfi
// f32=2^16
setf.exp f32=r23
nop.f 0
// r26=0x804 (shifted below into the "near 1" threshold)
mov r26=0x804;;
}
{.mfi
// denormal input, repeat get exponent (after normalization)
(p8) getf.exp r29=f7
// f33=0
mov f33=f0
// r26=0x80400...0 (threshold for using polynomial approximation)
shl r26=r26,64-12;;
}
{.mfb
// r3 = address of C_7, C_8
add r3=16,r2
// r=x*y-1
fms.s1 f6=f41,f8,f1
// zero, negative, infinity or NaN: handle out of line
(p12) br.cond.spnt SPECIAL_log2l
}
{.mfi
// load C_1
ldfe f14=[r2],48
// RN(x*y)
fma.s1 f43=f41,f8,f0
// r23=0xff (index mask)
mov r23=0xff;;
}
{.mmi
// load C_7, C_8
ldfpd f10,f11=[r3],16
// load C_3,C_4
ldfpd f15,f42=[r2],16
// denormal input: get T_index from the normalized significand
(p8) shr.u r28=r25,63-8;;
}
{.mfi
// load C_5, C_6
ldfpd f12,f13=[r3]
// pseudo-zero ?
fcmp.eq.s0 p7,p0=f7,f0
// if first 9 bits after leading 1 are all zero, then p8=1
cmp.ltu p8,p12=r25,r26
}
{.mfi
// load C1l
ldfe f34=[r2],16
// f7 = significand of x with exponent 0 (m, 1<=m<2)
fmerge.se f7=f1,f7
// get T_index
and r28=r28,r23;;
}
{.mfi
// r29=exponent-bias
sub r29=r29,r27
// if first 9 bits after leading bit are 0, use polynomial approx. only
(p8) fms.s1 f6=f7,f1,f1
// start address of T_low (r2 points 16 bytes before T_table here)
add r3=1024+16,r2
}
{.mfi
// load C_2; r2 then points to the start of T_table
ldfe f35=[r2],16
// x=1, return 0
(p6) fma.s0 f8=f0,f0,f0
// first 8 bits after leading 1 are all ones ?
cmp.eq p10,p0=r23,r28;;
}
{.mfb
// if first 8 bits after leading 1 are all ones, use polynomial approx. only
// add 1 to the exponent additive term, and estimate log2(1-r)
(p10) add r29=1,r29
nop.f 0
(p7) br.cond.spnt LOG2_PSEUDO_ZERO
}
{.mfi
// get T_low address
shladd r3=r28,3,r3
// if first 8 bits after leading 1 are all ones, use polynomial approx. only
(p10) fms.s1 f6=f7,f36,f1
// p10 --> p8=1, p12=0
(p10) cmp.eq p8,p12=r0,r0;;
}
{.mfi
// get T_high address
shladd r2=r28,2,r2
// L(x*y)=x*y-RN(x*y)
fms.s1 f41=f41,f8,f43
nop.i 0
}
{.mfi
// p13=p12 (remembers the table-lookup case; p12 is recomputed below)
(p12) cmp.eq.unc p13,p0=r0,r0
// RtH=RN(x*y)-1 (will eliminate rounding errors in r)
fms.s1 f43=f43,f1,f1
nop.i 0;;
}
.pred.rel "mutex",p8,p12
{.mfb
// load T_high (unless first 9 bits after leading 1 are 0)
(p12) ldfs f7=[r2]
// set T_high=0 (if first 9 bits after leading 1 are 0)
(p8) fma.s1 f7=f0,f0,f0
// x=1, return
(p6) br.ret.spnt b0
}
.pred.rel "mutex",p8,p12
{.mfi
// p12: load T_low
(p12) ldfd f36=[r3]
// p8: set T_low=0
(p8) fma.s1 f36=f0,f0,f0
// p8 stays set only if the exponent term r29 is 0; otherwise p12 is set
(p8) cmp.eq p8,p12=r29,r0;; //nop.i 0;;
}
.pred.rel "mutex",p8,p12
{.mfi
// f8=expon - bias
setf.sig f8=r29
// general case: 2^{16}+C1*r
(p12) fma.s1 f33=f6,f14,f32
nop.i 0
}
{.mfi
// r26=1
mov r26=1
// p8 (mantissa is close to 1 or close to 2, exponent term 0): 0+C1*r (f33=0)
(p8) fma.s1 f32=f6,f14,f33
nop.i 0;;
}
{.mfi
nop.m 0
// P78=C_7+C_8*r
fma.s1 f10=f11,f6,f10
// r26=2^{63}
shl r26=r26,63
}
{.mfi
nop.m 0
// P34=C_3+r*C_4
fma.s1 f15=f42,f6,f15
nop.i 0;;
}
{.mfi
nop.m 0
// r2=r*r
fma.s1 f11=f6,f6,f0
nop.i 0
}
{.mfi
nop.m 0
// P56=C_5+C_6*r
fma.s1 f13=f13,f6,f12
nop.i 0;;
}
{.mfi
nop.m 0
// Rth-r
(p13) fms.s1 f43=f43,f1,f6
nop.i 0
}
{.mfi
// significand(x)=1 ? (p6=1 if it is not)
cmp.eq p0,p6=r25,r26
// P12=C1l+C_2*r
fma.s1 f34=f35,f6,f34
nop.i 0;;
}
.pred.rel "mutex",p8,p12
{.mfi
nop.m 0
// p12: C1r=(2^{16}+C1*r)-2^{16}
(p12) fms.s1 f32=f33,f1,f32
nop.i 0
}
{.mfi
nop.m 0
// p8: C1r=C1*r (double extended)
(p8) fms.s1 f32=f32,f1,f33
nop.i 0;;
}
{.mfi
nop.m 0
// L(x*y)*C_1+T_low
(p13) fma.s1 f36=f41,f14,f36
nop.i 0
}
{.mfi
nop.m 0
// P58=P56+r2*P78
fma.s1 f13=f11,f10,f13
nop.i 0;;
}
{.mfi
nop.m 0
// P14=P12+r2*P34
fma.s1 f15=f15,f11,f34
nop.i 0
}
{.mfi
nop.m 0
// r4=r2*r2
fma.s1 f11=f11,f11,f0
nop.i 0;;
}
{.mfi
nop.m 0
// normalize additive term (l=exponent of x)
fcvt.xf f8=f8
nop.i 0;;
}
{.mfi
nop.m 0
// D=C1*r-C1r
(p6) fms.s1 f12=f14,f6,f32
nop.i 0;;
}
{.mfi
nop.m 0
// T_low'=(Rth-r)*C1+(L(x*y)*C1+T_low)
(p13) fma.s1 f36=f43,f14,f36
nop.i 0;;
}
{.mfi
nop.m 0
// P18=P14+r4*P58
(p6) fma.s1 f13=f11,f13,f15
nop.i 0;;
}
{.mfi
nop.m 0
// add T_high+l
(p6) fma.s1 f8=f8,f1,f7
nop.i 0;;
}
{.mfi
nop.m 0
// D+T_low
(p6) fma.s1 f12=f12,f1,f36
nop.i 0;;
}
{.mfi
nop.m 0
// (T_high+l)+C1r
(p6) fma.s1 f8=f8,f1,f32
nop.i 0
}
{.mfi
nop.m 0
// (D+T_low)+r*P18
(p6) fma.s1 f13=f13,f6,f12
nop.i 0;;
}
// NOTE(review): the bundle below is commented out in the original source;
// it looks like leftover debugging code -- confirm before removing.
//{.mfb
//nop.m 0
//mov f8=f36
//fma.s0 f8=f13,f6,f0
//br.ret.sptk b0;;
//}
{.mfb
nop.m 0
// result=((T_high+l)+C1r)+((D+T_low)+r*P18)
(p6) fma.s0 f8=f13,f1,f8
// return
br.ret.sptk b0;;
}
SPECIAL_log2l:
{.mfi
nop.m 0
// save x: the error region stores FR_X as parameter 1, f8 is overwritten below
mov FR_X=f8
nop.i 0
}
{.mfi
nop.m 0
// x=+Infinity ?
fclass.m p7,p0=f8,0x21
nop.i 0;;
}
{.mfi
nop.m 0
// x=+/-Zero ? (tested on f7, the normalized copy of x)
fclass.m p8,p0=f7,0x7
nop.i 0;;
}
{.mfi
nop.m 0
// x=-Infinity, -normal, -denormal ?
fclass.m p6,p0=f8,0x3a
nop.i 0;;
}
{.mfb
nop.m 0
// log2l(+Infinity)=+Infinity
nop.f 0
(p7) br.ret.spnt b0;;
}
{.mfi
(p8) mov GR_Parameter_TAG = 168
// log2l(+/-0)=-infinity, raises Divide by Zero
// set f8=-0
(p8) fmerge.ns f8=f0,f8
nop.i 0;;
}
{.mfb
nop.m 0
(p8) frcpa.s0 f8,p0=f1,f8
(p8) br.cond.sptk __libm_error_region;;
}
{.mfb
(p6) mov GR_Parameter_TAG = 169
// x<0: return NaN, raise Invalid
(p6) frcpa.s0 f8,p0=f0,f0
(p6) br.cond.sptk __libm_error_region;;
}
{.mfb
nop.m 0
// Remaining cases: NaNs
fma.s0 f8=f8,f1,f0
br.ret.sptk b0;;
}
LOG2_PSEUDO_ZERO:
{.mfi
nop.m 0
// save x: the error region stores FR_X as parameter 1, f8 is overwritten below
mov FR_X=f8
nop.i 0
}
{.mfi
mov GR_Parameter_TAG = 168
// log2l(+/-0)=-infinity, raises Divide by Zero
// set f8=-0
fmerge.ns f8=f0,f8
nop.i 0;;
}
{.mfb
nop.m 0
frcpa.s0 f8,p0=f1,f8
br.cond.sptk __libm_error_region;;
}
GLOBAL_IEEE754_END(log2l)
// Error path for log2l: entered by a branch from SPECIAL_log2l or
// LOG2_PSEUDO_ZERO with GR_Parameter_TAG set, FR_X holding the original
// argument and FR_RESULT (f8) holding the default result.
// Allocates a 64-byte frame, stores FR_X, FR_Y and FR_RESULT in extended
// format into it, calls __libm_error_support, then reloads the result slot
// into f8 and returns to log2l's caller.
// Frame layout relative to the new sp: +16 parameter 1 (FR_X),
// +32 parameter 2 (FR_Y), +48 result (FR_RESULT).
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (new sp + 32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfe [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack; pointer advances to new sp + 48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfe [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // back to the Parameter 2 address
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp // recompute the result address after the call
};;
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
LOCAL_LIBM_END(__libm_error_region)
// __libm_error_support is defined outside this file
.type __libm_error_support#,@function
.global __libm_error_support#

File diff suppressed because it is too large Load Diff

1198
sysdeps/ia64/fpu/e_logl.S Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
.file "remainder.asm"
// Copyright (C) 2000, 2001, Intel Corporation
.file "remainder.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, Bob Norin,
// Shane Story, and Ping Tak Peter Tang of the Computational Software Lab,
// Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,17 +35,19 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New Algorithm
// 4/04/00 Unwind support added
// 7/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 Initial version
// 03/02/00 New Algorithm
// 04/04/00 Unwind support added
// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/29/00 Set FR_Y to f9
// 11/29/00 Set FR_Y to f9
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//====================================================================
@ -78,16 +80,12 @@
// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
// a=NaN or b=NaN: return NaN
#include "libm_support.h"
// Registers used
//====================================================================
// Predicate registers: p6-p14
// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15,f32
.section .text
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
@ -103,18 +101,9 @@ FR_Y = f9
FR_RESULT = f8
.section .text
GLOBAL_IEEE754_ENTRY(remainder)
.proc remainder#
.align 32
.global remainder#
.align 32
remainder:
#ifdef _LIBC
.global __remainder
.type __remainder,@function
__remainder:
#endif
// inputs in f8, f9
// result in f8
@ -139,7 +128,7 @@ __remainder:
// Y +-NAN, +-inf, +-0? p11
{ .mfi
setf.exp f32=r28
(p0) fclass.m.unc p11,p0 = f9, 0xe7
fclass.m.unc p11,p0 = f9, 0xe7
nop.i 999
}
// qnan snan inf norm unorm 0 -+
@ -148,7 +137,7 @@ __remainder:
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f8, 0xe3
fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
}
@ -167,8 +156,8 @@ __remainder:
}
{.bbb
(p9) br.cond.spnt L(FREM_X_NAN_INF)
(p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
(p9) br.cond.spnt FREM_X_NAN_INF
(p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
nop.b 0
} {.mfi
nop.m 0
@ -178,7 +167,7 @@ __remainder:
}
L(remloop24):
remloop24:
{ .mfi
nop.m 0
// Step (2)
@ -200,7 +189,7 @@ L(remloop24):
{.mfi
nop.m 0
// q1=q0*(1+e0)
fma.s1 f15=f12,f7,f12
(p6) fma.s1 f15=f12,f7,f12
nop.i 0
}
{ .mfi
@ -331,7 +320,7 @@ L(remloop24):
// (p9) set r=r2 (new a, if not last iteration)
// (p10) new a =r
(p10) mov f13=f6
(p12) br.cond.sptk L(remloop24);;
(p12) br.cond.sptk remloop24;;
}
// last iteration
@ -388,7 +377,7 @@ L(remloop24):
}
L(FREM_X_NAN_INF):
FREM_X_NAN_INF:
// Y zero ?
{.mfi
@ -405,19 +394,19 @@ L(FREM_X_NAN_INF):
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FREM_Y_ZERO);;
(p11) br.cond.spnt FREM_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p0 = f8, 0x23
fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
(p0) fclass.m.unc p11,p0 = f8, 0x23
fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
@ -445,14 +434,14 @@ L(FREM_X_NAN_INF):
}
{ .mfi
nop.m 999
(p8) fma.d f8=f8,f1,f0
(p8) fma.d.s0 f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
frcpa.s0 f8,p7=f8,f9
(p11) br.cond.spnt L(EXP_ERROR_RETURN);;
(p11) br.cond.spnt EXP_ERROR_RETURN;;
}
{ .mib
nop.m 0
@ -461,35 +450,35 @@ L(FREM_X_NAN_INF):
}
L(FREM_Y_NAN_INF_ZERO):
FREM_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma.d f8=f8,f1,f0
(p7) fma.d.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0xc3
fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p9) fma.d f8=f9,f1,f0
(p9) fma.d.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
L(FREM_Y_ZERO):
FREM_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@ -497,7 +486,7 @@ L(FREM_Y_ZERO):
// X NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
@ -508,47 +497,41 @@ L(FREM_Y_ZERO):
{.mfi
nop.m 999
(p9) frcpa f11,p7=f8,f0
(p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
(p10) frcpa f11,p7 = f0,f0
(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
(p0) fmerge.s f10 = f8, f8
fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
(p0) fma.d f8=f11,f1,f0
fma.d.s0 f8=f11,f1,f0
nop.i 999
}
L(EXP_ERROR_RETURN):
EXP_ERROR_RETURN:
{ .mib
(p0) mov GR_Parameter_TAG = 124
mov GR_Parameter_TAG = 124
nop.i 999
(p0) br.sptk __libm_error_region;;
br.sptk __libm_error_region;;
}
.endp remainder
ASM_SIZE_DIRECTIVE(remainder)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__remainder)
#endif
GLOBAL_IEEE754_END(remainder)
.proc __libm_error_region
__libm_error_region:
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -596,10 +579,11 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -1,11 +1,10 @@
.file "remainderf.asm"
// Copyright (C) 2000, 2001, Intel Corporation
.file "remainderf.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab,
// Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -36,17 +35,19 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New algorithm
// 4/04/00 Unwind support added
// 7/21/00 Fixed quotient=2^{24*m+23} bug
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 Initial version
// 03/02/00 New algorithm
// 04/04/00 Unwind support added
// 07/21/00 Fixed quotient=2^{24*m+23} bug
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/29/00 Set FR_Y to f9
// 11/29/00 Set FR_Y to f9
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//====================================================================
@ -78,9 +79,6 @@
//====================================================================
// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
// a=NaN or b=NaN: return NaN
#include "libm_support.h"
//
// Registers used
//====================================================================
@ -89,8 +87,6 @@
// Floating point registers: f6-f15
//
.section .text
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
@ -106,17 +102,9 @@ FR_Y = f9
FR_RESULT = f8
.proc remainderf#
.align 32
.global remainderf#
.align 32
.section .text
GLOBAL_IEEE754_ENTRY(remainderf)
remainderf:
#ifdef _LIBC
.global __remainderf
.type __remainderf,@function
__remainderf:
#endif
// inputs in f8, f9
// result in f8
@ -141,7 +129,7 @@ __remainderf:
// Y +-NAN, +-inf, +-0? p11
{ .mfi
nop.m 999
(p0) fclass.m.unc p11,p0 = f9, 0xe7
fclass.m.unc p11,p0 = f9, 0xe7
nop.i 999
}
// qnan snan inf norm unorm 0 -+
@ -150,7 +138,7 @@ __remainderf:
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f8, 0xe3
fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
}
@ -168,8 +156,8 @@ __remainderf:
nop.i 0;;
}
{.bbb
(p9) br.cond.spnt L(FREM_X_NAN_INF)
(p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
(p9) br.cond.spnt FREM_X_NAN_INF
(p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
nop.b 0
} {.mfi
nop.m 0
@ -179,7 +167,7 @@ __remainderf:
}
.align 32
L(remloop24):
remloop24:
{ .mfi
// f12=2^{24}-2
setf.s f12=r3
@ -347,7 +335,7 @@ L(remloop24):
// (p9) set r=r2 (new a, if not last iteration)
// (p10) new a =r
(p10) mov f13=f6
(p12) br.cond.sptk L(remloop24);;
(p12) br.cond.sptk remloop24;;
}
// last iteration
@ -408,7 +396,7 @@ L(remloop24):
}
L(FREM_X_NAN_INF):
FREM_X_NAN_INF:
// Y zero ?
{.mfi
@ -425,19 +413,19 @@ L(FREM_X_NAN_INF):
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FREM_Y_ZERO);;
(p11) br.cond.spnt FREM_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p0 = f8, 0x23
fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
(p0) fclass.m.unc p11,p0 = f8, 0x23
fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
@ -465,14 +453,14 @@ L(FREM_X_NAN_INF):
}
{ .mfi
nop.m 999
(p8) fma.s f8=f8,f1,f0
(p8) fma.s.s0 f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
frcpa.s0 f8,p7=f8,f9
(p11) br.cond.spnt L(EXP_ERROR_RETURN);;
(p11) br.cond.spnt EXP_ERROR_RETURN;;
}
{ .mib
nop.m 0
@ -481,35 +469,35 @@ L(FREM_X_NAN_INF):
}
L(FREM_Y_NAN_INF_ZERO):
FREM_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma.s f8=f8,f1,f0
(p7) fma.s.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0xc3
fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p9) fma.s f8=f9,f1,f0
(p9) fma.s.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
L(FREM_Y_ZERO):
FREM_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@ -517,7 +505,7 @@ L(FREM_Y_ZERO):
// X NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
@ -528,47 +516,41 @@ L(FREM_Y_ZERO):
{.mfi
nop.m 999
(p9) frcpa f11,p7=f8,f0
(p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
(p10) frcpa f11,p7 = f0,f0
(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
(p0) fmerge.s f10 = f8, f8
fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
(p0) fma.s f8=f11,f1,f0
fma.s.s0 f8=f11,f1,f0
nop.i 999
}
L(EXP_ERROR_RETURN):
EXP_ERROR_RETURN:
{ .mib
(p0) mov GR_Parameter_TAG = 125
mov GR_Parameter_TAG = 125
nop.i 999
(p0) br.sptk __libm_error_region;;
br.sptk __libm_error_region;;
}
.endp remainderf
ASM_SIZE_DIRECTIVE(remainderf)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__remainderf)
#endif
GLOBAL_IEEE754_END(remainderf)
.proc __libm_error_region
__libm_error_region:
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -616,9 +598,11 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -1,10 +1,10 @@
.file "remainderl.asm"
// Copyright (C) 2000, 2001, Intel Corporation
.file "remainderl.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,17 +35,19 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New algorithm
// 4/04/00 Unwind support added
// 7/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 Initial version
// 03/02/00 New algorithm
// 04/04/00 Unwind support added
// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/29/00 Set FR_Y to f9
// 11/29/00 Set FR_Y to f9
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//====================================================================
@ -77,9 +79,6 @@
//====================================================================
// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
// a=NaN or b=NaN: return NaN
#include "libm_support.h"
//
// Registers used
//====================================================================
@ -87,8 +86,6 @@
// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15,f32
//
.section .text
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
@ -105,19 +102,9 @@ FR_Y = f9
FR_RESULT = f8
.section .text
GLOBAL_IEEE754_ENTRY(remainderl)
.proc remainderl#
.align 32
.global remainderl#
.align 32
remainderl:
#ifdef _LIBC
.global __remainderl
.type __remainderl,@function
__remainderl:
#endif
// inputs in f8, f9
// result in f8
@ -159,7 +146,7 @@ cmp.eq p11,p10=r29,r0;;
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p8 = f8, 0xe3
fclass.m.unc p9,p8 = f8, 0xe3
nop.i 999;;
}
@ -196,8 +183,8 @@ cmp.eq p11,p10=r29,r0;;
}
{.bbb
(p9) br.cond.spnt L(FREM_X_NAN_INF)
(p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
(p9) br.cond.spnt FREM_X_NAN_INF
(p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
nop.b 0
} {.mfi
nop.m 0
@ -206,7 +193,7 @@ cmp.eq p11,p10=r29,r0;;
nop.i 0;;
}
L(remloop24):
remloop24:
{ .mfi
nop.m 0
// Step (2)
@ -228,7 +215,7 @@ L(remloop24):
{.mfi
nop.m 0
// q1=q0*(1+e0)
fma.s1 f15=f12,f7,f12
(p6) fma.s1 f15=f12,f7,f12
nop.i 0
}
{ .mfi
@ -358,7 +345,7 @@ L(remloop24):
// (p9) set r=r2 (new a, if not last iteration)
// (p10) new a =r
(p10) mov f13=f6
(p12) br.cond.sptk L(remloop24);;
(p12) br.cond.sptk remloop24;;
}
// last iteration
@ -416,7 +403,7 @@ L(remloop24):
L(FREM_X_NAN_INF):
FREM_X_NAN_INF:
// Y zero ?
{.mfi
@ -433,19 +420,19 @@ L(FREM_X_NAN_INF):
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FREM_Y_ZERO);;
(p11) br.cond.spnt FREM_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p0 = f8, 0x23
fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
(p0) fclass.m.unc p11,p0 = f8, 0x23
fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
@ -473,14 +460,14 @@ L(FREM_X_NAN_INF):
}
{ .mfi
nop.m 999
(p8) fma f8=f8,f1,f0
(p8) fma.s0 f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
frcpa.s0 f8,p7=f8,f9
(p11) br.cond.spnt L(EXP_ERROR_RETURN);;
(p11) br.cond.spnt EXP_ERROR_RETURN;;
}
{ .mib
nop.m 0
@ -489,24 +476,24 @@ L(FREM_X_NAN_INF):
}
L(FREM_Y_NAN_INF_ZERO):
FREM_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma f8=f8,f1,f0
(p7) fma.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f9, 0xc3
fclass.m.unc p9,p10 = f9, 0xc3
nop.i 999 ;;
}
{ .mfi
@ -517,11 +504,11 @@ L(FREM_Y_NAN_INF_ZERO):
{ .mfb
nop.m 999
(p9) fma f8=f9,f1,f0
(p9) fma.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
L(FREM_Y_ZERO):
FREM_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@ -529,7 +516,7 @@ L(FREM_Y_ZERO):
// X NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
@ -540,43 +527,37 @@ L(FREM_Y_ZERO):
{.mfi
nop.m 999
(p9) frcpa f11,p7=f8,f0
(p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
(p10) frcpa f11,p7 = f0,f0
(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
(p0) fmerge.s f10 = f8, f8
fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
(p0) fma f8=f11,f1,f0
fma.s0 f8=f11,f1,f0
nop.i 999;;
}
L(EXP_ERROR_RETURN):
EXP_ERROR_RETURN:
{ .mib
(p0) mov GR_Parameter_TAG = 123
mov GR_Parameter_TAG = 123
nop.i 999
(p0) br.sptk __libm_error_region;;
br.sptk __libm_error_region;;
}
.endp remainderl
ASM_SIZE_DIRECTIVE(remainderl)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__remainderl)
#endif
.proc __libm_error_region
__libm_error_region:
GLOBAL_IEEE754_END(remainderl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -624,9 +605,12 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -1,10 +1,10 @@
.file "scalb.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,12 +35,14 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 Scalb completely reworked and now standalone version
// 02/02/00 Initial version
// 01/26/01 Scalb completely reworked and now standalone version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@ -53,8 +55,6 @@
//
//
#include "libm_support.h"
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
@ -84,19 +84,8 @@ GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
.align 32
.global scalb
.section .text
.proc scalb
.align 32
scalb:
#ifdef _LIBC
.global __ieee754_scalb
.type __ieee754_scalb,@function
__ieee754_scalb:
#endif
GLOBAL_IEEE754_ENTRY(scalb)
//
// Is x NAN, INF, ZERO, +-?
@ -140,12 +129,12 @@ __ieee754_scalb:
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
(p6) br.cond.spnt L(SCALB_NAN_INF_ZERO)
(p6) br.cond.spnt SCALB_NAN_INF_ZERO
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
(p7) br.cond.spnt L(SCALB_NAN_INF_ZERO)
(p7) br.cond.spnt SCALB_NAN_INF_ZERO
};;
//
@ -212,7 +201,7 @@ __ieee754_scalb:
}
{ .mfb
nop.m 0
(p7) frcpa f8,p11 = f0,f0
(p7) frcpa.s0 f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
@ -246,7 +235,7 @@ __ieee754_scalb:
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x00000000000303FF
movl GR_Scratch = 0x00000000000303FF
};;
{ .mfi
nop.m 0
@ -255,7 +244,7 @@ __ieee754_scalb:
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x00000000000103FF
movl GR_Scratch1= 0x00000000000103FF
};;
// Set up necessary status fields
@ -266,12 +255,12 @@ __ieee754_scalb:
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
fsetc.s2 0x7F,0x42
nop.i 999
};;
@ -345,7 +334,7 @@ __ieee754_scalb:
{ .mfb
(p6) addl GR_Tag = 54, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(SCALB_UNDERFLOW)
(p6) br.cond.spnt SCALB_UNDERFLOW
};;
//
@ -353,8 +342,8 @@ __ieee754_scalb:
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(SCALB_OVERFLOW)
(p9) br.cond.spnt L(SCALB_OVERFLOW)
(p7) br.cond.spnt SCALB_OVERFLOW
(p9) br.cond.spnt SCALB_OVERFLOW
};;
//
@ -366,7 +355,7 @@ __ieee754_scalb:
br.ret.sptk b0;;
}
L(SCALB_NAN_INF_ZERO):
SCALB_NAN_INF_ZERO:
//
// Convert N to a fp integer
@ -471,16 +460,11 @@ L(SCALB_NAN_INF_ZERO):
br.ret.sptk b0
};;
.endp scalb
ASM_SIZE_DIRECTIVE(scalb)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__ieee754_scalb)
#endif
.proc __libm_error_region
GLOBAL_IEEE754_END(scalb)
__libm_error_region:
L(SCALB_OVERFLOW):
L(SCALB_UNDERFLOW):
SCALB_OVERFLOW:
SCALB_UNDERFLOW:
//
// Get stack address of N
@ -557,8 +541,7 @@ L(SCALB_UNDERFLOW):
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -1,10 +1,10 @@
.file "scalbf.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,12 +35,14 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 Scalb completely reworked and now standalone version
// 02/02/00 Initial version
// 01/26/01 Scalb completely reworked and now standalone version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@ -53,8 +55,6 @@
//
//
#include "libm_support.h"
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
@ -84,19 +84,8 @@ GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
.align 32
.global scalbf
.section .text
.proc scalbf
.align 32
scalbf:
#ifdef _LIBC
.global __ieee754_scalbf
.type __ieee754_scalbf,@function
__ieee754_scalbf:
#endif
GLOBAL_IEEE754_ENTRY(scalbf)
//
// Is x NAN, INF, ZERO, +-?
@ -140,12 +129,12 @@ __ieee754_scalbf:
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
(p6) br.cond.spnt L(SCALBF_NAN_INF_ZERO)
(p6) br.cond.spnt SCALBF_NAN_INF_ZERO
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
(p7) br.cond.spnt L(SCALBF_NAN_INF_ZERO)
(p7) br.cond.spnt SCALBF_NAN_INF_ZERO
};;
//
@ -212,7 +201,7 @@ __ieee754_scalbf:
}
{ .mfb
nop.m 0
(p7) frcpa f8,p11 = f0,f0
(p7) frcpa.s0 f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
@ -246,7 +235,7 @@ __ieee754_scalbf:
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x000000000003007F
movl GR_Scratch = 0x000000000003007F
};;
{ .mfi
nop.m 0
@ -255,7 +244,7 @@ __ieee754_scalbf:
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x000000000001007F
movl GR_Scratch1= 0x000000000001007F
};;
// Set up necessary status fields
@ -266,12 +255,12 @@ __ieee754_scalbf:
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
fsetc.s2 0x7F,0x42
nop.i 999
};;
@ -345,7 +334,7 @@ __ieee754_scalbf:
{ .mfb
(p6) addl GR_Tag = 56, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(SCALBF_UNDERFLOW)
(p6) br.cond.spnt SCALBF_UNDERFLOW
};;
//
@ -353,8 +342,8 @@ __ieee754_scalbf:
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(SCALBF_OVERFLOW)
(p9) br.cond.spnt L(SCALBF_OVERFLOW)
(p7) br.cond.spnt SCALBF_OVERFLOW
(p9) br.cond.spnt SCALBF_OVERFLOW
};;
//
@ -366,7 +355,7 @@ __ieee754_scalbf:
br.ret.sptk b0;;
}
L(SCALBF_NAN_INF_ZERO):
SCALBF_NAN_INF_ZERO:
//
// Convert N to a fp integer
@ -471,16 +460,11 @@ L(SCALBF_NAN_INF_ZERO):
br.ret.sptk b0
};;
.endp scalbf
ASM_SIZE_DIRECTIVE(scalbf)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__ieee754_scalbf)
#endif
.proc __libm_error_region
GLOBAL_IEEE754_END(scalbf)
__libm_error_region:
L(SCALBF_OVERFLOW):
L(SCALBF_UNDERFLOW):
SCALBF_OVERFLOW:
SCALBF_UNDERFLOW:
//
// Get stack address of N
@ -557,8 +541,7 @@ L(SCALBF_UNDERFLOW):
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -1,10 +1,10 @@
.file "scalbl.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,12 +35,14 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 Scalb completely reworked and now standalone version
// 02/02/00 Initial version
// 01/26/01 Scalb completely reworked and now standalone version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@ -53,8 +55,6 @@
//
//
#include "libm_support.h"
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
@ -84,19 +84,8 @@ GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
.align 32
.global scalbl
.section .text
.proc scalbl
.align 32
scalbl:
#ifdef _LIBC
.global __ieee754_scalbl
.type __ieee754_scalbl,@function
__ieee754_scalbl:
#endif
GLOBAL_IEEE754_ENTRY(scalbl)
//
// Is x NAN, INF, ZERO, +-?
@ -140,12 +129,12 @@ __ieee754_scalbl:
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
(p6) br.cond.spnt L(SCALBL_NAN_INF_ZERO)
(p6) br.cond.spnt SCALBL_NAN_INF_ZERO
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
(p7) br.cond.spnt L(SCALBL_NAN_INF_ZERO)
(p7) br.cond.spnt SCALBL_NAN_INF_ZERO
};;
//
@ -212,7 +201,7 @@ __ieee754_scalbl:
}
{ .mfb
nop.m 0
(p7) frcpa f8,p11 = f0,f0
(p7) frcpa.s0 f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
@ -246,7 +235,7 @@ __ieee754_scalbl:
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x0000000000033FFF
movl GR_Scratch = 0x0000000000033FFF
};;
{ .mfi
nop.m 0
@ -255,7 +244,7 @@ __ieee754_scalbl:
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x0000000000013FFF
movl GR_Scratch1= 0x0000000000013FFF
};;
// Set up necessary status fields
@ -266,12 +255,12 @@ __ieee754_scalbl:
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
fsetc.s2 0x7F,0x42
nop.i 999
};;
@ -345,7 +334,7 @@ __ieee754_scalbl:
{ .mfb
(p6) addl GR_Tag = 52, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(SCALBL_UNDERFLOW)
(p6) br.cond.spnt SCALBL_UNDERFLOW
};;
//
@ -353,8 +342,8 @@ __ieee754_scalbl:
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(SCALBL_OVERFLOW)
(p9) br.cond.spnt L(SCALBL_OVERFLOW)
(p7) br.cond.spnt SCALBL_OVERFLOW
(p9) br.cond.spnt SCALBL_OVERFLOW
};;
//
@ -366,7 +355,7 @@ __ieee754_scalbl:
br.ret.sptk b0;;
}
L(SCALBL_NAN_INF_ZERO):
SCALBL_NAN_INF_ZERO:
//
// Convert N to a fp integer
@ -471,16 +460,11 @@ L(SCALBL_NAN_INF_ZERO):
br.ret.sptk b0
};;
.endp scalbl
ASM_SIZE_DIRECTIVE(scalbl)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__ieee754_scalbl)
#endif
.proc __libm_error_region
GLOBAL_IEEE754_END(scalbl)
__libm_error_region:
L(SCALBL_OVERFLOW):
L(SCALBL_UNDERFLOW):
SCALBL_OVERFLOW:
SCALBL_UNDERFLOW:
//
// Get stack address of N
@ -557,8 +541,7 @@ L(SCALBL_UNDERFLOW):
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
.file "sqrt.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,27 +35,28 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// ********************************************************************
//********************************************************************
// History
// ********************************************************************
// 2/02/00 Initial version
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
//********************************************************************
// 02/02/00 Initial version
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// ********************************************************************
//********************************************************************
//
// Function: Combined sqrt(x), where
// _
// sqrt(x) = |x, for double precision x values
//
// ********************************************************************
//********************************************************************
//
// Accuracy: Correctly Rounded
//
// ********************************************************************
//********************************************************************
//
// Resources Used:
//
@ -68,7 +69,7 @@
//
// Predicate Registers: p6, p7, p8
//
// *********************************************************************
//*********************************************************************
//
// IEEE Special Conditions:
//
@ -78,15 +79,13 @@
// sqrt(+/-0) = +/-0
// sqrt(negative) = QNaN and error handling is called
//
// *********************************************************************
//*********************************************************************
//
// Implementation:
//
// Modified Newton-Raphson Algorithm
//
// *********************************************************************
#include "libm_support.h"
//*********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@ -98,19 +97,7 @@ GR_Parameter_RESULT = r39
.section .text
.proc sqrt#
.global sqrt#
.align 64
sqrt:
#ifdef _LIBC
.global __sqrt
.type __sqrt,@function
__sqrt:
.global __ieee754_sqrt
.type __ieee754_sqrt,@function
__ieee754_sqrt:
#endif
GLOBAL_IEEE754_ENTRY(sqrt)
{ .mfi
alloc r32= ar.pfs,0,5,4,0
frsqrta.s0 f7,p6=f8
@ -255,7 +242,7 @@ __ieee754_sqrt:
{ .mfb
nop.m 0
(p0) mov f8 = f7
mov f8 = f7
(p8) br.ret.sptk b0 ;;
}
{ .mfb
@ -264,13 +251,7 @@ __ieee754_sqrt:
(p7) br.cond.sptk __libm_error_region ;;
}
// END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
.endp sqrt#
ASM_SIZE_DIRECTIVE(sqrt)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__sqrt)
ASM_SIZE_DIRECTIVE(__ieee754_sqrt)
#endif
GLOBAL_IEEE754_END(sqrt)
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
// sp -> + psp -> + psp -> + sp -> +
@ -286,8 +267,7 @@ ASM_SIZE_DIRECTIVE(__ieee754_sqrt)
// save gp restore ar.pfs
.proc __libm_error_region
__libm_error_region:
LOCAL_LIBM_ENTRY(__libm_error_region)
//
// This branch includes all those special values that are not negative,
@ -352,8 +332,9 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function

View File

@ -1,10 +1,10 @@
.file "sqrtf.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,27 +35,29 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// *********************************************************************
//*********************************************************************
// History:
//
// 2/02/00 Initial version
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 Initial version
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// *********************************************************************
//*********************************************************************
//
// Function: Combined sqrtf(x), where
// _
// sqrtf(x) = |x, for single precision x values
//
// ********************************************************************
//********************************************************************
//
// Accuracy: Correctly Rounded
//
// ********************************************************************
//********************************************************************
//
// Resources Used:
//
@ -68,7 +70,7 @@
//
// Predicate Registers: p6, p7, p8
//
// ********************************************************************
//********************************************************************
//
// IEEE Special Conditions:
//
@ -78,15 +80,14 @@
// sqrtf(+/-0) = +/-0
// sqrtf(negative) = QNaN and error handling is called
//
// ********************************************************************
//********************************************************************
//
// Implementation:
//
// Modified Newton-Raphson Algorithm
//
// ********************************************************************
//********************************************************************
#include "libm_support.h"
GR_SAVE_B0 = r34
GR_SAVE_PFS = r33
@ -102,21 +103,8 @@ FR_Y = f0
FR_RESULT = f8
.section .text
.proc sqrtf#
.global sqrtf#
.align 64
sqrtf:
#ifdef _LIBC
.global __sqrtf
.type __sqrtf,@function
__sqrtf:
.global __ieee754_sqrtf
.type __ieee754_sqrtf,@function
__ieee754_sqrtf:
#endif
GLOBAL_IEEE754_ENTRY(sqrtf)
{ .mlx
// BEGIN SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
alloc r32= ar.pfs,0,5,4,0
@ -197,7 +185,7 @@ __ieee754_sqrtf:
// Step (10)
// d1 = a - S1 * S1 in f9
(p6) fnma.s1 f9=f7,f7,f8
nop.i 0;;;
nop.i 0;;
} { .mfb
nop.m 0
// Step (11)
@ -207,27 +195,20 @@ __ieee754_sqrtf:
// END SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
} { .mfb
nop.m 0
(p0) mov f8 = f7
mov f8 = f7
(p8) br.ret.sptk b0 ;;
}
//
// This branch includes all those special values that are not negative,
// with the result equal to frcpa(x)
//
.endp sqrtf
ASM_SIZE_DIRECTIVE(sqrtf)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__sqrtf)
ASM_SIZE_DIRECTIVE(__ieee754_sqrtf)
#endif
GLOBAL_IEEE754_END(sqrtf)
.proc __libm_error_region
__libm_error_region:
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii
add GR_Parameter_Y=-32,sp // Parameter 2 value
(p0) mov GR_Parameter_TAG = 50
mov GR_Parameter_TAG = 50
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
@ -271,8 +252,7 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function

View File

@ -1,10 +1,10 @@
.file "sqrtl.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,23 +35,25 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// ********************************************************************
//********************************************************************
//
// History:
// 2/02/00 (hand-optimized)
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// 02/02/00 (hand-optimized)
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// ********************************************************************
//********************************************************************
//
// Function: Combined sqrtl(x), where
// _
// sqrtl(x) = |x, for double-extended precision x values
//
// ********************************************************************
//********************************************************************
//
// Resources Used:
//
@ -64,7 +66,7 @@
//
// Predicate Registers: p6, p7, p8
//
// ********************************************************************
//********************************************************************
//
// IEEE Special Conditions:
//
@ -74,15 +76,13 @@
// sqrtl(+/-0) = +/-0
// sqrtl(negative) = QNaN and error handling is called
//
// ********************************************************************
//********************************************************************
//
// Implementation:
//
// Modified Newton-Raphson Algorithm
//
// ********************************************************************
#include "libm_support.h"
//********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@ -97,19 +97,7 @@ FR_Y = f0
FR_RESULT = f8
.section .text
.proc sqrtl#
.global sqrtl#
.align 64
sqrtl:
#ifdef _LIBC
.global __sqrtl
.type __sqrtl,@function
__sqrtl:
.global __ieee754_sqrtl
.type __ieee754_sqrtl,@function
__ieee754_sqrtl:
#endif
GLOBAL_IEEE754_ENTRY(sqrtl)
{ .mlx
alloc r32= ar.pfs,0,5,4,0
// exponent of +1/2 in r2
@ -151,7 +139,7 @@ alloc r32= ar.pfs,0,5,4,0
}
{ .mfi
nop.m 0
(p0) mov f15=f8
mov f15=f8
nop.i 0;;
} { .mfi
nop.m 0
@ -221,8 +209,8 @@ alloc r32= ar.pfs,0,5,4,0
(p6) br.ret.sptk b0 ;;
}
{ .mfb
(p0) mov GR_Parameter_TAG = 48
(p0) mov f8 = f7
mov GR_Parameter_TAG = 48
mov f8 = f7
(p8) br.ret.sptk b0 ;;
}
//
@ -232,15 +220,8 @@ alloc r32= ar.pfs,0,5,4,0
// END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
.endp sqrtl#
ASM_SIZE_DIRECTIVE(sqrtl)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__sqrtl)
ASM_SIZE_DIRECTIVE(__ieee754_sqrtl)
#endif
.proc __libm_error_region
__libm_error_region:
GLOBAL_IEEE754_END(sqrtl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@ -288,7 +269,6 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1,80 @@
#!/bin/sh
# Usage: gen_import_file_list LIBM_DIR
# Prints one "name-pattern source-file destination-file" line on stdout for
# every file to import; each source-file is prefixed with LIBM_DIR.
libm_dir=$1
# import NAME SRC DST
# Emit one list entry: the function-name pattern, the source file
# (located under $libm_dir) and the destination file name.
import() {
  echo "${1} ${libm_dir}/${2} ${3}"
}
# import_c NAME SRC DST
# Same entry format as import(); kept as a separate name so the list
# below can mark which entries are C files.
import_c() {
  import "$@"
}
# Support files that are copied under a placeholder name.
import_c DUMMY "libm_support.h" "libm_support.h"
import_c DUMMY "libm_error.c" "libm_error.c"
import_c scalblnf "scalblnf.c" "s_scalblnf.c"

# Functions imported as e_<name>.S in double, float and long double flavours.
for func in acos acosh asin atanh cosh exp2 exp10 fmod log2 pow remainder \
            scalb sinh sqrt; do
  for sfx in "" f l; do
    base="${func}${sfx}"
    import "${base}" "${base}.s" "e_${base}.S"
  done
done

# atan2 and exp: only the double and float flavours are listed here.
for func in atan2 exp; do
  for sfx in "" f; do
    base="${func}${sfx}"
    import "${base}" "${base}.s" "e_${base}.S"
  done
done

import "atan" "atan.s" "s_atan.S"
import "atanf" "atanf.s" "s_atanf.S"
# Entries whose name is a pattern covering more than one function.
import "atan(2)?l" "atanl.s" "s_atanl.S"
import "exp(m1)?l" "expl_m1.s" "s_expm1l.S"

for sfx in "" f l; do
  import "log(10)?${sfx}" "log${sfx}.s" "e_log${sfx}.S"
  import "tgamma${sfx}" "tgamma${sfx}.s" "w_tgamma${sfx}.S"
  import "(hypot|cabs)${sfx}" "hypot${sfx}.s" "e_hypot${sfx}.S"
done

# Functions imported as s_<name>.S in all three flavours.
for func in asinh cbrt ceil erf erfc fabs floor \
            ilogb log1p logb modf nearbyint nextafter nexttoward \
            rint round significand fdim fma fmax tanh trunc; do
  for sfx in "" f l; do
    base="${func}${sfx}"
    import "${base}" "${base}.s" "s_${base}.S"
  done
done

for sfx in "" f l; do
  import "(tan|cot)${sfx}" "tancot${sfx}.s" "s_tan${sfx}.S"
done

for sfx in "" f l; do
  import "(sin|cos)${sfx}" "sincos${sfx}.s" "s_cos${sfx}.S"
  import_c "frexp${sfx}" "frexp${sfx}.c" "s_frexp${sfx}.c"
  import_c "ldexp${sfx}" "ldexp${sfx}.c" "s_ldexp${sfx}.c"
  import_c "scalbn${sfx}" "scalbn${sfx}.c" "s_scalbn${sfx}.c"
done

import expm1 "exp_m1.s" "s_expm1.S"
import expm1f "expf_m1.s" "s_expm1f.S"

# Internal __libm_* helpers.
for helper in frexp frexpf frexpl reduce; do
  import "__libm_${helper}" "libm_${helper}.s" "libm_${helper}.S"
done

for sfx in "" f l; do
  import "__libm_ldexp${sfx}" "libm_ldexp${sfx}.s" "s_libm_ldexp${sfx}.S"
  import "(__libm_)?(sincos|cis)${sfx}" "libm_sincos${sfx}.s" "libm_sincos${sfx}.S"
  import "__libm_lgamma${sfx}" "libm_lgamma${sfx}.s" "libm_lgamma${sfx}.S"
  import "__libm_scalbn${sfx}" "libm_scalbn${sfx}.s" "s_libm_scalbn${sfx}.S"
done

import __libm_scalblnf "libm_scalblnf.s" "libm_scalblnf.S"
import "__libm_(sin|cos|sincos)_large" "libm_sincos_large.s" \
       "libm_sincos_large.S"

View File

@ -0,0 +1,81 @@
#!/bin/sh
# Usage: import_check OBJDIR
# Checks the object files in OBJDIR that correspond to the ".S" entries of
# ./import_file_list and exits non-zero if any check fails.
objdir="$1"
# Running total of failed checks; incremented by check_syms and check_file.
num_errors=0
# check_syms
# Reads "nm -g" output ("value type name" per line) from stdin and checks:
#   * every undefined symbol starts with "__";
#   * an object with more than one entry point has no strong global symbol
#     whose name lacks the "__" prefix.
# Uses $file for messages and adds one to $num_errors per violation.
check_syms() {
  global_count=0
  entry_count=0
  while read value type name; do
    if [ "$value" = "U" ]; then
      # Undefined symbols have no address column, so read put the symbol
      # name into $type.
      name=$type
      # A case pattern is used instead of an unquoted expr substitution:
      # with a name shorter than two characters expr printed nothing and
      # the "[" test failed with a syntax error, hiding the violation.
      case "$name" in
        __*)
          ;;
        *)
          # printf instead of the non-portable "echo -e".
          printf '%s:\tError: undefined reference %s doesn'\''t start with "__".\n' \
            "$(basename "$file")" "$name"
          num_errors=$(($num_errors + 1))
          ;;
      esac
      continue
    fi
    # Every defined symbol is an entry point; only non-weak symbols without
    # the "__" prefix count as strong globals.
    entry_count=$(($entry_count + 1))
    case "$type" in
      W)
        ;;
      *)
        case "$name" in
          __*) ;;
          *) global_count=$(($global_count + 1)) ;;
        esac
        ;;
    esac
  done
  if [ "$entry_count" -gt 1 ] && [ "$global_count" -gt 0 ]; then
    printf '%s:\tError: detected %s strong  global and %s entry-points.\n' \
      "$(basename "$file")" "$global_count" "$entry_count"
    num_errors=$(($num_errors + 1))
  fi
}
# check_file OBJECT
# Runs two checks on one object file:
#   1. the sizes of all OBJECT symbols must add up to the size of .rodata;
#   2. the global symbols must pass check_syms.
# Sets $file (read by check_syms) and adds to $num_errors on failure.
check_file() {
  file=$1
  # readelf -S prints the size on the line after the one naming the section,
  # so the line following the " .rodata" match is read for the hex size.
  size=$(readelf -S "$file" | \
    (sz=0; while read line; do
       if echo $line | grep -F -q " .rodata"; then
         read sz rest
         break
       fi
     done;
     printf "%d" 0x$sz))
  # Build "size1+size2+...+0" from the fourth column of the OBJECT symbols.
  summands=$(readelf -s "$file" | grep -F " OBJECT " | tr -s ' ' |
    cut -f4 -d' ' | sed 's,$,+,')0
  sum=$(($summands))
  if [ "$sum" != "$size" ]; then
    printf '%s:\tError: sum of objects=%s bytes, .rodata size=%s bytes\n' \
      "$(basename "$file")" "$sum" "$size"
    num_errors=$(($num_errors + 1))
  fi
  # check_syms must run in this shell (not on the far side of a pipe) so its
  # updates to num_errors survive; hence the temporary file.  mktemp replaces
  # the Debian-only "tempfile", and the file is now removed afterwards.
  tmp=$(mktemp "${TMPDIR:-/tmp}/syms.XXXXXX") || {
    echo "import_check: cannot create temporary file" >&2
    exit 1
  }
  nm -g "$file" > "$tmp"
  check_syms < "$tmp"
  rm -f "$tmp"
}
# do_checks
# Reads "pattern source destination" entries from stdin and runs check_file
# on $objdir/<destination minus .S>.o for each destination ending in "S".
do_checks() {
  echo "Note: 1 error expected in w_tgammal.o due to 64-byte alignment-padding."
  while read func_pattern src_file dst_file; do
    # Skip anything that is not an assembly destination.
    if [ "$(expr $dst_file : '.*\(S\)$')" != "S" ]; then
      continue
    fi
    objfile=$(expr $dst_file : '\(.*\)[.]S$')
    check_file $objdir/$objfile.o
  done
}
# Check every entry of the generated list and report the overall result
# through the exit status.
do_checks < import_file_list
if [ "$num_errors" -eq 0 ]; then
  echo SUCCESS
  exit 0
fi
echo "FAILURE: Detected $num_errors error(s)."
exit 1

View File

@ -0,0 +1,7 @@
#!/bin/sh
# do_diffs
# For each "pattern source destination" entry read from stdin, print a
# unified diff between the source file and the destination file.
do_diffs() {
  while read pattern original imported; do
    diff -u -p $original $imported
  done
}
do_diffs < import_file_list

View File

@ -0,0 +1,148 @@
# Read the next input record into $0.  If the input ends (or cannot be
# read) before the marker being searched for, report it and stop: at end
# of input getline leaves $0 unchanged, so an unchecked search loop would
# never terminate.
function read_or_die(marker) {
    if ((getline) <= 0) {
        printf "import_file.awk: input ended while looking for \"%s\"\n", marker > "/dev/stderr";
        exit 1;
    }
}

# Header rewrite: copy the leading lines, drop the cvs_id line, copy up to
# the "WARRANTY DISCLAIMER" line, drop it and the line after it, then emit
# the redistribution terms.  With LICENSE_ONLY=y the rest of the file is
# copied verbatim here (starting with the line after the disclaimer marker)
# and no other rule sees any input.
BEGIN {
    read_or_die("static char cvs_id");
    while (!match($0, "^/[/*] static char cvs_id")) {
        print;
        read_or_die("static char cvs_id");
    }
    read_or_die("// WARRANTY DISCLAIMER");
    while (!match($0, "^// WARRANTY DISCLAIMER")) {
        print;
        read_or_die("// WARRANTY DISCLAIMER");
    }
    getline;
    printf \
"// Redistribution and use in source and binary forms, with or without\n" \
"// modification, are permitted provided that the following conditions are\n" \
"// met:\n" \
"//\n" \
"// * Redistributions of source code must retain the above copyright\n" \
"// notice, this list of conditions and the following disclaimer.\n" \
"//\n" \
"// * Redistributions in binary form must reproduce the above copyright\n" \
"// notice, this list of conditions and the following disclaimer in the\n" \
"// documentation and/or other materials provided with the distribution.\n" \
"//\n" \
"// * The name of Intel Corporation may not be used to endorse or promote\n" \
"// products derived from this software without specific prior written\n" \
"// permission.\n\n";
    if (LICENSE_ONLY == "y") {
        # "> 0" so that a read error (-1) ends the copy instead of looping.
        do {
            print;
        } while ((getline) > 0);
    }
}
# A line starting with ".data" is replaced by the token RODATA.
$0 ~ /^[.]data/ {
    print "RODATA"
    next
}
# Data-table labels: wrap the table in LOCAL_OBJECT_START/LOCAL_OBJECT_END.
# Lines between the label and the first "data" directive are copied, then
# every following line that is a "data" directive or contains "//" is
# copied.  The first line that ends the table is consumed and not printed
# (the END macro is followed by an empty line instead).
# The getline results are checked so that a table reaching the end of the
# input terminates instead of printing the last line forever.
/^([a-zA-Z_0-9]*_(tb[l0-9]|Tt|[tT]able|data|low|coeffs|constants|CONSTANTS|reduction|Stirling)(_?([1-9cdimpqstPQT]+|tail))?|(Constants|Poly|coeff)_.+|(double_sin_?cos|double_cis)[fl]?_.+):/ {
    table_name=substr($1,1,length($1)-1);
    printf "LOCAL_OBJECT_START(%s)\n", table_name;
    eof = ((getline) <= 0);
    while (!eof && !match($0, "^[ \t]*data")) {
        print;
        eof = ((getline) <= 0);
    }
    while (!eof && match($0, "(//|^[ \t]*data)")) {
        print;
        eof = ((getline) <= 0);
    }
    printf "LOCAL_OBJECT_END(%s)\n\n", table_name;
    next;
}
# ".proc __libm_error_region" / ".proc __libm_callout": emit the entry macro
# with the second field as its argument and drop the line that follows.
/^[.]proc[ \t]+__libm_(error_region|callout)/ {
    print "LOCAL_LIBM_ENTRY(" $2 ")"
    getline
    next
}
# ".endp __libm_error_region" / ".endp __libm_callout": emit the matching
# end macro with the second field as its argument.
/^[.]endp[ \t]+__libm_(error_region|callout)/ {
    print "LOCAL_LIBM_END(" $2 ")"
    next
}
# Drop ".global" directives for the function being imported (the symbol is
# the second field up to the first "#"); other .global lines fall through
# to the later rules.
/^[.]global/ {
    sym = $2
    sub(/#.*/, "", sym)
    if (sym ~ ("^" FUNC "$"))
        next
}
# ".proc NAME" for the function being imported (NAME matches FUNC): replace
# everything from the .proc line through the matching label with
# <TYPE>_ENTRY(NAME), copy the body, and replace ".endp" with
# <TYPE>_END(NAME).  TYPE is chosen from the two name lists below.  The line
# following ".endp" is consumed and not printed.
# Changes from the previous version: the empty alternatives ("...l|" "|rint")
# were removed from ieee754_funcs, ".endp" is matched with a literal dot, and
# running out of input while searching is reported instead of looping forever.
/^[.]proc/ {
    split($2, part, "#");
    name=part[1];
    if (match(name, "^"FUNC"$")) {
        local_funcs=("^(" \
            "cis|cisf|cisl" \
            "|cabs|cabsf|cabsl" \
            "|cot|cotf|cotl" \
            ")$");
        ieee754_funcs=("^(" \
            "atan2|atan2f|atan2l|atanl" \
            "|cos|cosf|cosl" \
            "|cosh|coshf|coshl" \
            "|exp|expf|expl" \
            "|exp10|exp10f|exp10l" \
            "|expm1|expm1f|expm1l" \
            "|fmod|fmodf|fmodl" \
            "|hypot|hypotf|hypotl" \
            "|fabs|fabsf|fabsl" \
            "|floor|floorf|floorl" \
            "|log1p|log1pf|log1pl" \
            "|log|log10|log10f|log10l|log2l|logf|logl" \
            "|remainder|remainderf|remainderl" \
            "|rint|rintf|rintl" \
            "|scalb|scalbf|scalbl" \
            "|sin|sinf|sinl" \
            "|sincos|sincosf|sincosl" \
            "|sinh|sinhf|sinhl" \
            "|sqrt|sqrtf|sqrtl" \
            "|tan|tanf|tanl" \
            ")$");
        if (match(name, ieee754_funcs)) {
            type="GLOBAL_IEEE754";
        } else if (match (name, local_funcs)) {
            type="LOCAL_LIBM";
        } else {
            type="GLOBAL_LIBM";
        }
        printf "%s_ENTRY(%s)\n", type, name;
        # Skip ahead to the "NAME:" (or "NAME#:") label.
        do {
            if ((getline) <= 0) {
                printf "import_file.awk: input ended before label of %s\n", name > "/dev/stderr";
                exit 1;
            }
        } while (!match($0, "^"name"#?:"));
        # Copy the function body up to, but not including, ".endp".
        while ((more = (getline)) > 0 && !match($0, "^[.]endp")) {
            print;
        }
        if (more <= 0) {
            printf "import_file.awk: input ended before .endp of %s\n", name > "/dev/stderr";
            exit 1;
        }
        getline;
        printf "%s_END(%s)\n", type, name;
        if (match(name, "^exp10[fl]?$")) {
            # exp10, exp10f and exp10l also get a pow10 alias.
            t=substr(name,6)
            printf "weak_alias (exp10%s, pow10%s)\n", t, t
        }
        next;
    }
}
# A label matching FUNC that reaches this rule (only letters and "_" are
# accepted by the pattern): emit GLOBAL_LIBM_ENTRY, skip forward to the next
# "NAME:" / "NAME#:" label, copy the body up to ".endp" and emit
# GLOBAL_LIBM_END.  The line following ".endp" is consumed and not printed.
# NOTE(review): this looks intended for sources where the label appears
# ahead of its .proc directive -- confirm against the imported files.
# Running out of input while searching is now reported instead of looping
# forever, and ".endp" is matched with a literal dot.
/^[a-zA-Z_]+:/ {
    split($1, part, ":");
    name=part[1];
    if (match(name, "^"FUNC"$")) {
        printf "GLOBAL_LIBM_ENTRY(%s)\n", name;
        do {
            if ((getline) <= 0) {
                printf "import_file.awk: input ended before label of %s\n", name > "/dev/stderr";
                exit 1;
            }
        } while (!match($0, "^"name"#?:"));
        while ((more = (getline)) > 0 && !match($0, "^[.]endp")) {
            print;
        }
        if (more <= 0) {
            printf "import_file.awk: input ended before .endp of %s\n", name > "/dev/stderr";
            exit 1;
        }
        getline;
        printf "GLOBAL_LIBM_END(%s)\n", name;
        next;
    }
}
# Default: any line not consumed by a rule above is copied unchanged.
{ print }

View File

@ -0,0 +1,42 @@
#!/bin/sh
# Notes:
# We don't import copysign, finite, fpclassify, isinf, isnan, and signbit
# since our own versions are nicer and just as correct and fast (except
# perhaps that they don't handle non-finite arguments well?).
#
# Also, leave out cabs for now since it doesn't seem overridable in
# glibc.
libm_dir=$1
# import_s NAME SRC DST
#   NAME = function-name pattern, passed to the awk script as FUNC
#   SRC  = source file-name
#   DST  = destination file-name
# Runs import_file.awk over SRC and writes the result to DST.
# The arguments are quoted because NAME is a regular expression that may
# contain shell glob characters such as "?".  A failing awk run is reported
# and returned instead of being ignored.
import_s() {
  echo "Importing $1 from $2 -> $3"
  awk -f import_file.awk FUNC="$1" "$2" > "$3" || {
    echo "Error: import of $1 from $2 failed" >&2
    return 1
  }
}
# import_c NAME SRC DST
#   NAME = name shown in the progress message only
#   SRC  = source file-name
#   DST  = destination file-name
# Runs import_file.awk with LICENSE_ONLY=y over SRC and writes the result to
# DST.  File names are quoted, and a failing awk run is reported and
# returned instead of being ignored.
import_c() {
  echo "Importing $1 from $2 -> $3"
  awk -f import_file.awk LICENSE_ONLY=y "$2" > "$3" || {
    echo "Error: import of $1 from $2 failed" >&2
    return 1
  }
}
# do_imports
# Reads "pattern source destination" entries from stdin and imports each
# one: sources whose name ends in "c" go through import_c, everything else
# through import_s.
do_imports() {
  while read func_pattern src_file dst_file; do
    importer=import_s
    if [ "$(expr $src_file : '.*\(c\)$')" = "c" ]; then
      importer=import_c
    fi
    $importer "$func_pattern" "$src_file" "$dst_file"
  done
}
# Regenerate import_file_list for the given libm directory, then import
# every entry listed in it.
./gen_import_file_list $libm_dir > import_file_list
do_imports < import_file_list

View File

@ -0,0 +1,64 @@
/* Assembler helper macros that mark the start and end of libm functions and
   data objects in the .S files.  */
#include <sysdep.h>
#undef ret /* get rid of the stupid "ret" macro; it breaks br.ret */
/* Support for compatible assembler handling. */
/* On ELF targets these emit .size (distance from NAME to the current
   location) and .type directives; otherwise they expand to nothing.  */
#ifdef __ELF__
# define ASM_SIZE_DIRECTIVE(name) .size name,.-name
# define ASM_TYPE_DIRECTIVE(name,T) .type name,T
#else
# define ASM_SIZE_DIRECTIVE(name)
# define ASM_TYPE_DIRECTIVE(name,T)
#endif
/* Open a procedure NAME and define its label; no .global is emitted.  */
#define LOCAL_LIBM_ENTRY(name) \
.proc name; \
name:
/* Close procedure NAME and record its size.  */
#define LOCAL_LIBM_END(name) \
.endp name; \
ASM_SIZE_DIRECTIVE(name)
/* Section directive used for the constant tables.  */
#define RODATA .rodata
/* Define label NAME and mark it as a data object.  */
#define LOCAL_OBJECT_START(name) \
name:; \
ASM_TYPE_DIRECTIVE(name, @object)
/* Record the size of data object NAME.  */
#define LOCAL_OBJECT_END(name) \
ASM_SIZE_DIRECTIVE(name)
/* Like LOCAL_LIBM_ENTRY, and additionally export NAME with .global.  */
#define GLOBAL_LIBM_ENTRY(name) \
LOCAL_LIBM_ENTRY(name); \
.global name
#define GLOBAL_LIBM_END(name) LOCAL_LIBM_END(name)
/* Entry/end for a global procedure named __libm_NAME.  The explicit .global
   repeats the one already emitted by GLOBAL_LIBM_ENTRY.  */
#define INTERNAL_LIBM_ENTRY(name) \
GLOBAL_LIBM_ENTRY(__libm_##name); \
.global __libm_##name
#define INTERNAL_LIBM_END(name) GLOBAL_LIBM_END(__libm_##name)
/* Open procedure NAME on a 32-byte boundary and define the global label
   __NAME at the same address.  */
#define WEAK_LIBM_ENTRY(name) \
.align 32; \
LOCAL_LIBM_ENTRY(name); \
.global __##name; \
__##name:
/* Close a WEAK_LIBM_ENTRY procedure: apply weak_alias to (__NAME, NAME),
   mark __NAME hidden, and set the size and function type of __NAME.
   weak_alias is not defined in this header; presumably it comes in through
   <sysdep.h> -- confirm.  */
#define WEAK_LIBM_END(name) \
weak_alias (__##name, name); \
.hidden __##name; \
LOCAL_LIBM_END(name); \
ASM_SIZE_DIRECTIVE(__##name); \
ASM_TYPE_DIRECTIVE(__##name, @function)
/* WEAK_LIBM_ENTRY plus a hidden global label __ieee754_NAME at the same
   address.  */
#define GLOBAL_IEEE754_ENTRY(name) \
WEAK_LIBM_ENTRY(name); \
.global __ieee754_##name; \
.hidden __ieee754_##name; \
__ieee754_##name:
/* WEAK_LIBM_END plus the size and function type of __ieee754_NAME.  */
#define GLOBAL_IEEE754_END(name) \
WEAK_LIBM_END(name); \
ASM_SIZE_DIRECTIVE(__ieee754_##name); \
ASM_TYPE_DIRECTIVE(__ieee754_##name, @function)
/* In assembler sources built as part of libc, route references to
   __libm_error_support through HIDDEN_JUMPTARGET (defined elsewhere).  */
#if defined ASSEMBLER && !defined NOT_IN_libc
# define __libm_error_support HIDDEN_JUMPTARGET(__libm_error_support)
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,209 @@
.file "libm_frexp.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 03/20/00 Improved speed
// 06/01/00 Fixed bug when x a double-extended denormal
// 12/08/00 Corrected label on .endp
// 01/23/02 Added handling for int 32 or 64 bits
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// double __libm_frexp(double x, int* y, int int_type)
// input floating point f8, pointer to y (r33), int int_type (r34)
// output floating point f8, returns the fraction of x, 0.5 <= fraction < 1.0
// output int* y, returns the true exponent of x
//
// int_type = 0 if int is 32 bits
// int_type = 1 if int is 64 bits
//
// int* y is returned as a 32 bit integer if int_type = 0
// int* y is returned as a 64 bit integer if int_type = 1
//
// Overview of operation
//==============================================================
// break a floating point x number into fraction and an exponent
// The fraction is returned as a double
// The exponent is returned as an integer pointed to by y
// This is a true (not a biased) exponent, but 0xfffe is subtracted
// as a bias instead of 0xffff. This is because the fraction returned
// is between 0.5 and 1.0, not the expected IEEE range.
//
// The fraction is 0.5 <= fraction < 1.0
//
// Registers used
//==============================================================
//
// general registers:
// r14 exponent bias for x negative
// r15 exponent bias for x positive
// r16 signexp of x
// r17 exponent mask
// r18 exponent of x
// r19 exponent result
// r20 signexp of 2^64
// r32 on input contains the 64-bit IEEE double that is in f8
// r33 on input pointer to 32-bit or 64-bit integer for exponent
// r34 on input contains 0 if output int is 32 bits, else output int is 64 bits
//
// predicate registers:
// p6 set if x is Nan, zero, or infinity
// p7 set if x negative
// p8 set if x positive
// p9 set if x double-extended denormal
// p10 set if int_type = 0, 32-bit integer
// p11 set if int_type = 1, 64-bit integer
//
// floating-point registers:
// f8 input, output
// f9 normalized x
// f10 signexp for significand result for x positive
// f11 signexp for significand result for x negative
// f12 2^64
.section .text
// __libm_frexp: split x (f8) into a fraction returned in f8 and an exponent
// stored through the pointer in r33; r34 selects a 32-bit (0) or 64-bit
// (non-zero) store.  Register roles are listed in the header above.
GLOBAL_LIBM_ENTRY(__libm_frexp)
// Set signexp for significand result for x>0
// If x is a NaN, zero, or infinity, return it.
// Put 0 in the int pointer.
// x NAN, ZERO, INFINITY?
// Set signexp for significand result for x<0
{ .mfi
mov r15 = 0x0fffe
fclass.m p6,p7 = f8, 0xe7
mov r14 = 0x2fffe
}
// Form signexp of 2^64 in case x double-extended denormal
// Save the normalized value of input in f9
// The normalization also sets fault flags and takes faults if necessary
{ .mfi
mov r20 = 0x1003f
fnorm.s0 f9 = f8
nop.i 999 ;;
}
// Move signexp for significand result for x>0 to FP reg
// Form 2^64 in case x double-extended denormal
{ .mmi
setf.exp f10 = r15
setf.exp f12 = r20
nop.i 999 ;;
}
// Move signexp for significand result for x<0 to FP reg
// p7 if x<0, else p8
// If x=0,nan,inf, set p10 if output int to be 32 bits, or set p11 if 64 bits
{ .mfi
setf.exp f11 = r14
(p7) fcmp.lt.s0 p7,p8 = f8,f0
(p6) cmp.eq.unc p10,p11 = r34, r0 ;;
}
// If x NAN, ZERO, INFINITY, set *y=0 and exit
{ .mmb
(p10) st4 [r33] = r0 // Store *y=0 as 32-bit integer
(p11) st8 [r33] = r0 // Store *y=0 as 64-bit integer
(p6) br.ret.spnt b0 ;;
}
// Form exponent mask
// Test for fnorm(x) denormal, means x double-extended denormal
{ .mfi
mov r17 = 0x1ffff
fclass.m p9,p0 = f9, 0x0b
nop.i 999 ;;
}
// If x double-extended denormal add 64 to exponent bias for scaling
// If x double-extended denormal multiply x * 2^64 which is normal
// Set p10 if output int to be 32 bits, or set p11 if 64 bits
{ .mfi
(p9) add r15 = 64, r15
(p9) fmpy.s0 f9 = f9, f12
cmp.eq p10,p11 = r34, r0 ;;
}
// true exponent stored to int pointer
// the bias is treated as 0xfffe instead of
// normal 0xffff because we want the significand
// to be in the range 0.5 <= sig < 1.0
// Store the value of the exponent at the pointer in r33
// If x>0 form significand result
{ .mfi
nop.m 999
(p8) fmerge.se f8 = f10,f9
nop.i 999 ;;
}
// Get signexp of normalized x
// If x<0 form significand result
{ .mfi
getf.exp r16 = f9
(p7) fmerge.se f8 = f11,f9
nop.i 999 ;;
}
// Get exp of normalized x
// Subtract off bias to get true exponent of x
{ .mmi
and r18 = r17,r16 ;;
sub r19 = r18,r15
nop.i 999 ;;
}
// Store int *y as a 32-bit integer
// Make the value a double
{ .mfi
(p10) st4 [r33] = r19 // Store *y as 32-bit integer
fnorm.d.s0 f8 = f8
nop.i 999
}
// Store int *y as a 64-bit integer (when p11 is set) and return
{ .mfb
(p11) st8 [r33] = r19 // Store *y as 64-bit integer
nop.f 999
br.ret.sptk b0 ;;
}
GLOBAL_LIBM_END(__libm_frexp)

View File

@ -0,0 +1,209 @@
.file "libm_frexpf.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 03/20/00 Improved speed
// 06/01/00 Fixed bug when x a double-extended denormal
// 12/08/00 Corrected label on .endp
// 01/23/02 Added handling for int 32 or 64 bits
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// float __libm_frexpf(float x, int* y, int int_type)
// input floating point f8, pointer to y (r33), int int_type (r34)
// output floating point f8, returns the fraction of x, 0.5 <= fraction < 1.0
// output int* y, returns the true exponent of x
//
// int_type = 0 if int is 32 bits
// int_type = 1 if int is 64 bits
//
// int* y is returned as a 32 bit integer if int_type = 0
// int* y is returned as a 64 bit integer if int_type = 1
//
// Overview of operation
//==============================================================
// break a floating point x number into fraction and an exponent
// The fraction is returned as a float
// The exponent is returned as an integer pointed to by y
// This is a true (not a biased) exponent, but 0xfffe is subtracted
// as a bias instead of 0xffff. This is because the fraction returned
// is between 0.5 and 1.0, not the expected IEEE range.
//
// The fraction is 0.5 <= fraction < 1.0
//
// Registers used
//==============================================================
//
// general registers:
// r14 exponent bias for x negative
// r15 exponent bias for x positive
// r16 signexp of x
// r17 exponent mask
// r18 exponent of x
// r19 exponent result
// r20 signexp of 2^64
// r32 on input contains the 32-bit IEEE float that is in f8
// r33 on input pointer to 32-bit or 64-bit integer for exponent
// r34 on input contains 0 if output int is 32 bits, else output int is 64 bits
//
// predicate registers:
// p6 set if x is Nan, zero, or infinity
// p7 set if x negative
// p8 set if x positive
// p9 set if x double-extended denormal
// p10 set if int_type = 0, 32-bit integer
// p11 set if int_type = 1, 64-bit integer
//
// floating-point registers:
// f8 input, output
// f9 normalized x
// f10 signexp for significand result for x positive
// f11 signexp for significand result for x negative
// f12 2^64
.section .text
GLOBAL_LIBM_ENTRY(__libm_frexpf)
// float __libm_frexpf(float x, int* y, int int_type)
//
// Splits x (in f8) into a fraction, returned in f8, and a true exponent
// that is stored through the pointer in r33.  The store is 32 bits wide
// when r34 == 0 and 64 bits wide otherwise.
// If x is a NaN, zero, or infinity, x is returned unchanged and 0 is
// stored through the pointer.
//
// r15 = signexp 0x0fffe: sign 0, exponent 0xfffe (result fraction for x>0)
// r14 = signexp 0x2fffe: sign 1, exponent 0xfffe (result fraction for x<0)
// x NAN, ZERO, INFINITY?  p6 = yes, p7 = no
{ .mfi
mov r15 = 0x0fffe
fclass.m p6,p7 = f8, 0xe7
mov r14 = 0x2fffe
}
// Form signexp of 2^64 in case x double-extended denormal
// Save the normalized value of input in f9
// The normalization also sets fault flags and takes faults if necessary
{ .mfi
mov r20 = 0x1003f
fnorm.s0 f9 = f8
nop.i 999 ;;
}
// Move signexp for significand result for x>0 to FP reg
// Form 2^64 in case x double-extended denormal
{ .mmi
setf.exp f10 = r15
setf.exp f12 = r20
nop.i 999 ;;
}
// Move signexp for significand result for x<0 to FP reg
// For ordinary x (p7 set above): p7 if x<0, else p8
// If x=0,nan,inf, set p10 if output int to be 32 bits, or set p11 if 64 bits
// (cmp.eq.unc clears both p10 and p11 when p6 is not set)
{ .mfi
setf.exp f11 = r14
(p7) fcmp.lt.s0 p7,p8 = f8,f0
(p6) cmp.eq.unc p10,p11 = r34, r0 ;;
}
// If x NAN, ZERO, INFINITY, set *y=0 and exit; f8 still holds the input x
{ .mmb
(p10) st4 [r33] = r0 // Store *y=0 as 32-bit integer
(p11) st8 [r33] = r0 // Store *y=0 as 64-bit integer
(p6) br.ret.spnt b0 ;;
}
// Form exponent mask (low 17 bits of signexp, i.e. without the sign bit)
// Test for fnorm(x) denormal, means x double-extended denormal
{ .mfi
mov r17 = 0x1ffff
fclass.m p9,p0 = f9, 0x0b
nop.i 999 ;;
}
// If x double-extended denormal add 64 to exponent bias for scaling
// (f10 was already formed from the unmodified r15, so it is not affected)
// If x double-extended denormal multiply x * 2^64 which is normal
// Set p10 if output int to be 32 bits, or set p11 if 64 bits
{ .mfi
(p9) add r15 = 64, r15
(p9) fmpy.s0 f9 = f9, f12
cmp.eq p10,p11 = r34, r0 ;;
}
// The true exponent is stored to the int pointer further below.
// The bias is treated as 0xfffe instead of the
// normal 0xffff because we want the significand
// to be in the range 0.5 <= sig < 1.0
// If x>0 form significand result: sign/exponent from f10, significand from f9
{ .mfi
nop.m 999
(p8) fmerge.se f8 = f10,f9
nop.i 999 ;;
}
// Get signexp of normalized x
// If x<0 form significand result: sign/exponent from f11, significand from f9
{ .mfi
getf.exp r16 = f9
(p7) fmerge.se f8 = f11,f9
nop.i 999 ;;
}
// Get exp of normalized x (mask off the sign bit)
// Subtract off bias to get true exponent of x
{ .mmi
and r18 = r17,r16 ;;
sub r19 = r18,r15
nop.i 999 ;;
}
// Store int *y as a 32-bit integer
// Make the value a float
{ .mfi
(p10) st4 [r33] = r19 // Store *y as 32-bit integer
fnorm.s.s0 f8 = f8
nop.i 999
}
// Otherwise store int *y as a 64-bit integer, then return
{ .mfb
(p11) st8 [r33] = r19 // Store *y as 64-bit integer
nop.f 999
br.ret.sptk b0 ;;
}
GLOBAL_LIBM_END(__libm_frexpf)

View File

@ -0,0 +1,209 @@
.file "libm_frexpl.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 03/20/00 Improved speed
// 06/01/00 Fixed bug when x a double-extended denormal
// 12/08/00 Corrected label on .endp
// 01/23/02 Added handling for int 32 or 64 bits
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// long double __libm_frexpl(long double x, int* y, int int_type)
// input floating point f8, pointer to y (r34), int int_type (r35)
// output floating point f8, returns the fraction of x, 0.5 <= fraction < 1.0
// output int* y, returns the true exponent of x
//
// int_type = 0 if int is 32 bits
// int_type = 1 if int is 64 bits
//
// int* y is returned as a 32 bit integer if int_type = 0
// int* y is returned as a 64 bit integer if int_type = 1
//
// Overview of operation
//==============================================================
// break a floating point x number into fraction and an exponent
// The fraction is returned as a long double
// The exponent is returned as an integer pointed to by y
// This is a true (not a biased) exponent, but 0xfffe is subtracted
// as a bias instead of 0xffff. This is because the fraction returned
// is between 0.5 and 1.0, not the expected IEEE range.
//
// The fraction is 0.5 <= fraction < 1.0
//
// Registers used
//==============================================================
//
// general registers:
// r14 exponent bias for x negative
// r15 exponent bias for x positive
// r16 signexp of x
// r17 exponent mask
// r18 exponent of x
// r19 exponent result
// r20 signexp of 2^64
// r32-33 on input contains the 80-bit IEEE long double that is in f8
// r34 on input pointer to 32-bit or 64-bit integer for exponent
// r35 on input contains 0 if output int is 32 bits, else output int is 64 bits
//
// predicate registers:
// p6 set if x is Nan, zero, or infinity
// p7 set if x negative
// p8 set if x positive
// p9 set if x double-extended denormal
// p10 set if int_type = 0, 32-bit integer
// p11 set if int_type = 1, 64-bit integer
//
// floating-point registers:
// f8 input, output
// f9 normalized x
// f10 signexp for significand result for x positive
// f11 signexp for significand result for x negative
// f12 2^64
.section .text
GLOBAL_LIBM_ENTRY(__libm_frexpl)
// long double __libm_frexpl(long double x, int* y, int int_type)
//
// Splits x (in f8) into a fraction, returned in f8, and a true exponent
// that is stored through the pointer in r34.  The store is 32 bits wide
// when r35 == 0 and 64 bits wide otherwise.
// If x is a NaN, zero, or infinity, x is returned unchanged and 0 is
// stored through the pointer.
//
// r15 = signexp 0x0fffe: sign 0, exponent 0xfffe (result fraction for x>0)
// r14 = signexp 0x2fffe: sign 1, exponent 0xfffe (result fraction for x<0)
// x NAN, ZERO, INFINITY?  p6 = yes, p7 = no
{ .mfi
mov r15 = 0x0fffe
fclass.m p6,p7 = f8, 0xe7
mov r14 = 0x2fffe
}
// Form signexp of 2^64 in case x double-extended denormal
// Save the normalized value of input in f9
// The normalization also sets fault flags and takes faults if necessary
{ .mfi
mov r20 = 0x1003f
fnorm.s0 f9 = f8
nop.i 999 ;;
}
// Move signexp for significand result for x>0 to FP reg
// Form 2^64 in case x double-extended denormal
{ .mmi
setf.exp f10 = r15
setf.exp f12 = r20
nop.i 999 ;;
}
// Move signexp for significand result for x<0 to FP reg
// For ordinary x (p7 set above): p7 if x<0, else p8
// If x=0,nan,inf, set p10 if output int to be 32 bits, or set p11 if 64 bits
// (cmp.eq.unc clears both p10 and p11 when p6 is not set)
{ .mfi
setf.exp f11 = r14
(p7) fcmp.lt.s0 p7,p8 = f8,f0
(p6) cmp.eq.unc p10,p11 = r35, r0 ;;
}
// If x NAN, ZERO, INFINITY, set *y=0 and exit; f8 still holds the input x
{ .mmb
(p10) st4 [r34] = r0 // Store *y=0 as 32-bit integer
(p11) st8 [r34] = r0 // Store *y=0 as 64-bit integer
(p6) br.ret.spnt b0 ;;
}
// Form exponent mask (low 17 bits of signexp, i.e. without the sign bit)
// Test for fnorm(x) denormal, means x double-extended denormal
{ .mfi
mov r17 = 0x1ffff
fclass.m p9,p0 = f9, 0x0b
nop.i 999 ;;
}
// If x double-extended denormal add 64 to exponent bias for scaling
// (f10 was already formed from the unmodified r15, so it is not affected)
// If x double-extended denormal multiply x * 2^64 which is normal
// Set p10 if output int to be 32 bits, or set p11 if 64 bits
{ .mfi
(p9) add r15 = 64, r15
(p9) fmpy.s0 f9 = f9, f12
cmp.eq p10,p11 = r35, r0 ;;
}
// The true exponent is stored to the int pointer further below.
// The bias is treated as 0xfffe instead of the
// normal 0xffff because we want the significand
// to be in the range 0.5 <= sig < 1.0
// If x>0 form significand result: sign/exponent from f10, significand from f9
{ .mfi
nop.m 999
(p8) fmerge.se f8 = f10,f9
nop.i 999 ;;
}
// Get signexp of normalized x
// If x<0 form significand result: sign/exponent from f11, significand from f9
{ .mfi
getf.exp r16 = f9
(p7) fmerge.se f8 = f11,f9
nop.i 999 ;;
}
// Get exp of normalized x (mask off the sign bit)
// Subtract off bias to get true exponent of x
{ .mmi
and r18 = r17,r16 ;;
sub r19 = r18,r15
nop.i 999 ;;
}
// Store int *y as a 32-bit integer
// Make the value a long double
{ .mfi
(p10) st4 [r34] = r19 // Store *y as 32-bit integer
fnorm.s0 f8 = f8
nop.i 999
}
// Otherwise store int *y as a 64-bit integer, then return
{ .mfb
(p11) st8 [r34] = r19 // Store *y as 64-bit integer
nop.f 999
br.ret.sptk b0 ;;
}
GLOBAL_LIBM_END(__libm_frexpl)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
//.file "scalbnf.s"
.file "libm_scalblnf.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2001 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2001 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,26 +35,30 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 scalbnf completely reworked and now standalone version
// 08/03/01 Initial version
// 08/23/01 Corrected error tag number
// 02/06/02 Corrected to handle 32- or 64-bit integers
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// float = scalbnf (float x, int n)
// input floating point f8 and int n (r33)
// float = __libm_scalblnf (float x, long int n, int long_int_type)
// input floating point f8 and long int n (r33)
// input long_int_type = 0 if long int defined as 32 bits, = 1 if 64 bits
//
// output floating point f8
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
#include "libm_support.h"
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
@ -81,34 +85,36 @@ GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
.align 32
.global scalbnf
.section .text
.proc scalbnf
.align 32
scalbnf:
GLOBAL_LIBM_ENTRY(__libm_scalblnf)
//
// Is x NAN, INF, ZERO, +-?
// Build the exponent Bias
//
{ .mfi
alloc r32=ar.pfs,1,2,4,0
alloc r32=ar.pfs,3,0,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Bias = 0x0FFFF,r0
}
//
// Sign extend input
// Is N zero?
// Normalize x
// Do we need to sign extend input (long_int_type = 0)?
//
{ .mfi
cmp.eq.unc p6,p0 = r33,r0
fnorm.s1 FR_Norm_X = FR_Floating_X
sxt4 GR_N_as_int = r33
cmp.eq.unc p8,p9 = r34,r0
}
;;
{ .mii
(p9) mov GR_N_as_int = r33 // Get n directly if long int 64 bits
(p8) sxt4 GR_N_as_int = r33 // Sign extend n if long int 32 bits
nop.i 0
}
;;
@ -173,7 +179,7 @@ scalbnf:
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x000000000003007F
movl GR_Scratch = 0x000000000003007F
};;
@ -184,7 +190,7 @@ scalbnf:
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x000000000001007F
movl GR_Scratch1= 0x000000000001007F
};;
// Set up necessary status fields
@ -195,12 +201,12 @@ scalbnf:
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
fsetc.s2 0x7F,0x42
nop.i 999
};;
@ -247,7 +253,7 @@ scalbnf:
nop.i 999
}
{ .mfi
addl GR_Tag = 178, r0
addl GR_Tag = 205, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
@ -266,9 +272,9 @@ scalbnf:
// Branch out for underflow
//
{ .mfb
(p6) addl GR_Tag = 179, r0
(p6) addl GR_Tag = 206, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(scalbnf_UNDERFLOW)
(p6) br.cond.spnt scalbnf_UNDERFLOW
};;
//
@ -276,8 +282,8 @@ scalbnf:
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(scalbnf_OVERFLOW)
(p9) br.cond.spnt L(scalbnf_OVERFLOW)
(p7) br.cond.spnt scalbnf_OVERFLOW
(p9) br.cond.spnt scalbnf_OVERFLOW
};;
//
@ -289,13 +295,11 @@ scalbnf:
br.ret.sptk b0;;
}
.endp scalbnf
ASM_SIZE_DIRECTIVE(scalbnf)
.proc __libm_error_region
GLOBAL_LIBM_END(__libm_scalblnf)
__libm_error_region:
L(scalbnf_OVERFLOW):
L(scalbnf_UNDERFLOW):
scalbnf_OVERFLOW:
scalbnf_UNDERFLOW:
//
// Get stack address of N
@ -372,8 +376,7 @@ L(scalbnf_UNDERFLOW):
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1,782 @@
.file "libm_sincos.s"
// Copyright (c) 2002 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2002 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/01/02 Initial version
// 02/18/02 Large arguments processing routine is excluded.
// External interface entry points are added
// 03/13/02 Corrected restore of predicate registers
// 03/19/02 Added stack unwind around call to __libm_cis_large
// 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// 1) double _Complex cis(double)
// 2) void sincos(double, double*s, double*c)
// 3) __libm_sincos - internal LIBM function, that accepts
// argument in f8 and returns cosine through f8, sine through f9
//
// Overview of operation
//==============================================================
//
// Step 1
// ======
// Reduce x to region -1/2*pi/2^k ===== 0 ===== +1/2*pi/2^k where k=4
// divide x by pi/2^k.
// Multiply by 2^k/pi.
// nfloat = Round result to integer (round-to-nearest)
//
// r = x - nfloat * pi/2^k
// Do this as ((x - nfloat * HIGH(pi/2^k)) -
// nfloat * LOW(pi/2^k)) -
// nfloat * LOWEST(pi/2^k) for increased accuracy.
// pi/2^k is stored as three numbers that when added make pi/2^k.
// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k) + LOWEST(pi/2^k)
// HIGH and LOW parts are rounded to zero values,
// and LOWEST is rounded to nearest one.
//
// x = (nfloat * pi/2^k) + r
// r is small enough that we can use a polynomial approximation
// and is referred to as the reduced argument.
//
// Step 3
// ======
// Take the unreduced part and remove the multiples of 2pi.
// So nfloat = nfloat (with lower k+1 bits cleared) + lower k+1 bits
//
// nfloat (with lower k+1 bits cleared) is a multiple of 2^(k+1)
// N * 2^(k+1)
// nfloat * pi/2^k = N * 2^(k+1) * pi/2^k + (lower k+1 bits) * pi/2^k
// nfloat * pi/2^k = N * 2 * pi + (lower k+1 bits) * pi/2^k
// nfloat * pi/2^k = N2pi + M * pi/2^k
//
//
// Sin(x) = Sin((nfloat * pi/2^k) + r)
// = Sin(nfloat * pi/2^k) * Cos(r) + Cos(nfloat * pi/2^k) * Sin(r)
//
// Sin(nfloat * pi/2^k) = Sin(N2pi + Mpi/2^k)
// = Sin(N2pi)Cos(Mpi/2^k) + Cos(N2pi)Sin(Mpi/2^k)
// = Sin(Mpi/2^k)
//
// Cos(nfloat * pi/2^k) = Cos(N2pi + Mpi/2^k)
// = Cos(N2pi)Cos(Mpi/2^k) + Sin(N2pi)Sin(Mpi/2^k)
// = Cos(Mpi/2^k)
//
// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
//
//
// Step 4
// ======
// 0 <= M < 2^(k+1)
// There are 2^(k+1) Sin entries in a table.
// There are 2^(k+1) Cos entries in a table.
//
// Get Sin(Mpi/2^k) and Cos(Mpi/2^k) by table lookup.
//
//
// Step 5
// ======
// Calculate Cos(r) and Sin(r) by polynomial approximation.
//
// Cos(r) = 1 + r^2 q1 + r^4 q2 + r^6 q3 + ... = Series for Cos
// Sin(r) = r + r^3 p1 + r^5 p2 + r^7 p3 + ... = Series for Sin
//
// and the coefficients q1, q2, ... and p1, p2, ... are stored in a table
//
//
// Calculate
// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
//
// as follows
//
// S[m] = Sin(Mpi/2^k) and C[m] = Cos(Mpi/2^k)
// rsq = r*r
//
//
// P = p1 + r^2p2 + r^4p3 + r^6p4
// Q = q1 + r^2q2 + r^4q3 + r^6q4
//
// rcub = r * rsq
// Sin(r) = r + rcub * P
// = r + r^3p1 + r^5p2 + r^7p3 + r^9p4 + ... = Sin(r)
//
// The coefficients are not exactly these values, but almost.
//
// p1 = -1/6 = -1/3!
// p2 = 1/120 = 1/5!
// p3 = -1/5040 = -1/7!
// p4 = 1/362880 = 1/9!
//
// P = r + rcub * P
//
// Answer = S[m] Cos(r) + C[m] P
//
// Cos(r) = 1 + rsq Q
// Cos(r) = 1 + r^2 Q
// Cos(r) = 1 + r^2 (q1 + r^2q2 + r^4q3 + r^6q4)
// Cos(r) = 1 + r^2q1 + r^4q2 + r^6q3 + r^8q4 + ...
//
// S[m] Cos(r) = S[m](1 + rsq Q)
// S[m] Cos(r) = S[m] + S[m] rsq Q
// S[m] Cos(r) = S[m] + s_rsq Q
// Q = S[m] + s_rsq Q
//
// Then,
//
// Answer = Q + C[m] P
// Registers used
//==============================================================
// general input registers:
// r14 -> r19
// r32 -> r52
// predicate registers used:
// p6 -> p14
// floating-point registers used
// f9 -> f15
// f32 -> f100
// Assembly macros
//==============================================================
// Symbolic names for the registers used by sincos / __libm_sincos.
// NOTE(review): some aliases (e.g. cis_W, cis_Nfloat, cis_int_Nfloat,
// cis_Inv_Pi_by_16, cis_Inv_Pi_by_64, cis_r_sincos, cis_r_17_ones,
// cis_GR_n_sin, rB0_SAVED) are not referenced by the code visible in this
// file section - confirm before relying on them.
//
// Floating-point registers.
// cis_Arg and cis_Cos_res both name f8: the cosine result overwrites the
// input argument.  The sine result is returned in f9.
cis_Arg = f8
cis_Sin_res = f9
cis_Cos_res = f8
cis_NORM_f8 = f10
cis_W = f11
cis_int_Nfloat = f12
cis_Nfloat = f13
cis_r = f14
cis_rsq = f15
cis_rcub = f32
cis_Inv_Pi_by_16 = f33
cis_Pi_by_16_hi = f34
cis_Pi_by_16_lo = f35
cis_Inv_Pi_by_64 = f36
cis_Pi_by_16_lowest = f37
cis_r_exact = f38
// Polynomial coefficients loaded from double_cis_pq_k4
cis_P1 = f39
cis_Q1 = f40
cis_P2 = f41
cis_Q2 = f42
cis_P3 = f43
cis_Q3 = f44
cis_P4 = f45
cis_Q4 = f46
// Polynomial evaluation temporaries
cis_P_temp1 = f47
cis_P_temp2 = f48
cis_Q_temp1 = f49
cis_Q_temp2 = f50
cis_P = f51
// Constants used for the argument reduction
cis_SIG_INV_PI_BY_16_2TO61 = f52
cis_RSHF_2TO61 = f53
cis_RSHF = f54
cis_2TOM61 = f55
cis_NFLOAT = f56
cis_W_2TO61_RSH = f57
cis_tmp = f58
// Table values S[m], C[m] for the sine and for the cosine result
cis_Sm_sin = f59
cis_Cm_sin = f60
cis_Sm_cos = f61
cis_Cm_cos = f62
cis_srsq_sin = f63
cis_srsq_cos = f64
cis_Q_sin = f65
cis_Q_cos = f66
cis_Q = f67
/////////////////////////////////////////////////////////////
// General registers.
// r33/r34 hold the result pointers that the sincos entry point stores
// the sine and cosine through.
cis_pResSin = r33
cis_pResCos = r34
cis_exp_limit = r35
cis_r_signexp = r36
cis_AD_beta_table = r37
cis_r_sincos = r38
cis_r_exp = r39
cis_r_17_ones = r40
// Scratch registers r14-r19
cis_GR_sig_inv_pi_by_16 = r14
cis_GR_rshf_2to61 = r15
cis_GR_rshf = r16
cis_GR_exp_2tom61 = r17
cis_GR_n = r18
cis_GR_n_sin = r19
// m and 32*m deliberately share a register: the shift is done in place
cis_GR_m_sin = r41
cis_GR_32m_sin = r41
cis_GR_n_cos = r42
cis_GR_m_cos = r43
cis_GR_32m_cos = r43
cis_AD_2_sin = r44
cis_AD_2_cos = r45
cis_gr_tmp = r46
// Save area used around the call made on the large-argument path
GR_SAVE_B0 = r47
GR_SAVE_GP = r48
rB0_SAVED = r49
GR_SAVE_PFS = r50
GR_SAVE_PR = r51
// Running pointer into the constant tables (last stacked register, r52)
cis_AD_1 = r52
RODATA
.align 16
// Pi/16 parts
// pi/16 split into three parts of decreasing magnitude.  Each part takes
// 16 bytes (64-bit significand, then sign/exponent) and is read with ldfe.
// The code walks a single post-incremented pointer from this object on
// through double_cis_pq_k4 and into double_sin_cos_beta_k4, so the three
// objects must stay adjacent and in this order.
LOCAL_OBJECT_START(double_cis_pi)
data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 1st part
data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 2nd part
data8 0xA4093822299F31D0, 0x00003F7A // pi/16 3rd part
LOCAL_OBJECT_END(double_cis_pi)
// Coefficients for polynomials
// Double-precision coefficients of the sine (P) and cosine (Q) polynomials.
// They are stored as (P, Q) pairs from the highest order down, because the
// code loads one pair per ldfpd: (P4,Q4), (P3,Q3), (P2,Q2), (P1,Q1).
LOCAL_OBJECT_START(double_cis_pq_k4)
data8 0x3EC71C963717C63A // P4
data8 0x3EF9FFBA8F191AE6 // Q4
data8 0xBF2A01A00F4E11A8 // P3
data8 0xBF56C16C05AC77BF // Q3
data8 0x3F8111111110F167 // P2
data8 0x3FA555555554DD45 // Q2
data8 0xBFC5555555555555 // P1
data8 0xBFDFFFFFFFFFFFFC // Q1
LOCAL_OBJECT_END(double_cis_pq_k4)
// Sincos table (S[m], C[m])
// Table of sin(m*pi/16) and cos(m*pi/16) for m = 0..32.
// Each entry is 32 bytes: the sine value (16 bytes) followed by the cosine
// value (16 bytes), each stored as 64-bit significand then sign/exponent.
// The code indexes the table with 32*m, where m is the low 5 bits of N.
// The trailing comment on each line names the value in terms of
// S0..S4 / C0..C3, the distinct magnitudes that occur in the table.
LOCAL_OBJECT_START(double_sin_cos_beta_k4)
data8 0x0000000000000000 , 0x00000000 // sin( 0 pi/16) S0
data8 0x8000000000000000 , 0x00003fff // cos( 0 pi/16) C0
//
data8 0xc7c5c1e34d3055b3 , 0x00003ffc // sin( 1 pi/16) S1
data8 0xfb14be7fbae58157 , 0x00003ffe // cos( 1 pi/16) C1
//
data8 0xc3ef1535754b168e , 0x00003ffd // sin( 2 pi/16) S2
data8 0xec835e79946a3146 , 0x00003ffe // cos( 2 pi/16) C2
//
data8 0x8e39d9cd73464364 , 0x00003ffe // sin( 3 pi/16) S3
data8 0xd4db3148750d181a , 0x00003ffe // cos( 3 pi/16) C3
//
data8 0xb504f333f9de6484 , 0x00003ffe // sin( 4 pi/16) S4
data8 0xb504f333f9de6484 , 0x00003ffe // cos( 4 pi/16) C4
//
data8 0xd4db3148750d181a , 0x00003ffe // sin( 5 pi/16) C3
data8 0x8e39d9cd73464364 , 0x00003ffe // cos( 5 pi/16) S3
//
data8 0xec835e79946a3146 , 0x00003ffe // sin( 6 pi/16) C2
data8 0xc3ef1535754b168e , 0x00003ffd // cos( 6 pi/16) S2
//
data8 0xfb14be7fbae58157 , 0x00003ffe // sin( 7 pi/16) C1
data8 0xc7c5c1e34d3055b3 , 0x00003ffc // cos( 7 pi/16) S1
//
data8 0x8000000000000000 , 0x00003fff // sin( 8 pi/16) C0
data8 0x0000000000000000 , 0x00000000 // cos( 8 pi/16) S0
//
data8 0xfb14be7fbae58157 , 0x00003ffe // sin( 9 pi/16) C1
data8 0xc7c5c1e34d3055b3 , 0x0000bffc // cos( 9 pi/16) -S1
//
data8 0xec835e79946a3146 , 0x00003ffe // sin(10 pi/16) C2
data8 0xc3ef1535754b168e , 0x0000bffd // cos(10 pi/16) -S2
//
data8 0xd4db3148750d181a , 0x00003ffe // sin(11 pi/16) C3
data8 0x8e39d9cd73464364 , 0x0000bffe // cos(11 pi/16) -S3
//
data8 0xb504f333f9de6484 , 0x00003ffe // sin(12 pi/16) S4
data8 0xb504f333f9de6484 , 0x0000bffe // cos(12 pi/16) -S4
//
data8 0x8e39d9cd73464364 , 0x00003ffe // sin(13 pi/16) S3
data8 0xd4db3148750d181a , 0x0000bffe // cos(13 pi/16) -C3
//
data8 0xc3ef1535754b168e , 0x00003ffd // sin(14 pi/16) S2
data8 0xec835e79946a3146 , 0x0000bffe // cos(14 pi/16) -C2
//
data8 0xc7c5c1e34d3055b3 , 0x00003ffc // sin(15 pi/16) S1
data8 0xfb14be7fbae58157 , 0x0000bffe // cos(15 pi/16) -C1
//
data8 0x0000000000000000 , 0x00000000 // sin(16 pi/16) S0
data8 0x8000000000000000 , 0x0000bfff // cos(16 pi/16) -C0
//
data8 0xc7c5c1e34d3055b3 , 0x0000bffc // sin(17 pi/16) -S1
data8 0xfb14be7fbae58157 , 0x0000bffe // cos(17 pi/16) -C1
//
data8 0xc3ef1535754b168e , 0x0000bffd // sin(18 pi/16) -S2
data8 0xec835e79946a3146 , 0x0000bffe // cos(18 pi/16) -C2
//
data8 0x8e39d9cd73464364 , 0x0000bffe // sin(19 pi/16) -S3
data8 0xd4db3148750d181a , 0x0000bffe // cos(19 pi/16) -C3
//
data8 0xb504f333f9de6484 , 0x0000bffe // sin(20 pi/16) -S4
data8 0xb504f333f9de6484 , 0x0000bffe // cos(20 pi/16) -S4
//
data8 0xd4db3148750d181a , 0x0000bffe // sin(21 pi/16) -C3
data8 0x8e39d9cd73464364 , 0x0000bffe // cos(21 pi/16) -S3
//
data8 0xec835e79946a3146 , 0x0000bffe // sin(22 pi/16) -C2
data8 0xc3ef1535754b168e , 0x0000bffd // cos(22 pi/16) -S2
//
data8 0xfb14be7fbae58157 , 0x0000bffe // sin(23 pi/16) -C1
data8 0xc7c5c1e34d3055b3 , 0x0000bffc // cos(23 pi/16) -S1
//
data8 0x8000000000000000 , 0x0000bfff // sin(24 pi/16) -C0
data8 0x0000000000000000 , 0x00000000 // cos(24 pi/16) S0
//
data8 0xfb14be7fbae58157 , 0x0000bffe // sin(25 pi/16) -C1
data8 0xc7c5c1e34d3055b3 , 0x00003ffc // cos(25 pi/16) S1
//
data8 0xec835e79946a3146 , 0x0000bffe // sin(26 pi/16) -C2
data8 0xc3ef1535754b168e , 0x00003ffd // cos(26 pi/16) S2
//
data8 0xd4db3148750d181a , 0x0000bffe // sin(27 pi/16) -C3
data8 0x8e39d9cd73464364 , 0x00003ffe // cos(27 pi/16) S3
//
data8 0xb504f333f9de6484 , 0x0000bffe // sin(28 pi/16) -S4
data8 0xb504f333f9de6484 , 0x00003ffe // cos(28 pi/16) S4
//
data8 0x8e39d9cd73464364 , 0x0000bffe // sin(29 pi/16) -S3
data8 0xd4db3148750d181a , 0x00003ffe // cos(29 pi/16) C3
//
data8 0xc3ef1535754b168e , 0x0000bffd // sin(30 pi/16) -S2
data8 0xec835e79946a3146 , 0x00003ffe // cos(30 pi/16) C2
//
data8 0xc7c5c1e34d3055b3 , 0x0000bffc // sin(31 pi/16) -S1
data8 0xfb14be7fbae58157 , 0x00003ffe // cos(31 pi/16) C1
//
data8 0x0000000000000000 , 0x00000000 // sin(32 pi/16) S0
data8 0x8000000000000000 , 0x00003fff // cos(32 pi/16) C0
LOCAL_OBJECT_END(double_sin_cos_beta_k4)
.section .text
// void sincos(double x, double *s, double *c)
// Entry point that stores the results through the pointers in cis_pResSin
// and cis_pResCos.  It sets p13 (and clears p14) and then joins the shared
// code at _CIS_COMMON, which lives inside __libm_sincos below.
GLOBAL_IEEE754_ENTRY(sincos)
// cis_GR_sig_inv_pi_by_16 = significand of 16/pi
// The alloc requests 21 local registers (r32 - r52)
{ .mlx
alloc GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0
movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
}
// cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
// Start forming the address of the constant tables (via the linkage table)
{ .mlx
addl cis_AD_1 = @ltoff(double_cis_pi), gp
movl cis_GR_rshf_2to61 = 0x47b8000000000000
};;
// Load the table address and normalize the argument
{ .mfi
ld8 cis_AD_1 = [cis_AD_1]
fnorm.s1 cis_NORM_f8 = cis_Arg
cmp.eq p13, p14 = r0, r0 // p13 set for sincos
}
// cis_GR_exp_2tom61 = exponent of scaling factor 2^-61
{ .mib
mov cis_GR_exp_2tom61 = 0xffff-61
nop.i 0
br.cond.sptk _CIS_COMMON
};;
GLOBAL_IEEE754_END(sincos)
// cis: empty local entry, immediately followed by __libm_sincos
LOCAL_LIBM_ENTRY(cis)
LOCAL_LIBM_END(cis)
// __libm_sincos: internal entry point.  Takes the argument in f8 and
// returns the cosine in f8 and the sine in f9; nothing is stored to memory
// on this path because p14 is set.
GLOBAL_LIBM_ENTRY(__libm_sincos)
// cis_GR_sig_inv_pi_by_16 = significand of 16/pi
{ .mlx
alloc GR_SAVE_PFS = ar.pfs,0,21,0,0
movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
}
// cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
{ .mlx
addl cis_AD_1 = @ltoff(double_cis_pi), gp
movl cis_GR_rshf_2to61 = 0x47b8000000000000
};;
// p14 set for __libm_sincos and cis
{ .mfi
ld8 cis_AD_1 = [cis_AD_1]
fnorm.s1 cis_NORM_f8 = cis_Arg
cmp.eq p14, p13 = r0, r0
}
// cis_GR_exp_2tom61 = exponent of scaling factor 2^-61
{ .mib
mov cis_GR_exp_2tom61 = 0xffff-61
nop.i 0
nop.b 0
};;
// Shared body.  On arrival: cis_AD_1 -> double_cis_pi, cis_NORM_f8 holds
// the normalized argument, and exactly one of p13 (sincos) / p14
// (__libm_sincos, cis) is set.
_CIS_COMMON:
// Form two constants we need
// 16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
// 1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
// fclass sets p6 for the special arguments handled at _CIS_SPECIAL_ARGS
// NOTE(review): the original comment here mentioned an fcmp used to set
// denormal/invalid flags, but no fcmp appears in this bundle.
{ .mfi
setf.sig cis_SIG_INV_PI_BY_16_2TO61 = cis_GR_sig_inv_pi_by_16
fclass.m p6,p0 = cis_Arg, 0xe7 // if x=0,inf,nan
addl cis_gr_tmp = -1, r0
}
// 1.1000 2^63 for right shift
{ .mlx
setf.d cis_RSHF_2TO61 = cis_GR_rshf_2to61
movl cis_GR_rshf = 0x43e8000000000000
};;
// Form another constant
// 2^-61 for scaling Nfloat
// 0x1001a is register_bias + 27.
// So if f8 >= 2^27, go to large arguments routine
{ .mmi
getf.exp cis_r_signexp = cis_Arg
setf.exp cis_2TOM61 = cis_GR_exp_2tom61
mov cis_exp_limit = 0x1001a
};;
// Load the first of the three pieces of pi/16
// Form another constant
// 1.1000...000 * 2^63, the right shift constant
// Branch away if x is zero, infinity, or NaN
{ .mmb
ldfe cis_Pi_by_16_hi = [cis_AD_1],16
setf.d cis_RSHF = cis_GR_rshf
(p6) br.cond.spnt _CIS_SPECIAL_ARGS
};;
// Load the second piece of pi/16
// Create the constant that is later squared to force the inexact flag
{ .mmi
ldfe cis_Pi_by_16_lo = [cis_AD_1],16
setf.sig cis_tmp = cis_gr_tmp
nop.i 0
};;
// Load the third piece of pi/16
{ .mfi
ldfe cis_Pi_by_16_lowest = [cis_AD_1],16
nop.f 0
nop.i 0
};;
// Start loading P, Q coefficients
// Extract the 17-bit exponent of x (drop the sign bit)
{ .mib
ldfpd cis_P4,cis_Q4 = [cis_AD_1],16
dep.z cis_r_exp = cis_r_signexp, 0, 17
nop.b 0
};;
// p10 is true if we must call routines to handle larger arguments
// p10 is true if f8 exp is >= 0x1001a
{ .mmb
ldfpd cis_P3,cis_Q3 = [cis_AD_1],16
cmp.ge p10, p0 = cis_r_exp, cis_exp_limit
(p10) br.cond.spnt _CIS_LARGE_ARGS // go to |x| >= 2^27 path
};;
// cis_W = x * cis_Inv_Pi_by_16
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
// rightmost bits of significand
{ .mfi
ldfpd cis_P2,cis_Q2 = [cis_AD_1],16
fma.s1 cis_W_2TO61_RSH = cis_NORM_f8,cis_SIG_INV_PI_BY_16_2TO61,cis_RSHF_2TO61
nop.i 0
};;
// cis_NFLOAT = Round_Int_Nearest(cis_W)
// After this last coefficient load cis_AD_1 points at the sin/cos table
{ .mfi
ldfpd cis_P1,cis_Q1 = [cis_AD_1], 16
fms.s1 cis_NFLOAT = cis_W_2TO61_RSH,cis_2TOM61,cis_RSHF
nop.i 0
};;
// get N as an integer from the significand of the shifted W
{ .mfi
getf.sig cis_GR_n = cis_W_2TO61_RSH
nop.f 0
nop.i 0
};;
// Add 2^(k-1) = 8 to N to form the table index used for the cosine
// (8 * pi/16 = pi/2, and cos(x) = sin(x + pi/2))
// cis_r = -cis_Nfloat * cis_Pi_by_16_hi + x
// cis_r = cis_r -cis_Nfloat * cis_Pi_by_16_lo
{ .mfi
add cis_GR_n_cos = 0x8, cis_GR_n
fnma.s1 cis_r = cis_NFLOAT,cis_Pi_by_16_hi,cis_NORM_f8
nop.i 0
};;
//Get M (least k+1 bits of N)
{ .mmi
and cis_GR_m_sin = 0x1f,cis_GR_n
and cis_GR_m_cos = 0x1f,cis_GR_n_cos
nop.i 0
};;
// Scale M by the 32-byte table entry size (done in place, same register)
{ .mmi
nop.m 0
nop.m 0
shl cis_GR_32m_sin = cis_GR_m_sin,5
};;
// Add 32*M to address of sin_cos_beta table
{ .mmi
add cis_AD_2_sin = cis_GR_32m_sin, cis_AD_1
nop.m 0
shl cis_GR_32m_cos = cis_GR_m_cos,5
};;
// Add 32*M to address of sin_cos_beta table
// p10 is reused from here on: set if the input is denormal
{ .mmf
ldfe cis_Sm_sin = [cis_AD_2_sin],16
add cis_AD_2_cos = cis_GR_32m_cos, cis_AD_1
fclass.m.unc p10,p0 = cis_Arg,0x0b // den. input - uflow
};;
// Second reduction step: subtract nfloat * LOW(pi/16)
{ .mfi
ldfe cis_Sm_cos = [cis_AD_2_cos], 16
fnma.s1 cis_r = cis_NFLOAT, cis_Pi_by_16_lo, cis_r
nop.i 0
};;
{ .mfi
ldfe cis_Cm_sin = [cis_AD_2_sin]
fma.s1 cis_rsq = cis_r, cis_r, f0 // get r^2
nop.i 0
}
// fmpy forces inexact flag
{ .mfi
nop.m 0
fmpy.s0 cis_tmp = cis_tmp,cis_tmp
nop.i 0
};;
// Third reduction step: subtract nfloat * LOWEST(pi/16)
{ .mfi
nop.m 0
fnma.s1 cis_r_exact = cis_NFLOAT, cis_Pi_by_16_lowest, cis_r
nop.i 0
};;
// Evaluate P and Q in rsq by Horner's rule, highest order first
{ .mfi
ldfe cis_Cm_cos = [cis_AD_2_cos]
fma.s1 cis_P_temp1 = cis_rsq, cis_P4, cis_P3
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 cis_Q_temp1 = cis_rsq, cis_Q4, cis_Q3
nop.i 0
};;
// s_rsq = S[m] * rsq, for the sine and for the cosine table entry
{ .mfi
nop.m 0
fmpy.s1 cis_srsq_sin = cis_Sm_sin, cis_rsq
nop.i 0
}
{ .mfi
nop.m 0
fmpy.s1 cis_srsq_cos = cis_Sm_cos,cis_rsq
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 cis_Q_temp2 = cis_rsq, cis_Q_temp1, cis_Q2
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 cis_P_temp2 = cis_rsq, cis_P_temp1, cis_P2
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 cis_Q = cis_rsq, cis_Q_temp2, cis_Q1
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 cis_P = cis_rsq, cis_P_temp2, cis_P1
nop.i 0
};;
{ .mfi
nop.m 0
fmpy.s1 cis_rcub = cis_r_exact, cis_rsq // get r^3
nop.i 0
};;
// Q = S[m] + s_rsq * Q, for the sine and for the cosine
{ .mfi
nop.m 0
fma.s1 cis_Q_sin = cis_srsq_sin,cis_Q, cis_Sm_sin
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 cis_Q_cos = cis_srsq_cos,cis_Q, cis_Sm_cos
nop.i 0
};;
// P = r + rcub * P
{ .mfi
nop.m 0
fma.s1 cis_P = cis_rcub,cis_P, cis_r_exact // final P
nop.i 0
};;
// If den. arg, force underflow to be set
{ .mfi
nop.m 0
(p10) fmpy.d.s0 cis_tmp = cis_Arg,cis_Arg
nop.i 0
};;
// Answer = Q + C[m] * P, rounded to double
{ .mfi
nop.m 0
fma.d.s0 cis_Sin_res = cis_Cm_sin,cis_P,cis_Q_sin//Final sin
nop.i 0
}
{ .mfb
nop.m 0
fma.d.s0 cis_Cos_res = cis_Cm_cos,cis_P,cis_Q_cos//Final cos
(p14) br.ret.sptk b0 // common exit for __libm_sincos and cis main path
};;
// Only the sincos entry point falls through to store the results
{ .mmb
stfd [cis_pResSin] = cis_Sin_res
stfd [cis_pResCos] = cis_Cos_res
br.ret.sptk b0 // common exit for sincos main path
};;
// x is zero, infinity, or NaN
_CIS_SPECIAL_ARGS:
// sin(+/-0) = +/-0
// sin(Inf) = NaN
// sin(NaN) = NaN
{ .mfi
nop.m 999
fma.d.s0 cis_Sin_res = cis_Arg, f0, f0 // sin(+/-0,NaN,Inf)
nop.i 999
};;
// cos(+/-0) = 1.0
// cos(Inf) = NaN
// cos(NaN) = NaN
{ .mfb
nop.m 999
fma.d.s0 cis_Cos_res = cis_Arg, f0, f1 // cos(+/-0,NaN,Inf)
(p14) br.ret.sptk b0 //spec exit for __libm_sincos and cis main path
};;
// Only the sincos entry point falls through to store the results
{ .mmb
stfd [cis_pResSin] = cis_Sin_res
stfd [cis_pResCos] = cis_Cos_res
br.ret.sptk b0 // special-argument exit for sincos
};;
GLOBAL_LIBM_END(__libm_sincos)
//// |x| > 2^27 path ///////
// _CIS_LARGE_ARGS: out-of-line large-argument path.
// Saves ar.pfs, gp, b0 and the predicates in the GR_SAVE_* registers, calls
// __libm_sincos_large, restores the saved state, rounds both results to
// double precision, and then leaves either by a direct return (p14 set) or
// after storing the results through cis_pResSin / cis_pResCos.
.proc _CIS_LARGE_ARGS
_CIS_LARGE_ARGS:
.prologue
// ar.pfs is kept in GR_SAVE_PFS (recorded for the unwinder by .save)
{ .mfi
nop.m 0
nop.f 0
.save ar.pfs, GR_SAVE_PFS
mov GR_SAVE_PFS = ar.pfs
}
;;
// gp is kept for the restore after the call; b0 is kept in GR_SAVE_B0
// because br.call below overwrites it (recorded for the unwinder by .save)
{ .mfi
mov GR_SAVE_GP = gp
nop.f 0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0 = b0
};;
.body
// Call of huge arguments sincos
// The predicates are saved first because p14 is tested again after the call
{ .mib
nop.m 0
mov GR_SAVE_PR = pr
br.call.sptk b0 = __libm_sincos_large
};;
// Restore gp and the predicates (mask 0x1fffe sets every mask bit except bit 0)
{ .mfi
mov gp = GR_SAVE_GP
nop.f 0
mov pr = GR_SAVE_PR, 0x1fffe
}
;;
// Restore the return address
{ .mfi
nop.m 0
nop.f 0
mov b0 = GR_SAVE_B0
}
;;
// Multiply by 1 and add 0 under status field s0: rounds the cosine result
// to double precision. ar.pfs is restored in the same bundle.
{ .mfi
nop.m 0
fma.d.s0 cis_Cos_res = cis_Cos_res, f1, f0
mov ar.pfs = GR_SAVE_PFS
}
// Same rounding for the sine result; with p14 set the results are returned
// in registers and nothing is stored
{ .mfb
nop.m 0
fma.d.s0 cis_Sin_res = cis_Sin_res, f1, f0
(p14) br.ret.sptk b0 // exit for |x| > 2^27 path (__libm_sincos and cis)
};;
// p14 clear: store both results through the caller's pointers, then return
{ .mmb
stfd [cis_pResSin] = cis_Sin_res
stfd [cis_pResCos] = cis_Cos_res
br.ret.sptk b0 // exit for sincos |x| > 2^27 path
};;
.endp _CIS_LARGE_ARGS
.type __libm_sincos_large#,@function
.global __libm_sincos_large#

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,744 @@
.file "libm_sincosf.s"
// Copyright (c) 2002 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2002 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/01/02 Initial version
// 02/18/02 Large arguments processing routine is excluded.
// External interface entry points are added
// 02/26/02 Added temporary return of results in r8, r9
// 03/13/02 Corrected restore of predicate registers
// 03/19/02 Added stack unwind around call to __libm_cisf_large
// 09/05/02 Work range is widened by reduction strengthen (2 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
// API
//==============================================================
// 1) float _Complex cisf(float)
// 2) void sincosf(float, float*s, float*c)
// 3) __libm_sincosf - internal LIBM function, that accepts
// argument in f8 and returns cosine through f8, sine through f9
//
// Overview of operation
//==============================================================
//
// Step 1
// ======
// Reduce x to region -1/2*pi/2^k ===== 0 ===== +1/2*pi/2^k where k=4
// divide x by pi/2^k.
// Multiply by 2^k/pi.
// nfloat = Round result to integer (round-to-nearest)
//
// r = x - nfloat * pi/2^k
// Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k) for increased accuracy.
// pi/2^k is stored as two numbers that when added make pi/2^k.
// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
// HIGH part is rounded to zero, LOW - to nearest
//
// x = (nfloat * pi/2^k) + r
// r is small enough that we can use a polynomial approximation
// and is referred to as the reduced argument.
//
// Step 3
// ======
// Take the unreduced part and remove the multiples of 2pi.
// So nfloat = nfloat (with lower k+1 bits cleared) + lower k+1 bits
//
// nfloat (with lower k+1 bits cleared) is a multiple of 2^(k+1)
// N * 2^(k+1)
// nfloat * pi/2^k = N * 2^(k+1) * pi/2^k + (lower k+1 bits) * pi/2^k
// nfloat * pi/2^k = N * 2 * pi + (lower k+1 bits) * pi/2^k
// nfloat * pi/2^k = N2pi + M * pi/2^k
//
//
// Sin(x) = Sin((nfloat * pi/2^k) + r)
// = Sin(nfloat * pi/2^k) * Cos(r) + Cos(nfloat * pi/2^k) * Sin(r)
//
// Sin(nfloat * pi/2^k) = Sin(N2pi + Mpi/2^k)
// = Sin(N2pi)Cos(Mpi/2^k) + Cos(N2pi)Sin(Mpi/2^k)
// = Sin(Mpi/2^k)
//
// Cos(nfloat * pi/2^k) = Cos(N2pi + Mpi/2^k)
// = Cos(N2pi)Cos(Mpi/2^k) + Sin(N2pi)Sin(Mpi/2^k)
// = Cos(Mpi/2^k)
//
// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
//
//
// Step 4
// ======
// 0 <= M < 2^(k+1)
// There are 2^(k+1) Sin entries in a table.
// There are 2^(k+1) Cos entries in a table.
//
// Get Sin(Mpi/2^k) and Cos(Mpi/2^k) by table lookup.
//
//
// Step 5
// ======
// Calculate Cos(r) and Sin(r) by polynomial approximation.
//
// Cos(r) = 1 + r^2 q1 + r^4 q2 = Series for Cos
// Sin(r) = r + r^3 p1 + r^5 p2 = Series for Sin
//
// and the coefficients q1, q2 and p1, p2 are stored in a table
//
//
// Calculate
// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
//
// as follows
//
// S[m] = Sin(Mpi/2^k) and C[m] = Cos(Mpi/2^k)
// rsq = r*r
//
//
// P = p1 + r^2p2
// Q = q1 + r^2q2
//
// rcub = r * rsq
// Sin(r) = r + rcub * P
// = r + r^3p1 + r^5p2 = Sin(r)
//
// P = r + rcub * P
//
// Answer = S[m] Cos(r) + C[m] P
//
// Cos(r) = 1 + rsq Q
// Cos(r) = 1 + r^2 Q
// Cos(r) = 1 + r^2 (q1 + r^2q2)
// Cos(r) = 1 + r^2q1 + r^4q2
//
// S[m] Cos(r) = S[m](1 + rsq Q)
// S[m] Cos(r) = S[m] + S[m] rsq Q
// S[m] Cos(r) = S[m] + s_rsq Q
// Q = S[m] + s_rsq Q
//
// Then,
//
// Answer = Q + C[m] P
// Registers used
//==============================================================
// general registers used:
// r14 -> r19
// r32 -> r52 (stacked frame of 21 registers set up by alloc)
// predicate registers used:
// p6 -> p14
// floating-point registers used
// f8 -> f15
// f32 -> f68
// Assembly macros
//==============================================================
// Symbolic names for the registers used by sincosf / cisf / __libm_sincosf.
//
// Floating-point registers.
// f8 carries the argument on entry and the cosine result on exit, so
// cisf_Arg and cisf_Cos_res are deliberately the same register; f9 carries
// the sine result.
cisf_Arg = f8
cisf_Sin_res = f9
cisf_Cos_res = f8
// Normalized argument, reduction intermediates, reduced argument r and its powers
cisf_NORM_f8 = f10
cisf_W = f11
cisf_int_Nfloat = f12
cisf_Nfloat = f13
cisf_r = f14
cisf_r_exact = f68
cisf_rsq = f15
cisf_rcub = f32
// Constants related to pi/16 (the hi/lo parts are loaded from double_cisf_pi)
cisf_Inv_Pi_by_16 = f33
cisf_Pi_by_16_hi = f34
cisf_Pi_by_16_lo = f35
cisf_Inv_Pi_by_64 = f36
cisf_Pi_by_64_hi = f37
cisf_Pi_by_64_lo = f38
// Polynomial coefficients (P1, Q1, P2, Q2 are loaded from double_cisf_pq_k4)
cisf_P1 = f39
cisf_Q1 = f40
cisf_P2 = f41
cisf_Q2 = f42
cisf_P3 = f43
cisf_Q3 = f44
cisf_P4 = f45
cisf_Q4 = f46
cisf_P_temp1 = f47
cisf_P_temp2 = f48
cisf_Q_temp1 = f49
cisf_Q_temp2 = f50
cisf_P = f51
// Constants built in registers for the round-to-integer shift trick
cisf_SIG_INV_PI_BY_16_2TO61 = f52
cisf_RSHF_2TO61 = f53
cisf_RSHF = f54
cisf_2TOM61 = f55
cisf_NFLOAT = f56
cisf_W_2TO61_RSH = f57
// Scratch value used only to raise status flags
cisf_tmp = f58
// Table values S[m], C[m] for the sine and cosine lookups, and the
// products/sums derived from them
cisf_Sm_sin = f59
cisf_Cm_sin = f60
cisf_Sm_cos = f61
cisf_Cm_cos = f62
cisf_srsq_sin = f63
cisf_srsq_cos = f64
cisf_Q_sin = f65
cisf_Q_cos = f66
cisf_Q = f67
/////////////////////////////////////////////////////////////
// General registers.
// r33 / r34: result pointers that the sincosf exits store through
cisf_pResSin = r33
cisf_pResCos = r34
cisf_exp_limit = r35
cisf_r_signexp = r36
cisf_AD_beta_table = r37
cisf_r_sincos = r38
cisf_r_exp = r39
cisf_r_17_ones = r40
// r14 -> r19: integer images of constants and the integer N
cisf_GR_sig_inv_pi_by_16 = r14
cisf_GR_rshf_2to61 = r15
cisf_GR_rshf = r16
cisf_GR_exp_2tom61 = r17
cisf_GR_n = r18
cisf_GR_n_sin = r19
// Table indices and addresses; the m / 32m names share one register each
cisf_GR_m_sin = r41
cisf_GR_32m_sin = r41
cisf_GR_n_cos = r42
cisf_GR_m_cos = r43
cisf_GR_32m_cos = r43
cisf_AD_2_sin = r44
cisf_AD_2_cos = r45
cisf_gr_tmp = r46
// Save area used around the call in _CISF_LARGE_ARGS; GR_SAVE_PFS is also
// the destination of alloc at the entry points
GR_SAVE_B0 = r47
GR_SAVE_GP = r48
rB0_SAVED = r49
GR_SAVE_PFS = r50
GR_SAVE_PR = r51
// Running pointer into the read-only constant tables
cisf_AD_1 = r52
// Read-only constants for sincosf.
// The code reads double_cisf_pi, double_cisf_pq_k4 and the table that
// follows them through one pointer (cisf_AD_1) that is post-incremented by 16
// after every load, so the order and size of these objects must stay as is.
RODATA
.align 16
// Pi/16 parts
// Each part is one 16-byte slot (64-bit significand, then sign/exponent)
// read with ldfe
LOCAL_OBJECT_START(double_cisf_pi)
data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 1st part
data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 2nd part
LOCAL_OBJECT_END(double_cisf_pi)
// Coefficients for polynomials
// Stored as the pairs (P2, Q2) and (P1, Q1); each pair is fetched by one ldfpd
LOCAL_OBJECT_START(double_cisf_pq_k4)
data8 0x3F810FABB668E9A2 // P2
data8 0x3FA552E3D6DE75C9 // Q2
data8 0xBFC555554447BC7F // P1
data8 0xBFDFFFFFC447610A // Q1
LOCAL_OBJECT_END(double_cisf_pq_k4)
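// Sincos table (S[m], C[m]): one 16-byte entry per m, holding
// sin(m*Pi/16) followed by cos(m*Pi/16) as doubles. The code indexes it
// with m = N & 0x1f scaled by 16, so both values come from a single ldfpd.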
LOCAL_OBJECT_START(double_sin_cos_beta_k4)
data8 0x0000000000000000 // sin ( 0 Pi / 16 )
data8 0x3FF0000000000000 // cos ( 0 Pi / 16 )
//
data8 0x3FC8F8B83C69A60B // sin ( 1 Pi / 16 )
data8 0x3FEF6297CFF75CB0 // cos ( 1 Pi / 16 )
//
data8 0x3FD87DE2A6AEA963 // sin ( 2 Pi / 16 )
data8 0x3FED906BCF328D46 // cos ( 2 Pi / 16 )
//
data8 0x3FE1C73B39AE68C8 // sin ( 3 Pi / 16 )
data8 0x3FEA9B66290EA1A3 // cos ( 3 Pi / 16 )
//
data8 0x3FE6A09E667F3BCD // sin ( 4 Pi / 16 )
data8 0x3FE6A09E667F3BCD // cos ( 4 Pi / 16 )
//
data8 0x3FEA9B66290EA1A3 // sin ( 5 Pi / 16 )
data8 0x3FE1C73B39AE68C8 // cos ( 5 Pi / 16 )
//
data8 0x3FED906BCF328D46 // sin ( 6 Pi / 16 )
data8 0x3FD87DE2A6AEA963 // cos ( 6 Pi / 16 )
//
data8 0x3FEF6297CFF75CB0 // sin ( 7 Pi / 16 )
data8 0x3FC8F8B83C69A60B // cos ( 7 Pi / 16 )
//
data8 0x3FF0000000000000 // sin ( 8 Pi / 16 )
data8 0x0000000000000000 // cos ( 8 Pi / 16 )
//
data8 0x3FEF6297CFF75CB0 // sin ( 9 Pi / 16 )
data8 0xBFC8F8B83C69A60B // cos ( 9 Pi / 16 )
//
data8 0x3FED906BCF328D46 // sin ( 10 Pi / 16 )
data8 0xBFD87DE2A6AEA963 // cos ( 10 Pi / 16 )
//
data8 0x3FEA9B66290EA1A3 // sin ( 11 Pi / 16 )
data8 0xBFE1C73B39AE68C8 // cos ( 11 Pi / 16 )
//
data8 0x3FE6A09E667F3BCD // sin ( 12 Pi / 16 )
data8 0xBFE6A09E667F3BCD // cos ( 12 Pi / 16 )
//
data8 0x3FE1C73B39AE68C8 // sin ( 13 Pi / 16 )
data8 0xBFEA9B66290EA1A3 // cos ( 13 Pi / 16 )
//
data8 0x3FD87DE2A6AEA963 // sin ( 14 Pi / 16 )
data8 0xBFED906BCF328D46 // cos ( 14 Pi / 16 )
//
data8 0x3FC8F8B83C69A60B // sin ( 15 Pi / 16 )
data8 0xBFEF6297CFF75CB0 // cos ( 15 Pi / 16 )
//
data8 0x0000000000000000 // sin ( 16 Pi / 16 )
data8 0xBFF0000000000000 // cos ( 16 Pi / 16 )
//
data8 0xBFC8F8B83C69A60B // sin ( 17 Pi / 16 )
data8 0xBFEF6297CFF75CB0 // cos ( 17 Pi / 16 )
//
data8 0xBFD87DE2A6AEA963 // sin ( 18 Pi / 16 )
data8 0xBFED906BCF328D46 // cos ( 18 Pi / 16 )
//
data8 0xBFE1C73B39AE68C8 // sin ( 19 Pi / 16 )
data8 0xBFEA9B66290EA1A3 // cos ( 19 Pi / 16 )
//
data8 0xBFE6A09E667F3BCD // sin ( 20 Pi / 16 )
data8 0xBFE6A09E667F3BCD // cos ( 20 Pi / 16 )
//
data8 0xBFEA9B66290EA1A3 // sin ( 21 Pi / 16 )
data8 0xBFE1C73B39AE68C8 // cos ( 21 Pi / 16 )
//
data8 0xBFED906BCF328D46 // sin ( 22 Pi / 16 )
data8 0xBFD87DE2A6AEA963 // cos ( 22 Pi / 16 )
//
data8 0xBFEF6297CFF75CB0 // sin ( 23 Pi / 16 )
data8 0xBFC8F8B83C69A60B // cos ( 23 Pi / 16 )
//
data8 0xBFF0000000000000 // sin ( 24 Pi / 16 )
data8 0x0000000000000000 // cos ( 24 Pi / 16 )
//
data8 0xBFEF6297CFF75CB0 // sin ( 25 Pi / 16 )
data8 0x3FC8F8B83C69A60B // cos ( 25 Pi / 16 )
//
data8 0xBFED906BCF328D46 // sin ( 26 Pi / 16 )
data8 0x3FD87DE2A6AEA963 // cos ( 26 Pi / 16 )
//
data8 0xBFEA9B66290EA1A3 // sin ( 27 Pi / 16 )
data8 0x3FE1C73B39AE68C8 // cos ( 27 Pi / 16 )
//
data8 0xBFE6A09E667F3BCD // sin ( 28 Pi / 16 )
data8 0x3FE6A09E667F3BCD // cos ( 28 Pi / 16 )
//
data8 0xBFE1C73B39AE68C8 // sin ( 29 Pi / 16 )
data8 0x3FEA9B66290EA1A3 // cos ( 29 Pi / 16 )
//
data8 0xBFD87DE2A6AEA963 // sin ( 30 Pi / 16 )
data8 0x3FED906BCF328D46 // cos ( 30 Pi / 16 )
//
data8 0xBFC8F8B83C69A60B // sin ( 31 Pi / 16 )
data8 0x3FEF6297CFF75CB0 // cos ( 31 Pi / 16 )
//
data8 0x0000000000000000 // sin ( 32 Pi / 16 )
data8 0x3FF0000000000000 // cos ( 32 Pi / 16 )
LOCAL_OBJECT_END(double_sin_cos_beta_k4)
.section .text
// void sincosf(float, float*s, float*c)
// Entry point that stores its results through the two pointers.
// Allocates the register frame, starts loading the constants shared with
// __libm_sincosf, sets p13 (and clears p14) so that the common code takes the
// store-through-pointer exits, and then branches to _CISF_COMMON.
GLOBAL_IEEE754_ENTRY(sincosf)
// cisf_GR_sig_inv_pi_by_16 = significand of 16/pi
{ .mlx
alloc GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0
movl cisf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // 16/pi signd
}
// cisf_GR_rshf_2to61 = 1.1000 2^(63+63-2)
// cisf_AD_1 = address of the linkage-table slot for double_cisf_pi
{ .mlx
addl cisf_AD_1 = @ltoff(double_cisf_pi), gp
movl cisf_GR_rshf_2to61 = 0x47b8000000000000 // 1.1 2^(63+63-2)
};;
// Load the table address itself, normalize the argument, and select the
// sincosf flavour: the compare of r0 with itself is always true, so p13 = 1
// and p14 = 0
{ .mfi
ld8 cisf_AD_1 = [cisf_AD_1]
fnorm.s1 cisf_NORM_f8 = cisf_Arg
cmp.eq p13, p14 = r0, r0 // p13 set for sincos
}
// cisf_GR_exp_2tom61 = exponent of scaling factor 2^-61
{ .mib
mov cisf_GR_exp_2tom61 = 0xffff-61
nop.i 0
br.cond.sptk _CISF_COMMON
};;
GLOBAL_IEEE754_END(sincosf)
// cisf has no instructions of its own here: its ENTRY/END pair is empty and
// sits directly in front of __libm_sincosf.
// NOTE(review): presumably cisf is meant to resolve to the code of
// __libm_sincosf that follows - confirm against the macro definitions.
LOCAL_LIBM_ENTRY(cisf)
LOCAL_LIBM_END(cisf)
// __libm_sincosf - internal LIBM entry point: argument in f8, cosine
// returned in f8 and sine in f9.
// Performs the same setup as sincosf but sets p14 (and clears p13), which
// makes the common code leave through _CISF_RETURN instead of storing
// through pointers. Falls through into _CISF_COMMON.
GLOBAL_LIBM_ENTRY(__libm_sincosf)
{ .mlx
// cisf_GR_sig_inv_pi_by_16 = significand of 16/pi
alloc GR_SAVE_PFS = ar.pfs,0,21,0,0
movl cisf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
}
// cisf_GR_rshf_2to61 = 1.1000 2^(63+63-2)
// cisf_AD_1 = address of the linkage-table slot for double_cisf_pi
{ .mlx
addl cisf_AD_1 = @ltoff(double_cisf_pi), gp
movl cisf_GR_rshf_2to61 = 0x47b8000000000000
};;
// p14 set for __libm_sincosf and cisf
// (the compare of r0 with itself is always true, so p14 = 1 and p13 = 0)
{ .mfi
ld8 cisf_AD_1 = [cisf_AD_1]
fnorm.s1 cisf_NORM_f8 = cisf_Arg
cmp.eq p14, p13 = r0, r0
}
// cisf_GR_exp_2tom61 = exponent of scaling factor 2^-61
// No branch is needed: execution continues at _CISF_COMMON
{ .mib
mov cisf_GR_exp_2tom61 = 0xffff-61
nop.i 0
nop.b 0
};;
// _CISF_COMMON: main path shared by sincosf (p13) and __libm_sincosf/cisf (p14).
// On entry: cisf_Arg = x, cisf_NORM_f8 = normalized x, cisf_AD_1 points at
// double_cisf_pi, and r14, r15, r17 hold the integer images of the constants
// set up by the entry points.
// Leaves to _CISF_SPECIAL_ARGS for x = 0, Inf, NaN and to _CISF_LARGE_ARGS
// for |x| >= 2^24; otherwise computes both results in single precision.
_CISF_COMMON:
// Form two constants we need
// 16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
// 1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
// NOTE(review): the original comment here mentioned an fcmp that sets
// denormal/invalid flags; the bundle below contains fclass.m, not fcmp.
// cisf_gr_tmp = -1 (all ones), later moved into the significand of cisf_tmp
{ .mfi
setf.sig cisf_SIG_INV_PI_BY_16_2TO61 = cisf_GR_sig_inv_pi_by_16
fclass.m p6,p0 = cisf_Arg, 0xe7//if x=0,inf,nan
addl cisf_gr_tmp = -1, r0
}
// cisf_GR_rshf = 1.1000 2^63 for right shift
{ .mlx
setf.d cisf_RSHF_2TO61 = cisf_GR_rshf_2to61
movl cisf_GR_rshf = 0x43e8000000000000
};;
// Form another constant
// 2^-61 for scaling Nfloat
// 0x10017 is register_bias + 24.
// So if f8 >= 2^24, go to large args routine
{ .mmi
getf.exp cisf_r_signexp = cisf_Arg
setf.exp cisf_2TOM61 = cisf_GR_exp_2tom61
mov cisf_exp_limit = 0x10017
};;
// Load the two pieces of pi/16
// Form another constant
// 1.1000...000 * 2^63, the right shift constant
// x = 0, Inf or NaN (p6) leaves the main path here
{ .mmb
ldfe cisf_Pi_by_16_hi = [cisf_AD_1],16
setf.d cisf_RSHF = cisf_GR_rshf
(p6) br.cond.spnt _CISF_SPECIAL_ARGS
};;
{ .mmi
ldfe cisf_Pi_by_16_lo = [cisf_AD_1],16
setf.sig cisf_tmp = cisf_gr_tmp //constant for inexact set
nop.i 0
};;
// Start loading P, Q coefficients
// dep.z keeps the low 17 bits of sign+exponent, i.e. drops the sign bit
{ .mmi
ldfpd cisf_P2,cisf_Q2 = [cisf_AD_1],16
nop.m 0
dep.z cisf_r_exp = cisf_r_signexp, 0, 17
};;
// p10 is true if we must call routines to handle larger arguments
// p10 is true if f8 exp is >= 0x10017
// After this load cisf_AD_1 points at double_sin_cos_beta_k4
{ .mmb
ldfpd cisf_P1,cisf_Q1 = [cisf_AD_1], 16
cmp.ge p10, p0 = cisf_r_exp, cisf_exp_limit
(p10) br.cond.spnt _CISF_LARGE_ARGS // go to |x| >= 2^24 path
};;
// cisf_W = x * cisf_Inv_Pi_by_16
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
// rightmost bits of significand
{ .mfi
nop.m 0
fma.s1 cisf_W_2TO61_RSH = cisf_NORM_f8,cisf_SIG_INV_PI_BY_16_2TO61,cisf_RSHF_2TO61
nop.i 0
};;
// cisf_NFLOAT = Round_Int_Nearest(cisf_W)
// (scale the shifted value back by 2^-61 and subtract the shift constant)
{ .mfi
nop.m 0
fms.s1 cisf_NFLOAT = cisf_W_2TO61_RSH,cisf_2TOM61,cisf_RSHF
nop.i 0
};;
// cisf_GR_n = significand bits of cisf_W_2TO61_RSH; its low bits hold the
// integer N
{ .mfi
getf.sig cisf_GR_n = cisf_W_2TO61_RSH
nop.f 0
nop.i 0
};;
// Add 2^(k-1) = 8 to N to get the table index for the cosine
// (8 * pi/16 = pi/2, so the cosine is evaluated as a sine shifted by pi/2)
// cisf_r = -cisf_NFLOAT * cisf_Pi_by_16_hi + x
// cisf_r_exact = cisf_r - cisf_NFLOAT * cisf_Pi_by_16_lo (computed further down)
{ .mfi
add cisf_GR_n_cos = 0x8, cisf_GR_n
fnma.s1 cisf_r = cisf_NFLOAT, cisf_Pi_by_16_hi, cisf_NORM_f8
nop.i 0
};;
//Get M (least k+1 bits of N)
{ .mmi
and cisf_GR_m_sin = 0x1f,cisf_GR_n
and cisf_GR_m_cos = 0x1f,cisf_GR_n_cos
nop.i 0
};;
// Table entry address = table base + 16 * M (one entry is two doubles)
{ .mmi
shladd cisf_AD_2_cos = cisf_GR_m_cos,4, cisf_AD_1
shladd cisf_AD_2_sin = cisf_GR_m_sin,4, cisf_AD_1
nop.i 0
};;
// Load S[m], C[m] for both indices
// p10 is reused from here on: it is now true for a denormal input, which is
// used below to set uflow
{ .mmf
ldfpd cisf_Sm_sin, cisf_Cm_sin = [cisf_AD_2_sin]
ldfpd cisf_Sm_cos, cisf_Cm_cos = [cisf_AD_2_cos]
fclass.m.unc p10,p0 = cisf_Arg,0x0b
};;
// rsq is formed from cisf_r, i.e. before the low part of pi/16 is removed
{ .mfi
nop.m 0
fma.s1 cisf_rsq = cisf_r, cisf_r, f0 // get r^2
nop.i 0
}
{ .mfi
nop.m 0
fmpy.s0 cisf_tmp = cisf_tmp,cisf_tmp // inexact flag
nop.i 0
};;
// Second reduction step: remove NFLOAT * LOW(pi/16)
{ .mmf
nop.m 0
nop.m 0
fnma.s1 cisf_r_exact = cisf_NFLOAT, cisf_Pi_by_16_lo, cisf_r
};;
// P = p1 + r^2 p2
{ .mfi
nop.m 0
fma.s1 cisf_P = cisf_rsq, cisf_P2, cisf_P1
nop.i 0
}
// Q = q1 + r^2 q2
{ .mfi
nop.m 0
fma.s1 cisf_Q = cisf_rsq, cisf_Q2, cisf_Q1
nop.i 0
};;
{ .mfi
nop.m 0
fmpy.s1 cisf_rcub = cisf_r_exact, cisf_rsq // get r^3
nop.i 0
};;
// s_rsq = S[m] * r^2, once for each table index
{ .mfi
nop.m 0
fmpy.s1 cisf_srsq_sin = cisf_Sm_sin,cisf_rsq
nop.i 0
}
{ .mfi
nop.m 0
fmpy.s1 cisf_srsq_cos = cisf_Sm_cos,cisf_rsq
nop.i 0
};;
// P = r + rcub * P, the approximation of Sin(r)
{ .mfi
nop.m 0
fma.s1 cisf_P = cisf_rcub,cisf_P,cisf_r_exact
nop.i 0
};;
// Q = S[m] + s_rsq * Q, i.e. S[m] * Cos(r), once for each table index
{ .mfi
nop.m 0
fma.s1 cisf_Q_sin = cisf_srsq_sin,cisf_Q, cisf_Sm_sin
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 cisf_Q_cos = cisf_srsq_cos,cisf_Q, cisf_Sm_cos
nop.i 0
};;
// If den. arg, force underflow to be set
// (this must read cisf_Arg before the final cosine overwrites f8)
{ .mfi
nop.m 0
(p10) fmpy.s.s0 cisf_tmp = cisf_Arg,cisf_Arg
nop.i 0
};;
//Final sin
// Answer = Q + C[m] * P, rounded to single precision
{ .mfi
nop.m 0
fma.s.s0 cisf_Sin_res = cisf_Cm_sin, cisf_P, cisf_Q_sin
nop.i 0
}
//Final cos
{ .mfb
nop.m 0
fma.s.s0 cisf_Cos_res = cisf_Cm_cos, cisf_P, cisf_Q_cos
(p14) br.cond.sptk _CISF_RETURN //com. exit for __libm_sincosf and cisf main path
};;
// p14 clear (sincosf): store both results through the caller's pointers
{ .mmb
stfs [cisf_pResSin] = cisf_Sin_res
stfs [cisf_pResCos] = cisf_Cos_res
br.ret.sptk b0 // common exit for sincos main path
};;
// _CISF_SPECIAL_ARGS: reached when x is +/-0, Inf or NaN (p6).
// Both results come from a single fma of x with zero, so the IEEE rules for
// x * 0 produce the required values.
_CISF_SPECIAL_ARGS:
// sinf(+/-0) = +/-0
// sinf(Inf) = NaN
// sinf(NaN) = NaN
// computed as x * 0 + 0
{ .mfi
nop.m 999
fma.s.s0 cisf_Sin_res = cisf_Arg, f0, f0 // sinf(+/-0,NaN,Inf)
nop.i 999
};;
// cosf(+/-0) = 1.0
// cosf(Inf) = NaN
// cosf(NaN) = NaN
// computed as x * 0 + 1; this overwrites f8, so the sine is formed first
{ .mfb
nop.m 999
fma.s.s0 cisf_Cos_res = cisf_Arg, f0, f1 // cosf(+/-0,NaN,Inf)
(p14) br.cond.sptk _CISF_RETURN //spec exit for __libm_sincosf and cisf
};;
// p14 clear (sincosf): store both results through the caller's pointers
{ .mmb
stfs [cisf_pResSin] = cisf_Sin_res
stfs [cisf_pResCos] = cisf_Cos_res
br.ret.sptk b0 // special exit for sincos main path
};;
// exit for __libm_sincosf and cisf: every branch to _CISF_RETURN is
// predicated on p14
// NOTE! r8 and r9 are used only because of a compiler issue connected with
// the passing of floating-point complex function results.
// Once that issue is fixed these operations can be deleted.
_CISF_RETURN:
// Copy the single-precision images of the cosine and sine into r8 and r9
{ .mmb
getf.s r8 = cisf_Cos_res
getf.s r9 = cisf_Sin_res
br.ret.sptk b0 // exit for __libm_sincosf and cisf
};;
GLOBAL_LIBM_END(__libm_sincosf)
//// |x| >= 2^24 path ///////
// _CISF_LARGE_ARGS: reached from _CISF_COMMON when the exponent test
// (cmp.ge against 0x10017) succeeds.
// Saves ar.pfs, gp, b0 and the predicates in the GR_SAVE_* registers, calls
// __libm_sincos_large, restores the saved state, rounds both results to
// single precision, and then leaves through _CISF_RETURN (p14) or after
// storing through cisf_pResSin / cisf_pResCos.
.proc _CISF_LARGE_ARGS
_CISF_LARGE_ARGS:
.prologue
// ar.pfs is kept in GR_SAVE_PFS (recorded for the unwinder by .save)
{ .mfi
nop.m 0
nop.f 0
.save ar.pfs, GR_SAVE_PFS
mov GR_SAVE_PFS = ar.pfs
};;
// gp is kept for the restore after the call; b0 is kept in GR_SAVE_B0
// because br.call below overwrites it (recorded for the unwinder by .save)
{ .mfi
mov GR_SAVE_GP = gp
nop.f 0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0 = b0
};;
.body
// Call of huge arguments sincos
// The predicates are saved first because p14 is tested again after the call
{ .mib
nop.m 0
mov GR_SAVE_PR = pr
br.call.sptk b0 = __libm_sincos_large
};;
// Restore gp and the predicates (mask 0x1fffe sets every mask bit except bit 0)
{ .mfi
mov gp = GR_SAVE_GP
nop.f 0
mov pr = GR_SAVE_PR, 0x1fffe
}
;;
// Restore the return address
{ .mfi
nop.m 0
nop.f 0
mov b0 = GR_SAVE_B0
}
;;
// Multiply by 1 and add 0 under status field s0: rounds the cosine result
// to single precision. ar.pfs is restored in the same bundle.
{ .mfi
nop.m 0
fma.s.s0 cisf_Cos_res = cisf_Cos_res, f1, f0
mov ar.pfs = GR_SAVE_PFS
}
// exit for |x| >= 2^24 path (__libm_sincosf and cisf)
{ .mfb
nop.m 0
fma.s.s0 cisf_Sin_res = cisf_Sin_res, f1, f0
(p14) br.cond.sptk _CISF_RETURN
};;
// p14 clear (sincosf): store both results through the caller's pointers
{ .mmb
stfs [cisf_pResSin] = cisf_Sin_res
stfs [cisf_pResCos] = cisf_Cos_res
br.ret.sptk b0 // exit for sincos |x| >= 2^24 path
};;
.endp _CISF_LARGE_ARGS
.type __libm_sincos_large#,@function
.global __libm_sincos_large#

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,10 @@
//
// Copyright (C) 2000, 2001, Intel Corporation
/* file: libm_support.h */
// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -19,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -34,45 +35,51 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History: 02/02/2000 Initial version
// 2/28/2000 added tags for logb and nextafter
// 3/22/2000 Changes to support _LIB_VERSION variable
// 3/22/2000 Changes to support _LIB_VERSIONIMF variable
// and filled some enum gaps. Added support for C99.
// 5/31/2000 added prototypes for __libm_frexp_4l/8l
// 8/10/2000 Changed declaration of _LIB_VERSION to work for library
// 8/10/2000 Changed declaration of _LIB_VERSIONIMF to work for library
// builds and other application builds (precompiler directives).
// 8/11/2000 Added pointers-to-matherr-functions declarations to allow
// for user-defined matherr functions in the dll build.
// 12/07/2000 Added scalbn error_types values.
// 5/01/2001 Added error_types values for C99 nearest integer
// functions.
// 6/07/2001 Added error_types values for fdim.
// 6/18/2001 Added include of complex_support.h.
// 8/03/2001 Added error_types values for nexttoward, scalbln.
// 8/23/2001 Corrected tag numbers from 186 and higher.
// 8/27/2001 Added check for long int and long long int definitions.
// 12/10/2001 Added error_types for erfc.
// 12/27/2001 Added error_types for degree argument functions.
// 01/02/2002 Added error_types for tand, cotd.
// 01/04/2002 Delete include of complex_support.h
// 01/23/2002 Deleted prototypes for __libm_frexp*. Added check for
// multiple int, long int, and long long int definitions.
// 05/20/2002 Added error_types for cot.
// 06/27/2002 Added error_types for sinhcosh.
// 12/05/2002 Added error_types for annuity and compound
// 04/10/2003 Added error_types for tgammal/tgamma/tgammaf
//
#ifndef __ASSEMBLER__
#include <math.h>
float __libm_frexp_4f( float x, int* exp);
float _GI___libm_frexp_4f( float x, int* exp);
float __libm_frexp_8f( float x, int* exp);
double __libm_frexp_4( double x, int* exp);
double _GI___libm_frexp_4( double x, int* exp);
double __libm_frexp_8( double x, int* exp);
long double __libm_frexp_4l( long double x, int* exp);
long double _GI___libm_frexp_4l( long double x, int* exp);
long double __libm_frexp_8l( long double x, int* exp);
void __libm_sincos_pi4(double,double*,double*,int);
void __libm_y0y1(double , double *, double *);
void __libm_j0j1(double , double *, double *);
double __libm_lgamma_kernel(double,int*,int,int);
double __libm_j0(double);
double __libm_j1(double);
double __libm_jn(int,double);
double __libm_y0(double);
double __libm_y1(double);
double __libm_yn(int,double);
double __libm_copysign (double, double);
float __libm_copysignf (float, float);
long double __libm_copysignl (long double, long double);
extern double rint(double);
extern double sqrt(double);
extern double fabs(double);
extern double log(double);
@ -112,24 +119,31 @@ extern long double log1pl(long double);
extern long double logl(long double);
extern long double sqrtl(long double);
extern long double expl(long double);
extern long lround(double);
extern long lroundf(float);
extern long lroundl(long double);
extern long double fabsl(long double);
#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64))
#error integer size not established; define SIZE_INT_32 or SIZE_INT_64
#error integer size not established; define SIZE_INT_32 or SIZE_INT_64
#endif
struct fp64 { /*/ sign:1 exponent:11 significand:52 (implied leading 1)*/
unsigned lo_significand:32;
unsigned hi_significand:20;
unsigned exponent:11;
unsigned sign:1;
};
#if (defined(SIZE_INT_32) && defined(SIZE_INT_64))
#error multiple integer size definitions; define SIZE_INT_32 or SIZE_INT_64
#endif
#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI)
#define f64abs(x) ((x) < 0.0 ? -(x) : (x))
#if !(defined(SIZE_LONG_INT_32) || defined(SIZE_LONG_INT_64))
#error long int size not established; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64
#endif
#if (defined(SIZE_LONG_INT_32) && defined(SIZE_LONG_INT_64))
#error multiple long int size definitions; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64
#endif
#if !(defined(SIZE_LONG_LONG_INT_32) || defined(SIZE_LONG_LONG_INT_64))
#error long long int size not established; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64
#endif
#if (defined(SIZE_LONG_LONG_INT_32) && defined(SIZE_LONG_LONG_INT_64))
#error multiple long long int size definitions; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64
#endif
typedef enum
{
@ -235,18 +249,406 @@ typedef enum
log2f_zero, log2f_negative, /* 172, 173 */
scalbnl_overflow, scalbnl_underflow, /* 174, 175 */
scalbn_overflow, scalbn_underflow, /* 176, 177 */
scalbnf_overflow, scalbnf_underflow /* 178, 179 */
scalbnf_overflow, scalbnf_underflow, /* 178, 179 */
remquol_by_zero, /* 180 */
remquo_by_zero, /* 181 */
remquof_by_zero, /* 182 */
lrintl_large, lrint_large, lrintf_large, /* 183, 184, 185 */
llrintl_large, llrint_large, llrintf_large, /* 186, 187, 188 */
lroundl_large, lround_large, lroundf_large, /* 189, 190, 191 */
llroundl_large, llround_large, llroundf_large, /* 192, 193, 194 */
fdiml_overflow, fdim_overflow, fdimf_overflow, /* 195, 196, 197 */
nexttowardl_overflow, nexttoward_overflow,
nexttowardf_overflow, /* 198, 199, 200 */
scalblnl_overflow, scalblnl_underflow, /* 201, 202 */
scalbln_overflow, scalbln_underflow, /* 203, 204 */
scalblnf_overflow, scalblnf_underflow, /* 205, 206 */
erfcl_underflow, erfc_underflow, erfcf_underflow, /* 207, 208, 209 */
acosdl_gt_one, acosd_gt_one, acosdf_gt_one, /* 210, 211, 212 */
asindl_gt_one, asind_gt_one, asindf_gt_one, /* 213, 214, 215 */
atan2dl_zero, atan2d_zero, atan2df_zero, /* 216, 217, 218 */
tandl_overflow, tand_overflow, tandf_overflow, /* 219, 220, 221 */
cotdl_overflow, cotd_overflow, cotdf_overflow, /* 222, 223, 224 */
cotl_overflow, cot_overflow, cotf_overflow, /* 225, 226, 227 */
sinhcoshl_overflow, sinhcosh_overflow, sinhcoshf_overflow, /* 228, 229, 230 */
annuityl_by_zero, annuity_by_zero, annuityf_by_zero, /* 231, 232, 233 */
annuityl_less_m1, annuity_less_m1, annuityf_less_m1, /* 234, 235, 236 */
annuityl_overflow, annuity_overflow, annuityf_overflow, /* 237, 238, 239 */
annuityl_underflow, annuity_underflow, annuityf_underflow, /* 240, 241, 242 */
compoundl_by_zero, compound_by_zero, compoundf_by_zero, /* 243, 244, 245 */
compoundl_less_m1, compound_less_m1, compoundf_less_m1, /* 246, 247, 248 */
compoundl_overflow, compound_overflow, compoundf_overflow, /* 249, 250, 251 */
compoundl_underflow, compound_underflow, compoundf_underflow, /* 252, 253, 254 */
tgammal_overflow, tgammal_negative, tgammal_reserve, /* 255, 256, 257 */
tgamma_overflow, tgamma_negative, tgamma_reserve, /* 258, 259, 260 */
tgammaf_overflow, tgammaf_negative, tgammaf_reserve, /* 261, 262, 263 */
} error_types;
void __libm_error_support(void*,void*,void*,error_types);
#ifdef _LIBC
libc_hidden_proto(__libm_error_support)
#endif
#define BIAS_64 1023
#define EXPINF_64 2047
#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI)
#define f64abs(x) ((x) < 0.0 ? -(x) : (x))
#define DOUBLE_HEX(HI, LO) 0x ## LO, 0x ## HI
#if !defined(__USE_EXTERNAL_FPMEMTYP_H__)
#define BIAS_32 0x007F
#define BIAS_64 0x03FF
#define BIAS_80 0x3FFF
#define MAXEXP_32 0x00FE
#define MAXEXP_64 0x07FE
#define MAXEXP_80 0x7FFE
#define EXPINF_32 0x00FF
#define EXPINF_64 0x07FF
#define EXPINF_80 0x7FFF
struct fp32 { /*// sign:1 exponent:8 significand:23 (implied leading 1)*/
#if defined(SIZE_INT_32)
unsigned significand:23;
unsigned exponent:8;
unsigned sign:1;
#elif defined(SIZE_INT_64)
unsigned significand:23;
unsigned exponent:8;
unsigned sign:1;
#endif
};
struct fp64 { /*/ sign:1 exponent:11 significand:52 (implied leading 1)*/
#if defined(SIZE_INT_32)
unsigned lo_significand:32;
unsigned hi_significand:20;
unsigned exponent:11;
unsigned sign:1;
#elif defined(SIZE_INT_64)
unsigned significand:52;
unsigned exponent:11;
unsigned sign:1;
#endif
};
struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */
#if defined(SIZE_INT_32)
unsigned lo_significand;
unsigned hi_significand;
unsigned exponent:15;
unsigned sign:1;
#elif defined(SIZE_INT_64)
unsigned significand;
unsigned exponent:15;
unsigned sign:1;
#endif
};
#endif /*__USE_EXTERNAL_FPMEMTYP_H__*/
/* macros to form a double value in hex representation (unsigned int type) */
#define DOUBLE_HEX(hi,lo) 0x##lo,0x##hi /*LITTLE_ENDIAN*/
/* macros to form a long double value in hex representation (unsigned short type) */
#if defined(_WIN32) || defined(_WIN64)
#define LDOUBLE_ALIGN 16
#else
#define LDOUBLE_ALIGN 12
#endif
#if (LDOUBLE_ALIGN == 16)
#define _XPD_ ,0x0000,0x0000,0x0000
#else /*12*/
#define _XPD_ ,0x0000
#endif
#define LDOUBLE_HEX(w4,w3,w2,w1,w0) 0x##w0,0x##w1,0x##w2,0x##w3,0x##w4 _XPD_ /*LITTLE_ENDIAN*/
/* macros to sign-expand low 'num' bits of 'val' to native integer.
 * The value is shifted left so that bit (num-1) lands in the sign bit and is
 * then shifted right again by the same amount.
 * The whole expansion is wrapped in parentheses so that the macro can be used
 * as an operand of a larger expression: without them, SIGN_EXPAND(v,n) + 1
 * parsed as a right shift by ((width-n) + 1). */
#if defined(SIZE_INT_32)
# define SIGN_EXPAND(val,num) (((int)(val) << (32-(num))) >> (32-(num))) /* sign expand of 'num' LSBs */
#elif defined(SIZE_INT_64)
# define SIGN_EXPAND(val,num) (((int)(val) << (64-(num))) >> (64-(num))) /* sign expand of 'num' LSBs */
#endif
/* macros to form pointers to FP number on-the-fly */
#define FP32(f) ((struct fp32 *)&f)
#define FP64(d) ((struct fp64 *)&d)
#define FP80(ld) ((struct fp80 *)&ld)
/* macros to extract signed low and high doubleword of long double */
#if defined(SIZE_INT_32)
# define HI_DWORD_80(ld) ((((FP80(ld)->sign << 15) | FP80(ld)->exponent) << 16) | \
((FP80(ld)->hi_significand >> 16) & 0xFFFF))
# define LO_DWORD_80(ld) SIGN_EXPAND(FP80(ld)->lo_significand, 32)
#elif defined(SIZE_INT_64)
# define HI_DWORD_80(ld) ((((FP80(ld)->sign << 15) | FP80(ld)->exponent) << 16) | \
((FP80(ld)->significand >> 48) & 0xFFFF))
# define LO_DWORD_80(ld) SIGN_EXPAND(FP80(ld)->significand, 32)
#endif
/* macros to extract hi bits of significand.
* note that explicit high bit do not count (returns as is)
*/
#if defined(SIZE_INT_32)
# define HI_SIGNIFICAND_80(X,NBITS) ((X)->hi_significand >> (31 - (NBITS)))
#elif defined(SIZE_INT_64)
# define HI_SIGNIFICAND_80(X,NBITS) ((X)->significand >> (63 - (NBITS)))
#endif
/* macros to check, whether a significand bits are all zero, or some of them are non-zero.
* note that SIGNIFICAND_ZERO_80 tests high bit also, but SIGNIFICAND_NONZERO_80 does not
*/
#define SIGNIFICAND_ZERO_32(X) ((X)->significand == 0)
#define SIGNIFICAND_NONZERO_32(X) ((X)->significand != 0)
#if defined(SIZE_INT_32)
# define SIGNIFICAND_ZERO_64(X) (((X)->hi_significand == 0) && ((X)->lo_significand == 0))
# define SIGNIFICAND_NONZERO_64(X) (((X)->hi_significand != 0) || ((X)->lo_significand != 0))
#elif defined(SIZE_INT_64)
# define SIGNIFICAND_ZERO_64(X) ((X)->significand == 0)
# define SIGNIFICAND_NONZERO_64(X) ((X)->significand != 0)
#endif
#if defined(SIZE_INT_32)
# define SIGNIFICAND_ZERO_80(X) (((X)->hi_significand == 0x00000000) && ((X)->lo_significand == 0))
# define SIGNIFICAND_NONZERO_80(X) (((X)->hi_significand != 0x80000000) || ((X)->lo_significand != 0))
#elif defined(SIZE_INT_64)
# define SIGNIFICAND_ZERO_80(X) ((X)->significand == 0x0000000000000000)
# define SIGNIFICAND_NONZERO_80(X) ((X)->significand != 0x8000000000000000)
#endif
/* macros to compare a floating-point value (32-, 64- or 80-bit layout) with a constant given as hex digits */
#define SIGNIFICAND_EQ_HEX_32(X,BITS) ((X)->significand == 0x ## BITS)
#define SIGNIFICAND_GT_HEX_32(X,BITS) ((X)->significand > 0x ## BITS)
#define SIGNIFICAND_GE_HEX_32(X,BITS) ((X)->significand >= 0x ## BITS)
#define SIGNIFICAND_LT_HEX_32(X,BITS) ((X)->significand < 0x ## BITS)
#define SIGNIFICAND_LE_HEX_32(X,BITS) ((X)->significand <= 0x ## BITS)
#if defined(SIZE_INT_32)
# define SIGNIFICAND_EQ_HEX_64(X,HI,LO) \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand == 0x ## LO))
# define SIGNIFICAND_GT_HEX_64(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand > 0x ## LO)))
# define SIGNIFICAND_GE_HEX_64(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand >= 0x ## LO)))
# define SIGNIFICAND_LT_HEX_64(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand < 0x ## LO)))
# define SIGNIFICAND_LE_HEX_64(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand <= 0x ## LO)))
#elif defined(SIZE_INT_64)
# define SIGNIFICAND_EQ_HEX_64(X,HI,LO) ((X)->significand == 0x ## HI ## LO)
# define SIGNIFICAND_GT_HEX_64(X,HI,LO) ((X)->significand > 0x ## HI ## LO)
# define SIGNIFICAND_GE_HEX_64(X,HI,LO) ((X)->significand >= 0x ## HI ## LO)
# define SIGNIFICAND_LT_HEX_64(X,HI,LO) ((X)->significand < 0x ## HI ## LO)
# define SIGNIFICAND_LE_HEX_64(X,HI,LO) ((X)->significand <= 0x ## HI ## LO)
#endif
#if defined(SIZE_INT_32)
# define SIGNIFICAND_EQ_HEX_80(X,HI,LO) \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand == 0x ## LO))
# define SIGNIFICAND_GT_HEX_80(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand > 0x ## LO)))
# define SIGNIFICAND_GE_HEX_80(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand >= 0x ## LO)))
# define SIGNIFICAND_LT_HEX_80(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand < 0x ## LO)))
# define SIGNIFICAND_LE_HEX_80(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand <= 0x ## LO)))
#elif defined(SIZE_INT_64)
# define SIGNIFICAND_EQ_HEX_80(X,HI,LO) ((X)->significand == 0x ## HI ## LO)
# define SIGNIFICAND_GT_HEX_80(X,HI,LO) ((X)->significand > 0x ## HI ## LO)
# define SIGNIFICAND_GE_HEX_80(X,HI,LO) ((X)->significand >= 0x ## HI ## LO)
# define SIGNIFICAND_LT_HEX_80(X,HI,LO) ((X)->significand < 0x ## HI ## LO)
# define SIGNIFICAND_LE_HEX_80(X,HI,LO) ((X)->significand <= 0x ## HI ## LO)
#endif
#define VALUE_EQ_HEX_32(X,EXP,BITS) \
(((X)->exponent == (EXP)) && (SIGNIFICAND_EQ_HEX_32(X, BITS)))
#define VALUE_GT_HEX_32(X,EXP,BITS) (((X)->exponent > (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_GT_HEX_32(X, BITS))))
#define VALUE_GE_HEX_32(X,EXP,BITS) (((X)->exponent > (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_GE_HEX_32(X, BITS))))
#define VALUE_LT_HEX_32(X,EXP,BITS) (((X)->exponent < (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_LT_HEX_32(X, BITS))))
#define VALUE_LE_HEX_32(X,EXP,BITS) (((X)->exponent < (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_LE_HEX_32(X, BITS))))
#define VALUE_EQ_HEX_64(X,EXP,HI,LO) \
(((X)->exponent == (EXP)) && (SIGNIFICAND_EQ_HEX_64(X, HI, LO)))
#define VALUE_GT_HEX_64(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_GT_HEX_64(X, HI, LO))))
#define VALUE_GE_HEX_64(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_GE_HEX_64(X, HI, LO))))
#define VALUE_LT_HEX_64(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_LT_HEX_64(X, HI, LO))))
#define VALUE_LE_HEX_64(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_LE_HEX_64(X, HI, LO))))
#define VALUE_EQ_HEX_80(X,EXP,HI,LO) \
(((X)->exponent == (EXP)) && (SIGNIFICAND_EQ_HEX_80(X, HI, LO)))
#define VALUE_GT_HEX_80(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_GT_HEX_80(X, HI, LO))))
#define VALUE_GE_HEX_80(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_GE_HEX_80(X, HI, LO))))
#define VALUE_LT_HEX_80(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_LT_HEX_80(X, HI, LO))))
#define VALUE_LE_HEX_80(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
(((X)->exponent == (EXP)) && (SIGNIFICAND_LE_HEX_80(X, HI, LO))))
/* macros to compare two floating-point values of the same layout (32-, 64- or 80-bit) */
#define SIGNIFICAND_EQ_32(X,Y) ((X)->significand == (Y)->significand)
#define SIGNIFICAND_GT_32(X,Y) ((X)->significand > (Y)->significand)
#define SIGNIFICAND_GE_32(X,Y) ((X)->significand >= (Y)->significand)
#define SIGNIFICAND_LT_32(X,Y) ((X)->significand < (Y)->significand)
#define SIGNIFICAND_LE_32(X,Y) ((X)->significand <= (Y)->significand)
#if defined(SIZE_INT_32)
# define SIGNIFICAND_EQ_64(X,Y) \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand))
# define SIGNIFICAND_GT_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand > (Y)->lo_significand)))
# define SIGNIFICAND_GE_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand)))
# define SIGNIFICAND_LT_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand < (Y)->lo_significand)))
# define SIGNIFICAND_LE_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <= (Y)->lo_significand)))
#elif defined(SIZE_INT_64)
# define SIGNIFICAND_EQ_64(X,Y) ((X)->significand == (Y)->significand)
# define SIGNIFICAND_GT_64(X,Y) ((X)->significand > (Y)->significand)
# define SIGNIFICAND_GE_64(X,Y) ((X)->significand >= (Y)->significand)
# define SIGNIFICAND_LT_64(X,Y) ((X)->significand < (Y)->significand)
# define SIGNIFICAND_LE_64(X,Y) ((X)->significand <= (Y)->significand)
#endif
#if defined(SIZE_INT_32)
# define SIGNIFICAND_EQ_80(X,Y) \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand))
# define SIGNIFICAND_GT_80(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand > (Y)->lo_significand)))
# define SIGNIFICAND_GE_80(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand)))
# define SIGNIFICAND_LT_80(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand < (Y)->lo_significand)))
# define SIGNIFICAND_LE_80(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <= (Y)->lo_significand)))
#elif defined(SIZE_INT_64)
# define SIGNIFICAND_EQ_80(X,Y) ((X)->significand == (Y)->significand)
# define SIGNIFICAND_GT_80(X,Y) ((X)->significand > (Y)->significand)
# define SIGNIFICAND_GE_80(X,Y) ((X)->significand >= (Y)->significand)
# define SIGNIFICAND_LT_80(X,Y) ((X)->significand < (Y)->significand)
# define SIGNIFICAND_LE_80(X,Y) ((X)->significand <= (Y)->significand)
#endif
#define VALUE_EQ_32(X,Y) \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_32(X, Y)))
#define VALUE_GT_32(X,Y) (((X)->exponent > (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GT_32(X, Y))))
#define VALUE_GE_32(X,Y) (((X)->exponent > (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GE_32(X, Y))))
#define VALUE_LT_32(X,Y) (((X)->exponent < (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_32(X, Y))))
#define VALUE_LE_32(X,Y) (((X)->exponent < (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_32(X, Y))))
#define VALUE_EQ_64(X,Y) \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_64(X, Y)))
#define VALUE_GT_64(X,Y) (((X)->exponent > (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GT_64(X, Y))))
#define VALUE_GE_64(X,Y) (((X)->exponent > (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GE_64(X, Y))))
#define VALUE_LT_64(X,Y) (((X)->exponent < (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_64(X, Y))))
#define VALUE_LE_64(X,Y) (((X)->exponent < (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_64(X, Y))))
#define VALUE_EQ_80(X,Y) \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_80(X, Y)))
#define VALUE_GT_80(X,Y) (((X)->exponent > (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GT_80(X, Y))))
#define VALUE_GE_80(X,Y) (((X)->exponent > (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GE_80(X, Y))))
#define VALUE_LT_80(X,Y) (((X)->exponent < (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_80(X, Y))))
#define VALUE_LE_80(X,Y) (((X)->exponent < (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_80(X, Y))))
/* add/subtract 1 ulp macros */
#if defined(SIZE_INT_32)
/* ADD_ULP_80(X): add one unit in the last place to the 80-bit value that X
 * points to (32-bit-int layout: significand split into hi_/lo_significand).
 * lo_significand is incremented first; hi_significand is incremented only when
 * lo_significand wrapped to 0 (the && short-circuits otherwise).
 * If the carry makes hi_significand wrap to 0 (exponent != 0), or reach
 * 0x80000000 while exponent == 0, the explicit high bit is set and the
 * exponent is incremented.
 * Expands to a bare `if` statement and evaluates X several times.
 */
# define ADD_ULP_80(X) \
if ((++(X)->lo_significand == 0) && \
(++(X)->hi_significand == (((X)->exponent == 0) ? 0x80000000 : 0))) \
{ \
(X)->hi_significand |= 0x80000000; \
++(X)->exponent; \
}
/* SUB_ULP_80(X): subtract one unit in the last place from the 80-bit value
 * that X points to (32-bit-int layout).
 * lo_significand is decremented; when it wraps to 0xFFFFFFFF the borrow is
 * taken from hi_significand.  If that borrow cleared the explicit high bit
 * (hi_significand == 0x7FFFFFFF) and the exponent was non-zero, the exponent
 * is decremented, and the explicit bit is set again unless the exponent has
 * just become 0.
 * Expands to a bare `if` statement and evaluates X several times.
 */
# define SUB_ULP_80(X) \
if (--(X)->lo_significand == 0xFFFFFFFF) { \
--(X)->hi_significand; \
if (((X)->exponent != 0) && \
((X)->hi_significand == 0x7FFFFFFF) && \
(--(X)->exponent != 0)) \
{ \
(X)->hi_significand |= 0x80000000; \
} \
}
#elif defined(SIZE_INT_64)
/* ADD_ULP_80(X): add one unit in the last place to the 80-bit value that X
 * points to (64-bit-int layout: a single 64-bit significand field).
 * After the increment, a carry out of the significand is detected as
 *   - significand == 0                   when exponent != 0, or
 *   - significand == 0x8000000000000000  when exponent == 0;
 * in either case the explicit high bit is set and the exponent is incremented.
 * Expands to a bare `if` statement (same shape as the SIZE_INT_32 variant) and
 * evaluates X several times.
 */
# define ADD_ULP_80(X) \
if (++(X)->significand == (((X)->exponent == 0) ? 0x8000000000000000 : 0)) { \
(X)->significand |= 0x8000000000000000; \
++(X)->exponent; \
}
/* SUB_ULP_80(X): subtract one unit in the last place from the 80-bit value
 * that X points to (64-bit-int layout).
 * The significand is decremented unconditionally.  If that cleared the
 * explicit high bit (significand == 0x7FFFFFFFFFFFFFFF) and the exponent was
 * non-zero, the exponent is decremented, and the explicit bit is set again
 * unless the exponent has just become 0.
 * Expands to a braced compound statement and evaluates X several times.
 */
# define SUB_ULP_80(X) \
{ \
--(X)->significand; \
if (((X)->exponent != 0) && \
((X)->significand == 0x7FFFFFFFFFFFFFFF) && \
(--(X)->exponent != 0)) \
{ \
(X)->significand |= 0x8000000000000000; \
} \
}
#endif
/* FP80_DECLARE / FP80_SET / FP80_RESET.
 * On 32-bit Windows builds FP80_SET() and FP80_RESET() save, modify and restore
 * the x87 control word; on every other target all three macros expand to nothing.
 */
#if (defined(_WIN32) && !defined(_WIN64))
#define FP80_DECLARE()
/* Bits OR-ed into the control word by FP80_SET().  0x0300 is the x87
 * precision-control field (bits 8-9); both bits set selects the 64-bit significand.
 */
#define _FPC_64 0x0300
/* Saved control word and the modified copy that FP80_SET() loads.  These are
 * file-scope statics, shared by every use of the macros in the translation unit.
 */
static unsigned short __wControlWord, __wNewControlWord;
/* Store the current control word (fnstcw), then load it back with _FPC_64 OR-ed in. */
#define FP80_SET() { \
__asm { fnstcw word ptr [__wControlWord] } \
__wNewControlWord = __wControlWord | _FPC_64; \
__asm { fldcw word ptr [__wNewControlWord] } \
}
/* Reload the control word that the preceding FP80_SET() saved. */
#define FP80_RESET() { \
__asm { fldcw word ptr [__wControlWord] } \
}
#else /* defined(_WIN32) && !defined(_WIN64) */
#define FP80_DECLARE()
#define FP80_SET()
#define FP80_RESET()
#endif /* defined(_WIN32) && !defined(_WIN64) */
#ifdef _LIBC
# include <math.h>
#else
#if 0
static const unsigned INF[] = {
DOUBLE_HEX(7ff00000, 00000000),
DOUBLE_HEX(fff00000, 00000000)
@ -300,18 +702,18 @@ struct exceptionl
};
#ifdef _MS_
#define MATHERR_F _matherrf
#define MATHERR_D _matherr
#define MATHERR_F _matherrf
#define MATHERR_D _matherr
#else
#define MATHERR_F matherrf
#define MATHERR_D matherr
#define MATHERR_F matherrf
#define MATHERR_D matherr
#endif
# ifdef __cplusplus
#define EXC_DECL_D __exception
#define EXC_DECL_D __exception
#else
// exception is a reserved name in C++
#define EXC_DECL_D exception
#define EXC_DECL_D exception
#endif
extern int MATHERR_F(struct exceptionf*);
@ -324,7 +726,7 @@ extern int matherrl(struct exceptionl*);
#define ERRNO_DOMAIN errno = EDOM
// Add code to support _LIB_VERSION
// Add code to support _LIB_VERSIONIMF
#ifndef _LIBC
typedef enum
{
@ -335,29 +737,19 @@ typedef enum
_ISOC_ // ISO C9X
} _LIB_VERSION_TYPE;
extern _LIB_VERSION_TYPE _LIB_VERSION;
#endif
// This is a run-time variable and may effect
// floating point behavior of the libm functions
#elif defined _LIBC
# if !defined NOT_IN_libc && defined SHARED && defined DO_VERSIONING \
&& !defined HAVE_BROKEN_ALIAS_ATTRIBUTE && !defined NO_HIDDEN
# define __libm_error_support __GI___libm_error_support
# endif
#endif /* __ASSEMBLER__ */
/* Support for compatible assembler handling. */
#if !defined L && defined _LIBC
#define L(name) .L##name
#endif
#ifdef __ELF__
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name
#define ASM_TYPE_DIRECTIVE(name,T) .type name,T
#if !defined( LIBM_BUILD )
#if defined( _DLL )
extern _LIB_VERSION_TYPE __declspec(dllimport) _LIB_VERSIONIMF;
#else
#define ASM_SIZE_DIRECTIVE(name)
#define ASM_TYPE_DIRECTIVE(name,T)
extern _LIB_VERSION_TYPE _LIB_VERSIONIMF;
#endif /* _DLL */
#else
extern int (*pmatherrf)(struct exceptionf*);
extern int (*pmatherr)(struct EXC_DECL_D*);
extern int (*pmatherrl)(struct exceptionl*);
#endif /* LIBM_BUILD */
// This is a run-time variable and may affect
// floating point behavior of the libm functions
#endif

1136
sysdeps/ia64/fpu/s_asinh.S Normal file

File diff suppressed because it is too large Load Diff

937
sysdeps/ia64/fpu/s_asinhf.S Normal file
View File

@ -0,0 +1,937 @@
.file "asinhf.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// ==============================================================
// History
// ==============================================================
// 04/02/01 Initial version
// 04/19/01 Improved speed of the paths #1,2,3,4,5
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 05/21/03 Improved performance, fixed to handle unorms
//
// API
// ==============================================================
// float asinhf(float)
//
// Overview of operation
// ==============================================================
//
// There are 7 paths:
// 1. x = 0.0
// Return asinhf(x) = 0.0
// 2. 0.0 <|x| < 2^(-5)
// Return asinhf(x) = Pol5(x), where Pol5(x) = ((x^2)*C1 + C0)*x^3 + x
// 3. 2^(-5) <= |x| < 2^51
// Return asinhf(x) = sign(x)*(log(|x| + sqrt(x^2 + 1.0)))
// To compute x + sqrt(x^2 + 1.0) modified Newton Raphson method is used
// (2 iterations)
// Algorithm description for log function see below.
//
// 4. 2^51 <= |x| < +INF
// Return asinhf(x) = sign(x)*log(2*|x|)
// Algorithm description for log function see below.
//
// 5. x = INF
// Return asinhf(x) = INF
//
// 6. x = [S,Q]NaN
// Return asinhf(x) = QNaN
//
// 7. x = denormal
// Return asinhf(x) = x
//
//==============================================================
// Algorithm Description for log(x) function
// Below we are using the fact that inequality x - 1.0 > 2^(-6) is always
// true for this asinh implementation
//
// Consider x = 2^N 1.f1 f2 f3 f4...f63
// Log(x) = log(frcpa(x) x/frcpa(x))
// = log(1/frcpa(x)) + log(frcpa(x) x)
// = -log(frcpa(x)) + log(frcpa(x) x)
//
// frcpa(x) = 2^-N frcpa(1.f1 f2 ... f63)
//
// -log(frcpa(x)) = -log(C)
// = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
//
// -log(frcpa(x)) = -log(C)
// = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
//
// -log(frcpa(x)) = -log(C)
// = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
//
// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
//
// Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
// Log(x) = +Nlog2 - log(frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
// Log(x) = +Nlog2 + T + log(frcpa(x) x)
//
// Log(x) = +Nlog2 + T + log(C x)
//
// Cx = 1 + r
//
// Log(x) = +Nlog2 + T + log(1+r)
// Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
//
// 1.f1 f2 ... f8 has 256 entries.
// They are 1 + k/2^8, k = 0 ... 255
// These 256 values are the table entries.
//
// Implementation
//==============================================================
// C = frcpa(x)
// r = C * x - 1
//
// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4
//
// x = f * 2^n where f is 1.f_1f_2f_3....f_63
// Nfloat = float(n) where n is the true unbiased exponent
// pre-index = f_1f_2....f_8
// index = pre_index * 8
// get the dxt table entry at index + offset = T
//
// result = (T + Nfloat * log(2)) + rseries
//
// The T table is calculated as follows
// Form x_k = 1 + k/2^8 where k goes from 0... 255
// y_k = frcpa(x_k)
// log(1/y_k) in quad and round to double-extended
//
//
// Registers used
//==============================================================
// Floating Point registers used:
// f8, input
// f9 -> f15, f32 -> f55
// General registers used:
// r14 -> r27
// Predicate registers used:
// p6 -> p14
// p6 to filter out case when x = [Q,S]NaN or INF or zero
// p7 to filter out case when x < 0.0
// p8 to select path #2
// p11 to filter out case when x >= 0
// p12 to filter out case when x = + denormal
// p13 to select path #4
// p14 to filter out case when x = - denormal
// Assembly macros
//==============================================================
// General-register aliases (r14 - r27) used by asinhf.
log_GR_exp_17_ones = r14 // loaded with 0x1ffff; ANDed with the getf.exp result to mask off the sign
log_GR_signexp_f8 = r15
log_table_address2 = r16 // NR_table_address + 0x20; walks log_table_2 with post-increment loads
log_GR_exp_16_ones = r17 // loaded with 0xffff, the exponent bias
log_GR_exp_f8 = r18
log_GR_true_exp_f8 = r19
log_GR_significand_f8 = r20
log_GR_index = r21
log_GR_comp2 = r22 // loaded with 0x10032 (bias + 51): threshold for path 4, |x| >= 2^51
asinh_GR_f8 = r23
asinh_GR_comp = r24 // loaded with 0xfffa (bias - 5): threshold for path 2, |x| < 2^-5
// NOTE(review): asinh_GR_f8 is assigned a second time here (r23 above, r25 below).
// Presumably the later assignment is the one in effect and r23 is unused -- confirm
// against the assembler's symbol-assignment rules before removing either line.
asinh_GR_f8 = r25 // receives getf.exp of x (sign and biased exponent)
log_table_address3 = r26
NR_table_address = r27 // address of log_table_1, loaded through the @ltoff entry
//==============================================================
// Floating-point register aliases (f9 - f15, f32 - f55) used by asinhf.
log_y = f9 // y = x^2 + 1
NR1 = f10 // 0.5, first double of log_table_2
NR2 = f11 // 3.0, second double of log_table_2
log_y_rs = f12 // z = frsqrta(y), the initial approximation of 1/sqrt(y)
log_y_rs_iter = f13
log_y_rs_iter1 = f14
fNormX = f15 // normalized copy of x
asinh_w_sq = f32 // x^2
log_arg_early = f33
log_y_rs_iter2 = f34
log_P3 = f35 // p3 from log_table_1
log_P2 = f36 // p2 from log_table_1
log_P1 = f37 // p1 from log_table_1
log2 = f38 // log(2) from log_table_1
log_C0 = f39 // C0 from log_table_2 (double-extended)
log_C1 = f40 // C1 from log_table_2 (double-extended)
asinh_f8 = f41 // |x|: fNormX when x >= 0, -fNormX when x < 0
log_C = f42
log_arg = f43 // initialised to x + x
asinh_w_cube = f44
log_int_Nfloat = f45
log_r = f46
log_rsq = f47
asinh_w_1 = f48
log_rp_p32 = f49
log_rcube = f50
log_rp_p10 = f51
log_rp_p2 = f52
log_Nfloat = f53
log_T = f54
log_T_plus_Nlog2 = f55
// Data tables
//==============================================================
RODATA
.align 16
// log_table_1: four IEEE double constants (32 bytes in total).
// p1..p3 are the coefficients of rseries = r + P1*r^2 + P2*r^3 + P3*r^4 described
// in the header; log(2) multiplies Nfloat.  The main path reads them in pairs with
// ldfpd: (p3, p2) first, then (p1, log(2)).
LOCAL_OBJECT_START(log_table_1)
data8 0xbfd0001008f39d59 // p3 (about -1/4)
data8 0x3fd5556073e0c45a // p2 (about 1/3)
data8 0xbfdffffffffaea15 // p1 (about -1/2)
data8 0x3fe62e42fefa39ef // log(2)
LOCAL_OBJECT_END(log_table_1)
// log_table_2: constants for the 1/sqrt(y) refinement and the small-|x| polynomial.
// Two IEEE doubles (0.5 and 3.0, read together with ldfpd into NR1/NR2) are followed
// by two double-extended values, each emitted as a 64-bit significand plus a
// sign/exponent word padded to 8 bytes, i.e. 16 bytes per entry (read with ldfe).
// C0 and C1 are the coefficients of Pol5(x) = ((x^2)*C1 + C0)*x^3 + x.
// The code addresses this table as log_table_1 + 0x20, so it assumes log_table_2
// is placed immediately after the 32-byte log_table_1 -- keep the two adjacent.
LOCAL_OBJECT_START(log_table_2)
data8 0x3FE0000000000000 // 0.5
data8 0x4008000000000000 // 3.0
data8 0x9979C79685A5EB16, 0x00003FFB // C1 3FFB9979C79685A5EB16
data8 0xAAAAA96F80786D62, 0x0000BFFC // C0 BFFCAAAAA96F80786D62
LOCAL_OBJECT_END(log_table_2)
LOCAL_OBJECT_START(log_table_3)
data8 0x3F60040155D5889E //log(1/frcpa(1+ 0/256)
data8 0x3F78121214586B54 //log(1/frcpa(1+ 1/256)
data8 0x3F841929F96832F0 //log(1/frcpa(1+ 2/256)
data8 0x3F8C317384C75F06 //log(1/frcpa(1+ 3/256)
data8 0x3F91A6B91AC73386 //log(1/frcpa(1+ 4/256)
data8 0x3F95BA9A5D9AC039 //log(1/frcpa(1+ 5/256)
data8 0x3F99D2A8074325F4 //log(1/frcpa(1+ 6/256)
data8 0x3F9D6B2725979802 //log(1/frcpa(1+ 7/256)
data8 0x3FA0C58FA19DFAAA //log(1/frcpa(1+ 8/256)
data8 0x3FA2954C78CBCE1B //log(1/frcpa(1+ 9/256)
data8 0x3FA4A94D2DA96C56 //log(1/frcpa(1+ 10/256)
data8 0x3FA67C94F2D4BB58 //log(1/frcpa(1+ 11/256)
data8 0x3FA85188B630F068 //log(1/frcpa(1+ 12/256)
data8 0x3FAA6B8ABE73AF4C //log(1/frcpa(1+ 13/256)
data8 0x3FAC441E06F72A9E //log(1/frcpa(1+ 14/256)
data8 0x3FAE1E6713606D07 //log(1/frcpa(1+ 15/256)
data8 0x3FAFFA6911AB9301 //log(1/frcpa(1+ 16/256)
data8 0x3FB0EC139C5DA601 //log(1/frcpa(1+ 17/256)
data8 0x3FB1DBD2643D190B //log(1/frcpa(1+ 18/256)
data8 0x3FB2CC7284FE5F1C //log(1/frcpa(1+ 19/256)
data8 0x3FB3BDF5A7D1EE64 //log(1/frcpa(1+ 20/256)
data8 0x3FB4B05D7AA012E0 //log(1/frcpa(1+ 21/256)
data8 0x3FB580DB7CEB5702 //log(1/frcpa(1+ 22/256)
data8 0x3FB674F089365A7A //log(1/frcpa(1+ 23/256)
data8 0x3FB769EF2C6B568D //log(1/frcpa(1+ 24/256)
data8 0x3FB85FD927506A48 //log(1/frcpa(1+ 25/256)
data8 0x3FB9335E5D594989 //log(1/frcpa(1+ 26/256)
data8 0x3FBA2B0220C8E5F5 //log(1/frcpa(1+ 27/256)
data8 0x3FBB0004AC1A86AC //log(1/frcpa(1+ 28/256)
data8 0x3FBBF968769FCA11 //log(1/frcpa(1+ 29/256)
data8 0x3FBCCFEDBFEE13A8 //log(1/frcpa(1+ 30/256)
data8 0x3FBDA727638446A2 //log(1/frcpa(1+ 31/256)
data8 0x3FBEA3257FE10F7A //log(1/frcpa(1+ 32/256)
data8 0x3FBF7BE9FEDBFDE6 //log(1/frcpa(1+ 33/256)
data8 0x3FC02AB352FF25F4 //log(1/frcpa(1+ 34/256)
data8 0x3FC097CE579D204D //log(1/frcpa(1+ 35/256)
data8 0x3FC1178E8227E47C //log(1/frcpa(1+ 36/256)
data8 0x3FC185747DBECF34 //log(1/frcpa(1+ 37/256)
data8 0x3FC1F3B925F25D41 //log(1/frcpa(1+ 38/256)
data8 0x3FC2625D1E6DDF57 //log(1/frcpa(1+ 39/256)
data8 0x3FC2D1610C86813A //log(1/frcpa(1+ 40/256)
data8 0x3FC340C59741142E //log(1/frcpa(1+ 41/256)
data8 0x3FC3B08B6757F2A9 //log(1/frcpa(1+ 42/256)
data8 0x3FC40DFB08378003 //log(1/frcpa(1+ 43/256)
data8 0x3FC47E74E8CA5F7C //log(1/frcpa(1+ 44/256)
data8 0x3FC4EF51F6466DE4 //log(1/frcpa(1+ 45/256)
data8 0x3FC56092E02BA516 //log(1/frcpa(1+ 46/256)
data8 0x3FC5D23857CD74D5 //log(1/frcpa(1+ 47/256)
data8 0x3FC6313A37335D76 //log(1/frcpa(1+ 48/256)
data8 0x3FC6A399DABBD383 //log(1/frcpa(1+ 49/256)
data8 0x3FC70337DD3CE41B //log(1/frcpa(1+ 50/256)
data8 0x3FC77654128F6127 //log(1/frcpa(1+ 51/256)
data8 0x3FC7E9D82A0B022D //log(1/frcpa(1+ 52/256)
data8 0x3FC84A6B759F512F //log(1/frcpa(1+ 53/256)
data8 0x3FC8AB47D5F5A310 //log(1/frcpa(1+ 54/256)
data8 0x3FC91FE49096581B //log(1/frcpa(1+ 55/256)
data8 0x3FC981634011AA75 //log(1/frcpa(1+ 56/256)
data8 0x3FC9F6C407089664 //log(1/frcpa(1+ 57/256)
data8 0x3FCA58E729348F43 //log(1/frcpa(1+ 58/256)
data8 0x3FCABB55C31693AD //log(1/frcpa(1+ 59/256)
data8 0x3FCB1E104919EFD0 //log(1/frcpa(1+ 60/256)
data8 0x3FCB94EE93E367CB //log(1/frcpa(1+ 61/256)
data8 0x3FCBF851C067555F //log(1/frcpa(1+ 62/256)
data8 0x3FCC5C0254BF23A6 //log(1/frcpa(1+ 63/256)
data8 0x3FCCC000C9DB3C52 //log(1/frcpa(1+ 64/256)
data8 0x3FCD244D99C85674 //log(1/frcpa(1+ 65/256)
data8 0x3FCD88E93FB2F450 //log(1/frcpa(1+ 66/256)
data8 0x3FCDEDD437EAEF01 //log(1/frcpa(1+ 67/256)
data8 0x3FCE530EFFE71012 //log(1/frcpa(1+ 68/256)
data8 0x3FCEB89A1648B971 //log(1/frcpa(1+ 69/256)
data8 0x3FCF1E75FADF9BDE //log(1/frcpa(1+ 70/256)
data8 0x3FCF84A32EAD7C35 //log(1/frcpa(1+ 71/256)
data8 0x3FCFEB2233EA07CD //log(1/frcpa(1+ 72/256)
data8 0x3FD028F9C7035C1C //log(1/frcpa(1+ 73/256)
data8 0x3FD05C8BE0D9635A //log(1/frcpa(1+ 74/256)
data8 0x3FD085EB8F8AE797 //log(1/frcpa(1+ 75/256)
data8 0x3FD0B9C8E32D1911 //log(1/frcpa(1+ 76/256)
data8 0x3FD0EDD060B78081 //log(1/frcpa(1+ 77/256)
data8 0x3FD122024CF0063F //log(1/frcpa(1+ 78/256)
data8 0x3FD14BE2927AECD4 //log(1/frcpa(1+ 79/256)
data8 0x3FD180618EF18ADF //log(1/frcpa(1+ 80/256)
data8 0x3FD1B50BBE2FC63B //log(1/frcpa(1+ 81/256)
data8 0x3FD1DF4CC7CF242D //log(1/frcpa(1+ 82/256)
data8 0x3FD214456D0EB8D4 //log(1/frcpa(1+ 83/256)
data8 0x3FD23EC5991EBA49 //log(1/frcpa(1+ 84/256)
data8 0x3FD2740D9F870AFB //log(1/frcpa(1+ 85/256)
data8 0x3FD29ECDABCDFA04 //log(1/frcpa(1+ 86/256)
data8 0x3FD2D46602ADCCEE //log(1/frcpa(1+ 87/256)
data8 0x3FD2FF66B04EA9D4 //log(1/frcpa(1+ 88/256)
data8 0x3FD335504B355A37 //log(1/frcpa(1+ 89/256)
data8 0x3FD360925EC44F5D //log(1/frcpa(1+ 90/256)
data8 0x3FD38BF1C3337E75 //log(1/frcpa(1+ 91/256)
data8 0x3FD3C25277333184 //log(1/frcpa(1+ 92/256)
data8 0x3FD3EDF463C1683E //log(1/frcpa(1+ 93/256)
data8 0x3FD419B423D5E8C7 //log(1/frcpa(1+ 94/256)
data8 0x3FD44591E0539F49 //log(1/frcpa(1+ 95/256)
data8 0x3FD47C9175B6F0AD //log(1/frcpa(1+ 96/256)
data8 0x3FD4A8B341552B09 //log(1/frcpa(1+ 97/256)
data8 0x3FD4D4F3908901A0 //log(1/frcpa(1+ 98/256)
data8 0x3FD501528DA1F968 //log(1/frcpa(1+ 99/256)
data8 0x3FD52DD06347D4F6 //log(1/frcpa(1+ 100/256)
data8 0x3FD55A6D3C7B8A8A //log(1/frcpa(1+ 101/256)
data8 0x3FD5925D2B112A59 //log(1/frcpa(1+ 102/256)
data8 0x3FD5BF406B543DB2 //log(1/frcpa(1+ 103/256)
data8 0x3FD5EC433D5C35AE //log(1/frcpa(1+ 104/256)
data8 0x3FD61965CDB02C1F //log(1/frcpa(1+ 105/256)
data8 0x3FD646A84935B2A2 //log(1/frcpa(1+ 106/256)
data8 0x3FD6740ADD31DE94 //log(1/frcpa(1+ 107/256)
data8 0x3FD6A18DB74A58C5 //log(1/frcpa(1+ 108/256)
data8 0x3FD6CF31058670EC //log(1/frcpa(1+ 109/256)
data8 0x3FD6F180E852F0BA //log(1/frcpa(1+ 110/256)
data8 0x3FD71F5D71B894F0 //log(1/frcpa(1+ 111/256)
data8 0x3FD74D5AEFD66D5C //log(1/frcpa(1+ 112/256)
data8 0x3FD77B79922BD37E //log(1/frcpa(1+ 113/256)
data8 0x3FD7A9B9889F19E2 //log(1/frcpa(1+ 114/256)
data8 0x3FD7D81B037EB6A6 //log(1/frcpa(1+ 115/256)
data8 0x3FD8069E33827231 //log(1/frcpa(1+ 116/256)
data8 0x3FD82996D3EF8BCB //log(1/frcpa(1+ 117/256)
data8 0x3FD85855776DCBFB //log(1/frcpa(1+ 118/256)
data8 0x3FD8873658327CCF //log(1/frcpa(1+ 119/256)
data8 0x3FD8AA75973AB8CF //log(1/frcpa(1+ 120/256)
data8 0x3FD8D992DC8824E5 //log(1/frcpa(1+ 121/256)
data8 0x3FD908D2EA7D9512 //log(1/frcpa(1+ 122/256)
data8 0x3FD92C59E79C0E56 //log(1/frcpa(1+ 123/256)
data8 0x3FD95BD750EE3ED3 //log(1/frcpa(1+ 124/256)
data8 0x3FD98B7811A3EE5B //log(1/frcpa(1+ 125/256)
data8 0x3FD9AF47F33D406C //log(1/frcpa(1+ 126/256)
data8 0x3FD9DF270C1914A8 //log(1/frcpa(1+ 127/256)
data8 0x3FDA0325ED14FDA4 //log(1/frcpa(1+ 128/256)
data8 0x3FDA33440224FA79 //log(1/frcpa(1+ 129/256)
data8 0x3FDA57725E80C383 //log(1/frcpa(1+ 130/256)
data8 0x3FDA87D0165DD199 //log(1/frcpa(1+ 131/256)
data8 0x3FDAAC2E6C03F896 //log(1/frcpa(1+ 132/256)
data8 0x3FDADCCC6FDF6A81 //log(1/frcpa(1+ 133/256)
data8 0x3FDB015B3EB1E790 //log(1/frcpa(1+ 134/256)
data8 0x3FDB323A3A635948 //log(1/frcpa(1+ 135/256)
data8 0x3FDB56FA04462909 //log(1/frcpa(1+ 136/256)
data8 0x3FDB881AA659BC93 //log(1/frcpa(1+ 137/256)
data8 0x3FDBAD0BEF3DB165 //log(1/frcpa(1+ 138/256)
data8 0x3FDBD21297781C2F //log(1/frcpa(1+ 139/256)
data8 0x3FDC039236F08819 //log(1/frcpa(1+ 140/256)
data8 0x3FDC28CB1E4D32FD //log(1/frcpa(1+ 141/256)
data8 0x3FDC4E19B84723C2 //log(1/frcpa(1+ 142/256)
data8 0x3FDC7FF9C74554C9 //log(1/frcpa(1+ 143/256)
data8 0x3FDCA57B64E9DB05 //log(1/frcpa(1+ 144/256)
data8 0x3FDCCB130A5CEBB0 //log(1/frcpa(1+ 145/256)
data8 0x3FDCF0C0D18F326F //log(1/frcpa(1+ 146/256)
data8 0x3FDD232075B5A201 //log(1/frcpa(1+ 147/256)
data8 0x3FDD490246DEFA6B //log(1/frcpa(1+ 148/256)
data8 0x3FDD6EFA918D25CD //log(1/frcpa(1+ 149/256)
data8 0x3FDD9509707AE52F //log(1/frcpa(1+ 150/256)
data8 0x3FDDBB2EFE92C554 //log(1/frcpa(1+ 151/256)
data8 0x3FDDEE2F3445E4AF //log(1/frcpa(1+ 152/256)
data8 0x3FDE148A1A2726CE //log(1/frcpa(1+ 153/256)
data8 0x3FDE3AFC0A49FF40 //log(1/frcpa(1+ 154/256)
data8 0x3FDE6185206D516E //log(1/frcpa(1+ 155/256)
data8 0x3FDE882578823D52 //log(1/frcpa(1+ 156/256)
data8 0x3FDEAEDD2EAC990C //log(1/frcpa(1+ 157/256)
data8 0x3FDED5AC5F436BE3 //log(1/frcpa(1+ 158/256)
data8 0x3FDEFC9326D16AB9 //log(1/frcpa(1+ 159/256)
data8 0x3FDF2391A2157600 //log(1/frcpa(1+ 160/256)
data8 0x3FDF4AA7EE03192D //log(1/frcpa(1+ 161/256)
data8 0x3FDF71D627C30BB0 //log(1/frcpa(1+ 162/256)
data8 0x3FDF991C6CB3B379 //log(1/frcpa(1+ 163/256)
data8 0x3FDFC07ADA69A910 //log(1/frcpa(1+ 164/256)
data8 0x3FDFE7F18EB03D3E //log(1/frcpa(1+ 165/256)
data8 0x3FE007C053C5002E //log(1/frcpa(1+ 166/256)
data8 0x3FE01B942198A5A1 //log(1/frcpa(1+ 167/256)
data8 0x3FE02F74400C64EB //log(1/frcpa(1+ 168/256)
data8 0x3FE04360BE7603AD //log(1/frcpa(1+ 169/256)
data8 0x3FE05759AC47FE34 //log(1/frcpa(1+ 170/256)
data8 0x3FE06B5F1911CF52 //log(1/frcpa(1+ 171/256)
data8 0x3FE078BF0533C568 //log(1/frcpa(1+ 172/256)
data8 0x3FE08CD9687E7B0E //log(1/frcpa(1+ 173/256)
data8 0x3FE0A10074CF9019 //log(1/frcpa(1+ 174/256)
data8 0x3FE0B5343A234477 //log(1/frcpa(1+ 175/256)
data8 0x3FE0C974C89431CE //log(1/frcpa(1+ 176/256)
data8 0x3FE0DDC2305B9886 //log(1/frcpa(1+ 177/256)
data8 0x3FE0EB524BAFC918 //log(1/frcpa(1+ 178/256)
data8 0x3FE0FFB54213A476 //log(1/frcpa(1+ 179/256)
data8 0x3FE114253DA97D9F //log(1/frcpa(1+ 180/256)
data8 0x3FE128A24F1D9AFF //log(1/frcpa(1+ 181/256)
data8 0x3FE1365252BF0865 //log(1/frcpa(1+ 182/256)
data8 0x3FE14AE558B4A92D //log(1/frcpa(1+ 183/256)
data8 0x3FE15F85A19C765B //log(1/frcpa(1+ 184/256)
data8 0x3FE16D4D38C119FA //log(1/frcpa(1+ 185/256)
data8 0x3FE18203C20DD133 //log(1/frcpa(1+ 186/256)
data8 0x3FE196C7BC4B1F3B //log(1/frcpa(1+ 187/256)
data8 0x3FE1A4A738B7A33C //log(1/frcpa(1+ 188/256)
data8 0x3FE1B981C0C9653D //log(1/frcpa(1+ 189/256)
data8 0x3FE1CE69E8BB106B //log(1/frcpa(1+ 190/256)
data8 0x3FE1DC619DE06944 //log(1/frcpa(1+ 191/256)
data8 0x3FE1F160A2AD0DA4 //log(1/frcpa(1+ 192/256)
data8 0x3FE2066D7740737E //log(1/frcpa(1+ 193/256)
data8 0x3FE2147DBA47A394 //log(1/frcpa(1+ 194/256)
data8 0x3FE229A1BC5EBAC3 //log(1/frcpa(1+ 195/256)
data8 0x3FE237C1841A502E //log(1/frcpa(1+ 196/256)
data8 0x3FE24CFCE6F80D9A //log(1/frcpa(1+ 197/256)
data8 0x3FE25B2C55CD5762 //log(1/frcpa(1+ 198/256)
data8 0x3FE2707F4D5F7C41 //log(1/frcpa(1+ 199/256)
data8 0x3FE285E0842CA384 //log(1/frcpa(1+ 200/256)
data8 0x3FE294294708B773 //log(1/frcpa(1+ 201/256)
data8 0x3FE2A9A2670AFF0C //log(1/frcpa(1+ 202/256)
data8 0x3FE2B7FB2C8D1CC1 //log(1/frcpa(1+ 203/256)
data8 0x3FE2C65A6395F5F5 //log(1/frcpa(1+ 204/256)
data8 0x3FE2DBF557B0DF43 //log(1/frcpa(1+ 205/256)
data8 0x3FE2EA64C3F97655 //log(1/frcpa(1+ 206/256)
data8 0x3FE3001823684D73 //log(1/frcpa(1+ 207/256)
data8 0x3FE30E97E9A8B5CD //log(1/frcpa(1+ 208/256)
data8 0x3FE32463EBDD34EA //log(1/frcpa(1+ 209/256)
data8 0x3FE332F4314AD796 //log(1/frcpa(1+ 210/256)
data8 0x3FE348D90E7464D0 //log(1/frcpa(1+ 211/256)
data8 0x3FE35779F8C43D6E //log(1/frcpa(1+ 212/256)
data8 0x3FE36621961A6A99 //log(1/frcpa(1+ 213/256)
data8 0x3FE37C299F3C366A //log(1/frcpa(1+ 214/256)
data8 0x3FE38AE2171976E7 //log(1/frcpa(1+ 215/256)
data8 0x3FE399A157A603E7 //log(1/frcpa(1+ 216/256)
data8 0x3FE3AFCCFE77B9D1 //log(1/frcpa(1+ 217/256)
data8 0x3FE3BE9D503533B5 //log(1/frcpa(1+ 218/256)
data8 0x3FE3CD7480B4A8A3 //log(1/frcpa(1+ 219/256)
data8 0x3FE3E3C43918F76C //log(1/frcpa(1+ 220/256)
data8 0x3FE3F2ACB27ED6C7 //log(1/frcpa(1+ 221/256)
data8 0x3FE4019C2125CA93 //log(1/frcpa(1+ 222/256)
data8 0x3FE4181061389722 //log(1/frcpa(1+ 223/256)
data8 0x3FE42711518DF545 //log(1/frcpa(1+ 224/256)
data8 0x3FE436194E12B6BF //log(1/frcpa(1+ 225/256)
data8 0x3FE445285D68EA69 //log(1/frcpa(1+ 226/256)
data8 0x3FE45BCC464C893A //log(1/frcpa(1+ 227/256)
data8 0x3FE46AED21F117FC //log(1/frcpa(1+ 228/256)
data8 0x3FE47A1527E8A2D3 //log(1/frcpa(1+ 229/256)
data8 0x3FE489445EFFFCCC //log(1/frcpa(1+ 230/256)
data8 0x3FE4A018BCB69835 //log(1/frcpa(1+ 231/256)
data8 0x3FE4AF5A0C9D65D7 //log(1/frcpa(1+ 232/256)
data8 0x3FE4BEA2A5BDBE87 //log(1/frcpa(1+ 233/256)
data8 0x3FE4CDF28F10AC46 //log(1/frcpa(1+ 234/256)
data8 0x3FE4DD49CF994058 //log(1/frcpa(1+ 235/256)
data8 0x3FE4ECA86E64A684 //log(1/frcpa(1+ 236/256)
data8 0x3FE503C43CD8EB68 //log(1/frcpa(1+ 237/256)
data8 0x3FE513356667FC57 //log(1/frcpa(1+ 238/256)
data8 0x3FE522AE0738A3D8 //log(1/frcpa(1+ 239/256)
data8 0x3FE5322E26867857 //log(1/frcpa(1+ 240/256)
data8 0x3FE541B5CB979809 //log(1/frcpa(1+ 241/256)
data8 0x3FE55144FDBCBD62 //log(1/frcpa(1+ 242/256)
data8 0x3FE560DBC45153C7 //log(1/frcpa(1+ 243/256)
data8 0x3FE5707A26BB8C66 //log(1/frcpa(1+ 244/256)
data8 0x3FE587F60ED5B900 //log(1/frcpa(1+ 245/256)
data8 0x3FE597A7977C8F31 //log(1/frcpa(1+ 246/256)
data8 0x3FE5A760D634BB8B //log(1/frcpa(1+ 247/256)
data8 0x3FE5B721D295F10F //log(1/frcpa(1+ 248/256)
data8 0x3FE5C6EA94431EF9 //log(1/frcpa(1+ 249/256)
data8 0x3FE5D6BB22EA86F6 //log(1/frcpa(1+ 250/256)
data8 0x3FE5E6938645D390 //log(1/frcpa(1+ 251/256)
data8 0x3FE5F673C61A2ED2 //log(1/frcpa(1+ 252/256)
data8 0x3FE6065BEA385926 //log(1/frcpa(1+ 253/256)
data8 0x3FE6164BFA7CC06B //log(1/frcpa(1+ 254/256)
data8 0x3FE62643FECF9743 //log(1/frcpa(1+ 255/256)
LOCAL_OBJECT_END(log_table_3)
.section .text
//==============================================================
// float asinhf(float x)
//
// Single precision inverse hyperbolic sine.
// The argument arrives in f8 and the result is returned in f8.
//
// The path is selected from the class and biased exponent of x:
//   path 1: x is NaN, infinity or zero
//             result = x + x (this quietizes a signaling NaN)
//   path 2: 0 < |x| < 2^-5
//             result = x + x^3*(C1*x^2 + C0)
//   path 3: 2^-5 <= |x| < 2^51 (main path)
//             result = sign(x)*log(|x| + sqrt(x^2 + 1))
//             sqrt(x^2 + 1) is formed as y*z, where z starts from
//             frsqrta(y) and is refined by two Newton-Raphson steps
//   path 4: |x| >= 2^51
//             result = sign(x)*log(2*|x|)
//   x still unnormal after fnorm.s1:
//             result = x + x^2 if x < 0, x - x^2 otherwise
//
// log(a) is evaluated in paths 3 and 4 as
//   N*log2 + T + r*(1 + P1*r + P2*r^2 + P3*r^3)
// where C = frcpa(a), r = C*a - 1, N is the unbiased exponent of a
// and T is a table entry selected by the 8 significand bits that
// follow the leading bit of a.
//
// Constants are read at these byte offsets from log_table_1:
//   0x00 P3,P2   0x10 P1,log2   0x20 NR1,NR2   0x30 C1   0x40 C0
//   0x50 start of the 8-byte T entries
//        (presumably log_table_3 placed right behind -- TODO confirm
//         against the table definitions earlier in the file)
//
// The symbolic register names used below (log_*, asinh_*, NR*,
// fNormX) are defined outside of this block.
//==============================================================
GLOBAL_LIBM_ENTRY(asinhf)

{ .mfi
      getf.exp      asinh_GR_f8 = f8             // Must recompute later if x unorm
      fclass.m      p12,p0 = f8, 0x0b            // Test x unorm
      mov           log_GR_exp_17_ones = 0x1ffff // Mask that keeps the 17 exponent
                                                 // bits of a getf.exp result
}
{ .mfi
      addl          NR_table_address = @ltoff(log_table_1), gp
      fma.s1        log_y = f8, f8, f1           // y = x^2 + 1
      mov           asinh_GR_comp = 0xfffa       // Biased exponent of 2^-5
}
;;

{ .mfi
      mov           log_GR_exp_16_ones = 0xffff  //BIAS
      fclass.m      p6,p0 = f8, 0xe7             // Test for x = NaN and inf and zero
      mov           log_GR_comp2 = 0x10032       // Biased exponent of 2^51
}
{ .mfi
      ld8           NR_table_address = [NR_table_address]
      fma.s1        asinh_w_sq = f8,f8,f0        // x^2
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      fcmp.lt.s1    p7,p11 = f8,f0               // if x<0 then p7, else p11
      nop.i         0
}
{ .mfb
      nop.m         0
      fnorm.s1      fNormX = f8                  // Normalize x
(p12) br.cond.spnt  ASINH_UNORM                  // Branch if x=unorm
}
;;

ASINH_COMMON:
// Return here if x=unorm and not denorm
{ .mfi
      //to get second table address
      adds          log_table_address2 = 0x20, NR_table_address
      fma.s1        log_arg = f8,f1,f8           // 2*x, the log argument of path 4
      nop.i         0                            // Third slot written out explicitly,
                                                 // as in every other bundle here
}
{ .mfb
      nop.m         0
(p6)  fma.s.s0      f8 = f8,f1,f8                // quietize nan result if x=nan
(p6)  br.ret.spnt   b0                           // Exit for x=nan and inf and zero
}
;;

{ .mfi
      ldfpd         NR1,NR2 = [log_table_address2],16
      frsqrta.s1    log_y_rs,p0 = log_y          // z=1/sqrt(y)
      nop.i         0
}
;;

{ .mfi
      ldfe          log_C1 = [log_table_address2],16
      nop.f         0
      // Drop the sign bit, keep the biased exponent of x
      and           asinh_GR_f8 = asinh_GR_f8,log_GR_exp_17_ones
}
;;

{ .mib
      ldfe          log_C0 = [log_table_address2],16
      cmp.le        p13,p0 = log_GR_comp2,asinh_GR_f8
(p13) br.cond.spnt  LOG_COMMON1                  // Branch if path 4: |x| >= 2^51
}
;;

{ .mfi
      nop.m         0
      fma.s1        log_y_rs_iter = log_y_rs,log_y,f0 // y*z
      nop.i         0
}
;;

// asinh_f8 = |x|: copy of the normalized x if x >= 0, its negation if x < 0
.pred.rel "mutex",p7,p11
{ .mfi
      nop.m         0
(p11) mov           asinh_f8 = fNormX
      nop.i         0
}
{ .mfb
      cmp.gt        p8,p0 = asinh_GR_comp,asinh_GR_f8
(p7)  fnma.s1       asinh_f8 = fNormX,f1,f0
(p8)  br.cond.spnt  ASINH_NEAR_ZERO              // Branch if path 2: 0 < |x| < 2^-5
}
;;

// Here if main path, 2^-5 <= |x| < 2^51
///////////////////////////////// The first iteration /////////////////////////
{ .mfi
      ldfpd         log_P3,log_P2 = [NR_table_address],16
      fnma.s1       log_y_rs_iter2 = log_y_rs_iter,log_y_rs,NR2 // 3-(y*z)*z
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        log_y_rs_iter1 = log_y_rs,NR1,f0 // 0.5*z
      nop.i         0
}
;;

{ .mfi
      ldfpd         log_P1,log2 = [NR_table_address],16
      // (0.5*z)*(3-(y*z)*z)
      fma.s1        log_y_rs_iter = log_y_rs_iter1,log_y_rs_iter2,f0
      nop.i         0
}
{ .mfi
      nop.m         0
      // (0.5*z)*(3-(y*z)*z)
      // Same refined z, kept separately to build the early log argument
      fma.s1        log_arg_early = log_y_rs_iter1,log_y_rs_iter2,f0
      nop.i         0
}
;;

////////////////////////////////// The second iteration ////////////////////////
{ .mfi
      nop.m         0
      fma.s1        log_y_rs = log_y_rs_iter,log_y,f0 // y*z, z from first iteration
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        log_y_rs_iter1 = log_y_rs_iter,NR1,f0 // 0.5*z
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      // z*y + |x|: early estimate of |x| + sqrt(y). It feeds frcpa and the
      // exponent/index extraction below while the second iteration finishes.
      fma.s1        log_arg_early = log_arg_early,log_y,asinh_f8
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      fnma.s1       log_y_rs = log_y_rs,log_y_rs_iter,NR2 // 3-(y*z)*z
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        log_y_rs_iter1 = log_y_rs_iter1,log_y,f0 // (0.5*z)*y
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      frcpa.s1      log_C,p0 = f1,log_arg_early  // C = frcpa(early log argument)
      nop.i         0
}
;;

{ .mfi
      getf.exp      log_GR_signexp_f8 = log_arg_early
      nop.f         0
      nop.i         0
}
;;

{ .mfi
      getf.sig      log_GR_significand_f8 = log_arg_early
      // (0.5*z)*(3-(y*z)*z)*y + |x|
      fma.s1        log_arg = log_y_rs_iter1,log_y_rs,asinh_f8
      //to get third table address
      // NR_table_address was already advanced by 0x20 by the two ldfpd above
      adds          log_table_address3 = 0x30, NR_table_address
}
;;

/////////////////////////////////////////// The end NR iterations /////////////
{ .mfi
      nop.m         0
      nop.f         0
      // Drop the sign bit, keep the biased exponent
      and           log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
}
;;

{ .mfi
      //BIAS subtraction
      sub           log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
(p7)  fnma.s1       log2 = log2,f1,f0            // Negate log2 if x<0
      nop.i         0
}
;;

{ .mfi
      setf.sig      log_int_Nfloat = log_GR_true_exp_f8
      fms.s1        log_r = log_C,log_arg,f1     //C = frcpa(x); r = C * x - 1
      extr.u        log_GR_index = log_GR_significand_f8,55,8 //Extract 8 bits
}
;;

{ .mmi
      // Address of the T entry: table base + index*8
      shladd        log_table_address3 = log_GR_index,3,log_table_address3
;;
      ldfd          log_T = [log_table_address3]
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      fma.s1        log_rsq = log_r, log_r, f0   //r^2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      fma.s1        log_rp_p10 = log_P1, log_r, f1 //P1*r + 1
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      //convert N to the floating-point format
      fcvt.xf       log_Nfloat = log_int_Nfloat
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      // (P3*r + P2)*r^2 + (P1*r + 1)
      fma.s1        log_rp_p2 = log_rp_p32, log_rsq, log_rp_p10
      nop.i         0
}
;;

// For x<0 log2 was negated above, so the fms below yields -(N*log2 + T)
// and the final fnma yields -(N*log2 + T + r*poly).
.pred.rel "mutex",p7,p11
{ .mfi
      nop.m         0
(p11) fma.s1        log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T if x>0
      nop.i         0
}
{ .mfi
      nop.m         0
(p7)  fms.s1        log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 - T if x<0
      nop.i         0
}
;;

{ .mfi
      nop.m         0
(p11) fma.s.s0      f8 = log_rp_p2,log_r,log_T_plus_Nlog2
      nop.i         0
}
{ .mfb
      nop.m         0
(p7)  fnma.s.s0     f8 = log_rp_p2,log_r,log_T_plus_Nlog2
      br.ret.sptk   b0           // Exit main path, path 3: 2^-5 <= |x| < 2^51
}
;;

// Here if path 4, |x| >= 2^51
// log_arg holds 2*x from ASINH_COMMON; no square root is needed here
LOG_COMMON1:
{ .mfi
      ldfpd         log_P3,log_P2 = [NR_table_address],16
      nop.f         0
      nop.i         0
}
;;

{ .mfi
      ldfpd         log_P1,log2 = [NR_table_address],16
      frcpa.s1      log_C,p0 = f1,log_arg
      nop.i         0
}
;;

{ .mfi
      getf.exp      log_GR_signexp_f8 = log_arg
      nop.f         0
      //to get third table address
      // NR_table_address was already advanced by 0x20 by the two ldfpd above
      adds          log_table_address3 = 0x30, NR_table_address
}
;;

{ .mfi
      getf.sig      log_GR_significand_f8 = log_arg
      nop.f         0
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      nop.f         0
      // Drop the sign bit, keep the biased exponent
      and           log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
}
;;

{ .mmf
      nop.m         0
      //BIAS subtraction
      sub           log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
      fms.s1        log_r = log_C,log_arg,f1     //C = frcpa(x); r = C * x - 1
}
;;

{ .mfi
      setf.sig      log_int_Nfloat = log_GR_true_exp_f8
      nop.f         0
      extr.u        log_GR_index = log_GR_significand_f8,55,8 //Extract 8 bits
}
;;

{ .mmi
      // Address of the T entry: table base + index*8
      shladd        log_table_address3 = log_GR_index,3,log_table_address3
;;
      ldfd          log_T = [log_table_address3]
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      fma.s1        log_rsq = log_r, log_r, f0   //r^2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      fma.s1        log_rp_p10 = log_P1, log_r, f1 //P1*r + 1
      nop.i         0
}
{ .mfi
      nop.m         0
(p7)  fnma.s1       log2 = log2,f1,f0            // Negate log2 if x<0
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      //convert N to the floating-point format
      fcvt.xf       log_Nfloat = log_int_Nfloat
      nop.i         0
}
{ .mfi
      nop.m         0
      // (P3*r + P2)*r^2 + (P1*r + 1)
      fma.s1        log_rp_p2 = log_rp_p32, log_rsq, log_rp_p10
      nop.i         0
}
;;

// Same sign handling as at the end of the main path
.pred.rel "mutex",p7,p11
{ .mfi
      nop.m         0
(p11) fma.s1        log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T if x>0
      nop.i         0
}
{ .mfi
      nop.m         0
(p7)  fms.s1        log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 - T if x<0
      nop.i         0
}
;;

{ .mfi
      nop.m         0
(p11) fma.s.s0      f8 = log_rp_p2,log_r,log_T_plus_Nlog2
      nop.i         0
}
{ .mfb
      nop.m         0
(p7)  fnma.s.s0     f8 = log_rp_p2,log_r,log_T_plus_Nlog2
      br.ret.sptk   b0                           // Exit path 4, |x| >= 2^51
}
;;

// Here if path 2, 0 < |x| < 2^-5
// result = x + x^3*(C1*x^2 + C0)
ASINH_NEAR_ZERO:
{ .mfi
      nop.m         0
      fma.s1        asinh_w_1 = asinh_w_sq,log_C1,log_C0 // C1*x^2 + C0
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        asinh_w_cube = asinh_w_sq,fNormX,f0  // x^3
      nop.i         0
}
;;

{ .mfb
      nop.m         0
      fma.s.s0      f8 = asinh_w_1,asinh_w_cube,fNormX
      br.ret.sptk   b0                           // Exit path 2, 0 < |x| < 2^-5
}
;;

ASINH_UNORM:
// Here if x=unorm
{ .mfi
      getf.exp      asinh_GR_f8 = fNormX         // Recompute if x unorm
      fclass.m      p0,p13 = fNormX, 0x0b        // Test x denorm
                                                 // p13 is set if fNormX is
                                                 // no longer unnormal
      nop.i         0
}
;;

{ .mfb
      nop.m         0
      fcmp.eq.s0    p14,p0 = f8, f0              // Dummy to set denormal flag
(p13) br.cond.sptk  ASINH_COMMON                 // Continue if x unorm and not denorm
}
;;

.pred.rel "mutex",p7,p11
{ .mfi
      nop.m         0
(p7)  fma.s.s0      f8 = f8,f8,f8                // Result x+x^2 if x=-denorm
      nop.i         0
}
{ .mfb
      nop.m         0
(p11) fnma.s.s0     f8 = f8,f8,f8                // Result x-x^2 if x=+denorm
      br.ret.spnt   b0                           // Exit if denorm
}
;;

GLOBAL_LIBM_END(asinhf)

1346
sysdeps/ia64/fpu/s_asinhl.S Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,10 @@
.file "atanf.s"
// THIS IS NOT OPTIMIZED AND NOT OFFICIAL
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -22,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -37,16 +35,18 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
// ?/??/00 Initial revision
// 8/17/00 Changed predicate register macro-usage to direct predicate
// 02/20/00 Initial version
// 08/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
#include "libm_support.h"
// 02/06/02 Corrected .section statement
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align;
// added missing bundling
//
// Assembly macros
@ -140,16 +140,11 @@ atanf_answer = f8
//atanf_pred_GT1 = p7
#ifdef _LIBC
.rodata
#else
.data
#endif
RODATA
.align 16
atanf_coeff_1_table:
ASM_TYPE_DIRECTIVE(atanf_coeff_1_table,@object)
LOCAL_OBJECT_START(atanf_coeff_1_table)
data8 0x40c4c241be751ff2 // r4
data8 0x40e9f300c2f3070b // r5
data8 0x409babffef772075 // r3
@ -164,12 +159,11 @@ data8 0xbfc2473c5145ee38 // p3
data8 0x3fbc4f512b1865f5 // p4
data8 0x3fc9997e7afbff4e // p2
data8 0x3ff921fb54442d18 // pi/2
ASM_SIZE_DIRECTIVE(atanf_coeff_1_table)
LOCAL_OBJECT_END(atanf_coeff_1_table)
atanf_coeff_2_table:
ASM_TYPE_DIRECTIVE(atanf_coeff_2_table,@object)
LOCAL_OBJECT_START(atanf_coeff_2_table)
data8 0x4035000000004284 // r1
data8 0x406cdffff336a59b // r2
data8 0x3fbc4f512b1865f5 // p4 = q6
@ -182,18 +176,12 @@ data8 0xbfa6e10ba401393f // p7
data8 0x3f97105b4160f86b // p8
data8 0xbf7deaadaa336451 // p9
data8 0x3f522e5d33bc9baa // p10
ASM_SIZE_DIRECTIVE(atanf_coeff_2_table)
LOCAL_OBJECT_END(atanf_coeff_2_table)
.global atanf
.text
.proc atanf
.align 32
atanf:
.section .text
GLOBAL_LIBM_ENTRY(atanf)
{ .mfi
alloc r32 = ar.pfs,1,2,0,0
@ -325,7 +313,7 @@ atanf:
{ .mfb
nop.m 999
fma.s1 atanf_x5 = atanf_t,atanf_xcub,f0
(p8) br.cond.spnt L(ATANF_X_INF_NAN_ZERO)
(p8) br.cond.spnt ATANF_X_INF_NAN_ZERO
}
;;
@ -487,7 +475,7 @@ atanf:
{ .mfi
nop.m 999
fma atanf_sgnx_piby2 = atanf_sgn_x,atanf_piby2,f0
fma.s0 atanf_sgnx_piby2 = atanf_sgn_x,atanf_piby2,f0
nop.i 999
}
{ .mfi
@ -530,27 +518,38 @@ atanf:
{ .mfi
nop.m 999
//(atanf_pred_GT1) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
(p7) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
(p7) fnma.s.s0 atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
nop.i 999;;
}
{ .mfb
nop.m 999
//(atanf_pred_LE1) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
(p6) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
(p6) fma.s.s0 atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
br.ret.sptk b0
}
L(ATANF_X_INF_NAN_ZERO):
ATANF_X_INF_NAN_ZERO:
fclass.m p8,p9 = f8,0x23 // @inf
{ .mfi
nop.m 0
fclass.m p8,p9 = f8,0x23 // @inf
nop.i 0
}
;;
{ .mfi
nop.m 0
(p8) fmerge.s f8 = f8, atanf_piby2
nop.i 0
}
;;
fnorm.s f8 = f8
{ .mfb
nop.m 0
fnorm.s.s0 f8 = f8
br.ret.sptk b0
}
;;
.endp atanf
ASM_SIZE_DIRECTIVE(atanf)
GLOBAL_LIBM_END(atanf)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +1,10 @@
.file "cbrtl.asm"
.file "cbrtl.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang
// of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -36,11 +35,13 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 4/28/00: Initial version
// 04/28/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@ -95,29 +96,26 @@
// r2-r3, r23-r30
// p6,p7,p12
#include "libm_support.h"
// Data tables
//==============================================================
#ifdef _LIBC
.rodata
#else
.data
#endif
RODATA
.align 16
poly_coeffs:
ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
LOCAL_OBJECT_START(poly_coeffs)
data8 0xaaaaaaaaaaaaaab1, 0x00003ffd // C_1
data8 0xe38e38e38e38e3e0, 0x00003ffb // C_2
data8 0x3faf9add3c0be9a6, 0x3fa511e8d2b1f749 // C_3, C_4
data8 0x3f9ee71b2c6ebe99, 0x3f9809180fd0340c // C_5, C_6
ASM_SIZE_DIRECTIVE(poly_coeffs)
LOCAL_OBJECT_END(poly_coeffs)
LOCAL_OBJECT_START(T_table)
T_table:
ASM_TYPE_DIRECTIVE(T_table,@object)
data8 0x80155c748c374836, 0x8040404b0879f7f9
data8 0x806b5dce4b405c10, 0x8096b586974669b1
@ -503,14 +501,15 @@ data8 0xfec316fecaf3f2ab, 0xfeecfdaf33fadb80
data8 0xff16fffe2fa8fad6, 0xff411e0ba9db886d
data8 0xff6b57f7c33e4e9a, 0xff95ade2d1bd7358
data8 0xffc01fed60f86fb5, 0xffeaae3832b63956
ASM_SIZE_DIRECTIVE(T_table)
LOCAL_OBJECT_END(T_table)
D_table:
ASM_TYPE_DIRECTIVE(D_table,@object)
LOCAL_OBJECT_START(D_table)
data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c
data4 0x9e511c44, 0x9ec6d551, 0x9eefe248, 0x9e313854
data4 0x9f54ff18, 0x9d231411, 0x1ee5d63c, 0x9edf6b95
@ -703,25 +702,16 @@ data4 0x9eafd508, 0x9ef0e9fc, 0x1d1307ac, 0x1eecee20
data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4
data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467
data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d
ASM_SIZE_DIRECTIVE(D_table)
LOCAL_OBJECT_END(D_table)
.align 32
.global cbrtl#
.section .text
.proc cbrtl#
.align 32
cbrtl:
GLOBAL_LIBM_ENTRY(cbrtl)
{ .mfi
getf.sig r3=f8
// will continue only for normal/denormal numbers
(p0) fclass.nm.unc p12,p7 = f8, 0x1b
fclass.nm.unc p12,p7 = f8, 0x1b
// r2 = pointer to C_1...C_6 followed by T_table
addl r2 = @ltoff(poly_coeffs), gp;;
}
@ -898,5 +888,5 @@ cbrtl:
(p6) fma.s0 f8=f8,f6,f8
br.ret.sptk b0;;
}
.endp cbrtl
ASM_SIZE_DIRECTIVE(cbrtl)
GLOBAL_LIBM_END(cbrtl)

View File

@ -1,10 +1,10 @@
.file "ceil.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,75 +35,52 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
#include "libm_support.h"
.align 32
.global ceil#
.section .text
.proc ceil#
.align 32
// History
//==============================================================
// 2/02/00: Initial version
// 6/13/00: Improved speed
// 6/27/00: Eliminated incorrect invalid flag setting
// 02/02/00 Initial version
// 06/13/00 Improved speed
// 06/27/00 Eliminated incorrect invalid flag setting
// 05/20/02 Cleaned up namespace and sf0 syntax
// 01/28/03 Improved performance
//==============================================================
// API
//==============================================================
// double ceil(double x)
//==============================================================
// general input registers:
// r14 - r19
ceil_GR_FFFF = r14
ceil_GR_signexp = r15
ceil_GR_exponent = r16
ceil_GR_expmask = r17
ceil_GR_bigexp = r18
rSignexp = r14
rExp = r15
rExpMask = r16
rBigexp = r17
rM1 = r18
rSignexpM1 = r19
// floating-point registers:
// f8 - f13
fXInt = f9
fNormX = f10
fTmp = f11
fAdj = f12
fPreResult = f13
// predicate registers used:
// p6 ==> Input is NaN, infinity, zero
// p7 ==> Input is denormal
// p8 ==> Input is <0
// p9 ==> Input is >=0
// p10 ==> Input is already an integer (bigger than largest integer)
// p11 ==> Input is not a large integer
// p12 ==> Input is a smaller integer
// p13 ==> Input is not an even integer, so inexact must be set
// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
// floating-point registers used:
CEIL_SIGNED_ZERO = f7
CEIL_NORM_f8 = f9
CEIL_FFFF = f10
CEIL_INEXACT = f11
CEIL_FLOAT_INT_f8 = f12
CEIL_INT_f8 = f13
CEIL_adj = f14
CEIL_MINUS_ONE = f15
// p6 - p10
// Overview of operation
//==============================================================
// double ceil(double x)
// Return an integer value (represented as a double) that is the smallest
// value not less than x
// This is x rounded toward +infinity to an integral value.
// Inexact is set if x != ceil(x)
// **************************************************************************
// Set denormal flag for denormal input and
// and take denormal fault if necessary.
// Is the input an integer value already?
//==============================================================
// double_extended
// if the exponent is > 1003e => 3F(true) = 63(decimal)
@ -124,139 +101,124 @@ CEIL_MINUS_ONE = f15
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
// If x is NAN, ZERO, or INFINITY, then return
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11 0xe7
ceil:
.section .text
GLOBAL_LIBM_ENTRY(ceil)
{ .mfi
getf.exp ceil_GR_signexp = f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = f8
addl ceil_GR_bigexp = 0x10033, r0
getf.exp rSignexp = f8 // Get signexp, recompute if unorm
fclass.m p7,p0 = f8, 0x0b // Test x unorm
addl rBigexp = 0x10033, r0 // Set exponent at which is integer
}
{ .mfi
addl ceil_GR_FFFF = -1,r0
fcmp.lt.s1 p8,p9 = f8,f0
mov ceil_GR_expmask = 0x1FFFF ;;
mov rM1 = -1 // Set all ones
fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand
mov rExpMask = 0x1FFFF // Form exponent mask
}
;;
// p7 ==> denorm
{ .mfi
setf.sig CEIL_FFFF = ceil_GR_FFFF
fclass.m p7,p0 = f8, 0x0b
nop.i 999
mov rSignexpM1 = 0x2FFFF // Form signexp of -1
fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0
nop.i 0
}
{ .mfi
nop.m 999
fnorm CEIL_NORM_f8 = f8
nop.i 999 ;;
}
// Form 0 with sign of input in case negative zero is needed
{ .mfi
nop.m 999
fmerge.s CEIL_SIGNED_ZERO = f8, f0
nop.i 999
}
{ .mfi
nop.m 999
fsub.s1 CEIL_MINUS_ONE = f0, f1
nop.i 999 ;;
}
// p6 ==> NAN, INF, ZERO
{ .mfb
nop.m 999
fclass.m p6,p10 = f8, 0xe7
(p7) br.cond.spnt L(CEIL_DENORM) ;;
setf.sig fTmp = rM1 // Make const for setting inexact
fnorm.s1 fNormX = f8 // Normalize input
(p7) br.cond.spnt CEIL_UNORM // Branch if x unorm
}
;;
CEIL_COMMON:
// Return here from CEIL_UNORM
{ .mfi
nop.m 0
fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0
nop.i 0
}
;;
L(CEIL_COMMON):
.pred.rel "mutex",p8,p9
// Set adjustment to add to trunc(x) for result
// If x>0, adjustment is 1.0
// If x<=0, adjustment is 0.0
{ .mfi
and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
(p9) fadd.s1 CEIL_adj = f1,f0
nop.i 999
nop.m 0
(p8) fma.s1 fAdj = f0, f0, f0 // If x < 0, adjustment is 0
nop.i 0
}
{ .mfi
nop.m 999
(p8) fadd.s1 CEIL_adj = f0,f0
nop.i 999 ;;
nop.m 0
(p9) fma.s1 fAdj = f1, f1, f0 // If x > 0, adjustment is +1
nop.i 0
}
;;
{ .mfi
(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
(p6) fnorm.d f8 = f8
nop.i 999 ;;
nop.m 0
fcvt.xf fPreResult = fXInt // trunc(x)
nop.i 0
}
{ .mfi
nop.m 999
(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p10) fnorm.d f8 = CEIL_NORM_f8
nop.i 999 ;;
}
// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
{ .mfi
nop.m 999
(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
nop.i 999 ;;
}
{ .mfi
(p14) cmp.ne p11,p0 = r0,r0
(p14) fnorm.d f8 = CEIL_SIGNED_ZERO
nop.i 999
}
{ .mfi
nop.m 999
(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p11) fadd.d f8 = CEIL_FLOAT_INT_f8,CEIL_adj
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
nop.i 999 ;;
}
// Set inexact if result not equal to input
{ .mfi
nop.m 999
(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999
}
// Set result to input if integer
{ .mfb
nop.m 999
(p12) fnorm.d f8 = CEIL_NORM_f8
br.ret.sptk b0 ;;
nop.m 0
(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0
(p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0
}
;;
// Here if input denorm
L(CEIL_DENORM):
{ .mmi
and rExp = rSignexp, rExpMask // Get biased exponent
;;
cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^52?
(p8) cmp.lt.unc p10,p0 = rSignexp, rSignexpM1 // Is -1 < x < 0?
}
;;
// If -1 < x < 0, we turn off p6 and compute result as -0
{ .mfi
(p10) cmp.ne p6,p0 = r0,r0
(p10) fmerge.s f8 = fNormX, f0
nop.i 0
}
;;
.pred.rel "mutex",p6,p7
{ .mfi
nop.m 0
(p6) fma.d.s0 f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^52
nop.i 0
}
{ .mfi
nop.m 0
(p7) fma.d.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^52
(p10) cmp.eq p6,p0 = r0,r0 // If -1 < x < 0, turn on p6 again
}
;;
{ .mfi
nop.m 0
(p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
nop.i 0
}
;;
{ .mfi
nop.m 0
(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
nop.i 0
}
{ .mfb
getf.exp ceil_GR_signexp = CEIL_NORM_f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
br.cond.sptk L(CEIL_COMMON) ;;
nop.m 0
(p8) fma.d.s0 f8 = fNormX, f1, f0 // If x int, result normalized x
br.ret.sptk b0 // Exit main path, 0 < |x| < 2^52
}
;;
.endp ceil
ASM_SIZE_DIRECTIVE(ceil)
CEIL_UNORM:
// Here if x unorm
{ .mfb
getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
br.cond.sptk CEIL_COMMON // Return to main path
}
;;
GLOBAL_LIBM_END(ceil)

View File

@ -1,10 +1,10 @@
.file "ceilf.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,75 +35,52 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
#include "libm_support.h"
.align 32
.global ceilf#
.section .text
.proc ceilf#
.align 32
// History
//==============================================================
// 2/02/00: Initial version
// 6/13/00: Improved speed
// 6/27/00: Eliminated incorrect invalid flag setting
// 02/02/00 Initial version
// 06/13/00 Improved speed
// 06/27/00 Eliminated incorrect invalid flag setting
// 05/20/02 Cleaned up namespace and sf0 syntax
// 01/28/03 Improved performance
//==============================================================
// API
//==============================================================
// float ceilf(float x)
//==============================================================
// general input registers:
// r14 - r19
ceil_GR_FFFF = r14
ceil_GR_signexp = r15
ceil_GR_exponent = r16
ceil_GR_expmask = r17
ceil_GR_bigexp = r18
rSignexp = r14
rExp = r15
rExpMask = r16
rBigexp = r17
rM1 = r18
rSignexpM1 = r19
// floating-point registers:
// f8 - f13
fXInt = f9
fNormX = f10
fTmp = f11
fAdj = f12
fPreResult = f13
// predicate registers used:
// p6 ==> Input is NaN, infinity, zero
// p7 ==> Input is denormal
// p8 ==> Input is <0
// p9 ==> Input is >=0
// p10 ==> Input is already an integer (bigger than largest integer)
// p11 ==> Input is not a large integer
// p12 ==> Input is a smaller integer
// p13 ==> Input is not an even integer, so inexact must be set
// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
// floating-point registers used:
CEIL_SIGNED_ZERO = f7
CEIL_NORM_f8 = f9
CEIL_FFFF = f10
CEIL_INEXACT = f11
CEIL_FLOAT_INT_f8 = f12
CEIL_INT_f8 = f13
CEIL_adj = f14
CEIL_MINUS_ONE = f15
// p6 - p10
// Overview of operation
//==============================================================
// float ceilf(float x)
// Return an integer value (represented as a float) that is the smallest
// value not less than x
// This is x rounded toward +infinity to an integral value.
// Inexact is set if x != ceilf(x)
// **************************************************************************
// Set denormal flag for denormal input and
// and take denormal fault if necessary.
// Is the input an integer value already?
//==============================================================
// double_extended
// if the exponent is > 1003e => 3F(true) = 63(decimal)
@ -124,139 +101,124 @@ CEIL_MINUS_ONE = f15
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
// If x is NAN, ZERO, or INFINITY, then return
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11 0xe7
ceilf:
.section .text
GLOBAL_LIBM_ENTRY(ceilf)
{ .mfi
getf.exp ceil_GR_signexp = f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = f8
addl ceil_GR_bigexp = 0x10016, r0
getf.exp rSignexp = f8 // Get signexp, recompute if unorm
fclass.m p7,p0 = f8, 0x0b // Test x unorm
addl rBigexp = 0x10016, r0 // Set exponent at which is integer
}
{ .mfi
addl ceil_GR_FFFF = -1,r0
fcmp.lt.s1 p8,p9 = f8,f0
mov ceil_GR_expmask = 0x1FFFF ;;
mov rM1 = -1 // Set all ones
fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand
mov rExpMask = 0x1FFFF // Form exponent mask
}
;;
// p7 ==> denorm
{ .mfi
setf.sig CEIL_FFFF = ceil_GR_FFFF
fclass.m p7,p0 = f8, 0x0b
nop.i 999
mov rSignexpM1 = 0x2FFFF // Form signexp of -1
fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0
nop.i 0
}
{ .mfi
nop.m 999
fnorm CEIL_NORM_f8 = f8
nop.i 999 ;;
}
// Form 0 with sign of input in case negative zero is needed
{ .mfi
nop.m 999
fmerge.s CEIL_SIGNED_ZERO = f8, f0
nop.i 999
}
{ .mfi
nop.m 999
fsub.s1 CEIL_MINUS_ONE = f0, f1
nop.i 999 ;;
}
// p6 ==> NAN, INF, ZERO
{ .mfb
nop.m 999
fclass.m p6,p10 = f8, 0xe7
(p7) br.cond.spnt L(CEIL_DENORM) ;;
setf.sig fTmp = rM1 // Make const for setting inexact
fnorm.s1 fNormX = f8 // Normalize input
(p7) br.cond.spnt CEIL_UNORM // Branch if x unorm
}
;;
CEIL_COMMON:
// Return here from CEIL_UNORM
{ .mfi
nop.m 0
fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0
nop.i 0
}
;;
L(CEIL_COMMON):
.pred.rel "mutex",p8,p9
// Set adjustment to add to trunc(x) for result
// If x>0, adjustment is 1.0
// If x<=0, adjustment is 0.0
{ .mfi
and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
(p9) fadd.s1 CEIL_adj = f1,f0
nop.i 999
nop.m 0
(p8) fma.s1 fAdj = f0, f0, f0 // If x < 0, adjustment is 0
nop.i 0
}
{ .mfi
nop.m 999
(p8) fadd.s1 CEIL_adj = f0,f0
nop.i 999 ;;
nop.m 0
(p9) fma.s1 fAdj = f1, f1, f0 // If x > 0, adjustment is +1
nop.i 0
}
;;
{ .mfi
(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
(p6) fnorm.s f8 = f8
nop.i 999 ;;
nop.m 0
fcvt.xf fPreResult = fXInt // trunc(x)
nop.i 0
}
{ .mfi
nop.m 999
(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p10) fnorm.s f8 = CEIL_NORM_f8
nop.i 999 ;;
}
// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
{ .mfi
nop.m 999
(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
nop.i 999 ;;
}
{ .mfi
(p14) cmp.ne p11,p0 = r0,r0
(p14) fnorm.s f8 = CEIL_SIGNED_ZERO
nop.i 999
}
{ .mfi
nop.m 999
(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p11) fadd.s f8 = CEIL_FLOAT_INT_f8,CEIL_adj
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
nop.i 999 ;;
}
// Set inexact if result not equal to input
{ .mfi
nop.m 999
(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999
}
// Set result to input if integer
{ .mfb
nop.m 999
(p12) fnorm.s f8 = CEIL_NORM_f8
br.ret.sptk b0 ;;
nop.m 0
(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0
(p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0
}
;;
// Here if input denorm
L(CEIL_DENORM):
{ .mmi
and rExp = rSignexp, rExpMask // Get biased exponent
;;
cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^23?
(p8) cmp.lt.unc p10,p0 = rSignexp, rSignexpM1 // Is -1 < x < 0?
}
;;
// If -1 < x < 0, we turn off p6 and compute result as -0
{ .mfi
(p10) cmp.ne p6,p0 = r0,r0
(p10) fmerge.s f8 = fNormX, f0
nop.i 0
}
;;
.pred.rel "mutex",p6,p7
{ .mfi
nop.m 0
(p6) fma.s.s0 f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^23
nop.i 0
}
{ .mfi
nop.m 0
(p7) fma.s.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^23
(p10) cmp.eq p6,p0 = r0,r0 // If -1 < x < 0, turn on p6 again
}
;;
{ .mfi
nop.m 0
(p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
nop.i 0
}
;;
{ .mfi
nop.m 0
(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
nop.i 0
}
{ .mfb
getf.exp ceil_GR_signexp = CEIL_NORM_f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
br.cond.sptk L(CEIL_COMMON) ;;
nop.m 0
(p8) fma.s.s0 f8 = fNormX, f1, f0 // If x int, result normalized x
br.ret.sptk b0 // Exit main path, 0 < |x| < 2^23
}
;;
.endp ceilf
ASM_SIZE_DIRECTIVE(ceilf)
CEIL_UNORM:
// Here if x unorm
{ .mfb
getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
br.cond.sptk CEIL_COMMON // Return to main path
}
;;
GLOBAL_LIBM_END(ceilf)

View File

@ -1,10 +1,10 @@
.file "ceill.s"
// Copyright (C) 2000, 2001, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -35,75 +35,52 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
#include "libm_support.h"
.align 32
.global ceill#
.section .text
.proc ceill#
.align 32
// History
//==============================================================
// 2/02/00: Initial version
// 6/13/00: Improved speed
// 6/27/00: Eliminated incorrect invalid flag setting
// 02/02/00 Initial version
// 06/13/00 Improved speed
// 06/27/00 Eliminated incorrect invalid flag setting
// 05/20/02 Cleaned up namespace and sf0 syntax
// 01/28/03 Improved performance
//==============================================================
// API
//==============================================================
// double ceill(double x)
// long double ceill(long double x)
//==============================================================
// general input registers:
// r14 - r19
ceil_GR_FFFF = r14
ceil_GR_signexp = r15
ceil_GR_exponent = r16
ceil_GR_expmask = r17
ceil_GR_bigexp = r18
rSignexp = r14
rExp = r15
rExpMask = r16
rBigexp = r17
rM1 = r18
rSignexpM1 = r19
// floating-point registers:
// f8 - f13
fXInt = f9
fNormX = f10
fTmp = f11
fAdj = f12
fPreResult = f13
// predicate registers used:
// p6 ==> Input is NaN, infinity, zero
// p7 ==> Input is denormal
// p8 ==> Input is <0
// p9 ==> Input is >=0
// p10 ==> Input is already an integer (bigger than largest integer)
// p11 ==> Input is not a large integer
// p12 ==> Input is a smaller integer
// p13 ==> Input is not an even integer, so inexact must be set
// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
// floating-point registers used:
CEIL_SIGNED_ZERO = f7
CEIL_NORM_f8 = f9
CEIL_FFFF = f10
CEIL_INEXACT = f11
CEIL_FLOAT_INT_f8 = f12
CEIL_INT_f8 = f13
CEIL_adj = f14
CEIL_MINUS_ONE = f15
// p6 - p10
// Overview of operation
//==============================================================
// long double ceill(long double x)
// Return an integer value (represented as a long double) that is the smallest
// value not less than x
// This is x rounded toward +infinity to an integral value.
// Inexact is set if x != ceill(x)
// **************************************************************************
// Set denormal flag for denormal input and
// and take denormal fault if necessary.
// Is the input an integer value already?
//==============================================================
// double_extended
// if the exponent is > 1003e => 3F(true) = 63(decimal)
@ -124,139 +101,124 @@ CEIL_MINUS_ONE = f15
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
// If x is NAN, ZERO, or INFINITY, then return
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11 0xe7
ceill:
.section .text
GLOBAL_LIBM_ENTRY(ceill)
{ .mfi
getf.exp ceil_GR_signexp = f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = f8
addl ceil_GR_bigexp = 0x1003e, r0
getf.exp rSignexp = f8 // Get signexp, recompute if unorm
fclass.m p7,p0 = f8, 0x0b // Test x unorm
addl rBigexp = 0x1003e, r0 // Set exponent at which is integer
}
{ .mfi
addl ceil_GR_FFFF = -1,r0
fcmp.lt.s1 p8,p9 = f8,f0
mov ceil_GR_expmask = 0x1FFFF ;;
mov rM1 = -1 // Set all ones
fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand
mov rExpMask = 0x1FFFF // Form exponent mask
}
;;
// p7 ==> denorm
{ .mfi
setf.sig CEIL_FFFF = ceil_GR_FFFF
fclass.m p7,p0 = f8, 0x0b
nop.i 999
mov rSignexpM1 = 0x2FFFF // Form signexp of -1
fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0
nop.i 0
}
{ .mfi
nop.m 999
fnorm CEIL_NORM_f8 = f8
nop.i 999 ;;
}
// Form 0 with sign of input in case negative zero is needed
{ .mfi
nop.m 999
fmerge.s CEIL_SIGNED_ZERO = f8, f0
nop.i 999
}
{ .mfi
nop.m 999
fsub.s1 CEIL_MINUS_ONE = f0, f1
nop.i 999 ;;
}
// p6 ==> NAN, INF, ZERO
{ .mfb
nop.m 999
fclass.m p6,p10 = f8, 0xe7
(p7) br.cond.spnt L(CEIL_DENORM) ;;
setf.sig fTmp = rM1 // Make const for setting inexact
fnorm.s1 fNormX = f8 // Normalize input
(p7) br.cond.spnt CEIL_UNORM // Branch if x unorm
}
;;
CEIL_COMMON:
// Return here from CEIL_UNORM
{ .mfi
nop.m 0
fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0
nop.i 0
}
;;
L(CEIL_COMMON):
.pred.rel "mutex",p8,p9
// Set adjustment to add to trunc(x) for result
// If x>0, adjustment is 1.0
// If x<=0, adjustment is 0.0
{ .mfi
and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
(p9) fadd.s1 CEIL_adj = f1,f0
nop.i 999
nop.m 0
(p8) fma.s1 fAdj = f0, f0, f0 // If x < 0, adjustment is 0
nop.i 0
}
{ .mfi
nop.m 999
(p8) fadd.s1 CEIL_adj = f0,f0
nop.i 999 ;;
nop.m 0
(p9) fma.s1 fAdj = f1, f1, f0 // If x > 0, adjustment is +1
nop.i 0
}
;;
{ .mfi
(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
(p6) fnorm f8 = f8
nop.i 999 ;;
nop.m 0
fcvt.xf fPreResult = fXInt // trunc(x)
nop.i 0
}
{ .mfi
nop.m 999
(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p10) fnorm f8 = CEIL_NORM_f8
nop.i 999 ;;
}
// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
{ .mfi
nop.m 999
(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
nop.i 999 ;;
}
{ .mfi
(p14) cmp.ne p11,p0 = r0,r0
(p14) fnorm f8 = CEIL_SIGNED_ZERO
nop.i 999
}
{ .mfi
nop.m 999
(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p11) fadd f8 = CEIL_FLOAT_INT_f8,CEIL_adj
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
nop.i 999 ;;
}
// Set inexact if result not equal to input
{ .mfi
nop.m 999
(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999
}
// Set result to input if integer
{ .mfb
nop.m 999
(p12) fnorm f8 = CEIL_NORM_f8
br.ret.sptk b0 ;;
nop.m 0
(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0
(p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0
}
;;
// Here if input denorm
L(CEIL_DENORM):
{ .mmi
and rExp = rSignexp, rExpMask // Get biased exponent
;;
cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^63?
(p8) cmp.lt.unc p10,p0 = rSignexp, rSignexpM1 // Is -1 < x < 0?
}
;;
// If -1 < x < 0, we turn off p6 and compute result as -0
{ .mfi
(p10) cmp.ne p6,p0 = r0,r0
(p10) fmerge.s f8 = fNormX, f0
nop.i 0
}
;;
.pred.rel "mutex",p6,p7
{ .mfi
nop.m 0
(p6) fma.s0 f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^63
nop.i 0
}
{ .mfi
nop.m 0
(p7) fma.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^63
(p10) cmp.eq p6,p0 = r0,r0 // If -1 < x < 0, turn on p6 again
}
;;
{ .mfi
nop.m 0
(p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
nop.i 0
}
;;
{ .mfi
nop.m 0
(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
nop.i 0
}
{ .mfb
getf.exp ceil_GR_signexp = CEIL_NORM_f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
br.cond.sptk L(CEIL_COMMON) ;;
nop.m 0
(p8) fma.s0 f8 = fNormX, f1, f0 // If x int, result normalized x
br.ret.sptk b0 // Exit main path, 0 < |x| < 2^63
}
;;
.endp ceill
ASM_SIZE_DIRECTIVE(ceill)
CEIL_UNORM:
// Here if x unorm
{ .mfb
getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
br.cond.sptk CEIL_COMMON // Return to main path
}
;;
GLOBAL_LIBM_END(ceill)

View File

@ -29,6 +29,10 @@ END (__copysign)
strong_alias (__copysign, __copysignf)
strong_alias (__copysign, __copysignl)
strong_alias (__copysign, __libm_copysign)
strong_alias (__copysign, __libm_copysignf)
strong_alias (__copysign, __libm_copysignl)
weak_alias (__copysign, copysign)
weak_alias (__copysignf, copysignf)
weak_alias (__copysignl, copysignl)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

924
sysdeps/ia64/fpu/s_erf.S Normal file
View File

@ -0,0 +1,924 @@
.file "erf.s"
// Copyright (c) 2001 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2001 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/15/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// double erf(double)
//
// Overview of operation
//==============================================================
// Background
//
//
// There are 9 paths:
// 1. x = +/-0.0
// Return erf(x) = +/-0.0
//
// 2. 0.0 < |x| < 0.5
// Return erf(x) = x *Pol9(x^2)
//
// 3. For several subranges of 0.5 <= |x| < 5.90625
// Return erf(x) = sign(x)*Pol19(y),
// where y = (|x|-b)/a, Pol19(y) = A0 + A1*y^1 + A2*y^2 + ... + A19*y^19
//
// For each subrange there is particular set of coefficients.
// Below is the list of subranges:
// 3.1 0.5 <= |x| < 1.0 b = a = 0.5
// 3.2 1.0 <= |x| < 2.0, b = a = 1.0
// 3.3 2.0 <= |x| < 3.25 b = a = 2.0
// 3.4 4.0 <= |x| < 5.90625 b = a = 4.0
//
// 4. 3.25 <= |x| < 4.0
// Return erf(x) = sign(x)*Pol14(|x| - 3.25)
//
// 5. 5.90625 <= |x| < +INF
// Return erf(x) = sign(x)*(1.0d - 2^(-64)), rounded to double
//
// 6. |x| = INF
// Return erf(x) = sign(x) * 1.0
//
// 7. x = [S,Q]NaN
// Return erf(x) = QNaN
//
// 8. x is positive denormal
// Return erf(x) = A0*x - x^2,
// where A0 = 2.0/sqrt(Pi)
//
// 9. x is negative denormal
// Return erf(x) = A0*x + x^2,
// where A0 = 2.0/sqrt(Pi)
//
// Registers used
//==============================================================
// Floating Point registers used:
// f8, input, output
// f32 -> f63
// General registers used:
// r32 -> r48, r2, r3
// Predicate registers used:
// p0, p6 -> p15
// p6 to filter out case when x = denormal
// p7 to filter out case when x = [Q,S]NaN or +/-0,
// used also to process denormals
// p8 to filter out case when 3.25 <= |x| < 4.0,
// used also to process denormals
// p9 to filter out case when |x| = inf
// p10 to filter out case when |x| < 0.5
// p11 set when |x| < 3.25 or |x| > 4.0
// p12 to filter out case when |x| >= 5.90625
// p13 set if 4.0 <=|x| < 5.90625
// p14 set to 1 for positive x
// p15 set to 1 for negative x
// Assembly macros
//==============================================================
// General-register aliases. r32 is not aliased: it receives ar.pfs from
// the alloc at the top of erf.
rDataPtr = r2 // address of erf_data (later of a single constant)
rDataPtr1 = r3 // not referenced by the erf routine below
rBias = r33 // 0x3FE00: biased exponent of 0.5, shifted left by 8
rCoeffAddr3 = r34 // walks coefficients A3, A1 of the selected subrange
rThreeAndQ = r35 // double bit pattern of 3.25
rCoeffAddr2 = r36 // walks every second coefficient (A18, A16, ...)
rMask = r37 // 0x7FF00: exponent field of (x bits >> 44)
rArg = r38 // x as raw double bits
rSignBit = r39 // 1 << 63
rAbsArg = r40 // x bits with the sign bit cleared
rSaturation = r41 // 0x4017A: top 20 bits of 5.90625
rIndex = r42 // (biased exponent - 0x3FE) << 8, byte offset of subrange
rCoeffAddr1 = r43 // walks every second coefficient (A19, A17, ...)
rCoeffAddr4 = r44 // walks coefficients A2, A0 of the selected subrange
rShiftedArg = r45 // x bits >> 44 (sign, exponent, top 8 fraction bits)
rShiftedArgMasked = r46 // biased exponent of x, shifted left by 8
rBiasedExpOf4 = r47 // 0x40100: top 20 bits of 4.0
rShiftedAbsArg = r48 // |x| bits >> 44
//==============================================================
// Floating-point aliases. fA0..fA19 first hold the loaded polynomial
// coefficients and are then overwritten with partial sums.
fA0 = f32
fA1 = f33
fA2 = f34
fA3 = f35
fA4 = f36
fA5 = f37
fA6 = f38
fA7 = f39
fA8 = f40
fA9 = f41
fA10 = f42
fA11 = f43
fA12 = f44
fA13 = f45
fA14 = f46
fA15 = f47
fA16 = f48
fA17 = f49
fA18 = f50
fA19 = f51
fArgSqr = f52 // x^2
fArgAbsNorm = f53 // significand of x in [1,2), then minus 1.0 (= y)
fSignumX = f54 // 1.0 carrying the sign of x
fRes = f55 // running polynomial value
fThreeAndQ = f56 // 3.25
fArgAbs = f57 // |x|, later |x| - 3.25
fTSqr = f58 // square of the polynomial argument
fTQuadr = f59 // square of fTSqr
fTDeg3 = f60 // y^3 (Pol19 path only)
fTDeg7 = f61 // y^7 on the Pol19 path, t^6 on the Pol14 path
fArgAbsNormSgn = f62 // polynomial argument multiplied by fSignumX
fTQuadrSgn = f63 // x^9 (path |x| < 0.5 only)
// Data tables
//==============================================================
// Layout of erf_data, as byte offsets from the start of the object. The
// code addresses the table with these hard-coded offsets, so entries must
// not be added, removed or reordered without updating the code.
// Each "data8 significand, sign/exponent" pair is one 16-byte entry read
// with ldfe; each two consecutive single data8 values are one 16-byte
// pair of doubles read with ldfpd.
//      0 .. 1023  A19..A4, four subranges of 256 bytes each
//   1024 .. 1279  A3..A0, the same four subranges, 64 bytes each
//   1280 .. 1375  coefficients for |x| < 0.5 (two interleaved streams,
//                 the second one starting at 1328)
//   1376          saturation constant
//   1392 .. 1631  A14..A0 for 3.25 < |x| < 4.0
//   1632          2.0/sqrt(Pi) for the denormal path
RODATA
.align 64
LOCAL_OBJECT_START(erf_data)
// Coefficients ##0..15
// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0
data8 0xB69AC40646D1F6C1, 0x00003FD2 //A19
data8 0x90AD48C0118FA10C, 0x00003FD7 //A18
data8 0x826FBAD055EA4AB8, 0x0000BFDB //A17
data8 0x8DAB171246CC2B89, 0x00003FDC //A16
data8 0xC0B1D6662F8A7564, 0x00003FDF //A15
data8 0xA46374AC35099BAF, 0x0000BFE1 //A14
data8 0xB2F230996346EF27, 0x0000BFE4 //A13
data8 0xCDEC50950FACE04A, 0x00003FE6 //A12
data8 0x826014649396E9D2, 0x00003FE9 //A11
data8 0xCDB787DC718B13F9, 0x0000BFEB //A10
data8 0x8E0B23C24EE0C8EE, 0x0000BFED //A9
data8 0xA49EA40A4E5A3F76, 0x00003FF0 //A8
data8 0xB11E30BE912617D3, 0x00003FF0 //A7
data8 0xCCF89D9351CE26E3, 0x0000BFF4 //A6
data8 0xEFF75AD1F0F22809, 0x00003FF2 //A5
data8 0xBB793EF404C09A22, 0x00003FF8 //A4
// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0
data8 0xBAE93FF4174EA59B, 0x00003FE6 //A19
data8 0x8A0FD46092F95D44, 0x0000BFEA //A18
data8 0xA37B3242B7809E12, 0x00003FEC //A17
data8 0xA0330A5CD2E91689, 0x0000BFED //A16
data8 0x8E34A678F3497D17, 0x0000BFEC //A15
data8 0xAC185D45A2772384, 0x00003FEF //A14
data8 0xB0C11347CE7EEDE8, 0x00003FEF //A13
data8 0xD3330DC14EA0E4EB, 0x0000BFF2 //A12
data8 0xB4A6DFDE578A428F, 0x00003FF1 //A11
data8 0xA0B4034310D2D9CB, 0x00003FF5 //A10
data8 0xF71662D3132B7759, 0x0000BFF5 //A9
data8 0x9C88BF157695E9EC, 0x0000BFF7 //A8
data8 0xF84B80EFCA43895D, 0x00003FF8 //A7
data8 0x9722D22DA628A17B, 0x00003FF7 //A6
data8 0x8DB0A586F8F3381F, 0x0000BFFB //A5
data8 0x8DB0A5879F87E5BE, 0x00003FFB //A4
// Polynomial coefficients for the erf(x), 2.0 <= |x| < 3.25
data8 0x9C4AF1F3A4B21AFC, 0x00003FF6 //A19
data8 0x8D40D5D5DB741AB8, 0x0000BFF9 //A18
data8 0xDEBE7099E0A75BA4, 0x00003FFA //A17
data8 0xB99A33294D32429D, 0x0000BFFB //A16
data8 0x8109D9C7197BC7C9, 0x00003FFB //A15
data8 0xC30DE8E2EFC2D760, 0x00003FFA //A14
data8 0x80DDA28C5B35DC73, 0x0000BFFC //A13
data8 0x9BE4DE5095BACE0D, 0x00003FF9 //A12
data8 0xDA4092509EE7D111, 0x00003FFC //A11
data8 0x89D98C561B0C9040, 0x0000BFFD //A10
data8 0xD20B26EB2F0881D4, 0x0000BFF9 //A9
data8 0xD089C56948731561, 0x00003FFD //A8
data8 0xDD704DEFFB21B7E7, 0x0000BFFD //A7
data8 0xF0C9A6BBDE469115, 0x00003FF9 //A6
data8 0xD673A02CB5766633, 0x00003FFD //A5
data8 0x8D162CBAD8A12649, 0x0000BFFE //A4
// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0
data8 0xD4428B75C6FE8FD1, 0x0000BFFC //A19
data8 0xF76BE1935675D5C8, 0x00003FFE //A18
data8 0xFD6BB3B14AA7A8E6, 0x0000BFFF //A17
data8 0x8BE8F573D348DDA4, 0x00004000 //A16
data8 0x81E91923A1030502, 0x0000BFFF //A15
data8 0xCE7FE87B26CFD286, 0x0000BFFE //A14
data8 0x84EF6B4E17404384, 0x00004000 //A13
data8 0x91FEF33015404991, 0x0000C000 //A12
data8 0xDEDF6A9370747E56, 0x00003FFF //A11
data8 0x8397E6FF56CDFD9D, 0x0000BFFF //A10
data8 0xFAD1CE912473937B, 0x00003FFD //A9
data8 0xC48C1EA8AAA624EA, 0x0000BFFC //A8
data8 0xFECAF0097ACF981B, 0x00003FFA //A7
data8 0x8829A394065E4B95, 0x0000BFF9 //A6
data8 0xED3003E477A53EE7, 0x00003FF6 //A5
data8 0xA4C07E9BB3FCB0F3, 0x0000BFF4 //A4
//
// Coefficients ##16..19
// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0
data8 0x95FA98C337005D13, 0x0000BFF9 //A3
data8 0xE0F7E524D2808A97, 0x0000BFFB //A2
data8 0xE0F7E524D2808A98, 0x00003FFD //A1
data8 0x853F7AE0C76E915F, 0x00003FFE //A0
// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0
data8 0x8DB0A587A96ABCF0, 0x00003FFC //A3
data8 0xD488F84B7DE18DA8, 0x0000BFFD //A2
data8 0xD488F84B7DE12E9C, 0x00003FFD //A1
data8 0xD7BB3D3A08445636, 0x00003FFE //A0
// Polynomial coefficients for the erf(x), 2.0 <= |x| < 3.25
data8 0xC58571D23D5C4B3A, 0x00003FFD //A3
data8 0xA94DCF467CD6AFF3, 0x0000BFFC //A2
data8 0xA94DCF467CD10A16, 0x00003FFA //A1
data8 0xFECD70A13CAF1997, 0x00003FFE //A0
// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0
data8 0xB01D2B4F0D5AB8B0, 0x00003FF1 //A3
data8 0x8858A465CE594BD1, 0x0000BFEE //A2
data8 0x8858A447456DE61D, 0x00003FEA //A1
data8 0xFFFFFFBDC88BB107, 0x00003FFE //A0
// Polynomial coefficients for the erf(x), 0.0 <= |x| < 0.5
// Stored as doubles except A1 and A0, which are extended entries.
// First stream (offset 1280): A9, A8, A5, A4, A1.
data8 0xBE839EDBB36C7FCE //A9
data8 0x3EBB7745A18DD242 //A8
data8 0xBF4C02DB238F2AFC //A5
data8 0x3F7565BCD0A9A3EA //A4
data8 0xC093A3581BCF3333, 0x0000BFFD //A1
// Second stream (offset 1328): A7, A6, A3, A2, A0.
data8 0xBEEF4BB82AD8AE22 //A7
data8 0x3F1F9A2A57A218CD //A6
data8 0xBF9B82CE3127F4E4 //A3
data8 0x3FBCE2F21A042B25 //A2
data8 0x906EBA8214DB688D, 0x00003FFF //A0
// Saturation constant: all-ones significand with exponent 0x3FFE,
// i.e. 1.0 - 2^(-64)
data8 0xFFFFFFFFFFFFFFFF, 0x00003FFE
// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4.0
data8 0x95E91576C7A12250, 0x00003FE7 //A14
data8 0x8E5E0D0E1F5D3CB5, 0x0000BFEA //A13
data8 0xED761DAFAF814DE9, 0x00003FEB //A12
data8 0xB3A77D921D0ACFC7, 0x0000BFEC //A11
data8 0xA662D27096B08D7C, 0x0000BFEC //A10
data8 0xDA0F410AE6233EA5, 0x00003FEF //A9
data8 0xAB4A8B16B3124327, 0x0000BFF1 //A8
data8 0xB241E236A5EDCED3, 0x00003FF2 //A7
data8 0x8A2A65BA1F551F77, 0x0000BFF3 //A6
data8 0xA4852D0B1D87000A, 0x00003FF3 //A5
data8 0x963EB00039489476, 0x0000BFF3 //A4
data8 0xCD5244FF4F7313A5, 0x00003FF2 //A3
data8 0xC6F1E695363BCB26, 0x0000BFF1 //A2
data8 0xF4DAF4680DA54C02, 0x00003FEF //A1
data8 0xFFFFB7CFB3F2ABBE, 0x00003FFE //A0
// A = 2.0/sqrt(Pi)
data8 0x906EBA8214DB688D, 0x00003FFF
LOCAL_OBJECT_END(erf_data)
// double erf(double x)
// Input x and the result are both in f8. This part classifies x,
// dispatches the special cases to the labels further down, and evaluates
// the degree-19 polynomial path itself:
//   erf(x) = sign(x) * Pol19(y), y = significand(x) - 1.0
// The coefficient set is selected by the biased exponent of x.
.section .text
GLOBAL_LIBM_ENTRY(erf)
{ .mfi
alloc r32 = ar.pfs, 0, 17, 0, 0 // 17 locals: r32..r48
fmerge.se fArgAbsNorm = f1, f8 // significand of x, sign/exp of 1.0
adds rSignBit = 0x1, r0
}
{ .mfi
addl rDataPtr = @ltoff(erf_data), gp
fma.s1 fArgSqr = f8, f8, f0 // x^2
addl rThreeAndQ = 0x400A0, r0 // shifted bits of 3.25
}
;;
{ .mfi
getf.d rArg = f8 // x in GR
fclass.m p6,p0 = f8, 0x0b // is x denormal ?
shl rThreeAndQ = rThreeAndQ, 44 // bits of 3.25
}
{ .mfi
ld8 rDataPtr = [rDataPtr] // address of erf_data
nop.f 0
addl rBiasedExpOf4 = 0x40100, r0 // shifted bits of 4.0
}
;;
{ .mfi
addl rSaturation = 0x4017A, r0 // shifted bits of 5.90625
fclass.m p7,p0 = f8, 0xc7 // is x [S,Q]NaN or +/-0 ?
shl rSignBit = rSignBit, 63 // mask for sign bit
}
{ .mfi
addl rMask = 0x7FF00, r0 // Mask for index bits
nop.f 0
addl rBias = 0x3FE00, r0 // bias of 0.5 << 8
}
;;
{ .mfi
setf.d fThreeAndQ = rThreeAndQ // 3.25 in FP register
fclass.m p9,p0 = f8, 0x23 // is x +/- inf?
shr.u rShiftedArg = rArg, 44 // sign, exponent, top 8 fraction bits
}
{ .mfb
andcm rAbsArg = rArg, rSignBit // |x| in GR
nop.f 0
(p6) br.cond.spnt erf_denormal // branch out if x is denormal
}
;;
{ .mfi
and rShiftedArgMasked = rShiftedArg, rMask // biased exponent of x << 8
fmerge.s fArgAbs = f1, f8 // |x|
shr rShiftedAbsArg = rAbsArg, 44
}
{ .mfb
cmp.lt p8, p11 = rThreeAndQ, rAbsArg // p8 = 1 if |x| > 3.25
(p7) fma.d.s0 f8 = f8,f1,f8 // NaN or +/-0
(p7) br.ret.spnt b0 // exit for x = NaN or +/-0
}
;;
{ .mfi
sub rIndex = rShiftedArgMasked, rBias // index << 8
nop.f 0
cmp.lt p10, p0 = rShiftedArgMasked, rBias // p10 = 1 if |x| < 0.5
}
{ .mfb
// p8 = 1 if 3.25 < |x| < 4.0 (p8 and p11 stay as they are if p8 was 0)
(p8) cmp.lt p8, p11 = rShiftedAbsArg, rBiasedExpOf4
fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1 // y = significand - 1.0
(p10) br.cond.spnt erf_near_zero // branch out if |x| < 0.5
}
;;
.pred.rel "mutex", p8, p11
{ .mfi
(p8) adds rCoeffAddr1 = 1392, rDataPtr // coeff. for 3.25 <=|x|<4.0
(p9) fmerge.s f8 = f8,f1 // +/- inf
nop.i 0
}
{ .mfb
(p11) add rCoeffAddr1 = rDataPtr, rIndex// coeff. ##0,2,..14
nop.f 0
(p9) br.ret.spnt b0 // exit for x = +/- inf
}
;;
{ .mfi
adds rCoeffAddr2 = 16, rCoeffAddr1 // coeff. ##1,3,..15
fmerge.s fSignumX = f8, f1 // signum(x)
nop.i 0
}
{ .mfb
// p12 = 1 if the top 20 bits of |x| exceed those of 5.90625
cmp.lt p12, p0 = rSaturation, rShiftedAbsArg
nop.f 0
(p12) br.cond.spnt erf_saturation // branch out to the saturated result
}
;;
// Here if paths #3,4
// if path #4 we'll branch out after loading of 14 necessary coefficients
// The two pointers alternate: rCoeffAddr1 reads every second entry
// starting at the first one, rCoeffAddr2 every second entry starting at
// the next, each stepping by 32 bytes.
{.mfi
ldfe fA19 = [rCoeffAddr1], 32
nop.f 0
nop.i 0
}
{.mfi
ldfe fA18 = [rCoeffAddr2], 32
nop.f 0
adds rCoeffAddr3 = 1024, rDataPtr // start of coefficients ##16..19
}
;;
{.mfi
ldfe fA17 = [rCoeffAddr1], 32
nop.f 0
nop.i 0
}
{.mfi
ldfe fA16 = [rCoeffAddr2], 32
nop.f 0
nop.i 0
}
;;
{.mfi
ldfe fA15 = [rCoeffAddr1], 32
fma.s1 fTSqr = fArgAbsNorm, fArgAbsNorm, f0 // y^2
shr.u rIndex = rIndex, 2 // index << 6: 64 bytes per subrange
}
{.mfi
ldfe fA14 = [rCoeffAddr2], 32
nop.f 0
adds rCoeffAddr4 = 16, r0
}
;;
{.mfi
ldfe fA13 = [rCoeffAddr1], 32
nop.f 0
// address of coefficients ##16..19 (A3..A0) of this subrange
add rCoeffAddr3 = rCoeffAddr3, rIndex
}
{.mfi
ldfe fA12 = [rCoeffAddr2], 32
nop.f 0
cmp.lt p15, p14 = rArg, r0 // p15 = 1 if x < 0, p14 = 1 otherwise
}
;;
{.mfi
ldfe fA11 = [rCoeffAddr1], 32
nop.f 0
add rCoeffAddr4 = rCoeffAddr3, rCoeffAddr4 // rCoeffAddr3 + 16
}
{.mfi
ldfe fA10 = [rCoeffAddr2], 32
nop.f 0
nop.i 0
}
;;
{.mfi
ldfe fA9 = [rCoeffAddr1], 32
nop.f 0
nop.i 0
}
{.mfi
ldfe fA8 = [rCoeffAddr2], 32
nop.f 0
nop.i 0
}
;;
{.mfi
ldfe fA7 = [rCoeffAddr1], 32
fms.s1 fArgAbs = fArgAbs, f1, fThreeAndQ // |x| - 3.25, used by path #4
nop.i 0
}
{.mfb
ldfe fA6 = [rCoeffAddr2], 32
nop.f 0
(p8) br.cond.spnt erf_3q_4 // branch out if 3.25 < |x| < 4.0
}
;;
{.mfi
ldfe fA5 = [rCoeffAddr1], 32
fma.s1 fTDeg3 = fArgAbsNorm, fTSqr, f0 // y^3
nop.i 0
}
{.mfi
ldfe fA4 = [rCoeffAddr2], 32
fma.s1 fTQuadr = fTSqr, fTSqr, f0 // y^4
nop.i 0
}
;;
// Path #3 Polynomial Pol19(y) computation; y = fArgAbsNorm
{.mfi
ldfe fA3 = [rCoeffAddr3], 32
fma.s1 fArgAbsNormSgn = fArgAbsNorm, fSignumX, f0 // sign(x)*y
nop.i 0
}
{.mfi
ldfe fA2 = [rCoeffAddr4], 32
nop.f 0
nop.i 0
}
;;
{.mfi
ldfe fA1 = [rCoeffAddr3], 32
fma.s1 fRes = fA19, fArgAbsNorm, fA18 // A19*y + A18
nop.i 0
}
{.mfi
ldfe fA0 = [rCoeffAddr4], 32
nop.f 0
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA17 = fA17, fArgAbsNorm, fA16 // A17*y + A16
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA15 = fA15, fArgAbsNorm, fA14 // A15*y + A14
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fTDeg7 = fTDeg3, fTQuadr, f0 // y^7
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fA13 = fA13, fArgAbsNorm, fA12 // A13*y + A12
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA11 = fA11, fArgAbsNorm, fA10 // A11*y + A10
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA9 = fA9, fArgAbsNorm, fA8 // A9*y + A8
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRes = fRes, fTSqr, fA17 // A19*y^3 + ... + A16
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fA7 = fA7, fArgAbsNorm, fA6 // A7*y + A6
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA5 = fA5, fArgAbsNorm, f0 // A5*y (A4 is paired with A3)
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA15 = fA15, fTSqr, fA13 // A15*y^3 + ... + A12
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fA4 = fA4, fArgAbsNorm, fA3 // A4*y + A3
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA2 = fA2, fArgAbsNorm, fA1 // A2*y + A1
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA11 = fA11, fTSqr, fA9 // A11*y^3 + ... + A8
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA7 = fA7, fTSqr, fA5 // A7*y^3 + A6*y^2 + A5*y
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRes = fRes, fTQuadr, fA15 // A19*y^7 + ... + A12
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA4 = fA4, fTSqr, fA2 // A4*y^3 + ... + A1
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRes = fRes, fTQuadr, fA11 // A19*y^11 + ... + A8
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA4 = fA7, fTDeg3, fA4 // A7*y^6 + ... + A1
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRes = fRes, fTDeg7, fA4 // A19*y^18 + ... + A1
nop.i 0
}
;;
{ .mfi
nop.m 0
// result for negative argument: fRes*(-y) - A0 = -(fRes*y + A0)
(p15) fms.d.s0 f8 = fRes, fArgAbsNormSgn, fA0
nop.i 0
}
{ .mfb
nop.m 0
// result for positive argument: fRes*y + A0
(p14) fma.d.s0 f8 = fRes, fArgAbsNormSgn, fA0
br.ret.sptk b0
}
// Here if 3.25 < |x| < 4.0
// Computes erf(x) = sign(x) * Pol14(t), t = |x| - 3.25.
// State set up by the main path before branching here:
//   fArgAbs       = t
//   fA19 .. fA6   = Pol14 coefficients A14 .. A1 (in that order)
//   rCoeffAddr1   = address of Pol14 coefficient A0
//   fSignumX      = 1.0 with the sign of x
//   p15 / p14     = x negative / x not negative
.align 32
erf_3q_4:
.pred.rel "mutex", p14, p15
{ .mfi
ldfe fA5 = [rCoeffAddr1], 32 // Pol14 A0
fma.s1 fTSqr = fArgAbs, fArgAbs, f0 // t^2
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fRes = fA19, fArgAbs, fA18 // A14*t + A13
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA17 = fA17, fArgAbs, fA16 // A12*t + A11
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fA15 = fA15, fArgAbs, fA14 // A10*t + A9
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA13 = fA13, fArgAbs, fA12 // A8*t + A7
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fA11 = fA11, fArgAbs, fA10 // A6*t + A5
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA9 = fA9, fArgAbs, fA8 // A4*t + A3
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fArgAbsNormSgn = fArgAbs, fSignumX, f0 // sign(x)*t
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fTQuadr = fTSqr, fTSqr, f0 // t^4
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRes = fRes, fTSqr, fA17 // A14*t^3 + ... + A11
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA15 = fA15, fTSqr, fA13 // A10*t^3 + ... + A7
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA11 = fA11, fTSqr, fA9 // A6*t^3 + ... + A3
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fA7 = fA7, fArgAbs, fA6 // A2*t + A1
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fTDeg7 = fTQuadr, fTSqr, f0 // t^6 on this path, despite the name
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fRes = fRes, fTQuadr, fA15 // A14*t^7 + ... + A7
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA11 = fA11, fTSqr, fA7 // A6*t^5 + ... + A1
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRes = fRes, fTDeg7, fA11 // A14*t^13 + ... + A1
nop.i 0
}
;;
{ .mfi
nop.m 0
// result for negative argument: fRes*(-t) - A0 = -(fRes*t + A0)
(p15) fms.d.s0 f8 = fRes, fArgAbsNormSgn, fA5
nop.i 0
}
{ .mfb
nop.m 0
// result for positive argument: fRes*t + A0
(p14) fma.d.s0 f8 = fRes, fArgAbsNormSgn, fA5
br.ret.sptk b0
}
;;
// Here if |x| < 0.5
// Computes erf(x) = x * Pol9(s), s = x^2, Pol9(s) = A0 + A1*s + ... + A9*s^9.
// On entry fArgSqr already holds s and rDataPtr the address of erf_data.
// The polynomial is split as
//   x^9 * (A9*s^5 + ... + A4) + x * (A3*s^3 + ... + A0)
.align 32
erf_near_zero:
{ .mfi
adds rCoeffAddr1 = 1280, rDataPtr // address of A9
fma.s1 fTSqr = fArgSqr, fArgSqr, f0 // x^4
nop.i 0
}
{ .mfi
adds rCoeffAddr2 = 1328, rDataPtr // address of A7
nop.f 0
nop.i 0
}
;;
// A9..A2 are stored as pairs of doubles, A1 and A0 as extended entries
{ .mfi
ldfpd fA9, fA8 = [rCoeffAddr1], 16
nop.f 0
nop.i 0
}
{ .mfi
ldfpd fA7, fA6 = [rCoeffAddr2], 16
nop.f 0
nop.i 0
}
;;
{ .mfi
ldfpd fA5, fA4 = [rCoeffAddr1], 16
nop.f 0
nop.i 0
}
{ .mfi
ldfpd fA3, fA2 = [rCoeffAddr2], 16
nop.f 0
nop.i 0
}
;;
{ .mfi
ldfe fA1 = [rCoeffAddr1]
nop.f 0
nop.i 0
}
{ .mfi
ldfe fA0 = [rCoeffAddr2]
nop.f 0
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fTQuadr = fTSqr, fTSqr, f0 // x^8
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRes = fA9, fArgSqr, fA8 // A9*s + A8
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fA7 = fA7, fArgSqr, fA6 // A7*s + A6
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA3 = fA3, fArgSqr, fA2 // A3*s + A2
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fA5 = fA5, fArgSqr, fA4 // A5*s + A4
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA1 = fA1, fArgSqr, fA0 // A1*s + A0
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 fTQuadrSgn = fTQuadr, f8, f0 // x^9
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRes = fRes, fTSqr, fA7 // A9*s^3 + ... + A6
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA1 = fA3, fTSqr, fA1 // A3*s^3 + ... + A0
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fRes = fRes, fTSqr, fA5 // A9*s^5 + ... + A4
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA1 = fA1, f8, f0 // x*(A3*s^3 + ... + A0)
nop.i 0
}
;;
{ .mfb
nop.m 0
fma.d.s0 f8 = fRes, fTQuadrSgn, fA1 // x*Pol9(x^2)
br.ret.sptk b0 // Exit for |x| < 0.5
};;
// Here if 5.90625 <= |x| < +inf
// (the main path branches here when the top 20 bits of |x| exceed 0x4017A)
// Returns sign(x) times the saturation constant stored at offset 1376 of
// erf_data. On entry rDataPtr holds the address of erf_data and fSignumX
// holds 1.0 with the sign of x.
.align 32
erf_saturation:
{ .mfi
adds rDataPtr = 1376, rDataPtr // address of the saturation constant
nop.f 0
nop.i 0
}
;;
{ .mfi
ldfe fA0 = [rDataPtr] // all-ones significand, exponent 0x3FFE
nop.f 0
nop.i 0
}
;;
{ .mfb
nop.m 0
// The constant is not exactly representable in double, so this
// multiply rounds when it converts the result to double in s0.
fma.d.s0 f8 = fA0, fSignumX, f0 // sign(x)*(1.0 - 2^(-64))
// Exit for 5.90625 <= |x| < +inf
br.ret.sptk b0 // Exit for 5.90625 <=|x|< +inf
}
;;
// Here if x is double precision denormal
// Returns A0*x - x^2 for positive x and A0*x + x^2 for negative x, with
// A0 = 2.0/sqrt(Pi) read from offset 1632 of erf_data. On entry rDataPtr
// holds the address of erf_data.
.align 32
erf_denormal:
{ .mfi
adds rDataPtr = 1632, rDataPtr // address of A0 = 2.0/sqrt(Pi)
fclass.m p7,p8 = f8, 0x0a // is x -denormal ?
nop.i 0
}
;;
{ .mfi
ldfe fA0 = [rDataPtr] // A0
nop.f 0
nop.i 0
}
;;
{ .mfi
nop.m 0
fma.s1 fA0 = fA0,f8,f0 // A0*x
nop.i 0
}
;;
{ .mfi
nop.m 0
(p7) fma.d.s0 f8 = f8,f8,fA0 // -denormal: A0*x + x^2
nop.i 0
}
{ .mfb
nop.m 0
(p8) fnma.d.s0 f8 = f8,f8,fA0 // +denormal: A0*x - x^2
br.ret.sptk b0 // Exit for denormal
}
;;
GLOBAL_LIBM_END(erf)

1197
sysdeps/ia64/fpu/s_erfc.S Normal file

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More