1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-30 22:43:12 +03:00

Fix pow overflow in non-default rounding modes (bug 16315).

This patch fixes bug 16315, bad pow handling of overflow/underflow in
non-default rounding modes.  Tests of pow are duly converted to
ALL_RM_TEST to run all tests in all rounding modes.

There are two main issues here.  First, various implementations
compute a negative result by negating a positive result, but this
yields inappropriate overflow / underflow values for directed
rounding, so either overflow / underflow results need recomputing in
the correct sign, or the relevant overflowing / underflowing operation
needs to be made to have a result of the correct sign.  Second, the
dbl-64 implementation sets FE_TONEAREST internally; in the overflow /
underflow case, the result needs recomputing in the original rounding
mode.

Tested x86_64 and x86 and ulps updated accordingly.

	[BZ #16315]
	* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly
	overflowing or underflowing operations take place with sign of
	result.
	* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise.
	* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
	* sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>.
	(__ieee754_pow): Recompute overflowing and underflowing results in
	original rounding mode.
	* sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>.
	(__powl_helper): Allow negative argument X and scale negated value
	as needed.  Avoid passing value outside [-1, 1] to f2xm1.
	* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly
	overflowing or underflowing operations take place with sign of
	result.
	* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]:
	Include <math.h>.
	* math/auto-libm-test-in: Add more tests of pow.
	* math/auto-libm-test-out: Regenerated.
	* math/libm-test.inc (pow_test): Use ALL_RM_TEST.
	(pow_tonearest_test_data): Remove.
	(pow_test_tonearest): Likewise.
	(pow_towardzero_test_data): Likewise.
	(pow_test_towardzero): Likewise.
	(pow_downward_test_data): Likewise.
	(pow_test_downward): Likewise.
	(pow_upward_test_data): Likewise.
	(pow_test_upward): Likewise.
	(main): Don't call removed functions.
	* sysdeps/i386/fpu/libm-test-ulps: Update.
	* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
This commit is contained in:
Joseph Myers
2014-06-23 20:12:33 +00:00
parent 5686b236cc
commit 4da6db5188
14 changed files with 2955 additions and 235 deletions

View File

@ -1,3 +1,37 @@
2014-06-23 Joseph Myers <joseph@codesourcery.com>
[BZ #16315]
* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly
overflowing or underflowing operations take place with sign of
result.
* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise.
* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
* sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>.
(__ieee754_pow): Recompute overflowing and underflowing results in
original rounding mode.
* sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>.
(__powl_helper): Allow negative argument X and scale negated value
as needed. Avoid passing value outside [-1, 1] to f2xm1.
* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly
overflowing or underflowing operations take place with sign of
result.
* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]:
Include <math.h>.
* math/auto-libm-test-in: Add more tests of pow.
* math/auto-libm-test-out: Regenerated.
* math/libm-test.inc (pow_test): Use ALL_RM_TEST.
(pow_tonearest_test_data): Remove.
(pow_test_tonearest): Likewise.
(pow_towardzero_test_data): Likewise.
(pow_test_towardzero): Likewise.
(pow_downward_test_data): Likewise.
(pow_test_downward): Likewise.
(pow_upward_test_data): Likewise.
(pow_test_upward): Likewise.
(main): Don't call removed functions.
* sysdeps/i386/fpu/libm-test-ulps: Update.
* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
2014-06-23 Roland McGrath <roland@hack.frob.com> 2014-06-23 Roland McGrath <roland@hack.frob.com>
* sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data: * sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data:

22
NEWS
View File

@ -10,17 +10,17 @@ Version 2.20
* The following bugs are resolved with this release: * The following bugs are resolved with this release:
6804, 9894, 12994, 13347, 13651, 14308, 14770, 15119, 15132, 15347, 15514, 6804, 9894, 12994, 13347, 13651, 14308, 14770, 15119, 15132, 15347, 15514,
15698, 15804, 15894, 15946, 16002, 16064, 16095, 16198, 16284, 16348, 15698, 15804, 15894, 15946, 16002, 16064, 16095, 16198, 16284, 16315,
16349, 16357, 16362, 16447, 16516, 16532, 16545, 16564, 16574, 16599, 16348, 16349, 16357, 16362, 16447, 16516, 16532, 16545, 16564, 16574,
16600, 16609, 16610, 16611, 16613, 16619, 16623, 16629, 16632, 16634, 16599, 16600, 16609, 16610, 16611, 16613, 16619, 16623, 16629, 16632,
16639, 16642, 16648, 16649, 16670, 16674, 16677, 16680, 16681, 16683, 16634, 16639, 16642, 16648, 16649, 16670, 16674, 16677, 16680, 16681,
16689, 16695, 16701, 16706, 16707, 16712, 16713, 16714, 16724, 16731, 16683, 16689, 16695, 16701, 16706, 16707, 16712, 16713, 16714, 16724,
16739, 16740, 16743, 16754, 16758, 16759, 16760, 16770, 16786, 16789, 16731, 16739, 16740, 16743, 16754, 16758, 16759, 16760, 16770, 16786,
16791, 16796, 16799, 16800, 16815, 16823, 16824, 16831, 16838, 16849, 16789, 16791, 16796, 16799, 16800, 16815, 16823, 16824, 16831, 16838,
16854, 16876, 16877, 16878, 16882, 16885, 16888, 16890, 16912, 16915, 16849, 16854, 16876, 16877, 16878, 16882, 16885, 16888, 16890, 16912,
16916, 16917, 16922, 16927, 16928, 16932, 16943, 16958, 16965, 16966, 16915, 16916, 16917, 16922, 16927, 16928, 16932, 16943, 16958, 16965,
16967, 16977, 16978, 16984, 16990, 16996, 17009, 17022, 17031, 17042, 16966, 16967, 16977, 16978, 16984, 16990, 16996, 17009, 17022, 17031,
17048, 17058, 17062, 17069, 17075, 17079. 17042, 17048, 17058, 17062, 17069, 17075, 17079.
* Optimized strchr implementation for AArch64. Contributed by ARM Ltd. * Optimized strchr implementation for AArch64. Contributed by ARM Ltd.

View File

@ -1613,6 +1613,71 @@ pow -max 0x1.ffffffffffffffffffffffffffffp+112
pow -max 0x1.ffffffffffffffffffffffffffffp+113 pow -max 0x1.ffffffffffffffffffffffffffffp+113
pow -max max pow -max max
pow -0x1p65 2
pow -0x1p65 3
pow -0x1p65 4
pow -0x1p65 5
pow -0x1p43 3
pow -0x1p43 4
pow -0x1p43 5
pow -0x1p33 4
pow -0x1p33 5
pow -0x1p26 5
pow -0x1p-65 -2
pow -0x1p-65 -3
pow -0x1p-65 -4
pow -0x1p-65 -5
pow -0x1p-43 -3
pow -0x1p-43 -4
pow -0x1p-43 -5
pow -0x1p-33 -4
pow -0x1p-33 -5
pow -0x1p-26 -5
pow -0x1p513 2
pow -0x1p513 3
pow -0x1p513 4
pow -0x1p513 5
pow -0x1p342 3
pow -0x1p342 4
pow -0x1p342 5
pow -0x1p257 4
pow -0x1p257 5
pow -0x1p205 5
pow -0x1p-513 -2
pow -0x1p-513 -3
pow -0x1p-513 -4
pow -0x1p-513 -5
pow -0x1p-342 -3
pow -0x1p-342 -4
pow -0x1p-342 -5
pow -0x1p-257 -4
pow -0x1p-257 -5
pow -0x1p-205 -5
pow -0x1p8192 2
pow -0x1p8192 3
pow -0x1p8192 4
pow -0x1p8192 5
pow -0x1p5462 3
pow -0x1p5462 4
pow -0x1p5462 5
pow -0x1p4097 4
pow -0x1p4097 5
pow -0x1p3277 5
pow -0x1p64 257
pow -0x1p-8192 -2
pow -0x1p-8192 -3
pow -0x1p-8192 -4
pow -0x1p-8192 -5
pow -0x1p-5462 -3
pow -0x1p-5462 -4
pow -0x1p-5462 -5
pow -0x1p-4097 -4
pow -0x1p-4097 -5
pow -0x1p-3277 -5
pow -0x1p-64 -257
pow -0.5 126 pow -0.5 126
pow -0.5 127 pow -0.5 127
pow -0.5 -126 pow -0.5 -126

File diff suppressed because it is too large Load Diff

View File

@ -8697,69 +8697,7 @@ static const struct test_ff_f_data pow_test_data[] =
static void static void
pow_test (void) pow_test (void)
{ {
ALL_RM_TEST (pow, 0, pow_test_data, RUN_TEST_LOOP_ff_f, END);
START (pow, 0);
RUN_TEST_LOOP_ff_f (pow, pow_test_data, );
END;
}
static const struct test_ff_f_data pow_tonearest_test_data[] =
{
AUTO_TESTS_ff_f (pow),
};
static void
pow_test_tonearest (void)
{
START (pow_tonearest, 0);
RUN_TEST_LOOP_ff_f (pow, pow_tonearest_test_data, FE_TONEAREST);
END;
}
static const struct test_ff_f_data pow_towardzero_test_data[] =
{
TEST_ff_f (pow, 1.0625L, 1.125L, 1.070582293028761362162622578677070098674L),
TEST_ff_f (pow, 1.5L, 1.03125L, 1.519127098714743184071644334163037684948L),
};
static void
pow_test_towardzero (void)
{
START (pow_towardzero, 0);
RUN_TEST_LOOP_ff_f (pow, pow_towardzero_test_data, FE_TOWARDZERO);
END;
}
static const struct test_ff_f_data pow_downward_test_data[] =
{
TEST_ff_f (pow, 1.0625L, 1.125L, 1.070582293028761362162622578677070098674L),
TEST_ff_f (pow, 1.5L, 1.03125L, 1.519127098714743184071644334163037684948L),
};
static void
pow_test_downward (void)
{
START (pow_downward, 0);
RUN_TEST_LOOP_ff_f (pow, pow_downward_test_data, FE_DOWNWARD);
END;
}
static const struct test_ff_f_data pow_upward_test_data[] =
{
TEST_ff_f (pow, 1.0625L, 1.125L, 1.070582293028761362162622578677070098674L),
TEST_ff_f (pow, 1.5L, 1.03125L, 1.519127098714743184071644334163037684948L),
};
static void
pow_test_upward (void)
{
START (pow_upward, 0);
RUN_TEST_LOOP_ff_f (pow, pow_upward_test_data, FE_UPWARD);
END;
} }
@ -9946,10 +9884,6 @@ main (int argc, char **argv)
fabs_test (); fabs_test ();
hypot_test (); hypot_test ();
pow_test (); pow_test ();
pow_test_tonearest ();
pow_test_towardzero ();
pow_test_downward ();
pow_test_upward ();
sqrt_test (); sqrt_test ();
/* Error and gamma functions: */ /* Error and gamma functions: */

View File

@ -144,12 +144,22 @@ ENTRY(__ieee754_pow)
4: fldl MO(one) // 1 : x 4: fldl MO(one) // 1 : x
fxch fxch
/* If y is even, take the absolute value of x. Otherwise,
ensure all intermediate values that might overflow have the
sign of x. */
testb $1, %al
jnz 6f
fabs
6: shrdl $1, %edx, %eax 6: shrdl $1, %edx, %eax
jnc 5f jnc 5f
fxch fxch
fabs
fmul %st(1) // x : ST*x fmul %st(1) // x : ST*x
fxch fxch
5: fmul %st(0), %st // x*x : ST*x 5: fld %st // x : x : ST*x
fabs // |x| : x : ST*x
fmulp // |x|*x : ST*x
shrl $1, %edx shrl $1, %edx
movl %eax, %ecx movl %eax, %ecx
orl %edx, %ecx orl %edx, %ecx
@ -207,27 +217,28 @@ ENTRY(__ieee754_pow)
fxch // fract(y*log2(x)) : int(y*log2(x)) fxch // fract(y*log2(x)) : int(y*log2(x))
f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x)) f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x)) faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) // Before scaling, we must negate if x is negative and y is an
// odd integer.
testb $2, %dh testb $2, %dh
jz 292f jz 291f
// x is negative. If y is an odd integer, negate the result. // x is negative. If y is an odd integer, negate the result.
fldl 20(%esp) // y : abs(result) fldl 20(%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x))
fld %st // y : y : abs(result) fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x))
fabs // |y| : y : abs(result) fabs // |y| : y : 2^fract(y*log2(x)) : int(y*log2(x))
fcompl MO(p63) // y : abs(result) fcompl MO(p63) // y : 2^fract(y*log2(x)) : int(y*log2(x))
fnstsw fnstsw
sahf sahf
jnc 291f jnc 290f
// We must find out whether y is an odd integer. // We must find out whether y is an odd integer.
fld %st // y : y : abs(result) fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x))
fistpll (%esp) // y : abs(result) fistpll (%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x))
fildll (%esp) // int(y) : y : abs(result) fildll (%esp) // int(y) : y : 2^fract(y*log2(x)) : int(y*log2(x))
fucompp // abs(result) fucompp // 2^fract(y*log2(x)) : int(y*log2(x))
fnstsw fnstsw
sahf sahf
jne 292f jne 291f
// OK, the value is an integer, but is it odd? // OK, the value is an integer, but is it odd?
popl %eax popl %eax
@ -235,14 +246,17 @@ ENTRY(__ieee754_pow)
popl %edx popl %edx
cfi_adjust_cfa_offset (-4) cfi_adjust_cfa_offset (-4)
andb $1, %al andb $1, %al
jz 290f // jump if not odd jz 292f // jump if not odd
// It's an odd integer. // It's an odd integer.
fchs fchs
290: ret jmp 292f
cfi_adjust_cfa_offset (8) cfi_adjust_cfa_offset (8)
291: fstp %st(0) // abs(result) 290: fstp %st(0) // 2^fract(y*log2(x)) : int(y*log2(x))
292: addl $8, %esp 291: addl $8, %esp
cfi_adjust_cfa_offset (-8) cfi_adjust_cfa_offset (-8)
292: fscale // +/- 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
fstp %st(1) // +/- 2^fract(y*log2(x))*2^int(y*log2(x))
ret ret

View File

@ -129,12 +129,22 @@ ENTRY(__ieee754_powf)
4: fldl MO(one) // 1 : x 4: fldl MO(one) // 1 : x
fxch fxch
/* If y is even, take the absolute value of x. Otherwise,
ensure all intermediate values that might overflow have the
sign of x. */
testb $1, %dl
jnz 6f
fabs
6: shrl $1, %edx 6: shrl $1, %edx
jnc 5f jnc 5f
fxch fxch
fabs
fmul %st(1) // x : ST*x fmul %st(1) // x : ST*x
fxch fxch
5: fmul %st(0), %st // x*x : ST*x 5: fld %st // x : x : ST*x
fabs // |x| : x : ST*x
fmulp // |x|*x : ST*x
testl %edx, %edx testl %edx, %edx
jnz 6b jnz 6b
fstp %st(0) // ST*x fstp %st(0) // ST*x

View File

@ -151,7 +151,7 @@ ENTRY(__ieee754_powl)
fcompl MO(p3) // y : x fcompl MO(p3) // y : x
fnstsw fnstsw
sahf sahf
jnc 2f jnc 3f
popl %eax popl %eax
cfi_adjust_cfa_offset (-4) cfi_adjust_cfa_offset (-4)
popl %edx popl %edx
@ -166,12 +166,22 @@ ENTRY(__ieee754_powl)
4: fldl MO(one) // 1 : x 4: fldl MO(one) // 1 : x
fxch fxch
/* If y is even, take the absolute value of x. Otherwise,
ensure all intermediate values that might overflow have the
sign of x. */
testb $1, %al
jnz 6f
fabs
6: shrdl $1, %edx, %eax 6: shrdl $1, %edx, %eax
jnc 5f jnc 5f
fxch fxch
fabs
fmul %st(1) // x : ST*x fmul %st(1) // x : ST*x
fxch fxch
5: fmul %st(0), %st // x*x : ST*x 5: fld %st // x : x : ST*x
fabs // |x| : x : ST*x
fmulp // |x|*x : ST*x
shrl $1, %edx shrl $1, %edx
movl %eax, %ecx movl %eax, %ecx
orl %edx, %ecx orl %edx, %ecx
@ -198,79 +208,31 @@ ENTRY(__ieee754_powl)
cfi_adjust_cfa_offset (8) cfi_adjust_cfa_offset (8)
.align ALIGNARG(4) .align ALIGNARG(4)
2: // y is a large integer (absolute value at least 8), but 2: // y is a large integer (absolute value at least 1L<<63).
// may be odd unless at least 1L<<64. So it may be necessary
// to adjust the sign of a negative result afterwards.
fxch // x : y
fabs // |x| : y
fxch // y : |x|
// If y has absolute value at least 1L<<78, then any finite // If y has absolute value at least 1L<<78, then any finite
// nonzero x will result in 0 (underflow), 1 or infinity (overflow). // nonzero x will result in 0 (underflow), 1 or infinity (overflow).
// Saturate y to those bounds to avoid overflow in the calculation // Saturate y to those bounds to avoid overflow in the calculation
// of y*log2(x). // of y*log2(x).
fld %st // y : y : |x| fld %st // y : y : x
fabs // |y| : y : |x| fabs // |y| : y : x
fcompl MO(p78) // y : |x| fcompl MO(p78) // y : x
fnstsw fnstsw
sahf sahf
jc 3f jc 3f
fstp %st(0) // pop y fstp %st(0) // pop y
fldl MO(p78) // 1L<<78 : |x| fldl MO(p78) // 1L<<78 : x
testb $2, %dl testb $2, %dl
jz 3f // y > 0 jz 3f // y > 0
fchs // -(1L<<78) : |x| fchs // -(1L<<78) : x
.align ALIGNARG(4) .align ALIGNARG(4)
3: /* y is a real number. */ 3: /* y is a real number. */
subl $28, %esp subl $28, %esp
cfi_adjust_cfa_offset (28) cfi_adjust_cfa_offset (28)
fstpt 12(%esp) // x fstpt 12(%esp) // x
fstpt (%esp) // <empty> fstpt (%esp) // <empty>
mov %edx, 24(%esp)
call HIDDEN_JUMPTARGET (__powl_helper) // <result> call HIDDEN_JUMPTARGET (__powl_helper) // <result>
mov 24(%esp), %edx addl $36, %esp
addl $28, %esp cfi_adjust_cfa_offset (-36)
cfi_adjust_cfa_offset (-28)
testb $2, %dh
jz 292f
// x is negative. If y is an odd integer, negate the result.
#ifdef PIC
LOAD_PIC_REG (cx)
#endif
fldt 24(%esp) // y : abs(result)
fld %st // y : y : abs(result)
fabs // |y| : y : abs(result)
fcompl MO(p64) // y : abs(result)
fnstsw
sahf
jnc 291f
fldl MO(p63) // p63 : y : abs(result)
fxch // y : p63 : abs(result)
fprem // y%p63 : p63 : abs(result)
fstp %st(1) // y%p63 : abs(result)
// We must find out whether y is an odd integer.
fld %st // y : y : abs(result)
fistpll (%esp) // y : abs(result)
fildll (%esp) // int(y) : y : abs(result)
fucompp // abs(result)
fnstsw
sahf
jne 292f
// OK, the value is an integer, but is it odd?
popl %eax
cfi_adjust_cfa_offset (-4)
popl %edx
cfi_adjust_cfa_offset (-4)
andb $1, %al
jz 290f // jump if not odd
// It's an odd integer.
fchs
290: ret
cfi_adjust_cfa_offset (8)
291: fstp %st(0) // abs(result)
292: addl $8, %esp
cfi_adjust_cfa_offset (-8)
ret ret
// pow(x,<EFBFBD>0) = 1 // pow(x,<EFBFBD>0) = 1

View File

@ -1653,6 +1653,8 @@ double: 1
float: 1 float: 1
idouble: 1 idouble: 1
ifloat: 1 ifloat: 1
ildouble: 4
ldouble: 4
Function: "pow_tonearest": Function: "pow_tonearest":
ildouble: 1 ildouble: 1
@ -1663,14 +1665,16 @@ double: 1
float: 1 float: 1
idouble: 1 idouble: 1
ifloat: 1 ifloat: 1
ildouble: 1
ldouble: 1
Function: "pow_upward": Function: "pow_upward":
double: 1 double: 1
float: 1 float: 1
idouble: 1 idouble: 1
ifloat: 1 ifloat: 1
ildouble: 1 ildouble: 2
ldouble: 1 ldouble: 2
Function: "sin": Function: "sin":
ildouble: 1 ildouble: 1

View File

@ -34,6 +34,7 @@
/* round to nearest mode of IEEE 754 standard. */ /* round to nearest mode of IEEE 754 standard. */
/* */ /* */
/***************************************************************************/ /***************************************************************************/
#include <math.h>
#include "endian.h" #include "endian.h"
#include "upow.h" #include "upow.h"
#include <dla.h> #include <dla.h>
@ -91,27 +92,33 @@ __ieee754_pow (double x, double y)
{ /* if y<-1 or y>1 */ { /* if y<-1 or y>1 */
double retval; double retval;
SET_RESTORE_ROUND (FE_TONEAREST); {
SET_RESTORE_ROUND (FE_TONEAREST);
/* Avoid internal underflow for tiny y. The exact value of y does /* Avoid internal underflow for tiny y. The exact value of y does
not matter if |y| <= 2**-64. */ not matter if |y| <= 2**-64. */
if (ABS (y) < 0x1p-64) if (ABS (y) < 0x1p-64)
y = y < 0 ? -0x1p-64 : 0x1p-64; y = y < 0 ? -0x1p-64 : 0x1p-64;
z = log1 (x, &aa, &error); /* x^y =e^(y log (X)) */ z = log1 (x, &aa, &error); /* x^y =e^(y log (X)) */
t = y * CN; t = y * CN;
y1 = t - (t - y); y1 = t - (t - y);
y2 = y - y1; y2 = y - y1;
t = z * CN; t = z * CN;
a1 = t - (t - z); a1 = t - (t - z);
a2 = (z - a1) + aa; a2 = (z - a1) + aa;
a = y1 * a1; a = y1 * a1;
aa = y2 * a1 + y * a2; aa = y2 * a1 + y * a2;
a1 = a + aa; a1 = a + aa;
a2 = (a - a1) + aa; a2 = (a - a1) + aa;
error = error * ABS (y); error = error * ABS (y);
t = __exp1 (a1, a2, 1.9e16 * error); /* return -10 or 0 if wasn't computed exactly */ t = __exp1 (a1, a2, 1.9e16 * error); /* return -10 or 0 if wasn't computed exactly */
retval = (t > 0) ? t : power1 (x, y); retval = (t > 0) ? t : power1 (x, y);
}
if (__isinf (retval))
retval = huge * huge;
else if (retval == 0)
retval = tiny * tiny;
return retval; return retval;
} }
@ -164,7 +171,21 @@ __ieee754_pow (double x, double y)
return y < 0 ? 0.0 : INF.x; return y < 0 ? 0.0 : INF.x;
} }
/* if y even or odd */ /* if y even or odd */
return (k == 1) ? __ieee754_pow (-x, y) : -__ieee754_pow (-x, y); if (k == 1)
return __ieee754_pow (-x, y);
else
{
double retval;
{
SET_RESTORE_ROUND (FE_TONEAREST);
retval = -__ieee754_pow (-x, y);
}
if (__isinf (retval))
retval = -huge * huge;
else if (retval == 0)
retval = -tiny * tiny;
return retval;
}
} }
/* x>0 */ /* x>0 */

View File

@ -18,6 +18,7 @@
#include <math.h> #include <math.h>
#include <math_private.h> #include <math_private.h>
#include <stdbool.h>
/* High parts and low parts of -log (k/16), for integer k from 12 to /* High parts and low parts of -log (k/16), for integer k from 12 to
24. */ 24. */
@ -63,15 +64,32 @@ acc_split (long double *rhi, long double *rlo, long double hi, long double lo,
extern long double __powl_helper (long double x, long double y); extern long double __powl_helper (long double x, long double y);
libm_hidden_proto (__powl_helper) libm_hidden_proto (__powl_helper)
/* Given X a value that is finite and nonzero, or a NaN, and only /* Given X a value that is finite and nonzero, or a NaN, and Y a
negative if Y is not an integer, and Y a finite nonzero value with finite nonzero value with 0x1p-79 <= |Y| <= 0x1p78, compute X to
0x1p-79 <= |Y| <= 0x1p78, compute X to the power Y. */ the power Y. */
long double long double
__powl_helper (long double x, long double y) __powl_helper (long double x, long double y)
{ {
if (isnan (x) || x < 0) if (isnan (x))
return __ieee754_expl (y * __ieee754_logl (x)); return __ieee754_expl (y * __ieee754_logl (x));
bool negate;
if (x < 0)
{
long double absy = fabsl (y);
if (absy >= 0x1p64L)
negate = false;
else
{
unsigned long long yll = absy;
if (yll != absy)
return __ieee754_expl (y * __ieee754_logl (x));
negate = (yll & 1) != 0;
}
x = fabsl (x);
}
else
negate = false;
/* We need to compute Y * log2 (X) to at least 64 bits after the /* We need to compute Y * log2 (X) to at least 64 bits after the
point for normal results (that is, to at least 78 bits point for normal results (that is, to at least 78 bits
@ -199,11 +217,17 @@ __powl_helper (long double x, long double y)
fractional parts. */ fractional parts. */
long double log2_res_int = __roundl (log2_res_hi); long double log2_res_int = __roundl (log2_res_hi);
long double log2_res_frac = log2_res_hi - log2_res_int + log2_res_lo; long double log2_res_frac = log2_res_hi - log2_res_int + log2_res_lo;
/* If the integer part is very large, the computed fractional part
may be outside the valid range for f2xm1. */
if (fabsl (log2_res_int) > 16500)
log2_res_frac = 0;
/* Compute the final result. */ /* Compute the final result. */
long double res; long double res;
asm ("f2xm1" : "=t" (res) : "0" (log2_res_frac)); asm ("f2xm1" : "=t" (res) : "0" (log2_res_frac));
res += 1.0L; res += 1.0L;
if (negate)
res = -res;
asm ("fscale" : "=t" (res) : "0" (res), "u" (log2_res_int)); asm ("fscale" : "=t" (res) : "0" (res), "u" (log2_res_int));
return res; return res;
} }

View File

@ -141,7 +141,7 @@ ENTRY(__ieee754_powl)
fabs // |y| : 8 : y : x fabs // |y| : 8 : y : x
fcomip %st(1), %st // 8 : y : x fcomip %st(1), %st // 8 : y : x
fstp %st(0) // y : x fstp %st(0) // y : x
jnc 2f jnc 3f
mov -8(%rsp),%eax mov -8(%rsp),%eax
mov -4(%rsp),%edx mov -4(%rsp),%edx
orl $0, %edx orl $0, %edx
@ -154,12 +154,22 @@ ENTRY(__ieee754_powl)
4: fldl MO(one) // 1 : x 4: fldl MO(one) // 1 : x
fxch fxch
/* If y is even, take the absolute value of x. Otherwise,
ensure all intermediate values that might overflow have the
sign of x. */
testb $1, %al
jnz 6f
fabs
6: shrdl $1, %edx, %eax 6: shrdl $1, %edx, %eax
jnc 5f jnc 5f
fxch fxch
fabs
fmul %st(1) // x : ST*x fmul %st(1) // x : ST*x
fxch fxch
5: fmul %st(0), %st // x*x : ST*x 5: fld %st // x : x : ST*x
fabs // |x| : x : ST*x
fmulp // |x|*x : ST*x
shrl $1, %edx shrl $1, %edx
movl %eax, %ecx movl %eax, %ecx
orl %edx, %ecx orl %edx, %ecx
@ -177,71 +187,32 @@ ENTRY(__ieee754_powl)
ret ret
.align ALIGNARG(4) .align ALIGNARG(4)
2: // y is a large integer (absolute value at least 8), but 2: // y is a large integer (absolute value at least 1L<<63).
// may be odd unless at least 1L<<64. So it may be necessary
// to adjust the sign of a negative result afterwards.
fxch // x : y
fabs // |x| : y
fxch // y : |x|
// If y has absolute value at least 1L<<78, then any finite // If y has absolute value at least 1L<<78, then any finite
// nonzero x will result in 0 (underflow), 1 or infinity (overflow). // nonzero x will result in 0 (underflow), 1 or infinity (overflow).
// Saturate y to those bounds to avoid overflow in the calculation // Saturate y to those bounds to avoid overflow in the calculation
// of y*log2(x). // of y*log2(x).
fldl MO(p78) // 1L<<78 : y : |x| fldl MO(p78) // 1L<<78 : y : x
fld %st(1) // y : 1L<<78 : y : |x| fld %st(1) // y : 1L<<78 : y : x
fabs // |y| : 1L<<78 : y : |x| fabs // |y| : 1L<<78 : y : x
fcomip %st(1), %st // 1L<<78 : y : |x| fcomip %st(1), %st // 1L<<78 : y : x
fstp %st(0) // y : |x| fstp %st(0) // y : x
jc 3f jc 3f
fstp %st(0) // pop y fstp %st(0) // pop y
fldl MO(p78) // 1L<<78 : |x| fldl MO(p78) // 1L<<78 : x
testb $2, %dl testb $2, %dl
jz 3f // y > 0 jz 3f // y > 0
fchs // -(1L<<78) : |x| fchs // -(1L<<78) : x
.align ALIGNARG(4) .align ALIGNARG(4)
3: /* y is a real number. */ 3: /* y is a real number. */
subq $40, %rsp subq $40, %rsp
cfi_adjust_cfa_offset (40) cfi_adjust_cfa_offset (40)
fstpt 16(%rsp) // x fstpt 16(%rsp) // x
fstpt (%rsp) // <empty> fstpt (%rsp) // <empty>
mov %edx, 32(%rsp)
call HIDDEN_JUMPTARGET (__powl_helper) // <result> call HIDDEN_JUMPTARGET (__powl_helper) // <result>
mov 32(%rsp), %edx
addq $40, %rsp addq $40, %rsp
cfi_adjust_cfa_offset (-40) cfi_adjust_cfa_offset (-40)
testb $2, %dh ret
jz 292f
// x is negative. If y is an odd integer, negate the result.
fldt 24(%rsp) // y : abs(result)
fldl MO(p64) // 1L<<64 : y : abs(result)
fld %st(1) // y : 1L<<64 : y : abs(result)
fabs // |y| : 1L<<64 : y : abs(result)
fcomip %st(1), %st // 1L<<64 : y : abs(result)
fstp %st(0) // y : abs(result)
jnc 291f
fldl MO(p63) // p63 : y : abs(result)
fxch // y : p63 : abs(result)
fprem // y%p63 : p63 : abs(result)
fstp %st(1) // y%p63 : abs(result)
// We must find out whether y is an odd integer.
fld %st // y : y : abs(result)
fistpll -8(%rsp) // y : abs(result)
fildll -8(%rsp) // int(y) : y : abs(result)
fucomip %st(1),%st // y : abs(result)
ffreep %st // abs(result)
jne 292f
// OK, the value is an integer, but is it odd?
mov -8(%rsp), %eax
mov -4(%rsp), %edx
andb $1, %al
jz 290f // jump if not odd
// It's an odd integer.
fchs
290: ret
291: fstp %st(0) // abs(result)
292: ret
// pow(x,<EFBFBD>0) = 1 // pow(x,<EFBFBD>0) = 1
.align ALIGNARG(4) .align ALIGNARG(4)

View File

@ -1736,8 +1736,12 @@ ildouble: 1
ldouble: 1 ldouble: 1
Function: "pow_downward": Function: "pow_downward":
double: 1
float: 1 float: 1
idouble: 1
ifloat: 1 ifloat: 1
ildouble: 4
ldouble: 4
Function: "pow_tonearest": Function: "pow_tonearest":
float: 1 float: 1
@ -1746,15 +1750,21 @@ ildouble: 1
ldouble: 1 ldouble: 1
Function: "pow_towardzero": Function: "pow_towardzero":
double: 1
float: 1 float: 1
ifloat: 1 idouble: 1
Function: "pow_upward":
float: 1
ifloat: 1 ifloat: 1
ildouble: 1 ildouble: 1
ldouble: 1 ldouble: 1
Function: "pow_upward":
double: 1
float: 1
idouble: 1
ifloat: 1
ildouble: 2
ldouble: 2
Function: "sin": Function: "sin":
ildouble: 1 ildouble: 1
ldouble: 1 ldouble: 1

View File

@ -1,5 +1,6 @@
#ifdef HAVE_FMA4_SUPPORT #ifdef HAVE_FMA4_SUPPORT
# include <init-arch.h> # include <init-arch.h>
# include <math.h>
# include <math_private.h> # include <math_private.h>
extern double __ieee754_pow_sse2 (double, double); extern double __ieee754_pow_sse2 (double, double);