1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-10-30 10:45:40 +03:00

Simplify powl computation for small integral y [BZ #33411]

The powl implementation for x86_64 ends up multiplying X once more than
necessary and then throwing away that result.  This results in an
overflow flag being set in cases where there is no overflow.

Simplify the relevant portion by special casing the -3 to 3 range and
simply multiplying repetitively.

Resolves: BZ #33411
Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
Reviewed by: Paul Zimmermann <Paul.Zimmermann@inria.fr>
This commit is contained in:
Siddhesh Poyarekar
2025-10-10 20:21:13 -04:00
parent 87e850fb26
commit 1b657c53c2
3 changed files with 96 additions and 26 deletions

View File

@@ -8372,6 +8372,9 @@ pow 0x1.059c76p+0 0x1.ff80bep+11
pow 0x1.7ac7cp+5 23 pow 0x1.7ac7cp+5 23
pow -0x1.7ac7cp+5 23 pow -0x1.7ac7cp+5 23
# BZ33411. xfail for binary32 due to BZ#33563.
pow 0x1p+8192 1.0 xfail:binary32
pown 0 0 pown 0 0
pown 0 -0 pown 0 -0
pown -0 0 pown -0 0

View File

@@ -44221,3 +44221,68 @@ pow -0x1.7ac7cp+5 23
= pow tonearest ibm128 -0x2.f58f8p+4 0x1.7p+4 : -0xf.fffff29cf02eeec4a7cde7b5a4p+124 : inexact-ok = pow tonearest ibm128 -0x2.f58f8p+4 0x1.7p+4 : -0xf.fffff29cf02eeec4a7cde7b5a4p+124 : inexact-ok
= pow towardzero ibm128 -0x2.f58f8p+4 0x1.7p+4 : -0xf.fffff29cf02eeec4a7cde7b5ap+124 : inexact-ok = pow towardzero ibm128 -0x2.f58f8p+4 0x1.7p+4 : -0xf.fffff29cf02eeec4a7cde7b5ap+124 : inexact-ok
= pow upward ibm128 -0x2.f58f8p+4 0x1.7p+4 : -0xf.fffff29cf02eeec4a7cde7b5ap+124 : inexact-ok = pow upward ibm128 -0x2.f58f8p+4 0x1.7p+4 : -0xf.fffff29cf02eeec4a7cde7b5ap+124 : inexact-ok
pow 0x1p+8192 1.0 xfail:binary32
= pow downward binary32 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow tonearest binary32 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow towardzero binary32 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow upward binary32 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow downward binary64 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow tonearest binary64 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow towardzero binary64 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow upward binary64 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow downward intel96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow tonearest intel96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow towardzero intel96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow upward intel96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow downward m68k96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow tonearest m68k96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow towardzero m68k96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow upward m68k96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow downward binary128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow tonearest binary128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow towardzero binary128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow upward binary128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow downward ibm128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow tonearest ibm128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow towardzero ibm128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow upward ibm128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok
= pow downward binary64 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow tonearest binary64 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow towardzero binary64 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow upward binary64 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow downward intel96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow tonearest intel96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow towardzero intel96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow upward intel96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow downward m68k96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow tonearest m68k96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow towardzero m68k96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow upward m68k96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow downward binary128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow tonearest binary128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow towardzero binary128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow upward binary128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow downward ibm128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow tonearest ibm128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow towardzero ibm128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow upward ibm128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok
= pow downward intel96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow tonearest intel96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow towardzero intel96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow upward intel96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow downward m68k96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow tonearest m68k96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow towardzero m68k96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow upward m68k96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow downward binary128 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow tonearest binary128 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow towardzero binary128 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow upward binary128 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok
= pow downward binary128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok
= pow tonearest binary128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok
= pow towardzero binary128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok
= pow upward binary128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok
= pow downward ibm128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok
= pow tonearest ibm128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok
= pow towardzero ibm128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok
= pow upward ibm128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok

View File

@@ -144,39 +144,41 @@ ENTRY(__ieee754_powl)
fcomip %st(1), %st // 4 : y : x fcomip %st(1), %st // 4 : y : x
fstp %st(0) // y : x fstp %st(0) // y : x
jnc 3f jnc 3f
mov -8(%rsp),%eax
mov -4(%rsp),%edx /* Here onwards, it's just integral y in range [-3, 3]. */
orl $0, %edx movq -8(%rsp),%rax
orq $0, %rax
fstp %st(0) // x fstp %st(0) // x
jns 4f // y >= 0, jump jns 4f // y >= 0, jump
fdivrl MO(one) // 1/x (now referred to as x) fdivrl MO(one) // 1/x (now referred to as x)
negl %eax negq %rax
adcl $0, %edx
negl %edx
4: fldl MO(one) // 1 : x 4: fldl MO(one) // 1 : x
fxch
/* If y is even, take the absolute value of x. Otherwise, /* y range is further reduced to [0, 3]. Simply walk through the
ensure all intermediate values that might overflow have the options. First up, 0 and 1. */
sign of x. */ test %eax, %eax
jz 6f
fxch // x : 1
subl $1, %eax
jz 6f
/* Finally, y == 2 and 3. For y == 3 we do |x| * x * |x| because x * x
and |x| * |x| decay faster towards infinity compared to x * |x|. */
fld %st // x : x : 1
fabs // |x| : x : 1
fxch // x : |x| : 1
fld %st(1) // |x| : x : |x| : 1
testb $1, %al testb $1, %al
jnz 6f jz 7f
fabs fmulp %st(2) // x : |x| * |x| : 1
fstp %st(0) // |x| * |x| : 1
jmp 6f
7: fmulp // |x| * x : |x| : 1
fmulp // |x| * x * |x| : 1
6: shrdl $1, %edx, %eax /* We come here with the stack as RES : <something>, so pop off
jnc 5f <something>. */
fxch 6: fstp %st(1)
fabs
fmul %st(1) // x : ST*x
fxch
5: fld %st // x : x : ST*x
fabs // |x| : x : ST*x
fmulp // |x|*x : ST*x
shrl $1, %edx
movl %eax, %ecx
orl %edx, %ecx
jnz 6b
fstp %st(0) // ST*x
LDBL_CHECK_FORCE_UFLOW_NONNAN LDBL_CHECK_FORCE_UFLOW_NONNAN
ret ret