diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in index 1397d317fb..89a6760639 100644 --- a/math/auto-libm-test-in +++ b/math/auto-libm-test-in @@ -8372,6 +8372,9 @@ pow 0x1.059c76p+0 0x1.ff80bep+11 pow 0x1.7ac7cp+5 23 pow -0x1.7ac7cp+5 23 +# BZ33411. xfail for binary32 due to BZ#33563. +pow 0x1p+8192 1.0 xfail:binary32 + pown 0 0 pown 0 -0 pown -0 0 diff --git a/math/auto-libm-test-out-pow b/math/auto-libm-test-out-pow index 09ec53e49e..cbca46cd0c 100644 --- a/math/auto-libm-test-out-pow +++ b/math/auto-libm-test-out-pow @@ -44221,3 +44221,68 @@ pow -0x1.7ac7cp+5 23 = pow tonearest ibm128 -0x2.f58f8p+4 0x1.7p+4 : -0xf.fffff29cf02eeec4a7cde7b5a4p+124 : inexact-ok = pow towardzero ibm128 -0x2.f58f8p+4 0x1.7p+4 : -0xf.fffff29cf02eeec4a7cde7b5ap+124 : inexact-ok = pow upward ibm128 -0x2.f58f8p+4 0x1.7p+4 : -0xf.fffff29cf02eeec4a7cde7b5ap+124 : inexact-ok +pow 0x1p+8192 1.0 xfail:binary32 += pow downward binary32 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow tonearest binary32 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow towardzero binary32 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow upward binary32 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow downward binary64 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow tonearest binary64 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow towardzero binary64 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow upward binary64 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow downward intel96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow tonearest intel96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow towardzero intel96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow upward intel96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow downward m68k96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow tonearest m68k96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow towardzero m68k96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow upward m68k96 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow downward binary128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow tonearest binary128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow towardzero binary128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow upward binary128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow downward ibm128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow tonearest ibm128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow towardzero ibm128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow upward ibm128 0xf.fffffp+124 0x1p+0 : 0xf.fffffp+124 : xfail:binary32 inexact-ok += pow downward binary64 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow tonearest binary64 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow towardzero binary64 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow upward binary64 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow downward intel96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow tonearest intel96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow towardzero intel96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow upward intel96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow downward m68k96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow tonearest m68k96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow towardzero m68k96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow upward m68k96 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow downward binary128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow tonearest binary128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow towardzero binary128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow upward binary128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow downward ibm128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow tonearest ibm128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow towardzero ibm128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow upward ibm128 0xf.ffffffffffff8p+1020 0x1p+0 : 0xf.ffffffffffff8p+1020 : xfail:binary32 inexact-ok += pow downward intel96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow tonearest intel96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow towardzero intel96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow upward intel96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow downward m68k96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow tonearest m68k96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow towardzero m68k96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow upward m68k96 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow downward binary128 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow tonearest binary128 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow towardzero binary128 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow upward binary128 0x1p+8192 0x1p+0 : 0x1p+8192 : xfail:binary32 inexact-ok += pow downward binary128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok += pow tonearest binary128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok += pow towardzero binary128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok += pow upward binary128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok += pow downward ibm128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok += pow tonearest ibm128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok += pow towardzero ibm128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok += pow upward ibm128 0xf.ffffffffffffbffffffffffffcp+1020 0x1p+0 : 0xf.ffffffffffffbffffffffffffcp+1020 : xfail:binary32 inexact-ok diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S index 620ef765a7..39f77480e8 100644 --- a/sysdeps/x86_64/fpu/e_powl.S +++ b/sysdeps/x86_64/fpu/e_powl.S @@ -144,39 +144,41 @@ ENTRY(__ieee754_powl) fcomip %st(1), %st // 4 : y : x fstp %st(0) // y : x jnc 3f - mov -8(%rsp),%eax - mov -4(%rsp),%edx - orl $0, %edx + + /* Here onwards, it's just integral y in range [-3, 3]. */ + movq -8(%rsp),%rax + orq $0, %rax fstp %st(0) // x jns 4f // y >= 0, jump fdivrl MO(one) // 1/x (now referred to as x) - negl %eax - adcl $0, %edx - negl %edx + negq %rax 4: fldl MO(one) // 1 : x - fxch - /* If y is even, take the absolute value of x. Otherwise, - ensure all intermediate values that might overflow have the - sign of x. */ + /* y range is further reduced to [0, 3]. Simply walk through the + options. First up, 0 and 1. */ + test %eax, %eax + jz 6f + fxch // x : 1 + subl $1, %eax + jz 6f + + /* Finally, y == 2 and 3. For y == 3 we do |x| * x * |x| because x * x + and |x| * |x| decay faster towards infinity compared to x * |x|. */ + fld %st // x : x : 1 + fabs // |x| : x : 1 + fxch // x : |x| : 1 + fld %st(1) // |x| : x : |x| : 1 testb $1, %al - jnz 6f - fabs + jz 7f + fmulp %st(2) // x : |x| * |x| : 1 + fstp %st(0) // |x| * |x| : 1 + jmp 6f +7: fmulp // |x| * x : |x| : 1 + fmulp // |x| * x * |x| : 1 -6: shrdl $1, %edx, %eax - jnc 5f - fxch - fabs - fmul %st(1) // x : ST*x - fxch -5: fld %st // x : x : ST*x - fabs // |x| : x : ST*x - fmulp // |x|*x : ST*x - shrl $1, %edx - movl %eax, %ecx - orl %edx, %ecx - jnz 6b - fstp %st(0) // ST*x + /* We come here with the stack as RES : , so pop off + . */ +6: fstp %st(1) LDBL_CHECK_FORCE_UFLOW_NONNAN ret