From ae49afe74d778de67d2da85c05fe39301f73c1a7 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Fri, 10 Oct 2025 09:52:35 -0300 Subject: [PATCH] math: Optimize fma call on log2pf1 The fma is required only for x == -0x1.da285cp-5 in FE_TONEAREST to provide correctly rounded results. Checked on x86_64-linux-gnu and i686-linux-gnu. Reviewed-by: Wilco Dijkstra --- math/auto-libm-test-in | 1 + math/auto-libm-test-out-log2p1 | 25 +++++++++++++++++++++++++ sysdeps/ieee754/flt-32/s_log2p1f.c | 9 ++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in index 7e8cb4cef8..03f6fee1f0 100644 --- a/math/auto-libm-test-in +++ b/math/auto-libm-test-in @@ -7714,6 +7714,7 @@ log2p1 -0x4.f37d3c9ce0b14bdd86eb157df5d4p-4 log2p1 0x7.2eca50c4d93196362b4f37f6e8dcp-4 log2p1 -0x6.3fef3067427e43dfcde9e48f74bcp-4 log2p1 0x6.af53d00fd2845d4772260ef5adc4p-4 +log2p1 -0x1.da285cp-5 mul 0 0 mul 0 -0 diff --git a/math/auto-libm-test-out-log2p1 b/math/auto-libm-test-out-log2p1 index 5e395fffa4..3902600a34 100644 --- a/math/auto-libm-test-out-log2p1 +++ b/math/auto-libm-test-out-log2p1 @@ -4467,3 +4467,28 @@ log2p1 0x6.af53d00fd2845d4772260ef5adc4p-4 = log2p1 tonearest ibm128 0x6.af53d00fd2845d4772260ef5acp-4 : 0x8.0efc6087a73bba3b9eb65a673p-4 : inexact-ok = log2p1 towardzero ibm128 0x6.af53d00fd2845d4772260ef5acp-4 : 0x8.0efc6087a73bba3b9eb65a673p-4 : inexact-ok = log2p1 upward ibm128 0x6.af53d00fd2845d4772260ef5acp-4 : 0x8.0efc6087a73bba3b9eb65a6734p-4 : inexact-ok +log2p1 -0x1.da285cp-5 += log2p1 downward binary32 -0xe.d142ep-8 : -0x1.60549p-4 : inexact-ok += log2p1 tonearest binary32 -0xe.d142ep-8 : -0x1.60549p-4 : inexact-ok += log2p1 towardzero binary32 -0xe.d142ep-8 : -0x1.60548ep-4 : inexact-ok += log2p1 upward binary32 -0xe.d142ep-8 : -0x1.60548ep-4 : inexact-ok += log2p1 downward binary64 -0xe.d142ep-8 : -0x1.60548f0000002p-4 : inexact-ok += log2p1 tonearest binary64 -0xe.d142ep-8 : -0x1.60548f0000001p-4 : inexact-ok += log2p1 towardzero binary64 -0xe.d142ep-8 : -0x1.60548f0000001p-4 : inexact-ok += log2p1 upward binary64 -0xe.d142ep-8 : -0x1.60548f0000001p-4 : inexact-ok += log2p1 downward intel96 -0xe.d142ep-8 : -0x1.60548f00000016dp-4 : inexact-ok += log2p1 tonearest intel96 -0xe.d142ep-8 : -0x1.60548f00000016dp-4 : inexact-ok += log2p1 towardzero intel96 -0xe.d142ep-8 : -0x1.60548f00000016cep-4 : inexact-ok += log2p1 upward intel96 -0xe.d142ep-8 : -0x1.60548f00000016cep-4 : inexact-ok += log2p1 downward m68k96 -0xe.d142ep-8 : -0x1.60548f00000016dp-4 : inexact-ok += log2p1 tonearest m68k96 -0xe.d142ep-8 : -0x1.60548f00000016dp-4 : inexact-ok += log2p1 towardzero m68k96 -0xe.d142ep-8 : -0x1.60548f00000016cep-4 : inexact-ok += log2p1 upward m68k96 -0xe.d142ep-8 : -0x1.60548f00000016cep-4 : inexact-ok += log2p1 downward binary128 -0xe.d142ep-8 : -0x1.60548f00000016cf47431656929dp-4 : inexact-ok += log2p1 tonearest binary128 -0xe.d142ep-8 : -0x1.60548f00000016cf47431656929dp-4 : inexact-ok += log2p1 towardzero binary128 -0xe.d142ep-8 : -0x1.60548f00000016cf47431656929cp-4 : inexact-ok += log2p1 upward binary128 -0xe.d142ep-8 : -0x1.60548f00000016cf47431656929cp-4 : inexact-ok += log2p1 downward ibm128 -0xe.d142ep-8 : -0x1.60548f00000016cf4743165693p-4 : inexact-ok += log2p1 tonearest ibm128 -0xe.d142ep-8 : -0x1.60548f00000016cf47431656928p-4 : inexact-ok += log2p1 towardzero ibm128 -0xe.d142ep-8 : -0x1.60548f00000016cf47431656928p-4 : inexact-ok += log2p1 upward ibm128 -0xe.d142ep-8 : -0x1.60548f00000016cf47431656928p-4 : inexact-ok diff --git a/sysdeps/ieee754/flt-32/s_log2p1f.c b/sysdeps/ieee754/flt-32/s_log2p1f.c index 09e77dc08a..d270db6375 100644 --- a/sysdeps/ieee754/flt-32/s_log2p1f.c +++ b/sysdeps/ieee754/flt-32/s_log2p1f.c @@ -231,7 +231,14 @@ __log2p1f (float x) int j = (m + ((int64_t) 1 << (52 - 8))) >> (52 - 7), k = j > 53; e += k; double xd = asdouble (m | (uint64_t) 0x3ff << 52); - z = fma (xd, ix[j], -1.0); +#ifndef __FP_FAST_FMA + /* The fma is required only for x == -0x1.da285cp-5f in FE_TONEAREST + to provide correctly rounded results. */ + if (__glibc_likely (x != -0x1.da285cp-5f)) + z = xd * ix[j] - 1.0; + else +#endif + z = fma (xd, ix[j], -1.0); static const double c[] = { 0x1.71547652b82fep+0, -0x1.71547652b82ffp-1, 0x1.ec709dc32988bp-2,