1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-11-02 09:33:31 +03:00

math: Consolidate erf/erfc definitions

The common code definitions are consolidated in s_erf_common.h
and s_erf_common.c.

Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
powerpc64le-linux-gnu.

Reviewed-by: DJ Delorie <dj@redhat.com>
This commit is contained in:
Adhemerval Zanella
2025-10-10 15:15:32 -03:00
parent fc419290f9
commit e4d812c980
5 changed files with 258 additions and 309 deletions

View File

@@ -363,6 +363,7 @@ type-double-routines := \
math_err \
s_asincosh_data \
s_atanh_data \
s_erf_common \
s_erf_data \
s_erfc_data \
sincostab \

View File

@@ -30,191 +30,12 @@ SOFTWARE.
#include <errno.h>
#include <libm-alias-double.h>
#include "math_config.h"
#include "s_erf_data.h"
#include "s_erf_common.h"
/* CH+CL is a double-double approximation of 2/sqrt(pi) to nearest */
static const double CH = 0x1.20dd750429b6dp+0;
static const double CL = 0x1.1ae3a914fed8p-56;
/* Add a + b, such that *hi + *lo approximates a + b.
Assumes |a| >= |b|.
For rounding to nearest we have hi + lo = a + b exactly.
For directed rounding, we have
(a) hi + lo = a + b exactly when the exponent difference between a and b
is at most 53 (the binary64 precision)
(b) otherwise |(a+b)-(hi+lo)| <= 2^-105 min(|a+b|,|hi|)
(see https://hal.inria.fr/hal-03798376)
We also have |lo| < ulp(hi). */
static inline void
fast_two_sum (double *hi, double *lo, double a, double b)
{
double e;
*hi = a + b;
e = *hi - a; /* exact */
*lo = b - e; /* exact */
}
/* Reference: https://hal.science/hal-01351529v3/document */
static inline void
two_sum (double *hi, double *lo, double a, double b)
{
*hi = a + b;
double aa = *hi - b;
double bb = *hi - aa;
double da = a - aa;
double db = b - bb;
*lo = da + db;
}
// Multiply exactly a and b, such that *hi + *lo = a * b.
static inline void
a_mul (double *hi, double *lo, double a, double b)
{
*hi = a * b;
*lo = fma (a, b, -*hi);
}
/* Assuming 0 <= z <= 0x1.7afb48dc96626p+2, put in h+l an approximation
of erf(z). Return err the maximal relative error:
|(h + l)/erf(z) - 1| < err*|h+l| */
static double
cr_erf_fast (double *h, double *l, double z)
{
double th, tl;
/* we split [0,0x1.7afb48dc96626p+2] into intervals i/16 <= z < (i+1)/16,
and for each interval, we use a minimax polynomial:
* for i=0 (0 <= z < 1/16) we use a polynomial evaluated at zero,
since if we evaluate in the middle 1/32, we will get bad accuracy
for tiny z, and moreover z-1/32 might not be exact
* for 1 <= i <= 94, we use a polynomial evaluated in the middle of
the interval, namely i/16+1/32
*/
if (z < 0.0625) /* z < 1/16 */
{
double z2h, z2l, z4;
a_mul (&z2h, &z2l, z, z);
z4 = z2h * z2h;
double c9 = fma (C0[7], z2h, C0[6]);
double c5 = fma (C0[5], z2h, C0[4]);
c5 = fma (c9, z4, c5);
/* compute C0[2] + C0[3] + z2h*c5 */
a_mul (&th, &tl, z2h, c5);
fast_two_sum (h, l, C0[2], th);
*l += tl + C0[3];
/* compute C0[0] + C0[1] + (z2h + z2l)*(h + l) */
double h_copy = *h;
a_mul (&th, &tl, z2h, *h);
tl += fma (z2h, *l, C0[1]);
fast_two_sum (h, l, C0[0], th);
*l += fma (z2l, h_copy, tl);
/* multiply (h,l) by z */
a_mul (h, &tl, *h, z);
*l = fma (*l, z, tl);
return 0x1.78p-69; /* err < 2.48658249618372e-21, cf Analyze0() */
}
double v = floor (16.0 * z);
uint32_t i = 16.0 * z;
/* i/16 <= z < (i+1)/16 */
/* For 0.0625 0 <= z <= 0x1.7afb48dc96626p+2, z - 0.03125 is exact:
(1) either z - 0.03125 is in the same binade as z, then 0.03125 is
an integer multiple of ulp(z), so is z - 0.03125
(2) if z - 0.03125 is in a smaller binade, both z and 0.03125 are
integer multiple of the ulp() of that smaller binade.
Also, subtracting 0.0625 * v is exact. */
z = (z - 0.03125) - 0.0625 * v;
/* now |z| <= 1/32 */
const double *c = C[i - 1];
double z2 = z * z, z4 = z2 * z2;
/* the degree-10 coefficient is c[12] */
double c9 = fma (c[12], z, c[11]);
double c7 = fma (c[10], z, c[9]);
double c5 = fma (c[8], z, c[7]);
double c3h, c3l;
/* c3h, c3l <- c[5] + z*c[6] */
fast_two_sum (&c3h, &c3l, c[5], z * c[6]);
c7 = fma (c9, z2, c7);
/* c3h, c3l <- c3h, c3l + c5*z2 */
fast_two_sum (&c3h, &tl, c3h, c5 * z2);
c3l += tl;
/* c3h, c3l <- c3h, c3l + c7*z4 */
fast_two_sum (&c3h, &tl, c3h, c7 * z4);
c3l += tl;
/* c2h, c2l <- c[4] + z*(c3h + c3l) */
double c2h, c2l;
a_mul (&th, &tl, z, c3h);
fast_two_sum (&c2h, &c2l, c[4], th);
c2l += fma (z, c3l, tl);
/* compute c[2] + c[3] + z*(c2h + c2l) */
a_mul (&th, &tl, z, c2h);
fast_two_sum (h, l, c[2], th);
*l += tl + fma (z, c2l, c[3]);
/* compute c[0] + c[1] + z*(h + l) */
a_mul (&th, &tl, z, *h);
tl = fma (z, *l, tl); /* tl += z*l */
fast_two_sum (h, l, c[0], th);
*l += tl + c[1];
return 0x1.11p-69; /* err < 1.80414390200020e-21, cf analyze_p(1)
(larger values of i yield smaller error bounds) */
}
/* for |z| < 1/8, assuming z >= 2^-61, thus no underflow can occur */
static void
cr_erf_accurate_tiny (double *h, double *l, double z)
{
uint64_t i, j, k;
/* use dichotomy */
for (i = 0, j = EXCEPTIONS_LEN; i + 1 < j;)
{
k = (i + j) / 2;
if (EXCEPTIONS_TINY[k][0] <= z)
i = k;
else
j = k;
}
/* Either i=0 and z < exceptions[i+1][0],
or i=n-1 and exceptions[i][0] <= z
or 0 < i < n-1 and exceptions[i][0] <= z < exceptions[i+1][0].
In all cases z can only equal exceptions[i][0]. */
if (z == EXCEPTIONS_TINY[i][0])
{
*h = EXCEPTIONS_TINY[i][1];
*l = EXCEPTIONS_TINY[i][2];
return;
}
double z2 = z * z, th, tl;
*h = P[21 / 2 + 4]; /* degree 21 */
for (int a = 19; a > 11; a -= 2)
*h = fma (*h, z2, P[a / 2 + 4]); /* degree j */
*l = 0;
for (int a = 11; a > 7; a -= 2)
{
/* multiply h+l by z^2 */
a_mul (&th, &tl, *h, z);
tl = fma (*l, z, tl);
a_mul (h, l, th, z);
*l = fma (tl, z, *l);
/* add p[j] to h + l */
fast_two_sum (h, &tl, P[a / 2 + 4], *h);
*l += tl;
}
for (int a = 7; a >= 1; a -= 2)
{
/* multiply h+l by z^2 */
a_mul (&th, &tl, *h, z);
tl = fma (*l, z, tl);
a_mul (h, l, th, z);
*l = fma (tl, z, *l);
fast_two_sum (h, &tl, P[a - 1], *h);
*l += P[a] + tl;
}
/* multiply by z */
a_mul (h, &tl, *h, z);
*l = fma (*l, z, tl);
return;
}
/* Assuming 0 <= z <= 0x1.7afb48dc96626p+2, put in h+l an accurate
approximation of erf(z).
Assumes z >= 2^-61, thus no underflow can occur. */
@@ -238,10 +59,7 @@ cr_erf_accurate (double *h, double *l, double z)
the interval, namely i/8+1/16
*/
if (z < 0.125) /* z < 1/8 */
{
cr_erf_accurate_tiny (h, l, z);
return;
}
return __cr_erf_accurate_tiny (h, l, z, true);
double v = floor (8.0 * z);
uint32_t i = 8.0 * z;
z = (z - 0.0625) - 0.125 * v;
@@ -313,7 +131,7 @@ __erf (double x)
return res;
}
/* now z >= 2^-61 */
err = cr_erf_fast (&h, &l, z);
err = __cr_erf_fast (&h, &l, z);
uint64_t u = asuint64 (h), v = asuint64 (l);
t = asuint64 (x);
u ^= t & SIGN_MASK;

View File

@@ -0,0 +1,170 @@
/* Common definitions for erf/erc implementation.
Copyright (c) 2023-2025 Paul Zimmermann
The original version of this file was copied from the CORE-MATH
project (file src/binary64/erf/erf.c, revision 384ed01d).
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include <stdint.h>
#include <math.h>
#include <stddef.h>
#include <stdbool.h>
#include "s_erf_common.h"
double
__cr_erf_fast (double *h, double *l, double z)
{
double th, tl;
/* we split [0,0x1.7afb48dc96626p+2] into intervals i/16 <= z < (i+1)/16,
and for each interval, we use a minimax polynomial:
* for i=0 (0 <= z < 1/16) we use a polynomial evaluated at zero,
since if we evaluate in the middle 1/32, we will get bad accuracy
for tiny z, and moreover z-1/32 might not be exact
* for 1 <= i <= 94, we use a polynomial evaluated in the middle of
the interval, namely i/16+1/32
*/
if (z < 0.0625) /* z < 1/16 */
{
double z2h, z2l, z4;
a_mul (&z2h, &z2l, z, z);
z4 = z2h * z2h;
double c9 = fma (C0[7], z2h, C0[6]);
double c5 = fma (C0[5], z2h, C0[4]);
c5 = fma (c9, z4, c5);
/* compute C0[2] + C0[3] + z2h*c5 */
a_mul (&th, &tl, z2h, c5);
fast_two_sum (h, l, C0[2], th);
*l += tl + C0[3];
/* compute C0[0] + C0[1] + (z2h + z2l)*(h + l) */
double h_copy = *h;
a_mul (&th, &tl, z2h, *h);
tl += fma (z2h, *l, C0[1]);
fast_two_sum (h, l, C0[0], th);
*l += fma (z2l, h_copy, tl);
/* multiply (h,l) by z */
a_mul (h, &tl, *h, z);
*l = fma (*l, z, tl);
return 0x1.78p-69; /* err < 2.48658249618372e-21, cf Analyze0() */
}
double v = floor (16.0 * z);
uint32_t i = 16.0 * z;
/* i/16 <= z < (i+1)/16 */
/* For 0.0625 0 <= z <= 0x1.7afb48dc96626p+2, z - 0.03125 is exact:
(1) either z - 0.03125 is in the same binade as z, then 0.03125 is
an integer multiple of ulp(z), so is z - 0.03125
(2) if z - 0.03125 is in a smaller binade, both z and 0.03125 are
integer multiple of the ulp() of that smaller binade.
Also, subtracting 0.0625 * v is exact. */
z = (z - 0.03125) - 0.0625 * v;
/* now |z| <= 1/32 */
const double *c = C[i - 1];
double z2 = z * z, z4 = z2 * z2;
/* the degree-10 coefficient is c[12] */
double c9 = fma (c[12], z, c[11]);
double c7 = fma (c[10], z, c[9]);
double c5 = fma (c[8], z, c[7]);
double c3h, c3l;
/* c3h, c3l <- c[5] + z*c[6] */
fast_two_sum (&c3h, &c3l, c[5], z * c[6]);
c7 = fma (c9, z2, c7);
/* c3h, c3l <- c3h, c3l + c5*z2 */
fast_two_sum (&c3h, &tl, c3h, c5 * z2);
c3l += tl;
/* c3h, c3l <- c3h, c3l + c7*z4 */
fast_two_sum (&c3h, &tl, c3h, c7 * z4);
c3l += tl;
/* c2h, c2l <- c[4] + z*(c3h + c3l) */
double c2h, c2l;
a_mul (&th, &tl, z, c3h);
fast_two_sum (&c2h, &c2l, c[4], th);
c2l += fma (z, c3l, tl);
/* compute c[2] + c[3] + z*(c2h + c2l) */
a_mul (&th, &tl, z, c2h);
fast_two_sum (h, l, c[2], th);
*l += tl + fma (z, c2l, c[3]);
/* compute c[0] + c[1] + z*(h + l) */
a_mul (&th, &tl, z, *h);
tl = fma (z, *l, tl); /* tl += z*l */
fast_two_sum (h, l, c[0], th);
*l += tl + c[1];
return 0x1.11p-69; /* err < 1.80414390200020e-21, cf analyze_p(1)
(larger values of i yield smaller error bounds) */
}
void
__cr_erf_accurate_tiny (double *h, double *l, double z, bool exceptions)
{
if (exceptions)
{
uint64_t i, j, k;
/* use dichotomy */
for (i = 0, j = EXCEPTIONS_TINY_LEN; i + 1 < j;)
{
k = (i + j) / 2;
if (EXCEPTIONS_TINY[k][0] <= z)
i = k;
else
j = k;
}
/* Either i=0 and z < exceptions[i+1][0],
or i=n-1 and exceptions[i][0] <= z
or 0 < i < n-1 and exceptions[i][0] <= z < exceptions[i+1][0].
In all cases z can only equal exceptions[i][0]. */
if (z == EXCEPTIONS_TINY[i][0])
{
*h = EXCEPTIONS_TINY[i][1];
*l = EXCEPTIONS_TINY[i][2];
return;
}
}
double z2 = z * z, th, tl;
*h = P[21 / 2 + 4]; /* degree 21 */
for (int a = 19; a > 11; a -= 2)
*h = fma (*h, z2, P[a / 2 + 4]); /* degree j */
*l = 0;
for (int a = 11; a > 7; a -= 2)
{
/* multiply h+l by z^2 */
a_mul (&th, &tl, *h, z);
tl = fma (*l, z, tl);
a_mul (h, l, th, z);
*l = fma (tl, z, *l);
/* add p[j] to h + l */
fast_two_sum (h, &tl, P[a / 2 + 4], *h);
*l += tl;
}
for (int a = 7; a >= 1; a -= 2)
{
/* multiply h+l by z^2 */
a_mul (&th, &tl, *h, z);
tl = fma (*l, z, tl);
a_mul (h, l, th, z);
*l = fma (tl, z, *l);
fast_two_sum (h, &tl, P[a - 1], *h);
*l += P[a] + tl;
}
/* multiply by z */
a_mul (h, &tl, *h, z);
*l = fma (*l, z, tl);
return;
}

View File

@@ -0,0 +1,80 @@
/* Common definitions for erf/erc implementation.
Copyright (c) 2023-2025 Paul Zimmermann
This file is part of the CORE-MATH project
(https://core-math.gitlabpages.inria.fr/).
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#ifndef _S_ERF_COMMON_H
#define _S_ERF_COMMON_H
#include "s_erf_data.h"
/* Add a + b, such that *hi + *lo approximates a + b.
Assumes |a| >= |b|.
For rounding to nearest we have hi + lo = a + b exactly.
For directed rounding, we have
(a) hi + lo = a + b exactly when the exponent difference between a and b
is at most 53 (the binary64 precision)
(b) otherwise |(a+b)-(hi+lo)| <= 2^-105 min(|a+b|,|hi|)
(see https://hal.inria.fr/hal-03798376)
We also have |lo| < ulp(hi). */
static inline void
fast_two_sum (double *hi, double *lo, double a, double b)
{
double e;
*hi = a + b;
e = *hi - a; /* exact */
*lo = b - e; /* exact */
}
/* Reference: https://hal.science/hal-01351529v3/document */
static inline void
two_sum (double *hi, double *lo, double a, double b)
{
*hi = a + b;
double aa = *hi - b;
double bb = *hi - aa;
double da = a - aa;
double db = b - bb;
*lo = da + db;
}
// Multiply exactly a and b, such that *hi + *lo = a * b.
static inline void
a_mul (double *hi, double *lo, double a, double b)
{
*hi = a * b;
*lo = fma (a, b, -*hi);
}
/* Assuming 0 <= z <= 0x1.7afb48dc96626p+2, put in h+l an approximation
of erf(z). Return err the maximal relative error:
|(h + l)/erf(z) - 1| < err*|h+l| */
double __cr_erf_fast (double *h, double *l, double z) attribute_hidden;
/* for |z| < 1/8, assuming z >= 2^-61, thus no underflow can occur */
void __cr_erf_accurate_tiny (double *h, double *l, double z, bool exceptions)
attribute_hidden;
#endif

View File

@@ -48,129 +48,9 @@ SOFTWARE.
#include <errno.h>
#include <libm-alias-double.h>
#include "math_config.h"
#include "s_erf_data.h"
#include "s_erf_common.h"
#include "s_erfc_data.h"
static inline void
fast_two_sum (double *hi, double *lo, double a, double b)
{
double e;
*hi = a + b;
e = *hi - a; /* exact */
*lo = b - e; /* exact */
}
static inline void
two_sum (double *hi, double *lo, double a, double b)
{
*hi = a + b;
double aa = *hi - b;
double bb = *hi - aa;
double da = a - aa;
double db = b - bb;
*lo = da + db;
}
static inline void
a_mul (double *hi, double *lo, double a, double b)
{
*hi = a * b;
*lo = fma (a, b, -*hi);
}
/* Assuming 0 <= z <= 0x1.7afb48dc96626p+2, put in h+l an approximation
of erf(z). Return err the maximal relative error:
|(h + l)/erf(z) - 1| < err*|h+l| */
static double
cr_erf_fast (double *h, double *l, double z)
{
double th, tl;
if (z < 0.0625) /* z < 1/16 */
{
double z2h, z2l, z4;
a_mul (&z2h, &z2l, z, z);
z4 = z2h * z2h;
double c9 = fma (C0[7], z2h, C0[6]);
double c5 = fma (C0[5], z2h, C0[4]);
c5 = fma (c9, z4, c5);
a_mul (&th, &tl, z2h, c5);
fast_two_sum (h, l, C0[2], th);
*l += tl + C0[3];
double h_copy = *h;
a_mul (&th, &tl, z2h, *h);
tl += fma (z2h, *l, C0[1]);
fast_two_sum (h, l, C0[0], th);
*l += fma (z2l, h_copy, tl);
a_mul (h, &tl, *h, z);
*l = fma (*l, z, tl);
return 0x1.78p-69;
}
double v = floor (16.0 * z);
uint32_t i = 16.0 * z;
z = (z - 0.03125) - 0.0625 * v;
const double *c = C[i - 1];
double z2 = z * z, z4 = z2 * z2;
double c9 = fma (c[12], z, c[11]);
double c7 = fma (c[10], z, c[9]);
double c5 = fma (c[8], z, c[7]);
double c3h, c3l;
fast_two_sum (&c3h, &c3l, c[5], z * c[6]);
c7 = fma (c9, z2, c7);
fast_two_sum (&c3h, &tl, c3h, c5 * z2);
c3l += tl;
fast_two_sum (&c3h, &tl, c3h, c7 * z4);
c3l += tl;
double c2h, c2l;
a_mul (&th, &tl, z, c3h);
fast_two_sum (&c2h, &c2l, c[4], th);
c2l += fma (z, c3l, tl);
a_mul (&th, &tl, z, c2h);
fast_two_sum (h, l, c[2], th);
*l += tl + fma (z, c2l, c[3]);
a_mul (&th, &tl, z, *h);
tl = fma (z, *l, tl); /* tl += z*l */
fast_two_sum (h, l, c[0], th);
*l += tl + c[1];
return 0x1.11p-69;
}
/* for |z| < 1/8, assuming z >= 2^-61, thus no underflow can occur */
static void
cr_erf_accurate_tiny (double *h, double *l, double z)
{
double z2 = z * z, th, tl;
*h = P[21 / 2 + 4]; /* degree 21 */
for (int j = 19; j > 11; j -= 2)
*h = fma (*h, z2, P[j / 2 + 4]); /* degree j */
*l = 0;
for (int j = 11; j > 7; j -= 2)
{
/* multiply h+l by z^2 */
a_mul (&th, &tl, *h, z);
tl = fma (*l, z, tl);
a_mul (h, l, th, z);
*l = fma (tl, z, *l);
/* add P[j] to h + l */
fast_two_sum (h, &tl, P[j / 2 + 4], *h);
*l += tl;
}
for (int j = 7; j >= 1; j -= 2)
{
/* multiply h+l by z^2 */
a_mul (&th, &tl, *h, z);
tl = fma (*l, z, tl);
a_mul (h, l, th, z);
*l = fma (tl, z, *l);
fast_two_sum (h, &tl, P[j - 1], *h);
*l += P[j] + tl;
}
/* multiply by z */
a_mul (h, &tl, *h, z);
*l = fma (*l, z, tl);
return;
}
/* Assuming 0 <= z <= 0x1.7afb48dc96626p+2, put in h+l an accurate
approximation of erf(z).
Assumes z >= 2^-61, thus no underflow can occur. */
@@ -179,7 +59,7 @@ cr_erf_accurate (double *h, double *l, double z)
{
double th, tl;
if (z < 0.125) /* z < 1/8 */
return cr_erf_accurate_tiny (h, l, z);
return __cr_erf_accurate_tiny (h, l, z, false);
double v = floor (8.0 * z);
uint32_t i = 8.0 * z;
z = (z - 0.0625) - 0.125 * v;
@@ -507,7 +387,7 @@ cr_erfc_fast (double *h, double *l, double x)
throughput is about 44 cycles */
if (x < 0) // erfc(x) = 1 - erf(x) = 1 + erf(-x)
{
double err = cr_erf_fast (h, l, -x);
double err = __cr_erf_fast (h, l, -x);
/* h+l approximates erf(-x), with relative error bounded by err,
where err <= 0x1.78p-69 */
err = err * *h; /* convert into absolute error */
@@ -533,7 +413,7 @@ cr_erfc_fast (double *h, double *l, double x)
the average reciprocal throughput is about 59 cycles */
else if (x <= THRESHOLD1)
{
double err = cr_erf_fast (h, l, x);
double err = __cr_erf_fast (h, l, x);
/* h+l approximates erf(x), with relative error bounded by err,
where err <= 0x1.78p-69 */
err = err * *h; /* convert into absolute error */