mirror of
https://sourceware.org/git/glibc.git
synced 2025-08-07 06:43:00 +03:00
AArch64: Add vector sinpi to libmvec
Vector variant of the new C23 sinpi. New tests pass on AArch64.
This commit is contained in:
committed by
Wilco Dijkstra
parent
939e770e01
commit
6050b45716
@@ -340,4 +340,15 @@
|
|||||||
#define __DECL_SIMD_tanf32x
|
#define __DECL_SIMD_tanf32x
|
||||||
#define __DECL_SIMD_tanf64x
|
#define __DECL_SIMD_tanf64x
|
||||||
#define __DECL_SIMD_tanf128x
|
#define __DECL_SIMD_tanf128x
|
||||||
|
|
||||||
|
#define __DECL_SIMD_sinpi
|
||||||
|
#define __DECL_SIMD_sinpif
|
||||||
|
#define __DECL_SIMD_sinpil
|
||||||
|
#define __DECL_SIMD_sinpif16
|
||||||
|
#define __DECL_SIMD_sinpif32
|
||||||
|
#define __DECL_SIMD_sinpif64
|
||||||
|
#define __DECL_SIMD_sinpif128
|
||||||
|
#define __DECL_SIMD_sinpif32x
|
||||||
|
#define __DECL_SIMD_sinpif64x
|
||||||
|
#define __DECL_SIMD_sinpif128x
|
||||||
#endif
|
#endif
|
||||||
|
@@ -78,7 +78,7 @@ __MATHCALL (atan2pi,, (_Mdouble_ __y, _Mdouble_ __x));
|
|||||||
/* Cosine of pi * X. */
|
/* Cosine of pi * X. */
|
||||||
__MATHCALL (cospi,, (_Mdouble_ __x));
|
__MATHCALL (cospi,, (_Mdouble_ __x));
|
||||||
/* Sine of pi * X. */
|
/* Sine of pi * X. */
|
||||||
__MATHCALL (sinpi,, (_Mdouble_ __x));
|
__MATHCALL_VEC (sinpi,, (_Mdouble_ __x));
|
||||||
/* Tangent of pi * X. */
|
/* Tangent of pi * X. */
|
||||||
__MATHCALL (tanpi,, (_Mdouble_ __x));
|
__MATHCALL (tanpi,, (_Mdouble_ __x));
|
||||||
#endif
|
#endif
|
||||||
|
@@ -22,6 +22,7 @@ libmvec-supported-funcs = acos \
|
|||||||
pow \
|
pow \
|
||||||
sin \
|
sin \
|
||||||
sinh \
|
sinh \
|
||||||
|
sinpi \
|
||||||
tan \
|
tan \
|
||||||
tanh
|
tanh
|
||||||
|
|
||||||
|
@@ -141,5 +141,10 @@ libmvec {
|
|||||||
_ZGVnN4v_logp1f;
|
_ZGVnN4v_logp1f;
|
||||||
_ZGVsMxv_logp1;
|
_ZGVsMxv_logp1;
|
||||||
_ZGVsMxv_logp1f;
|
_ZGVsMxv_logp1f;
|
||||||
|
_ZGVnN2v_sinpi;
|
||||||
|
_ZGVnN2v_sinpif;
|
||||||
|
_ZGVnN4v_sinpif;
|
||||||
|
_ZGVsMxv_sinpi;
|
||||||
|
_ZGVsMxv_sinpif;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -41,6 +41,7 @@ libmvec_hidden_proto (V_NAME_F1(log));
|
|||||||
libmvec_hidden_proto (V_NAME_F2(pow));
|
libmvec_hidden_proto (V_NAME_F2(pow));
|
||||||
libmvec_hidden_proto (V_NAME_F1(sin));
|
libmvec_hidden_proto (V_NAME_F1(sin));
|
||||||
libmvec_hidden_proto (V_NAME_F1(sinh));
|
libmvec_hidden_proto (V_NAME_F1(sinh));
|
||||||
|
libmvec_hidden_proto (V_NAME_F1(sinpi));
|
||||||
libmvec_hidden_proto (V_NAME_F1(tan));
|
libmvec_hidden_proto (V_NAME_F1(tan));
|
||||||
libmvec_hidden_proto (V_NAME_F1(tanh));
|
libmvec_hidden_proto (V_NAME_F1(tanh));
|
||||||
libmvec_hidden_proto (V_NAME_F2(atan2));
|
libmvec_hidden_proto (V_NAME_F2(atan2));
|
||||||
|
@@ -129,6 +129,10 @@
|
|||||||
# define __DECL_SIMD_sinh __DECL_SIMD_aarch64
|
# define __DECL_SIMD_sinh __DECL_SIMD_aarch64
|
||||||
# undef __DECL_SIMD_sinhf
|
# undef __DECL_SIMD_sinhf
|
||||||
# define __DECL_SIMD_sinhf __DECL_SIMD_aarch64
|
# define __DECL_SIMD_sinhf __DECL_SIMD_aarch64
|
||||||
|
# undef __DECL_SIMD_sinpi
|
||||||
|
# define __DECL_SIMD_sinpi __DECL_SIMD_aarch64
|
||||||
|
# undef __DECL_SIMD_sinpif
|
||||||
|
# define __DECL_SIMD_sinpif __DECL_SIMD_aarch64
|
||||||
# undef __DECL_SIMD_tan
|
# undef __DECL_SIMD_tan
|
||||||
# define __DECL_SIMD_tan __DECL_SIMD_aarch64
|
# define __DECL_SIMD_tan __DECL_SIMD_aarch64
|
||||||
# undef __DECL_SIMD_tanf
|
# undef __DECL_SIMD_tanf
|
||||||
@@ -188,6 +192,7 @@ __vpcs __f32x4_t _ZGVnN4v_logp1f (__f32x4_t);
|
|||||||
__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
|
__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
|
||||||
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
|
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
|
||||||
__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
|
__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
|
||||||
|
__vpcs __f32x4_t _ZGVnN4v_sinpif (__f32x4_t);
|
||||||
__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
|
__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
|
||||||
__vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t);
|
__vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t);
|
||||||
|
|
||||||
@@ -216,6 +221,7 @@ __vpcs __f64x2_t _ZGVnN2v_logp1 (__f64x2_t);
|
|||||||
__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
|
__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
|
||||||
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
|
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
|
||||||
__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
|
__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
|
||||||
|
__vpcs __f64x2_t _ZGVnN2v_sinpi (__f64x2_t);
|
||||||
__vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t);
|
__vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t);
|
||||||
__vpcs __f64x2_t _ZGVnN2v_tanh (__f64x2_t);
|
__vpcs __f64x2_t _ZGVnN2v_tanh (__f64x2_t);
|
||||||
|
|
||||||
@@ -249,6 +255,7 @@ __sv_f32_t _ZGVsMxv_logp1f (__sv_f32_t, __sv_bool_t);
|
|||||||
__sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t);
|
__sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t);
|
||||||
__sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t);
|
__sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t);
|
||||||
__sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t);
|
__sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t);
|
||||||
|
__sv_f32_t _ZGVsMxv_sinpif (__sv_f32_t, __sv_bool_t);
|
||||||
__sv_f32_t _ZGVsMxv_tanf (__sv_f32_t, __sv_bool_t);
|
__sv_f32_t _ZGVsMxv_tanf (__sv_f32_t, __sv_bool_t);
|
||||||
__sv_f32_t _ZGVsMxv_tanhf (__sv_f32_t, __sv_bool_t);
|
__sv_f32_t _ZGVsMxv_tanhf (__sv_f32_t, __sv_bool_t);
|
||||||
|
|
||||||
@@ -277,6 +284,7 @@ __sv_f64_t _ZGVsMxv_logp1 (__sv_f64_t, __sv_bool_t);
|
|||||||
__sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t);
|
__sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t);
|
||||||
__sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t);
|
__sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t);
|
||||||
__sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t);
|
__sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t);
|
||||||
|
__sv_f64_t _ZGVsMxv_sinpi (__sv_f64_t, __sv_bool_t);
|
||||||
__sv_f64_t _ZGVsMxv_tan (__sv_f64_t, __sv_bool_t);
|
__sv_f64_t _ZGVsMxv_tan (__sv_f64_t, __sv_bool_t);
|
||||||
__sv_f64_t _ZGVsMxv_tanh (__sv_f64_t, __sv_bool_t);
|
__sv_f64_t _ZGVsMxv_tanh (__sv_f64_t, __sv_bool_t);
|
||||||
|
|
||||||
|
87
sysdeps/aarch64/fpu/sinpi_advsimd.c
Normal file
87
sysdeps/aarch64/fpu/sinpi_advsimd.c
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
/* Double-precision (Advanced SIMD) sinpi function
|
||||||
|
|
||||||
|
Copyright (C) 2024 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include "v_math.h"
|
||||||
|
#include "poly_advsimd_f64.h"
|
||||||
|
|
||||||
|
static const struct data
|
||||||
|
{
|
||||||
|
float64x2_t poly[10];
|
||||||
|
} data = {
|
||||||
|
/* Polynomial coefficients generated using Remez algorithm,
|
||||||
|
see sinpi.sollya for details. */
|
||||||
|
.poly = { V2 (0x1.921fb54442d184p1), V2 (-0x1.4abbce625be53p2),
|
||||||
|
V2 (0x1.466bc6775ab16p1), V2 (-0x1.32d2cce62dc33p-1),
|
||||||
|
V2 (0x1.507834891188ep-4), V2 (-0x1.e30750a28c88ep-8),
|
||||||
|
V2 (0x1.e8f48308acda4p-12), V2 (-0x1.6fc0032b3c29fp-16),
|
||||||
|
V2 (0x1.af86ae521260bp-21), V2 (-0x1.012a9870eeb7dp-25) },
|
||||||
|
};
|
||||||
|
|
||||||
|
#if WANT_SIMD_EXCEPT
|
||||||
|
# define TinyBound v_u64 (0x3bf0000000000000) /* asuint64(0x1p-64). */
|
||||||
|
/* asuint64(0x1p63) - TinyBound. */
|
||||||
|
# define Thresh v_u64 (0x07f0000000000000)
|
||||||
|
|
||||||
|
static float64x2_t VPCS_ATTR NOINLINE
|
||||||
|
special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp)
|
||||||
|
{
|
||||||
|
/* Fall back to scalar code. */
|
||||||
|
y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
|
||||||
|
return v_call_f64 (sinpi, x, y, cmp);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Approximation for vector double-precision sinpi(x).
|
||||||
|
Maximum Error 3.05 ULP:
|
||||||
|
_ZGVnN2v_sinpi(0x1.d32750db30b4ap-2) got 0x1.fb295878301c7p-1
|
||||||
|
want 0x1.fb295878301cap-1. */
|
||||||
|
float64x2_t VPCS_ATTR V_NAME_D1 (sinpi) (float64x2_t x)
|
||||||
|
{
|
||||||
|
const struct data *d = ptr_barrier (&data);
|
||||||
|
|
||||||
|
#if WANT_SIMD_EXCEPT
|
||||||
|
uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x));
|
||||||
|
uint64x2_t cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh);
|
||||||
|
|
||||||
|
/* When WANT_SIMD_EXCEPT = 1, special lanes should be set to 0
|
||||||
|
to avoid them under/overflowing and throwing exceptions. */
|
||||||
|
float64x2_t r = v_zerofy_f64 (x, cmp);
|
||||||
|
#else
|
||||||
|
float64x2_t r = x;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If the integer nearest to r is odd, the sign of the result should be inverted. */
|
||||||
|
uint64x2_t odd
|
||||||
|
= vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtaq_s64_f64 (r)), 63);
|
||||||
|
|
||||||
|
/* r = x - round(x). Range reduction to -1/2 .. 1/2. */
|
||||||
|
r = vsubq_f64 (r, vrndaq_f64 (r));
|
||||||
|
|
||||||
|
/* y = sin(pi * r). */
|
||||||
|
float64x2_t r2 = vmulq_f64 (r, r);
|
||||||
|
float64x2_t r4 = vmulq_f64 (r2, r2);
|
||||||
|
float64x2_t y = vmulq_f64 (v_pw_horner_9_f64 (r2, r4, d->poly), r);
|
||||||
|
|
||||||
|
#if WANT_SIMD_EXCEPT
|
||||||
|
if (__glibc_unlikely (v_any_u64 (cmp)))
|
||||||
|
return special_case (x, y, odd, cmp);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
|
||||||
|
}
|
61
sysdeps/aarch64/fpu/sinpi_sve.c
Normal file
61
sysdeps/aarch64/fpu/sinpi_sve.c
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
/* Double-precision (SVE) sinpi function
|
||||||
|
|
||||||
|
Copyright (C) 2024 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include "sv_math.h"
|
||||||
|
#include "poly_sve_f64.h"
|
||||||
|
|
||||||
|
static const struct data
|
||||||
|
{
|
||||||
|
double poly[10], range_val;
|
||||||
|
} data = {
|
||||||
|
/* Polynomial coefficients generated using Remez algorithm,
|
||||||
|
see sinpi.sollya for details. */
|
||||||
|
.poly = { 0x1.921fb54442d184p1, -0x1.4abbce625be53p2, 0x1.466bc6775ab16p1,
|
||||||
|
-0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
|
||||||
|
0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16,
|
||||||
|
0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 },
|
||||||
|
.range_val = 0x1p63,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* A fast SVE implementation of sinpi.
|
||||||
|
Maximum error 3.10 ULP:
|
||||||
|
_ZGVsMxv_sinpi(0x1.df1a14f1b235p-2) got 0x1.fd64f541606cp-1
|
||||||
|
want 0x1.fd64f541606c3p-1. */
|
||||||
|
svfloat64_t SV_NAME_D1 (sinpi) (svfloat64_t x, const svbool_t pg)
|
||||||
|
{
|
||||||
|
const struct data *d = ptr_barrier (&data);
|
||||||
|
|
||||||
|
/* Range reduction into -1/2 .. 1/2
|
||||||
|
with n = round(x) and r = x - n. */
|
||||||
|
svfloat64_t n = svrinta_x (pg, x);
|
||||||
|
svfloat64_t r = svsub_x (pg, x, n);
|
||||||
|
|
||||||
|
/* Result should be negated based on if n is odd or not. */
|
||||||
|
svbool_t cmp = svaclt (pg, x, d->range_val);
|
||||||
|
svuint64_t intn = svreinterpret_u64 (svcvt_s64_z (pg, n));
|
||||||
|
svuint64_t sign = svlsl_z (cmp, intn, 63);
|
||||||
|
|
||||||
|
/* y = sin(pi * r). */
|
||||||
|
svfloat64_t r2 = svmul_x (pg, r, r);
|
||||||
|
svfloat64_t r4 = svmul_x (pg, r2, r2);
|
||||||
|
svfloat64_t y = sv_pw_horner_9_f64_x (pg, r2, r4, d->poly);
|
||||||
|
y = svmul_x (pg, y, r);
|
||||||
|
|
||||||
|
return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
|
||||||
|
}
|
85
sysdeps/aarch64/fpu/sinpif_advsimd.c
Normal file
85
sysdeps/aarch64/fpu/sinpif_advsimd.c
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
/* Single-precision (Advanced SIMD) sinpi function
|
||||||
|
|
||||||
|
Copyright (C) 2024 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include "v_math.h"
|
||||||
|
#include "poly_advsimd_f32.h"
|
||||||
|
|
||||||
|
static const struct data
|
||||||
|
{
|
||||||
|
float32x4_t poly[6];
|
||||||
|
} data = {
|
||||||
|
/* Taylor series coefficients for sin(pi * x). */
|
||||||
|
.poly = { V4 (0x1.921fb6p1f), V4 (-0x1.4abbcep2f), V4 (0x1.466bc6p1f),
|
||||||
|
V4 (-0x1.32d2ccp-1f), V4 (0x1.50783p-4f), V4 (-0x1.e30750p-8f) },
|
||||||
|
};
|
||||||
|
|
||||||
|
#if WANT_SIMD_EXCEPT
|
||||||
|
# define TinyBound v_u32 (0x30000000) /* asuint32(0x1p-31f). */
|
||||||
|
# define Thresh v_u32 (0x1f000000) /* asuint32(0x1p31f) - TinyBound. */
|
||||||
|
|
||||||
|
static float32x4_t VPCS_ATTR NOINLINE
|
||||||
|
special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
|
||||||
|
{
|
||||||
|
/* Fall back to scalar code. */
|
||||||
|
y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
|
||||||
|
return v_call_f32 (sinpif, x, y, cmp);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Approximation for vector single-precision sinpi(x)
|
||||||
|
Maximum Error 3.03 ULP:
|
||||||
|
_ZGVnN4v_sinpif(0x1.c597ccp-2) got 0x1.f7cd56p-1
|
||||||
|
want 0x1.f7cd5p-1. */
|
||||||
|
float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinpi) (float32x4_t x)
|
||||||
|
{
|
||||||
|
const struct data *d = ptr_barrier (&data);
|
||||||
|
|
||||||
|
#if WANT_SIMD_EXCEPT
|
||||||
|
uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x));
|
||||||
|
uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (ir, TinyBound), Thresh);
|
||||||
|
|
||||||
|
/* When WANT_SIMD_EXCEPT = 1, special lanes should be set to 0
|
||||||
|
to avoid them under/overflowing and throwing exceptions. */
|
||||||
|
float32x4_t r = v_zerofy_f32 (x, cmp);
|
||||||
|
#else
|
||||||
|
float32x4_t r = x;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If the integer nearest to r is odd, the sign of the result should be inverted. */
|
||||||
|
uint32x4_t odd
|
||||||
|
= vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (r)), 31);
|
||||||
|
|
||||||
|
/* r = x - round(x). Range reduction to -1/2 .. 1/2. */
|
||||||
|
r = vsubq_f32 (r, vrndaq_f32 (r));
|
||||||
|
|
||||||
|
/* Pairwise Horner approximation for y = sin(r * pi). */
|
||||||
|
float32x4_t r2 = vmulq_f32 (r, r);
|
||||||
|
float32x4_t r4 = vmulq_f32 (r2, r2);
|
||||||
|
float32x4_t y = vmulq_f32 (v_pw_horner_5_f32 (r2, r4, d->poly), r);
|
||||||
|
|
||||||
|
#if WANT_SIMD_EXCEPT
|
||||||
|
if (__glibc_unlikely (v_any_u32 (cmp)))
|
||||||
|
return special_case (x, y, odd, cmp);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
|
||||||
|
}
|
||||||
|
|
||||||
|
libmvec_hidden_def (V_NAME_F1 (sinpi))
|
||||||
|
HALF_WIDTH_ALIAS_F1 (sinpi)
|
57
sysdeps/aarch64/fpu/sinpif_sve.c
Normal file
57
sysdeps/aarch64/fpu/sinpif_sve.c
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
/* Single-precision (SVE) sinpi function
|
||||||
|
|
||||||
|
Copyright (C) 2024 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include "sv_math.h"
|
||||||
|
#include "poly_sve_f32.h"
|
||||||
|
|
||||||
|
static const struct data
|
||||||
|
{
|
||||||
|
float poly[6], range_val;
|
||||||
|
} data = {
|
||||||
|
/* Taylor series coefficients for sin(pi * x). */
|
||||||
|
.poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f,
|
||||||
|
0x1.50783p-4f, -0x1.e30750p-8f },
|
||||||
|
.range_val = 0x1p31,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* A fast SVE implementation of sinpif.
|
||||||
|
Maximum error 2.48 ULP:
|
||||||
|
_ZGVsMxv_sinpif(0x1.d062b6p-2) got 0x1.fa8c06p-1
|
||||||
|
want 0x1.fa8c02p-1. */
|
||||||
|
svfloat32_t SV_NAME_F1 (sinpi) (svfloat32_t x, const svbool_t pg)
|
||||||
|
{
|
||||||
|
const struct data *d = ptr_barrier (&data);
|
||||||
|
|
||||||
|
/* range reduction into -1/2 .. 1/2
|
||||||
|
with n = round(x) and r = x - n. */
|
||||||
|
svfloat32_t n = svrinta_x (pg, x);
|
||||||
|
svfloat32_t r = svsub_x (pg, x, n);
|
||||||
|
|
||||||
|
/* Result should be negated based on if n is odd or not. */
|
||||||
|
svbool_t cmp = svaclt (pg, x, d->range_val);
|
||||||
|
svuint32_t intn = svreinterpret_u32 (svcvt_s32_z (pg, n));
|
||||||
|
svuint32_t sign = svlsl_z (cmp, intn, 31);
|
||||||
|
|
||||||
|
/* y = sin(pi * r). */
|
||||||
|
svfloat32_t r2 = svmul_x (pg, r, r);
|
||||||
|
svfloat32_t y = sv_horner_5_f32_x (pg, r2, d->poly);
|
||||||
|
y = svmul_x (pg, y, r);
|
||||||
|
|
||||||
|
return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
|
||||||
|
}
|
@@ -47,5 +47,6 @@ VPCS_VECTOR_WRAPPER (log2_advsimd, _ZGVnN2v_log2)
|
|||||||
VPCS_VECTOR_WRAPPER_ff (pow_advsimd, _ZGVnN2vv_pow)
|
VPCS_VECTOR_WRAPPER_ff (pow_advsimd, _ZGVnN2vv_pow)
|
||||||
VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin)
|
VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin)
|
||||||
VPCS_VECTOR_WRAPPER (sinh_advsimd, _ZGVnN2v_sinh)
|
VPCS_VECTOR_WRAPPER (sinh_advsimd, _ZGVnN2v_sinh)
|
||||||
|
VPCS_VECTOR_WRAPPER (sinpi_advsimd, _ZGVnN2v_sinpi)
|
||||||
VPCS_VECTOR_WRAPPER (tan_advsimd, _ZGVnN2v_tan)
|
VPCS_VECTOR_WRAPPER (tan_advsimd, _ZGVnN2v_tan)
|
||||||
VPCS_VECTOR_WRAPPER (tanh_advsimd, _ZGVnN2v_tanh)
|
VPCS_VECTOR_WRAPPER (tanh_advsimd, _ZGVnN2v_tanh)
|
||||||
|
@@ -66,5 +66,6 @@ SVE_VECTOR_WRAPPER (log2_sve, _ZGVsMxv_log2)
|
|||||||
SVE_VECTOR_WRAPPER_ff (pow_sve, _ZGVsMxvv_pow)
|
SVE_VECTOR_WRAPPER_ff (pow_sve, _ZGVsMxvv_pow)
|
||||||
SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin)
|
SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin)
|
||||||
SVE_VECTOR_WRAPPER (sinh_sve, _ZGVsMxv_sinh)
|
SVE_VECTOR_WRAPPER (sinh_sve, _ZGVsMxv_sinh)
|
||||||
|
SVE_VECTOR_WRAPPER (sinpi_sve, _ZGVsMxv_sinpi)
|
||||||
SVE_VECTOR_WRAPPER (tan_sve, _ZGVsMxv_tan)
|
SVE_VECTOR_WRAPPER (tan_sve, _ZGVsMxv_tan)
|
||||||
SVE_VECTOR_WRAPPER (tanh_sve, _ZGVsMxv_tanh)
|
SVE_VECTOR_WRAPPER (tanh_sve, _ZGVsMxv_tanh)
|
||||||
|
@@ -47,5 +47,6 @@ VPCS_VECTOR_WRAPPER (log2f_advsimd, _ZGVnN4v_log2f)
|
|||||||
VPCS_VECTOR_WRAPPER_ff (powf_advsimd, _ZGVnN4vv_powf)
|
VPCS_VECTOR_WRAPPER_ff (powf_advsimd, _ZGVnN4vv_powf)
|
||||||
VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf)
|
VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf)
|
||||||
VPCS_VECTOR_WRAPPER (sinhf_advsimd, _ZGVnN4v_sinhf)
|
VPCS_VECTOR_WRAPPER (sinhf_advsimd, _ZGVnN4v_sinhf)
|
||||||
|
VPCS_VECTOR_WRAPPER (sinpif_advsimd, _ZGVnN4v_sinpif)
|
||||||
VPCS_VECTOR_WRAPPER (tanf_advsimd, _ZGVnN4v_tanf)
|
VPCS_VECTOR_WRAPPER (tanf_advsimd, _ZGVnN4v_tanf)
|
||||||
VPCS_VECTOR_WRAPPER (tanhf_advsimd, _ZGVnN4v_tanhf)
|
VPCS_VECTOR_WRAPPER (tanhf_advsimd, _ZGVnN4v_tanhf)
|
||||||
|
@@ -66,5 +66,6 @@ SVE_VECTOR_WRAPPER (log2f_sve, _ZGVsMxv_log2f)
|
|||||||
SVE_VECTOR_WRAPPER_ff (powf_sve, _ZGVsMxvv_powf)
|
SVE_VECTOR_WRAPPER_ff (powf_sve, _ZGVsMxvv_powf)
|
||||||
SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf)
|
SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf)
|
||||||
SVE_VECTOR_WRAPPER (sinhf_sve, _ZGVsMxv_sinhf)
|
SVE_VECTOR_WRAPPER (sinhf_sve, _ZGVsMxv_sinhf)
|
||||||
|
SVE_VECTOR_WRAPPER (sinpif_sve, _ZGVsMxv_sinpif)
|
||||||
SVE_VECTOR_WRAPPER (tanf_sve, _ZGVsMxv_tanf)
|
SVE_VECTOR_WRAPPER (tanf_sve, _ZGVsMxv_tanf)
|
||||||
SVE_VECTOR_WRAPPER (tanhf_sve, _ZGVsMxv_tanhf)
|
SVE_VECTOR_WRAPPER (tanhf_sve, _ZGVsMxv_tanhf)
|
||||||
|
@@ -1627,11 +1627,19 @@ double: 2
|
|||||||
float: 2
|
float: 2
|
||||||
ldouble: 2
|
ldouble: 2
|
||||||
|
|
||||||
|
Function: "sinpi_advsimd":
|
||||||
|
double: 2
|
||||||
|
float: 2
|
||||||
|
|
||||||
Function: "sinpi_downward":
|
Function: "sinpi_downward":
|
||||||
double: 2
|
double: 2
|
||||||
float: 2
|
float: 2
|
||||||
ldouble: 2
|
ldouble: 2
|
||||||
|
|
||||||
|
Function: "sinpi_sve":
|
||||||
|
double: 2
|
||||||
|
float: 2
|
||||||
|
|
||||||
Function: "sinpi_towardzero":
|
Function: "sinpi_towardzero":
|
||||||
double: 2
|
double: 2
|
||||||
float: 1
|
float: 1
|
||||||
|
@@ -130,6 +130,11 @@ GLIBC_2.40 _ZGVsMxvv_pow F
|
|||||||
GLIBC_2.40 _ZGVsMxvv_powf F
|
GLIBC_2.40 _ZGVsMxvv_powf F
|
||||||
GLIBC_2.41 _ZGVnN2v_logp1 F
|
GLIBC_2.41 _ZGVnN2v_logp1 F
|
||||||
GLIBC_2.41 _ZGVnN2v_logp1f F
|
GLIBC_2.41 _ZGVnN2v_logp1f F
|
||||||
|
GLIBC_2.41 _ZGVnN2v_sinpi F
|
||||||
|
GLIBC_2.41 _ZGVnN2v_sinpif F
|
||||||
GLIBC_2.41 _ZGVnN4v_logp1f F
|
GLIBC_2.41 _ZGVnN4v_logp1f F
|
||||||
|
GLIBC_2.41 _ZGVnN4v_sinpif F
|
||||||
GLIBC_2.41 _ZGVsMxv_logp1 F
|
GLIBC_2.41 _ZGVsMxv_logp1 F
|
||||||
GLIBC_2.41 _ZGVsMxv_logp1f F
|
GLIBC_2.41 _ZGVsMxv_logp1f F
|
||||||
|
GLIBC_2.41 _ZGVsMxv_sinpi F
|
||||||
|
GLIBC_2.41 _ZGVsMxv_sinpif F
|
||||||
|
Reference in New Issue
Block a user