mirror of
https://sourceware.org/git/glibc.git
synced 2025-12-06 12:01:08 +03:00
To enable “longlong.h” removal, add_ssaaaa and sub_ssaaaa are moved to gmp-arch.h. The generic implementation now uses a static inline function. This provides better type checking than the GNU extension, which casts the asm constraint, and it also works better with clang. Most architectures use the generic implementation, with the exception of arc, arm, hppa, x86, m68k, powerpc, and sparc. On 32-bit architectures the compiler generates good enough code using uint64_t types, whereas on 64-bit architectures the patch leverages the math_u128.h definitions, which use 128-bit integers when available (all 64-bit architectures on gcc 15). The strongly typed implementation required some changes. I adjusted _FP_W_TYPE, _FP_WS_TYPE, and _FP_I_TYPE to use the same type as mp_limb_t on aarch64, powerpc64le, x86_64, and riscv64. This basically means using “long” instead of “long long.” Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
226 lines
6.7 KiB
C
226 lines
6.7 KiB
C
/* Configure soft-fp for building sqrtf128. Based on sfp-machine.h in
|
|
libgcc, with soft-float and other irrelevant parts removed. */
|
|
|
|
#include <math-inline-asm.h>
|
|
|
|
/* The type of the result of a floating point comparison.  This must
   match `__libgcc_cmp_return__' in GCC for the target.

   When HAVE_X86_LIBGCC_CMP_RETURN_ATTR is nonzero the type is taken
   directly from that machine mode; otherwise an explicit integer type is
   chosen depending on whether __x86_64__ is defined.  Every branch has to
   declare the same name, __gcc_CMPtype, because CMPtype expands to it
   unconditionally below.  */
#if HAVE_X86_LIBGCC_CMP_RETURN_ATTR
typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
#else
# ifdef __x86_64__
typedef long long int __gcc_CMPtype;
# else
typedef long int __gcc_CMPtype;
# endif
#endif
#define CMPtype __gcc_CMPtype
|
|
|
|
#ifdef __x86_64__
|
|
/* Soft-fp word configuration for x86_64: words are 64 bits wide.
   _FP_W_TYPE is the unsigned word, _FP_WS_TYPE its signed counterpart and
   _FP_I_TYPE the plain signed integer type.  When __ILP32__ is defined the
   `long long' family is used (presumably because `long' is narrower than
   64 bits there -- confirm); otherwise the `long' family is used.  */
# define _FP_W_TYPE_SIZE 64
# ifdef __ILP32__
#  define _FP_W_TYPE unsigned long long
#  define _FP_WS_TYPE signed long long
#  define _FP_I_TYPE long long
# else
#  define _FP_W_TYPE unsigned long
#  define _FP_WS_TYPE signed long
#  define _FP_I_TYPE long
# endif
|
|
|
|
/* Signed and unsigned integer types declared through the TI machine mode,
   i.e. twice the width of the 64-bit soft-fp word.  */
typedef int TItype __attribute__ ((mode (TI)));
typedef unsigned int UTItype __attribute__ ((mode (TI)));

/* Width of TItype in bits, as an int.  */
# define TI_BITS ((int) sizeof (TItype) * __CHAR_BIT__)
|
|
|
|
/* Multiply and divide kernels for the Q format with 64-bit words: both
   forward to the two-word soft-fp helpers.  The multiply passes umul_ppmm
   as the word-multiply primitive.  */
# define _FP_MUL_MEAT_Q(R, X, Y) \
  _FP_MUL_MEAT_2_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)

# define _FP_DIV_MEAT_Q(R, X, Y) \
  _FP_DIV_MEAT_2_udiv (Q, R, X, Y)
|
|
|
|
/* Fraction words used for a generated NaN in each format: the format's
   quiet-NaN bit, followed by one zero word for the two-word formats
   (E and Q).  */
# define _FP_NANFRAC_S  _FP_QNANBIT_S
# define _FP_NANFRAC_D  _FP_QNANBIT_D
# define _FP_NANFRAC_E  _FP_QNANBIT_E, 0
# define _FP_NANFRAC_Q  _FP_QNANBIT_Q, 0
|
|
|
|
/* Rounding-mode encodings as they appear in _fcw on x86_64 (the word that
   FP_INIT_ROUNDMODE fills in).  FP_RND_MASK selects the two bits that
   carry the mode.  */
# define FP_RND_NEAREST 0
# define FP_RND_MINF    0x2000
# define FP_RND_PINF    0x4000
# define FP_RND_ZERO    0x6000

# define FP_RND_MASK    0x6000

/* Right shift that lines the bits of _fcw up with the FP_EX_* flag
   positions; used by FP_TRAPPING_EXCEPTIONS.  */
# define FP_EX_SHIFT 7

/* Declare the local control-word copy, defaulting to round-to-nearest.
   The expansion already ends in a semicolon.  */
# define _FP_DECL_EX \
  unsigned int _fcw __attribute__ ((unused)) = FP_RND_NEAREST;
|
|
|
|
/* Fill _fcw by handing its address to stmxcsr_inline_asm (presumably a
   wrapper around the STMXCSR instruction from math-inline-asm.h --
   confirm).  Wrapped in do/while (0) so it is used as one statement.  */
# define FP_INIT_ROUNDMODE \
  do \
    { \
      stmxcsr_inline_asm (&_fcw); \
    } \
  while (0)
|
|
#else
|
|
/* Soft-fp word configuration for the non-x86_64 case: words are 32 bits
   wide and the `long int' family supplies the unsigned word, signed word
   and plain integer types.  */
# define _FP_W_TYPE_SIZE 32
# define _FP_W_TYPE      unsigned long int
# define _FP_WS_TYPE     signed long int
# define _FP_I_TYPE      long int
|
|
|
|
/* Four-word addition: (r3,r2,r1,r0) = (x3,x2,x1,x0) + (y3,y2,y1,y0), where
   word 0 is the least significant.  An ADD on word 0 is followed by three
   ADCs that carry into the higher words.  Operands %0..%3 are r3..r0, each
   tied to the matching x word; %5, %7, %9 and %11 are y3..y0.  The `%'
   constraint modifier marks each x/y pair as commutative.  The
   `{att|intel}' groups in the template select the assembler dialect.  */
# define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
  __asm__ ("add{l} {%11,%3|%3,%11}\n\t" \
           "adc{l} {%9,%2|%2,%9}\n\t" \
           "adc{l} {%7,%1|%1,%7}\n\t" \
           "adc{l} {%5,%0|%0,%5}" \
           : "=r" ((USItype) (r3)), "=&r" ((USItype) (r2)), \
             "=&r" ((USItype) (r1)), "=&r" ((USItype) (r0)) \
           : "%0" ((USItype) (x3)), "g" ((USItype) (y3)), \
             "%1" ((USItype) (x2)), "g" ((USItype) (y2)), \
             "%2" ((USItype) (x1)), "g" ((USItype) (y1)), \
             "%3" ((USItype) (x0)), "g" ((USItype) (y0)))
|
|
/* Three-word addition: (r2,r1,r0) = (x2,x1,x0) + (y2,y1,y0), where word 0
   is the least significant.  An ADD on word 0 is followed by two ADCs that
   carry into the higher words.  Operands %0..%2 are r2..r0, each tied to
   the matching x word; %4, %6 and %8 are y2..y0.  The `%' constraint
   modifier marks each x/y pair as commutative.  */
# define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
  __asm__ ("add{l} {%8,%2|%2,%8}\n\t" \
           "adc{l} {%6,%1|%1,%6}\n\t" \
           "adc{l} {%4,%0|%0,%4}" \
           : "=r" ((USItype) (r2)), "=&r" ((USItype) (r1)), \
             "=&r" ((USItype) (r0)) \
           : "%0" ((USItype) (x2)), "g" ((USItype) (y2)), \
             "%1" ((USItype) (x1)), "g" ((USItype) (y1)), \
             "%2" ((USItype) (x0)), "g" ((USItype) (y0)))
|
|
/* Four-word subtraction: (r3,r2,r1,r0) = (x3,x2,x1,x0) - (y3,y2,y1,y0),
   where word 0 is the least significant.  A SUB on word 0 is followed by
   three SBBs that propagate the borrow into the higher words.  Operands
   %0..%3 are r3..r0, each tied to the matching x word (plain matching
   constraints, since subtraction is not commutative); %5, %7, %9 and %11
   are y3..y0.  */
# define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
  __asm__ ("sub{l} {%11,%3|%3,%11}\n\t" \
           "sbb{l} {%9,%2|%2,%9}\n\t" \
           "sbb{l} {%7,%1|%1,%7}\n\t" \
           "sbb{l} {%5,%0|%0,%5}" \
           : "=r" ((USItype) (r3)), "=&r" ((USItype) (r2)), \
             "=&r" ((USItype) (r1)), "=&r" ((USItype) (r0)) \
           : "0" ((USItype) (x3)), "g" ((USItype) (y3)), \
             "1" ((USItype) (x2)), "g" ((USItype) (y2)), \
             "2" ((USItype) (x1)), "g" ((USItype) (y1)), \
             "3" ((USItype) (x0)), "g" ((USItype) (y0)))
|
|
/* Three-word subtraction: (r2,r1,r0) = (x2,x1,x0) - (y2,y1,y0), where word
   0 is the least significant.  A SUB on word 0 is followed by two SBBs
   that propagate the borrow into the higher words.  Operands %0..%2 are
   r2..r0, each tied to the matching x word; %4, %6 and %8 are y2..y0.  */
# define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
  __asm__ ("sub{l} {%8,%2|%2,%8}\n\t" \
           "sbb{l} {%6,%1|%1,%6}\n\t" \
           "sbb{l} {%4,%0|%0,%4}" \
           : "=r" ((USItype) (r2)), "=&r" ((USItype) (r1)), \
             "=&r" ((USItype) (r0)) \
           : "0" ((USItype) (x2)), "g" ((USItype) (y2)), \
             "1" ((USItype) (x1)), "g" ((USItype) (y1)), \
             "2" ((USItype) (x0)), "g" ((USItype) (y0)))
|
|
/* In-place four-word increment: (x3,x2,x1,x0) += i, where word 0 is the
   least significant.  I is added to x0 and the carry is folded into the
   three higher words by adding zero with carry.  All four x words are
   read-write operands (%0..%3 are x3..x0); %4 is i.  */
# define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
  __asm__ ("add{l} {%4,%3|%3,%4}\n\t" \
           "adc{l} {$0,%2|%2,0}\n\t" \
           "adc{l} {$0,%1|%1,0}\n\t" \
           "adc{l} {$0,%0|%0,0}" \
           : "+r" ((USItype) (x3)), "+&r" ((USItype) (x2)), \
             "+&r" ((USItype) (x1)), "+&r" ((USItype) (x0)) \
           : "g" ((USItype) (i)))
|
|
|
|
|
|
/* Multiply kernels with 32-bit words: the S, D and Q formats forward to
   the one-, two- and four-word "wide" soft-fp helpers respectively, each
   passing umul_ppmm as the word-multiply primitive.  */
# define _FP_MUL_MEAT_S(R, X, Y) \
  _FP_MUL_MEAT_1_wide (_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
# define _FP_MUL_MEAT_D(R, X, Y) \
  _FP_MUL_MEAT_2_wide (_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
# define _FP_MUL_MEAT_Q(R, X, Y) \
  _FP_MUL_MEAT_4_wide (_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)

/* Divide kernels with 32-bit words: S uses the one-word loop helper, D and
   Q the two- and four-word udiv helpers.  */
# define _FP_DIV_MEAT_S(R, X, Y) \
  _FP_DIV_MEAT_1_loop (S, R, X, Y)
# define _FP_DIV_MEAT_D(R, X, Y) \
  _FP_DIV_MEAT_2_udiv (D, R, X, Y)
# define _FP_DIV_MEAT_Q(R, X, Y) \
  _FP_DIV_MEAT_4_udiv (Q, R, X, Y)
|
|
|
|
/* Fraction words used for a generated NaN in each format with 32-bit
   words: the format's quiet-NaN bit followed by zero words.  */
# define _FP_NANFRAC_S  _FP_QNANBIT_S
# define _FP_NANFRAC_D  _FP_QNANBIT_D, 0
/* XFmode may occupy only 12 bytes, but soft-fp emulates it in 16 bytes,
   so the E format is padded to four words just like Q.  */
# define _FP_NANFRAC_E  _FP_QNANBIT_E, 0, 0, 0
# define _FP_NANFRAC_Q  _FP_QNANBIT_Q, 0, 0, 0
|
|
|
|
/* Rounding-mode encodings as they appear in _fcw in the 32-bit
   configuration (the word that FP_INIT_ROUNDMODE fills in with fnstcw).
   FP_RND_MASK selects the two bits that carry the mode.  */
# define FP_RND_NEAREST 0
# define FP_RND_MINF    0x400
# define FP_RND_PINF    0x800
# define FP_RND_ZERO    0xc00

# define FP_RND_MASK    0xc00

/* No shift is needed to line _fcw up with the FP_EX_* flag positions; see
   FP_TRAPPING_EXCEPTIONS.  */
# define FP_EX_SHIFT 0

/* Declare the local control-word copy, defaulting to round-to-nearest.
   The expansion already ends in a semicolon.  */
# define _FP_DECL_EX \
  unsigned short _fcw __attribute__ ((unused)) = FP_RND_NEAREST;
|
|
|
|
/* Store the x87 control word into _fcw with FNSTCW.  The asm is volatile
   and writes _fcw through a memory output operand.  Wrapped in
   do/while (0) so it is used as one statement.  */
# define FP_INIT_ROUNDMODE \
  do \
    { \
      __asm__ __volatile__ ("fnstcw\t%0" : "=m" (_fcw)); \
    } \
  while (0)
|
|
#endif
|
|
|
|
/* NaN policy switches consumed by the generic soft-fp code.  */
#define _FP_KEEPNANFRACP  1
#define _FP_QNANNEGATEDP  0

/* Sign bit given to a generated NaN, per format.  */
#define _FP_NANSIGN_S  1
#define _FP_NANSIGN_D  1
#define _FP_NANSIGN_E  1
#define _FP_NANSIGN_Q  1
|
|
|
|
/* Pick which of two NaN operands becomes the result R.

   The Intel specification leaves one case open: two NaNs whose mantissas
   are equal but whose signs differ, and different operations end up
   picking different NaNs.  The rule here is: take X when its fraction is
   greater than Y's, or when the fractions are equal and OP is '+' or '*';
   take Y in every other case.  The sign and fraction of the chosen operand
   are copied into R and R is classified as a NaN.  WC is the word-count
   suffix of the _FP_FRAC_* helpers; FS is not used.  */
#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
  do \
    { \
      if (_FP_FRAC_GT_##wc (X, Y) \
          || (_FP_FRAC_EQ_##wc (X, Y) && (OP == '+' || OP == '*'))) \
        { \
          R##_s = X##_s; \
          _FP_FRAC_COPY_##wc (R, X); \
        } \
      else \
        { \
          R##_s = Y##_s; \
          _FP_FRAC_COPY_##wc (R, Y); \
        } \
      R##_c = FP_CLS_NAN; \
    } \
  while (0)
|
|
|
|
/* Exception flag bits, one bit per exception, and FP_EX_ALL as the union
   of all six.  */
#define FP_EX_INVALID    0x01
#define FP_EX_DENORM     0x02
#define FP_EX_DIVZERO    0x04
#define FP_EX_OVERFLOW   0x08
#define FP_EX_UNDERFLOW  0x10
#define FP_EX_INEXACT    0x20
#define FP_EX_ALL \
  (FP_EX_INVALID | FP_EX_DENORM | FP_EX_DIVZERO \
   | FP_EX_OVERFLOW | FP_EX_UNDERFLOW | FP_EX_INEXACT)
|
|
|
|
/* Out-of-line exception handler; only declared here.  It receives the
   FP_EX_* bits collected in _fex.  */
void __sfp_handle_exceptions (int);

/* If _fex is nonzero, pass it to __sfp_handle_exceptions.  The nonzero
   case is marked as unlikely with __builtin_expect.  _fex must be in
   scope at the point of use.

   There is deliberately no semicolon after `while (0)': the caller writes
   it, so the macro acts as exactly one statement and can be the body of
   an unbraced if/else.  */
#define FP_HANDLE_EXCEPTIONS \
  do \
    { \
      if (__builtin_expect (_fex, 0)) \
        __sfp_handle_exceptions (_fex); \
    } \
  while (0)
|
|
|
|
/* FP_EX_* bits of the exceptions that would trap: the complement of _fcw,
   shifted right by FP_EX_SHIFT, restricted to FP_EX_ALL.  A clear bit in
   _fcw therefore yields a set bit here.  */
#define FP_TRAPPING_EXCEPTIONS \
  ((~_fcw >> FP_EX_SHIFT) & FP_EX_ALL)

/* Current rounding mode: the FP_RND_MASK bits of _fcw, comparable with
   the FP_RND_* constants.  */
#define FP_ROUNDMODE \
  (_fcw & FP_RND_MASK)

/* Tells soft-fp that tininess is detected after rounding.  */
#define _FP_TININESS_AFTER_ROUNDING 1
|