1
0
mirror of https://git.code.sf.net/p/mingw-w64/mingw-w64 synced 2025-04-18 17:44:18 +03:00

crt: Use aarch64 assembly variants for ARM64EC math functions.

Signed-off-by: Jacek Caban <jacek@codeweavers.com>
This commit is contained in:
Billy Laws 2025-04-03 15:43:30 +02:00 committed by Jacek Caban
parent dbb60ad07c
commit cf2b16e2a3
12 changed files with 26 additions and 25 deletions

View File

@ -17,7 +17,7 @@ double fma(double x, double y, double z){
return z;
}
#elif defined(_ARM64_) || defined(__aarch64__)
#elif defined(_ARM64_) || defined(__aarch64__) || defined(_ARM64EC_) || defined(__arm64ec__)
/* Use hardware FMA on ARM64. */
double fma(double x, double y, double z){

View File

@ -17,7 +17,7 @@ float fmaf(float x, float y, float z){
return z;
}
#elif defined(_ARM64_) || defined(__aarch64__)
#elif defined(_ARM64_) || defined(__aarch64__) || defined(_ARM64EC_) || defined(__arm64ec__)
/* Use hardware FMA on ARM64. */
float fmaf(float x, float y, float z){

View File

@ -9,7 +9,7 @@
long long llrint (double x)
{
long long retval = 0ll;
#if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
#if (defined(_AMD64_) && !defined(_ARM64EC_)) || (defined(__x86_64__) && !defined(__arm64ec__)) || defined(_X86_) || defined(__i386__)
__asm__ __volatile__ ("fistpll %0" : "=m" (retval) : "t" (x) : "st");
#else
int mode = fegetround();

View File

@ -9,7 +9,7 @@
long long llrintf (float x)
{
long long retval = 0ll;
#if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
#if (defined(_AMD64_) && !defined(_ARM64EC_)) || (defined(__x86_64__) && !defined(__arm64ec__)) || defined(_X86_) || defined(__i386__)
__asm__ __volatile__ ("fistpll %0" : "=m" (retval) : "t" (x) : "st");
#else
int mode = fegetround();

View File

@ -9,7 +9,8 @@
long long llrintl (long double x)
{
long long retval = 0ll;
#if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
#if (defined(_AMD64_) && !defined(_ARM64EC_)) || (defined(__x86_64__) && !defined(__arm64ec__)) || \
defined(_X86_) || defined(__i386__)
__asm__ __volatile__ ("fistpll %0" : "=m" (retval) : "t" (x) : "st");
#else
int mode = fegetround();

View File

@ -5,14 +5,19 @@
*/
#include <math.h>
#if defined(_AMD64_) || defined(__x86_64__)
#if (defined(_AMD64_) && !defined(_ARM64EC_)) || (defined(__x86_64__) && !defined(__arm64ec__))
#include <xmmintrin.h>
#endif
long lrint (double x)
{
long retval = 0L;
#if defined(_AMD64_) || defined(__x86_64__)
#if defined(__aarch64__) || defined(_ARM64_) || defined(__arm64ec__) || defined(_ARM64EC_)
__asm__ __volatile__ (
"frintx %d1, %d1\n\t"
"fcvtzs %w0, %d1\n\t"
: "=r" (retval), "+w" (x));
#elif defined(_AMD64_) || defined(__x86_64__)
retval = _mm_cvtsd_si32(_mm_load_sd(&x));
#elif defined(_X86_) || defined(__i386__)
__asm__ __volatile__ ("fistpl %0" : "=m" (retval) : "t" (x) : "st");
@ -22,11 +27,6 @@ long lrint (double x)
"vcvtr.s32.f64 %[tmp], %[src]\n\t"
"fmrs %[dst], %[tmp]\n\t"
: [dst] "=r" (retval), [tmp] "=t" (temp) : [src] "w" (x));
#elif defined(__aarch64__) || defined(_ARM64_)
__asm__ __volatile__ (
"frintx %d1, %d1\n\t"
"fcvtzs %w0, %d1\n\t"
: "=r" (retval), "+w" (x));
#endif
return retval;
}

View File

@ -5,14 +5,19 @@
*/
#include <math.h>
#if defined(_AMD64_) || defined(__x86_64__)
#if (defined(_AMD64_) && !defined(_ARM64EC_)) || (defined(__x86_64__) && !defined(__arm64ec__))
#include <xmmintrin.h>
#endif
long lrintf (float x)
{
long retval = 0l;
#if defined(_AMD64_) || defined(__x86_64__)
#if defined(__aarch64__) || defined(_ARM64_) || defined(__arm64ec__) || defined(_ARM64EC_)
__asm__ __volatile__ (
"frintx %s1, %s1\n\t"
"fcvtzs %w0, %s1\n\t"
: "=r" (retval), "+w" (x));
#elif defined(_AMD64_) || defined(__x86_64__)
retval = _mm_cvtss_si32(_mm_load_ss(&x));
#elif defined(_X86_) || defined(__i386__)
__asm__ __volatile__ ("fistpl %0" : "=m" (retval) : "t" (x) : "st");
@ -21,11 +26,6 @@ long lrintf (float x)
"vcvtr.s32.f32 %[src], %[src]\n\t"
"fmrs %[dst], %[src]\n\t"
: [dst] "=r" (retval), [src] "+w" (x));
#elif defined(__aarch64__) || defined(_ARM64_)
__asm__ __volatile__ (
"frintx %s1, %s1\n\t"
"fcvtzs %w0, %s1\n\t"
: "=r" (retval), "+w" (x));
#endif
return retval;
}

View File

@ -12,7 +12,7 @@ modf (double value, double* iptr)
{
double int_part = 0.0;
/* truncate */
#if defined(_AMD64_) || defined(__x86_64__)
#if (defined(_AMD64_) && !defined(_ARM64EC_)) || (defined(__x86_64__) && !defined(__arm64ec__))
asm volatile ("subq $8, %%rsp\n"
"fnstcw 4(%%rsp)\n"
"movzwl 4(%%rsp), %%eax\n"

View File

@ -13,7 +13,7 @@ modff (float value, float* iptr)
float int_part = 0.0F;
/* truncate */
/* truncate */
#if defined(_AMD64_) || defined(__x86_64__)
#if (defined(_AMD64_) && !defined(_ARM64EC_)) || (defined(__x86_64__) && !defined(__arm64ec__))
asm volatile ("subq $8, %%rsp\n"
"fnstcw 4(%%rsp)\n"
"movzwl 4(%%rsp), %%eax\n"

View File

@ -12,7 +12,7 @@ modfl (long double value, long double* iptr)
{
long double int_part = 0.0L;
/* truncate */
#if defined(_AMD64_) || defined(__x86_64__)
#if (defined(_AMD64_) && !defined(_ARM64EC_)) || (defined(__x86_64__) && !defined(__arm64ec__))
asm volatile ("subq $8, %%rsp\n"
"fnstcw 4(%%rsp)\n"
"movzwl 4(%%rsp), %%eax\n"

View File

@ -9,11 +9,11 @@
int __signbitl (long double x) {
#if defined(__x86_64__) || defined(_AMD64_)
#if (defined(_AMD64_) && !defined(_ARM64EC_)) || (defined(__x86_64__) && !defined(__arm64ec__))
__mingw_ldbl_type_t ld;
ld.x = x;
return ((ld.lh.sign_exponent & 0x8000) != 0);
#elif defined(__arm__) || defined(_ARM_) || defined(__aarch64__) || defined(_ARM64_)
#elif defined(__arm__) || defined(_ARM_) || defined(__aarch64__) || defined(_ARM64_) || defined(__arm64ec__) || defined(_ARM64EC_)
return __signbit(x);
#elif defined(__i386__) || defined(_X86_)
unsigned short sw;

View File

@ -77,7 +77,7 @@ __FLT_ABI (sqrt) (__FLT_TYPE x)
#else
asm volatile ("fsqrtd %[dst], %[src];\n" : [dst] "=w" (res) : [src] "w" (x));
#endif
#elif defined(__aarch64__) || defined(_ARM64_)
#elif defined(__aarch64__) || defined(_ARM64_) || defined(__arm64ec__) || defined(_ARM64EC_)
#if _NEW_COMPLEX_FLOAT
asm volatile ("fsqrt %s[dst], %s[src]\n" : [dst] "=w" (res) : [src] "w" (x));
#else