1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-10-28 23:34:53 +03:00
Files
glibc/sysdeps/x86/fpu/math_private.h
H.J. Lu 1fa5773eb1 x86: Don't use asm statement for trunc/truncf
Compiler inlines trunc and truncf with SSE4.1.  But older versions of GCC
doesn't inline them with -Os:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121861

Don't use asm statement for trunc and truncf if compiler can inline them
with -Os.  It removes one register move with GCC 16:

__modff_sse41:                        __modff_sse41:
.LFB23:                               .LFB23:
   .cfi_startproc                        .cfi_startproc
   endbr64                               endbr64
   subq  $24, %rsp                       subq  $24, %rsp
   .cfi_def_cfa_offset 32                .cfi_def_cfa_offset 32
   movq  %fs:40, %rax                    movq  %fs:40, %rax
   movq  %rax, 8(%rsp)                   movq  %rax, 8(%rsp)
   xorl  %eax, %eax                      xorl  %eax, %eax
   movd  %xmm0, %eax                     movd  %xmm0, %eax
   addl  %eax, %eax                      addl  %eax, %eax
   cmpl  $-16777216, %eax                cmpl  $-16777216, %eax
   je .L7                                je .L7
                                   >     movaps   %xmm0, %xmm3
   movaps   %xmm0, %xmm4                 movaps   %xmm0, %xmm4
   movss .LC0(%rip), %xmm2         |     movss .LC0(%rip), %xmm1
   movaps   %xmm2, %xmm3           |     movaps   %xmm1, %xmm2
   andps %xmm0, %xmm2              |     roundss  $11, %xmm3, %xmm3
   roundss $11, %xmm0, %xmm1       |     subss %xmm3, %xmm4
   subss %xmm1, %xmm4              |     andps %xmm0, %xmm1
   andnps   %xmm4, %xmm3           |     andnps   %xmm4, %xmm2
   orps  %xmm3, %xmm2              |     orps  %xmm2, %xmm1
.L3:                                  .L3:
   movss %xmm1, (%rdi)             |     movss %xmm3, (%rdi)
   movq  8(%rsp), %rax                   movq  8(%rsp), %rax
   subq  %fs:40, %rax                    subq  %fs:40, %rax
   jne   .L8                             jne   .L8
   movaps   %xmm2, %xmm0           |     movaps   %xmm1, %xmm0
   addq  $24, %rsp                       addq  $24, %rsp
   .cfi_remember_state                   .cfi_remember_state
   .cfi_def_cfa_offset 8                 .cfi_def_cfa_offset 8
   ret                                   ret

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
2025-09-17 04:30:11 -07:00

56 lines
1.5 KiB
C

/* Private inline math functions for x86.
Copyright (C) 1995-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef X86_MATH_PRIVATE_H
#define X86_MATH_PRIVATE_H 1
#include <math.h>
#include_next <math_private.h>
__extern_always_inline long double
__NTH (__ieee754_atan2l (long double y, long double x))
{
long double ret;
__asm__ __volatile__ ("fpatan" : "=t" (ret) : "0" (x), "u" (y) : "st(1)");
return ret;
}
__extern_always_inline double
__trunc (double x)
{
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
return trunc (x);
#else
asm ("%vroundsd $11, %d1, %0" : "=v" (x) : "v" (x));
return x;
#endif
}
__extern_always_inline float
__truncf (float x)
{
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
return truncf (x);
#else
asm ("%vroundss $11, %d1, %0" : "=v" (x) : "v" (x));
return x;
#endif
}
#endif