mirror of
https://sourceware.org/git/glibc.git
synced 2025-10-28 23:34:53 +03:00
The compiler inlines trunc and truncf with SSE4.1, but older versions of GCC don't inline them with -Os: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121861 Don't use the asm statement for trunc and truncf if the compiler can inline them with -Os. This removes one register move with GCC 16: __modff_sse41: __modff_sse41: .LFB23: .LFB23: .cfi_startproc .cfi_startproc endbr64 endbr64 subq $24, %rsp subq $24, %rsp .cfi_def_cfa_offset 32 .cfi_def_cfa_offset 32 movq %fs:40, %rax movq %fs:40, %rax movq %rax, 8(%rsp) movq %rax, 8(%rsp) xorl %eax, %eax xorl %eax, %eax movd %xmm0, %eax movd %xmm0, %eax addl %eax, %eax addl %eax, %eax cmpl $-16777216, %eax cmpl $-16777216, %eax je .L7 je .L7 > movaps %xmm0, %xmm3 movaps %xmm0, %xmm4 movaps %xmm0, %xmm4 movss .LC0(%rip), %xmm2 | movss .LC0(%rip), %xmm1 movaps %xmm2, %xmm3 | movaps %xmm1, %xmm2 andps %xmm0, %xmm2 | roundss $11, %xmm3, %xmm3 roundss $11, %xmm0, %xmm1 | subss %xmm3, %xmm4 subss %xmm1, %xmm4 | andps %xmm0, %xmm1 andnps %xmm4, %xmm3 | andnps %xmm4, %xmm2 orps %xmm3, %xmm2 | orps %xmm2, %xmm1 .L3: .L3: movss %xmm1, (%rdi) | movss %xmm3, (%rdi) movq 8(%rsp), %rax movq 8(%rsp), %rax subq %fs:40, %rax subq %fs:40, %rax jne .L8 jne .L8 movaps %xmm2, %xmm0 | movaps %xmm1, %xmm0 addq $24, %rsp addq $24, %rsp .cfi_remember_state .cfi_remember_state .cfi_def_cfa_offset 8 .cfi_def_cfa_offset 8 ret ret Signed-off-by: H.J. Lu <hjl.tools@gmail.com> Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
56 lines
1.5 KiB
C
56 lines
1.5 KiB
C
/* Private inline math functions for x86.
|
|
Copyright (C) 1995-2025 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef X86_MATH_PRIVATE_H
|
|
#define X86_MATH_PRIVATE_H 1
|
|
|
|
#include <math.h>
|
|
#include_next <math_private.h>
|
|
|
|
/* Return the arc tangent of Y/X, computed by the x87 FPATAN
   instruction.  */
__extern_always_inline long double
__NTH (__ieee754_atan2l (long double y, long double x))
{
  long double angle;

  /* X is loaded into st(0) and shares that register with the result;
     Y is loaded into st(1).  FPATAN pops the register stack once, so
     the st(1) input, which is not tied to any output, has to be named
     as a clobber.  */
  __asm__ __volatile__ ("fpatan"
                        : "=t" (angle)
                        : "0" (x), "u" (y)
                        : "st(1)");

  return angle;
}
|
|
|
|
__extern_always_inline double
|
|
__trunc (double x)
|
|
{
|
|
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
|
|
return trunc (x);
|
|
#else
|
|
asm ("%vroundsd $11, %d1, %0" : "=v" (x) : "v" (x));
|
|
return x;
|
|
#endif
|
|
}
|
|
|
|
__extern_always_inline float
|
|
__truncf (float x)
|
|
{
|
|
#if HAVE_X86_INLINE_TRUNC || !defined __SSE4_1__
|
|
return truncf (x);
|
|
#else
|
|
asm ("%vroundss $11, %d1, %0" : "=v" (x) : "v" (x));
|
|
return x;
|
|
#endif
|
|
}
|
|
|
|
#endif
|