mirror of
https://sourceware.org/git/glibc.git
synced 2025-08-05 19:35:52 +03:00
powerpc: Use faster means to access FPSCR when possible in some cases
Using the 'mffs' instruction to read the Floating-Point Status and Control Register (FPSCR) can force a processor flush in some cases, with undesirable performance impact. If the values of the bits in the FPSCR which force the flush are not needed, an instruction that is new to POWER9 (ISA version 3.0), 'mffsl', can be used instead. Cases included: get_rounding_mode, fegetround, fegetmode, fegetexcept. * sysdeps/powerpc/bits/fenvinline.h (__fegetround): Use __fegetround_ISA300() or __fegetround_ISA2() as appropriate. (__fegetround_ISA300): New. (__fegetround_ISA2): New. * sysdeps/powerpc/fpu_control.h (IS_ISA300): New. (__FPU_MFFS): Move implementation... (_FPU_GETCW): Here. (__FPU_MFFSL): Move implementation... (_FPU_GET_RC_ISA300): Here. New. (_FPU_GET_RC): Use _FPU_GET_RC_ISA300() or _FPU_GETCW() as appropriate. * sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_status_ISA300): New. (fegetenv_status): New. * sysdeps/powerpc/fpu/fegetmode.c (fegetmode): Use fegetenv_status() instead of fegetenv_register(). * sysdeps/powerpc/fpu/fegetexcept.c (__fegetexcept): Likewise. Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
This commit is contained in:
committed by
Tulio Magno Quites Machado Filho
parent
d064591266
commit
3db85a9814
18
ChangeLog
18
ChangeLog
@@ -1,3 +1,21 @@
|
|||||||
|
2019-06-30 Paul A. Clarke <pc@us.ibm.com>
|
||||||
|
|
||||||
|
* sysdeps/powerpc/bits/fenvinline.h (__fegetround): Use
|
||||||
|
__fegetround_ISA300() or __fegetround_ISA2() as appropriate.
|
||||||
|
(__fegetround_ISA300): New.
|
||||||
|
(__fegetround_ISA2): New.
|
||||||
|
* sysdeps/powerpc/fpu_control.h (IS_ISA300): New.
|
||||||
|
(__FPU_MFFS): Move implementation...
|
||||||
|
(_FPU_GETCW): Here.
|
||||||
|
(__FPU_MFFSL): Move implementation...
|
||||||
|
(_FPU_GET_RC_ISA300): Here. New.
|
||||||
|
(_FPU_GET_RC): Use _FPU_GET_RC_ISA300() or _FPU_GETCW() as appropriate.
|
||||||
|
* sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_status_ISA300): New.
|
||||||
|
(fegetenv_status): New.
|
||||||
|
* sysdeps/powerpc/fpu/fegetmode.c (fegetmode): Use fegetenv_status()
|
||||||
|
instead of fegetenv_register().
|
||||||
|
* sysdeps/powerpc/fpu/fegetexcept.c (__fegetexcept): Likewise.
|
||||||
|
|
||||||
2019-06-28 Wilco Dijkstra <wdijkstr@arm.com>
|
2019-06-28 Wilco Dijkstra <wdijkstr@arm.com>
|
||||||
|
|
||||||
* benchtests/bench-math-inlines.c: Increase iterations.
|
* benchtests/bench-math-inlines.c: Increase iterations.
|
||||||
|
@@ -18,13 +18,36 @@
|
|||||||
|
|
||||||
#if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__
|
#if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__
|
||||||
|
|
||||||
/* Inline definition for fegetround. */
|
/* Inline definitions for fegetround. */
|
||||||
# define __fegetround() \
|
# define __fegetround_ISA300() \
|
||||||
(__extension__ ({ int __fegetround_result; \
|
(__extension__ ({ \
|
||||||
__asm__ __volatile__ \
|
union { double __d; unsigned long long __ll; } __u; \
|
||||||
("mcrfs 7,7 ; mfcr %0" \
|
__asm__ __volatile__ ( \
|
||||||
|
".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
|
||||||
|
: "=f" (__u.__d)); \
|
||||||
|
__u.__ll & 0x0000000000000003LL; \
|
||||||
|
}))
|
||||||
|
|
||||||
|
# define __fegetround_ISA2() \
|
||||||
|
(__extension__ ({ \
|
||||||
|
int __fegetround_result; \
|
||||||
|
__asm__ __volatile__ ("mcrfs 7,7 ; mfcr %0" \
|
||||||
: "=r"(__fegetround_result) : : "cr7"); \
|
: "=r"(__fegetround_result) : : "cr7"); \
|
||||||
__fegetround_result & 3; }))
|
__fegetround_result & 3; \
|
||||||
|
}))
|
||||||
|
|
||||||
|
# ifdef _ARCH_PWR9
|
||||||
|
# define __fegetround() __fegetround_ISA300()
|
||||||
|
# elif defined __BUILTIN_CPU_SUPPORTS__
|
||||||
|
# define __fegetround() \
|
||||||
|
(__glibc_likely (__builtin_cpu_supports ("arch_3_00")) \
|
||||||
|
? __fegetround_ISA300() \
|
||||||
|
: __fegetround_ISA2() \
|
||||||
|
)
|
||||||
|
# else
|
||||||
|
# define __fegetround() __fegetround_ISA2()
|
||||||
|
# endif
|
||||||
|
|
||||||
# define fegetround() __fegetround ()
|
# define fegetround() __fegetround ()
|
||||||
|
|
||||||
# ifndef __NO_MATH_INLINES
|
# ifndef __NO_MATH_INLINES
|
||||||
|
@@ -24,7 +24,7 @@ __fegetexcept (void)
|
|||||||
{
|
{
|
||||||
fenv_union_t fe;
|
fenv_union_t fe;
|
||||||
|
|
||||||
fe.fenv = fegetenv_register ();
|
fe.fenv = fegetenv_status ();
|
||||||
|
|
||||||
return fenv_reg_to_exceptions (fe.l);
|
return fenv_reg_to_exceptions (fe.l);
|
||||||
}
|
}
|
||||||
|
@@ -21,6 +21,6 @@
|
|||||||
int
|
int
|
||||||
fegetmode (femode_t *modep)
|
fegetmode (femode_t *modep)
|
||||||
{
|
{
|
||||||
*modep = fegetenv_register ();
|
*modep = fegetenv_status ();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -34,6 +34,27 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
|
|||||||
pointer. */
|
pointer. */
|
||||||
#define fegetenv_register() __builtin_mffs()
|
#define fegetenv_register() __builtin_mffs()
|
||||||
|
|
||||||
|
/* Equivalent to fegetenv_register, but only returns bits for
|
||||||
|
status, exception enables, and mode. */
|
||||||
|
|
||||||
|
#define fegetenv_status_ISA300() \
|
||||||
|
({register double __fr; \
|
||||||
|
__asm__ __volatile__ ( \
|
||||||
|
".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
|
||||||
|
: "=f" (__fr)); \
|
||||||
|
__fr; \
|
||||||
|
})
|
||||||
|
|
||||||
|
#ifdef _ARCH_PWR9
|
||||||
|
# define fegetenv_status() fegetenv_status_ISA300()
|
||||||
|
#else
|
||||||
|
# define fegetenv_status() \
|
||||||
|
(__glibc_likely (__builtin_cpu_supports ("arch_3_00")) \
|
||||||
|
? fegetenv_status_ISA300() \
|
||||||
|
: fegetenv_register() \
|
||||||
|
)
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer. */
|
/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer. */
|
||||||
#define fesetenv_register(env) \
|
#define fesetenv_register(env) \
|
||||||
do { \
|
do { \
|
||||||
|
@@ -65,36 +65,38 @@ extern fpu_control_t __fpu_control;
|
|||||||
typedef unsigned int fpu_control_t;
|
typedef unsigned int fpu_control_t;
|
||||||
|
|
||||||
/* Macros for accessing the hardware control word. */
|
/* Macros for accessing the hardware control word. */
|
||||||
# define __FPU_MFFS() \
|
|
||||||
({register double __fr; \
|
|
||||||
__asm__ __volatile__("mffs %0" : "=f" (__fr)); \
|
|
||||||
__fr; \
|
|
||||||
})
|
|
||||||
|
|
||||||
# define _FPU_GETCW(cw) \
|
# define _FPU_GETCW(cw) \
|
||||||
({union { double __d; unsigned long long __ll; } __u; \
|
({union { double __d; unsigned long long __ll; } __u; \
|
||||||
__u.__d = __FPU_MFFS(); \
|
__asm__ __volatile__("mffs %0" : "=f" (__u.__d)); \
|
||||||
(cw) = (fpu_control_t) __u.__ll; \
|
(cw) = (fpu_control_t) __u.__ll; \
|
||||||
(fpu_control_t) __u.__ll; \
|
(fpu_control_t) __u.__ll; \
|
||||||
})
|
})
|
||||||
|
|
||||||
#ifdef _ARCH_PWR9
|
# define _FPU_GET_RC_ISA300() \
|
||||||
# define __FPU_MFFSL() \
|
|
||||||
({register double __fr; \
|
|
||||||
__asm__ __volatile__("mffsl %0" : "=f" (__fr)); \
|
|
||||||
__fr; \
|
|
||||||
})
|
|
||||||
#else
|
|
||||||
# define __FPU_MFFSL() __FPU_MFFS()
|
|
||||||
#endif
|
|
||||||
|
|
||||||
# define _FPU_GET_RC() \
|
|
||||||
({union { double __d; unsigned long long __ll; } __u; \
|
({union { double __d; unsigned long long __ll; } __u; \
|
||||||
__u.__d = __FPU_MFFSL(); \
|
__asm__ __volatile__( \
|
||||||
__u.__ll &= _FPU_MASK_RC; \
|
".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
|
||||||
(fpu_control_t) __u.__ll; \
|
: "=f" (__u.__d)); \
|
||||||
|
(fpu_control_t) (__u.__ll & _FPU_MASK_RC); \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# ifdef _ARCH_PWR9
|
||||||
|
# define _FPU_GET_RC() _FPU_GET_RC_ISA300()
|
||||||
|
# elif defined __BUILTIN_CPU_SUPPORTS__
|
||||||
|
# define _FPU_GET_RC() \
|
||||||
|
({fpu_control_t __rc; \
|
||||||
|
__rc = __glibc_likely (__builtin_cpu_supports ("arch_3_00")) \
|
||||||
|
? _FPU_GET_RC_ISA300 () \
|
||||||
|
: _FPU_GETCW (__rc) & _FPU_MASK_RC; \
|
||||||
|
__rc; \
|
||||||
|
})
|
||||||
|
# else
|
||||||
|
# define _FPU_GET_RC() \
|
||||||
|
({fpu_control_t __rc = _FPU_GETCW (__rc) & _FPU_MASK_RC; \
|
||||||
|
__rc; \
|
||||||
|
})
|
||||||
|
# endif
|
||||||
|
|
||||||
# define _FPU_SETCW(cw) \
|
# define _FPU_SETCW(cw) \
|
||||||
{ union { double __d; unsigned long long __ll; } __u; \
|
{ union { double __d; unsigned long long __ll; } __u; \
|
||||||
register double __fr; \
|
register double __fr; \
|
||||||
|
Reference in New Issue
Block a user