mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Use __attribute__((target(...))) for AVX-512 support.
Presently, we check for compiler support for the required intrinsics both with and without extra compiler flags (e.g., -mxsave), and then depending on the results of those checks, we pick which files to compile with which flags. This is tedious and complicated, and it results in unsustainable coding patterns such as separate files for each portion of code that may need to be built with different compiler flags. This commit introduces support for __attribute__((target(...))) and uses it for the AVX-512 code. This simplifies both the configure-time checks and the build scripts, and it allows us to place the functions that use the intrinsics in files that we otherwise do not want to build with special CPU instructions. We are careful to avoid using __attribute__((target(...))) on compilers that do not understand it, but we still perform the configure-time checks in case the compiler allows using the intrinsics without it (e.g., MSVC). A similar change could likely be made for some of the CRC-32C code, but that is left as a future exercise. Suggested-by: Andres Freund Reviewed-by: Raghuveer Devulapalli, Andres Freund Discussion: https://postgr.es/m/20240731205254.vfpap7uxwmebqeaf%40awork3.anarazel.de
This commit is contained in:
167
configure
vendored
167
configure
vendored
@ -647,9 +647,6 @@ MSGFMT_FLAGS
|
||||
MSGFMT
|
||||
PG_CRC32C_OBJS
|
||||
CFLAGS_CRC
|
||||
PG_POPCNT_OBJS
|
||||
CFLAGS_POPCNT
|
||||
CFLAGS_XSAVE
|
||||
LIBOBJS
|
||||
OPENSSL
|
||||
ZSTD
|
||||
@ -17272,185 +17269,103 @@ fi
|
||||
|
||||
# Check for XSAVE intrinsics
|
||||
#
|
||||
CFLAGS_XSAVE=""
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv with CFLAGS=" >&5
|
||||
$as_echo_n "checking for _xgetbv with CFLAGS=... " >&6; }
|
||||
if ${pgac_cv_xsave_intrinsics_+:} false; then :
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv" >&5
|
||||
$as_echo_n "checking for _xgetbv... " >&6; }
|
||||
if ${pgac_cv_xsave_intrinsics+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS "
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
#include <immintrin.h>
|
||||
#if defined(__has_attribute) && __has_attribute (target)
|
||||
__attribute__((target("xsave")))
|
||||
#endif
|
||||
static int xsave_test(void)
|
||||
{
|
||||
return _xgetbv(0) & 0xe0;
|
||||
}
|
||||
int
|
||||
main ()
|
||||
{
|
||||
return _xgetbv(0) & 0xe0;
|
||||
return xsave_test();
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
pgac_cv_xsave_intrinsics_=yes
|
||||
pgac_cv_xsave_intrinsics=yes
|
||||
else
|
||||
pgac_cv_xsave_intrinsics_=no
|
||||
pgac_cv_xsave_intrinsics=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
CFLAGS="$pgac_save_CFLAGS"
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics_" >&5
|
||||
$as_echo "$pgac_cv_xsave_intrinsics_" >&6; }
|
||||
if test x"$pgac_cv_xsave_intrinsics_" = x"yes"; then
|
||||
CFLAGS_XSAVE=""
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics" >&5
|
||||
$as_echo "$pgac_cv_xsave_intrinsics" >&6; }
|
||||
if test x"$pgac_cv_xsave_intrinsics" = x"yes"; then
|
||||
pgac_xsave_intrinsics=yes
|
||||
fi
|
||||
|
||||
if test x"$pgac_xsave_intrinsics" != x"yes"; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv with CFLAGS=-mxsave" >&5
|
||||
$as_echo_n "checking for _xgetbv with CFLAGS=-mxsave... " >&6; }
|
||||
if ${pgac_cv_xsave_intrinsics__mxsave+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS -mxsave"
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
#include <immintrin.h>
|
||||
int
|
||||
main ()
|
||||
{
|
||||
return _xgetbv(0) & 0xe0;
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
pgac_cv_xsave_intrinsics__mxsave=yes
|
||||
else
|
||||
pgac_cv_xsave_intrinsics__mxsave=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
CFLAGS="$pgac_save_CFLAGS"
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics__mxsave" >&5
|
||||
$as_echo "$pgac_cv_xsave_intrinsics__mxsave" >&6; }
|
||||
if test x"$pgac_cv_xsave_intrinsics__mxsave" = x"yes"; then
|
||||
CFLAGS_XSAVE="-mxsave"
|
||||
pgac_xsave_intrinsics=yes
|
||||
fi
|
||||
|
||||
fi
|
||||
if test x"$pgac_xsave_intrinsics" = x"yes"; then
|
||||
|
||||
$as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
|
||||
# Check for AVX-512 popcount intrinsics
|
||||
#
|
||||
CFLAGS_POPCNT=""
|
||||
PG_POPCNT_OBJS=""
|
||||
if test x"$host_cpu" = x"x86_64"; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=" >&5
|
||||
$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=... " >&6; }
|
||||
if ${pgac_cv_avx512_popcnt_intrinsics_+:} false; then :
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64" >&5
|
||||
$as_echo_n "checking for _mm512_popcnt_epi64... " >&6; }
|
||||
if ${pgac_cv_avx512_popcnt_intrinsics+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS "
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
#include <immintrin.h>
|
||||
#if defined(__has_attribute) && __has_attribute (target)
|
||||
__attribute__((target("avx512vpopcntdq","avx512bw")))
|
||||
#endif
|
||||
static int popcount_test(void)
|
||||
{
|
||||
const char buf[sizeof(__m512i)];
|
||||
PG_INT64_TYPE popcnt = 0;
|
||||
__m512i accum = _mm512_setzero_si512();
|
||||
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
|
||||
const __m512i cnt = _mm512_popcnt_epi64(val);
|
||||
accum = _mm512_add_epi64(accum, cnt);
|
||||
popcnt = _mm512_reduce_add_epi64(accum);
|
||||
return (int) popcnt;
|
||||
}
|
||||
int
|
||||
main ()
|
||||
{
|
||||
const char buf[sizeof(__m512i)];
|
||||
PG_INT64_TYPE popcnt = 0;
|
||||
__m512i accum = _mm512_setzero_si512();
|
||||
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
|
||||
const __m512i cnt = _mm512_popcnt_epi64(val);
|
||||
accum = _mm512_add_epi64(accum, cnt);
|
||||
popcnt = _mm512_reduce_add_epi64(accum);
|
||||
/* return computed value, to prevent the above being optimized away */
|
||||
return popcnt == 0;
|
||||
return popcount_test();
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
pgac_cv_avx512_popcnt_intrinsics_=yes
|
||||
pgac_cv_avx512_popcnt_intrinsics=yes
|
||||
else
|
||||
pgac_cv_avx512_popcnt_intrinsics_=no
|
||||
pgac_cv_avx512_popcnt_intrinsics=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
CFLAGS="$pgac_save_CFLAGS"
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics_" >&5
|
||||
$as_echo "$pgac_cv_avx512_popcnt_intrinsics_" >&6; }
|
||||
if test x"$pgac_cv_avx512_popcnt_intrinsics_" = x"yes"; then
|
||||
CFLAGS_POPCNT=""
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics" >&5
|
||||
$as_echo "$pgac_cv_avx512_popcnt_intrinsics" >&6; }
|
||||
if test x"$pgac_cv_avx512_popcnt_intrinsics" = x"yes"; then
|
||||
pgac_avx512_popcnt_intrinsics=yes
|
||||
fi
|
||||
|
||||
if test x"$pgac_avx512_popcnt_intrinsics" != x"yes"; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512bw" >&5
|
||||
$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512bw... " >&6; }
|
||||
if ${pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS -mavx512vpopcntdq -mavx512bw"
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
#include <immintrin.h>
|
||||
int
|
||||
main ()
|
||||
{
|
||||
const char buf[sizeof(__m512i)];
|
||||
PG_INT64_TYPE popcnt = 0;
|
||||
__m512i accum = _mm512_setzero_si512();
|
||||
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
|
||||
const __m512i cnt = _mm512_popcnt_epi64(val);
|
||||
accum = _mm512_add_epi64(accum, cnt);
|
||||
popcnt = _mm512_reduce_add_epi64(accum);
|
||||
/* return computed value, to prevent the above being optimized away */
|
||||
return popcnt == 0;
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw=yes
|
||||
else
|
||||
pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
CFLAGS="$pgac_save_CFLAGS"
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" >&5
|
||||
$as_echo "$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" >&6; }
|
||||
if test x"$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" = x"yes"; then
|
||||
CFLAGS_POPCNT="-mavx512vpopcntdq -mavx512bw"
|
||||
pgac_avx512_popcnt_intrinsics=yes
|
||||
fi
|
||||
|
||||
fi
|
||||
if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then
|
||||
PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o"
|
||||
|
||||
$as_echo "#define USE_AVX512_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
|
||||
#
|
||||
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
|
||||
|
Reference in New Issue
Block a user