1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-25 13:17:41 +03:00

Be more paranoid in configure's checks for CRC and POPCNT intrinsics.

In these tests, we need to verify not only that the compiler has heard
of these intrinsics, but that lower-level tools cope with them too.
(For example, the assembler must also know the instructions, and on
some platforms there might be library support involved.)  The hazard
is that the compiler might optimize away the calls altogether,
allowing the configure check to succeed only to have the build fail
later if lower-level support is missing.  The existing code tried to
prevent that by ensuring that the result of the intrinsic is used
for something, but that's really insufficient because we were feeding
constant input to it.  So the compiler would be perfectly entitled to
optimize away the calls anyway.  Fix by making the inputs into global
variables.  (Hypothetically, LTO optimization could still remove the
code --- but that's well past where we'd be likely to hit trouble.)

It is not known that any current compiler would actually optimize
away these calls, and even if that happened it would be unlikely
that any problem would manifest.  Our concern for this stems from
largely-bygone days when it was common to install gcc on platforms
with some other native compiler, so that a compiler-vs-library
support discrepancy was more probable.  Still, there's little
point in defending against such cases in a way that is visibly
incomplete.

I'm content to fix this in master for now; we can back-patch if
any indication appears that it's a live problem for someone.

Discussion: https://postgr.es/m/3368102.1741993462@sss.pgh.pa.us
This commit is contained in:
Tom Lane
2025-03-20 16:23:09 -04:00
parent 50ba65e733
commit fdb5dd6331
3 changed files with 36 additions and 31 deletions

View File

@@ -553,16 +553,20 @@ fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
# the other ones are, on x86-64 platforms) # the other ones are, on x86-64 platforms)
# #
# If the intrinsics are supported, sets pgac_sse42_crc32_intrinsics. # If the intrinsics are supported, sets pgac_sse42_crc32_intrinsics.
#
# To detect the case where the compiler knows the function but library support
# is missing, we must link not just compile, and store the results in global
# variables so the compiler doesn't optimize away the call.
AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS], AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics])])dnl [define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics])])dnl
AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u32], [Ac_cachevar], AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u32], [Ac_cachevar],
[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <nmmintrin.h> [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <nmmintrin.h>
unsigned int crc;
#if defined(__has_attribute) && __has_attribute (target) #if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("sse4.2"))) __attribute__((target("sse4.2")))
#endif #endif
static int crc32_sse42_test(void) static int crc32_sse42_test(void)
{ {
unsigned int crc = 0;
crc = _mm_crc32_u8(crc, 0); crc = _mm_crc32_u8(crc, 0);
crc = _mm_crc32_u32(crc, 0); crc = _mm_crc32_u32(crc, 0);
/* return computed value, to prevent the above being optimized away */ /* return computed value, to prevent the above being optimized away */
@@ -593,9 +597,9 @@ AC_DEFUN([PGAC_ARMV8_CRC32C_INTRINSICS],
AC_CACHE_CHECK([for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=$1], [Ac_cachevar], AC_CACHE_CHECK([for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=$1], [Ac_cachevar],
[pgac_save_CFLAGS=$CFLAGS [pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS $1" CFLAGS="$pgac_save_CFLAGS $1"
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h>], AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h>
[unsigned int crc = 0; unsigned int crc;],
crc = __crc32cb(crc, 0); [crc = __crc32cb(crc, 0);
crc = __crc32ch(crc, 0); crc = __crc32ch(crc, 0);
crc = __crc32cw(crc, 0); crc = __crc32cw(crc, 0);
crc = __crc32cd(crc, 0); crc = __crc32cd(crc, 0);
@@ -628,9 +632,8 @@ AC_DEFUN([PGAC_LOONGARCH_CRC32C_INTRINSICS],
AC_CACHE_CHECK( AC_CACHE_CHECK(
[for __builtin_loongarch_crcc_w_b_w, __builtin_loongarch_crcc_w_h_w, __builtin_loongarch_crcc_w_w_w and __builtin_loongarch_crcc_w_d_w], [for __builtin_loongarch_crcc_w_b_w, __builtin_loongarch_crcc_w_h_w, __builtin_loongarch_crcc_w_w_w and __builtin_loongarch_crcc_w_d_w],
[Ac_cachevar], [Ac_cachevar],
[AC_LINK_IFELSE([AC_LANG_PROGRAM([], [AC_LINK_IFELSE([AC_LANG_PROGRAM([unsigned int crc;],
[unsigned int crc = 0; [crc = __builtin_loongarch_crcc_w_b_w(0, crc);
crc = __builtin_loongarch_crcc_w_b_w(0, crc);
crc = __builtin_loongarch_crcc_w_h_w(0, crc); crc = __builtin_loongarch_crcc_w_h_w(0, crc);
crc = __builtin_loongarch_crcc_w_w_w(0, crc); crc = __builtin_loongarch_crcc_w_w_w(0, crc);
crc = __builtin_loongarch_crcc_w_d_w(0, crc); crc = __builtin_loongarch_crcc_w_d_w(0, crc);
@@ -680,22 +683,23 @@ undefine([Ac_cachevar])dnl
AC_DEFUN([PGAC_AVX512_POPCNT_INTRINSICS], AC_DEFUN([PGAC_AVX512_POPCNT_INTRINSICS],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics])])dnl [define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics])])dnl
AC_CACHE_CHECK([for _mm512_popcnt_epi64], [Ac_cachevar], AC_CACHE_CHECK([for _mm512_popcnt_epi64], [Ac_cachevar],
[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h> [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <immintrin.h>
#include <stdint.h> #include <stdint.h>
char buf[sizeof(__m512i)];
#if defined(__has_attribute) && __has_attribute (target) #if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("avx512vpopcntdq,avx512bw"))) __attribute__((target("avx512vpopcntdq,avx512bw")))
#endif #endif
static int popcount_test(void) static int popcount_test(void)
{ {
const char buf@<:@sizeof(__m512i)@:>@;
int64_t popcnt = 0; int64_t popcnt = 0;
__m512i accum = _mm512_setzero_si512(); __m512i accum = _mm512_setzero_si512();
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf); __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
const __m512i cnt = _mm512_popcnt_epi64(val); __m512i cnt = _mm512_popcnt_epi64(val);
accum = _mm512_add_epi64(accum, cnt); accum = _mm512_add_epi64(accum, cnt);
popcnt = _mm512_reduce_add_epi64(accum); popcnt = _mm512_reduce_add_epi64(accum);
return (int) popcnt; return (int) popcnt;
}], }]],
[return popcount_test();])], [return popcount_test();])],
[Ac_cachevar=yes], [Ac_cachevar=yes],
[Ac_cachevar=no])]) [Ac_cachevar=no])])

18
configure vendored
View File

@@ -17334,16 +17334,17 @@ else
/* end confdefs.h. */ /* end confdefs.h. */
#include <immintrin.h> #include <immintrin.h>
#include <stdint.h> #include <stdint.h>
char buf[sizeof(__m512i)];
#if defined(__has_attribute) && __has_attribute (target) #if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("avx512vpopcntdq,avx512bw"))) __attribute__((target("avx512vpopcntdq,avx512bw")))
#endif #endif
static int popcount_test(void) static int popcount_test(void)
{ {
const char buf[sizeof(__m512i)];
int64_t popcnt = 0; int64_t popcnt = 0;
__m512i accum = _mm512_setzero_si512(); __m512i accum = _mm512_setzero_si512();
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf); __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
const __m512i cnt = _mm512_popcnt_epi64(val); __m512i cnt = _mm512_popcnt_epi64(val);
accum = _mm512_add_epi64(accum, cnt); accum = _mm512_add_epi64(accum, cnt);
popcnt = _mm512_reduce_add_epi64(accum); popcnt = _mm512_reduce_add_epi64(accum);
return (int) popcnt; return (int) popcnt;
@@ -17387,12 +17388,12 @@ else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */ /* end confdefs.h. */
#include <nmmintrin.h> #include <nmmintrin.h>
unsigned int crc;
#if defined(__has_attribute) && __has_attribute (target) #if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("sse4.2"))) __attribute__((target("sse4.2")))
#endif #endif
static int crc32_sse42_test(void) static int crc32_sse42_test(void)
{ {
unsigned int crc = 0;
crc = _mm_crc32_u8(crc, 0); crc = _mm_crc32_u8(crc, 0);
crc = _mm_crc32_u32(crc, 0); crc = _mm_crc32_u32(crc, 0);
/* return computed value, to prevent the above being optimized away */ /* return computed value, to prevent the above being optimized away */
@@ -17459,10 +17460,10 @@ CFLAGS="$pgac_save_CFLAGS "
cat confdefs.h - <<_ACEOF >conftest.$ac_ext cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */ /* end confdefs.h. */
#include <arm_acle.h> #include <arm_acle.h>
unsigned int crc;
int int
main () main ()
{ {
unsigned int crc = 0;
crc = __crc32cb(crc, 0); crc = __crc32cb(crc, 0);
crc = __crc32ch(crc, 0); crc = __crc32ch(crc, 0);
crc = __crc32cw(crc, 0); crc = __crc32cw(crc, 0);
@@ -17500,10 +17501,10 @@ CFLAGS="$pgac_save_CFLAGS -march=armv8-a+crc+simd"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */ /* end confdefs.h. */
#include <arm_acle.h> #include <arm_acle.h>
unsigned int crc;
int int
main () main ()
{ {
unsigned int crc = 0;
crc = __crc32cb(crc, 0); crc = __crc32cb(crc, 0);
crc = __crc32ch(crc, 0); crc = __crc32ch(crc, 0);
crc = __crc32cw(crc, 0); crc = __crc32cw(crc, 0);
@@ -17541,10 +17542,10 @@ CFLAGS="$pgac_save_CFLAGS -march=armv8-a+crc"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */ /* end confdefs.h. */
#include <arm_acle.h> #include <arm_acle.h>
unsigned int crc;
int int
main () main ()
{ {
unsigned int crc = 0;
crc = __crc32cb(crc, 0); crc = __crc32cb(crc, 0);
crc = __crc32ch(crc, 0); crc = __crc32ch(crc, 0);
crc = __crc32cw(crc, 0); crc = __crc32cw(crc, 0);
@@ -17585,11 +17586,10 @@ if ${pgac_cv_loongarch_crc32c_intrinsics+:} false; then :
else else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */ /* end confdefs.h. */
unsigned int crc;
int int
main () main ()
{ {
unsigned int crc = 0;
crc = __builtin_loongarch_crcc_w_b_w(0, crc); crc = __builtin_loongarch_crcc_w_b_w(0, crc);
crc = __builtin_loongarch_crcc_w_h_w(0, crc); crc = __builtin_loongarch_crcc_w_h_w(0, crc);
crc = __builtin_loongarch_crcc_w_w_w(0, crc); crc = __builtin_loongarch_crcc_w_w_w(0, crc);

View File

@@ -2259,17 +2259,17 @@ if host_cpu == 'x86_64'
prog = ''' prog = '''
#include <immintrin.h> #include <immintrin.h>
#include <stdint.h> #include <stdint.h>
char buf[sizeof(__m512i)];
#if defined(__has_attribute) && __has_attribute (target) #if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("avx512vpopcntdq,avx512bw"))) __attribute__((target("avx512vpopcntdq,avx512bw")))
#endif #endif
int main(void) int main(void)
{ {
const char buf[sizeof(__m512i)];
int64_t popcnt = 0; int64_t popcnt = 0;
__m512i accum = _mm512_setzero_si512(); __m512i accum = _mm512_setzero_si512();
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf); __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
const __m512i cnt = _mm512_popcnt_epi64(val); __m512i cnt = _mm512_popcnt_epi64(val);
accum = _mm512_add_epi64(accum, cnt); accum = _mm512_add_epi64(accum, cnt);
popcnt = _mm512_reduce_add_epi64(accum); popcnt = _mm512_reduce_add_epi64(accum);
/* return computed value, to prevent the above being optimized away */ /* return computed value, to prevent the above being optimized away */
@@ -2317,13 +2317,13 @@ if host_cpu == 'x86' or host_cpu == 'x86_64'
prog = ''' prog = '''
#include <nmmintrin.h> #include <nmmintrin.h>
unsigned int crc;
#if defined(__has_attribute) && __has_attribute (target) #if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("sse4.2"))) __attribute__((target("sse4.2")))
#endif #endif
int main(void) int main(void)
{ {
unsigned int crc = 0;
crc = _mm_crc32_u8(crc, 0); crc = _mm_crc32_u8(crc, 0);
crc = _mm_crc32_u32(crc, 0); crc = _mm_crc32_u32(crc, 0);
/* return computed value, to prevent the above being optimized away */ /* return computed value, to prevent the above being optimized away */
@@ -2352,10 +2352,10 @@ elif host_cpu == 'arm' or host_cpu == 'aarch64'
prog = ''' prog = '''
#include <arm_acle.h> #include <arm_acle.h>
unsigned int crc;
int main(void) int main(void)
{ {
unsigned int crc = 0;
crc = __crc32cb(crc, 0); crc = __crc32cb(crc, 0);
crc = __crc32ch(crc, 0); crc = __crc32ch(crc, 0);
crc = __crc32cw(crc, 0); crc = __crc32cw(crc, 0);
@@ -2390,9 +2390,10 @@ int main(void)
elif host_cpu == 'loongarch64' elif host_cpu == 'loongarch64'
prog = ''' prog = '''
unsigned int crc;
int main(void) int main(void)
{ {
unsigned int crc = 0;
crc = __builtin_loongarch_crcc_w_b_w(0, crc); crc = __builtin_loongarch_crcc_w_b_w(0, crc);
crc = __builtin_loongarch_crcc_w_h_w(0, crc); crc = __builtin_loongarch_crcc_w_h_w(0, crc);
crc = __builtin_loongarch_crcc_w_w_w(0, crc); crc = __builtin_loongarch_crcc_w_w_w(0, crc);