diff --git a/meson.build b/meson.build index 6d304f32fb0..92850244ad9 100644 --- a/meson.build +++ b/meson.build @@ -2614,7 +2614,9 @@ endif if host_cpu == 'x86_64' - if cc.compiles(''' + if cc.get_id() == 'msvc' + cdata.set('HAVE_X86_64_POPCNTQ', 1) + elif cc.compiles(''' void main(void) { long long x = 1; long long r; diff --git a/src/include/c.h b/src/include/c.h index 13fbeea408e..c0be07a4566 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -1230,6 +1230,25 @@ typedef struct PGAlignedXLogBlock ((underlying_type) (expr)) #endif +/* + * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume + * that compilers targeting this architecture understand SSE2 intrinsics. + */ +#if (defined(__x86_64__) || defined(_M_AMD64)) +#define USE_SSE2 + +/* + * We use the Neon instructions if the compiler provides access to them (as + * indicated by __ARM_NEON) and we are on aarch64. While Neon support is + * technically optional for aarch64, it appears that all available 64-bit + * hardware does have it. Neon exists in some 32-bit hardware too, but we + * could not realistically use it there without a run-time check, which seems + * not worth the trouble for now. + */ +#elif defined(__aarch64__) && defined(__ARM_NEON) +#define USE_NEON +#endif + /* ---------------------------------------------------------------- * Section 9: system-specific hacks * diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 89b117d9817..35761f509ec 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -276,43 +276,12 @@ pg_ceil_log2_64(uint64 num) return pg_leftmost_one_pos64(num - 1) + 1; } -/* - * With MSVC on x86_64 builds, try using native popcnt instructions via the - * __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's - * __builtin_popcount* intrinsic functions as they always emit popcnt - * instructions. - */ -#if defined(_MSC_VER) && defined(_M_AMD64) -#define HAVE_X86_64_POPCNTQ -#endif - -/* - * On x86_64, we can use the hardware popcount instruction, but only if - * we can verify that the CPU supports it via the cpuid instruction. - * - * Otherwise, we fall back to a hand-rolled implementation. - */ -#ifdef HAVE_X86_64_POPCNTQ -#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) -#define TRY_POPCNT_X86_64 1 -#endif -#endif - -/* - * On AArch64, we can use Neon instructions if the compiler provides access to - * them (as indicated by __ARM_NEON). As in simd.h, we assume that all - * available 64-bit hardware has Neon support. - */ -#if defined(__aarch64__) && defined(__ARM_NEON) -#define POPCNT_AARCH64 1 -#endif - extern int pg_popcount32_portable(uint32 word); extern int pg_popcount64_portable(uint64 word); extern uint64 pg_popcount_portable(const char *buf, int bytes); extern uint64 pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask); -#ifdef TRY_POPCNT_X86_64 +#ifdef HAVE_X86_64_POPCNTQ /* * Attempt to use SSE4.2 or AVX-512 instructions, but perform a runtime check * first. @@ -322,7 +291,7 @@ extern PGDLLIMPORT int (*pg_popcount64) (uint64 word); extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes); extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask); -#elif POPCNT_AARCH64 +#elif defined(USE_NEON) /* Use the Neon version of pg_popcount{32,64} without function pointer. */ extern int pg_popcount32(uint32 word); extern int pg_popcount64(uint64 word); @@ -346,7 +315,7 @@ extern int pg_popcount64(uint64 word); extern uint64 pg_popcount_optimized(const char *buf, int bytes); extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask); -#endif /* TRY_POPCNT_X86_64 */ +#endif /* * Returns the number of 1-bits in buf. diff --git a/src/include/port/simd.h b/src/include/port/simd.h index 33202a4b0e2..50615aec7f4 100644 --- a/src/include/port/simd.h +++ b/src/include/port/simd.h @@ -18,32 +18,19 @@ #ifndef SIMD_H #define SIMD_H -#if (defined(__x86_64__) || defined(_M_AMD64)) +#if defined(USE_SSE2) /* - * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume - * that compilers targeting this architecture understand SSE2 intrinsics. - * * We use emmintrin.h rather than the comprehensive header immintrin.h in * order to exclude extensions beyond SSE2. This is because MSVC, at least, * will allow the use of intrinsics that haven't been enabled at compile * time. */ #include -#define USE_SSE2 typedef __m128i Vector8; typedef __m128i Vector32; -#elif defined(__aarch64__) && defined(__ARM_NEON) -/* - * We use the Neon instructions if the compiler provides access to them (as - * indicated by __ARM_NEON) and we are on aarch64. While Neon support is - * technically optional for aarch64, it appears that all available 64-bit - * hardware does have it. Neon exists in some 32-bit hardware too, but we - * could not realistically use it there without a run-time check, which seems - * not worth the trouble for now. - */ +#elif defined(USE_NEON) #include -#define USE_NEON typedef uint8x16_t Vector8; typedef uint32x4_t Vector32; diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index 170aeef7548..ffda75825e5 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -242,7 +242,7 @@ pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask) return popcnt; } -#if !defined(TRY_POPCNT_X86_64) && !defined(POPCNT_AARCH64) +#if !defined(HAVE_X86_64_POPCNTQ) && !defined(USE_NEON) /* * When special CPU instructions are not available, there's no point in using @@ -282,4 +282,4 @@ pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask) return pg_popcount_masked_portable(buf, bytes, mask); } -#endif /* ! TRY_POPCNT_X86_64 && ! POPCNT_AARCH64 */ +#endif /* ! HAVE_X86_64_POPCNTQ && ! USE_NEON */ diff --git a/src/port/pg_popcount_aarch64.c b/src/port/pg_popcount_aarch64.c index cda73cf6088..2184854dbf7 100644 --- a/src/port/pg_popcount_aarch64.c +++ b/src/port/pg_popcount_aarch64.c @@ -12,9 +12,7 @@ */ #include "c.h" -#include "port/pg_bitutils.h" - -#ifdef POPCNT_AARCH64 +#ifdef USE_NEON #include @@ -30,6 +28,8 @@ #endif #endif +#include "port/pg_bitutils.h" + /* * The Neon versions are built regardless of whether we are building the SVE * versions. @@ -478,4 +478,4 @@ pg_popcount_masked_neon(const char *buf, int bytes, bits8 mask) return popcnt; } -#endif /* POPCNT_AARCH64 */ +#endif /* USE_NEON */ diff --git a/src/port/pg_popcount_x86.c b/src/port/pg_popcount_x86.c index ce0ce9c8a3c..245f0167d00 100644 --- a/src/port/pg_popcount_x86.c +++ b/src/port/pg_popcount_x86.c @@ -12,9 +12,7 @@ */ #include "c.h" -#include "port/pg_bitutils.h" - -#ifdef TRY_POPCNT_X86_64 +#ifdef HAVE_X86_64_POPCNTQ #if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT) #include @@ -28,6 +26,8 @@ #include #endif +#include "port/pg_bitutils.h" + /* * The SSE4.2 versions are built regardless of whether we are building the * AVX-512 versions. @@ -468,4 +468,4 @@ pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask) return popcnt; } -#endif /* TRY_POPCNT_X86_64 */ +#endif /* HAVE_X86_64_POPCNTQ */