1
0
mirror of https://github.com/postgres/postgres.git synced 2026-01-26 09:41:40 +03:00

Refactor some SIMD and popcount macros.

This commit does the following:

* Removes TRY_POPCNT_X86_64.  We now assume that the required CPUID
intrinsics are available when HAVE_X86_64_POPCNTQ is defined, as we
have done since v16 for meson builds when
USE_SSE42_CRC32C_WITH_RUNTIME_CHECK is defined and since v17 when
USE_AVX512_POPCNT_WITH_RUNTIME_CHECK is defined.

* Moves the MSVC check for HAVE_X86_64_POPCNTQ to configure-time.
This way, we set it for all relevant platforms in one place.

* Moves the #defines for USE_SSE2 and USE_NEON to c.h so that they
can be used elsewhere without including simd.h.  Consequently, we
can remove the POPCNT_AARCH64 macro.

* Moves the #includes for pg_bitutils.h to below the system headers
in pg_popcount_{aarch64,x86}.c, since we no longer depend on macros
from pg_bitutils.h to decide which system headers to use.

Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/aWf_InS1VrbeXAfP%40nathan
This commit is contained in:
Nathan Bossart
2026-01-21 14:21:00 -06:00
parent 8c6653516c
commit 25dc485074
7 changed files with 37 additions and 60 deletions

View File

@@ -2614,7 +2614,9 @@ endif
if host_cpu == 'x86_64'
if cc.compiles('''
if cc.get_id() == 'msvc'
cdata.set('HAVE_X86_64_POPCNTQ', 1)
elif cc.compiles('''
void main(void)
{
long long x = 1; long long r;

View File

@@ -1230,6 +1230,25 @@ typedef struct PGAlignedXLogBlock
((underlying_type) (expr))
#endif
/*
* SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
* that compilers targeting this architecture understand SSE2 intrinsics.
*/
#if (defined(__x86_64__) || defined(_M_AMD64))
#define USE_SSE2
/*
* We use the Neon instructions if the compiler provides access to them (as
* indicated by __ARM_NEON) and we are on aarch64. While Neon support is
* technically optional for aarch64, it appears that all available 64-bit
* hardware does have it. Neon exists in some 32-bit hardware too, but we
* could not realistically use it there without a run-time check, which seems
* not worth the trouble for now.
*/
#elif defined(__aarch64__) && defined(__ARM_NEON)
#define USE_NEON
#endif
/* ----------------------------------------------------------------
* Section 9: system-specific hacks
*

View File

@@ -276,43 +276,12 @@ pg_ceil_log2_64(uint64 num)
return pg_leftmost_one_pos64(num - 1) + 1;
}
/*
* With MSVC on x86_64 builds, try using native popcnt instructions via the
* __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's
* __builtin_popcount* intrinsic functions as they always emit popcnt
* instructions.
*/
#if defined(_MSC_VER) && defined(_M_AMD64)
#define HAVE_X86_64_POPCNTQ
#endif
/*
* On x86_64, we can use the hardware popcount instruction, but only if
* we can verify that the CPU supports it via the cpuid instruction.
*
* Otherwise, we fall back to a hand-rolled implementation.
*/
#ifdef HAVE_X86_64_POPCNTQ
#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
#define TRY_POPCNT_X86_64 1
#endif
#endif
/*
* On AArch64, we can use Neon instructions if the compiler provides access to
* them (as indicated by __ARM_NEON). As in simd.h, we assume that all
* available 64-bit hardware has Neon support.
*/
#if defined(__aarch64__) && defined(__ARM_NEON)
#define POPCNT_AARCH64 1
#endif
extern int pg_popcount32_portable(uint32 word);
extern int pg_popcount64_portable(uint64 word);
extern uint64 pg_popcount_portable(const char *buf, int bytes);
extern uint64 pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask);
#ifdef TRY_POPCNT_X86_64
#ifdef HAVE_X86_64_POPCNTQ
/*
* Attempt to use SSE4.2 or AVX-512 instructions, but perform a runtime check
* first.
@@ -322,7 +291,7 @@ extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
#elif POPCNT_AARCH64
#elif defined(USE_NEON)
/* Use the Neon version of pg_popcount{32,64} without function pointer. */
extern int pg_popcount32(uint32 word);
extern int pg_popcount64(uint64 word);
@@ -346,7 +315,7 @@ extern int pg_popcount64(uint64 word);
extern uint64 pg_popcount_optimized(const char *buf, int bytes);
extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
#endif /* TRY_POPCNT_X86_64 */
#endif
/*
* Returns the number of 1-bits in buf.

View File

@@ -18,32 +18,19 @@
#ifndef SIMD_H
#define SIMD_H
#if (defined(__x86_64__) || defined(_M_AMD64))
#if defined(USE_SSE2)
/*
* SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
* that compilers targeting this architecture understand SSE2 intrinsics.
*
* We use emmintrin.h rather than the comprehensive header immintrin.h in
* order to exclude extensions beyond SSE2. This is because MSVC, at least,
* will allow the use of intrinsics that haven't been enabled at compile
* time.
*/
#include <emmintrin.h>
#define USE_SSE2
typedef __m128i Vector8;
typedef __m128i Vector32;
#elif defined(__aarch64__) && defined(__ARM_NEON)
/*
* We use the Neon instructions if the compiler provides access to them (as
* indicated by __ARM_NEON) and we are on aarch64. While Neon support is
* technically optional for aarch64, it appears that all available 64-bit
* hardware does have it. Neon exists in some 32-bit hardware too, but we
* could not realistically use it there without a run-time check, which seems
* not worth the trouble for now.
*/
#elif defined(USE_NEON)
#include <arm_neon.h>
#define USE_NEON
typedef uint8x16_t Vector8;
typedef uint32x4_t Vector32;

View File

@@ -242,7 +242,7 @@ pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask)
return popcnt;
}
#if !defined(TRY_POPCNT_X86_64) && !defined(POPCNT_AARCH64)
#if !defined(HAVE_X86_64_POPCNTQ) && !defined(USE_NEON)
/*
* When special CPU instructions are not available, there's no point in using
@@ -282,4 +282,4 @@ pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
return pg_popcount_masked_portable(buf, bytes, mask);
}
#endif /* ! TRY_POPCNT_X86_64 && ! POPCNT_AARCH64 */
#endif /* ! HAVE_X86_64_POPCNTQ && ! USE_NEON */

View File

@@ -12,9 +12,7 @@
*/
#include "c.h"
#include "port/pg_bitutils.h"
#ifdef POPCNT_AARCH64
#ifdef USE_NEON
#include <arm_neon.h>
@@ -30,6 +28,8 @@
#endif
#endif
#include "port/pg_bitutils.h"
/*
* The Neon versions are built regardless of whether we are building the SVE
* versions.
@@ -478,4 +478,4 @@ pg_popcount_masked_neon(const char *buf, int bytes, bits8 mask)
return popcnt;
}
#endif /* POPCNT_AARCH64 */
#endif /* USE_NEON */

View File

@@ -12,9 +12,7 @@
*/
#include "c.h"
#include "port/pg_bitutils.h"
#ifdef TRY_POPCNT_X86_64
#ifdef HAVE_X86_64_POPCNTQ
#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
#include <cpuid.h>
@@ -28,6 +26,8 @@
#include <intrin.h>
#endif
#include "port/pg_bitutils.h"
/*
* The SSE4.2 versions are built regardless of whether we are building the
* AVX-512 versions.
@@ -468,4 +468,4 @@ pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask)
return popcnt;
}
#endif /* TRY_POPCNT_X86_64 */
#endif /* HAVE_X86_64_POPCNTQ */