mirror of
https://github.com/postgres/postgres.git
synced 2026-01-26 09:41:40 +03:00
Refactor some SIMD and popcount macros.
This commit does the following:
* Removes TRY_POPCNT_X86_64. We now assume that the required CPUID
intrinsics are available when HAVE_X86_64_POPCNTQ is defined, as we
have done since v16 for meson builds when
USE_SSE42_CRC32C_WITH_RUNTIME_CHECK is defined and since v17 when
USE_AVX512_POPCNT_WITH_RUNTIME_CHECK is defined.
* Moves the MSVC check for HAVE_X86_64_POPCNTQ to configure-time.
This way, we set it for all relevant platforms in one place.
* Moves the #defines for USE_SSE2 and USE_NEON to c.h so that they
can be used elsewhere without including simd.h. Consequently, we
can remove the POPCNT_AARCH64 macro.
* Moves the #includes for pg_bitutils.h to below the system headers
in pg_popcount_{aarch64,x86}.c, since we no longer depend on macros
from pg_bitutils.h to decide which system headers to use.
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/aWf_InS1VrbeXAfP%40nathan
This commit is contained in:
@@ -2614,7 +2614,9 @@ endif
|
||||
|
||||
if host_cpu == 'x86_64'
|
||||
|
||||
if cc.compiles('''
|
||||
if cc.get_id() == 'msvc'
|
||||
cdata.set('HAVE_X86_64_POPCNTQ', 1)
|
||||
elif cc.compiles('''
|
||||
void main(void)
|
||||
{
|
||||
long long x = 1; long long r;
|
||||
|
||||
@@ -1230,6 +1230,25 @@ typedef struct PGAlignedXLogBlock
|
||||
((underlying_type) (expr))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
|
||||
* that compilers targeting this architecture understand SSE2 intrinsics.
|
||||
*/
|
||||
#if (defined(__x86_64__) || defined(_M_AMD64))
|
||||
#define USE_SSE2
|
||||
|
||||
/*
|
||||
* We use the Neon instructions if the compiler provides access to them (as
|
||||
* indicated by __ARM_NEON) and we are on aarch64. While Neon support is
|
||||
* technically optional for aarch64, it appears that all available 64-bit
|
||||
* hardware does have it. Neon exists in some 32-bit hardware too, but we
|
||||
* could not realistically use it there without a run-time check, which seems
|
||||
* not worth the trouble for now.
|
||||
*/
|
||||
#elif defined(__aarch64__) && defined(__ARM_NEON)
|
||||
#define USE_NEON
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* Section 9: system-specific hacks
|
||||
*
|
||||
|
||||
@@ -276,43 +276,12 @@ pg_ceil_log2_64(uint64 num)
|
||||
return pg_leftmost_one_pos64(num - 1) + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* With MSVC on x86_64 builds, try using native popcnt instructions via the
|
||||
* __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's
|
||||
* __builtin_popcount* intrinsic functions as they always emit popcnt
|
||||
* instructions.
|
||||
*/
|
||||
#if defined(_MSC_VER) && defined(_M_AMD64)
|
||||
#define HAVE_X86_64_POPCNTQ
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On x86_64, we can use the hardware popcount instruction, but only if
|
||||
* we can verify that the CPU supports it via the cpuid instruction.
|
||||
*
|
||||
* Otherwise, we fall back to a hand-rolled implementation.
|
||||
*/
|
||||
#ifdef HAVE_X86_64_POPCNTQ
|
||||
#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
|
||||
#define TRY_POPCNT_X86_64 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On AArch64, we can use Neon instructions if the compiler provides access to
|
||||
* them (as indicated by __ARM_NEON). As in simd.h, we assume that all
|
||||
* available 64-bit hardware has Neon support.
|
||||
*/
|
||||
#if defined(__aarch64__) && defined(__ARM_NEON)
|
||||
#define POPCNT_AARCH64 1
|
||||
#endif
|
||||
|
||||
extern int pg_popcount32_portable(uint32 word);
|
||||
extern int pg_popcount64_portable(uint64 word);
|
||||
extern uint64 pg_popcount_portable(const char *buf, int bytes);
|
||||
extern uint64 pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask);
|
||||
|
||||
#ifdef TRY_POPCNT_X86_64
|
||||
#ifdef HAVE_X86_64_POPCNTQ
|
||||
/*
|
||||
* Attempt to use SSE4.2 or AVX-512 instructions, but perform a runtime check
|
||||
* first.
|
||||
@@ -322,7 +291,7 @@ extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
|
||||
extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
|
||||
extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
|
||||
|
||||
#elif POPCNT_AARCH64
|
||||
#elif defined(USE_NEON)
|
||||
/* Use the Neon version of pg_popcount{32,64} without function pointer. */
|
||||
extern int pg_popcount32(uint32 word);
|
||||
extern int pg_popcount64(uint64 word);
|
||||
@@ -346,7 +315,7 @@ extern int pg_popcount64(uint64 word);
|
||||
extern uint64 pg_popcount_optimized(const char *buf, int bytes);
|
||||
extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
|
||||
|
||||
#endif /* TRY_POPCNT_X86_64 */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Returns the number of 1-bits in buf.
|
||||
|
||||
@@ -18,32 +18,19 @@
|
||||
#ifndef SIMD_H
|
||||
#define SIMD_H
|
||||
|
||||
#if (defined(__x86_64__) || defined(_M_AMD64))
|
||||
#if defined(USE_SSE2)
|
||||
/*
|
||||
* SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
|
||||
* that compilers targeting this architecture understand SSE2 intrinsics.
|
||||
*
|
||||
* We use emmintrin.h rather than the comprehensive header immintrin.h in
|
||||
* order to exclude extensions beyond SSE2. This is because MSVC, at least,
|
||||
* will allow the use of intrinsics that haven't been enabled at compile
|
||||
* time.
|
||||
*/
|
||||
#include <emmintrin.h>
|
||||
#define USE_SSE2
|
||||
typedef __m128i Vector8;
|
||||
typedef __m128i Vector32;
|
||||
|
||||
#elif defined(__aarch64__) && defined(__ARM_NEON)
|
||||
/*
|
||||
* We use the Neon instructions if the compiler provides access to them (as
|
||||
* indicated by __ARM_NEON) and we are on aarch64. While Neon support is
|
||||
* technically optional for aarch64, it appears that all available 64-bit
|
||||
* hardware does have it. Neon exists in some 32-bit hardware too, but we
|
||||
* could not realistically use it there without a run-time check, which seems
|
||||
* not worth the trouble for now.
|
||||
*/
|
||||
#elif defined(USE_NEON)
|
||||
#include <arm_neon.h>
|
||||
#define USE_NEON
|
||||
typedef uint8x16_t Vector8;
|
||||
typedef uint32x4_t Vector32;
|
||||
|
||||
|
||||
@@ -242,7 +242,7 @@ pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask)
|
||||
return popcnt;
|
||||
}
|
||||
|
||||
#if !defined(TRY_POPCNT_X86_64) && !defined(POPCNT_AARCH64)
|
||||
#if !defined(HAVE_X86_64_POPCNTQ) && !defined(USE_NEON)
|
||||
|
||||
/*
|
||||
* When special CPU instructions are not available, there's no point in using
|
||||
@@ -282,4 +282,4 @@ pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
|
||||
return pg_popcount_masked_portable(buf, bytes, mask);
|
||||
}
|
||||
|
||||
#endif /* ! TRY_POPCNT_X86_64 && ! POPCNT_AARCH64 */
|
||||
#endif /* ! HAVE_X86_64_POPCNTQ && ! USE_NEON */
|
||||
|
||||
@@ -12,9 +12,7 @@
|
||||
*/
|
||||
#include "c.h"
|
||||
|
||||
#include "port/pg_bitutils.h"
|
||||
|
||||
#ifdef POPCNT_AARCH64
|
||||
#ifdef USE_NEON
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
@@ -30,6 +28,8 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "port/pg_bitutils.h"
|
||||
|
||||
/*
|
||||
* The Neon versions are built regardless of whether we are building the SVE
|
||||
* versions.
|
||||
@@ -478,4 +478,4 @@ pg_popcount_masked_neon(const char *buf, int bytes, bits8 mask)
|
||||
return popcnt;
|
||||
}
|
||||
|
||||
#endif /* POPCNT_AARCH64 */
|
||||
#endif /* USE_NEON */
|
||||
|
||||
@@ -12,9 +12,7 @@
|
||||
*/
|
||||
#include "c.h"
|
||||
|
||||
#include "port/pg_bitutils.h"
|
||||
|
||||
#ifdef TRY_POPCNT_X86_64
|
||||
#ifdef HAVE_X86_64_POPCNTQ
|
||||
|
||||
#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
|
||||
#include <cpuid.h>
|
||||
@@ -28,6 +26,8 @@
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_bitutils.h"
|
||||
|
||||
/*
|
||||
* The SSE4.2 versions are built regardless of whether we are building the
|
||||
* AVX-512 versions.
|
||||
@@ -468,4 +468,4 @@ pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask)
|
||||
return popcnt;
|
||||
}
|
||||
|
||||
#endif /* TRY_POPCNT_X86_64 */
|
||||
#endif /* HAVE_X86_64_POPCNTQ */
|
||||
|
||||
Reference in New Issue
Block a user