diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index c9c508d4ba3..89b117d9817 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -307,10 +307,10 @@ pg_ceil_log2_64(uint64 num) #define POPCNT_AARCH64 1 #endif -extern int pg_popcount32_slow(uint32 word); -extern int pg_popcount64_slow(uint64 word); -extern uint64 pg_popcount_slow(const char *buf, int bytes); -extern uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask); +extern int pg_popcount32_portable(uint32 word); +extern int pg_popcount64_portable(uint64 word); +extern uint64 pg_popcount_portable(const char *buf, int bytes); +extern uint64 pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask); #ifdef TRY_POPCNT_X86_64 /* diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index 9f9f90ddd4d..170aeef7548 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -97,11 +97,11 @@ const uint8 pg_number_of_ones[256] = { }; /* - * pg_popcount32_slow + * pg_popcount32_portable * Return the number of 1 bits set in word */ int -pg_popcount32_slow(uint32 word) +pg_popcount32_portable(uint32 word) { #ifdef HAVE__BUILTIN_POPCOUNT return __builtin_popcount(word); @@ -119,11 +119,11 @@ pg_popcount32_slow(uint32 word) } /* - * pg_popcount64_slow + * pg_popcount64_portable * Return the number of 1 bits set in word */ int -pg_popcount64_slow(uint64 word) +pg_popcount64_portable(uint64 word) { #ifdef HAVE__BUILTIN_POPCOUNT #if SIZEOF_LONG == 8 @@ -147,11 +147,11 @@ pg_popcount64_slow(uint64 word) } /* - * pg_popcount_slow + * pg_popcount_portable * Returns the number of 1-bits in buf */ uint64 -pg_popcount_slow(const char *buf, int bytes) +pg_popcount_portable(const char *buf, int bytes) { uint64 popcnt = 0; @@ -163,7 +163,7 @@ pg_popcount_slow(const char *buf, int bytes) while (bytes >= 8) { - popcnt += pg_popcount64_slow(*words++); + popcnt += pg_popcount64_portable(*words++); bytes -= 8; } @@ -177,7 +177,7 @@ pg_popcount_slow(const char *buf, int bytes) while (bytes >= 4) { - popcnt += pg_popcount32_slow(*words++); + popcnt += pg_popcount32_portable(*words++); bytes -= 4; } @@ -193,11 +193,11 @@ pg_popcount_slow(const char *buf, int bytes) } /* - * pg_popcount_masked_slow + * pg_popcount_masked_portable * Returns the number of 1-bits in buf after applying the mask to each byte */ uint64 -pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask) +pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask) { uint64 popcnt = 0; @@ -211,7 +211,7 @@ pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask) while (bytes >= 8) { - popcnt += pg_popcount64_slow(*words++ & maskv); + popcnt += pg_popcount64_portable(*words++ & maskv); bytes -= 8; } @@ -227,7 +227,7 @@ pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask) while (bytes >= 4) { - popcnt += pg_popcount32_slow(*words++ & maskv); + popcnt += pg_popcount32_portable(*words++ & maskv); bytes -= 4; } @@ -246,20 +246,20 @@ pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask) /* * When special CPU instructions are not available, there's no point in using - * function pointers to vary the implementation between the fast and slow - * method. We instead just make these actual external functions. The compiler - * should be able to inline the slow versions here. + * function pointers to vary the implementation. We instead just make these + * actual external functions. The compiler should be able to inline the + * portable versions here. */ int pg_popcount32(uint32 word) { - return pg_popcount32_slow(word); + return pg_popcount32_portable(word); } int pg_popcount64(uint64 word) { - return pg_popcount64_slow(word); + return pg_popcount64_portable(word); } /* @@ -269,7 +269,7 @@ pg_popcount64(uint64 word) uint64 pg_popcount_optimized(const char *buf, int bytes) { - return pg_popcount_slow(buf, bytes); + return pg_popcount_portable(buf, bytes); } /* @@ -279,7 +279,7 @@ pg_popcount_optimized(const char *buf, int bytes) uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask) { - return pg_popcount_masked_slow(buf, bytes, mask); + return pg_popcount_masked_portable(buf, bytes, mask); } #endif /* ! TRY_POPCNT_X86_64 && ! POPCNT_AARCH64 */ diff --git a/src/port/pg_popcount_x86.c b/src/port/pg_popcount_x86.c index a189a04a8e1..ce0ce9c8a3c 100644 --- a/src/port/pg_popcount_x86.c +++ b/src/port/pg_popcount_x86.c @@ -31,11 +31,15 @@ /* * The SSE4.2 versions are built regardless of whether we are building the * AVX-512 versions. + * + * Technically, POPCNT is not part of SSE4.2, and isn't even a vector + * operation, but in practice this is close enough, and "sse42" seems easier to + * follow than "popcnt" for these names. */ -static inline int pg_popcount32_fast(uint32 word); -static inline int pg_popcount64_fast(uint64 word); -static uint64 pg_popcount_fast(const char *buf, int bytes); -static uint64 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask); +static inline int pg_popcount32_sse42(uint32 word); +static inline int pg_popcount64_sse42(uint64 word); +static uint64 pg_popcount_sse42(const char *buf, int bytes); +static uint64 pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask); /* * These are the AVX-512 implementations of the popcount functions. @@ -64,7 +68,7 @@ uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) * Return true if CPUID indicates that the POPCNT instruction is available. */ static bool -pg_popcount_available(void) +pg_popcount_sse42_available(void) { unsigned int exx[4] = {0, 0, 0, 0}; @@ -161,19 +165,19 @@ pg_popcount_avx512_available(void) static inline void choose_popcount_functions(void) { - if (pg_popcount_available()) + if (pg_popcount_sse42_available()) { - pg_popcount32 = pg_popcount32_fast; - pg_popcount64 = pg_popcount64_fast; - pg_popcount_optimized = pg_popcount_fast; - pg_popcount_masked_optimized = pg_popcount_masked_fast; + pg_popcount32 = pg_popcount32_sse42; + pg_popcount64 = pg_popcount64_sse42; + pg_popcount_optimized = pg_popcount_sse42; + pg_popcount_masked_optimized = pg_popcount_masked_sse42; } else { - pg_popcount32 = pg_popcount32_slow; - pg_popcount64 = pg_popcount64_slow; - pg_popcount_optimized = pg_popcount_slow; - pg_popcount_masked_optimized = pg_popcount_masked_slow; + pg_popcount32 = pg_popcount32_portable; + pg_popcount64 = pg_popcount64_portable; + pg_popcount_optimized = pg_popcount_portable; + pg_popcount_masked_optimized = pg_popcount_masked_portable; } #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK @@ -335,11 +339,11 @@ pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask) #endif /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */ /* - * pg_popcount32_fast + * pg_popcount32_sse42 * Return the number of 1 bits set in word */ static inline int -pg_popcount32_fast(uint32 word) +pg_popcount32_sse42(uint32 word) { #ifdef _MSC_VER return __popcnt(word); @@ -352,11 +356,11 @@ __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc"); } /* - * pg_popcount64_fast + * pg_popcount64_sse42 * Return the number of 1 bits set in word */ static inline int -pg_popcount64_fast(uint64 word) +pg_popcount64_sse42(uint64 word) { #ifdef _MSC_VER return __popcnt64(word); @@ -369,11 +373,11 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc"); } /* - * pg_popcount_fast + * pg_popcount_sse42 * Returns the number of 1-bits in buf */ static uint64 -pg_popcount_fast(const char *buf, int bytes) +pg_popcount_sse42(const char *buf, int bytes) { uint64 popcnt = 0; @@ -385,7 +389,7 @@ pg_popcount_fast(const char *buf, int bytes) while (bytes >= 8) { - popcnt += pg_popcount64_fast(*words++); + popcnt += pg_popcount64_sse42(*words++); bytes -= 8; } @@ -399,7 +403,7 @@ pg_popcount_fast(const char *buf, int bytes) while (bytes >= 4) { - popcnt += pg_popcount32_fast(*words++); + popcnt += pg_popcount32_sse42(*words++); bytes -= 4; } @@ -415,11 +419,11 @@ pg_popcount_fast(const char *buf, int bytes) } /* - * pg_popcount_masked_fast + * pg_popcount_masked_sse42 * Returns the number of 1-bits in buf after applying the mask to each byte */ static uint64 -pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask) +pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask) { uint64 popcnt = 0; @@ -433,7 +437,7 @@ pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask) while (bytes >= 8) { - popcnt += pg_popcount64_fast(*words++ & maskv); + popcnt += pg_popcount64_sse42(*words++ & maskv); bytes -= 8; } @@ -449,7 +453,7 @@ pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask) while (bytes >= 4) { - popcnt += pg_popcount32_fast(*words++ & maskv); + popcnt += pg_popcount32_sse42(*words++ & maskv); bytes -= 4; }