From deb1486c7d36e5430b50b63e8c0f2d8405e24a2a Mon Sep 17 00:00:00 2001
From: Nathan Bossart
Date: Wed, 3 Apr 2024 12:22:02 -0500
Subject: [PATCH] Inline pg_popcount() for small buffers.

If there aren't many bytes to process, the function call overhead
of the optimized implementation isn't worth taking, so instead we
inline a loop that consults pg_number_of_ones in that case. If
there are many bytes to process, we accept the function call
overhead because the optimized versions are likely to be faster.

The threshold at which we use the optimized implementation is set
to the smallest amount of data required to use special popcount
instructions.

Reviewed-by: Alvaro Herrera, Tom Lane
Discussion: https://postgr.es/m/20240402155301.GA2750455%40nathanxps13
---
 src/include/port/pg_bitutils.h | 38 ++++++++++++++++++++++++++++++++--
 src/port/pg_bitutils.c         | 12 +++++------
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 53e52397170..de480da71e1 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -302,16 +302,50 @@ pg_ceil_log2_64(uint64 num)
 /* Attempt to use the POPCNT instruction, but perform a runtime check first */
 extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
 extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
-extern PGDLLIMPORT uint64 (*pg_popcount) (const char *buf, int bytes);
+extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
 
 #else
 /* Use a portable implementation -- no need for a function pointer. */
 extern int pg_popcount32(uint32 word);
 extern int pg_popcount64(uint64 word);
-extern uint64 pg_popcount(const char *buf, int bytes);
+extern uint64 pg_popcount_optimized(const char *buf, int bytes);
 
 #endif /* TRY_POPCNT_FAST */
 
+/*
+ * Returns the number of 1-bits in buf.
+ *
+ * If there aren't many bytes to process, the function call overhead of the
+ * optimized versions isn't worth taking, so we inline a loop that consults
+ * pg_number_of_ones in that case. If there are many bytes to process, we
+ * accept the function call overhead because the optimized versions are likely
+ * to be faster.
+ */
+static inline uint64
+pg_popcount(const char *buf, int bytes)
+{
+	/*
+	 * We set the threshold to the point at which we'll first use special
+	 * instructions in the optimized version.
+	 */
+#if SIZEOF_VOID_P >= 8
+	int threshold = 8;
+#else
+	int threshold = 4;
+#endif
+
+	if (bytes < threshold)
+	{
+		uint64 popcnt = 0;
+
+		while (bytes--)
+			popcnt += pg_number_of_ones[(unsigned char) *buf++];
+		return popcnt;
+	}
+
+	return pg_popcount_optimized(buf, bytes);
+}
+
 /*
  * Rotate the bits of "word" to the right/left by n bits.
  */
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 28312f3dd95..6271acea600 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -118,7 +118,7 @@ static uint64 pg_popcount_fast(const char *buf, int bytes);
 
 int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
 int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
-uint64 (*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose;
+uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
 #endif /* TRY_POPCNT_FAST */
 
 #ifdef TRY_POPCNT_FAST
@@ -155,13 +155,13 @@ choose_popcount_functions(void)
 	{
 		pg_popcount32 = pg_popcount32_fast;
 		pg_popcount64 = pg_popcount64_fast;
-		pg_popcount = pg_popcount_fast;
+		pg_popcount_optimized = pg_popcount_fast;
 	}
 	else
 	{
 		pg_popcount32 = pg_popcount32_slow;
 		pg_popcount64 = pg_popcount64_slow;
-		pg_popcount = pg_popcount_slow;
+		pg_popcount_optimized = pg_popcount_slow;
 	}
 }
 
@@ -183,7 +183,7 @@ static uint64
 pg_popcount_choose(const char *buf, int bytes)
 {
 	choose_popcount_functions();
-	return pg_popcount(buf, bytes);
+	return pg_popcount_optimized(buf, bytes);
 }
 
 /*
@@ -387,11 +387,11 @@ pg_popcount64(uint64 word)
 }
 
 /*
- * pg_popcount
+ * pg_popcount_optimized
  *		Returns the number of 1-bits in buf
  */
 uint64
-pg_popcount(const char *buf, int bytes)
+pg_popcount_optimized(const char *buf, int bytes)
 {
 	return pg_popcount_slow(buf, bytes);
 }