diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 53e52397170..de480da71e1 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -302,16 +302,50 @@ pg_ceil_log2_64(uint64 num)
 /* Attempt to use the POPCNT instruction, but perform a runtime check first */
 extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
 extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
-extern PGDLLIMPORT uint64 (*pg_popcount) (const char *buf, int bytes);
+extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
 
 #else
 /* Use a portable implementation -- no need for a function pointer. */
 extern int	pg_popcount32(uint32 word);
 extern int	pg_popcount64(uint64 word);
-extern uint64 pg_popcount(const char *buf, int bytes);
+extern uint64 pg_popcount_optimized(const char *buf, int bytes);
 
 #endif							/* TRY_POPCNT_FAST */
 
+/*
+ * pg_popcount
+ *		Returns the number of 1-bits in the first "bytes" bytes of buf.
+ *
+ * Short buffers are counted by an inlined loop that looks each byte up in
+ * pg_number_of_ones (as unsigned char, so the index is never negative),
+ * which avoids a function call.  Longer buffers go to pg_popcount_optimized,
+ * accepting the call overhead because it is likely to be faster.
+ */
+static inline uint64
+pg_popcount(const char *buf, int bytes)
+{
+	/*
+	 * The threshold is the smallest size at which the optimized version will
+	 * first use special instructions: 8 if SIZEOF_VOID_P >= 8, otherwise 4.
+	 */
+#if SIZEOF_VOID_P >= 8
+	int			threshold = 8;
+#else
+	int			threshold = 4;
+#endif
+
+	if (bytes < threshold)
+	{
+		uint64		popcnt = 0;
+
+		while (bytes--)
+			popcnt += pg_number_of_ones[(unsigned char) *buf++];
+		return popcnt;
+	}
+
+	return pg_popcount_optimized(buf, bytes);
+}
+
 /*
  * Rotate the bits of "word" to the right/left by n bits.
  */
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 28312f3dd95..6271acea600 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -118,7 +118,7 @@ static uint64 pg_popcount_fast(const char *buf, int bytes);
 
 int			(*pg_popcount32) (uint32 word) = pg_popcount32_choose;
 int			(*pg_popcount64) (uint64 word) = pg_popcount64_choose;
-uint64		(*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose;
+uint64		(*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
 #endif							/* TRY_POPCNT_FAST */
 
 #ifdef TRY_POPCNT_FAST
@@ -155,13 +155,13 @@ choose_popcount_functions(void)
 	{
 		pg_popcount32 = pg_popcount32_fast;
 		pg_popcount64 = pg_popcount64_fast;
-		pg_popcount = pg_popcount_fast;
+		pg_popcount_optimized = pg_popcount_fast;
 	}
 	else
 	{
 		pg_popcount32 = pg_popcount32_slow;
 		pg_popcount64 = pg_popcount64_slow;
-		pg_popcount = pg_popcount_slow;
+		pg_popcount_optimized = pg_popcount_slow;
 	}
 }
 
@@ -183,7 +183,7 @@ static uint64
 pg_popcount_choose(const char *buf, int bytes)
 {
 	choose_popcount_functions();
-	return pg_popcount(buf, bytes);
+	return pg_popcount_optimized(buf, bytes);
 }
 
 /*
@@ -387,11 +387,11 @@ pg_popcount64(uint64 word)
 }
 
 /*
- * pg_popcount
+ * pg_popcount_optimized
  *		Returns the number of 1-bits in buf
  */
 uint64
-pg_popcount(const char *buf, int bytes)
+pg_popcount_optimized(const char *buf, int bytes)
 {
 	return pg_popcount_slow(buf, bytes);
 }