1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Optimize popcount functions with ARM SVE intrinsics.

This commit introduces SVE implementations of pg_popcount{,_masked}.
Unlike the Neon versions, we need an additional configure-time
check to determine if the compiler supports SVE intrinsics, and we
need a runtime check to determine if the current CPU supports SVE
instructions.  Our testing showed that the SVE implementations are
much faster for larger inputs and are comparable to the status
quo for smaller inputs.

Author: "Devanga.Susmitha@fujitsu.com" <Devanga.Susmitha@fujitsu.com>
Co-authored-by: "Chiranmoy.Bhattacharya@fujitsu.com" <Chiranmoy.Bhattacharya@fujitsu.com>
Co-authored-by: "Malladi, Rama" <ramamalladi@hotmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/010101936e4aaa70-b474ab9e-b9ce-474d-a3ba-a3dc223d295c-000000%40us-west-2.amazonses.com
Discussion: https://postgr.es/m/OSZPR01MB84990A9A02A3515C6E85A65B8B2A2%40OSZPR01MB8499.jpnprd01.prod.outlook.com
This commit is contained in:
Nathan Bossart
2025-03-28 16:20:20 -05:00
parent 3c8e463b0d
commit 519338ace4
7 changed files with 475 additions and 6 deletions

View File

@ -708,3 +708,55 @@ if test x"$Ac_cachevar" = x"yes"; then
fi
undefine([Ac_cachevar])dnl
])# PGAC_AVX512_POPCNT_INTRINSICS
# PGAC_SVE_POPCNT_INTRINSICS
# --------------------------
# Probe whether the compiler can build and link code that uses the ARM SVE
# intrinsics relied on by the SVE popcount code: svptrue_b64, svdup_u64,
# svcntb, svld1_u64, svld1_u8, svadd_u64_x, svcnt_u64_x, svcnt_u8_x,
# svaddv_u64, svaddv_u8, svwhilelt_b8_s32, svand_n_u64_x, and svand_n_u8_x.
#
# This is a compile-and-link probe only; the test program is not executed.
# When the compiler provides __has_attribute and recognizes the "target"
# attribute, the test function is tagged with target("arch=armv8-a+sve").
#
# The outcome is cached in pgac_cv_sve_popcnt_intrinsics.  On success,
# pgac_sve_popcnt_intrinsics is set to "yes"; on failure it is left alone.
AC_DEFUN([PGAC_SVE_POPCNT_INTRINSICS],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sve_popcnt_intrinsics])])dnl
AC_CACHE_CHECK([for svcnt_x], [Ac_cachevar],
[AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <arm_sve.h>

	char buf[128];

	#if defined(__has_attribute) && __has_attribute (target)
	__attribute__((target("arch=armv8-a+sve")))
	#endif
	static int popcount_test(void)
	{
		svbool_t	pg = svptrue_b64();
		svuint64_t	sum_a = svdup_u64(0);
		svuint64_t	sum_b = svdup_u64(0);
		svuint64_t	words;
		svuint8_t	bytes;
		uint64_t	mask64 = 0x5555555555555555;
		uint64_t	total;
		char	   *cur = buf;

		/* first 64-bit step: load, mask, count, accumulate */
		words = svld1_u64(pg, (const uint64_t *) cur);
		words = svand_n_u64_x(pg, words, mask64);
		sum_a = svadd_u64_x(pg, sum_a, svcnt_u64_x(pg, words));
		cur += svcntb();

		/* second 64-bit step, using its own accumulator */
		words = svld1_u64(pg, (const uint64_t *) cur);
		words = svand_n_u64_x(pg, words, mask64);
		sum_b = svadd_u64_x(pg, sum_b, svcnt_u64_x(pg, words));
		cur += svcntb();

		/* fold both accumulators into one scalar */
		total = svaddv_u64(pg, svadd_u64_x(pg, sum_a, sum_b));

		/* byte-wise step under a while-less-than predicate */
		pg = svwhilelt_b8_s32(0, sizeof(buf));
		bytes = svld1_u8(pg, (const uint8_t *) cur);
		bytes = svand_n_u8_x(pg, bytes, 0x55);
		total += svaddv_u8(pg, svcnt_u8_x(pg, bytes));

		return (int) total;
	}]],
  [return popcount_test();])],
  [Ac_cachevar=yes],
  [Ac_cachevar=no])])
if test x"$Ac_cachevar" = x"yes"; then
  pgac_sve_popcnt_intrinsics=yes
fi
undefine([Ac_cachevar])dnl
])# PGAC_SVE_POPCNT_INTRINSICS