mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Optimize popcount functions with ARM SVE intrinsics.
This commit introduces SVE implementations of pg_popcount{,_masked}.
Unlike the Neon versions, we need an additional configure-time check to
determine if the compiler supports SVE intrinsics, and we need a runtime
check to determine if the current CPU supports SVE instructions. Our
testing showed that the SVE implementations are much faster for larger
inputs and are comparable to the status quo for smaller inputs.

Author: "Devanga.Susmitha@fujitsu.com" <Devanga.Susmitha@fujitsu.com>
Co-authored-by: "Chiranmoy.Bhattacharya@fujitsu.com" <Chiranmoy.Bhattacharya@fujitsu.com>
Co-authored-by: "Malladi, Rama" <ramamalladi@hotmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/010101936e4aaa70-b474ab9e-b9ce-474d-a3ba-a3dc223d295c-000000%40us-west-2.amazonses.com
Discussion: https://postgr.es/m/OSZPR01MB84990A9A02A3515C6E85A65B8B2A2%40OSZPR01MB8499.jpnprd01.prod.outlook.com
This commit is contained in:
48
meson.build
48
meson.build
@ -2297,6 +2297,54 @@ int main(void)
|
||||
endif
|
||||
|
||||
|
||||
###############################################################
|
||||
# Check for the availability of SVE popcount intrinsics.
|
||||
###############################################################
|
||||
|
||||
if host_cpu == 'aarch64'
|
||||

||||
# Probe program for the check below. It calls the SVE intrinsics
# svptrue/svwhilelt (predicates), svld1 (loads), svand_n (masking),
# svcnt (per-element popcount), svadd and svaddv (accumulate/reduce)
# on both 64-bit and 8-bit elements. When the compiler understands the
# "target" attribute, main() is built with target("arch=armv8-a+sve")
# so the probe does not depend on SVE being enabled in the global flags.
prog = '''
|
||||
#include <arm_sve.h>
|
||||

||||
char buf[128];
|
||||

||||
#if defined(__has_attribute) && __has_attribute (target)
|
||||
__attribute__((target("arch=armv8-a+sve")))
|
||||
#endif
|
||||
int main(void)
|
||||
{
|
||||
svbool_t pred = svptrue_b64();
|
||||
svuint8_t vec8;
|
||||
svuint64_t accum1 = svdup_u64(0),
|
||||
accum2 = svdup_u64(0),
|
||||
vec64;
|
||||
char *p = buf;
|
||||
uint64_t popcnt,
|
||||
mask = 0x5555555555555555;
|
||||

||||
vec64 = svand_n_u64_x(pred, svld1_u64(pred, (const uint64_t *) p), mask);
|
||||
accum1 = svadd_u64_x(pred, accum1, svcnt_u64_x(pred, vec64));
|
||||
p += svcntb();
|
||||

||||
vec64 = svand_n_u64_x(pred, svld1_u64(pred, (const uint64_t *) p), mask);
|
||||
accum2 = svadd_u64_x(pred, accum2, svcnt_u64_x(pred, vec64));
|
||||
p += svcntb();
|
||||

||||
popcnt = svaddv_u64(pred, svadd_u64_x(pred, accum1, accum2));
|
||||

||||
pred = svwhilelt_b8_s32(0, sizeof(buf));
|
||||
vec8 = svand_n_u8_x(pred, svld1_u8(pred, (const uint8_t *) p), 0x55);
|
||||
return (int) (popcnt + svaddv_u8(pred, svcnt_u8_x(pred, vec8)));
|
||||
}
|
||||
'''
|
||||

||||
# cc.links() only compiles and links the probe (using test_c_args); it
# does not execute it. If that succeeds, record in the configuration
# data that the SVE popcount code can be built. As the macro name
# indicates, use of that code is still subject to a runtime check.
if cc.links(prog, name: 'SVE popcount', args: test_c_args)
|
||||
cdata.set('USE_SVE_POPCNT_WITH_RUNTIME_CHECK', 1)
|
||||
endif
|
||||

||||
endif
|
||||
|
||||
|
||||
###############################################################
|
||||
# Select CRC-32C implementation.
|
||||
#
|
||||
|
Reference in New Issue
Block a user