Introduce helper SIMD functions for small byte arrays

vector8_min - helper for emulating ">=" semantics
vector8_highbit_mask - used to turn the result of a vector comparison into a bitmask

Masahiko Sawada

Reviewed by Nathan Bossart, with additional adjustments by me

Discussion: https://postgr.es/m/CAFBsxsHbBm_M22gLBO%2BAZT4mfMq3L_oX3wdKZxjeNnT7fHsYMQ%40mail.gmail.com
2025-12-04 12:02:48 +03:00 · 2024-03-06 14:22:15 +07:00
parent 60c07820d6
commit 9f225e992b
1 changed files with 47 additions and 0 deletions
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -79,6 +79,7 @@ static inline bool vector8_has_le(const Vector8 v, const uint8 c);
 static inline bool vector8_is_highbit_set(const Vector8 v);
 #ifndef USE_NO_SIMD
 static inline bool vector32_is_highbit_set(const Vector32 v);
 static inline uint32 vector8_highbit_mask(const Vector8 v);
 #endif
 /* arithmetic operations */
@@ -96,6 +97,7 @@ static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);
 */
 #ifndef USE_NO_SIMD
 static inline Vector8 vector8_eq(const Vector8 v1, const Vector8 v2);
 static inline Vector8 vector8_min(const Vector8 v1, const Vector8 v2);
 static inline Vector32 vector32_eq(const Vector32 v1, const Vector32 v2);
 #endif
@@ -299,6 +301,36 @@ vector32_is_highbit_set(const Vector32 v)
 }
 #endif							/* ! USE_NO_SIMD */
 /*
 * Return a bitmask formed from the high-bit of each element.
 *
 * Bit i of the result is set iff the most significant bit of byte i of 'v'
 * is set.  Both supported vector types hold 16 bytes, so only the low 16
 * bits of the result can be nonzero.
 */
 #ifndef USE_NO_SIMD
 static inline uint32
 vector8_highbit_mask(const Vector8 v)
 {
 #ifdef USE_SSE2
 	return (uint32) _mm_movemask_epi8(v);
 #elif defined(USE_NEON)
 	/*
 	 * Note: It would be faster to use vget_lane_u64 and vshrn_n_u16, but that
 	 * returns a uint64, making it inconvenient to combine mask values from
 	 * multiple vectors.
 	 */
 	static const uint8 mask[16] = {
 		1 << 0, 1 << 1, 1 << 2, 1 << 3,
 		1 << 4, 1 << 5, 1 << 6, 1 << 7,
 		1 << 0, 1 << 1, 1 << 2, 1 << 3,
 		1 << 4, 1 << 5, 1 << 6, 1 << 7,
 	};
 	/*
 	 * The arithmetic shift by 7 turns each byte into 0x00 or 0xFF according
 	 * to its high bit; ANDing with 'mask' then leaves one distinct bit per
 	 * byte within each 8-byte half.  vshrq_n_s8 is declared to take a signed
 	 * vector, so cast explicitly instead of relying on lax vector
 	 * conversions, which not all compilers permit.
 	 */
 	uint8x16_t	masked = vandq_u8(vld1q_u8(mask), (uint8x16_t) vshrq_n_s8((int8x16_t) v, 7));
 	/* Rotate by 8 bytes so the upper half lines up with the lower half. */
 	uint8x16_t	maskedhi = vextq_u8(masked, masked, 8);
 	/*
 	 * Interleave the two halves so that each 16-bit lane carries a low-half
 	 * byte in its low 8 bits and the matching high-half byte in its high 8
 	 * bits.  The set bits never overlap, so the horizontal add acts as an OR.
 	 */
 	return (uint32) vaddvq_u16((uint16x8_t) vzip1q_u8(masked, maskedhi));
 #endif
 }
 #endif							/* ! USE_NO_SIMD */
 /*
 * Return the bitwise OR of the inputs
 */
@@ -372,4 +404,19 @@ vector32_eq(const Vector32 v1, const Vector32 v2)
 }
 #endif							/* ! USE_NO_SIMD */
 /*
 * Compute the element-wise minimum of two vectors: each byte of the result
 * is the smaller of the corresponding bytes of v1 and v2, compared as
 * unsigned values.
 */
 #ifndef USE_NO_SIMD
 static inline Vector8
 vector8_min(const Vector8 v1, const Vector8 v2)
 {
 #if defined(USE_SSE2)
 	const Vector8 lesser = _mm_min_epu8(v1, v2);

 	return lesser;
 #elif defined(USE_NEON)
 	const Vector8 lesser = vminq_u8(v1, v2);

 	return lesser;
 #endif
 }
 #endif							/* ! USE_NO_SIMD */
 #endif							/* SIMD_H */