mirror of
https://github.com/postgres/postgres.git
synced 2025-10-16 17:07:43 +03:00
Optimize vector8_has_le() on AArch64.
Presently, the SIMD implementation of this function uses unsigned saturating subtraction to find bytes less than or equal to the given value, which is a workaround for the lack of unsigned comparison instructions on some architectures. However, Neon offers vminvq_u8(), which returns the minimum (unsigned) value in the vector. This commit adds a Neon-specific implementation that uses vminvq_u8() to optimize vector8_has_le() on AArch64.

In passing, adjust the SSE2 implementation to use vector8_min() and vector8_eq() to find values less than or equal to the given value. This was the only use of vector8_ssub(), so it has been removed.

Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/aNHDNDSHleq0ogC_%40nathan
This commit is contained in:
@@ -86,7 +86,6 @@ static inline uint32 vector8_highbit_mask(const Vector8 v);
|
|||||||
static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2);
|
static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2);
|
||||||
#ifndef USE_NO_SIMD
|
#ifndef USE_NO_SIMD
|
||||||
static inline Vector32 vector32_or(const Vector32 v1, const Vector32 v2);
|
static inline Vector32 vector32_or(const Vector32 v1, const Vector32 v2);
|
||||||
static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -213,6 +212,10 @@ static inline bool
|
|||||||
vector8_has_le(const Vector8 v, const uint8 c)
|
vector8_has_le(const Vector8 v, const uint8 c)
|
||||||
{
|
{
|
||||||
bool result = false;
|
bool result = false;
|
||||||
|
#ifdef USE_SSE2
|
||||||
|
Vector8 umin;
|
||||||
|
Vector8 cmpe;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* pre-compute the result for assert checking */
|
/* pre-compute the result for assert checking */
|
||||||
#ifdef USE_ASSERT_CHECKING
|
#ifdef USE_ASSERT_CHECKING
|
||||||
@@ -250,14 +253,12 @@ vector8_has_le(const Vector8 v, const uint8 c)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#elif defined(USE_SSE2)
|
||||||
|
umin = vector8_min(v, vector8_broadcast(c));
|
||||||
/*
|
cmpe = vector8_eq(umin, v);
|
||||||
* Use saturating subtraction to find bytes <= c, which will present as
|
result = vector8_is_highbit_set(cmpe);
|
||||||
* NUL bytes. This approach is a workaround for the lack of unsigned
|
#elif defined(USE_NEON)
|
||||||
* comparison instructions on some architectures.
|
result = vminvq_u8(v) <= c;
|
||||||
*/
|
|
||||||
result = vector8_has_zero(vector8_ssub(v, vector8_broadcast(c)));
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Assert(assert_result == result);
|
Assert(assert_result == result);
|
||||||
@@ -358,24 +359,6 @@ vector32_or(const Vector32 v1, const Vector32 v2)
|
|||||||
}
|
}
|
||||||
#endif /* ! USE_NO_SIMD */
|
#endif /* ! USE_NO_SIMD */
|
||||||
|
|
||||||
/*
|
|
||||||
* Return the result of subtracting the respective elements of the input
|
|
||||||
* vectors using saturation (i.e., if the operation would yield a value less
|
|
||||||
* than zero, zero is returned instead). For more information on saturation
|
|
||||||
* arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic
|
|
||||||
*/
|
|
||||||
#ifndef USE_NO_SIMD
|
|
||||||
static inline Vector8
|
|
||||||
vector8_ssub(const Vector8 v1, const Vector8 v2)
|
|
||||||
{
|
|
||||||
#ifdef USE_SSE2
|
|
||||||
return _mm_subs_epu8(v1, v2);
|
|
||||||
#elif defined(USE_NEON)
|
|
||||||
return vqsubq_u8(v1, v2);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#endif /* ! USE_NO_SIMD */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return a vector with all bits set in each lane where the corresponding
|
* Return a vector with all bits set in each lane where the corresponding
|
||||||
* lanes in the inputs are equal.
|
* lanes in the inputs are equal.
|
||||||
|
Reference in New Issue
Block a user