You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
Vectorized update min max
This commit is contained in:
@ -1295,19 +1295,13 @@ inline SIMD_WRAPPER_TYPE simdSwapedOrderDataLoad(const ColRequestHeaderDataType
|
||||
}
|
||||
|
||||
template <typename VT, typename SimdType>
|
||||
void vectorizedUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT simdProcessor,
|
||||
SimdType& dataVec, SimdType& simdMin, SimdType& simdMax)
|
||||
void vectorizedUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT& simdProcessor,
|
||||
SimdType dataVec, SimdType simdMin, SimdType simdMax)
|
||||
{
|
||||
if (validMinMax)
|
||||
if (validMinMax && nonNullOrEmptyMask)
|
||||
{
|
||||
simdMin = simdProcessor.blend(
|
||||
simdMin, dataVec,
|
||||
simdProcessor.bwAnd(simdProcessor.cmpGt2(simdMin, dataVec),
|
||||
bitCast<SimdType>(simd::bitMaskToByteMask16(nonNullOrEmptyMask))));
|
||||
simdMax = simdProcessor.blend(
|
||||
simdMax, dataVec,
|
||||
simdProcessor.bwAnd(simdProcessor.cmpGt2(dataVec, simdMax),
|
||||
bitCast<SimdType>(simd::bitMaskToByteMask16(nonNullOrEmptyMask))));
|
||||
simdMin = simdProcessor.min(simdMin, dataVec);
|
||||
simdMax = simdProcessor.max(simdMax, dataVec);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1328,7 +1322,7 @@ void scalarUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT&
|
||||
}
|
||||
|
||||
template<typename T, typename VT, typename SimdType>
|
||||
void extractMinMax(VT& simdProcessor, SimdType simdMin, SimdType simdMax, T& min, T& max)
|
||||
void extractMinMax(VT& simdProcessor, SimdType& simdMin, SimdType& simdMax, T& min, T& max)
|
||||
{
|
||||
constexpr const uint16_t size = VT::vecByteSize / sizeof(T);
|
||||
T* simdMinVec = reinterpret_cast<T*>(&simdMin);
|
||||
@ -1336,13 +1330,6 @@ void extractMinMax(VT& simdProcessor, SimdType simdMin, SimdType simdMax, T& min
|
||||
max = *std::max_element(simdMaxVec, simdMaxVec + size);
|
||||
min = *std::min_element(simdMinVec, simdMinVec + size);
|
||||
}
|
||||
|
||||
template <typename T, typename VT, typename SimdType>
|
||||
void getInitialSimdMinMax(VT& simdProcessor, SimdType& simdMin, SimdType& simdMax, T min, T max)
|
||||
{
|
||||
simdMin = simdProcessor.loadValue(min);
|
||||
simdMax = simdProcessor.loadValue(max);
|
||||
}
|
||||
// This routine filters the input block in a vectorized manner.
|
||||
// It supports all output types, all input types.
|
||||
// It doesn't support KIND==TEXT, so upper layers filter this KIND out beforehand.
|
||||
@ -1478,12 +1465,9 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
||||
}
|
||||
}
|
||||
}
|
||||
[[maybe_unused]] SimdType simdMin;
|
||||
[[maybe_unused]] SimdType simdMax;
|
||||
if constexpr (KIND != KIND_TEXT)
|
||||
{
|
||||
getInitialSimdMinMax(simdProcessor, simdMin, simdMax, min, max);
|
||||
}
|
||||
[[maybe_unused]] SimdType simdMin = simdDataLoad<VT, SimdWrapperType, HAS_INPUT_RIDS, T>(simdProcessor, srcArray,
|
||||
origSrcArray, ridArray, 0).v;;
|
||||
[[maybe_unused]] SimdType simdMax = simdMin;
|
||||
// main loop
|
||||
// writeMask tells which values must get into the result. Includes values that match the filters. Can have
|
||||
// NULLs. nonEmptyMask tells which vector coords are not EMPTY magics. nonNullMask tells which vector coords
|
||||
@ -1537,7 +1521,7 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
||||
simdProcessor, valuesWritten, validMinMax, ridOffset, dataVecTPtr, ridDstArray, writeMask, min, max,
|
||||
in, out, nonNullOrEmptyMask, ridArray);
|
||||
|
||||
if constexpr (KIND != KIND_TEXT)
|
||||
if constexpr (HAS_INPUT_RIDS && KIND != KIND_TEXT)
|
||||
{
|
||||
vectorizedUpdateMinMax(validMinMax, nonNullOrEmptyMask, simdProcessor, dataVec, simdMin, simdMax);
|
||||
}
|
||||
@ -1556,7 +1540,7 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
||||
srcArray += VECTOR_SIZE;
|
||||
ridArray += VECTOR_SIZE;
|
||||
}
|
||||
if constexpr(KIND != KIND_TEXT)
|
||||
if constexpr(HAS_INPUT_RIDS && KIND != KIND_TEXT)
|
||||
{
|
||||
extractMinMax(simdProcessor, simdMin, simdMax, min, max);
|
||||
}
|
||||
|
@ -250,30 +250,17 @@ class SimdFilterProcessor<
|
||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return reinterpret_cast<SimdType>(std::min(reinterpret_cast<int128_t>(x), reinterpret_cast<int128_t>(y)));
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return reinterpret_cast<SimdType>(std::max(reinterpret_cast<int128_t>(x), reinterpret_cast<int128_t>(y)));
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template <typename VT, typename T>
|
||||
@ -389,30 +376,15 @@ class SimdFilterProcessor<
|
||||
_mm_storeu_pd(reinterpret_cast<T*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_min_pd(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_max_pd(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
{
|
||||
return _mm_blendv_pd(x, y, mask);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_cmpgt_pd(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_and_pd(x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename VT, typename T>
|
||||
@ -528,30 +500,15 @@ class SimdFilterProcessor<
|
||||
_mm_storeu_ps(reinterpret_cast<T*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_min_ps(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_max_ps(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_cmpgt_ps(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
{
|
||||
return _mm_blendv_ps(x, y, mask);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_and_ps(x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename VT, typename CHECK_T>
|
||||
@ -660,29 +617,14 @@ class SimdFilterProcessor<VT, CHECK_T,
|
||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_blendv_epi8(x, y, mask);
|
||||
return _mm_blendv_epi8(x, y, _mm_cmpgt_epi64(x,y));
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_and_si128(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_cmpgt_epi64(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
{
|
||||
return blend(x, y, cmpGt2(x,y));
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
{
|
||||
return blend(x, y, cmpGt2(y,x));
|
||||
return _mm_blendv_epi8(x, y, _mm_cmpgt_epi64(y,x));
|
||||
}
|
||||
};
|
||||
|
||||
@ -795,32 +737,14 @@ class SimdFilterProcessor<VT, CHECK_T,
|
||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_blendv_epi8(x, y, mask);
|
||||
return _mm_blendv_epi8(x, y, _mm_cmpgt_epi64(x,y));
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_and_si128(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
|
||||
{
|
||||
SimdType signVec = constant4i<0,(int32_t)0x80000000,0,(int32_t)0x80000000>();
|
||||
SimdType xFlip = _mm_xor_si128(x, signVec);
|
||||
SimdType yFlip = _mm_xor_si128(y, signVec);
|
||||
return _mm_cmpgt_epi64(xFlip, yFlip);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
{
|
||||
return blend(x, y, cmpGt2(x,y));
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
{
|
||||
return blend(x, y, cmpGt2(y,x));
|
||||
return _mm_blendv_epi8(x, y, _mm_cmpgt_epi64(y,x));
|
||||
}
|
||||
};
|
||||
|
||||
@ -930,27 +854,12 @@ class SimdFilterProcessor<VT, CHECK_T,
|
||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
{
|
||||
return _mm_blendv_epi8(x, y, mask);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_and_si128(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_cmpgt_epi32(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_min_epi32(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_max_epi32(x, y);
|
||||
}
|
||||
@ -1065,30 +974,12 @@ class SimdFilterProcessor<VT, CHECK_T,
|
||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
{
|
||||
return _mm_blendv_epi8(x, y, mask);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_and_si128(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
|
||||
{
|
||||
SimdType signVec = constant4i<(int32_t)0x80000000,(int32_t)0x80000000,(int32_t)0x80000000,(int32_t)0x80000000>();
|
||||
SimdType xFlip = _mm_xor_si128(x, signVec);
|
||||
SimdType yFlip = _mm_xor_si128(y, signVec);
|
||||
return _mm_cmpgt_epi32(xFlip, yFlip);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_min_epu32(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_max_epu32(x, y);
|
||||
}
|
||||
@ -1199,27 +1090,12 @@ class SimdFilterProcessor<
|
||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
{
|
||||
return _mm_blendv_epi8(x, y, mask);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_and_si128(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_cmpgt_epi16(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_min_epi16(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_max_epi16(x, y);
|
||||
}
|
||||
@ -1331,30 +1207,12 @@ class SimdFilterProcessor<
|
||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
{
|
||||
return _mm_blendv_epi8(x, y, mask);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_and_si128(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y)
|
||||
{
|
||||
SimdType ones =
|
||||
constant4i<(int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF>();
|
||||
SimdType maxOfTwo = _mm_max_epu16(x, y);
|
||||
return _mm_xor_si128(_mm_cmpeq_epi16(y, maxOfTwo), ones);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_min_epu16(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_max_epu16(x, y);
|
||||
}
|
||||
@ -1472,27 +1330,12 @@ class SimdFilterProcessor<
|
||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
{
|
||||
return _mm_blendv_epi8(x, y, mask);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_and_si128(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_cmpgt_epi8(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_min_epi8(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_max_epi8(x, y);
|
||||
}
|
||||
@ -1611,30 +1454,12 @@ class SimdFilterProcessor<
|
||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
|
||||
{
|
||||
return _mm_blendv_epi8(x, y, mask);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
|
||||
{
|
||||
return _mm_and_si128(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y)
|
||||
{
|
||||
SimdType ones =
|
||||
constant4i<(int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF>();
|
||||
SimdType maxOfTwo = _mm_max_epu8(x, y);
|
||||
return _mm_xor_si128(_mm_cmpeq_epi8(y, maxOfTwo), ones);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_min_epu8(x, y);
|
||||
}
|
||||
|
||||
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
|
||||
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
|
||||
{
|
||||
return _mm_max_epu8(x, y);
|
||||
}
|
||||
|
Reference in New Issue
Block a user