1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

Vectorized update min max

This commit is contained in:
Andrey Piskunov
2022-06-29 14:06:43 +03:00
parent f88a3bfc65
commit 20f48fd730
2 changed files with 39 additions and 230 deletions

View File

@ -1295,19 +1295,13 @@ inline SIMD_WRAPPER_TYPE simdSwapedOrderDataLoad(const ColRequestHeaderDataType
}
// Folds one vector of column values (dataVec) into the running per-lane minimum
// (simdMin) and maximum (simdMax) accumulators, but only when validMinMax is set.
//
// NOTE(review): this span is a rendered diff hunk without +/- markers, so two
// versions of the function are interleaved. Presumably the first signature, the
// plain `if (validMinMax)` test and the two blend(...) statements are the removed
// text, while the second signature, the `validMinMax && nonNullOrEmptyMask` test
// and the min()/max() statements are the added text - confirm against the repository.
template <typename VT, typename SimdType>
// Signature variant 1: accumulators are taken by reference, so the assignments in
// the body are visible to the caller.
void vectorizedUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT simdProcessor,
SimdType& dataVec, SimdType& simdMin, SimdType& simdMax)
// Signature variant 2: simdMin and simdMax are taken BY VALUE.
// NOTE(review): with this signature the assignments to simdMin/simdMax below only
// modify local copies and the function has no other output, so the caller never
// sees the updated accumulators. They most likely need to be `SimdType&`.
void vectorizedUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT& simdProcessor,
SimdType dataVec, SimdType simdMin, SimdType simdMax)
{
if (validMinMax)
// Variant 2 additionally skips the update when the mask has no bit set at all.
if (validMinMax && nonNullOrEmptyMask)
{
// Blend-based form: a lane takes dataVec only when the comparison holds AND the
// lane's bit is set in nonNullOrEmptyMask (expanded with bitMaskToByteMask16).
simdMin = simdProcessor.blend(
simdMin, dataVec,
simdProcessor.bwAnd(simdProcessor.cmpGt2(simdMin, dataVec),
bitCast<SimdType>(simd::bitMaskToByteMask16(nonNullOrEmptyMask))));
simdMax = simdProcessor.blend(
simdMax, dataVec,
simdProcessor.bwAnd(simdProcessor.cmpGt2(dataVec, simdMax),
bitCast<SimdType>(simd::bitMaskToByteMask16(nonNullOrEmptyMask))));
// min()/max() form: every lane of dataVec takes part; the mask is only tested
// for being non-zero as a whole.
// NOTE(review): lanes whose bit is cleared in nonNullOrEmptyMask are no longer
// excluded here. If those lanes hold NULL/EMPTY marker values (assumption based
// on the mask's name), they can leak into the min/max - confirm.
simdMin = simdProcessor.min(simdMin, dataVec);
simdMax = simdProcessor.max(simdMax, dataVec);
}
}
@ -1328,7 +1322,7 @@ void scalarUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT&
}
template<typename T, typename VT, typename SimdType>
void extractMinMax(VT& simdProcessor, SimdType simdMin, SimdType simdMax, T& min, T& max)
void extractMinMax(VT& simdProcessor, SimdType& simdMin, SimdType& simdMax, T& min, T& max)
{
constexpr const uint16_t size = VT::vecByteSize / sizeof(T);
T* simdMinVec = reinterpret_cast<T*>(&simdMin);
@ -1336,13 +1330,6 @@ void extractMinMax(VT& simdProcessor, SimdType simdMin, SimdType simdMax, T& min
max = *std::max_element(simdMaxVec, simdMaxVec + size);
min = *std::min_element(simdMinVec, simdMinVec + size);
}
// Seeds the vector accumulators from the scalar bounds: simdMin receives
// simdProcessor.loadValue(min) and simdMax receives simdProcessor.loadValue(max).
// Presumably loadValue replicates the scalar into every lane - loadValue is not
// visible here, confirm against the SimdFilterProcessor definition.
// NOTE(review): the diff stats suggest this helper is removed by the commit; the
// surrounding hunk replaces its call site with a load from the source array.
template <typename T, typename VT, typename SimdType>
void getInitialSimdMinMax(VT& simdProcessor, SimdType& simdMin, SimdType& simdMax, T min, T max)
{
simdMin = simdProcessor.loadValue(min);
simdMax = simdProcessor.loadValue(max);
}
// This routine filters input block in a vectorized manner.
// It supports all output types, all input types.
// It doesn't support KIND==TEXT so upper layers filters this KIND out beforehand.
@ -1478,12 +1465,9 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
}
}
}
[[maybe_unused]] SimdType simdMin;
[[maybe_unused]] SimdType simdMax;
if constexpr (KIND != KIND_TEXT)
{
getInitialSimdMinMax(simdProcessor, simdMin, simdMax, min, max);
}
[[maybe_unused]] SimdType simdMin = simdDataLoad<VT, SimdWrapperType, HAS_INPUT_RIDS, T>(simdProcessor, srcArray,
origSrcArray, ridArray, 0).v;;
[[maybe_unused]] SimdType simdMax = simdMin;
// main loop
// writeMask tells which values must get into the result. Includes values that matches filters. Can have
// NULLs. nonEmptyMask tells which vector coords are not EMPTY magics. nonNullMask tells which vector coords
@ -1537,7 +1521,7 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
simdProcessor, valuesWritten, validMinMax, ridOffset, dataVecTPtr, ridDstArray, writeMask, min, max,
in, out, nonNullOrEmptyMask, ridArray);
if constexpr (KIND != KIND_TEXT)
if constexpr (HAS_INPUT_RIDS && KIND != KIND_TEXT)
{
vectorizedUpdateMinMax(validMinMax, nonNullOrEmptyMask, simdProcessor, dataVec, simdMin, simdMax);
}
@ -1556,7 +1540,7 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
srcArray += VECTOR_SIZE;
ridArray += VECTOR_SIZE;
}
if constexpr(KIND != KIND_TEXT)
if constexpr(HAS_INPUT_RIDS && KIND != KIND_TEXT)
{
extractMinMax(simdProcessor, simdMin, simdMax, min, max);
}

View File

@ -250,30 +250,17 @@ class SimdFilterProcessor<
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers. Where two signatures sit
// back to back over one body, presumably the by-value `const` line is the removed
// text and the `SimdType&` line is the added text - confirm against the repository.

// Stub: ignores y and mask and returns x unchanged.
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
{
return x;
}
// Stub: ignores y and returns x unchanged.
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
{
return x;
}
// Stub: ignores y and returns x unchanged.
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
{
return x;
}
// Treats each whole vector as a single int128_t and returns the smaller one.
// NOTE(review): reinterpret_cast between the vector type and int128_t presumably
// relies on a compiler vector extension - confirm it is accepted by all supported
// compilers. The reference-taking signature is non-const although the body only
// reads x and y, so it cannot be called with temporaries or on a const processor.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return reinterpret_cast<SimdType>(std::min(reinterpret_cast<int128_t>(x), reinterpret_cast<int128_t>(y)));
}
// Treats each whole vector as a single int128_t and returns the larger one.
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return reinterpret_cast<SimdType>(std::max(reinterpret_cast<int128_t>(x), reinterpret_cast<int128_t>(y)));
}
};
template <typename VT, typename T>
@ -389,30 +376,15 @@ class SimdFilterProcessor<
_mm_storeu_pd(reinterpret_cast<T*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers. Where two signatures sit
// back to back over one body, presumably the by-value `const` line is the removed
// text and the `SimdType&` line is the added text - confirm against the repository.

// Lane-wise minimum of two packed-double vectors (_mm_min_pd).
// NOTE(review): the reference-taking signature is non-const although the body only
// reads x and y, so it cannot be called with temporaries or on a const processor.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_min_pd(x, y);
}
// Lane-wise maximum of two packed-double vectors (_mm_max_pd).
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_max_pd(x, y);
}
// Per-lane select between x and y driven by mask (_mm_blendv_pd).
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
{
return _mm_blendv_pd(x, y, mask);
}
// Per-lane x > y comparison mask (_mm_cmpgt_pd).
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
{
return _mm_cmpgt_pd(x, y);
}
// Bitwise AND of the two vectors (_mm_and_pd).
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
{
return _mm_and_pd(x, y);
}
};
template <typename VT, typename T>
@ -528,30 +500,15 @@ class SimdFilterProcessor<
_mm_storeu_ps(reinterpret_cast<T*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers. Where two signatures sit
// back to back over one body, presumably the by-value `const` line is the removed
// text and the `SimdType&` line is the added text - confirm against the repository.

// Lane-wise minimum of two packed-float vectors (_mm_min_ps).
// NOTE(review): the reference-taking signature is non-const although the body only
// reads x and y, so it cannot be called with temporaries or on a const processor.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_min_ps(x, y);
}
// Lane-wise maximum of two packed-float vectors (_mm_max_ps).
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_max_ps(x, y);
}
// Per-lane x > y comparison mask (_mm_cmpgt_ps).
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
{
return _mm_cmpgt_ps(x, y);
}
// Per-lane select between x and y driven by mask (_mm_blendv_ps).
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
{
return _mm_blendv_ps(x, y, mask);
}
// Bitwise AND of the two vectors (_mm_and_ps).
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
{
return _mm_and_ps(x, y);
}
};
template <typename VT, typename CHECK_T>
@ -660,29 +617,14 @@ class SimdFilterProcessor<VT, CHECK_T,
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers; old and new text overlap,
// so a signature is not always followed by its own body. Presumably the by-value
// `const` signatures and the returns that call blend()/cmpGt2() or use `mask` are
// the removed text, while the `SimdType&` signatures and the returns that inline
// _mm_cmpgt_epi64 are the added text - confirm against the repository.

// blend(): byte-wise select between x and y driven by mask (_mm_blendv_epi8).
// min() in its inlined form: picks y in lanes where x > y under a signed 64-bit
// compare, otherwise x.
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_blendv_epi8(x, y, mask);
return _mm_blendv_epi8(x, y, _mm_cmpgt_epi64(x,y));
}
// bwAnd(): bitwise AND (_mm_and_si128). The `max(SimdType&, SimdType&)` line that
// follows is a signature whose body is rendered at the end of this hunk.
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_and_si128(x, y);
}
// Signed 64-bit per-lane x > y comparison mask.
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
{
return _mm_cmpgt_epi64(x, y);
}
// min() composed from blend() and cmpGt2(): y where x > y, otherwise x.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
{
return blend(x, y, cmpGt2(x,y));
}
// max(): y where y > x, otherwise x. The first return composes blend()/cmpGt2();
// the second inlines the same intrinsics.
// NOTE(review): the reference-taking min/max signatures are non-const although the
// bodies only read x and y, so they cannot be called with temporaries or on a
// const processor.
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
{
return blend(x, y, cmpGt2(y,x));
return _mm_blendv_epi8(x, y, _mm_cmpgt_epi64(y,x));
}
};
@ -795,32 +737,14 @@ class SimdFilterProcessor<VT, CHECK_T,
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers; old and new text overlap,
// so a signature is not always followed by its own body. Presumably the by-value
// `const` signatures and the returns that call blend()/cmpGt2() or use `mask` are
// the removed text, while the `SimdType&` signatures and the returns that inline
// _mm_cmpgt_epi64 are the added text - confirm against the repository.
//
// NOTE(review): cmpGt2() below XORs the top bit of each 64-bit lane before the
// signed compare, which looks like the usual way to get an UNSIGNED comparison.
// The min/max returns that call _mm_cmpgt_epi64 directly skip that flip. If this
// specialization handles unsigned 64-bit values (assumption - the template
// arguments are outside this view), those returns give wrong results whenever a
// lane has its top bit set. Confirm and keep the sign flip if so.

// blend(): byte-wise select between x and y driven by mask (_mm_blendv_epi8).
// min() in its inlined form: picks y in lanes where x > y under a SIGNED 64-bit
// compare, otherwise x.
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_blendv_epi8(x, y, mask);
return _mm_blendv_epi8(x, y, _mm_cmpgt_epi64(x,y));
}
// bwAnd(): bitwise AND (_mm_and_si128). The `max(SimdType&, SimdType&)` line that
// follows is a signature whose body is rendered at the end of this hunk.
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_and_si128(x, y);
}
// x > y per 64-bit lane after flipping the most significant bit of both operands.
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
{
// 0x80000000 in the upper 32-bit half of each 64-bit lane, i.e. each lane's top bit.
SimdType signVec = constant4i<0,(int32_t)0x80000000,0,(int32_t)0x80000000>();
SimdType xFlip = _mm_xor_si128(x, signVec);
SimdType yFlip = _mm_xor_si128(y, signVec);
return _mm_cmpgt_epi64(xFlip, yFlip);
}
// min() composed from blend() and the sign-flipping cmpGt2(): y where x > y.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
{
return blend(x, y, cmpGt2(x,y));
}
// max(): y where y > x, otherwise x. The first return goes through the
// sign-flipping cmpGt2(); the second uses the plain signed compare.
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
{
return blend(x, y, cmpGt2(y,x));
return _mm_blendv_epi8(x, y, _mm_cmpgt_epi64(y,x));
}
};
@ -930,27 +854,12 @@ class SimdFilterProcessor<VT, CHECK_T,
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers. Where two signatures sit
// back to back over one body, presumably the by-value `const` line is the removed
// text and the `SimdType&` line is the added text - confirm against the repository.

// Byte-wise select between x and y driven by mask (_mm_blendv_epi8).
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
{
return _mm_blendv_epi8(x, y, mask);
}
// Bitwise AND of the two vectors (_mm_and_si128).
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
{
return _mm_and_si128(x, y);
}
// Signed 32-bit per-lane x > y comparison mask (_mm_cmpgt_epi32).
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
{
return _mm_cmpgt_epi32(x, y);
}
// Lane-wise signed 32-bit minimum (_mm_min_epi32).
// NOTE(review): the reference-taking signature is non-const although the body only
// reads x and y, so it cannot be called with temporaries or on a const processor.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_min_epi32(x, y);
}
// Lane-wise signed 32-bit maximum (_mm_max_epi32).
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_max_epi32(x, y);
}
@ -1065,30 +974,12 @@ class SimdFilterProcessor<VT, CHECK_T,
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers. Where two signatures sit
// back to back over one body, presumably the by-value `const` line is the removed
// text and the `SimdType&` line is the added text - confirm against the repository.

// Byte-wise select between x and y driven by mask (_mm_blendv_epi8).
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
{
return _mm_blendv_epi8(x, y, mask);
}
// Bitwise AND of the two vectors (_mm_and_si128).
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
{
return _mm_and_si128(x, y);
}
// x > y per 32-bit lane after flipping the most significant bit of both operands,
// which turns the signed compare into an unsigned ordering.
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
{
SimdType signVec = constant4i<(int32_t)0x80000000,(int32_t)0x80000000,(int32_t)0x80000000,(int32_t)0x80000000>();
SimdType xFlip = _mm_xor_si128(x, signVec);
SimdType yFlip = _mm_xor_si128(y, signVec);
return _mm_cmpgt_epi32(xFlip, yFlip);
}
// Lane-wise unsigned 32-bit minimum (_mm_min_epu32).
// NOTE(review): the reference-taking signature is non-const although the body only
// reads x and y, so it cannot be called with temporaries or on a const processor.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_min_epu32(x, y);
}
// Lane-wise unsigned 32-bit maximum (_mm_max_epu32).
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_max_epu32(x, y);
}
@ -1199,27 +1090,12 @@ class SimdFilterProcessor<
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers. Where two signatures sit
// back to back over one body, presumably the by-value `const` line is the removed
// text and the `SimdType&` line is the added text - confirm against the repository.

// Byte-wise select between x and y driven by mask (_mm_blendv_epi8).
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
{
return _mm_blendv_epi8(x, y, mask);
}
// Bitwise AND of the two vectors (_mm_and_si128).
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
{
return _mm_and_si128(x, y);
}
// Signed 16-bit per-lane x > y comparison mask (_mm_cmpgt_epi16).
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
{
return _mm_cmpgt_epi16(x, y);
}
// Lane-wise signed 16-bit minimum (_mm_min_epi16).
// NOTE(review): the reference-taking signature is non-const although the body only
// reads x and y, so it cannot be called with temporaries or on a const processor.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_min_epi16(x, y);
}
// Lane-wise signed 16-bit maximum (_mm_max_epi16).
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_max_epi16(x, y);
}
@ -1331,30 +1207,12 @@ class SimdFilterProcessor<
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers. Where two signatures sit
// back to back over one body, presumably the by-value `const` line is the removed
// text and the `SimdType&` line is the added text - confirm against the repository.

// Byte-wise select between x and y driven by mask (_mm_blendv_epi8).
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
{
return _mm_blendv_epi8(x, y, mask);
}
// Bitwise AND of the two vectors (_mm_and_si128).
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
{
return _mm_and_si128(x, y);
}
// Unsigned 16-bit x > y mask: y equals max(x, y) exactly when x <= y, so the
// equality mask is inverted by XOR with all-ones.
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y)
{
SimdType ones =
constant4i<(int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF>();
SimdType maxOfTwo = _mm_max_epu16(x, y);
return _mm_xor_si128(_mm_cmpeq_epi16(y, maxOfTwo), ones);
}
// Lane-wise unsigned 16-bit minimum (_mm_min_epu16).
// NOTE(review): the reference-taking signature is non-const although the body only
// reads x and y, so it cannot be called with temporaries or on a const processor.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_min_epu16(x, y);
}
// Lane-wise unsigned 16-bit maximum (_mm_max_epu16).
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_max_epu16(x, y);
}
@ -1472,27 +1330,12 @@ class SimdFilterProcessor<
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers. Where two signatures sit
// back to back over one body, presumably the by-value `const` line is the removed
// text and the `SimdType&` line is the added text - confirm against the repository.

// Byte-wise select between x and y driven by mask (_mm_blendv_epi8).
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
{
return _mm_blendv_epi8(x, y, mask);
}
// Bitwise AND of the two vectors (_mm_and_si128).
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
{
return _mm_and_si128(x, y);
}
// Signed 8-bit per-lane x > y comparison mask (_mm_cmpgt_epi8).
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y) const
{
return _mm_cmpgt_epi8(x, y);
}
// Lane-wise signed 8-bit minimum (_mm_min_epi8).
// NOTE(review): the reference-taking signature is non-const although the body only
// reads x and y, so it cannot be called with temporaries or on a const processor.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_min_epi8(x, y);
}
// Lane-wise signed 8-bit maximum (_mm_max_epi8).
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_max_epi8(x, y);
}
@ -1611,30 +1454,12 @@ class SimdFilterProcessor<
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
}
// NOTE(review): rendered diff hunk without +/- markers. Where two signatures sit
// back to back over one body, presumably the by-value `const` line is the removed
// text and the `SimdType&` line is the added text - confirm against the repository.

// Byte-wise select between x and y driven by mask (_mm_blendv_epi8).
MCS_FORCE_INLINE SimdType blend(SimdType x, SimdType y, SimdType mask) const
{
return _mm_blendv_epi8(x, y, mask);
}
// Bitwise AND of the two vectors (_mm_and_si128).
MCS_FORCE_INLINE SimdType bwAnd(SimdType x, SimdType y) const
{
return _mm_and_si128(x, y);
}
// Unsigned 8-bit x > y mask: y equals max(x, y) exactly when x <= y, so the
// equality mask is inverted by XOR with all-ones.
MCS_FORCE_INLINE SimdType cmpGt2(SimdType x, SimdType y)
{
SimdType ones =
constant4i<(int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF, (int32_t)0xFFFFFFFF>();
SimdType maxOfTwo = _mm_max_epu8(x, y);
return _mm_xor_si128(_mm_cmpeq_epi8(y, maxOfTwo), ones);
}
// Lane-wise unsigned 8-bit minimum (_mm_min_epu8).
// NOTE(review): the reference-taking signature is non-const although the body only
// reads x and y, so it cannot be called with temporaries or on a const processor.
MCS_FORCE_INLINE SimdType min(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
return _mm_min_epu8(x, y);
}
// Lane-wise unsigned 8-bit maximum (_mm_max_epu8).
MCS_FORCE_INLINE SimdType max(SimdType x, SimdType y) const
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
return _mm_max_epu8(x, y);
}