You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
Vectorized update min max
This commit is contained in:
@ -1001,9 +1001,6 @@ inline uint16_t vectWriteColValues(
|
|||||||
*tmpDstVecTPtr = dataVecTPtr[j];
|
*tmpDstVecTPtr = dataVecTPtr[j];
|
||||||
++tmpDstVecTPtr;
|
++tmpDstVecTPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
vectUpdateMinMax<T, KIND, HAS_INPUT_RIDS>(validMinMax, nonNullOrEmptyMask & bitMapPosition, Min, Max,
|
|
||||||
dataVecTPtr[j], in);
|
|
||||||
}
|
}
|
||||||
// Store the whole vector however one level up the stack
|
// Store the whole vector however one level up the stack
|
||||||
// vectorizedFiltering() increases the dstArray by a number of
|
// vectorizedFiltering() increases the dstArray by a number of
|
||||||
@ -1069,8 +1066,6 @@ inline uint16_t vectWriteColValues(
|
|||||||
vectWriteColValuesLoopRIDAsignment<T, HAS_INPUT_RIDS>(ridDstArray, out, ridOffset + j, ridSrcArray, j);
|
vectWriteColValuesLoopRIDAsignment<T, HAS_INPUT_RIDS>(ridDstArray, out, ridOffset + j, ridSrcArray, j);
|
||||||
++ridDstArray;
|
++ridDstArray;
|
||||||
}
|
}
|
||||||
vectUpdateMinMax<T, KIND, HAS_INPUT_RIDS>(validMinMax, nonNullOrEmptyMask & bitMapPosition, Min, Max,
|
|
||||||
dataVecTPtr[j], in);
|
|
||||||
}
|
}
|
||||||
// Store the whole vector however one level up the stack
|
// Store the whole vector however one level up the stack
|
||||||
// vectorizedFiltering() increases the dstArray by a number of
|
// vectorizedFiltering() increases the dstArray by a number of
|
||||||
@ -1106,14 +1101,11 @@ inline uint16_t vectWriteRIDValues(
|
|||||||
uint16_t j = 0;
|
uint16_t j = 0;
|
||||||
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += FilterMaskStep)
|
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += FilterMaskStep)
|
||||||
{
|
{
|
||||||
MT bitMapPosition = 1 << it;
|
|
||||||
if (writeMask & (1 << it))
|
if (writeMask & (1 << it))
|
||||||
{
|
{
|
||||||
vectWriteColValuesLoopRIDAsignment<T, HAS_INPUT_RIDS>(ridDstArray, out, ridOffset + j, ridSrcArray, j);
|
vectWriteColValuesLoopRIDAsignment<T, HAS_INPUT_RIDS>(ridDstArray, out, ridOffset + j, ridSrcArray, j);
|
||||||
++ridDstArray;
|
++ridDstArray;
|
||||||
}
|
}
|
||||||
vectUpdateMinMax<T, KIND, HAS_INPUT_RIDS>(validMinMax, nonNullOrEmptyMask & bitMapPosition, Min, Max,
|
|
||||||
dataVecTPtr[j], in);
|
|
||||||
}
|
}
|
||||||
return ridDstArray - origRIDDstArray;
|
return ridDstArray - origRIDDstArray;
|
||||||
}
|
}
|
||||||
@ -1284,6 +1276,42 @@ inline SIMD_WRAPPER_TYPE simdSwapedOrderDataLoad(const ColRequestHeaderDataType
|
|||||||
return {result};
|
return {result};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename VT, typename SimdType>
|
||||||
|
void vectorizedUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT& simdProcessor,
|
||||||
|
SimdType dataVec, SimdType simdMin, SimdType simdMax)
|
||||||
|
{
|
||||||
|
if (validMinMax && nonNullOrEmptyMask)
|
||||||
|
{
|
||||||
|
simdMin = simdProcessor.min(simdMin, dataVec);
|
||||||
|
simdMax = simdProcessor.max(simdMax, dataVec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename VT, ENUM_KIND KIND>
|
||||||
|
void scalarUpdateMinMax(const bool validMinMax, const MT nonNullOrEmptyMask, VT& simdPRocessor,
|
||||||
|
T* dataVecTPtr, T& min, T& max, NewColRequestHeader* in)
|
||||||
|
{
|
||||||
|
constexpr const uint16_t filterMaskStep = VT::FilterMaskStep;
|
||||||
|
uint16_t j = 0;
|
||||||
|
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += filterMaskStep)
|
||||||
|
{
|
||||||
|
MT bitMapPosition = 1 << it;
|
||||||
|
if (validMinMax && (nonNullOrEmptyMask & bitMapPosition))
|
||||||
|
{
|
||||||
|
updateMinMax<KIND>(min, max, dataVecTPtr[j], in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reduces the SIMD accumulators to scalars: views each accumulator as an array of
// VT::vecByteSize / sizeof(T) values of type T and stores the largest element of simdMax
// into max and the smallest element of simdMin into min.
//
// simdProcessor is not used by this routine.
template<typename T, typename VT, typename SimdType>
void extractMinMax(VT& simdProcessor, SimdType& simdMin, SimdType& simdMax, T& min, T& max)
{
  constexpr const uint16_t laneCount = VT::vecByteSize / sizeof(T);

  T* const maxLanesBegin = reinterpret_cast<T*>(&simdMax);
  T* const maxLanesEnd = maxLanesBegin + laneCount;
  T* const minLanesBegin = reinterpret_cast<T*>(&simdMin);
  T* const minLanesEnd = minLanesBegin + laneCount;

  // Same store order as before: max first, then min.
  max = *std::max_element(maxLanesBegin, maxLanesEnd);
  min = *std::min_element(minLanesBegin, minLanesEnd);
}
|
||||||
// This routine filters input block in a vectorized manner.
|
// This routine filters input block in a vectorized manner.
|
||||||
// It supports all output types, all input types.
|
// It supports all output types, all input types.
|
||||||
// It doesn't support KIND==TEXT so upper layers filters this KIND out beforehand.
|
// It doesn't support KIND==TEXT so upper layers filters this KIND out beforehand.
|
||||||
@ -1298,7 +1326,7 @@ template<typename T, typename VT, bool HAS_INPUT_RIDS, int OUTPUT_TYPE,
|
|||||||
void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T* srcArray,
|
void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T* srcArray,
|
||||||
const uint32_t srcSize, primitives::RIDType* ridArray, const uint16_t ridSize,
|
const uint32_t srcSize, primitives::RIDType* ridArray, const uint16_t ridSize,
|
||||||
ParsedColumnFilter* parsedColumnFilter, const bool validMinMax, const T emptyValue,
|
ParsedColumnFilter* parsedColumnFilter, const bool validMinMax, const T emptyValue,
|
||||||
const T nullValue, T Min, T Max, const bool isNullValueMatches)
|
const T nullValue, T min, T max, const bool isNullValueMatches)
|
||||||
{
|
{
|
||||||
constexpr const uint16_t WIDTH = sizeof(T);
|
constexpr const uint16_t WIDTH = sizeof(T);
|
||||||
using SimdType = typename VT::SimdType;
|
using SimdType = typename VT::SimdType;
|
||||||
@ -1419,7 +1447,9 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
[[maybe_unused]] SimdType simdMin = simdDataLoad<VT, SimdWrapperType, HAS_INPUT_RIDS, T>(simdProcessor, srcArray,
|
||||||
|
origSrcArray, ridArray, 0).v;;
|
||||||
|
[[maybe_unused]] SimdType simdMax = simdMin;
|
||||||
// main loop
|
// main loop
|
||||||
// writeMask tells which values must get into the result. Includes values that matches filters. Can have
|
// writeMask tells which values must get into the result. Includes values that matches filters. Can have
|
||||||
// NULLs. nonEmptyMask tells which vector coords are not EMPTY magics. nonNullMask tells which vector coords
|
// NULLs. nonEmptyMask tells which vector coords are not EMPTY magics. nonNullMask tells which vector coords
|
||||||
@ -1465,14 +1495,24 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
// outside the scope of the memory allocated to out msg.
|
// outside the scope of the memory allocated to out msg.
|
||||||
// vectWriteColValues is empty if outputMode == OT_RID.
|
// vectWriteColValues is empty if outputMode == OT_RID.
|
||||||
uint16_t valuesWritten = vectWriteColValues<T, VT, OUTPUT_TYPE, KIND, HAS_INPUT_RIDS>(
|
uint16_t valuesWritten = vectWriteColValues<T, VT, OUTPUT_TYPE, KIND, HAS_INPUT_RIDS>(
|
||||||
simdProcessor, writeMask, nonNullOrEmptyMask, validMinMax, ridOffset, dataVecTPtr, dstArray, Min, Max,
|
simdProcessor, writeMask, nonNullOrEmptyMask, validMinMax, ridOffset, dataVecTPtr, dstArray, min, max,
|
||||||
in, out, ridDstArray, ridArray);
|
in, out, ridDstArray, ridArray);
|
||||||
// Some outputType modes saves RIDs also. vectWriteRIDValues is empty for
|
// Some outputType modes saves RIDs also. vectWriteRIDValues is empty for
|
||||||
// OT_DATAVALUE, OT_BOTH(vectWriteColValues takes care about RIDs).
|
// OT_DATAVALUE, OT_BOTH(vectWriteColValues takes care about RIDs).
|
||||||
valuesWritten = vectWriteRIDValues<T, VT, OUTPUT_TYPE, KIND, HAS_INPUT_RIDS>(
|
valuesWritten = vectWriteRIDValues<T, VT, OUTPUT_TYPE, KIND, HAS_INPUT_RIDS>(
|
||||||
simdProcessor, valuesWritten, validMinMax, ridOffset, dataVecTPtr, ridDstArray, writeMask, Min, Max,
|
simdProcessor, valuesWritten, validMinMax, ridOffset, dataVecTPtr, ridDstArray, writeMask, min, max,
|
||||||
in, out, nonNullOrEmptyMask, ridArray);
|
in, out, nonNullOrEmptyMask, ridArray);
|
||||||
|
|
||||||
|
if constexpr (HAS_INPUT_RIDS && KIND != KIND_TEXT)
|
||||||
|
{
|
||||||
|
vectorizedUpdateMinMax(validMinMax, nonNullOrEmptyMask, simdProcessor, dataVec, simdMin, simdMax);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
scalarUpdateMinMax<T, VT, KIND>(validMinMax, nonNullOrEmptyMask, simdProcessor, dataVecTPtr, min, max,
|
||||||
|
in);
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate bytes written
|
// Calculate bytes written
|
||||||
uint16_t bytesWritten = valuesWritten * WIDTH;
|
uint16_t bytesWritten = valuesWritten * WIDTH;
|
||||||
totalValuesWritten += valuesWritten;
|
totalValuesWritten += valuesWritten;
|
||||||
@ -1482,7 +1522,10 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
srcArray += VECTOR_SIZE;
|
srcArray += VECTOR_SIZE;
|
||||||
ridArray += VECTOR_SIZE;
|
ridArray += VECTOR_SIZE;
|
||||||
}
|
}
|
||||||
|
if constexpr(HAS_INPUT_RIDS && KIND != KIND_TEXT)
|
||||||
|
{
|
||||||
|
extractMinMax(simdProcessor, simdMin, simdMax, min, max);
|
||||||
|
}
|
||||||
// Set the number of output values here b/c tail processing can skip this operation.
|
// Set the number of output values here b/c tail processing can skip this operation.
|
||||||
out->NVALS = totalValuesWritten;
|
out->NVALS = totalValuesWritten;
|
||||||
|
|
||||||
@ -1490,8 +1533,8 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
out->ValidMinMax = validMinMax;
|
out->ValidMinMax = validMinMax;
|
||||||
if (validMinMax)
|
if (validMinMax)
|
||||||
{
|
{
|
||||||
out->Min = Min;
|
out->Min = min;
|
||||||
out->Max = Max;
|
out->Max = max;
|
||||||
}
|
}
|
||||||
// process the tail. scalarFiltering changes out contents, e.g. Min/Max, NVALS, RIDs and values array
|
// process the tail. scalarFiltering changes out contents, e.g. Min/Max, NVALS, RIDs and values array
|
||||||
// This tail also sets out::Min/Max, out::validMinMax if validMinMax is set.
|
// This tail also sets out::Min/Max, out::validMinMax if validMinMax is set.
|
||||||
@ -1499,7 +1542,7 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
|||||||
scalarFiltering<T, FT, ST, KIND>(in, out, columnFilterMode, filterSet, filterCount, filterCOPs,
|
scalarFiltering<T, FT, ST, KIND>(in, out, columnFilterMode, filterSet, filterCount, filterCOPs,
|
||||||
filterValues, filterRFs, in->colType, origSrcArray, srcSize, origRidArray,
|
filterValues, filterRFs, in->colType, origSrcArray, srcSize, origRidArray,
|
||||||
ridSize, processedSoFar, outputType, validMinMax, emptyValue, nullValue,
|
ridSize, processedSoFar, outputType, validMinMax, emptyValue, nullValue,
|
||||||
Min, Max, isNullValueMatches);
|
min, max, isNullValueMatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
// This routine dispatches template function calls to reduce branching.
|
// This routine dispatches template function calls to reduce branching.
|
||||||
|
@ -239,6 +239,18 @@ class SimdFilterProcessor<
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Minimum of two vectors, each treated as one int128_t value: both operands are
// reinterpreted as int128_t, compared with std::min, and the smaller one is returned
// reinterpreted back to SimdType.
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const int128_t lhs = reinterpret_cast<int128_t>(x);
  const int128_t rhs = reinterpret_cast<int128_t>(y);
  return reinterpret_cast<SimdType>(std::min(lhs, rhs));
}
|
||||||
|
|
||||||
|
// Maximum of two vectors, each treated as one int128_t value: both operands are
// reinterpreted as int128_t, compared with std::max, and the larger one is returned
// reinterpreted back to SimdType.
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const int128_t lhs = reinterpret_cast<int128_t>(x);
  const int128_t rhs = reinterpret_cast<int128_t>(y);
  return reinterpret_cast<SimdType>(std::max(lhs, rhs));
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename T>
|
template <typename VT, typename T>
|
||||||
@ -353,6 +365,16 @@ class SimdFilterProcessor<
|
|||||||
{
|
{
|
||||||
_mm_storeu_pd(reinterpret_cast<T*>(dst), x);
|
_mm_storeu_pd(reinterpret_cast<T*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of x and y via _mm_min_pd (packed doubles). Operand order is kept
// as (x, y) because the intrinsic is not symmetric when a lane holds NaN.
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType lower = _mm_min_pd(x, y);
  return lower;
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of x and y via _mm_max_pd (packed doubles). Operand order is kept
// as (x, y) because the intrinsic is not symmetric when a lane holds NaN.
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType upper = _mm_max_pd(x, y);
  return upper;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename T>
|
template <typename VT, typename T>
|
||||||
@ -467,6 +489,16 @@ class SimdFilterProcessor<
|
|||||||
{
|
{
|
||||||
_mm_storeu_ps(reinterpret_cast<T*>(dst), x);
|
_mm_storeu_ps(reinterpret_cast<T*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of x and y via _mm_min_ps (packed floats). Operand order is kept
// as (x, y) because the intrinsic is not symmetric when a lane holds NaN.
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType lower = _mm_min_ps(x, y);
  return lower;
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of x and y via _mm_max_ps (packed floats). Operand order is kept
// as (x, y) because the intrinsic is not symmetric when a lane holds NaN.
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType upper = _mm_max_ps(x, y);
  return upper;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
@ -574,6 +606,16 @@ class SimdFilterProcessor<VT, CHECK_T,
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of 64-bit lanes built from compare + blend: lanes where x > y take
// the value from y, all other lanes keep x.
// NOTE(review): _mm_cmpgt_epi64 compares lanes as signed integers; the header of this
// specialization is not visible here - confirm it is not used for unsigned 64-bit data.
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType xIsGreater = _mm_cmpgt_epi64(x, y);
  return _mm_blendv_epi8(x, y, xIsGreater);
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of 64-bit lanes built from compare + blend: lanes where y > x take
// the value from y, all other lanes keep x.
// NOTE(review): _mm_cmpgt_epi64 compares lanes as signed integers; the header of this
// specialization is not visible here - confirm it is not used for unsigned 64-bit data.
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType yIsGreater = _mm_cmpgt_epi64(y, x);
  return _mm_blendv_epi8(x, y, yIsGreater);
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
@ -684,6 +726,16 @@ class SimdFilterProcessor<VT, CHECK_T,
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of 64-bit lanes built from compare + blend: lanes where x > y take
// the value from y, all other lanes keep x.
// NOTE(review): _mm_cmpgt_epi64 compares lanes as signed integers; the header of this
// specialization is not visible here - if it serves unsigned 64-bit data, values with
// the top bit set would be ordered incorrectly. Confirm against the class header.
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType xIsGreater = _mm_cmpgt_epi64(x, y);
  return _mm_blendv_epi8(x, y, xIsGreater);
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of 64-bit lanes built from compare + blend: lanes where y > x take
// the value from y, all other lanes keep x.
// NOTE(review): _mm_cmpgt_epi64 compares lanes as signed integers; the header of this
// specialization is not visible here - if it serves unsigned 64-bit data, values with
// the top bit set would be ordered incorrectly. Confirm against the class header.
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType yIsGreater = _mm_cmpgt_epi64(y, x);
  return _mm_blendv_epi8(x, y, yIsGreater);
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
@ -791,6 +843,16 @@ class SimdFilterProcessor<VT, CHECK_T,
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of x and y via _mm_min_epi32 (signed 32-bit lanes).
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType lower = _mm_min_epi32(x, y);
  return lower;
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of x and y via _mm_max_epi32 (signed 32-bit lanes).
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType upper = _mm_max_epi32(x, y);
  return upper;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
@ -901,6 +963,16 @@ class SimdFilterProcessor<VT, CHECK_T,
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of x and y via _mm_min_epu32 (unsigned 32-bit lanes).
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType lower = _mm_min_epu32(x, y);
  return lower;
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of x and y via _mm_max_epu32 (unsigned 32-bit lanes).
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType upper = _mm_max_epu32(x, y);
  return upper;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
@ -1007,6 +1079,16 @@ class SimdFilterProcessor<
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of x and y via _mm_min_epi16 (signed 16-bit lanes).
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType lower = _mm_min_epi16(x, y);
  return lower;
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of x and y via _mm_max_epi16 (signed 16-bit lanes).
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType upper = _mm_max_epi16(x, y);
  return upper;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
@ -1114,6 +1196,16 @@ class SimdFilterProcessor<
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of x and y via _mm_min_epu16 (unsigned 16-bit lanes).
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType lower = _mm_min_epu16(x, y);
  return lower;
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of x and y via _mm_max_epu16 (unsigned 16-bit lanes).
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType upper = _mm_max_epu16(x, y);
  return upper;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
@ -1227,6 +1319,16 @@ class SimdFilterProcessor<
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of x and y via _mm_min_epi8 (signed 8-bit lanes).
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType lower = _mm_min_epi8(x, y);
  return lower;
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of x and y via _mm_max_epi8 (signed 8-bit lanes).
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType upper = _mm_max_epi8(x, y);
  return upper;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename VT, typename CHECK_T>
|
template <typename VT, typename CHECK_T>
|
||||||
@ -1340,6 +1442,16 @@ class SimdFilterProcessor<
|
|||||||
{
|
{
|
||||||
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
_mm_storeu_si128(reinterpret_cast<SimdType*>(dst), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lane-wise minimum of x and y via _mm_min_epu8 (unsigned 8-bit lanes).
MCS_FORCE_INLINE SimdType min(SimdType& x, SimdType& y)
{
  const SimdType lower = _mm_min_epu8(x, y);
  return lower;
}
|
||||||
|
|
||||||
|
// Lane-wise maximum of x and y via _mm_max_epu8 (unsigned 8-bit lanes).
MCS_FORCE_INLINE SimdType max(SimdType& x, SimdType& y)
{
  const SimdType upper = _mm_max_epu8(x, y);
  return upper;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace simd
|
} // namespace simd
|
||||||
|
Reference in New Issue
Block a user