You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
MCOL-4809 This patch adds support for float data types filtering and scanning vectorization
This commit is contained in:
@ -50,19 +50,8 @@ using namespace execplan;
|
||||
|
||||
namespace
|
||||
{
|
||||
// WIP Move this
|
||||
using MT = uint16_t;
|
||||
|
||||
// Column filtering is dispatched 4-way based on the column type,
|
||||
// which defines implementation of comparison operations for the column values
|
||||
enum ENUM_KIND
|
||||
{
|
||||
KIND_DEFAULT, // compared as signed integers
|
||||
KIND_UNSIGNED, // compared as unsigned integers
|
||||
KIND_FLOAT, // compared as floating-point numbers
|
||||
KIND_TEXT
|
||||
}; // whitespace-trimmed and then compared as signed integers
|
||||
|
||||
inline uint64_t order_swap(uint64_t x)
|
||||
{
|
||||
uint64_t ret = (x >> 56) | ((x << 40) & 0x00FF000000000000ULL) | ((x << 24) & 0x0000FF0000000000ULL) |
|
||||
@ -950,16 +939,16 @@ inline uint16_t vectWriteColValues(
|
||||
primitives::RIDType* ridDstArray, // The actual dst arrray ptr to start writing RIDs
|
||||
primitives::RIDType* ridSrcArray) // The actual src array ptr to read RIDs
|
||||
{
|
||||
constexpr const uint16_t WIDTH = sizeof(T);
|
||||
using SIMD_TYPE = typename VT::SIMD_TYPE;
|
||||
SIMD_TYPE tmpStorageVector;
|
||||
constexpr const uint16_t FilterMaskStep = VT::FilterMaskStep;
|
||||
using SimdType = typename VT::SimdType;
|
||||
SimdType tmpStorageVector;
|
||||
T* tmpDstVecTPtr = reinterpret_cast<T*>(&tmpStorageVector);
|
||||
// Saving values based on writeMask into tmp vec.
|
||||
// Min/Max processing.
|
||||
// The mask is 16 bit long and it describes N elements.
|
||||
// N = sizeof(vector type) / WIDTH.
|
||||
uint32_t j = 0;
|
||||
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += WIDTH)
|
||||
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += FilterMaskStep)
|
||||
{
|
||||
MT bitMapPosition = 1 << it;
|
||||
if (writeMask & bitMapPosition)
|
||||
@ -1016,16 +1005,16 @@ inline uint16_t vectWriteColValues(
|
||||
primitives::RIDType* ridDstArray, // The actual dst arrray ptr to start writing RIDs
|
||||
primitives::RIDType* ridSrcArray) // The actual src array ptr to read RIDs
|
||||
{
|
||||
constexpr const uint16_t WIDTH = sizeof(T);
|
||||
using SIMD_TYPE = typename VT::SIMD_TYPE;
|
||||
SIMD_TYPE tmpStorageVector;
|
||||
constexpr const uint16_t FilterMaskStep = VT::FilterMaskStep;
|
||||
using SimdType = typename VT::SimdType;
|
||||
SimdType tmpStorageVector;
|
||||
T* tmpDstVecTPtr = reinterpret_cast<T*>(&tmpStorageVector);
|
||||
// Saving values based on writeMask into tmp vec.
|
||||
// Min/Max processing.
|
||||
// The mask is 16 bit long and it describes N elements.
|
||||
// N = sizeof(vector type) / WIDTH.
|
||||
uint32_t j = 0;
|
||||
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += WIDTH)
|
||||
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += FilterMaskStep)
|
||||
{
|
||||
MT bitMapPosition = 1 << it;
|
||||
if (writeMask & bitMapPosition)
|
||||
@ -1064,13 +1053,13 @@ inline uint16_t vectWriteRIDValues(
|
||||
MT nonNullOrEmptyMask, // SIMD intrinsics inverce bitmask for NULL/EMPTY values
|
||||
primitives::RIDType* ridSrcArray) // The actual src array ptr to read RIDs
|
||||
{
|
||||
constexpr const uint16_t WIDTH = sizeof(T);
|
||||
constexpr const uint16_t FilterMaskStep = VT::FilterMaskStep;
|
||||
primitives::RIDType* origRIDDstArray = ridDstArray;
|
||||
// Saving values based on writeMask into tmp vec.
|
||||
// Min/Max processing.
|
||||
// The mask is 16 bit long and it describes N elements where N = sizeof(vector type) / WIDTH.
|
||||
uint16_t j = 0;
|
||||
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += WIDTH)
|
||||
for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += FilterMaskStep)
|
||||
{
|
||||
MT bitMapPosition = 1 << it;
|
||||
if (writeMask & (1 << it))
|
||||
@ -1213,13 +1202,11 @@ inline SIMD_WRAPPER_TYPE simdDataLoadTemplate(VT& processor, const T* srcArray,
|
||||
{
|
||||
constexpr const uint16_t WIDTH = sizeof(T);
|
||||
constexpr const uint16_t VECTOR_SIZE = VT::vecByteSize / WIDTH;
|
||||
using SIMD_TYPE = typename VT::SIMD_TYPE;
|
||||
SIMD_TYPE result;
|
||||
using SimdType = typename VT::SimdType;
|
||||
SimdType result;
|
||||
T* resultTypedPtr = reinterpret_cast<T*>(&result);
|
||||
for (uint32_t i = 0; i < VECTOR_SIZE; ++i)
|
||||
{
|
||||
// std::cout << " simdDataLoadTemplate ridArray[ridArrayOffset] " << (int8_t) origSrcArray[ridArray[i]] <<
|
||||
// " ridArray[i] " << ridArray[i] << "\n";
|
||||
resultTypedPtr[i] = origSrcArray[ridArray[i]];
|
||||
}
|
||||
|
||||
@ -1243,12 +1230,13 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
||||
const T nullValue, T Min, T Max, const bool isNullValueMatches)
|
||||
{
|
||||
constexpr const uint16_t WIDTH = sizeof(T);
|
||||
using SIMD_TYPE = typename VT::SIMD_TYPE;
|
||||
using SIMD_WRAPPER_TYPE = typename VT::SIMD_WRAPPER_TYPE;
|
||||
using SimdType = typename VT::SimdType;
|
||||
using SimdWrapperType = typename VT::SimdWrapperType;
|
||||
using FilterType = typename VT::FilterType;
|
||||
VT simdProcessor;
|
||||
SIMD_TYPE dataVec;
|
||||
SIMD_TYPE emptyFilterArgVec = simdProcessor.loadValue(emptyValue);
|
||||
SIMD_TYPE nullFilterArgVec = simdProcessor.loadValue(nullValue);
|
||||
SimdType dataVec;
|
||||
SimdType emptyFilterArgVec = simdProcessor.emptyNullLoadValue(emptyValue);
|
||||
SimdType nullFilterArgVec = simdProcessor.emptyNullLoadValue(nullValue);
|
||||
MT writeMask, nonEmptyMask, nonNullMask, nonNullOrEmptyMask;
|
||||
MT initFilterMask = 0xFFFF;
|
||||
primitives::RIDType rid = 0;
|
||||
@ -1262,18 +1250,16 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
||||
ColumnFilterMode columnFilterMode = ALWAYS_TRUE;
|
||||
const ST* filterSet = nullptr;
|
||||
const ParsedColumnFilter::RFsType* filterRFs = nullptr;
|
||||
|
||||
uint8_t outputType = in->OutputType;
|
||||
|
||||
constexpr uint16_t VECTOR_SIZE = VT::vecByteSize / WIDTH;
|
||||
// If there are RIDs use its number to get a number of vectorized iterations.
|
||||
uint16_t iterNumber = HAS_INPUT_RIDS ? ridSize / VECTOR_SIZE : srcSize / VECTOR_SIZE;
|
||||
uint32_t filterCount = 0;
|
||||
// These pragmas are to silence GCC warnings
|
||||
// warning: ignoring attributes on template argument
|
||||
// warning: ignoring attributes on template argument
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
std::vector<SIMD_TYPE> filterArgsVectors;
|
||||
std::vector<SimdType> filterArgsVectors;
|
||||
auto ptrA = std::mem_fn(&VT::cmpEq);
|
||||
using COPType = decltype(ptrA);
|
||||
std::vector<COPType> copFunctorVec;
|
||||
@ -1314,11 +1300,18 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
||||
for (uint32_t j = 0; j < filterCount; ++j)
|
||||
{
|
||||
// Preload filter argument values only once.
|
||||
filterArgsVectors[j] = simdProcessor.loadValue(filterValues[j]);
|
||||
filterArgsVectors[j] = simdProcessor.loadValue(*((FilterType*)&filterValues[j]));
|
||||
switch (filterCOPs[j])
|
||||
{
|
||||
case (COMPARE_EQ): copFunctorVec.push_back(std::mem_fn(&VT::cmpEq)); break;
|
||||
case (COMPARE_EQ):
|
||||
// Skipping extra filter pass generated by IS NULL
|
||||
if (memcmp(&filterValues[j], &nullValue, sizeof(nullValue)) == 0)
|
||||
copFunctorVec.push_back(std::mem_fn(&VT::nullEmptyCmpEq));
|
||||
else
|
||||
copFunctorVec.push_back(std::mem_fn(&VT::cmpEq));
|
||||
break;
|
||||
case (COMPARE_GE): copFunctorVec.push_back(std::mem_fn(&VT::cmpGe)); break;
|
||||
|
||||
case (COMPARE_GT): copFunctorVec.push_back(std::mem_fn(&VT::cmpGt)); break;
|
||||
case (COMPARE_LE): copFunctorVec.push_back(std::mem_fn(&VT::cmpLe)); break;
|
||||
case (COMPARE_LT): copFunctorVec.push_back(std::mem_fn(&VT::cmpLt)); break;
|
||||
@ -1344,14 +1337,13 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
||||
{
|
||||
primitives::RIDType ridOffset = i * VECTOR_SIZE;
|
||||
assert(!HAS_INPUT_RIDS || (HAS_INPUT_RIDS && ridSize >= ridOffset));
|
||||
dataVec = simdDataLoadTemplate<VT, SIMD_WRAPPER_TYPE, HAS_INPUT_RIDS, T>(simdProcessor, srcArray,
|
||||
origSrcArray, ridArray, i)
|
||||
dataVec = simdDataLoadTemplate<VT, SimdWrapperType, HAS_INPUT_RIDS, T>(simdProcessor, srcArray,
|
||||
origSrcArray, ridArray, i)
|
||||
.v;
|
||||
// empty check
|
||||
nonEmptyMask = simdProcessor.cmpNe(dataVec, emptyFilterArgVec);
|
||||
nonEmptyMask = simdProcessor.nullEmptyCmpNe(dataVec, emptyFilterArgVec);
|
||||
writeMask = nonEmptyMask;
|
||||
// NULL check
|
||||
nonNullMask = simdProcessor.cmpNe(dataVec, nullFilterArgVec);
|
||||
nonNullMask = simdProcessor.nullEmptyCmpNe(dataVec, nullFilterArgVec);
|
||||
// Exclude NULLs from the resulting set if NULL doesn't match the filters.
|
||||
writeMask = isNullValueMatches ? writeMask : writeMask & nonNullMask;
|
||||
nonNullOrEmptyMask = nonNullMask & nonEmptyMask;
|
||||
@ -1397,6 +1389,7 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
||||
|
||||
// Set the number of output values here b/c tail processing can skip this operation.
|
||||
out->NVALS = totalValuesWritten;
|
||||
|
||||
// Write captured Min/Max values to *out
|
||||
out->ValidMinMax = validMinMax;
|
||||
if (validMinMax)
|
||||
@ -1415,17 +1408,18 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
|
||||
}
|
||||
|
||||
// This routine dispatches template function calls to reduce branching.
|
||||
template <typename T, ENUM_KIND KIND, typename FT, typename ST>
|
||||
void vectorizedFilteringDispatcher(NewColRequestHeader* in, ColResultHeader* out, const T* srcArray,
|
||||
const uint32_t srcSize, uint16_t* ridArray, const uint16_t ridSize,
|
||||
ParsedColumnFilter* parsedColumnFilter, const bool validMinMax,
|
||||
const T emptyValue, const T nullValue, T Min, T Max,
|
||||
template <typename STORAGE_TYPE, ENUM_KIND KIND, typename FT, typename ST>
|
||||
void vectorizedFilteringDispatcher(NewColRequestHeader* in, ColResultHeader* out,
|
||||
const STORAGE_TYPE* srcArray, const uint32_t srcSize, uint16_t* ridArray,
|
||||
const uint16_t ridSize, ParsedColumnFilter* parsedColumnFilter,
|
||||
const bool validMinMax, const STORAGE_TYPE emptyValue,
|
||||
const STORAGE_TYPE nullValue, STORAGE_TYPE Min, STORAGE_TYPE Max,
|
||||
const bool isNullValueMatches)
|
||||
{
|
||||
constexpr const uint8_t WIDTH = sizeof(T);
|
||||
// TODO make a SFINAE template switch for the class template spec.
|
||||
using SIMD_TYPE = simd::vi128_wr;
|
||||
using VT = typename simd::SimdFilterProcessor<SIMD_TYPE, WIDTH>;
|
||||
// Using struct to dispatch SIMD type based on integral type T.
|
||||
using SimdType = typename simd::IntegralToSIMD<STORAGE_TYPE, KIND>::type;
|
||||
using FilterType = typename simd::StorageToFiltering<STORAGE_TYPE, KIND>::type;
|
||||
using VT = typename simd::SimdFilterProcessor<SimdType, FilterType>;
|
||||
bool hasInputRIDs = (in->NVALS > 0) ? true : false;
|
||||
if (hasInputRIDs)
|
||||
{
|
||||
@ -1433,22 +1427,22 @@ void vectorizedFilteringDispatcher(NewColRequestHeader* in, ColResultHeader* out
|
||||
switch (in->OutputType)
|
||||
{
|
||||
case OT_RID:
|
||||
vectorizedFiltering<T, VT, hasInput, OT_RID, KIND, FT, ST>(
|
||||
vectorizedFiltering<STORAGE_TYPE, VT, hasInput, OT_RID, KIND, FT, ST>(
|
||||
in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue,
|
||||
nullValue, Min, Max, isNullValueMatches);
|
||||
break;
|
||||
case OT_BOTH:
|
||||
vectorizedFiltering<T, VT, hasInput, OT_BOTH, KIND, FT, ST>(
|
||||
vectorizedFiltering<STORAGE_TYPE, VT, hasInput, OT_BOTH, KIND, FT, ST>(
|
||||
in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue,
|
||||
nullValue, Min, Max, isNullValueMatches);
|
||||
break;
|
||||
case OT_TOKEN:
|
||||
vectorizedFiltering<T, VT, hasInput, OT_TOKEN, KIND, FT, ST>(
|
||||
vectorizedFiltering<STORAGE_TYPE, VT, hasInput, OT_TOKEN, KIND, FT, ST>(
|
||||
in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue,
|
||||
nullValue, Min, Max, isNullValueMatches);
|
||||
break;
|
||||
case OT_DATAVALUE:
|
||||
vectorizedFiltering<T, VT, hasInput, OT_DATAVALUE, KIND, FT, ST>(
|
||||
vectorizedFiltering<STORAGE_TYPE, VT, hasInput, OT_DATAVALUE, KIND, FT, ST>(
|
||||
in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue,
|
||||
nullValue, Min, Max, isNullValueMatches);
|
||||
break;
|
||||
@ -1460,22 +1454,22 @@ void vectorizedFilteringDispatcher(NewColRequestHeader* in, ColResultHeader* out
|
||||
switch (in->OutputType)
|
||||
{
|
||||
case OT_RID:
|
||||
vectorizedFiltering<T, VT, hasInput, OT_RID, KIND, FT, ST>(
|
||||
vectorizedFiltering<STORAGE_TYPE, VT, hasInput, OT_RID, KIND, FT, ST>(
|
||||
in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue,
|
||||
nullValue, Min, Max, isNullValueMatches);
|
||||
break;
|
||||
case OT_BOTH:
|
||||
vectorizedFiltering<T, VT, hasInput, OT_BOTH, KIND, FT, ST>(
|
||||
vectorizedFiltering<STORAGE_TYPE, VT, hasInput, OT_BOTH, KIND, FT, ST>(
|
||||
in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue,
|
||||
nullValue, Min, Max, isNullValueMatches);
|
||||
break;
|
||||
case OT_TOKEN:
|
||||
vectorizedFiltering<T, VT, hasInput, OT_TOKEN, KIND, FT, ST>(
|
||||
vectorizedFiltering<STORAGE_TYPE, VT, hasInput, OT_TOKEN, KIND, FT, ST>(
|
||||
in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue,
|
||||
nullValue, Min, Max, isNullValueMatches);
|
||||
break;
|
||||
case OT_DATAVALUE:
|
||||
vectorizedFiltering<T, VT, hasInput, OT_DATAVALUE, KIND, FT, ST>(
|
||||
vectorizedFiltering<STORAGE_TYPE, VT, hasInput, OT_DATAVALUE, KIND, FT, ST>(
|
||||
in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue,
|
||||
nullValue, Min, Max, isNullValueMatches);
|
||||
break;
|
||||
@ -1539,8 +1533,8 @@ void filterColumnData(NewColRequestHeader* in, ColResultHeader* out, uint16_t* r
|
||||
// all values w/o any filter(even empty values filter) applied.
|
||||
|
||||
#if defined(__x86_64__)
|
||||
// Don't use vectorized filtering for non-integer based data types wider than 16 bytes.
|
||||
if (KIND < KIND_FLOAT && WIDTH < 16)
|
||||
// Don't use vectorized filtering for text based data types.
|
||||
if (KIND <= KIND_FLOAT && WIDTH < 16)
|
||||
{
|
||||
bool canUseFastFiltering = true;
|
||||
for (uint32_t i = 0; i < filterCount; ++i)
|
||||
@ -1601,7 +1595,6 @@ void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in, Co
|
||||
auto dataType = (execplan::CalpontSystemCatalog::ColDataType)in->colType.DataType;
|
||||
if (dataType == execplan::CalpontSystemCatalog::FLOAT)
|
||||
{
|
||||
// WIP make this inline function
|
||||
const uint16_t ridSize = in->NVALS;
|
||||
uint16_t* ridArray = in->getRIDArrayPtr(W);
|
||||
const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE : BLOCK_SIZE / W;
|
||||
@ -1771,4 +1764,4 @@ template void primitives::PrimitiveProcessor::columnScanAndFilter<int128_t>(NewC
|
||||
ColResultHeader*);
|
||||
|
||||
} // namespace primitives
|
||||
// vim:ts=4 sw=4:
|
||||
// vim:ts=2 sw=2:
|
||||
|
@ -167,6 +167,7 @@ class ParsedColumnFilter
|
||||
using RFsType = uint8_t;
|
||||
static constexpr uint32_t noSetFilterThreshold = 8;
|
||||
ColumnFilterMode columnFilterMode;
|
||||
// Very unfortunately prestored_argVals can also be used to store double/float values.
|
||||
boost::shared_array<int64_t> prestored_argVals;
|
||||
boost::shared_array<int128_t> prestored_argVals128;
|
||||
boost::shared_array<CopsType> prestored_cops;
|
||||
@ -181,7 +182,7 @@ class ParsedColumnFilter
|
||||
template <typename T, typename std::enable_if<std::is_same<T, int64_t>::value, T>::type* = nullptr>
|
||||
T* getFilterVals()
|
||||
{
|
||||
return prestored_argVals.get();
|
||||
return reinterpret_cast<T*>(prestored_argVals.get());
|
||||
}
|
||||
|
||||
template <typename T, typename std::enable_if<std::is_same<T, int128_t>::value, T>::type* = nullptr>
|
||||
@ -561,4 +562,4 @@ boost::shared_ptr<ParsedColumnFilter> _parseColumnFilter(
|
||||
|
||||
} // namespace primitives
|
||||
|
||||
// vim:ts=4 sw=4:
|
||||
// vim:ts=2 sw=2:
|
||||
|
Reference in New Issue
Block a user