/* Copyright (C) 2014 InfiniDB, Inc. Copyright (C) 2016-2021 MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include // #define NDEBUG #include #include #include #ifndef _MSC_VER #include #else #endif using namespace std; #include using namespace boost; #include "primitiveprocessor.h" #include "messagelog.h" #include "messageobj.h" #include "we_type.h" #include "stats.h" #include "primproc.h" #include "dataconvert.h" #include "mcs_decimal.h" #include "simd_sse.h" #include "utils/common/columnwidth.h" using namespace logging; using namespace dbbc; using namespace primitives; using namespace primitiveprocessor; using namespace execplan; namespace { using MT = uint16_t; inline uint64_t order_swap(uint64_t x) { uint64_t ret = (x >> 56) | ((x << 40) & 0x00FF000000000000ULL) | ((x << 24) & 0x0000FF0000000000ULL) | ((x << 8) & 0x000000FF00000000ULL) | ((x >> 8) & 0x00000000FF000000ULL) | ((x >> 24) & 0x0000000000FF0000ULL) | ((x >> 40) & 0x000000000000FF00ULL) | (x << 56); return ret; } template inline int compareBlock(const void* a, const void* b) { return ((*(T*)a) - (*(T*)b)); } // this function is out-of-band, we don't need to inline it void logIt(int mid, int arg1, const string& arg2 = string()) { MessageLog logger(LoggingID(28)); logging::Message::Args args; Message msg(mid); args.add(arg1); if (arg2.length() > 0) args.add(arg2); msg.format(args); logger.logErrorMessage(msg); } template inline bool colCompare_(const T& val1, const T& val2, uint8_t COP) { switch (COP) { case COMPARE_NIL: return false; case COMPARE_LT: return val1 < val2; case COMPARE_EQ: return val1 == val2; case COMPARE_LE: return val1 <= val2; case COMPARE_GT: return val1 > val2; case COMPARE_NE: return val1 != val2; case COMPARE_GE: return val1 >= val2; default: logIt(34, COP, "colCompare_"); return false; // throw an exception here? } } inline bool colCompareStr(const ColRequestHeaderDataType& type, uint8_t COP, const utils::ConstString& val1, const utils::ConstString& val2) { int error = 0; bool rc = primitives::StringComparator(type).op(&error, COP, val1, val2); if (error) { logIt(34, COP, "colCompareStr"); return false; // throw an exception here? } return rc; } template inline bool colCompare_(const T& val1, const T& val2, uint8_t COP, uint8_t rf) { switch (COP) { case COMPARE_NIL: return false; case COMPARE_LT: return val1 < val2 || (val1 == val2 && (rf & 0x01)); case COMPARE_LE: return val1 < val2 || (val1 == val2 && rf ^ 0x80); case COMPARE_EQ: return val1 == val2 && rf == 0; case COMPARE_NE: return val1 != val2 || rf != 0; case COMPARE_GE: return val1 > val2 || (val1 == val2 && rf ^ 0x01); case COMPARE_GT: return val1 > val2 || (val1 == val2 && (rf & 0x80)); default: logIt(34, COP, "colCompare_"); return false; // throw an exception here? } } //@bug 1828 Like must be a string compare. inline bool colStrCompare_(uint64_t val1, uint64_t val2, uint8_t COP, uint8_t rf) { switch (COP) { case COMPARE_NIL: return false; case COMPARE_LT: return val1 < val2 || (val1 == val2 && rf != 0); case COMPARE_LE: return val1 <= val2; case COMPARE_EQ: return val1 == val2 && rf == 0; case COMPARE_NE: return val1 != val2 || rf != 0; case COMPARE_GE: return val1 > val2 || (val1 == val2 && rf == 0); case COMPARE_GT: return val1 > val2; case COMPARE_LIKE: case COMPARE_NLIKE: default: logIt(34, COP, "colStrCompare_"); return false; // throw an exception here? } } // Set the minimum and maximum in the return header if we will be doing a block scan and // we are dealing with a type that is comparable as a 64 bit integer. Subsequent calls can then // skip this block if the value being searched is outside of the Min/Max range. inline bool isMinMaxValid(const NewColRequestHeader* in) { if (in->NVALS != 0) { return false; } else { switch (in->colType.DataType) { case CalpontSystemCatalog::CHAR: return (in->colType.DataSize < 9); case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: return (in->colType.DataSize < 8); case CalpontSystemCatalog::TINYINT: case CalpontSystemCatalog::SMALLINT: case CalpontSystemCatalog::MEDINT: case CalpontSystemCatalog::INT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::UTINYINT: case CalpontSystemCatalog::USMALLINT: case CalpontSystemCatalog::UMEDINT: case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UBIGINT: return true; case CalpontSystemCatalog::DECIMAL: case CalpontSystemCatalog::UDECIMAL: return (in->colType.DataSize <= datatypes::MAXDECIMALWIDTH); default: return false; } } } template ::type* = nullptr> inline bool colCompareDispatcherT(T1 columnValue, T2 filterValue, uint8_t cop, uint8_t rf, const ColRequestHeaderDataType& typeHolder, bool isVal2Null) { float dVal1 = *((float*)&columnValue); float dVal2 = *((float*)&filterValue); return colCompare_(dVal1, dVal2, cop); } template ::type* = nullptr> inline bool colCompareDispatcherT(T1 columnValue, T2 filterValue, uint8_t cop, uint8_t rf, const ColRequestHeaderDataType& typeHolder, bool isVal2Null) { double dVal1 = *((double*)&columnValue); double dVal2 = *((double*)&filterValue); return colCompare_(dVal1, dVal2, cop); } template ::type* = nullptr> inline bool colCompareDispatcherT(T1 columnValue, T2 filterValue, uint8_t cop, uint8_t rf, const ColRequestHeaderDataType& typeHolder, bool isVal2Null) { if (cop & COMPARE_LIKE) // LIKE and NOT LIKE { utils::ConstString subject{reinterpret_cast(&columnValue), COL_WIDTH}; utils::ConstString pattern{reinterpret_cast(&filterValue), COL_WIDTH}; return typeHolder.like(cop & COMPARE_NOT, subject.rtrimZero(), pattern.rtrimZero()); } if (!rf) { // A temporary hack for xxx_nopad_bin collations // TODO: MCOL-4534 Improve comparison performance in 8bit nopad_bin collations if ((typeHolder.getCharset().state & (MY_CS_BINSORT | MY_CS_NOPAD)) == (MY_CS_BINSORT | MY_CS_NOPAD)) return colCompare_(order_swap(columnValue), order_swap(filterValue), cop); utils::ConstString s1{reinterpret_cast(&columnValue), COL_WIDTH}; utils::ConstString s2{reinterpret_cast(&filterValue), COL_WIDTH}; return colCompareStr(typeHolder, cop, s1.rtrimZero(), s2.rtrimZero()); } else return colStrCompare_(order_swap(columnValue), order_swap(filterValue), cop, rf); } // This template where IS_NULL = true is used only comparing filter predicate // values with column NULL so I left branching here. template ::type* = nullptr> inline bool colCompareDispatcherT(T1 columnValue, T2 filterValue, uint8_t cop, uint8_t rf, const ColRequestHeaderDataType& typeHolder, bool isVal2Null) { if (IS_NULL == isVal2Null || (isVal2Null && cop == COMPARE_NE)) { if (KIND_UNSIGNED == KIND) { // Ugly hack to convert all to the biggest type b/w T1 and T2. // I presume that sizeof(T2) AKA a filter predicate type is GEQ sizeof(T1) AKA col type. using UT2 = typename datatypes::make_unsigned::type; UT2 ucolumnValue = columnValue; UT2 ufilterValue = filterValue; return colCompare_(ucolumnValue, ufilterValue, cop, rf); } else { // Ugly hack to convert all to the biggest type b/w T1 and T2. // I presume that sizeof(T2) AKA a filter predicate type is GEQ sizeof(T1) AKA col type. T2 tempVal1 = columnValue; return colCompare_(tempVal1, filterValue, cop, rf); } } else return false; } template ::type* = nullptr> inline bool colCompareDispatcherT(T1 columnValue, T2 filterValue, uint8_t cop, uint8_t rf, const ColRequestHeaderDataType& typeHolder, bool isVal2Null) { if (IS_NULL == isVal2Null || (isVal2Null && cop == COMPARE_NE)) { // Ugly hack to convert all to the biggest type b/w T1 and T2. // I presume that sizeof(T2)(a filter predicate type) is GEQ T1(col type). using UT2 = typename datatypes::make_unsigned::type; UT2 ucolumnValue = columnValue; UT2 ufilterValue = filterValue; return colCompare_(ucolumnValue, ufilterValue, cop, rf); } else return false; } template ::type* = nullptr> inline bool colCompareDispatcherT(T1 columnValue, T2 filterValue, uint8_t cop, uint8_t rf, const ColRequestHeaderDataType& typeHolder, bool isVal2Null) { if (IS_NULL == isVal2Null || (isVal2Null && cop == COMPARE_NE)) { // Ugly hack to convert all to the biggest type b/w T1 and T2. // I presume that sizeof(T2)(a filter predicate type) is GEQ T1(col type). T2 tempVal1 = columnValue; return colCompare_(tempVal1, filterValue, cop, rf); } else return false; } // Compare two column values using given comparison operation, // taking into account all rules about NULL values, string trimming and so on template inline bool colCompare(T1 columnValue, T2 filterValue, uint8_t cop, uint8_t rf, const ColRequestHeaderDataType& typeHolder, bool isVal2Null = false) { // cout << "comparing " << hex << columnValue << " to " << filterValue << endl; if (COMPARE_NIL == cop) return false; return colCompareDispatcherT(columnValue, filterValue, cop, rf, typeHolder, isVal2Null); } /***************************************************************************** *** NULL/EMPTY VALUES FOR EVERY COLUMN TYPE/WIDTH *************************** *****************************************************************************/ // Bit pattern representing EMPTY value for given column type/width // TBD Use typeHandler template ::type* = nullptr> T getEmptyValue(uint8_t type) { return datatypes::Decimal128Empty; } template ::type* = nullptr> T getEmptyValue(uint8_t type) { switch (type) { case CalpontSystemCatalog::DOUBLE: case CalpontSystemCatalog::UDOUBLE: return joblist::DOUBLEEMPTYROW; case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::TIME: case CalpontSystemCatalog::VARBINARY: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: return joblist::CHAR8EMPTYROW; case CalpontSystemCatalog::UBIGINT: return joblist::UBIGINTEMPTYROW; default: return joblist::BIGINTEMPTYROW; } } template ::type* = nullptr> T getEmptyValue(uint8_t type) { switch (type) { case CalpontSystemCatalog::FLOAT: case CalpontSystemCatalog::UFLOAT: return joblist::FLOATEMPTYROW; case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::TIME: return joblist::CHAR4EMPTYROW; case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UMEDINT: return joblist::UINTEMPTYROW; default: return joblist::INTEMPTYROW; } } template ::type* = nullptr> T getEmptyValue(uint8_t type) { switch (type) { case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::TIME: return joblist::CHAR2EMPTYROW; case CalpontSystemCatalog::USMALLINT: return joblist::USMALLINTEMPTYROW; default: return joblist::SMALLINTEMPTYROW; } } template ::type* = nullptr> T getEmptyValue(uint8_t type) { switch (type) { case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::TIME: return joblist::CHAR1EMPTYROW; case CalpontSystemCatalog::UTINYINT: return joblist::UTINYINTEMPTYROW; default: return joblist::TINYINTEMPTYROW; } } // Bit pattern representing NULL value for given column type/width // TBD Use TypeHandler template ::type* = nullptr> T getNullValue(uint8_t type) { return datatypes::Decimal128Null; } template ::type* = nullptr> T getNullValue(uint8_t type) { switch (type) { case CalpontSystemCatalog::DOUBLE: case CalpontSystemCatalog::UDOUBLE: return joblist::DOUBLENULL; case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::TIME: case CalpontSystemCatalog::VARBINARY: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: return joblist::CHAR8NULL; case CalpontSystemCatalog::UBIGINT: return joblist::UBIGINTNULL; default: return joblist::BIGINTNULL; } } template ::type* = nullptr> T getNullValue(uint8_t type) { switch (type) { case CalpontSystemCatalog::FLOAT: case CalpontSystemCatalog::UFLOAT: return joblist::FLOATNULL; case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: return joblist::CHAR4NULL; case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::TIME: return joblist::DATENULL; case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UMEDINT: return joblist::UINTNULL; default: return joblist::INTNULL; } } template ::type* = nullptr> T getNullValue(uint8_t type) { switch (type) { case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::TIME: return joblist::CHAR2NULL; case CalpontSystemCatalog::USMALLINT: return joblist::USMALLINTNULL; default: return joblist::SMALLINTNULL; } } template ::type* = nullptr> T getNullValue(uint8_t type) { switch (type) { case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::DATE: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::TIMESTAMP: case CalpontSystemCatalog::TIME: return joblist::CHAR1NULL; case CalpontSystemCatalog::UTINYINT: return joblist::UTINYINTNULL; default: return joblist::TINYINTNULL; } } // Check whether val is NULL (or alternative NULL bit pattern for 64-bit string types) template inline bool isNullValue(const T val, const T NULL_VALUE) { return val == NULL_VALUE; } template <> inline bool isNullValue(const int64_t val, const int64_t NULL_VALUE) { //@bug 339 might be a token here // TODO: what's up with the alternative NULL here? constexpr const int64_t ALT_NULL_VALUE = 0xFFFFFFFFFFFFFFFELL; return (val == NULL_VALUE || val == ALT_NULL_VALUE); } // // FILTER A COLUMN VALUE // template ::type* = nullptr> inline bool noneValuesInArray(const T curValue, const FT* filterValues, const uint32_t filterCount) { // ignore NULLs in the array and in the column data return false; } template ::type* = nullptr> inline bool noneValuesInArray(const T curValue, const FT* filterValues, const uint32_t filterCount) { for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) { if (curValue == static_cast(filterValues[argIndex])) return false; } return true; } template ::type* = nullptr> inline bool noneValuesInSet(const T curValue, const ST* filterSet) { // bug 1920: ignore NULLs in the set and in the column data return false; } template ::type* = nullptr> inline bool noneValuesInSet(const T curValue, const ST* filterSet) { bool found = (filterSet->find(curValue) != filterSet->end()); return !found; } // The routine is used to test the value from a block against filters // according with columnFilterMode(see the corresponding enum for details). // Returns true if the curValue matches the filter. template inline bool matchingColValue( const T curValue, const ColumnFilterMode columnFilterMode, const ST* filterSet, // Set of values for simple filters (any of values / none of them) const uint32_t filterCount, // Number of filter elements, each described by one entry in the following arrays: const uint8_t* filterCOPs, // comparison operation const FT* filterValues, // value to compare to const uint8_t* filterRFs, // reverse byte order flags const ColRequestHeaderDataType& typeHolder, const T NULL_VALUE) // Bit pattern representing NULL value for this column type/width { /* In order to make filtering as fast as possible, we replaced the single generic algorithm with several algorithms, better tailored for more specific cases: empty filter, single comparison, and/or/xor comparison results, one/none of small/large set of values */ switch (columnFilterMode) { // Empty filter is always true case ALWAYS_TRUE: return true; // Filter consisting of exactly one comparison operation case SINGLE_COMPARISON: { auto filterValue = filterValues[0]; // This can be future optimized checking if a filterValue is NULL or not bool cmp = colCompare(curValue, filterValue, filterCOPs[0], filterRFs[0], typeHolder, isNullValue(filterValue, NULL_VALUE)); return cmp; } // Filter is true if ANY comparison is true (BOP_OR) case ANY_COMPARISON_TRUE: { for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) { auto filterValue = filterValues[argIndex]; // This can be future optimized checking if a filterValues are NULLs or not before the higher level // loop. bool cmp = colCompare(curValue, filterValue, filterCOPs[argIndex], filterRFs[argIndex], typeHolder, isNullValue(filterValue, NULL_VALUE)); // Short-circuit the filter evaluation - true || ... == true if (cmp == true) return true; } // We can get here only if all filters returned false return false; } // Filter is true only if ALL comparisons are true (BOP_AND) case ALL_COMPARISONS_TRUE: { for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) { auto filterValue = filterValues[argIndex]; // This can be future optimized checking if a filterValues are NULLs or not before the higher level // loop. bool cmp = colCompare(curValue, filterValue, filterCOPs[argIndex], filterRFs[argIndex], typeHolder, isNullValue(filterValue, NULL_VALUE)); // Short-circuit the filter evaluation - false && ... = false if (cmp == false) return false; } // We can get here only if all filters returned true return true; } // XORing results of comparisons (BOP_XOR) case XOR_COMPARISONS: { bool result = false; for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) { auto filterValue = filterValues[argIndex]; // This can be future optimized checking if a filterValues are NULLs or not before the higher level // loop. bool cmp = colCompare(curValue, filterValue, filterCOPs[argIndex], filterRFs[argIndex], typeHolder, isNullValue(filterValue, NULL_VALUE)); result ^= cmp; } return result; } // ONE of the values in the small set represented by an array (BOP_OR + all COMPARE_EQ) case ONE_OF_VALUES_IN_ARRAY: { for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) { if (curValue == static_cast(filterValues[argIndex])) return true; } return false; } // NONE of the values in the small set represented by an array (BOP_AND + all COMPARE_NE) case NONE_OF_VALUES_IN_ARRAY: return noneValuesInArray(curValue, filterValues, filterCount); // ONE of the values in the set is equal to the value checked (BOP_OR + all COMPARE_EQ) case ONE_OF_VALUES_IN_SET: { bool found = (filterSet->find(curValue) != filterSet->end()); return found; } // NONE of the values in the set is equal to the value checked (BOP_AND + all COMPARE_NE) case NONE_OF_VALUES_IN_SET: return noneValuesInSet(curValue, filterSet); default: idbassert(0); return true; } } /***************************************************************************** *** MISC FUNCS ************************************************************** *****************************************************************************/ // These two are templates update min/max values in the loop iterating the values in filterColumnData. template ::type* = nullptr> inline void updateMinMax(T& Min, T& Max, const T curValue, NewColRequestHeader* in) { constexpr int COL_WIDTH = sizeof(T); if (colCompare(Min, curValue, COMPARE_GT, false, in->colType)) Min = curValue; if (colCompare(Max, curValue, COMPARE_LT, false, in->colType)) Max = curValue; } template ::type* = nullptr> inline void updateMinMax(T& Min, T& Max, const T curValue, NewColRequestHeader* in) { if (Min > curValue) Min = curValue; if (Max < curValue) Max = curValue; } // The next templates group sets initial Min/Max values in filterColumnData. template ::type* = nullptr> T getInitialMin(NewColRequestHeader* in) { const CHARSET_INFO& cs = in->colType.getCharset(); T Min = 0; cs.max_str((uchar*)&Min, sizeof(Min), sizeof(Min)); return Min; } template ::type* = nullptr> T getInitialMin(NewColRequestHeader* in) { return datatypes::numeric_limits::max(); } template ::type* = nullptr> T getInitialMax(NewColRequestHeader* in) { return datatypes::numeric_limits::min(); } template ::type* = nullptr> T getInitialMax(NewColRequestHeader* in) { return 0; } template ::type* = nullptr> T getInitialMax(NewColRequestHeader* in) { const CHARSET_INFO& cs = in->colType.getCharset(); T Max = 0; cs.min_str((uchar*)&Max, sizeof(Max), sizeof(Max)); return Max; } /***************************************************************************** *** READ COLUMN VALUES ****************************************************** *****************************************************************************/ // Read one ColValue from the input block. // Return true on success, false on End of Block. // Values are read from srcArray either in natural order or in the order defined by ridArray. // Empty values are skipped, unless ridArray==0 && !(OutputType & OT_RID). template inline bool nextColValue( T& result, // Place for the value returned bool* isEmpty, // ... and flag whether it's EMPTY uint32_t* index, // Successive index either in srcArray (going from 0 to srcSize-1) or ridArray (0..ridSize-1) uint16_t* rid, // Index in srcArray of the value returned const T* srcArray, // Input array const uint32_t srcSize, // ... and its size const uint16_t* ridArray, // Optional array of indexes into srcArray, that defines the read order const uint16_t ridSize, // ... and its size const uint8_t OutputType, // Used to decide whether to skip EMPTY values T EMPTY_VALUE) { auto i = *index; // local copy of *index to speed up loops T value; // value to be written into *result, local for the same reason if (ridArray) { // Read next non-empty value in the order defined by ridArray for (;; i++) { if (UNLIKELY(i >= ridSize)) return false; value = srcArray[ridArray[i]]; if (value != EMPTY_VALUE) break; } *rid = ridArray[i]; *isEmpty = false; } else if (OutputType & OT_RID) // TODO: check correctness of this condition for SKIP_EMPTY_VALUES { // Read next non-empty value in the natural order for (;; i++) { if (UNLIKELY(i >= srcSize)) return false; value = srcArray[i]; if (value != EMPTY_VALUE) break; } *rid = i; *isEmpty = false; } else { // Read next value in the natural order if (UNLIKELY(i >= srcSize)) return false; *rid = i; value = srcArray[i]; *isEmpty = (value == EMPTY_VALUE); } *index = i + 1; result = value; return true; } /// /// WRITE COLUMN VALUES /// // Write the value index in srcArray and/or the value itself, depending on bits in OutputType, // into the output buffer and update the output pointer. // TODO Introduce another dispatching layer based on OutputType. template inline void writeColValue(uint8_t OutputType, ColResultHeader* out, uint16_t rid, const T* srcArray) { // TODO move base ptr calculation one level up. uint8_t* outPtr = reinterpret_cast(&out[1]); auto idx = out->NVALS++; if (OutputType & OT_RID) { auto* outPos = getRIDArrayPosition(outPtr, idx); *outPos = rid; out->RidFlags |= (1 << (rid >> 9)); // set the (row/512)'th bit } if (OutputType & (OT_TOKEN | OT_DATAVALUE)) { // TODO move base ptr calculation one level up. T* outPos = getValuesArrayPosition(primitives::getFirstValueArrayPosition(out), idx); // TODO check bytecode for the 16 byte type *outPos = srcArray[rid]; } } #if defined(__x86_64__) template ::type* = nullptr> inline void vectUpdateMinMax(const bool validMinMax, const bool isNonNullOrEmpty, T& Min, T& Max, T curValue, NewColRequestHeader* in) { if (validMinMax && isNonNullOrEmpty) updateMinMax(Min, Max, curValue, in); } // MCS won't update Min/Max for a block if it doesn't read all values in a block. // This happens if in->NVALS > 0(HAS_INPUT_RIDS is set). template ::type* = nullptr> inline void vectUpdateMinMax(const bool validMinMax, const bool isNonNullOrEmpty, T& Min, T& Max, T curValue, NewColRequestHeader* in) { // } template ::type* = nullptr> void vectWriteColValuesLoopRIDAsignment(primitives::RIDType* ridDstArray, ColResultHeader* out, const primitives::RIDType calculatedRID, const primitives::RIDType* ridSrcArray, const uint32_t srcRIDIdx) { *ridDstArray = calculatedRID; out->RidFlags |= (1 << (calculatedRID >> 9)); // set the (row/512)'th bit } template ::type* = nullptr> void vectWriteColValuesLoopRIDAsignment(primitives::RIDType* ridDstArray, ColResultHeader* out, const primitives::RIDType calculatedRID, const primitives::RIDType* ridSrcArray, const uint32_t srcRIDIdx) { *ridDstArray = ridSrcArray[srcRIDIdx]; out->RidFlags |= (1 << (ridSrcArray[srcRIDIdx] >> 9)); // set the (row/512)'th bit } // The set of SFINAE templates are used to write values/RID into the output buffer based on // a number of template parameters // No RIDs only values template ::type* = nullptr> inline uint16_t vectWriteColValues( VT& simdProcessor, // SIMD processor const MT writeMask, // SIMD intrinsics bitmask for values to write const MT nonNullOrEmptyMask, // SIMD intrinsics inverce bitmask for NULL/EMPTY values const bool validMinMax, // The flag to update Min/Max for a block or not const primitives::RIDType ridOffset, // The first RID value of the dataVecTPtr T* dataVecTPtr, // Typed SIMD vector from the input block char* dstArray, // the actual char dst array ptr to start writing values T& Min, T& Max, // Min/Max of the extent NewColRequestHeader* in, // Proto message ColResultHeader* out, // Proto message primitives::RIDType* ridDstArray, // The actual dst arrray ptr to start writing RIDs primitives::RIDType* ridSrcArray) // The actual src array ptr to read RIDs { constexpr const uint16_t FilterMaskStep = VT::FilterMaskStep; using SimdType = typename VT::SimdType; SimdType tmpStorageVector; T* tmpDstVecTPtr = reinterpret_cast(&tmpStorageVector); // Saving values based on writeMask into tmp vec. // Min/Max processing. // The mask is 16 bit long and it describes N elements. // N = sizeof(vector type) / WIDTH. uint32_t j = 0; for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += FilterMaskStep) { MT bitMapPosition = 1 << it; if (writeMask & bitMapPosition) { *tmpDstVecTPtr = dataVecTPtr[j]; ++tmpDstVecTPtr; } vectUpdateMinMax(validMinMax, nonNullOrEmptyMask & bitMapPosition, Min, Max, dataVecTPtr[j], in); } // Store the whole vector however one level up the stack // vectorizedFiltering() increases the dstArray by a number of // actual values written that is the result of this function. simdProcessor.store(dstArray, tmpStorageVector); return tmpDstVecTPtr - reinterpret_cast(&tmpStorageVector); } // RIDs no values template ::type* = nullptr> inline uint16_t vectWriteColValues( VT& simdProcessor, // SIMD processor const MT writeMask, // SIMD intrinsics bitmask for values to write const MT nonNullOrEmptyMask, // SIMD intrinsics inverce bitmask for NULL/EMPTY values const bool validMinMax, // The flag to update Min/Max for a block or not const primitives::RIDType ridOffset, // The first RID value of the dataVecTPtr T* dataVecTPtr, // Typed SIMD vector from the input block char* dstArray, // the actual char dst array ptr to start writing values T& Min, T& Max, // Min/Max of the extent NewColRequestHeader* in, // Proto message ColResultHeader* out, // Proto message primitives::RIDType* ridDstArray, // The actual dst arrray ptr to start writing RIDs primitives::RIDType* ridSrcArray) // The actual src array ptr to read RIDs { return 0; } // Both RIDs and values template ::type* = nullptr> inline uint16_t vectWriteColValues( VT& simdProcessor, // SIMD processor const MT writeMask, // SIMD intrinsics bitmask for values to write const MT nonNullOrEmptyMask, // SIMD intrinsics inverce bitmask for NULL/EMPTY values const bool validMinMax, // The flag to update Min/Max for a block or not const primitives::RIDType ridOffset, // The first RID value of the dataVecTPtr T* dataVecTPtr, // Typed SIMD vector from the input block char* dstArray, // the actual char dst array ptr to start writing values T& Min, T& Max, // Min/Max of the extent NewColRequestHeader* in, // Proto message ColResultHeader* out, // Proto message primitives::RIDType* ridDstArray, // The actual dst arrray ptr to start writing RIDs primitives::RIDType* ridSrcArray) // The actual src array ptr to read RIDs { constexpr const uint16_t FilterMaskStep = VT::FilterMaskStep; using SimdType = typename VT::SimdType; SimdType tmpStorageVector; T* tmpDstVecTPtr = reinterpret_cast(&tmpStorageVector); // Saving values based on writeMask into tmp vec. // Min/Max processing. // The mask is 16 bit long and it describes N elements. // N = sizeof(vector type) / WIDTH. uint32_t j = 0; for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += FilterMaskStep) { MT bitMapPosition = 1 << it; if (writeMask & bitMapPosition) { *tmpDstVecTPtr = dataVecTPtr[j]; ++tmpDstVecTPtr; vectWriteColValuesLoopRIDAsignment(ridDstArray, out, ridOffset + j, ridSrcArray, j); ++ridDstArray; } vectUpdateMinMax(validMinMax, nonNullOrEmptyMask & bitMapPosition, Min, Max, dataVecTPtr[j], in); } // Store the whole vector however one level up the stack // vectorizedFiltering() increases the dstArray by a number of // actual values written that is the result of this function. simdProcessor.store(dstArray, tmpStorageVector); return tmpDstVecTPtr - reinterpret_cast(&tmpStorageVector); } // RIDs no values template ::type* = nullptr> inline uint16_t vectWriteRIDValues( VT& processor, // SIMD processor const uint16_t valuesWritten, // The number of values written to in certain SFINAE cases const bool validMinMax, // The flag to update Min/Max for a block or not const primitives::RIDType ridOffset, // The first RID value of the dataVecTPtr T* dataVecTPtr, // Typed SIMD vector from the input block primitives::RIDType* ridDstArray, // The actual dst arrray ptr to start writing RIDs MT writeMask, // SIMD intrinsics bitmask for values to write T& Min, T& Max, // Min/Max of the extent NewColRequestHeader* in, // Proto message ColResultHeader* out, // Proto message MT nonNullOrEmptyMask, // SIMD intrinsics inverce bitmask for NULL/EMPTY values primitives::RIDType* ridSrcArray) // The actual src array ptr to read RIDs { constexpr const uint16_t FilterMaskStep = VT::FilterMaskStep; primitives::RIDType* origRIDDstArray = ridDstArray; // Saving values based on writeMask into tmp vec. // Min/Max processing. // The mask is 16 bit long and it describes N elements where N = sizeof(vector type) / WIDTH. uint16_t j = 0; for (uint32_t it = 0; it < VT::vecByteSize; ++j, it += FilterMaskStep) { MT bitMapPosition = 1 << it; if (writeMask & (1 << it)) { vectWriteColValuesLoopRIDAsignment(ridDstArray, out, ridOffset + j, ridSrcArray, j); ++ridDstArray; } vectUpdateMinMax(validMinMax, nonNullOrEmptyMask & bitMapPosition, Min, Max, dataVecTPtr[j], in); } return ridDstArray - origRIDDstArray; } // Both RIDs and values // vectWriteColValues writes RIDs traversing the writeMask. template ::type* = nullptr> inline uint16_t vectWriteRIDValues( VT& processor, // SIMD processor const uint16_t valuesWritten, // The number of values written to in certain SFINAE cases const bool validMinMax, // The flag to update Min/Max for a block or not const primitives::RIDType ridOffset, // The first RID value of the dataVecTPtr T* dataVecTPtr, // Typed SIMD vector from the input block primitives::RIDType* ridDstArray, // The actual dst arrray ptr to start writing RIDs MT writeMask, // SIMD intrinsics bitmask for values to write T& Min, T& Max, // Min/Max of the extent NewColRequestHeader* in, // Proto message ColResultHeader* out, // Proto message MT nonNullOrEmptyMask, // SIMD intrinsics inverce bitmask for NULL/EMPTY values primitives::RIDType* ridSrcArray) // The actual src array ptr to read RIDs { return valuesWritten; } // No RIDs only values template ::type* = nullptr> inline uint16_t vectWriteRIDValues( VT& processor, // SIMD processor const uint16_t valuesWritten, // The number of values written to in certain SFINAE cases const bool validMinMax, // The flag to update Min/Max for a block or not const primitives::RIDType ridOffset, // The first RID value of the dataVecTPtr T* dataVecTPtr, // Typed SIMD vector from the input block primitives::RIDType* ridDstArray, // The actual dst arrray ptr to start writing RIDs MT writeMask, // SIMD intrinsics bitmask for values to write T& Min, T& Max, // Min/Max of the extent NewColRequestHeader* in, // Proto message ColResultHeader* out, // Proto message MT nonNullOrEmptyMask, // SIMD intrinsics inverce bitmask for NULL/EMPTY values primitives::RIDType* ridSrcArray) // The actual src array ptr to read RIDs { return valuesWritten; } #endif /***************************************************************************** *** RUN DATA THROUGH A COLUMN FILTER **************************************** *****************************************************************************/ // TODO turn columnFilterMode into template param to use it in matchingColValue // This routine filters values in a columnar block processing one scalar at a time. template void scalarFiltering( NewColRequestHeader* in, ColResultHeader* out, const ColumnFilterMode columnFilterMode, const ST* filterSet, // Set of values for simple filters (any of values / none of them) const uint32_t filterCount, // Number of filter elements, each described by one entry in the following arrays: const uint8_t* filterCOPs, // comparison operation const FT* filterValues, // value to compare to const uint8_t* filterRFs, const ColRequestHeaderDataType& typeHolder, // TypeHolder to use collation-aware ops for char/text. const T* srcArray, // Input array const uint32_t srcSize, // ... and its size const uint16_t* ridArray, // Optional array of indexes into srcArray, that defines the read order const uint16_t ridSize, // ... and its size const uint32_t initialRID, // The input block idx to start scanning/filter at. const uint8_t outputType, // Used to decide whether to skip EMPTY values const bool validMinMax, // The flag to store min/max T emptyValue, // Deduced empty value magic T nullValue, // Deduced null value magic T Min, T Max, const bool isNullValueMatches) { constexpr int WIDTH = sizeof(T); // Loop-local variables T curValue = 0; primitives::RIDType rid = 0; bool isEmpty = false; // Loop over the column values, storing those matching the filter, and updating the min..max range for (uint32_t i = initialRID; nextColValue(curValue, &isEmpty, &i, &rid, srcArray, srcSize, ridArray, ridSize, outputType, emptyValue);) { if (isEmpty) continue; else if (isNullValue(curValue, nullValue)) { // If NULL values match the filter, write curValue to the output buffer if (isNullValueMatches) writeColValue(outputType, out, rid, srcArray); } else { // If curValue matches the filter, write it to the output buffer if (matchingColValue(curValue, columnFilterMode, filterSet, filterCount, filterCOPs, filterValues, filterRFs, in->colType, nullValue)) { writeColValue(outputType, out, rid, srcArray); } // Update Min and Max if necessary. EMPTY/NULL values are processed in other branches. if (validMinMax) updateMinMax(Min, Max, curValue, in); } } // Write captured Min/Max values to *out out->ValidMinMax = validMinMax; if (validMinMax) { out->Min = Min; out->Max = Max; } } #if defined(__x86_64__) template ::type* = nullptr> inline SIMD_WRAPPER_TYPE simdDataLoadTemplate(VT& processor, const T* srcArray, const T* origSrcArray, const primitives::RIDType* ridArray, const uint16_t iter) { return {processor.loadFrom(reinterpret_cast(srcArray))}; } // Scatter-gather implementation // TODO Move the logic into simd namespace class methods and use intrinsics template ::type* = nullptr> inline SIMD_WRAPPER_TYPE simdDataLoadTemplate(VT& processor, const T* srcArray, const T* origSrcArray, const primitives::RIDType* ridArray, const uint16_t iter) { constexpr const uint16_t WIDTH = sizeof(T); constexpr const uint16_t VECTOR_SIZE = VT::vecByteSize / WIDTH; using SimdType = typename VT::SimdType; SimdType result; T* resultTypedPtr = reinterpret_cast(&result); for (uint32_t i = 0; i < VECTOR_SIZE; ++i) { resultTypedPtr[i] = origSrcArray[ridArray[i]]; } return {result}; } // This routine filters input block in a vectorized manner. // It supports all output types, all input types. // It doesn't support KIND==TEXT so upper layers filters this KIND out beforehand. // It doesn't support KIND==FLOAT yet also. // To reduce branching it first compiles the filter to produce a vector of // vector processing class methods(actual filters) pointers and a logical function pointer // to glue the masks produced by actual filters. // Then it takes a vector of data, run filters and logical function using pointers. // See the corresponding dispatcher to get more details on vector processing class. template void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T* srcArray, const uint32_t srcSize, primitives::RIDType* ridArray, const uint16_t ridSize, ParsedColumnFilter* parsedColumnFilter, const bool validMinMax, const T emptyValue, const T nullValue, T Min, T Max, const bool isNullValueMatches) { constexpr const uint16_t WIDTH = sizeof(T); using SimdType = typename VT::SimdType; using SimdWrapperType = typename VT::SimdWrapperType; using FilterType = typename VT::FilterType; VT simdProcessor; SimdType dataVec; SimdType emptyFilterArgVec = simdProcessor.emptyNullLoadValue(emptyValue); SimdType nullFilterArgVec = simdProcessor.emptyNullLoadValue(nullValue); MT writeMask, nonEmptyMask, nonNullMask, nonNullOrEmptyMask; MT initFilterMask = 0xFFFF; primitives::RIDType rid = 0; primitives::RIDType* origRidArray = ridArray; uint16_t totalValuesWritten = 0; char* dstArray = reinterpret_cast(primitives::getFirstValueArrayPosition(out)); primitives::RIDType* ridDstArray = reinterpret_cast(getFirstRIDArrayPosition(out)); const T* origSrcArray = srcArray; const FT* filterValues = nullptr; const ParsedColumnFilter::CopsType* filterCOPs = nullptr; ColumnFilterMode columnFilterMode = ALWAYS_TRUE; const ST* filterSet = nullptr; const ParsedColumnFilter::RFsType* filterRFs = nullptr; uint8_t outputType = in->OutputType; constexpr uint16_t VECTOR_SIZE = VT::vecByteSize / WIDTH; // If there are RIDs use its number to get a number of vectorized iterations. uint16_t iterNumber = HAS_INPUT_RIDS ? ridSize / VECTOR_SIZE : srcSize / VECTOR_SIZE; uint32_t filterCount = 0; // These pragmas are to silence GCC warnings // warning: ignoring attributes on template argument #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wignored-attributes" std::vector filterArgsVectors; auto ptrA = std::mem_fn(&VT::cmpEq); using COPType = decltype(ptrA); std::vector copFunctorVec; #pragma GCC diagnostic pop using BOPType = std::function; BOPType bopFunctor; // filter comparators and logical function compilation. if (parsedColumnFilter != nullptr) { filterValues = parsedColumnFilter->getFilterVals(); filterCOPs = parsedColumnFilter->prestored_cops.get(); columnFilterMode = parsedColumnFilter->columnFilterMode; filterSet = parsedColumnFilter->getFilterSet(); filterRFs = parsedColumnFilter->prestored_rfs.get(); filterCount = parsedColumnFilter->getFilterCount(); if (iterNumber > 0) { copFunctorVec.reserve(filterCount); switch (parsedColumnFilter->getBOP()) { case BOP_OR: bopFunctor = std::bit_or(); initFilterMask = 0; break; case BOP_AND: bopFunctor = std::bit_and(); break; case BOP_XOR: bopFunctor = std::bit_or(); initFilterMask = 0; break; case BOP_NONE: // According with the comments in linux-port/primitiveprocessor.h // there can't be BOP_NONE with filterCount > 0 bopFunctor = std::bit_and(); break; default: idbassert(false); } filterArgsVectors.reserve(filterCount); for (uint32_t j = 0; j < filterCount; ++j) { // Preload filter argument values only once. filterArgsVectors.push_back(simdProcessor.loadValue(*((FilterType*)&filterValues[j]))); switch (filterCOPs[j]) { case (COMPARE_EQ): // Skipping extra filter pass generated by IS NULL if (memcmp(&filterValues[j], &nullValue, sizeof(nullValue)) == 0) copFunctorVec.push_back(std::mem_fn(&VT::nullEmptyCmpEq)); else copFunctorVec.push_back(std::mem_fn(&VT::cmpEq)); break; case (COMPARE_GE): copFunctorVec.push_back(std::mem_fn(&VT::cmpGe)); break; case (COMPARE_GT): copFunctorVec.push_back(std::mem_fn(&VT::cmpGt)); break; case (COMPARE_LE): copFunctorVec.push_back(std::mem_fn(&VT::cmpLe)); break; case (COMPARE_LT): copFunctorVec.push_back(std::mem_fn(&VT::cmpLt)); break; case (COMPARE_NE): copFunctorVec.push_back(std::mem_fn(&VT::cmpNe)); break; case (COMPARE_NIL): copFunctorVec.push_back(std::mem_fn(&VT::cmpAlwaysFalse)); break; // There are couple other COP, e.g. COMPARE_NOT however they can't be met here // b/c MCS 6.x uses COMPARE_NOT for strings with OP_LIKE only. See op2num() for // details. default: idbassert(false); } } } } // main loop // writeMask tells which values must get into the result. Includes values that matches filters. Can have // NULLs. nonEmptyMask tells which vector coords are not EMPTY magics. nonNullMask tells which vector coords // are not NULL magics. for (uint16_t i = 0; i < iterNumber; ++i) { primitives::RIDType ridOffset = i * VECTOR_SIZE; assert(!HAS_INPUT_RIDS || (HAS_INPUT_RIDS && ridSize >= ridOffset)); dataVec = simdDataLoadTemplate(simdProcessor, srcArray, origSrcArray, ridArray, i) .v; nonEmptyMask = simdProcessor.nullEmptyCmpNe(dataVec, emptyFilterArgVec); writeMask = nonEmptyMask; // NULL check nonNullMask = simdProcessor.nullEmptyCmpNe(dataVec, nullFilterArgVec); // Exclude NULLs from the resulting set if NULL doesn't match the filters. writeMask = isNullValueMatches ? writeMask : writeMask & nonNullMask; nonNullOrEmptyMask = nonNullMask & nonEmptyMask; // filters MT prevFilterMask = initFilterMask; // TODO name this mask literal MT filterMask = 0xFFFF; for (uint32_t j = 0; j < filterCount; ++j) { // filter using compiled filter and preloaded filter argument filterMask = copFunctorVec[j](simdProcessor, dataVec, filterArgsVectors[j]); filterMask = bopFunctor(prevFilterMask, filterMask); prevFilterMask = filterMask; } writeMask = writeMask & filterMask; T* dataVecTPtr = reinterpret_cast(&dataVec); // vectWriteColValues iterates over the values in the source vec // to store values/RIDs into dstArray/ridDstArray. // It also sets Min/Max values for the block if eligible. // !!! vectWriteColValues increases ridDstArray internally but it doesn't go // outside the scope of the memory allocated to out msg. // vectWriteColValues is empty if outputMode == OT_RID. uint16_t valuesWritten = vectWriteColValues( simdProcessor, writeMask, nonNullOrEmptyMask, validMinMax, ridOffset, dataVecTPtr, dstArray, Min, Max, in, out, ridDstArray, ridArray); // Some outputType modes saves RIDs also. vectWriteRIDValues is empty for // OT_DATAVALUE, OT_BOTH(vectWriteColValues takes care about RIDs). valuesWritten = vectWriteRIDValues( simdProcessor, valuesWritten, validMinMax, ridOffset, dataVecTPtr, ridDstArray, writeMask, Min, Max, in, out, nonNullOrEmptyMask, ridArray); // Calculate bytes written uint16_t bytesWritten = valuesWritten * WIDTH; totalValuesWritten += valuesWritten; ridDstArray += valuesWritten; dstArray += bytesWritten; rid += VECTOR_SIZE; srcArray += VECTOR_SIZE; ridArray += VECTOR_SIZE; } // Set the number of output values here b/c tail processing can skip this operation. out->NVALS = totalValuesWritten; // WIP Remove this block // Write captured Min/Max values to *out out->ValidMinMax = validMinMax; if (validMinMax) { out->Min = Min; out->Max = Max; } // process the tail. scalarFiltering changes out contents, e.g. Min/Max, NVALS, RIDs and values array // This tail also sets out::Min/Max, out::validMinMax if validMinMax is set. uint32_t processedSoFar = rid; scalarFiltering(in, out, columnFilterMode, filterSet, filterCount, filterCOPs, filterValues, filterRFs, in->colType, origSrcArray, srcSize, origRidArray, ridSize, processedSoFar, outputType, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); } // This routine dispatches template function calls to reduce branching. template void vectorizedFilteringDispatcher(NewColRequestHeader* in, ColResultHeader* out, const STORAGE_TYPE* srcArray, const uint32_t srcSize, uint16_t* ridArray, const uint16_t ridSize, ParsedColumnFilter* parsedColumnFilter, const bool validMinMax, const STORAGE_TYPE emptyValue, const STORAGE_TYPE nullValue, STORAGE_TYPE Min, STORAGE_TYPE Max, const bool isNullValueMatches) { // Using struct to dispatch SIMD type based on integral type T. using SimdType = typename simd::IntegralToSIMD::type; using FilterType = typename simd::StorageToFiltering::type; using VT = typename simd::SimdFilterProcessor; bool hasInputRIDs = (in->NVALS > 0) ? true : false; if (hasInputRIDs) { const bool hasInput = true; switch (in->OutputType) { case OT_RID: vectorizedFiltering( in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); break; case OT_BOTH: vectorizedFiltering( in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); break; case OT_TOKEN: vectorizedFiltering( in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); break; case OT_DATAVALUE: vectorizedFiltering( in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); break; } } else { const bool hasInput = false; switch (in->OutputType) { case OT_RID: vectorizedFiltering( in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); break; case OT_BOTH: vectorizedFiltering( in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); break; case OT_TOKEN: vectorizedFiltering( in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); break; case OT_DATAVALUE: vectorizedFiltering( in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); break; } } } #endif // TBD Make changes in Command class ancestors to threat BPP::values as buffer. // TBD this will allow to copy values only once from BPP::blockData to the destination. // This template contains the main scanning/filtering loop. // Copy data matching parsedColumnFilter from input to output. // Input is srcArray[srcSize], optionally accessed in the order defined by ridArray[ridSize]. // Output is buf: ColResponseHeader, RIDType[BLOCK_SIZE], T[BLOCK_SIZE]. template void filterColumnData(NewColRequestHeader* in, ColResultHeader* out, uint16_t* ridArray, const uint16_t ridSize, // Number of values in ridArray int* srcArray16, const uint32_t srcSize, boost::shared_ptr parsedColumnFilter) { using FT = typename IntegralTypeToFilterType::type; using ST = typename IntegralTypeToFilterSetType::type; constexpr int WIDTH = sizeof(T); const T* srcArray = reinterpret_cast(srcArray16); // Cache some structure fields in local vars auto dataType = (CalpontSystemCatalog::ColDataType)in->colType.DataType; // Column datatype uint32_t filterCount = in->NOPS; // Number of elements in the filter uint8_t outputType = in->OutputType; // If no pre-parsed column filter is set, parse the filter in the message if (parsedColumnFilter.get() == nullptr && filterCount > 0) parsedColumnFilter = _parseColumnFilter(in->getFilterStringPtr(), dataType, filterCount, in->BOP); // Cache parsedColumnFilter fields in local vars auto columnFilterMode = filterCount == 0 ? ALWAYS_TRUE : parsedColumnFilter->columnFilterMode; FT* filterValues = filterCount == 0 ? nullptr : parsedColumnFilter->getFilterVals(); auto filterCOPs = filterCount == 0 ? nullptr : parsedColumnFilter->prestored_cops.get(); auto filterRFs = filterCount == 0 ? nullptr : parsedColumnFilter->prestored_rfs.get(); ST* filterSet = filterCount == 0 ? nullptr : parsedColumnFilter->getFilterSet(); // Bit patterns in srcArray[i] representing EMPTY and NULL values T emptyValue = getEmptyValue(dataType); T nullValue = getNullValue(dataType); // Precompute filter results for NULL values bool isNullValueMatches = matchingColValue(nullValue, columnFilterMode, filterSet, filterCount, filterCOPs, filterValues, filterRFs, in->colType, nullValue); // ########################### // Boolean indicating whether to capture the min and max values bool validMinMax = isMinMaxValid(in); T Min = getInitialMin(in); T Max = getInitialMax(in); // Vectorized scanning/filtering for all numerics except float/double types. // If the total number of input values can't fill a vector the vector path // applies scalar filtering. // Syscat queries mustn't follow vectorized processing path b/c PP must return // all values w/o any filter(even empty values filter) applied. #if defined(__x86_64__) // Don't use vectorized filtering for text based data types. if (KIND <= KIND_FLOAT && WIDTH < 16) { bool canUseFastFiltering = true; for (uint32_t i = 0; i < filterCount; ++i) if (filterRFs[i] != 0) canUseFastFiltering = false; if (canUseFastFiltering) { vectorizedFilteringDispatcher(in, out, srcArray, srcSize, ridArray, ridSize, parsedColumnFilter.get(), validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); return; } } #endif uint32_t initialRID = 0; scalarFiltering(in, out, columnFilterMode, filterSet, filterCount, filterCOPs, filterValues, filterRFs, in->colType, srcArray, srcSize, ridArray, ridSize, initialRID, outputType, validMinMax, emptyValue, nullValue, Min, Max, isNullValueMatches); } // end of filterColumnData } // namespace namespace primitives { // The routine used to dispatch CHAR|VARCHAR|TEXT|BLOB scan. inline bool isDictTokenScan(NewColRequestHeader* in) { switch (in->colType.DataType) { case CalpontSystemCatalog::CHAR: return (in->colType.DataSize > 8); case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::BLOB: case CalpontSystemCatalog::TEXT: return (in->colType.DataSize > 7); default: return false; } } // A set of dispatchers for different column widths/integral types. template = 5 typename std::enable_if::type* = nullptr> // gcc >= 5 #else typename std::enable_if::type*> // gcc 4.8.5 #endif #else typename std::enable_if::type* = nullptr> #endif void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in, ColResultHeader* out) { constexpr int W = sizeof(T); auto dataType = (execplan::CalpontSystemCatalog::ColDataType)in->colType.DataType; if (dataType == execplan::CalpontSystemCatalog::FLOAT) { const uint16_t ridSize = in->NVALS; uint16_t* ridArray = in->getRIDArrayPtr(W); const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE : BLOCK_SIZE / W; filterColumnData(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); return; } _scanAndFilterTypeDispatcher(in, out); } template = 5 typename std::enable_if::type* = nullptr> // gcc >= 5 #else typename std::enable_if::type*> // gcc 4.8.5 #endif #else typename std::enable_if::type* = nullptr> #endif void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in, ColResultHeader* out) { constexpr int W = sizeof(T); auto dataType = (execplan::CalpontSystemCatalog::ColDataType)in->colType.DataType; if (dataType == execplan::CalpontSystemCatalog::DOUBLE) { const uint16_t ridSize = in->NVALS; uint16_t* ridArray = in->getRIDArrayPtr(W); const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE : BLOCK_SIZE / W; filterColumnData(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); return; } _scanAndFilterTypeDispatcher(in, out); } template = 5 sizeof(T) == sizeof(int128_t), T>::type* = nullptr> // gcc >= 5 #else sizeof(T) == sizeof(int128_t), T>::type*> // gcc 4.8.5 #endif #else sizeof(T) == sizeof(int128_t), T>::type* = nullptr> #endif void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in, ColResultHeader* out) { _scanAndFilterTypeDispatcher(in, out); } template = 5 typename std::enable_if::type* = nullptr> // gcc >= 5 #else typename std::enable_if::type*> // gcc 4.8.5 #endif #else typename std::enable_if::type* = nullptr> #endif void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in, ColResultHeader* out) { constexpr int W = sizeof(T); const uint16_t ridSize = in->NVALS; uint16_t* ridArray = in->getRIDArrayPtr(W); const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE : BLOCK_SIZE / W; filterColumnData(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); } template = 5 typename std::enable_if::type* = nullptr> // gcc >= 5 #else typename std::enable_if::type*> // gcc 4.8.5 #endif #else typename std::enable_if::type* = nullptr> #endif void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in, ColResultHeader* out) { constexpr int W = sizeof(T); const uint16_t ridSize = in->NVALS; uint16_t* ridArray = in->getRIDArrayPtr(W); const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE : BLOCK_SIZE / W; auto dataType = (execplan::CalpontSystemCatalog::ColDataType)in->colType.DataType; if ((dataType == execplan::CalpontSystemCatalog::CHAR || dataType == execplan::CalpontSystemCatalog::VARCHAR || dataType == execplan::CalpontSystemCatalog::TEXT) && !isDictTokenScan(in)) { filterColumnData(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); return; } if (datatypes::isUnsigned(dataType)) { using UT = typename std::conditional::value || datatypes::is_uint128_t::value, T, typename datatypes::make_unsigned::type>::type; filterColumnData(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); return; } filterColumnData(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); } // The entrypoint for block scanning and filtering. // The block is in in msg, out msg is used to store values|RIDs matched. template void PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader* in, ColResultHeader* out) { #ifdef PRIM_DEBUG auto markEvent = [&](char eventChar) { if (fStatsPtr) fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, eventChar); }; #endif constexpr int W = sizeof(T); void* outp = static_cast(out); memcpy(outp, in, sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader)); out->NVALS = 0; out->LBID = in->LBID; out->ism.Command = COL_RESULTS; out->OutputType = in->OutputType; out->RidFlags = 0; //...Initialize I/O counts; out->CacheIO = 0; out->PhysicalIO = 0; #if 0 // short-circuit the actual block scan for testing if (out->LBID >= 802816) { out->ValidMinMax = false; out->Min = 0; out->Max = 0; return; } #endif #ifdef PRIM_DEBUG markEvent('B'); #endif // Sort ridArray (the row index array) if there are RIDs with this in msg in->sortRIDArrayIfNeeded(W); scanAndFilterTypeDispatcher(in, out); #ifdef PRIM_DEBUG markEvent('C'); #endif } template void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, ColResultHeader*); template void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, ColResultHeader*); template void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, ColResultHeader*); template void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, ColResultHeader*); template void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, ColResultHeader*); } // namespace primitives