diff --git a/CMakeLists.txt b/CMakeLists.txt index 2fdde8afc..7d7194f86 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -307,6 +307,8 @@ SET (ENGINE_UTILS_QUERYSTATS_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils/queryst SET (ENGINE_UTILS_LIBMYSQL_CL_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils/libmysql_client") SET (ENGINE_WE_CONFIGCPP_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/writeengine/xml") SET (ENGINE_DATATYPES_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/datatypes") +SET (ENGINE_BLOCKCACHE_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/primitives/blockcache") +SET (ENGINE_PRIMPROC_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/primitives/primproc") SET (ENGINE_SERVER_SQL_INCLUDE "${SERVER_SOURCE_ROOT_DIR}/sql") SET (ENGINE_SERVER_INCLUDE_INCLUDE "${SERVER_SOURCE_ROOT_DIR}/include") IF (PCRE_INCLUDES) diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h index b6bd803e5..fbaa47e05 100644 --- a/datatypes/mcs_datatype.h +++ b/datatypes/mcs_datatype.h @@ -123,12 +123,6 @@ template<> struct make_unsigned { typedef uint32_t type; }; template<> struct make_unsigned { typedef uint64_t type; }; template<> struct make_unsigned { typedef uint128_t type; }; -} // namespace datatypes - - -namespace datatypes -{ - template struct _WidthToSIntegralType { diff --git a/dbcon/joblist/primitivemsg.h b/dbcon/joblist/primitivemsg.h index 1ede5ecad..da82a8a02 100644 --- a/dbcon/joblist/primitivemsg.h +++ b/dbcon/joblist/primitivemsg.h @@ -760,6 +760,32 @@ struct NewColRequestHeader // QQ: The below constructor is never used. // This struct is used in a cast only, in a hackish way. NewColRequestHeader(); + inline uint16_t* getRIDArrayPtr(const int width) const + { + return (NVALS > 0) ? reinterpret_cast((uint8_t*)this + + sizeof(NewColRequestHeader) + + (NOPS * getFilterSize(width))) + : nullptr; + } + // WIP change to unsigned if needed + inline int getFilterSize(const int width) const + { + return sizeof(uint8_t) + sizeof(uint8_t) + width; + } + inline uint8_t* getFilterStringPtr() + { + return reinterpret_cast(this) + sizeof(NewColRequestHeader); + } + inline void sortRIDArrayIfNeeded(const int width) + { + // NVALS means number of RIDs in the context of the f(). + if (NVALS > 0) + { + uint16_t* ridArray = getRIDArrayPtr(width); + if (sort == 1) + std::sort(ridArray, ridArray + NVALS); + } + } }; struct NewColAggRequestHeader diff --git a/primitives/linux-port/CMakeLists.txt b/primitives/linux-port/CMakeLists.txt index 569169d12..fdc7fae8c 100644 --- a/primitives/linux-port/CMakeLists.txt +++ b/primitives/linux-port/CMakeLists.txt @@ -6,8 +6,6 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ../blockcache ../primproc) set(processor_STAT_SRCS primitiveprocessor.cpp dictionary.cpp column.cpp) -#libprocessor_a_CXXFLAGS = $(march_flags) $(AM_CXXFLAGS) - add_library(processor STATIC ${processor_STAT_SRCS}) add_dependencies(processor loggingcpp) @@ -15,10 +13,3 @@ add_dependencies(processor loggingcpp) target_link_libraries(processor ${NETSNMP_LIBRARIES}) INSTALL (TARGETS processor DESTINATION ${ENGINE_LIBDIR}) - -#if (WITH_PP_SCAN_UT) -# add_executable(pp_scan_unittest pp-scan-unittest.cpp) -# target_link_libraries(pp_scan_unittest ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS} ${CPPUNIT_LIBRARIES} cppunit) -# install(TARGETS pp_scan_unittest DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) -#endif() - diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp index a02075646..4204daf22 100644 --- a/primitives/linux-port/column.cpp +++ b/primitives/linux-port/column.cpp @@ -1,5 +1,5 @@ /* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016-2020 MariaDB Corporation + Copyright (C) 2016-2021 MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -16,10 +16,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -/***************************************************************************** - * $Id: column.cpp 2103 2013-06-04 17:53:38Z dcathey $ - * - ****************************************************************************/ #include #include //#define NDEBUG @@ -51,6 +47,14 @@ using namespace execplan; namespace { +using RID_T = uint16_t; // Row index type, as used in rid arrays + +// Column filtering is dispatched 4-way based on the column type, +// which defines implementation of comparison operations for the column values +enum ENUM_KIND {KIND_DEFAULT, // compared as signed integers + KIND_UNSIGNED, // compared as unsigned integers + KIND_FLOAT, // compared as floating-point numbers + KIND_TEXT}; // whitespace-trimmed and then compared as signed integers inline uint64_t order_swap(uint64_t x) { @@ -87,7 +91,6 @@ void logIt(int mid, int arg1, const string& arg2 = string()) logger.logErrorMessage(msg); } - template inline bool colCompare_(const T& val1, const T& val2, uint8_t COP) { @@ -120,7 +123,6 @@ inline bool colCompare_(const T& val1, const T& val2, uint8_t COP) } } - inline bool colCompareStr(const ColRequestHeaderDataType &type, uint8_t COP, const utils::ConstString &val1, @@ -164,7 +166,7 @@ inline bool colCompare_(const T& val1, const T& val2, uint8_t COP, uint8_t rf) return val1 > val2 || (val1 == val2 && (rf & 0x80)); default: - logIt(34, COP, "colCompare_l"); + logIt(34, COP, "colCompare_"); return false; // throw an exception here? } } @@ -199,300 +201,11 @@ inline bool colStrCompare_(uint64_t val1, uint64_t val2, uint8_t COP, uint8_t rf case COMPARE_LIKE: case COMPARE_NLIKE: default: - logIt(34, COP, "colCompare_l"); + logIt(34, COP, "colStrCompare_"); return false; // throw an exception here? } } - -template -inline bool isEmptyVal(uint8_t type, const uint8_t* val8); - -template<> -inline bool isEmptyVal<16>(uint8_t type, const uint8_t* ival) // For BINARY -{ - const int128_t* val = reinterpret_cast(ival); - // Wide-DECIMAL supplies a universal NULL/EMPTY magics for all 16 byte - // data types. - return *val == datatypes::Decimal128Empty; -} - -template<> -inline bool isEmptyVal<8>(uint8_t type, const uint8_t* ival) -{ - const uint64_t* val = reinterpret_cast(ival); - - switch (type) - { - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - return (joblist::DOUBLEEMPTYROW == *val); - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIMESTAMP: - case CalpontSystemCatalog::TIME: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - return (*val == joblist::CHAR8EMPTYROW); - - case CalpontSystemCatalog::UBIGINT: - return (joblist::UBIGINTEMPTYROW == *val); - - default: - break; - } - - return (joblist::BIGINTEMPTYROW == *val); -} - -template<> -inline bool isEmptyVal<4>(uint8_t type, const uint8_t* ival) -{ - const uint32_t* val = reinterpret_cast(ival); - - switch (type) - { - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - return (joblist::FLOATEMPTYROW == *val); - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIMESTAMP: - case CalpontSystemCatalog::TIME: - return (joblist::CHAR4EMPTYROW == *val); - - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UMEDINT: - return (joblist::UINTEMPTYROW == *val); - - default: - break; - } - - return (joblist::INTEMPTYROW == *val); -} - -template<> -inline bool isEmptyVal<2>(uint8_t type, const uint8_t* ival) -{ - const uint16_t* val = reinterpret_cast(ival); - - switch (type) - { - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIMESTAMP: - case CalpontSystemCatalog::TIME: - return (joblist::CHAR2EMPTYROW == *val); - - case CalpontSystemCatalog::USMALLINT: - return (joblist::USMALLINTEMPTYROW == *val); - - default: - break; - } - - return (joblist::SMALLINTEMPTYROW == *val); -} - -template<> -inline bool isEmptyVal<1>(uint8_t type, const uint8_t* ival) -{ - const uint8_t* val = reinterpret_cast(ival); - - switch (type) - { - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIMESTAMP: - case CalpontSystemCatalog::TIME: - return (*val == joblist::CHAR1EMPTYROW); - - case CalpontSystemCatalog::UTINYINT: - return (*val == joblist::UTINYINTEMPTYROW); - - default: - break; - } - - return (*val == joblist::TINYINTEMPTYROW); -} - -template -inline bool isNullVal(uint8_t type, const uint8_t* val8); - -template<> -inline bool isNullVal<16>(uint8_t type, const uint8_t* ival) -{ - const int128_t* val = reinterpret_cast(ival); - // Wide-DECIMAL supplies a universal NULL/EMPTY magics for all 16 byte - // data types. - return *val == datatypes::Decimal128Null; -} - -template<> -inline bool isNullVal<8>(uint8_t type, const uint8_t* ival) -{ - const uint64_t* val = reinterpret_cast(ival); - - switch (type) - { - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - return (joblist::DOUBLENULL == *val); - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIMESTAMP: - case CalpontSystemCatalog::TIME: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - //@bug 339 might be a token here - //TODO: what's up with the second const here? - return (*val == joblist::CHAR8NULL || 0xFFFFFFFFFFFFFFFELL == *val); - - case CalpontSystemCatalog::UBIGINT: - return (joblist::UBIGINTNULL == *val); - - default: - break; - } - - return (joblist::BIGINTNULL == *val); -} - -template<> -inline bool isNullVal<4>(uint8_t type, const uint8_t* ival) -{ - const uint32_t* val = reinterpret_cast(ival); - - switch (type) - { - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - return (joblist::FLOATNULL == *val); - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - return (joblist::CHAR4NULL == *val); - - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIMESTAMP: - case CalpontSystemCatalog::TIME: - return (joblist::DATENULL == *val); - - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UMEDINT: - return (joblist::UINTNULL == *val); - - default: - break; - } - - return (joblist::INTNULL == *val); -} - -template<> -inline bool isNullVal<2>(uint8_t type, const uint8_t* ival) -{ - const uint16_t* val = reinterpret_cast(ival); - - switch (type) - { - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIMESTAMP: - case CalpontSystemCatalog::TIME: - return (joblist::CHAR2NULL == *val); - - case CalpontSystemCatalog::USMALLINT: - return (joblist::USMALLINTNULL == *val); - - default: - break; - } - - return (joblist::SMALLINTNULL == *val); -} - -template<> -inline bool isNullVal<1>(uint8_t type, const uint8_t* ival) -{ - const uint8_t* val = reinterpret_cast(ival); - - switch (type) - { - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::BLOB: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIMESTAMP: - case CalpontSystemCatalog::TIME: - return (*val == joblist::CHAR1NULL); - - case CalpontSystemCatalog::UTINYINT: - return (joblist::UTINYINTNULL == *val); - - default: - break; - } - - return (*val == joblist::TINYINTNULL); -} - -/* A generic isNullVal */ -inline bool isNullVal(uint32_t length, uint8_t type, const uint8_t* val8) -{ - switch (length) - { - case 16: - return isNullVal<16>(type, val8); - - case 8: - return isNullVal<8>(type, val8); - - case 4: - return isNullVal<4>(type, val8); - - case 2: - return isNullVal<2>(type, val8); - - case 1: - return isNullVal<1>(type, val8); - }; - - return false; -} - // Set the minimum and maximum in the return header if we will be doing a block scan and // we are dealing with a type that is comparable as a 64 bit integer. Subsequent calls can then // skip this block if the value being searched is outside of the Min/Max range. @@ -540,937 +253,1247 @@ inline bool isMinMaxValid(const NewColRequestHeader* in) } } - -inline bool colCompare(int64_t val1, int64_t val2, uint8_t COP, uint8_t rf, - const ColRequestHeaderDataType &typeHolder, uint8_t width, - bool isNull = false) +template::type* = nullptr> +inline bool colCompareDispatcherT( + T1 columnValue, + T2 filterValue, + uint8_t cop, + uint8_t rf, + const ColRequestHeaderDataType& typeHolder, + bool isVal2Null) { - uint8_t type = typeHolder.DataType; -// cout << "comparing " << hex << val1 << " to " << val2 << endl; + float dVal1 = *((float*) &columnValue); + float dVal2 = *((float*) &filterValue); + return colCompare_(dVal1, dVal2, cop); +} - if (COMPARE_NIL == COP) return false; +template::type* = nullptr> +inline bool colCompareDispatcherT( + T1 columnValue, + T2 filterValue, + uint8_t cop, + uint8_t rf, + const ColRequestHeaderDataType& typeHolder, + bool isVal2Null) +{ + double dVal1 = *((double*) &columnValue); + double dVal2 = *((double*) &filterValue); + return colCompare_(dVal1, dVal2, cop); +} - //@bug 425 added isNull condition - else if ( !isNull && (type == CalpontSystemCatalog::FLOAT || type == CalpontSystemCatalog::DOUBLE)) +template::type* = nullptr> +inline bool colCompareDispatcherT( + T1 columnValue, + T2 filterValue, + uint8_t cop, + uint8_t rf, + const ColRequestHeaderDataType& typeHolder, + bool isVal2Null) +{ + if (cop & COMPARE_LIKE) // LIKE and NOT LIKE { - double dVal1, dVal2; - - if (type == CalpontSystemCatalog::FLOAT) - { - dVal1 = *((float*) &val1); - dVal2 = *((float*) &val2); - } - else - { - dVal1 = *((double*) &val1); - dVal2 = *((double*) &val2); - } - - return colCompare_(dVal1, dVal2, COP); + utils::ConstString subject{reinterpret_cast(&columnValue), COL_WIDTH}; + utils::ConstString pattern{reinterpret_cast(&filterValue), COL_WIDTH}; + return typeHolder.like(cop & COMPARE_NOT, subject.rtrimZero(), + pattern.rtrimZero()); } - else if ( (type == CalpontSystemCatalog::CHAR || type == CalpontSystemCatalog::VARCHAR || - type == CalpontSystemCatalog::TEXT) && !isNull ) + if (!rf) { - if (COP & COMPARE_LIKE) // LIKE and NOT LIKE - { - utils::ConstString subject = {reinterpret_cast(&val1), width}; - utils::ConstString pattern = {reinterpret_cast(&val2), width}; - return typeHolder.like(COP & COMPARE_NOT, subject.rtrimZero(), - pattern.rtrimZero()); - } - - if (!rf) - { - // A temporary hack for xxx_nopad_bin collations - // TODO: MCOL-4534 Improve comparison performance in 8bit nopad_bin collations - if ((typeHolder.getCharset().state & (MY_CS_BINSORT|MY_CS_NOPAD)) == - (MY_CS_BINSORT|MY_CS_NOPAD)) - return colCompare_(order_swap(val1), order_swap(val2), COP); - utils::ConstString s1 = {reinterpret_cast(&val1), width}; - utils::ConstString s2 = {reinterpret_cast(&val2), width}; - return colCompareStr(typeHolder, COP, s1.rtrimZero(), s2.rtrimZero()); - } - else - return colStrCompare_(order_swap(val1), order_swap(val2), COP, rf); + // A temporary hack for xxx_nopad_bin collations + // TODO: MCOL-4534 Improve comparison performance in 8bit nopad_bin collations + if ((typeHolder.getCharset().state & (MY_CS_BINSORT|MY_CS_NOPAD)) == + (MY_CS_BINSORT|MY_CS_NOPAD)) + return colCompare_(order_swap(columnValue), order_swap(filterValue), cop); + utils::ConstString s1{reinterpret_cast(&columnValue), COL_WIDTH}; + utils::ConstString s2{reinterpret_cast(&filterValue), COL_WIDTH}; + return colCompareStr(typeHolder, cop, s1.rtrimZero(), s2.rtrimZero()); } - - /* isNullVal should work on the normalized value on little endian machines */ else - { - bool val2Null = isNullVal(width, type, (uint8_t*) &val2); + return colStrCompare_(order_swap(columnValue), order_swap(filterValue), cop, rf); - if (isNull == val2Null || (val2Null && COP == COMPARE_NE)) - return colCompare_(val1, val2, COP, rf); +} + +// This template where IS_NULL = true is used only comparing filter predicate +// values with column NULL so I left branching here. +template::type* = nullptr> +inline bool colCompareDispatcherT( + T1 columnValue, + T2 filterValue, + uint8_t cop, + uint8_t rf, + const ColRequestHeaderDataType& typeHolder, + bool isVal2Null) +{ + if (IS_NULL == isVal2Null || (isVal2Null && cop == COMPARE_NE)) + { + if (KIND_UNSIGNED == KIND) + { + // Ugly hack to convert all to the biggest type b/w T1 and T2. + // I presume that sizeof(T2) AKA a filter predicate type is GEQ sizeof(T1) AKA col type. + using UT2 = typename datatypes::make_unsigned::type; + UT2 ucolumnValue = columnValue; + UT2 ufilterValue = filterValue; + return colCompare_(ucolumnValue, ufilterValue, cop, rf); + } else + { + // Ugly hack to convert all to the biggest type b/w T1 and T2. + // I presume that sizeof(T2) AKA a filter predicate type is GEQ sizeof(T1) AKA col type. + T2 tempVal1 = columnValue; + return colCompare_(tempVal1, filterValue, cop, rf); + } + } + else + return false; +} + +template::type* = nullptr> +inline bool colCompareDispatcherT( + T1 columnValue, + T2 filterValue, + uint8_t cop, + uint8_t rf, + const ColRequestHeaderDataType& typeHolder, + bool isVal2Null) +{ + if (IS_NULL == isVal2Null || (isVal2Null && cop == COMPARE_NE)) + { + // Ugly hack to convert all to the biggest type b/w T1 and T2. + // I presume that sizeof(T2)(a filter predicate type) is GEQ T1(col type). + using UT2 = typename datatypes::make_unsigned::type; + UT2 ucolumnValue = columnValue; + UT2 ufilterValue = filterValue; + return colCompare_(ucolumnValue, ufilterValue, cop, rf); + } + else + return false; +} + +template::type* = nullptr> +inline bool colCompareDispatcherT( + T1 columnValue, + T2 filterValue, + uint8_t cop, + uint8_t rf, + const ColRequestHeaderDataType& typeHolder, + bool isVal2Null) +{ + if (IS_NULL == isVal2Null || (isVal2Null && cop == COMPARE_NE)) + { + // Ugly hack to convert all to the biggest type b/w T1 and T2. + // I presume that sizeof(T2)(a filter predicate type) is GEQ T1(col type). + T2 tempVal1 = columnValue; + return colCompare_(tempVal1, filterValue, cop, rf); + } + else + return false; +} + +// Compare two column values using given comparison operation, +// taking into account all rules about NULL values, string trimming and so on +template +inline bool colCompare( + T1 columnValue, + T2 filterValue, + uint8_t cop, + uint8_t rf, + const ColRequestHeaderDataType& typeHolder, + bool isVal2Null = false) +{ +// cout << "comparing " << hex << columnValue << " to " << filterValue << endl; + if (COMPARE_NIL == cop) return false; + + return colCompareDispatcherT(columnValue, filterValue, + cop, rf, typeHolder, isVal2Null); +} + +/***************************************************************************** + *** NULL/EMPTY VALUES FOR EVERY COLUMN TYPE/WIDTH *************************** + *****************************************************************************/ +// Bit pattern representing EMPTY value for given column type/width +// TBD Use typeHandler +template::type* = nullptr> +T getEmptyValue(uint8_t type) +{ + return datatypes::Decimal128Empty; +} + +template::type* = nullptr> +T getEmptyValue(uint8_t type) +{ + switch (type) + { + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + return joblist::DOUBLEEMPTYROW; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIMESTAMP: + case CalpontSystemCatalog::TIME: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + return joblist::CHAR8EMPTYROW; + + case CalpontSystemCatalog::UBIGINT: + return joblist::UBIGINTEMPTYROW; + + default: + return joblist::BIGINTEMPTYROW; + } +} + +template::type* = nullptr> +T getEmptyValue(uint8_t type) +{ + switch (type) + { + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + return joblist::FLOATEMPTYROW; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIMESTAMP: + case CalpontSystemCatalog::TIME: + return joblist::CHAR4EMPTYROW; + + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UMEDINT: + return joblist::UINTEMPTYROW; + + default: + return joblist::INTEMPTYROW; + } +} + +template::type* = nullptr> +T getEmptyValue(uint8_t type) +{ + switch (type) + { + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIMESTAMP: + case CalpontSystemCatalog::TIME: + return joblist::CHAR2EMPTYROW; + + case CalpontSystemCatalog::USMALLINT: + return joblist::USMALLINTEMPTYROW; + + default: + return joblist::SMALLINTEMPTYROW; + } +} + +template::type* = nullptr> +T getEmptyValue(uint8_t type) +{ + switch (type) + { + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIMESTAMP: + case CalpontSystemCatalog::TIME: + return joblist::CHAR1EMPTYROW; + + case CalpontSystemCatalog::UTINYINT: + return joblist::UTINYINTEMPTYROW; + + default: + return joblist::TINYINTEMPTYROW; + } +} + +// Bit pattern representing NULL value for given column type/width +// TBD Use TypeHandler +template::type* = nullptr> +T getNullValue(uint8_t type) +{ + return datatypes::Decimal128Null; +} + +template::type* = nullptr> +T getNullValue(uint8_t type) +{ + switch (type) + { + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + return joblist::DOUBLENULL; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIMESTAMP: + case CalpontSystemCatalog::TIME: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + return joblist::CHAR8NULL; + + case CalpontSystemCatalog::UBIGINT: + return joblist::UBIGINTNULL; + + default: + return joblist::BIGINTNULL; + } +} + +template::type* = nullptr> +T getNullValue(uint8_t type) +{ + switch (type) + { + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + return joblist::FLOATNULL; + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + return joblist::CHAR4NULL; + + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIMESTAMP: + case CalpontSystemCatalog::TIME: + return joblist::DATENULL; + + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UMEDINT: + return joblist::UINTNULL; + + default: + return joblist::INTNULL; + } +} + +template::type* = nullptr> +T getNullValue(uint8_t type) +{ + switch (type) + { + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIMESTAMP: + case CalpontSystemCatalog::TIME: + return joblist::CHAR2NULL; + + case CalpontSystemCatalog::USMALLINT: + return joblist::USMALLINTNULL; + + default: + return joblist::SMALLINTNULL; + } +} + +template::type* = nullptr> +T getNullValue(uint8_t type) +{ + switch (type) + { + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + case CalpontSystemCatalog::TIMESTAMP: + case CalpontSystemCatalog::TIME: + return joblist::CHAR1NULL; + + case CalpontSystemCatalog::UTINYINT: + return joblist::UTINYINTNULL; + + default: + return joblist::TINYINTNULL; + } +} + +// Check whether val is NULL (or alternative NULL bit pattern for 64-bit string types) +template +inline bool isNullValue(const T val, const T NULL_VALUE) +{ + return val == NULL_VALUE; +} + +template<> +inline bool isNullValue(const int64_t val, const int64_t NULL_VALUE) +{ + //@bug 339 might be a token here + //TODO: what's up with the alternative NULL here? + constexpr const int64_t ALT_NULL_VALUE = 0xFFFFFFFFFFFFFFFELL; + + return (val == NULL_VALUE || + val == ALT_NULL_VALUE); +} + +// +// FILTER A COLUMN VALUE +// + +template::type* = nullptr> +inline bool noneValuesInArray(const T curValue, + const FT* filterValues, + const uint32_t filterCount) +{ + // ignore NULLs in the array and in the column data + return false; +} + +template::type* = nullptr> +inline bool noneValuesInArray(const T curValue, + const FT* filterValues, + const uint32_t filterCount) +{ + for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) + { + if (curValue == static_cast(filterValues[argIndex])) return false; } + + return true; } -inline bool colCompare(int128_t val1, int128_t val2, uint8_t COP, uint8_t rf, int type, uint8_t width, bool isNull = false) +template::type* = nullptr> +inline bool noneValuesInSet(const T curValue, const ST* filterSet) { - if (COMPARE_NIL == COP) return false; + // bug 1920: ignore NULLs in the set and in the column data + return false; +} - /* isNullVal should work on the normalized value on little endian machines */ - bool val2Null = isNullVal(width, type, (uint8_t*) &val2); +template::type* = nullptr> +inline bool noneValuesInSet(const T curValue, const ST* filterSet) +{ + bool found = (filterSet->find(curValue) != filterSet->end()); + return !found; +} - if (isNull == val2Null || (val2Null && COP == COMPARE_NE)) - return colCompare_(val1, val2, COP, rf); +// The routine is used to test the value from a block against filters +// according with columnFilterMode(see the corresponding enum for details). +// Returns true if the curValue matches the filter. +template +inline bool matchingColValue(const T curValue, + const ColumnFilterMode columnFilterMode, + const ST* filterSet, // Set of values for simple filters (any of values / none of them) + const uint32_t filterCount, // Number of filter elements, each described by one entry in the following arrays: + const uint8_t* filterCOPs, // comparison operation + const FT* filterValues, // value to compare to + const uint8_t* filterRFs, // reverse byte order flags + const ColRequestHeaderDataType& typeHolder, + const T NULL_VALUE) // Bit pattern representing NULL value for this column type/width +{ + /* In order to make filtering as fast as possible, we replaced the single generic algorithm + with several algorithms, better tailored for more specific cases: + empty filter, single comparison, and/or/xor comparison results, one/none of small/large set of values + */ + switch (columnFilterMode) + { + // Empty filter is always true + case ALWAYS_TRUE: + return true; + + + // Filter consisting of exactly one comparison operation + case SINGLE_COMPARISON: + { + auto filterValue = filterValues[0]; + // This can be future optimized checking if a filterValue is NULL or not + bool cmp = colCompare(curValue, filterValue, filterCOPs[0], + filterRFs[0], typeHolder, isNullValue(filterValue, + NULL_VALUE)); + return cmp; + } + + + // Filter is true if ANY comparison is true (BOP_OR) + case ANY_COMPARISON_TRUE: + { + for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) + { + auto filterValue = filterValues[argIndex]; + // This can be future optimized checking if a filterValues are NULLs or not before the higher level loop. + bool cmp = colCompare(curValue, filterValue, filterCOPs[argIndex], + filterRFs[argIndex], typeHolder, isNullValue(filterValue, + NULL_VALUE)); + + // Short-circuit the filter evaluation - true || ... == true + if (cmp == true) + return true; + } + + // We can get here only if all filters returned false + return false; + } + + + // Filter is true only if ALL comparisons are true (BOP_AND) + case ALL_COMPARISONS_TRUE: + { + for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) + { + auto filterValue = filterValues[argIndex]; + // This can be future optimized checking if a filterValues are NULLs or not before the higher level loop. + bool cmp = colCompare(curValue, filterValue, filterCOPs[argIndex], + filterRFs[argIndex], typeHolder, + isNullValue(filterValue, NULL_VALUE)); + + // Short-circuit the filter evaluation - false && ... = false + if (cmp == false) + return false; + } + + // We can get here only if all filters returned true + return true; + } + + + // XORing results of comparisons (BOP_XOR) + case XOR_COMPARISONS: + { + bool result = false; + + for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) + { + auto filterValue = filterValues[argIndex]; + // This can be future optimized checking if a filterValues are NULLs or not before the higher level loop. + bool cmp = colCompare(curValue, filterValue, filterCOPs[argIndex], + filterRFs[argIndex], typeHolder, + isNullValue(filterValue, NULL_VALUE)); + result ^= cmp; + } + + return result; + } + + + // ONE of the values in the small set represented by an array (BOP_OR + all COMPARE_EQ) + case ONE_OF_VALUES_IN_ARRAY: + { + for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) + { + if (curValue == static_cast(filterValues[argIndex])) + return true; + } + + return false; + } + + + // NONE of the values in the small set represented by an array (BOP_AND + all COMPARE_NE) + case NONE_OF_VALUES_IN_ARRAY: + return noneValuesInArray(curValue, filterValues, filterCount); + + + // ONE of the values in the set is equal to the value checked (BOP_OR + all COMPARE_EQ) + case ONE_OF_VALUES_IN_SET: + { + bool found = (filterSet->find(curValue) != filterSet->end()); + return found; + } + + + // NONE of the values in the set is equal to the value checked (BOP_AND + all COMPARE_NE) + case NONE_OF_VALUES_IN_SET: + return noneValuesInSet(curValue, filterSet); + + + default: + idbassert(0); + return true; + } +} + +/***************************************************************************** + *** READ COLUMN VALUES ****************************************************** + *****************************************************************************/ + +// Read one ColValue from the input block. +// Return true on success, false on End of Block. +// Values are read from srcArray either in natural order or in the order defined by ridArray. +// Empty values are skipped, unless ridArray==0 && !(OutputType & OT_RID). +template +inline bool nextColValue( + T& result, // Place for the value returned + bool* isEmpty, // ... and flag whether it's EMPTY + uint32_t* index, // Successive index either in srcArray (going from 0 to srcSize-1) or ridArray (0..ridSize-1) + uint16_t* rid, // Index in srcArray of the value returned + const T* srcArray, // Input array + const uint32_t srcSize, // ... and its size + const uint16_t* ridArray, // Optional array of indexes into srcArray, that defines the read order + const uint16_t ridSize, // ... and its size + const uint8_t OutputType, // Used to decide whether to skip EMPTY values + T EMPTY_VALUE) +{ + auto i = *index; // local copy of *index to speed up loops + T value; // value to be written into *result, local for the same reason + + if (ridArray) + { + // Read next non-empty value in the order defined by ridArray + for( ; ; i++) + { + if (UNLIKELY(i >= ridSize)) + return false; + + value = srcArray[ridArray[i]]; + + if (value != EMPTY_VALUE) + break; + } + + *rid = ridArray[i]; + *isEmpty = false; + } + else if (OutputType & OT_RID) //TODO: check correctness of this condition for SKIP_EMPTY_VALUES + { + // Read next non-empty value in the natural order + for( ; ; i++) + { + if (UNLIKELY(i >= srcSize)) + return false; + + value = srcArray[i]; + + if (value != EMPTY_VALUE) + break; + } + + *rid = i; + *isEmpty = false; + } else - return false; + { + // Read next value in the natural order + if (UNLIKELY(i >= srcSize)) + return false; + + *rid = i; + value = srcArray[i]; + *isEmpty = (value == EMPTY_VALUE); + } + + *index = i+1; + result = value; + return true; } -inline bool colCompareUnsigned(uint64_t val1, uint64_t val2, uint8_t COP, uint8_t rf, int type, uint8_t width, bool isNull = false) -{ - // cout << "comparing unsigned" << hex << val1 << " to " << val2 << endl; - - if (COMPARE_NIL == COP) return false; - - /* isNullVal should work on the normalized value on little endian machines */ - bool val2Null = isNullVal(width, type, (uint8_t*) &val2); - - if (isNull == val2Null || (val2Null && COP == COMPARE_NE)) - return colCompare_(val1, val2, COP, rf); - else - return false; -} - -inline void store(const NewColRequestHeader* in, - NewColResultHeader* out, - unsigned outSize, - unsigned* written, - uint16_t rid, const uint8_t* block8) +/// +/// WRITE COLUMN VALUES +/// + +// Append value to the output buffer with debug-time check for buffer overflow +template +inline void checkedWriteValue( + void* out, + unsigned outSize, + unsigned* outPos, + const T* src, + int errSubtype) { +#ifdef PRIM_DEBUG + if (sizeof(T) > outSize - *outPos) + { + logIt(35, errSubtype); + throw logic_error("PrimitiveProcessor::checkedWriteValue(): output buffer is too small"); + } +#endif uint8_t* out8 = reinterpret_cast(out); + memcpy(out8 + *outPos, src, sizeof(T)); + *outPos += sizeof(T); +} - if (in->OutputType & OT_RID) +// Write the value index in srcArray and/or the value itself, depending on bits in OutputType, +// into the output buffer and update the output pointer. +template +inline void writeColValue( + uint8_t OutputType, + NewColResultHeader* out, + unsigned outSize, + unsigned* written, + uint16_t rid, + const T* srcArray) +{ + if (OutputType & OT_RID) { -#ifdef PRIM_DEBUG - - if (*written + 2 > outSize) - { - logIt(35, 1); - throw logic_error("PrimitiveProcessor::store(): output buffer is too small"); - } - -#endif + checkedWriteValue(out, outSize, written, &rid, 1); out->RidFlags |= (1 << (rid >> 9)); // set the (row/512)'th bit - memcpy(&out8[*written], &rid, 2); - *written += 2; } - if (in->OutputType & OT_TOKEN || in->OutputType & OT_DATAVALUE) + if (OutputType & (OT_TOKEN | OT_DATAVALUE)) { -#ifdef PRIM_DEBUG - - if (*written + in->colType.DataSize > outSize) - { - logIt(35, 2); - throw logic_error("PrimitiveProcessor::store(): output buffer is too small"); - } - -#endif - - void* ptr1 = &out8[*written]; - const uint8_t* ptr2 = &block8[0]; - - switch (in->colType.DataSize) - { - case 16: - ptr2 += (rid << 4); - memcpy(ptr1, ptr2, 16); - break; - - default: - // fallthrough - - case 8: - ptr2 += (rid << 3); - memcpy(ptr1, ptr2, 8); - break; - - case 4: - ptr2 += (rid << 2); - memcpy(ptr1, ptr2, 4); - break; - - case 2: - ptr2 += (rid << 1); - memcpy(ptr1, ptr2, 2); - break; - - case 1: - ptr2 += (rid << 0); - memcpy(ptr1, ptr2, 1); - break; - } - - *written += in->colType.DataSize; + checkedWriteValue(out, outSize, written, &srcArray[rid], 2); } - out->NVALS++; + out->NVALS++; //TODO: Can be computed at the end from *written value } -template -inline uint64_t nextUnsignedColValue(int type, - const uint16_t* ridArray, - int NVALS, - int* index, - bool* done, - bool* isNull, - bool* isEmpty, - uint16_t* rid, - uint8_t OutputType, uint8_t* val8, unsigned itemsPerBlk) +/* WIP +template +void writeArray( + size_t dataSize, + const T* dataArray, + const RID_T* dataRid, + const FILTER_ARRAY_T *filterArray, + uint8_t* outbuf, + unsigned* written, + uint16_t* NVALS, + uint8_t* RidFlagsPtr, + T NULL_VALUE) { - const uint8_t* vp = 0; + uint8_t* out = outbuf; + uint8_t RidFlags = *RidFlagsPtr; - if (ridArray == NULL) + for (size_t i = 0; i < dataSize; ++i) { - while (static_cast(*index) < itemsPerBlk && - isEmptyVal(type, &val8[*index * W]) && - (OutputType & OT_RID)) + //TODO: optimize handling of NULL values and flags by avoiding non-predictable jumps + if (dataArray[i]==NULL_VALUE? IS_NULL_VALUE_MATCHES : filterArray[i]) { - (*index)++; - } - - if (static_cast(*index) >= itemsPerBlk) - { - *done = true; - return 0; - } - - vp = &val8[*index * W]; - *isNull = isNullVal(type, vp); - *isEmpty = isEmptyVal(type, vp); - *rid = (*index)++; - } - else - { - while (*index < NVALS && - isEmptyVal(type, &val8[ridArray[*index] * W])) - { - (*index)++; - } - - if (*index >= NVALS) - { - *done = true; - return 0; - } - - vp = &val8[ridArray[*index] * W]; - *isNull = isNullVal(type, vp); - *isEmpty = isEmptyVal(type, vp); - *rid = ridArray[(*index)++]; - } - - // at this point, nextRid is the index to return, and index is... - // if RIDs are not specified, nextRid + 1, - // if RIDs are specified, it's the next index in the rid array. - //Bug 838, tinyint null problem - switch (W) - { - case 1: - return reinterpret_cast (val8)[*rid]; - - case 2: - return reinterpret_cast(val8)[*rid]; - - case 4: - return reinterpret_cast(val8)[*rid]; - - case 8: - return reinterpret_cast(val8)[*rid]; - - default: - logIt(33, W); - -#ifdef PRIM_DEBUG - throw logic_error("PrimitiveProcessor::nextColValue() bad width"); -#endif - return -1; - } -} -template -inline uint8_t* nextBinColValue(int type, - const uint16_t* ridArray, - int NVALS, - int* index, - bool* done, - bool* isNull, - bool* isEmpty, - uint16_t* rid, - uint8_t OutputType, uint8_t* val8, unsigned itemsPerBlk) -{ - if (ridArray == NULL) - { - while (static_cast(*index) < itemsPerBlk && - isEmptyVal(type, &val8[*index * W]) && - (OutputType & OT_RID)) - { - (*index)++; - } - - - if (static_cast(*index) >= itemsPerBlk) - { - *done = true; - return NULL; - } - *rid = (*index)++; - } - else - { - while (*index < NVALS && - isEmptyVal(type, &val8[ridArray[*index] * W])) - { - (*index)++; - } - - if (*index >= NVALS) - { - *done = true; - return NULL; - } - *rid = ridArray[(*index)++]; - } - - uint32_t curValueOffset = *rid * W; - - *isNull = isNullVal(type, &val8[curValueOffset]); - *isEmpty = isEmptyVal(type, &val8[curValueOffset]); - //cout << "nextColBinValue " << *index << " rowid " << *rid << endl; - // at this point, nextRid is the index to return, and index is... - // if RIDs are not specified, nextRid + 1, - // if RIDs are specified, it's the next index in the rid array. - return &val8[curValueOffset]; - -#ifdef PRIM_DEBUG - throw logic_error("PrimitiveProcessor::nextColBinValue() bad width"); -#endif - return NULL; -} - - -template -inline int64_t nextColValue(int type, - const uint16_t* ridArray, - int NVALS, - int* index, - bool* done, - bool* isNull, - bool* isEmpty, - uint16_t* rid, - uint8_t OutputType, uint8_t* val8, unsigned itemsPerBlk) -{ - const uint8_t* vp = 0; - - if (ridArray == NULL) - { - while (static_cast(*index) < itemsPerBlk && - isEmptyVal(type, &val8[*index * W]) && - (OutputType & OT_RID)) - { - (*index)++; - } - - if (static_cast(*index) >= itemsPerBlk) - { - *done = true; - return 0; - } - - vp = &val8[*index * W]; - *isNull = isNullVal(type, vp); - *isEmpty = isEmptyVal(type, vp); - *rid = (*index)++; - } - else - { - while (*index < NVALS && - isEmptyVal(type, &val8[ridArray[*index] * W])) - { - (*index)++; - } - - if (*index >= NVALS) - { - *done = true; - return 0; - } - - vp = &val8[ridArray[*index] * W]; - *isNull = isNullVal(type, vp); - *isEmpty = isEmptyVal(type, vp); - *rid = ridArray[(*index)++]; - } - - // at this point, nextRid is the index to return, and index is... - // if RIDs are not specified, nextRid + 1, - // if RIDs are specified, it's the next index in the rid array. - //Bug 838, tinyint null problem - switch (W) - { - case 1: - return reinterpret_cast (val8)[*rid]; - - case 2: - return reinterpret_cast(val8)[*rid]; - - case 4: -#if 0 - if (type == CalpontSystemCatalog::FLOAT) + if (WRITE_RID) { - // convert the float to a 64-bit type, return that w/o conversion - int32_t* val32 = reinterpret_cast(val8); - double dTmp; - dTmp = (double) * ((float*) &val32[*rid]); - return *((int64_t*) &dTmp); - } - else - { - return reinterpret_cast(val8)[*rid]; + copyValue(out, &dataRid[i], sizeof(RID_T)); + out += sizeof(RID_T); + + RidFlags |= (1 << (dataRid[i] >> 10)); // set the (row/1024)'th bit } -#else - return reinterpret_cast(val8)[*rid]; -#endif - - case 8: - return reinterpret_cast(val8)[*rid]; - - default: - logIt(33, W); - -#ifdef PRIM_DEBUG - throw logic_error("PrimitiveProcessor::nextColValue() bad width"); -#endif - return -1; + if (WRITE_DATA) + { + copyValue(out, &dataArray[i], sizeof(T)); + out += sizeof(T); + } + } } + + // Update number of written values, number of written bytes and out->RidFlags + int size1 = (WRITE_RID? sizeof(RID_T) : 0) + (WRITE_DATA? sizeof(T) : 0); + *NVALS += (out - outbuf) / size1; + *written += out - outbuf; + *RidFlagsPtr = RidFlags; } +*/ +/***************************************************************************** + *** RUN DATA THROUGH A COLUMN FILTER **************************************** + *****************************************************************************/ -template -inline void p_Col_ridArray(NewColRequestHeader* in, - NewColResultHeader* out, - unsigned outSize, - unsigned* written, int* block, Stats* fStatsPtr, unsigned itemsPerBlk, - boost::shared_ptr parsedColumnFilter) +/* "Vertical" processing of the column filter: + 1. load all data into temporary vector + 2. process one filter element over entire vector before going to a next one + 3. write records, that succesfully passed through the filter, to outbuf +*/ +/* +template +void processArray( + // Source data + const T* srcArray, + size_t srcSize, + uint16_t* ridArray, + size_t ridSize, // Number of values in ridArray + // Filter description + int BOP, + prestored_set_t* filterSet, // Set of values for simple filters (any of values / none of them) + uint32_t filterCount, // Number of filter elements, each described by one entry in the following arrays: + uint8_t* filterCOPs, // comparison operation + int64_t* filterValues, // value to compare to + // Output buffer/stats + uint8_t* outbuf, // Pointer to the place for output data + unsigned* written, // Number of written bytes, that we need to update + uint16_t* NVALS, // Number of written values, that we need to update + uint8_t* RidFlagsPtr, // Pointer to out->RidFlags + // Processing parameters + bool WRITE_RID, + bool WRITE_DATA, + bool SKIP_EMPTY_VALUES, + T EMPTY_VALUE, + bool IS_NULL_VALUE_MATCHES, + T NULL_VALUE, + // Min/Max search + bool ValidMinMax, + VALTYPE* MinPtr, + VALTYPE* MaxPtr) { - uint16_t* ridArray = 0; - uint8_t* in8 = reinterpret_cast(in); - const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + W; + // Alloc temporary arrays + size_t inputSize = (ridArray? ridSize : srcSize); - if (in->NVALS > 0) - ridArray = reinterpret_cast(&in8[sizeof(NewColRequestHeader) + - (in->NOPS * filterSize)]); + // Temporary array with data to filter + std::vector dataVec(inputSize); + auto dataArray = dataVec.data(); - if (ridArray && 1 == in->sort ) + // Temporary array with RIDs of corresponding dataArray elements + std::vector dataRidVec(WRITE_RID? inputSize : 0); + auto dataRid = dataRidVec.data(); + + + // Copy input data into temporary array, opt. storing RIDs, opt. skipping EMPTYs + size_t dataSize; // number of values copied into dataArray + if (ridArray != NULL) { - qsort(ridArray, in->NVALS, sizeof(uint16_t), compareBlock); + SKIP_EMPTY_VALUES = true; // let findMinMaxArray() know that empty values will be skipped - if (fStatsPtr) -#ifdef _MSC_VER - fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'O'); - -#else - fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'O'); -#endif + dataSize = WRITE_RID? readArray(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE) + : readArray(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE); } - - // Set boolean indicating whether to capture the min and max values. - out->ValidMinMax = isMinMaxValid(in); - - if (out->ValidMinMax) + else if (SKIP_EMPTY_VALUES) { - if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType)) - { - out->Min = static_cast(numeric_limits::max()); - out->Max = 0; - } - else - { - out->Min = numeric_limits::max(); - out->Max = numeric_limits::min(); - } + dataSize = WRITE_RID? readArray(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE) + : readArray(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE); } else { - out->Min = 0; - out->Max = 0; + dataSize = WRITE_RID? readArray(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE) + : readArray(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE); } - const ColArgs* args = NULL; - int64_t val = 0; - uint64_t uval = 0; - int nextRidIndex = 0, argIndex = 0; - bool done = false, cmp = false, isNull = false, isEmpty = false; + // If required, find Min/Max values of the data + if (ValidMinMax) + { + SKIP_EMPTY_VALUES? findMinMaxArray (dataSize, dataArray, MinPtr, MaxPtr, EMPTY_VALUE, NULL_VALUE) + : findMinMaxArray(dataSize, dataArray, MinPtr, MaxPtr, EMPTY_VALUE, NULL_VALUE); + } + + + // Choose initial filterArray[i] value depending on the operation + bool initValue = false; + if (filterCount == 0) {initValue = true;} + else if (BOP_NONE == BOP) {initValue = false; BOP = BOP_OR;} + else if (BOP_OR == BOP) {initValue = false;} + else if (BOP_XOR == BOP) {initValue = false;} + else if (BOP_AND == BOP) {initValue = true;} + + // Temporary array accumulating results of filtering for each record + std::vector filterVec(dataSize, initValue); + auto filterArray = filterVec.data(); + + // Real type of column data, may be floating-point (used only for comparisons in the filtering) + using FLOAT_T = typename std::conditional::type; + using DATA_T = typename std::conditional::type; + auto realDataArray = reinterpret_cast(dataArray); + + + // Evaluate column filter on elements of dataArray and store results into filterArray + if (filterSet != NULL && BOP == BOP_OR) + { + applySetFilter(dataSize, dataArray, filterSet, filterArray); + } + else if (filterSet != NULL && BOP == BOP_AND) + { + applySetFilter(dataSize, dataArray, filterSet, filterArray); + } + else + + for (int i = 0; i < filterCount; ++i) + { + DATA_T cmp_value; // value for comparison, may be floating-point + copyValue(&cmp_value, &filterValues[i], sizeof(cmp_value)); + + switch(BOP) + { + case BOP_AND: applyFilterElement(filterCOPs[i], dataSize, realDataArray, cmp_value, filterArray); break; + case BOP_OR: applyFilterElement (filterCOPs[i], dataSize, realDataArray, cmp_value, filterArray); break; + case BOP_XOR: applyFilterElement(filterCOPs[i], dataSize, realDataArray, cmp_value, filterArray); break; + default: idbassert(0); + } + } + } + + + // Copy filtered data and/or their RIDs into output buffer + if (WRITE_RID && WRITE_DATA) + { + IS_NULL_VALUE_MATCHES? writeArray (dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE) + : writeArray(dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE); + } + else if (WRITE_RID) + { + IS_NULL_VALUE_MATCHES? writeArray (dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE) + : writeArray(dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE); + } + else + { + IS_NULL_VALUE_MATCHES? writeArray (dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE) + : writeArray(dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE); + } +} +*/ + +// These two are templates update min/max values in the loop iterating the values in filterColumnData. +template::type* = nullptr> +inline void updateMinMax(T& Min, T& Max, T& curValue, NewColRequestHeader* in) +{ + constexpr int COL_WIDTH = sizeof(T); + if (colCompare(Min, curValue, COMPARE_GT, false, in->colType)) + Min = curValue; + + if (colCompare(Max, curValue, COMPARE_LT, false, in->colType)) + Max = curValue; +} + +template::type* = nullptr> +inline void updateMinMax(T& Min, T& Max, T& curValue, NewColRequestHeader* in) +{ + if (Min > curValue) + Min = curValue; + + if (Max < curValue) + Max = curValue; +} + +// TBD Check if MCS really needs to copy values from in into out msgs or +// it is possible to copy from in msg into BPP::values directly. +// This template contains the main scanning/filtering loop. +// Copy data matching parsedColumnFilter from input to output. +// Input is srcArray[srcSize], optionally accessed in the order defined by ridArray[ridSize]. +// Output is BLOB out[outSize], written starting at offset *written, which is updated afterward. +template +void filterColumnData( + NewColRequestHeader* in, + NewColResultHeader* out, + unsigned outSize, + unsigned* written, + uint16_t* ridArray, + const uint16_t ridSize, // Number of values in ridArray + int* srcArray16, + const uint32_t srcSize, + boost::shared_ptr parsedColumnFilter) +{ + using FT = typename IntegralTypeToFilterType::type; + using ST = typename IntegralTypeToFilterSetType::type; + constexpr int COL_WIDTH = sizeof(T); + const T* srcArray = reinterpret_cast(srcArray16); + + // Cache some structure fields in local vars + auto dataType = (CalpontSystemCatalog::ColDataType) in->colType.DataType; // Column datatype + uint32_t filterCount = in->NOPS; // Number of elements in the filter + uint8_t outputType = in->OutputType; + + // If no pre-parsed column filter is set, parse the filter in the message + if (parsedColumnFilter.get() == nullptr && filterCount > 0) + parsedColumnFilter = _parseColumnFilter(in->getFilterStringPtr(), + dataType, filterCount, in->BOP); + + // Cache parsedColumnFilter fields in local vars + auto columnFilterMode = filterCount==0 ? ALWAYS_TRUE : parsedColumnFilter->columnFilterMode; + FT* filterValues = filterCount==0 ? nullptr : parsedColumnFilter->getFilterVals(); + auto filterCOPs = filterCount==0 ? nullptr : parsedColumnFilter->prestored_cops.get(); + auto filterRFs = filterCount==0 ? nullptr : parsedColumnFilter->prestored_rfs.get(); + ST* filterSet = filterCount==0 ? nullptr : parsedColumnFilter->getFilterSet(); + + // ########################### + // Bit patterns in srcArray[i] representing EMPTY and NULL values + T EMPTY_VALUE = getEmptyValue(dataType); + T NULL_VALUE = getNullValue(dataType); + + // Precompute filter results for NULL values + bool isNullValueMatches = matchingColValue(NULL_VALUE, columnFilterMode, + filterSet, filterCount, filterCOPs, filterValues, filterRFs, in->colType, NULL_VALUE); + + // Boolean indicating whether to capture the min and max values + bool ValidMinMax = isMinMaxValid(in); + // Local vars to capture the min and max values + T Min = datatypes::numeric_limits::max(); + T Max = (KIND == KIND_UNSIGNED) ? 0 : datatypes::numeric_limits::min(); + +/* WIP add vertical processing + // If possible, use faster "vertical" filtering approach + if (KIND != KIND_TEXT) + { + bool canUseFastFiltering = true; + for (int i = 0; i < filterCount; ++i) + if (filterRFs[i] != 0) + canUseFastFiltering = false; + + if (canUseFastFiltering) + { + processArray(srcArray, srcSize, ridArray, ridSize, + in->BOP, filterSet, filterCount, filterCOPs, filterValues, + reinterpret_cast(out) + *written, + written, & out->NVALS, & out->RidFlags, + (outputType & OT_RID) != 0, + (outputType & (OT_TOKEN | OT_DATAVALUE)) != 0, + (outputType & OT_RID) != 0, //TODO: check correctness of this condition for SKIP_EMPTY_VALUES + EMPTY_VALUE, + isNullValueMatches, NULL_VALUE, + ValidMinMax, &Min, &Max); + return; + } + } +*/ + + // Loop-local variables + T curValue = 0; uint16_t rid = 0; - prestored_set_t::const_iterator it; + bool isEmpty = false; - int64_t* std_argVals = (int64_t*)alloca(in->NOPS * sizeof(int64_t)); - uint8_t* std_cops = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t)); - uint8_t* std_rfs = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t)); - int64_t* argVals = NULL; - uint64_t* uargVals = NULL; - uint8_t* cops = NULL; - uint8_t* rfs = NULL; - - // no pre-parsed column filter is set, parse the filter in the message - if (parsedColumnFilter.get() == NULL) + // Loop over the column values, storing those matching the filter, and updating the min..max range + for (uint32_t i = 0; + nextColValue(curValue, &isEmpty, + &i, &rid, + srcArray, srcSize, ridArray, ridSize, + outputType, EMPTY_VALUE); ) { - if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType)) + if (isEmpty) + continue; + else if (isNullValue(curValue, NULL_VALUE)) { - uargVals = reinterpret_cast(std_argVals); - cops = std_cops; - rfs = std_rfs; - - for (argIndex = 0; argIndex < in->NOPS; argIndex++) - { - args = reinterpret_cast(&in8[sizeof(NewColRequestHeader) + - (argIndex * filterSize)]); - cops[argIndex] = args->COP; - rfs[argIndex] = args->rf; - - switch (W) - { - case 1: - uargVals[argIndex] = *reinterpret_cast(args->val); - break; - - case 2: - uargVals[argIndex] = *reinterpret_cast(args->val); - break; - - case 4: - uargVals[argIndex] = *reinterpret_cast(args->val); - break; - - case 8: - uargVals[argIndex] = *reinterpret_cast(args->val); - break; - } - } + // If NULL values match the filter, write curValue to the output buffer + if (isNullValueMatches) + writeColValue(outputType, out, outSize, written, rid, srcArray); } else { - argVals = std_argVals; - cops = std_cops; - rfs = std_rfs; - - for (argIndex = 0; argIndex < in->NOPS; argIndex++) + // If curValue matches the filter, write it to the output buffer + if (matchingColValue(curValue, columnFilterMode, filterSet, filterCount, + filterCOPs, filterValues, filterRFs, in->colType, NULL_VALUE)) { - args = reinterpret_cast(&in8[sizeof(NewColRequestHeader) + - (argIndex * filterSize)]); - cops[argIndex] = args->COP; - rfs[argIndex] = args->rf; - - switch (W) - { - case 1: - argVals[argIndex] = args->val[0]; - break; - - case 2: - argVals[argIndex] = *reinterpret_cast(args->val); - break; - - case 4: -#if 0 - if (in->colType.DataType == CalpontSystemCatalog::FLOAT) - { - double dTmp; - - dTmp = (double) * ((const float*) args->val); - argVals[argIndex] = *((int64_t*) &dTmp); - } - else - argVals[argIndex] = *reinterpret_cast(args->val); - -#else - argVals[argIndex] = *reinterpret_cast(args->val); -#endif - break; - - case 8: - argVals[argIndex] = *reinterpret_cast(args->val); - break; - } + writeColValue(outputType, out, outSize, written, rid, srcArray); } + + // Update Min and Max if necessary. EMPTY/NULL values are processed in other branches. + if (ValidMinMax) + updateMinMax(Min, Max, curValue, in); } } - // we have a pre-parsed filter, and it's in the form of op and value arrays - else if (parsedColumnFilter->columnFilterMode == TWO_ARRAYS) + + + // Write captured Min/Max values to *out + out->ValidMinMax = ValidMinMax; + if (ValidMinMax) { - argVals = parsedColumnFilter->prestored_argVals.get(); - uargVals = reinterpret_cast(parsedColumnFilter->prestored_argVals.get()); - cops = parsedColumnFilter->prestored_cops.get(); - rfs = parsedColumnFilter->prestored_rfs.get(); + out->Min = Min; + out->Max = Max; } - - // else we have a pre-parsed filter, and it's an unordered set for quick == comparisons - - if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType)) - { - uval = nextUnsignedColValue(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, - &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); - } - else - { - val = nextColValue(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, - &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); - } - - while (!done) - { - if (cops == NULL) // implies parsedColumnFilter && columnFilterMode == SET - { - /* bug 1920: ignore NULLs in the set and in the column data */ - if (!(isNull && in->BOP == BOP_AND)) - { - if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType)) - { - it = parsedColumnFilter->prestored_set->find(*reinterpret_cast(&uval)); - } - else - { - it = parsedColumnFilter->prestored_set->find(val); - } - - if (in->BOP == BOP_OR) - { - // assume COP == COMPARE_EQ - if (it != parsedColumnFilter->prestored_set->end()) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - } - } - else if (in->BOP == BOP_AND) - { - // assume COP == COMPARE_NE - if (it == parsedColumnFilter->prestored_set->end()) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - } - } - } - } - else - { - for (argIndex = 0; argIndex < in->NOPS; argIndex++) - { - if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType)) - { - cmp = colCompareUnsigned(uval, uargVals[argIndex], cops[argIndex], - rfs[argIndex], in->colType.DataType, W, isNull); - } - else - { - cmp = colCompare(val, argVals[argIndex], cops[argIndex], - rfs[argIndex], in->colType, W, isNull); - } - - if (in->NOPS == 1) - { - if (cmp == true) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - } - - break; - } - else if (in->BOP == BOP_AND && cmp == false) - { - break; - } - else if (in->BOP == BOP_OR && cmp == true) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - break; - } - } - - if ((argIndex == in->NOPS && in->BOP == BOP_AND) || in->NOPS == 0) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - } - } - - // Set the min and max if necessary. Ignore nulls. - if (out->ValidMinMax && !isNull && !isEmpty) - { - - if (in->colType.DataType == CalpontSystemCatalog::CHAR || - in->colType.DataType == CalpontSystemCatalog::VARCHAR || - in->colType.DataType == CalpontSystemCatalog::BLOB || - in->colType.DataType == CalpontSystemCatalog::TEXT ) - { - if (colCompare(out->Min, val, COMPARE_GT, false, in->colType, W)) - out->Min = val; - - if (colCompare(out->Max, val, COMPARE_LT, false, in->colType, W)) - out->Max = val; - } - else if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType)) - { - if (static_cast(out->Min) > uval) - out->Min = static_cast(uval); - - if (static_cast(out->Max) < uval) - out->Max = static_cast(uval);; - } - else - { - if (out->Min > val) - out->Min = val; - - if (out->Max < val) - out->Max = val; - } - } - - if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType)) - { - uval = nextUnsignedColValue(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, - &isNull, &isEmpty, &rid, in->OutputType, reinterpret_cast(block), - itemsPerBlk); - } - else - { - val = nextColValue(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, - &isNull, &isEmpty, &rid, in->OutputType, reinterpret_cast(block), - itemsPerBlk); - } - } - - if (fStatsPtr) -#ifdef _MSC_VER - fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'K'); - -#else - fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'K'); -#endif -} - -// There are number of hardcoded type-dependant objects -// that effectively makes this template int128-based only. -// Use type based template method for Min,Max values. -// prestored_set_128 must be a template with a type arg. -template -inline void p_Col_bin_ridArray(NewColRequestHeader* in, - NewColResultHeader* out, - unsigned outSize, - unsigned* written, int* block, Stats* fStatsPtr, unsigned itemsPerBlk, - boost::shared_ptr parsedColumnFilter) -{ - uint16_t* ridArray = 0; - uint8_t* in8 = reinterpret_cast(in); - const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + W; - - if (in->NVALS > 0) - ridArray = reinterpret_cast(&in8[sizeof(NewColRequestHeader) + - (in->NOPS * filterSize)]); - - if (ridArray && 1 == in->sort ) - { - qsort(ridArray, in->NVALS, sizeof(uint16_t), compareBlock); - - if (fStatsPtr) -#ifdef _MSC_VER - fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'O'); - -#else - fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'O'); -#endif - } - - // Set boolean indicating whether to capture the min and max values. - out->ValidMinMax = isMinMaxValid(in); - - if (out->ValidMinMax) - { - // Assume that isUnsigned returns true for 8-bytes DTs only - if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType)) - { - out->Min = -1; - out->Max = 0; - } - else - { - out->Min = datatypes::Decimal::maxInt128; - out->Max = datatypes::Decimal::minInt128; - } - } - else - { - out->Min = 0; - out->Max = 0; - } - - typedef char binWtype [W]; - - const ColArgs* args = NULL; - binWtype* bval; - int nextRidIndex = 0, argIndex = 0; - bool done = false, cmp = false, isNull = false, isEmpty = false; - uint16_t rid = 0; - prestored_set_t_128::const_iterator it; - - binWtype* argVals = (binWtype*)alloca(in->NOPS * W); - uint8_t* std_cops = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t)); - uint8_t* std_rfs = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t)); - uint8_t* cops = NULL; - uint8_t* rfs = NULL; - - // no pre-parsed column filter is set, parse the filter in the message - if (parsedColumnFilter.get() == NULL) { - - cops = std_cops; - rfs = std_rfs; - - for (argIndex = 0; argIndex < in->NOPS; argIndex++) { - args = reinterpret_cast (&in8[sizeof (NewColRequestHeader) + - (argIndex * filterSize)]); - cops[argIndex] = args->COP; - rfs[argIndex] = args->rf; - - memcpy(argVals[argIndex],args->val, W); - } - } - // we have a pre-parsed filter, and it's in the form of op and value arrays - else if (parsedColumnFilter->columnFilterMode == TWO_ARRAYS) - { - argVals = (binWtype*) parsedColumnFilter->prestored_argVals128.get(); - cops = parsedColumnFilter->prestored_cops.get(); - rfs = parsedColumnFilter->prestored_rfs.get(); - } - - // else we have a pre-parsed filter, and it's an unordered set for quick == comparisons - - bval = (binWtype*)nextBinColValue(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, - &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); - - T val; - - while (!done) - { - val = *reinterpret_cast(bval); - - if (cops == NULL) // implies parsedColumnFilter && columnFilterMode == SET - { - /* bug 1920: ignore NULLs in the set and in the column data */ - if (!(isNull && in->BOP == BOP_AND)) - { - - it = parsedColumnFilter->prestored_set_128->find(val); - - - if (in->BOP == BOP_OR) - { - // assume COP == COMPARE_EQ - if (it != parsedColumnFilter->prestored_set_128->end()) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - } - } - else if (in->BOP == BOP_AND) - { - // assume COP == COMPARE_NE - if (it == parsedColumnFilter->prestored_set_128->end()) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - } - } - } - } - else - { - for (argIndex = 0; argIndex < in->NOPS; argIndex++) - { - T filterVal = *reinterpret_cast(argVals[argIndex]); - - cmp = colCompare(val, filterVal, cops[argIndex], - rfs[argIndex], in->colType.DataType, W, isNull); - - if (in->NOPS == 1) - { - if (cmp == true) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - } - break; - } - else if (in->BOP == BOP_AND && cmp == false) - { - break; - } - else if (in->BOP == BOP_OR && cmp == true) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - break; - } - } - - if ((argIndex == in->NOPS && in->BOP == BOP_AND) || in->NOPS == 0) - { - store(in, out, outSize, written, rid, reinterpret_cast(block)); - } - } - - // Set the min and max if necessary. Ignore nulls. - if (out->ValidMinMax && !isNull && !isEmpty) - { - - if (in->colType.DataType == CalpontSystemCatalog::CHAR || - in->colType.DataType == CalpontSystemCatalog::VARCHAR) - { - // !!! colCompare is overloaded with int128_t only yet. - if (colCompare(out->Min, val, COMPARE_GT, false, in->colType, W)) - { - out->Min = val; - } - - if (colCompare(out->Max, val, COMPARE_LT, false, in->colType, W)) - { - out->Max = val; - } - } - else - { - if (out->Min > val) - { - out->Min = val; - } - - if (out->Max < val) - { - out->Max = val; - } - } - } - - bval = (binWtype*)nextBinColValue(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull, - &isEmpty, &rid, in->OutputType, reinterpret_cast(block), itemsPerBlk); - - } - - if (fStatsPtr) -#ifdef _MSC_VER - fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'K'); - -#else - fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'K'); -#endif -} +} // end of filterColumnData } //namespace anon namespace primitives { -void PrimitiveProcessor::p_Col(NewColRequestHeader* in, NewColResultHeader* out, - unsigned outSize, unsigned* written) +// The routine used to dispatch CHAR|VARCHAR|TEXT|BLOB scan. +inline bool isDictTokenScan(NewColRequestHeader* in) { + switch (in->colType.DataType) + { + case CalpontSystemCatalog::CHAR: + return (in->colType.DataSize > 8); + + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::BLOB: + case CalpontSystemCatalog::TEXT: + return (in->colType.DataSize > 7); + default: + return false; + } +} + +// A set of dispatchers for different column widths/integral types. +template= 5 + typename std::enable_if::type* = nullptr> // gcc >= 5 + #else + typename std::enable_if::type*> // gcc 4.8.5 + #endif +#else + typename std::enable_if::type* = nullptr> +#endif +void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in, + NewColResultHeader* out, + unsigned outSize, + unsigned* written) +{ + constexpr int W = sizeof(T); + auto dataType = (execplan::CalpontSystemCatalog::ColDataType) in->colType.DataType; + if (dataType == execplan::CalpontSystemCatalog::FLOAT) + { +// WIP make this inline function + const uint16_t ridSize = in->NVALS; + uint16_t* ridArray = in->getRIDArrayPtr(W); + const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE + : BLOCK_SIZE / W; + filterColumnData(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); + return; + } + _scanAndFilterTypeDispatcher(in, out, outSize, written); +} + +template= 5 + typename std::enable_if::type* = nullptr> // gcc >= 5 + #else + typename std::enable_if::type*> // gcc 4.8.5 + #endif +#else + typename std::enable_if::type* = nullptr> +#endif +void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in, + NewColResultHeader* out, + unsigned outSize, + unsigned* written) +{ + constexpr int W = sizeof(T); + auto dataType = (execplan::CalpontSystemCatalog::ColDataType) in->colType.DataType; + if (dataType == execplan::CalpontSystemCatalog::DOUBLE) + { + const uint16_t ridSize = in->NVALS; + uint16_t* ridArray = in->getRIDArrayPtr(W); + const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE + : BLOCK_SIZE / W; + filterColumnData(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); + return; + } + _scanAndFilterTypeDispatcher(in, out, outSize, written); +} + +template= 5 + sizeof(T) == sizeof(int128_t), T>::type* = nullptr> // gcc >= 5 + #else + sizeof(T) == sizeof(int128_t), T>::type*> // gcc 4.8.5 + #endif +#else + sizeof(T) == sizeof(int128_t), T>::type* = nullptr> +#endif +void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in, + NewColResultHeader* out, + unsigned outSize, + unsigned* written) +{ + _scanAndFilterTypeDispatcher(in, out, outSize, written); +} + +template= 5 + typename std::enable_if::type* = nullptr> // gcc >= 5 + #else + typename std::enable_if::type*> // gcc 4.8.5 + #endif +#else + typename std::enable_if::type* = nullptr> +#endif +void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in, + NewColResultHeader* out, + unsigned outSize, + unsigned* written) +{ + constexpr int W = sizeof(T); + const uint16_t ridSize = in->NVALS; + uint16_t* ridArray = in->getRIDArrayPtr(W); + const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE + : BLOCK_SIZE / W; + + filterColumnData(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); +} + +template= 5 + typename std::enable_if::type* = nullptr> // gcc >= 5 + #else + typename std::enable_if::type*> // gcc 4.8.5 + #endif +#else + typename std::enable_if::type* = nullptr> +#endif +void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in, + NewColResultHeader* out, + unsigned outSize, + unsigned* written) +{ + constexpr int W = sizeof(T); + const uint16_t ridSize = in->NVALS; + uint16_t* ridArray = in->getRIDArrayPtr(W); + const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE + : BLOCK_SIZE / W; + + auto dataType = (execplan::CalpontSystemCatalog::ColDataType) in->colType.DataType; + if ((dataType == execplan::CalpontSystemCatalog::CHAR || + dataType == execplan::CalpontSystemCatalog::VARCHAR || + dataType == execplan::CalpontSystemCatalog::TEXT) && + !isDictTokenScan(in)) + { + filterColumnData(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); + return; + } + + if (datatypes::isUnsigned(dataType)) + { + using UT = typename std::conditional::value || datatypes::is_uint128_t::value, T, typename datatypes::make_unsigned::type>::type; + filterColumnData(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); + return; + } + filterColumnData(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter); +} + +// The entrypoint for block scanning and filtering. +// The block is in in msg, out msg is used to store values|RIDs matched. +template +void PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader* in, NewColResultHeader* out, + unsigned outSize, unsigned* written) +{ +#ifdef PRIM_DEBUG + auto markEvent = [&] (char eventChar) + { + if (fStatsPtr) + fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, eventChar); + }; +#endif + constexpr int W = sizeof(T); + void *outp = static_cast(out); memcpy(outp, in, sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader)); out->NVALS = 0; @@ -1479,19 +1502,11 @@ void PrimitiveProcessor::p_Col(NewColRequestHeader* in, NewColResultHeader* out, out->OutputType = in->OutputType; out->RidFlags = 0; *written = sizeof(NewColResultHeader); - unsigned itemsPerBlk = 0; - - if (logicalBlockMode) - itemsPerBlk = BLOCK_SIZE; - else - itemsPerBlk = BLOCK_SIZE / in->colType.DataSize; - //...Initialize I/O counts; out->CacheIO = 0; out->PhysicalIO = 0; #if 0 - // short-circuit the actual block scan for testing if (out->LBID >= 802816) { @@ -1500,205 +1515,30 @@ void PrimitiveProcessor::p_Col(NewColRequestHeader* in, NewColResultHeader* out, out->Max = 0; return; } - #endif - if (fStatsPtr) -#ifdef _MSC_VER - fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'B'); - -#else - fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'B'); +#ifdef PRIM_DEBUG + markEvent('B'); #endif - switch (in->colType.DataSize) - { - case 8: - p_Col_ridArray<8>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); - break; - - case 4: - p_Col_ridArray<4>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); - break; - - case 2: - p_Col_ridArray<2>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); - break; - - case 1: - p_Col_ridArray<1>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); - break; - - case 16: - p_Col_bin_ridArray<16, int128_t>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter); - break; - - default: - idbassert(0); - break; - } - - if (fStatsPtr) -#ifdef _MSC_VER - fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'C'); - -#else - fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'C'); + // Sort ridArray (the row index array) if there are RIDs with this in msg + in->sortRIDArrayIfNeeded(W); + scanAndFilterTypeDispatcher(in, out, outSize, written); +#ifdef PRIM_DEBUG + markEvent('C'); #endif } -boost::shared_ptr parseColumnFilter -(const uint8_t* filterString, uint32_t colWidth, uint32_t colType, uint32_t filterCount, - uint32_t BOP) -{ - boost::shared_ptr ret; - uint32_t argIndex; - const ColArgs* args; - bool convertToSet = true; - - if (filterCount == 0) - return ret; - - ret.reset(new ParsedColumnFilter()); - - ret->columnFilterMode = TWO_ARRAYS; - if (datatypes::isWideDecimalType( - (CalpontSystemCatalog::ColDataType)colType, colWidth)) - ret->prestored_argVals128.reset(new int128_t[filterCount]); - else - ret->prestored_argVals.reset(new int64_t[filterCount]); - ret->prestored_cops.reset(new uint8_t[filterCount]); - ret->prestored_rfs.reset(new uint8_t[filterCount]); - - /* - for (unsigned ii = 0; ii < filterCount; ii++) - { - ret->prestored_argVals[ii] = 0; - ret->prestored_cops[ii] = 0; - ret->prestored_rfs[ii] = 0; - } - */ - - const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + colWidth; - - /* Decide which structure to use. I think the only cases where we can use the set - are when NOPS > 1, BOP is OR, and every COP is ==, - and when NOPS > 1, BOP is AND, and every COP is !=. - - Parse the filter predicates and insert them into argVals and cops. - If there were no predicates that violate the condition for using a set, - insert argVals into a set. - */ - if (filterCount == 1) - convertToSet = false; - - for (argIndex = 0; argIndex < filterCount; argIndex++) - { - args = reinterpret_cast(filterString + (argIndex * filterSize)); - ret->prestored_cops[argIndex] = args->COP; - ret->prestored_rfs[argIndex] = args->rf; - - if ((BOP == BOP_OR && args->COP != COMPARE_EQ) || - (BOP == BOP_AND && args->COP != COMPARE_NE) || - (args->COP == COMPARE_NIL)) - convertToSet = false; - - if (isUnsigned((CalpontSystemCatalog::ColDataType)colType)) - { - switch (colWidth) - { - case 1: - ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); - break; - - case 2: - ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); - break; - - case 4: - ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); - break; - - case 8: - ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); - break; - } - } - else - { - switch (colWidth) - { - case 1: - ret->prestored_argVals[argIndex] = args->val[0]; - break; - - case 2: - ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); - break; - - case 4: -#if 0 - if (colType == CalpontSystemCatalog::FLOAT) - { - double dTmp; - - dTmp = (double) * ((const float*) args->val); - ret->prestored_argVals[argIndex] = *((int64_t*) &dTmp); - } - else - ret->prestored_argVals[argIndex] = - *reinterpret_cast(args->val); - -#else - ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); -#endif - break; - - case 8: - ret->prestored_argVals[argIndex] = *reinterpret_cast(args->val); - break; - - case 16: - { - datatypes::TSInt128::assignPtrPtr(&(ret->prestored_argVals128[argIndex]), - args->val); - break; - } - } - } - -// cout << "inserted* " << hex << ret->prestored_argVals[argIndex] << dec << -// " COP = " << (int) ret->prestored_cops[argIndex] << endl; - - } - - if (convertToSet) - { - ret->columnFilterMode = UNORDERED_SET; - if (datatypes::isWideDecimalType( - (CalpontSystemCatalog::ColDataType)colType, colWidth)) - { - ret->prestored_set_128.reset(new prestored_set_t_128()); - - // @bug 2584, use COMPARE_NIL for "= null" to allow "is null" in OR expression - for (argIndex = 0; argIndex < filterCount; argIndex++) - if (ret->prestored_rfs[argIndex] == 0) - ret->prestored_set_128->insert(ret->prestored_argVals128[argIndex]); - } - else - { - ret->prestored_set.reset(new prestored_set_t()); - - // @bug 2584, use COMPARE_NIL for "= null" to allow "is null" in OR expression - for (argIndex = 0; argIndex < filterCount; argIndex++) - if (ret->prestored_rfs[argIndex] == 0) - ret->prestored_set->insert(ret->prestored_argVals[argIndex]); - } - } - - return ret; -} +template +void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, NewColResultHeader*, unsigned, unsigned*); +template +void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, NewColResultHeader*, unsigned int, unsigned int*); +template +void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, NewColResultHeader*, unsigned int, unsigned int*); +template +void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, NewColResultHeader*, unsigned int, unsigned int*); +template +void primitives::PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader*, NewColResultHeader*, unsigned int, unsigned int*); } // namespace primitives // vim:ts=4 sw=4: - diff --git a/primitives/linux-port/pp-scan-unittest.cpp b/primitives/linux-port/pp-scan-unittest.cpp index 72483bc9e..69309e8ed 100644 --- a/primitives/linux-port/pp-scan-unittest.cpp +++ b/primitives/linux-port/pp-scan-unittest.cpp @@ -3238,7 +3238,7 @@ public: args->COP = COMPARE_LT; memcpy(args->val, &tmp, in->DataSize); args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + - sizeof(ColArgs) + in->DataSize]); + sizeof(ColArgs) + in->DataSize]); args->COP = COMPARE_GT; tmp = 1000; memcpy(args->val, &tmp, in->DataSize); diff --git a/primitives/linux-port/primitiveprocessor.cpp b/primitives/linux-port/primitiveprocessor.cpp index 3a8adaa4f..cc71b1eaa 100644 --- a/primitives/linux-port/primitiveprocessor.cpp +++ b/primitives/linux-port/primitiveprocessor.cpp @@ -58,10 +58,17 @@ void PrimitiveProcessor::setParsedColumnFilter(boost::shared_ptr prestored_set_t; typedef std::tr1::unordered_set prestored_set_t_128; @@ -124,9 +133,39 @@ public: } }; - -struct ParsedColumnFilter +// Not the safest way b/c it doesn't cover uint128_t but the type +// isn't used now and the approach saves some space. +template +struct IntegralTypeToFilterType { + using type = int64_t; +}; + +template<> +struct IntegralTypeToFilterType +{ + using type = int128_t; +}; + +template +struct IntegralTypeToFilterSetType +{ + using type = prestored_set_t; +}; + +template<> +struct IntegralTypeToFilterSetType +{ + using type = prestored_set_t_128; +}; + +// DRRTuy shared_arrays and shared_ptr looks redundant here +// given that the higher level code uses shared_ptr +// thus runtime calls ParsedColumnFilter dtor in the end. +class ParsedColumnFilter +{ + public: + static constexpr uint32_t noSetFilterThreshold = 8; ColumnFilterMode columnFilterMode; boost::shared_array prestored_argVals; boost::shared_array prestored_argVals128; @@ -136,7 +175,92 @@ struct ParsedColumnFilter boost::shared_ptr prestored_set_128; ParsedColumnFilter(); + ParsedColumnFilter(const uint32_t aFilterCount); ~ParsedColumnFilter(); + + template::value, T>::type* = nullptr> + T* getFilterVals() + { + return prestored_argVals.get(); + } + + template::value, T>::type* = nullptr> + T* getFilterVals() + { + return prestored_argVals128.get(); + } + + template::value, T>::type* = nullptr> + T* getFilterSet() + { + return prestored_set.get(); + } + + template::value, T>::type* = nullptr> + T* getFilterSet() + { + return prestored_set_128.get(); + } + + template::type* = nullptr> + void storeFilterArg(const uint32_t argIndex, const T* argValPtr) + { + prestored_argVals[argIndex] = *argValPtr; + } + + template::type* = nullptr> + void storeFilterArg(const uint32_t argIndex, const WT* argValPtr) + { + datatypes::TSInt128::assignPtrPtr(&(prestored_argVals128[argIndex]), + argValPtr); + } + + template::type* = nullptr> + void allocateSpaceForFilterArgs() + { + prestored_argVals.reset(new int64_t[mFilterCount]); + } + + template::type* = nullptr> + void allocateSpaceForFilterArgs() + { + prestored_argVals128.reset(new int128_t[mFilterCount]); + } + + template::type* = nullptr> + void populatePrestoredSet() + { + prestored_set_128.reset(new prestored_set_t_128()); + + // @bug 2584, use COMPARE_NIL for "= null" to allow "is null" in OR expression + for (uint32_t argIndex = 0; argIndex < mFilterCount; ++argIndex) + if (prestored_rfs[argIndex] == 0) + prestored_set_128->insert(prestored_argVals128[argIndex]); + } + + template::type* = nullptr> + void populatePrestoredSet() + { + prestored_set.reset(new prestored_set_t()); + + // @bug 2584, use COMPARE_NIL for "= null" to allow "is null" in OR expression + for (uint32_t argIndex = 0; argIndex < mFilterCount; argIndex++) + if (prestored_rfs[argIndex] == 0) + prestored_set->insert(prestored_argVals[argIndex]); + } + + private: + uint32_t mFilterCount; }; //@bug 1828 These need to be public so that column operations can use it for 'like' @@ -146,9 +270,6 @@ struct p_DataValue const uint8_t* data; }; -boost::shared_ptr parseColumnFilter(const uint8_t* filterString, - uint32_t colWidth, uint32_t colType, uint32_t filterCount, uint32_t BOP); - /** @brief This class encapsulates the primitive processing functionality of the system. * * This class encapsulates the primitive processing functionality of the system. @@ -258,7 +379,7 @@ public: * a NewColResultHeader, followed by the output type specified by in->OutputType. * \li If OT_RID, it will be an array of RIDs * \li If OT_DATAVALUE, it will be an array of matching data values stored in the column - * \li If OT_BOTH, it will be an array of pairs + * \li If OT_BOTH, it will be an array of pairs * @param outSize The size of the output buffer in bytes. * @param written (out parameter) A pointer to 1 int, which will contain the * number of bytes written to out. @@ -267,8 +388,41 @@ public: void p_Col(NewColRequestHeader* in, NewColResultHeader* out, unsigned outSize, unsigned* written); + template::type* = nullptr> + void scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out, + unsigned outSize, unsigned* written); + + template::type* = nullptr> + void scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out, + unsigned outSize, unsigned* written); + template::type* = nullptr> + void scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out, + unsigned outSize, unsigned* written); + + template::type* = nullptr> + void _scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out, + unsigned outSize, unsigned* written); + + template::type* = nullptr> + void _scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out, + unsigned outSize, unsigned* written); + + template + void columnScanAndFilter(NewColRequestHeader* in, NewColResultHeader* out, + unsigned outSize, unsigned* written); + boost::shared_ptr parseColumnFilter(const uint8_t* filterString, - uint32_t colWidth, uint32_t colType, uint32_t filterCount, uint32_t BOP); + uint32_t colWidth, + uint32_t colType, + uint32_t filterCount, + uint32_t BOP); void setParsedColumnFilter(boost::shared_ptr); /** @brief The p_ColAggregate primitive processor. @@ -312,9 +466,6 @@ private: void nextSig(int NVALS, const PrimToken* tokens, p_DataValue* ret, uint8_t outputFlags = 0, bool oldGetSigBehavior = false, bool skipNulls = false) throw(); -// void do_sum8(NewColAggResultHeader *out, int64_t val); -// void do_unsignedsum8(NewColAggResultHeader *out, int64_t val); - uint64_t masks[11]; int dict_OffsetIndex, currentOffsetIndex; // used by p_dictionary int fDebugLevel; @@ -326,6 +477,108 @@ private: friend class ::PrimTest; }; +// +//COMPILE A COLUMN FILTER +// +// Compile column filter from BLOB into structure optimized for fast filtering. +// Return a shared_ptr for the compiled filter. +template // C++ integer type providing storage for colType +boost::shared_ptr _parseColumnFilter( + const uint8_t* filterString, // Filter represented as BLOB + uint32_t colType, // Column datatype as ColDataType + uint32_t filterCount, // Number of filter elements contained in filterString + uint32_t BOP) // Operation (and/or/xor/none) that combines all filter elements +{ + using UT = typename std::conditional::value || datatypes::is_uint128_t::value, T, typename datatypes::make_unsigned::type>::type; + const uint32_t WIDTH = sizeof(T); // Sizeof of the column to be filtered + + boost::shared_ptr ret; // Place for building the value to return + if (filterCount == 0) + return ret; + + // Allocate the compiled filter structure with space for filterCount filters. + // No need to init arrays since they will be filled on the fly. + ret.reset(new ParsedColumnFilter(filterCount)); + ret->allocateSpaceForFilterArgs(); + + // Choose initial filter mode based on operation and number of filter elements + if (filterCount == 1) + ret->columnFilterMode = SINGLE_COMPARISON; + else if (BOP == BOP_OR) + ret->columnFilterMode = ANY_COMPARISON_TRUE; + else if (BOP == BOP_AND) + ret->columnFilterMode = ALL_COMPARISONS_TRUE; + else if (BOP == BOP_XOR) + ret->columnFilterMode = XOR_COMPARISONS; + else + idbassert(0); // BOP_NONE is compatible only with filterCount <= 1 + + // Size of single filter element in filterString BLOB + const uint32_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + WIDTH; + + // Parse the filter predicates and insert them into argVals and cops + for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) + { + // Pointer to ColArgs structure representing argIndex'th element in the BLOB + auto args = reinterpret_cast(filterString + (argIndex * filterSize)); + + ret->prestored_cops[argIndex] = args->COP; + ret->prestored_rfs[argIndex] = args->rf; + + if (datatypes::isUnsigned((execplan::CalpontSystemCatalog::ColDataType)colType)) + ret->storeFilterArg(argIndex, reinterpret_cast(args->val)); + else + ret->storeFilterArg(argIndex, reinterpret_cast(args->val)); + } + + /* Decide which structure to use. I think the only cases where we can use the set + are when NOPS > 1, BOP is OR, and every COP is ==, + and when NOPS > 1, BOP is AND, and every COP is !=. + + If there were no predicates that violate the condition for using a set, + insert argVals into a set. + */ + if (filterCount > 1) + { + // Check that all COPs are of right kind that depends on BOP + for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++) + { + auto cop = ret->prestored_cops[argIndex]; + if (! ((BOP == BOP_OR && cop == COMPARE_EQ) || + (BOP == BOP_AND && cop == COMPARE_NE))) + { + goto skipConversion; + } + } + + // Now we found that conversion is possible. Let's choose between array-based search + // and set-based search depending on the set size. + //TODO: Tailor the threshold based on the actual search algorithms used and WIDTH/SIMD_WIDTH + if (filterCount <= ParsedColumnFilter::noSetFilterThreshold) + { + // Assign filter mode of array-based filtering + if (BOP == BOP_OR) + ret->columnFilterMode = ONE_OF_VALUES_IN_ARRAY; + else + ret->columnFilterMode = NONE_OF_VALUES_IN_ARRAY; + } + else + { + // Assign filter mode of set-based filtering + if (BOP == BOP_OR) + ret->columnFilterMode = ONE_OF_VALUES_IN_SET; + else + ret->columnFilterMode = NONE_OF_VALUES_IN_SET; + + ret->populatePrestoredSet(); + } + + skipConversion:; + } + + return ret; +} + } //namespace primitives #endif diff --git a/primitives/primproc/batchprimitiveprocessor.h b/primitives/primproc/batchprimitiveprocessor.h index 144361253..1f9b2541c 100644 --- a/primitives/primproc/batchprimitiveprocessor.h +++ b/primitives/primproc/batchprimitiveprocessor.h @@ -169,6 +169,14 @@ public: { return doJoin; } + primitives::PrimitiveProcessor& getPrimitiveProcessor() + { + return pp; + } + uint32_t getOutMsgSize() const + { + return outMsgSize; + } private: BatchPrimitiveProcessor(); BatchPrimitiveProcessor(const BatchPrimitiveProcessor&); diff --git a/primitives/primproc/columncommand.cpp b/primitives/primproc/columncommand.cpp index e604e12d1..c75631f3d 100644 --- a/primitives/primproc/columncommand.cpp +++ b/primitives/primproc/columncommand.cpp @@ -1,4 +1,5 @@ /* Copyright (C) 2014 InfiniDB, Inc. + Copyright (C) 2016-2021 MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -249,7 +250,36 @@ void ColumnCommand::loadData() void ColumnCommand::issuePrimitive() { - _issuePrimitive(); + loadData(); + + if (!suppressFilter) + bpp->getPrimitiveProcessor().setParsedColumnFilter(parsedColumnFilter); + else + bpp->getPrimitiveProcessor().setParsedColumnFilter(emptyFilter); + + switch(colType.colWidth) + { + case 1: + _issuePrimitive<1>(); + break; + case 2: + _issuePrimitive<2>(); + break; + case 4: + _issuePrimitive<4>(); + break; + case 8: + _issuePrimitive<8>(); + break; + case 16: + _issuePrimitive<16>(); + break; + default: + throw NotImplementedExcept(std::string("ColumnCommand::_issuePrimitive does not support ") + + std::to_string(colType.colWidth) + + std::string(" byte width.")); + } + if (_isScan) { if (LIKELY(colType.isNarrow())) @@ -259,18 +289,29 @@ void ColumnCommand::issuePrimitive() } } -void ColumnCommand::_issuePrimitive() +template +void ColumnCommand::_issuePrimitiveNarrow() { - uint32_t resultSize; - - loadData(); + _loadData(); if (!suppressFilter) - bpp->pp.setParsedColumnFilter(parsedColumnFilter); + bpp->getPrimitiveProcessor().setParsedColumnFilter(parsedColumnFilter); else - bpp->pp.setParsedColumnFilter(emptyFilter); + bpp->getPrimitiveProcessor().setParsedColumnFilter(emptyFilter); - bpp->pp.p_Col(primMsg, outMsg, bpp->outMsgSize, (unsigned int*)&resultSize); + _issuePrimitive(); + + if (_isScan) + updateCPDataNarrow(); +} + + +template +void ColumnCommand::_issuePrimitive() +{ + using IntegralType = typename datatypes::WidthToSIntegralType::type; + uint32_t resultSize; + bpp->getPrimitiveProcessor().columnScanAndFilter(primMsg, outMsg, bpp->getOutMsgSize(), (unsigned int*)&resultSize); } // _issuePrimitive() void ColumnCommand::updateCPDataNarrow() @@ -559,14 +600,33 @@ void ColumnCommand::createCommand(ByteStream& bs) deserializeInlineVector(bs, lastLbid); Command::createCommand(bs); + switch (colType.colWidth) + { + case 1: + createColumnFilter::type>(); + break; - parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth, - colType.colDataType, filterCount, BOP); + case 2: + createColumnFilter::type>(); + break; - /* OR hack */ - emptyFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth, - colType.colDataType, 0, BOP); + case 4: + createColumnFilter::type>(); + break; + case 8: + createColumnFilter::type>(); + break; + + case 16: + createColumnFilter::type>(); + break; + + default: + throw NotImplementedExcept(std::string("ColumnCommand::createCommand does not support ") + + std::to_string(colType.colWidth) + + std::string(" byte width.")); + } } void ColumnCommand::createCommand(execplan::CalpontSystemCatalog::ColType& aColType, ByteStream& bs) @@ -1023,6 +1083,18 @@ ColumnCommand* ColumnCommandFabric::createCommand(messageqcpp::ByteStream& bs) return nullptr; } +template +void ColumnCommand::createColumnFilter() +{ + parsedColumnFilter = primitives::_parseColumnFilter(filterString.buf(), + colType.colDataType, + filterCount, BOP); + /* OR hack */ + emptyFilter = primitives::_parseColumnFilter(filterString.buf(), + colType.colDataType, + 0, BOP); +} + ColumnCommand* ColumnCommandFabric::duplicate(const ColumnCommandUniquePtr& rhs) { auto & command = *rhs; @@ -1066,12 +1138,7 @@ ColumnCommand* ColumnCommandFabric::duplicate(const ColumnCommandUniquePtr& rhs) ColumnCommandInt8::ColumnCommandInt8(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs) { ColumnCommand::createCommand(aColType, bs); - parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth, - colType.colDataType, filterCount, BOP); - - /* OR hack */ - emptyFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth, - colType.colDataType, 0, BOP); + createColumnFilter(); } void ColumnCommandInt8::prep(int8_t outputType, bool absRids) @@ -1104,17 +1171,15 @@ void ColumnCommandInt8::projectResultRG(RowGroup& rg, uint32_t pos) _projectResultRG(rg, pos); } +void ColumnCommandInt8::issuePrimitive() +{ + _issuePrimitiveNarrow(); +} + ColumnCommandInt16::ColumnCommandInt16(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs) { ColumnCommand::createCommand(aColType, bs); - parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), - colType.colWidth, - colType.colDataType, - filterCount, BOP); - /* OR hack */ - emptyFilter = primitives::parseColumnFilter(filterString.buf(), - colType.colWidth, - colType.colDataType, 0, BOP); + createColumnFilter(); } void ColumnCommandInt16::prep(int8_t outputType, bool absRids) @@ -1147,15 +1212,15 @@ void ColumnCommandInt16::projectResultRG(RowGroup& rg, uint32_t pos) _projectResultRG(rg, pos); } +void ColumnCommandInt16::issuePrimitive() +{ + _issuePrimitiveNarrow(); +} + ColumnCommandInt32::ColumnCommandInt32(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs) { ColumnCommand::createCommand(aColType, bs); - parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth, - colType.colDataType, filterCount, BOP); - - /* OR hack */ - emptyFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth, - colType.colDataType, 0, BOP); + createColumnFilter(); } void ColumnCommandInt32::prep(int8_t outputType, bool absRids) @@ -1188,15 +1253,15 @@ void ColumnCommandInt32::projectResultRG(RowGroup& rg, uint32_t pos) _projectResultRG(rg, pos); } +void ColumnCommandInt32::issuePrimitive() +{ + _issuePrimitiveNarrow(); +} + ColumnCommandInt64::ColumnCommandInt64(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs) { ColumnCommand::createCommand(aColType, bs); - parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth, - colType.colDataType, filterCount, BOP); - - /* OR hack */ - emptyFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth, - colType.colDataType, 0, BOP); + createColumnFilter(); } void ColumnCommandInt64::prep(int8_t outputType, bool absRids) @@ -1229,17 +1294,15 @@ void ColumnCommandInt64::projectResultRG(RowGroup& rg, uint32_t pos) _projectResultRG(rg, pos); } +void ColumnCommandInt64::issuePrimitive() +{ + _issuePrimitiveNarrow(); +} + ColumnCommandInt128::ColumnCommandInt128(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs) { ColumnCommand::createCommand(aColType, bs); - parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), - colType.colWidth, - colType.colDataType, - filterCount, BOP); - /* OR hack */ - emptyFilter = primitives::parseColumnFilter(filterString.buf(), - colType.colWidth, - colType.colDataType, 0, BOP); + createColumnFilter(); } void ColumnCommandInt128::prep(int8_t outputType, bool absRids) @@ -1272,5 +1335,19 @@ void ColumnCommandInt128::projectResultRG(RowGroup& rg, uint32_t pos) _projectResultRG(rg, pos); } +void ColumnCommandInt128::issuePrimitive() +{ + loadData(); + + if (!suppressFilter) + bpp->getPrimitiveProcessor().setParsedColumnFilter(parsedColumnFilter); + else + bpp->getPrimitiveProcessor().setParsedColumnFilter(emptyFilter); + + _issuePrimitive(); + if (_isScan) + updateCPDataWide(); +} + } // vim:ts=4 sw=4: diff --git a/primitives/primproc/columncommand.h b/primitives/primproc/columncommand.h index d2b94b094..10cb02d70 100644 --- a/primitives/primproc/columncommand.h +++ b/primitives/primproc/columncommand.h @@ -113,9 +113,15 @@ protected: void _loadData(); void updateCPDataNarrow(); void updateCPDataWide(); + template + void _issuePrimitiveNarrow(); + template void _issuePrimitive(); + virtual void issuePrimitive(); void duplicate(ColumnCommand*); void fillInPrimitiveMessageHeader(const int8_t outputType, const bool absRids); + template + void createColumnFilter(); // we only care about the width and type fields. //On the PM the rest is uninitialized @@ -125,7 +131,6 @@ protected: ColumnCommand& operator=(const ColumnCommand&); void _execute(); - void issuePrimitive(); void processResult(); template void _process_OT_BOTH(); @@ -202,6 +207,7 @@ class ColumnCommandInt8 : public ColumnCommand { public: static constexpr uint8_t size = 1; + using IntegralType = datatypes::WidthToSIntegralType::type; ColumnCommandInt8() : ColumnCommand() { }; ColumnCommandInt8(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs); void prep(int8_t outputType, bool absRids) override; @@ -209,12 +215,14 @@ class ColumnCommandInt8 : public ColumnCommand void process_OT_BOTH() override; void process_OT_DATAVALUE() override; void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override; + void issuePrimitive() override; }; class ColumnCommandInt16 : public ColumnCommand { public: static constexpr uint8_t size = 2; + using IntegralType = datatypes::WidthToSIntegralType::type; ColumnCommandInt16() : ColumnCommand() { }; ColumnCommandInt16(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs); void prep(int8_t outputType, bool absRids) override; @@ -222,12 +230,14 @@ class ColumnCommandInt16 : public ColumnCommand void process_OT_BOTH() override; void process_OT_DATAVALUE() override; void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override; + void issuePrimitive() override; }; class ColumnCommandInt32 : public ColumnCommand { public: static constexpr uint8_t size = 4; + using IntegralType = datatypes::WidthToSIntegralType::type; ColumnCommandInt32() : ColumnCommand() { }; ColumnCommandInt32(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs); void prep(int8_t outputType, bool absRids) override; @@ -235,12 +245,14 @@ class ColumnCommandInt32 : public ColumnCommand void process_OT_BOTH() override; void process_OT_DATAVALUE() override; void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override; + void issuePrimitive() override; }; class ColumnCommandInt64 : public ColumnCommand { public: static constexpr uint8_t size = 8; + using IntegralType = datatypes::WidthToSIntegralType::type; ColumnCommandInt64() : ColumnCommand() { }; ColumnCommandInt64(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs); void prep(int8_t outputType, bool absRids) override; @@ -248,12 +260,14 @@ class ColumnCommandInt64 : public ColumnCommand void process_OT_BOTH() override; void process_OT_DATAVALUE() override; void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override; + void issuePrimitive() override; }; class ColumnCommandInt128 : public ColumnCommand { public: static constexpr uint8_t size = 16; + using IntegralType = datatypes::WidthToSIntegralType::type; ColumnCommandInt128() : ColumnCommand() { }; ColumnCommandInt128(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs); void prep(int8_t outputType, bool absRids) override; @@ -261,6 +275,7 @@ class ColumnCommandInt128 : public ColumnCommand void process_OT_BOTH() override; void process_OT_DATAVALUE() override; void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override; + void issuePrimitive() override; }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 746d77124..356fa402b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -48,3 +48,10 @@ if (WITH_COMPRESSION_UT) target_link_libraries(compression_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS}) install(TARGETS compression_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) endif() + +if (WITH_PP_SCAN_UT) + add_executable(column-scan-filter-tests primitives_column_scan_and_filter.cpp) + target_include_directories(column-scan-filter-tests PUBLIC ${ENGINE_COMMON_INCLUDES} ${ENGINE_BLOCKCACHE_INCLUDE} ${ENGINE_PRIMPROC_INCLUDE} ) + target_link_libraries(column-scan-filter-tests ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS} ${GTEST_LIBRARIES} processor dbbc) + install(TARGETS column-scan-filter-tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) +endif() diff --git a/tests/primitives_column_scan_and_filter.cpp b/tests/primitives_column_scan_and_filter.cpp new file mode 100644 index 000000000..02a973017 --- /dev/null +++ b/tests/primitives_column_scan_and_filter.cpp @@ -0,0 +1,629 @@ +/* Copyright (C) 2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include "datatypes/mcs_datatype.h" +#include "stats.h" +#include "primitives/linux-port/primitiveprocessor.h" + +using namespace primitives; +using namespace datatypes; +using namespace std; + +class ColumnScanFilterTest : public ::testing::Test +{ + protected: + PrimitiveProcessor pp; + uint8_t input[BLOCK_SIZE]; + uint8_t output[4 * BLOCK_SIZE]; + uint8_t block[BLOCK_SIZE]; + uint16_t* rids; + uint32_t i; + uint32_t written; + NewColRequestHeader* in; + NewColResultHeader* out; + ColArgs* args; + + void SetUp() override + { + memset(input, 0, BLOCK_SIZE); + memset(output, 0, 4 * BLOCK_SIZE); + in = reinterpret_cast(input); + out = reinterpret_cast(output); + rids = reinterpret_cast(&in[1]); + args = reinterpret_cast(&in[1]); + } + + uint8_t* readBlockFromFile(const std::string& fileName, uint8_t* block) + { + int fd; + uint32_t i; + + fd = open(fileName.c_str(), O_RDONLY); + + if (fd < 0) + { + cerr << "getBlock(): skipping this test; needs the index list file " + << fileName << endl; + return nullptr; + } + + i = read(fd, block, BLOCK_SIZE); + + if (i <= 0) + { + cerr << "getBlock(): Couldn't read the file " << fileName << endl; + close(fd); + return nullptr; + } + + if (i != BLOCK_SIZE) + { + cerr << "getBlock(): could not read a whole block" << endl; + close(fd); + return nullptr; + } + + close(fd); + return block; + } +}; + +TEST_F(ColumnScanFilterTest, ColumnScan1Byte) +{ + constexpr const uint8_t W = 1; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + in->colType = ColRequestHeaderDataType(); + in->colType.DataSize = 1; + in->colType.DataType = SystemCatalog::CHAR; + in->OutputType = OT_DATAVALUE; + in->NOPS = 0; + in->NVALS = 0; + + pp.setBlockPtr((int*) readBlockFromFile("col1block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = &output[sizeof(NewColResultHeader)]; + EXPECT_EQ(out->NVALS, 8160); + + for (i = 0; i < 300; i++) + EXPECT_EQ(results[i],i % 255); + +} + +TEST_F(ColumnScanFilterTest, ColumnScan2Bytes) +{ + constexpr const uint8_t W = 2; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_DATAVALUE; + in->NOPS = 0; + in->NVALS = 0; + + pp.setBlockPtr((int*) readBlockFromFile("col2block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + EXPECT_EQ(out->NVALS, 4096); + + for (i = 0; i < out->NVALS; i++) + EXPECT_EQ(results[i], i); +} + +TEST_F(ColumnScanFilterTest, ColumnScan4Bytes) +{ + constexpr const uint8_t W = 4; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_DATAVALUE; + in->NOPS = 0; + in->NVALS = 0; + + pp.setBlockPtr((int*) readBlockFromFile("col4block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + EXPECT_EQ(out->NVALS, 2048); + + for (i = 0; i < out->NVALS; i++) + EXPECT_EQ(results[i], (uint32_t) i); +} + +TEST_F(ColumnScanFilterTest, ColumnScan8Bytes) +{ + constexpr const uint8_t W = 8; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_DATAVALUE; + in->NOPS = 0; + in->NVALS = 0; + + pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + ASSERT_EQ(out->NVALS, 1024); + + for (i = 0; i < out->NVALS; i++) + ASSERT_EQ(results[i], (uint32_t) i); +} + +TEST_F(ColumnScanFilterTest, ColumnScan1ByteUsingRID) +{ + constexpr const uint8_t W = 1; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_DATAVALUE; + in->NOPS = 0; + in->NVALS = 2; + rids[0] = 20; + rids[1] = 17; + + pp.setBlockPtr((int*) readBlockFromFile("col1block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + ASSERT_EQ(out->NVALS, 2); + + for (i = 0; i < out->NVALS; i++) + ASSERT_EQ(results[i], rids[i]); +} + +TEST_F(ColumnScanFilterTest, ColumnScan4Bytes1Filter) +{ + constexpr const uint8_t W = 4; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + IntegralType tmp; + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_DATAVALUE; + in->NOPS = 2; + in->BOP = BOP_AND; + in->NVALS = 0; + + tmp = 20; + args->COP = COMPARE_LT; + memcpy(args->val, &tmp, in->colType.DataSize); + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + + sizeof(ColArgs) + in->colType.DataSize]); + args->COP = COMPARE_GT; + tmp = 10; + memcpy(args->val, &tmp, in->colType.DataSize); + + pp.setBlockPtr((int*) readBlockFromFile("col4block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + ASSERT_EQ(out->NVALS, 9); + + for (i = 0; i < out->NVALS; i++) + ASSERT_EQ(results[i], 11 + (uint32_t)i); +} + +//void p_Col_7() +TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2CompFilters) +{ + constexpr const uint8_t W = 8; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + IntegralType tmp; + + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_DATAVALUE; + in->NOPS = 2; + in->BOP = BOP_OR; + in->NVALS = 0; + + tmp = 10; + args->COP = COMPARE_LT; + memcpy(args->val, &tmp, in->colType.DataSize); + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + + sizeof(ColArgs) + in->colType.DataSize]); + args->COP = COMPARE_GT; + tmp = 1000; + memcpy(args->val, &tmp, in->colType.DataSize); + + pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + ASSERT_EQ(out->NVALS, 33); + + for (i = 0; i < out->NVALS; i++) + ASSERT_EQ(results[i], (uint32_t) (i < 10 ? i : i - 10 + 1001)); +} + +TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFilters) +{ + constexpr const uint8_t W = 8; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + IntegralType tmp; + + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_DATAVALUE; + in->NOPS = 2; + in->BOP = BOP_OR; + in->NVALS = 0; + + tmp = 10; + args->COP = COMPARE_EQ; + memcpy(args->val, &tmp, in->colType.DataSize); + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + + sizeof(ColArgs) + in->colType.DataSize]); + args->COP = COMPARE_EQ; + tmp = 1000; + memcpy(args->val, &tmp, in->colType.DataSize); + + pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + + ASSERT_EQ(out->NVALS, 2); + ASSERT_EQ(results[0], 10); + ASSERT_EQ(results[1], 1000); +} + +TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFiltersRID) +{ + constexpr const uint8_t W = 8; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + IntegralType tmp; + + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_DATAVALUE; + in->NOPS = 2; + in->BOP = BOP_OR; + in->NVALS = 2; + + tmp = 10; + args->COP = COMPARE_EQ; + memcpy(args->val, &tmp, in->colType.DataSize); + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + + sizeof(ColArgs) + in->colType.DataSize]); + args->COP = COMPARE_EQ; + tmp = 1000; + memcpy(args->val, &tmp, in->colType.DataSize); + + rids = reinterpret_cast(&input[sizeof(NewColRequestHeader) + + 2 * (sizeof(ColArgs) + in->colType.DataSize)]); + + rids[0] = 10; + rids[1] = 100; + + pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + ASSERT_EQ(out->NVALS, 1); + ASSERT_EQ(results[0], 10); +} + +TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFiltersRIDOutputRid) +{ + constexpr const uint8_t W = 8; + using IntegralType = datatypes::WidthToSIntegralType::type; + int16_t* results; + IntegralType tmp; + + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_RID; + in->NOPS = 2; + in->BOP = BOP_OR; + in->NVALS = 0; + + tmp = 10; + args->COP = COMPARE_LT; + memcpy(args->val, &tmp, in->colType.DataSize); + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + + sizeof(ColArgs) + in->colType.DataSize]); + args->COP = COMPARE_GT; + tmp = 1000; + memcpy(args->val, &tmp, in->colType.DataSize); + + pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + ASSERT_EQ(out->NVALS, 33); + + for (i = 0; i < out->NVALS; i++) + ASSERT_EQ(results[i], (i < 10 ? i : i - 10 + 1001)); +} + +TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFiltersRIDOutputBoth) +{ + constexpr const uint8_t W = 8; + using IntegralType = datatypes::WidthToSIntegralType::type; + IntegralType tmp; + IntegralType* resultVal; + int16_t* resultRid; + + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_BOTH; + in->NOPS = 2; + in->BOP = BOP_OR; + in->NVALS = 0; + + tmp = 10; + args->COP = COMPARE_LT; + memcpy(args->val, &tmp, in->colType.DataSize); + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + + sizeof(ColArgs) + in->colType.DataSize]); + args->COP = COMPARE_GT; + tmp = 1000; + memcpy(args->val, &tmp, in->colType.DataSize); + + pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + ASSERT_EQ(out->NVALS, 33); + + for (i = 0; i < out->NVALS; i++) + { + resultRid = reinterpret_cast(&output[ + sizeof(NewColResultHeader) + i * (sizeof(int16_t) + in->colType.DataSize)]); + resultVal = reinterpret_cast(&resultRid[1]); + ASSERT_EQ(*resultRid, (i < 10 ? i : i - 10 + 1001)); + ASSERT_EQ(*resultVal, (i < 10 ? i : i - 10 + 1001)); + } +} + +//void p_Col_12() +TEST_F(ColumnScanFilterTest, ColumnScan1Byte2CompFilters) +{ + constexpr const uint8_t W = 1; + using IntegralType = datatypes::WidthToSIntegralType::type; + datatypes::make_unsigned::type* results; + + in->colType = ColRequestHeaderDataType(); + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::CHAR; + in->OutputType = OT_DATAVALUE; + in->BOP = BOP_AND; + in->NOPS = 2; + in->NVALS = 0; + + args->COP = COMPARE_GT; + args->val[0] = '2'; + // Filter is COP(1 byte)/rf(1 byte)/val(1 byte) + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + 2 + W]); + args->COP = COMPARE_LT; + args->val[0] = '4'; + + pp.setBlockPtr((int*) readBlockFromFile("col1block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = &output[sizeof(NewColResultHeader)]; + ASSERT_EQ(out->NVALS, 32); + + for (i = 0; i < out->NVALS; i++) + ASSERT_EQ( (int)'3', results[i]); +} +/* +The code doesn't support filters with RID and literal +//void p_Col_13() +TEST_F(ColumnScanFilterTest, ColumnScan4Bytes2CompFiltersOutputRID) +{ + constexpr const uint8_t W = 4; + using IntegralType = datatypes::WidthToSIntegralType::type; + IntegralType tmp; + int16_t ridTmp; + int16_t* results; + in->colType.DataSize = 4; + in->colType.DataType = SystemCatalog::INT; + in->OutputType = OT_RID; + in->NOPS = 3; + in->BOP = BOP_OR; + in->NVALS = 3; + + // first argument "is RID 8 < 10?" Answer is yes + tmp = 10; // value to check + ridTmp = 8; + args->COP = COMPARE_LT; + memcpy(args->val, &tmp, in->colType.DataSize); + memcpy(&args->val[in->colType.DataSize], &ridTmp, 2); + + // second argument "is RID 5 > 10?" Answer is no + args = reinterpret_cast(&args->val[in->colType.DataSize + 2]); + args->COP = COMPARE_GT; + tmp = 10; + ridTmp = 5; + memcpy(args->val, &tmp, in->colType.DataSize); + memcpy(&args->val[in->colType.DataSize], &ridTmp, 2); + + // third argument "is RID 11 < 1000?" Answer is yes + args = reinterpret_cast(&args->val[in->colType.DataSize + 2]); + args->COP = COMPARE_LT; + tmp = 1000; + ridTmp = 11; + memcpy(args->val, &tmp, in->colType.DataSize); + memcpy(&args->val[in->colType.DataSize], &ridTmp, 2); + + pp.setBlockPtr((int*) readBlockFromFile("col4block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + ASSERT_EQ(out->NVALS, 2); + ASSERT_EQ(results[0], 8); + ASSERT_EQ(results[1], 11); +} +*/ +//void p_Col_double_1() +TEST_F(ColumnScanFilterTest, ColumnScan8BytesDouble2CompFilters) +{ + constexpr const uint8_t W = 8; + using IntegralType = datatypes::WidthToSIntegralType::type; + double* results; + double tmp; + + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::DOUBLE; + in->OutputType = OT_DATAVALUE; + in->BOP = BOP_AND; + in->NOPS = 2; + in->NVALS = 0; + + tmp = 10.5; + args->COP = COMPARE_GT; + memcpy(args->val, &tmp, sizeof(tmp)); + tmp = 15; + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + 10]); + args->COP = COMPARE_LT; + memcpy(args->val, &tmp, sizeof(tmp)); + + pp.setBlockPtr((int*) readBlockFromFile("col_double_block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + ASSERT_EQ(out->NVALS, 8); + + for (i = 0; i < out->NVALS; i++) + ASSERT_EQ(results[i], 11 + (i * 0.5)); +} + +//void p_Col_float_1() +TEST_F(ColumnScanFilterTest, ColumnScan4BytesFloat2CompFiltersOutputBoth) +{ + constexpr const uint8_t W = 4; + using IntegralType = datatypes::WidthToSIntegralType::type; + float* resultVal; + float tmp; + int16_t* resultRid; + + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::FLOAT; + in->OutputType = OT_BOTH; + in->BOP = BOP_AND; + in->NOPS = 2; + in->NVALS = 0; + + tmp = 10.5; + args->COP = COMPARE_GT; + memcpy(args->val, &tmp, sizeof(tmp)); + tmp = 15; + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + 6]); + args->COP = COMPARE_LT; + memcpy(args->val, &tmp, sizeof(tmp)); + + pp.setBlockPtr((int*) readBlockFromFile("col_float_block.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + ASSERT_EQ(out->NVALS, 8); + + for (i = 0; i < out->NVALS; i++) + { + resultRid = reinterpret_cast(&output[ + sizeof(NewColResultHeader) + i * (sizeof(int16_t) + in->colType.DataSize)]); + resultVal = reinterpret_cast(&resultRid[1]); + ASSERT_EQ(*resultVal, 11 + (i * 0.5)); + } +} + +//void p_Col_neg_float_1() +TEST_F(ColumnScanFilterTest, ColumnScan4BytesNegFloat2CompFiltersOutputBoth) +{ + constexpr const uint8_t W = 4; + using IntegralType = datatypes::WidthToSIntegralType::type; + float* resultVal; + float tmp; + int16_t* resultRid; + + in->colType.DataSize = 4; + in->colType.DataType = SystemCatalog::FLOAT; + in->OutputType = OT_BOTH; + in->BOP = BOP_AND; + in->NOPS = 2; + in->NVALS = 0; + + tmp = -5.0; + args->COP = COMPARE_GT; + memcpy(args->val, &tmp, sizeof(tmp)); + tmp = 5.0; + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + 6]); + args->COP = COMPARE_LT; + memcpy(args->val, &tmp, sizeof(tmp)); + + pp.setBlockPtr((int*) readBlockFromFile("col_neg_float.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + ASSERT_EQ(out->NVALS, 19); + + for (i = 0; i < out->NVALS; i++) + { + resultRid = reinterpret_cast(&output[ + sizeof(NewColResultHeader) + i * (sizeof(int16_t) + in->colType.DataSize)]); + resultVal = reinterpret_cast(&resultRid[1]); + ASSERT_EQ(*resultVal, -4.5 + (i * 0.5)); + } +} + +//void p_Col_neg_double_1() +TEST_F(ColumnScanFilterTest, ColumnScan4BytesNegDouble2CompFilters) +{ + constexpr const uint8_t W = 8; + using IntegralType = datatypes::WidthToSIntegralType::type; + double* results; + double tmp; + + in->colType.DataSize = W; + in->colType.DataType = SystemCatalog::DOUBLE; + in->OutputType = OT_DATAVALUE; + in->BOP = BOP_AND; + in->NOPS = 2; + in->NVALS = 0; + + tmp = -5.0; + args->COP = COMPARE_GT; + memcpy(args->val, &tmp, sizeof(tmp)); + tmp = 5.0; + args = reinterpret_cast(&input[sizeof(NewColRequestHeader) + 10]); + args->COP = COMPARE_LT; + memcpy(args->val, &tmp, sizeof(tmp)); + + pp.setBlockPtr((int*) readBlockFromFile("col_neg_double.cdf", block)); + pp.columnScanAndFilter(in, out, 4 * BLOCK_SIZE, &written); + + results = reinterpret_cast(&output[sizeof(NewColResultHeader)]); + ASSERT_EQ(out->NVALS, 19); + + for (i = 0; i < out->NVALS; i++) + ASSERT_EQ(results[i], -4.5 + (i * 0.5)); +} + +TEST_F(ColumnScanFilterTest, ColumnScan16Bytes2CompFilters) +{ +//TBD +} +// vim:ts=2 sw=2: