diff --git a/CMakeLists.txt b/CMakeLists.txt index bc7033ccb..7a084400d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -148,6 +148,13 @@ if (NOT CURL_FOUND) return() endif() +IF (WITH_GTEST) + INCLUDE (FindGTest) + IF (NOT GTEST_FOUND) + MESSAGE(FATAL_ERROR "GSuite libs not found but are requested. Please install them or build.") + ENDIF() + SET (GTEST_LIBRARIES ${GTEST_LIBRARY} ${GTESTMAIN_LIBRARY} ${PTHREAD_LIBRARY}) +ENDIF() FIND_PROGRAM(AWK_EXECUTABLE awk DOC "path to the awk executable") if(NOT AWK_EXECUTABLE) diff --git a/cmake/FindGTest.cmake b/cmake/FindGTest.cmake new file mode 100644 index 000000000..7b1fb3957 --- /dev/null +++ b/cmake/FindGTest.cmake @@ -0,0 +1,38 @@ +find_path(GTEST_ROOT_DIR + NAMES include/gtest/gtest.h +) + +find_library(GTEST_LIBRARY + NAMES gtest + HINTS ${GTEST_ROOT_DIR}/lib +) + +find_library(GTESTMAIN_LIBRARY + NAMES gtest_main + HINTS ${GTEST_ROOT_DIR}/lib +) + +find_library(PTHREAD_LIBRARY + NAMES pthread + HINTS ${GTEST_ROOT_DIR}/lib +) + + +find_path(GTEST_INCLUDE_DIR + NAMES gtest.h + HINTS ${GTEST_ROOT_DIR}/include +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(GTest DEFAULT_MSG + GTEST_LIBRARY + GTESTMAIN_LIBRARY + PTHREAD_LIBRARY + GTEST_INCLUDE_DIR +) + +mark_as_advanced( + GTEST_ROOT_DIR + GTEST_LIBRARIES + GTEST_INCLUDE_DIR +) diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp index c8fd6673d..e246e8e95 100644 --- a/primitives/linux-port/column.cpp +++ b/primitives/linux-port/column.cpp @@ -285,7 +285,6 @@ template<> inline bool isEmptyVal<16>(uint8_t type, const uint8_t* ival) // For BINARY { const uint64_t* val = reinterpret_cast(ival); - // WIP ugly speed hack return ((val[0] == joblist::BINARYEMPTYROW) && (val[1] == joblist::BINARYEMPTYROW)); } @@ -415,15 +414,15 @@ template<> inline bool isNullVal<16>(uint8_t type, const uint8_t* ival) // For BINARY { const uint64_t* val = reinterpret_cast(ival); - return ((val[0] == joblist::BINARYNULL) && (val[1] == joblist::BINARYNULL)); + return ((val[0] == joblist::BINARYEMPTYROW) && (val[1] == joblist::BINARYNULL)); } template<> inline bool isNullVal<32>(uint8_t type, const uint8_t* ival) // For BINARY { const uint64_t* val = reinterpret_cast(ival); - return ((val[0] == joblist::BINARYNULL) && (val[1] == joblist::BINARYNULL) - && (val[2] == joblist::BINARYNULL) && (val[3] == joblist::BINARYNULL)); + return ((val[0] == joblist::BINARYEMPTYROW) && (val[1] == joblist::BINARYEMPTYROW) + && (val[2] == joblist::BINARYEMPTYROW) && (val[3] == joblist::BINARYNULL)); } template<> @@ -614,7 +613,7 @@ inline bool isMinMaxValid(const NewColRequestHeader* in) case CalpontSystemCatalog::DECIMAL: case CalpontSystemCatalog::UDECIMAL: - return (in->DataSize <= 16); + return (in->DataSize <= 16 ); default: return false; diff --git a/primitives/primproc/columncommand.cpp b/primitives/primproc/columncommand.cpp index ac33e47a3..4d4d7bf79 100644 --- a/primitives/primproc/columncommand.cpp +++ b/primitives/primproc/columncommand.cpp @@ -165,6 +165,7 @@ void ColumnCommand::loadData() { // fill remaining blocks with empty values when col scan int blockLen = BLOCK_SIZE / colType.colWidth; + ByteStream::hexbyte* hPtr = NULL; ByteStream::octbyte* oPtr = NULL; ByteStream::quadbyte* qPtr = NULL; ByteStream::byte* bPtr = NULL; @@ -183,6 +184,10 @@ void ColumnCommand::loadData() if (colType.colWidth == 8) oPtr = reinterpret_cast(&bpp->blockData[i * BLOCK_SIZE]); + if (colType.colWidth == 16) + hPtr = reinterpret_cast(&bpp->blockData[i * BLOCK_SIZE]); + + for (int idx = 0; idx < blockLen; idx++) { if (bPtr && colType.colWidth == 1) @@ -208,9 +213,7 @@ void ColumnCommand::loadData() } else if (colType.colWidth == 16) { - uint64_t *ptr = reinterpret_cast(&bpp->blockData[i * BLOCK_SIZE] + (idx*16) ); - *ptr = joblist::BINARYEMPTYROW; - *(ptr + 1) = joblist::BINARYEMPTYROW; + getEmptyRowValue(colType.colDataType, colType.colWidth, &hPtr[idx]); } } @@ -965,7 +968,7 @@ void ColumnCommand::enableFilters() * RETURN: * emptyVal - the value of empty row ***********************************************************/ -uint64_t ColumnCommand::getEmptyRowValue( const execplan::CalpontSystemCatalog::ColDataType dataType, const int width ) const +const uint64_t ColumnCommand::getEmptyRowValue( const CSCDataType dataType, const int width ) const { uint64_t emptyVal = 0; int offset; @@ -1056,6 +1059,16 @@ uint64_t ColumnCommand::getEmptyRowValue( const execplan::CalpontSystemCatalog:: return emptyVal; } +void ColumnCommand::getEmptyRowValue(const CSCDataType dataType, + const int width, messageqcpp::ByteStream::hexbyte* space) const +{ + uint64_t *ptr = reinterpret_cast(space); + ptr[0] = joblist::BINARYEMPTYROW; + ptr[1] = joblist::BINARYEMPTYROW; +} + + + void ColumnCommand::getLBIDList(uint32_t loopCount, vector* lbids) { int64_t firstLBID = lbid, lastLBID = firstLBID + (loopCount * colType.colWidth) - 1, i; diff --git a/primitives/primproc/columncommand.h b/primitives/primproc/columncommand.h index e1d67aa0d..52544021f 100644 --- a/primitives/primproc/columncommand.h +++ b/primitives/primproc/columncommand.h @@ -34,6 +34,8 @@ #include "command.h" #include "calpontsystemcatalog.h" +using CSCDataType = execplan::CalpontSystemCatalog::ColDataType; + namespace primitiveprocessor { @@ -82,8 +84,10 @@ public: makeAbsRids = m; } bool willPrefetch(); - uint64_t getEmptyRowValue( const execplan::CalpontSystemCatalog::ColDataType dataType, const int width ) const; - int64_t getLastLbid(); + const uint64_t getEmptyRowValue( const CSCDataType dataType, const int width ) const; + void getEmptyRowValue(const CSCDataType dataType, + const int width, messageqcpp::ByteStream::hexbyte* space) const; + const int64_t getLastLbid(); void getLBIDList(uint32_t loopCount, std::vector* lbids); virtual SCommand duplicate(); diff --git a/utils/common/columnwidth.h b/utils/common/columnwidth.h index dfcf767d4..8e36f5356 100644 --- a/utils/common/columnwidth.h +++ b/utils/common/columnwidth.h @@ -19,6 +19,7 @@ #define UTILS_COLWIDTH_H #define MAXLEGACYWIDTH 8 +#define MAXCOLUMNWIDTH 16 namespace utils { diff --git a/utils/messageqcpp/bytestream.cpp b/utils/messageqcpp/bytestream.cpp index 4cc4a103a..73ca1487c 100644 --- a/utils/messageqcpp/bytestream.cpp +++ b/utils/messageqcpp/bytestream.cpp @@ -236,12 +236,12 @@ ByteStream& ByteStream::operator<<(const uint64_t o) } // WIP MCOL-641 -ByteStream& ByteStream::operator<<(const unsigned __int128 o) +ByteStream& ByteStream::operator<<(const uint128_t o) { if (fBuf == 0 || (fCurInPtr - fBuf + 16U > fMaxLen + ISSOverhead)) growBuf(fMaxLen + BlockSize); - *((unsigned __int128*) fCurInPtr) = o; + *((uint128_t*) fCurInPtr) = o; fCurInPtr += 16; return *this; @@ -332,7 +332,7 @@ ByteStream& ByteStream::operator>>(uint64_t& o) } // WIP MCOL-641 -ByteStream& ByteStream::operator>>(unsigned __int128& o) +ByteStream& ByteStream::operator>>(uint128_t& o) { peek(o); fCurOutPtr += 16; @@ -420,13 +420,13 @@ void ByteStream::peek(uint64_t& o) const } // WIP MCOL-641 -void ByteStream::peek(unsigned __int128& o) const +void ByteStream::peek(uint128_t& o) const { if (length() < 16) - throw underflow_error("ByteStream>unsigned __int128: not enough data in stream to fill datatype"); + throw underflow_error("ByteStream>uint128_t: not enough data in stream to fill datatype"); - o = *((unsigned __int128*) fCurOutPtr); + o = *((uint128_t*) fCurOutPtr); } void ByteStream::peek(string& s) const diff --git a/utils/messageqcpp/bytestream.h b/utils/messageqcpp/bytestream.h index 16d547fba..9f5339243 100644 --- a/utils/messageqcpp/bytestream.h +++ b/utils/messageqcpp/bytestream.h @@ -45,6 +45,8 @@ class ByteStreamTestSuite; #define EXPORT #endif +using uint128_t = unsigned __int128; + namespace messageqcpp { @@ -74,6 +76,7 @@ public: typedef uint16_t doublebyte; typedef uint32_t quadbyte; typedef uint64_t octbyte; + typedef uint128_t hexbyte; typedef boost::uuids::uuid uuid; /** @@ -147,7 +150,7 @@ public: /** * push an unsigned __int128 onto the end of the stream. The byte order is whatever the native byte order is. */ - EXPORT ByteStream& operator<<(const unsigned __int128 o); + EXPORT ByteStream& operator<<(const uint128_t o); /** * push a float onto the end of the stream. The byte order is * whatever the native byte order is. @@ -216,7 +219,7 @@ public: /** * extract an unsigned __int128 from the front of the stream. The byte order is whatever the native byte order is. */ - EXPORT ByteStream& operator>>(unsigned __int128& o); + EXPORT ByteStream& operator>>(uint128_t& o); /** * extract a float from the front of the stream. The byte * order is whatever the native byte order is. @@ -291,7 +294,7 @@ public: /** * Peek at an unsigned __int128 from the front of the stream. The byte order is whatever the native byte order is. */ - EXPORT void peek(unsigned __int128& o) const; + EXPORT void peek(uint128_t& o) const; /** * Peek at a float from the front of the stream. The byte order * is whatever the native byte order is. diff --git a/utils/rowgroup/CMakeLists.txt b/utils/rowgroup/CMakeLists.txt index edda8e943..86c5fa9db 100644 --- a/utils/rowgroup/CMakeLists.txt +++ b/utils/rowgroup/CMakeLists.txt @@ -16,3 +16,8 @@ target_link_libraries(rowgroup ${NETSNMP_LIBRARIES} funcexp) install(TARGETS rowgroup DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine) +if (WITH_ROWGROUP_UT) + add_executable(rowgroup_tests rowgroup-tests.cpp) + target_link_libraries(rowgroup_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${ENGINE_EXEC_LIBS} ${MARIADB_CLIENT_LIBS}) + install(TARGETS rowgroup_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-platform) +endif() diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 0176e87b5..e6baec5fc 100755 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -225,7 +225,12 @@ inline string getStringNullValue() inline uint64_t getBinaryNullValue() { return joblist::BINARYNULL; -} +} + +inline uint64_t getBinaryEmptyValue() +{ + return joblist::BINARYEMPTYROW; +} } @@ -1173,10 +1178,11 @@ void RowAggregation::makeAggFieldsNull(Row& row) { // WIP This is only 1st part of the value uint64_t nullValue = getBinaryNullValue(); + uint64_t emptyValue = getBinaryEmptyValue(); uint32_t offset = row.getOffset(colOut); row.setBinaryField_offset(&nullValue, sizeof(nullValue), offset); - row.setBinaryField_offset(&nullValue, sizeof(nullValue), + row.setBinaryField_offset(&emptyValue, sizeof(nullValue), offset+sizeof(nullValue)); } break; diff --git a/utils/rowgroup/rowgroup-tests.cpp b/utils/rowgroup/rowgroup-tests.cpp new file mode 100644 index 000000000..7ef43a122 --- /dev/null +++ b/utils/rowgroup/rowgroup-tests.cpp @@ -0,0 +1,110 @@ +#include // googletest header file +#include "rowgroup.h" +#include "columnwidth.h" +#include "joblisttypes.h" + +#define WIDE_DEC_PRECISION 38 +#define INITIAL_ROW_OFFSET 2 + +using int128_t = __int128; +using uint128_t = unsigned __int128; + +class RowTest : public ::testing::Test { + protected: + void SetUp() override { + uint8_t width = utils::widthByPrecision(WIDE_DEC_PRECISION); + uint32_t oid =3001; + + std::vector offsets, roids, tkeys, cscale, cprecision; + std::vector types; + offsets.push_back(INITIAL_ROW_OFFSET); + offsets.push_back(width+INITIAL_ROW_OFFSET); + offsets.push_back(width*2+INITIAL_ROW_OFFSET); + roids.push_back(oid); roids.push_back(oid+1); + tkeys.push_back(1); tkeys.push_back(1); + types.push_back(execplan::CalpontSystemCatalog::DECIMAL); + types.push_back(execplan::CalpontSystemCatalog::UDECIMAL); + cscale.push_back(0); cscale.push_back(0); + cprecision.push_back(WIDE_DEC_PRECISION); + cprecision.push_back(WIDE_DEC_PRECISION); + rowgroup::RowGroup inRG(roids.size(), //column count + offsets, //oldOffset + roids, // column oids + tkeys, //keys + types, // types + cscale, //scale + cprecision, // precision + 20, // sTableThreshold + false //useStringTable + ); + rg = inRG; + rgD.reinit(rg); + rg.setData(&rgD); + + rg.initRow(&r); + rowSize = r.getSize(); + rg.getRow(0, &r); + + std::vector sValueVector; + std::vector uValueVector; + int128_t nullValue = 0; + uint64_t* uint128_pod = reinterpret_cast(&nullValue); + uint128_pod[0] = joblist::BINARYEMPTYROW; + uint128_pod[1] = joblist::BINARYNULL; + + sValueVector.push_back(nullValue); + sValueVector.push_back(-42); + sValueVector.push_back(-42*0xFFFFFFFFFFFFFFFFLL); + sValueVector.push_back(0); + sValueVector.push_back(nullValue-1); + + uValueVector.push_back(nullValue); + uValueVector.push_back(42); + uValueVector.push_back(42*0xFFFFFFFFFFFFFFFFLL); + uValueVector.push_back(0); + uValueVector.push_back(nullValue); + uValueVector.push_back(nullValue-1); + + for(size_t i = 0; i < sValueVector.size(); i++) + { + r.setBinaryField_offset(&sValueVector[i], + sizeof(sValueVector[0]), INITIAL_ROW_OFFSET); + r.setBinaryField_offset(&uValueVector[i], + sizeof(uValueVector[0]), INITIAL_ROW_OFFSET+width); + r.nextRow(rowSize); + } + rowCount = sValueVector.size(); + } + // void TearDown() override {} + + rowgroup::Row r; + rowgroup::RowGroup rg; + rowgroup::RGData rgD; + uint32_t rowSize; + size_t rowCount; +}; + +TEST_F(RowTest, NonNULLValuesCheck) { + rg.getRow(1, &r); + for (size_t i = 0; i <= rg.getRowCount(); i++) + { + EXPECT_FALSE(r.isNullValue(0)); + EXPECT_FALSE(r.isNullValue(1)); + r.nextRow(rowSize); + } +} + +TEST_F(RowTest, NULLValuesCheck) { + rg.getRow(0, &r); + EXPECT_TRUE(r.isNullValue(0)); + EXPECT_TRUE(r.isNullValue(1)); +} + +//Row::isNullValue_offset +//toString +//initToNull +//toCSV +//applyMapping +//setBinaryField remove Field1 and combine setBinaryField +//getBinaryField +//Remove from set/getIntFields Varbinary diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index e3be508ab..b3fb50084 100644 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -848,7 +848,7 @@ void Row::initToNull() case 16 : { uint64_t *dec = reinterpret_cast(&data[offsets[i]]); - dec[0] = joblist::BINARYNULL; + dec[0] = joblist::BINARYEMPTYROW; dec[1] = joblist::BINARYNULL; break; } @@ -888,7 +888,7 @@ void Row::initToNull() } } -template +template inline bool Row::isNullValue_offset(uint32_t offset) const { ostringstream os; @@ -898,69 +898,76 @@ inline bool Row::isNullValue_offset(uint32_t offset) const os << width << endl; throw logic_error(os.str()); } -/* -// WIP how to make if that enables explicit template for two cscTypes? -// Method template resolution could impose some perf degradation -template -inline bool Row::isNullValue_offset(uint32_t offset) const + +// WIP Method template resolution could impose some perf degradation +// Compare perf with switch-case +template<> +inline bool +Row::isNullValue_offset( + uint32_t offset) const { - return (*reinterpret_cast(&data[offset]) == static_cast(joblist::BIGINTNULL)); + const int64_t *intPtr = reinterpret_cast(&data[offset]); + return ((intPtr[0] == static_cast(joblist::BINARYEMPTYROW)) && + (intPtr[1] == static_cast(joblist::BINARYEMPTYROW)) && + (intPtr[2] == static_cast(joblist::BINARYEMPTYROW)) && + (intPtr[3] == static_cast(joblist::BINARYNULL))); } -template -inline bool Row::isNullValue_offset(uint32_t offset) const +template<> +inline bool +Row::isNullValue_offset( + uint32_t offset) const { - return (*reinterpret_cast(&data[offset]) == static_cast(joblist::BIGINTNULL)); + const int64_t *intPtr = reinterpret_cast(&data[offset]); + return ((intPtr[0] == static_cast(joblist::BINARYEMPTYROW)) + && (intPtr[1] == static_cast(joblist::BINARYNULL))); } -template -inline bool Row::isNullValue_offset(uint32_t offset) const +template<> +inline bool +Row::isNullValue_offset( + uint32_t offset) const { - return (*reinterpret_cast(&data[offset]) == static_cast(joblist::BIGINTNULL)); + const int64_t *intPtr = reinterpret_cast(&data[offset]); + return ((intPtr[0] == static_cast(joblist::BINARYEMPTYROW)) + && (intPtr[1] == static_cast(joblist::BINARYNULL))); } -template -inline bool Row::isNullValue_offset(uint32_t offset) const +template<> +inline bool +Row::isNullValue_offset( + uint32_t offset) const { - return (*reinterpret_cast(&data[offset]) == static_cast(joblist::BIGINTNULL)); + return (*reinterpret_cast(&data[offset]) + == static_cast(joblist::BIGINTNULL)); } -template -inline bool Row::isNullValue_offset(uint32_t offset) const +template<> +inline bool +Row::isNullValue_offset( + uint32_t offset) const +{ + return (*reinterpret_cast(&data[offset]) + == static_cast(joblist::INTNULL)); +} + +template<> +inline bool +Row::isNullValue_offset( + uint32_t offset) const +{ + return (*reinterpret_cast(&data[offset]) + == static_cast(joblist::SMALLINTNULL)); +} + +template<> +inline bool +Row::isNullValue_offset( + uint32_t offset) const { return (data[offset] == joblist::TINYINTNULL); } -template -inline bool Row::isNullValue_offset(uint32_t offset) const -{ - return (data[offset] == joblist::TINYINTNULL); -} - -template -inline bool Row::isNullValue_offset(uint32_t offset) const -{ - return (*reinterpret_cast(&data[offset]) == static_cast(joblist::SMALLINTNULL)); -} - -template -inline bool Row::isNullValue_offset(uint32_t offset) const -{ - return (*reinterpret_cast(&data[offset]) == static_cast(joblist::SMALLINTNULL)); -} - -template -inline bool Row::isNullValue_offset(uint32_t offset) const -{ - return (*reinterpret_cast(&data[offset]) == static_cast(joblist::INTNULL)); -} - -template -inline bool Row::isNullValue_offset(uint32_t offset) const -{ - return (*reinterpret_cast(&data[offset]) == static_cast(joblist::INTNULL)); -} -*/ bool Row::isNullValue(uint32_t colIndex) const { switch (types[colIndex]) @@ -1044,16 +1051,22 @@ bool Row::isNullValue(uint32_t colIndex) const case CalpontSystemCatalog::DECIMAL: case CalpontSystemCatalog::UDECIMAL: { - uint32_t len = getColumnWidth(colIndex); - const int64_t *dec; + // WIP MCOL-641 Allmighty hack. + const uint32_t len = 16; + uint32_t* lenPtr = const_cast(&len); + *lenPtr = getColumnWidth(colIndex); + return isNullValue_offset + (offsets[colIndex]); +// WIP +/* + const int64_t *dec; switch (len) { // MCOL-641 case 16: - dec = reinterpret_cast(&data[offsets[colIndex]]); - return ((dec[0] == static_cast(joblist::BINARYNULL)) - && (dec[1] == static_cast(joblist::BINARYNULL))); + return isNullValue_offset + (offsets[colIndex]); case 1 : return (data[offsets[colIndex]] == joblist::TINYINTNULL); @@ -1067,7 +1080,7 @@ bool Row::isNullValue(uint32_t colIndex) const default: return (*((int64_t*) &data[offsets[colIndex]]) == static_cast(joblist::BIGINTNULL)); } - +*/ break; } @@ -1112,12 +1125,11 @@ bool Row::isNullValue(uint32_t colIndex) const case CalpontSystemCatalog::BINARY: { - // When is null? I dont know. Wait for bitmap null empty implemtenttion ? - // Also still pendig rework discussed use pointers for empty null values - - std::cout << __FILE__<< ":" << __LINE__ << " isNullValue value " << (*((uint64_t*) &data[offsets[colIndex]])) << std::endl; - //return false; - return (*((uint64_t*) &data[offsets[colIndex]]) == joblist::BINARYEMPTYROW); + const uint32_t len = 16; + uint32_t* lenPtr = const_cast(&len); + *lenPtr = getColumnWidth(colIndex); + return isNullValue_offset + (offsets[colIndex]); } default: diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 37f1ad3e4..67668501c 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -28,7 +28,6 @@ #ifndef ROWGROUP_H_ #define ROWGROUP_H_ -#include #include #include #include @@ -72,7 +71,6 @@ using uint128_t = unsigned __int128; namespace rowgroup { -//using cscType = execplan::CalpontSystemCatalog::ColDataType; const int16_t rgCommonSize = 8192; /* @@ -455,7 +453,7 @@ public: uint64_t getNullValue(uint32_t colIndex) const; bool isNullValue(uint32_t colIndex) const; - template + template inline bool isNullValue_offset(uint32_t offset) const; // when NULLs are pulled out via getIntField(), they come out with these values. @@ -820,7 +818,7 @@ inline void Row::setBinaryField1(T* value, uint32_t width, uint32_t colIndex) template inline void Row::setBinaryField_offset(T* value, uint32_t width, uint32_t offset) { - // WIP + // WIP Compare performance. //memcpy(&data[offset], value, width); *reinterpret_cast(&data[offset]) = *value; }