diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 30a80cec7..b637e9b03 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -36,3 +36,12 @@ if (WITH_SHARED_COMP_TESTS)
     target_link_libraries(we_shared_components_tests ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS} cppunit)
     install(TARGETS we_shared_components_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
 endif()
+
+# GoogleTest binary for the Convertor file-name <-> OID tests; built only when WITH_REBUILD_EM_UT is set.
+# NOTE(review): rebuild-em-tests.cpp defines no main(); confirm that
+# ${GTEST_LIBRARIES} also brings in gtest_main.
+if (WITH_REBUILD_EM_UT)
+    add_executable(rebuild_em_tests rebuild-em-tests.cpp)
+    target_link_libraries(rebuild_em_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
+    install(TARGETS rebuild_em_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
+endif()
diff --git a/tests/rebuild-em-tests.cpp b/tests/rebuild-em-tests.cpp
new file mode 100644
index 000000000..8875be460
--- /dev/null
+++ b/tests/rebuild-em-tests.cpp
@@ -0,0 +1,171 @@
+/* Copyright (C) 2020 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "we_convertor.h"
+
+// Fixture shared by the Convertor file-name <-> OID conversion tests.
+class RebuildEMTest : public ::testing::Test
+{
+protected:
+    // The (oid, partition, segment) triple encoded in a ColumnStore file name.
+    struct FileId
+    {
+        FileId() : oid(0), partition(0), segment(0) {}
+        FileId(uint32_t oid, uint32_t partition, uint32_t segment)
+            : oid(oid), partition(partition), segment(segment)
+        {
+        }
+        uint32_t oid;
+        uint32_t partition;
+        uint32_t segment;
+    };
+
+    // Packs four byte-sized directory numbers into an oid: `a` becomes the
+    // most significant byte and `d` the least significant one.
+    static uint32_t getOid(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+        uint32_t oid = 0;
+        oid |= a << 24;
+        oid |= b << 16;
+        oid |= c << 8;
+        oid |= d;
+        return oid;
+    }
+};
+
+// Well-formed names must decode to the expected ids; malformed names must be
+// rejected with a non-zero return code.
+TEST_F(RebuildEMTest, File2OidCheckFileFormatTest)
+{
+    // Valid file names.
+    auto aFileName = "001.dir/002.dir/003.dir/004.dir/005.dir/FILE006.cdf";
+    FileId aExpected(getOid(1, 2, 3, 4), 5, 6);
+
+    auto bFileName = "/011.dir/022.dir/033.dir/044.dir/055.dir/FILE066.cdf";
+    FileId bExpected(getOid(11, 22, 33, 44), 55, 66);
+
+    auto cFileName =
+        "data1/255.dir/255.dir/255.dir/255.dir/255.dir/FILE255.cdf";
+    FileId cExpected(getOid(255, 255, 255, 255), 255, 255);
+
+    auto dFileName =
+        "/data1/000.dir/000.dir/000.dir/001.dir/001.dir/FILE001.cdf";
+    FileId dExpected(getOid(0, 0, 0, 1), 1, 1);
+
+    auto eFileName = "/data0/data1/data2/data3/data4/data5/data6/../../../../"
+                     "../data1/000.dir/000.dir/000.dir/000.dir/"
+                     "000.dir/FILE000.cdf";
+    FileId eExpected(getOid(0, 0, 0, 0), 0, 0);
+
+    auto fFileName = "data1/data2/data3/data4/data5/data6/data7/000.dir/"
+                     "000.dir/008.dir/028.dir/000.dir/FILE079.cdf";
+    FileId fExpected(getOid(0, 0, 8, 28), 0, 79);
+
+    const std::vector<std::pair<std::string, FileId>> expectedFileIds = {
+        {aFileName, aExpected}, {bFileName, bExpected},
+        {cFileName, cExpected}, {dFileName, dExpected},
+        {eFileName, eExpected}, {fFileName, fExpected}};
+
+    for (const auto& expectedPair : expectedFileIds)
+    {
+        FileId calculated;
+        auto rc = WriteEngine::Convertor::fileName2Oid(
+            expectedPair.first, calculated.oid, calculated.partition,
+            calculated.segment);
+
+        ASSERT_EQ(rc, 0);
+        EXPECT_EQ(expectedPair.second.oid, calculated.oid);
+        EXPECT_EQ(expectedPair.second.partition, calculated.partition);
+        EXPECT_EQ(expectedPair.second.segment, calculated.segment);
+    }
+
+    // Invalid file names; each sample carries exactly one defect.
+    // Directory number exceeds 255.
+    auto aInvalidFileName =
+        "256.dir/000.dir/001.dir/002.dir/003.dir/FILE004.cdf";
+    // Segment number exceeds 255.
+    auto bInvalidFileName =
+        "000.dir/000.dir/001.dir/002.dir/003.dir/FILE256.cdf";
+    // Just a random path.
+    auto cInvalidFileName = "/usr////bin//lib///";
+    // Empty string.
+    auto dInvalidFileName = "";
+    // Does not have A dir.
+    auto eInvalidFileName =
+        "/data1/000.dir/001.dir/002.dir/003.dir/FILE000.cdf";
+    // Invalid partition dir name.
+    auto fInvalidFileName =
+        "/000.dir/000.dir/001.dir/002.dir/003.ir/FILE000.cdf";
+    // Invalid segment file name.
+    auto gInvalidFileName =
+        "/000.dir/000.dir/001.dir/002.dir/003.dir/FILE00.cdf";
+    // Invalid dir names (two digits instead of three).
+    auto hInvalidFileName =
+        "/00.dir/00.dir/001.dir/002.dir/003.dir/FILE000.cdf";
+    // Invalid amount of dirs.
+    auto iInvalidFileName = "/002.dir/003.dir/FILE000.cdf";
+
+    const std::vector<std::string> invalidFileNames = {
+        aInvalidFileName, bInvalidFileName, cInvalidFileName,
+        dInvalidFileName, eInvalidFileName, fInvalidFileName,
+        gInvalidFileName, hInvalidFileName, iInvalidFileName};
+
+    for (const auto& invalidFileName : invalidFileNames)
+    {
+        FileId calculated;
+        auto rc = WriteEngine::Convertor::fileName2Oid(
+            invalidFileName, calculated.oid, calculated.partition,
+            calculated.segment);
+        ASSERT_NE(rc, 0);
+    }
+}
+
+// Round trip: every name produced by `oid2FileName` must be decoded back to
+// the same oid, partition and segment by `fileName2Oid`.
+TEST_F(RebuildEMTest, File2OidCalculationTest)
+{
+    char dbDirName[20][20];
+    char fileName[64];
+
+    for (uint32_t i = 3; i < 256; ++i)
+    {
+        FileId expectedFileId(getOid(i, i - 1, i - 2, i - 3), i, i);
+        std::memset(fileName, 0, sizeof(fileName));
+        // Generate the filename by the oid, partition and segment.
+        auto rc = WriteEngine::Convertor::oid2FileName(
+            expectedFileId.oid, fileName, dbDirName, expectedFileId.partition,
+            expectedFileId.segment);
+        ASSERT_EQ(rc, 0);
+
+        FileId calculatedFileId;
+        // Generate an oid, partition and segment from the given file name.
+        rc = WriteEngine::Convertor::fileName2Oid(
+            fileName, calculatedFileId.oid, calculatedFileId.partition,
+            calculatedFileId.segment);
+        ASSERT_EQ(rc, 0);
+        EXPECT_EQ(expectedFileId.oid, calculatedFileId.oid);
+        EXPECT_EQ(expectedFileId.partition, calculatedFileId.partition);
+        EXPECT_EQ(expectedFileId.segment, calculatedFileId.segment);
+    }
+}
diff --git a/writeengine/shared/we_convertor.cpp b/writeengine/shared/we_convertor.cpp
index 41dedfc01..eedb327fa 100644
--- a/writeengine/shared/we_convertor.cpp
+++ b/writeengine/shared/we_convertor.cpp
@@ -22,8 +22,10 @@
 /** @file */
 
 #include
+#include <fnmatch.h>
 #include
 #include
+#include <vector>
 #ifdef _MSC_VER
 #include
 #endif
@@ -36,6 +38,14 @@ using namespace execplan;
 namespace
 {
 const char DATE_TIME_FORMAT[] = "%04d-%02d-%02d %02d:%02d:%02d";
+// `fnmatch` pattern of a `full file name`: any prefix, five dirs, one file.
+const char CS_FULL_FILENAME_FORMAT[] =
+    "*[0-9][0-9][0-9].dir/[0-9][0-9][0-9].dir/[0-9][0-9][0-9].dir/"
+    "[0-9][0-9][0-9].dir/[0-9][0-9][0-9].dir/FILE[0-9][0-9][0-9].cdf";
+// `fnmatch` pattern of a ColumnStore `directory name`.
+const char CS_DIR_FORMAT[] = "[0-9][0-9][0-9].dir";
+// `fnmatch` pattern of a ColumnStore `file name`.
+const char CS_FILE_FORMAT[] = "FILE[0-9][0-9][0-9].cdf";
 
 /*******************************************************************************
  * DESCRIPTION:
@@ -92,6 +102,75 @@ int _doFile(char* pBuffer, int blen, unsigned char val)
 
     return rc;
 }
+
+/*******************************************************************************
+ * DESCRIPTION:
+ *   Takes a buffer in ColumnStore `directory` format and converts it to an
+ *   integer.
+ * PARAMETERS:
+ *   buffer(input) - a pointer to the input buffer.
+ *   val (output) - converted integer.
+ * RETURN:
+ *   0 is returned on success; -1 on error, leaving `val` untouched.
+ ******************************************************************************/
+int32_t _fromDir(const char* buffer, uint32_t& val)
+{
+    // Number length in characters.
+    const uint32_t numberLen = 3;
+
+    // Reject a null buffer and anything that is not in `directory` format.
+    if (!buffer || fnmatch(CS_DIR_FORMAT, buffer, 0) != 0)
+    {
+        return -1;
+    }
+    char num[numberLen + 1];
+    strncpy(num, buffer, numberLen);
+    num[numberLen] = '\0';
+    const uint32_t parsed = atoi(num);
+    // The number cannot exceed 0xff; `val` is written only when it fits.
+    if (parsed > 0xff)
+    {
+        return -1;
+    }
+    val = parsed;
+    return 0;
+}
+
+/*******************************************************************************
+ * DESCRIPTION:
+ *   Takes a buffer in ColumnStore `file` format and converts it to an
+ *   integer.
+ * PARAMETERS:
+ *   buffer(input) - a pointer to the input buffer.
+ *   val (output) - converted integer; left untouched on error.
+ * RETURN:
+ *   0 is returned on success, -1 is returned on error.
+ ******************************************************************************/
+int32_t _fromFile(const char* buffer, uint32_t& val)
+{
+    // Offset from the beginning e.g. `FILE000.cdf`.
+    const uint32_t offset = 4;
+    // Number length in characters.
+    const uint32_t numberLen = 3;
+
+    // Reject a null buffer and anything that is not in `file` format.
+    if (!buffer || fnmatch(CS_FILE_FORMAT, buffer, 0) != 0)
+    {
+        return -1;
+    }
+    char num[numberLen + 1];
+    strncpy(num, buffer + offset, numberLen);
+    num[numberLen] = '\0';
+    const uint32_t parsed = atoi(num);
+    // The number cannot exceed 0xff; `val` is written only when it fits.
+    if (parsed > 0xff)
+    {
+        return -1;
+    }
+    val = parsed;
+    return 0;
+}
+
 }
 
 namespace WriteEngine
@@ -285,7 +364,133 @@ int Convertor::oid2FileName(FID fid,
 
     return NO_ERROR;
 }
-    
+
+/*******************************************************************************
+ * DESCRIPTION:
+ *   Convert the given filename to an oid, segment and partition.
+ * PARAMETERS:
+ *   fullFileName INPUT -- filename.
+ *   oid OUTPUT -- oid number from the given filename.
+ *   partition OUTPUT -- partition number from the given filename.
+ *   segment OUTPUT -- segment number from the given filename.
+ * RETURN:
+ *   NO_ERROR if success, other if fail.
+ ******************************************************************************/
+int Convertor::fileName2Oid(const std::string& fullFileName, uint32_t& oid,
+                            uint32_t& partition, uint32_t& segment)
+{
+    // ColumnStore file directory separator.
+    const char dirSep = '/';
+    // The number of the directories in the ColumnStore file name.
+    // Note: without `DBRoot` directory.
+    const uint32_t dirNamesMaxSize = 6;
+    const uint32_t fullFileNameLen = fullFileName.size();
+
+    // Verify the given `fullFileName`.
+    if (!fullFileNameLen ||
+        // If not match `fnmatch` returns a result code which is not equal to
+        // zero.
+        // TODO: Probably we should use `std::regex_match`, but currently
+        // there are still parts of code which use legacy `char *`, so
+        // `std::regex_match` is not applicable there without creating
+        // additional `std::string` from `char *`.
+        fnmatch(CS_FULL_FILENAME_FORMAT, fullFileName.c_str(), 0))
+    {
+        return -1;
+    }
+
+    std::vector<std::string> dirNames;
+    // We need exactly `dirNamesMaxSize` instances.
+    dirNames.reserve(dirNamesMaxSize);
+
+    uint32_t end = fullFileNameLen;
+    // Signed integer for `index` since it could be less than zero.
+    int32_t index = fullFileNameLen - 1;
+
+    // Iterate over `fullFileName` starting from the end and split it by
+    // directory separator. Since we are starting from the end, we need just
+    // `dirNamesMaxSize` names to match the ColumnStore file name format.
+    while (index >= 0 && dirNames.size() < dirNamesMaxSize)
+    {
+        while (index >= 0 && fullFileName[index] != dirSep)
+        {
+            --index;
+        }
+
+        // Begin is a `dirSep` index + 1.
+        uint32_t begin = index + 1;
+        const uint32_t dirNameLen = end - begin;
+        // We already checked the `fullFileName` format,
+        // but this check is only intended to make sure that this algo works
+        // correctly on any input, if something changes.
+        if (dirNameLen > 0 && dirNameLen < MAX_DB_DIR_NAME_SIZE)
+        {
+            dirNames.push_back(fullFileName.substr(begin, dirNameLen));
+        }
+        else
+        {
+            // Something wrong with filename, just return an error.
+            return -1;
+        }
+        // Set `end` to the last directory separator index.
+        end = index;
+        // Skip current directory separator.
+        --index;
+    }
+
+    // Make sure we parsed exactly `dirNamesMaxSize` instances.
+    if (dirNames.size() != dirNamesMaxSize)
+    {
+        return -1;
+    }
+
+    // Initialize `dmFilePathArgs_t` struct.
+    dmFilePathArgs_t args;
+
+    char aBuff[MAX_DB_DIR_NAME_SIZE];
+    char bBuff[MAX_DB_DIR_NAME_SIZE];
+    char cBuff[MAX_DB_DIR_NAME_SIZE];
+    char dBuff[MAX_DB_DIR_NAME_SIZE];
+    char eBuff[MAX_DB_DIR_NAME_SIZE];
+    char fnBuff[MAX_DB_DIR_NAME_SIZE];
+
+    args.pDirA = aBuff;
+    args.pDirB = bBuff;
+    args.pDirC = cBuff;
+    args.pDirD = dBuff;
+    args.pDirE = eBuff;
+    args.pFName = fnBuff;
+
+    args.ALen = sizeof(aBuff);
+    args.BLen = sizeof(bBuff);
+    args.CLen = sizeof(cBuff);
+    args.DLen = sizeof(dBuff);
+    args.ELen = sizeof(eBuff);
+    args.FNLen = sizeof(fnBuff);
+
+    args.Arc = 0;
+    args.Brc = 0;
+    args.Crc = 0;
+    args.Drc = 0;
+    args.Erc = 0;
+    args.FNrc = 0;
+
+    // Populate `dmFilePathArgs_t` struct with the given names (last first).
+    strcpy(args.pFName, dirNames[0].c_str());
+    strcpy(args.pDirE, dirNames[1].c_str());
+    strcpy(args.pDirD, dirNames[2].c_str());
+    strcpy(args.pDirC, dirNames[3].c_str());
+    strcpy(args.pDirB, dirNames[4].c_str());
+    strcpy(args.pDirA, dirNames[5].c_str());
+
+    // FIXME: Currently used ERR_DM_CONVERT_OID, should we introduce new error
+    // code?
+    RETURN_ON_WE_ERROR(dmFPath2Oid(&args, oid, partition, segment),
+                       ERR_DM_CONVERT_OID);
+
+    return NO_ERROR;
+}
+
 /*******************************************************************************
  * DESCRIPTION:
  *    Map specified errno to the associated error message string.
@@ -923,5 +1128,77 @@ int Convertor::dmOid2FPath(uint32_t oid, uint32_t partition, uint32_t segment,
 
     return 0;
 }
+/*******************************************************************************
+ * DESCRIPTION:
+ *   Converts populated `dmFilePathArgs_t` struct to an oid, partition,
+ *   and segment.
+ *
+ * PARAMETERS:
+ *   pArgs INPUT -- a pointer to `dmFilePathArgs_t` struct; its per-name
+ *                  result codes (`Arc` .. `FNrc`) are updated while parsing.
+ *   oid OUTPUT -- oid for the given file name.
+ *   partition OUTPUT -- partition for the given file name.
+ *   segment OUTPUT -- segment for the given filename.
+ *
+ * RETURN:
+ *   return 0 if everything went OK. -1 if an error occurred; `oid`,
+ *   `partition` and `segment` are left untouched in that case.
+ ******************************************************************************/
+int32_t Convertor::dmFPath2Oid(dmFilePathArgs_t* pArgs, uint32_t& oid,
+                               uint32_t& partition, uint32_t& segment)
+{
+    // Parse into locals so that the outputs never hold a half-built value.
+    uint32_t dirA = 0;
+    uint32_t dirB = 0;
+    uint32_t dirC = 0;
+    uint32_t dirD = 0;
+    uint32_t parsedPartition = 0;
+    uint32_t parsedSegment = 0;
+
+    // OID: directories A..D hold its four bytes, most significant first.
+    // Directory A.
+    if ((pArgs->Arc = _fromDir(pArgs->pDirA, dirA)) == -1)
+    {
+        return -1;
+    }
+
+    // Directory B.
+    if ((pArgs->Brc = _fromDir(pArgs->pDirB, dirB)) == -1)
+    {
+        return -1;
+    }
+
+    // Directory C.
+    if ((pArgs->Crc = _fromDir(pArgs->pDirC, dirC)) == -1)
+    {
+        return -1;
+    }
+
+    // Directory D.
+    if ((pArgs->Drc = _fromDir(pArgs->pDirD, dirD)) == -1)
+    {
+        return -1;
+    }
+
+    // Partition.
+    if ((pArgs->Erc = _fromDir(pArgs->pDirE, parsedPartition)) == -1)
+    {
+        return -1;
+    }
+
+    // Segment.
+    if ((pArgs->FNrc = _fromFile(pArgs->pFName, parsedSegment)) == -1)
+    {
+        return -1;
+    }
+
+    // Every name parsed: publish the results.
+    oid = (dirA << 24) | (dirB << 16) | (dirC << 8) | dirD;
+    partition = parsedPartition;
+    segment = parsedSegment;
+
+    return 0;
+}
+
 } //end of namespace
 
diff --git a/writeengine/shared/we_convertor.h b/writeengine/shared/we_convertor.h
index 5bec87434..b5e88d250 100644
--- a/writeengine/shared/we_convertor.h
+++ b/writeengine/shared/we_convertor.h
@@ -82,6 +82,18 @@ public:
                                    char dbDirName[][MAX_DB_DIR_NAME_SIZE],
                                    uint32_t partition,
                                    uint16_t segment);
+    /**
+     * @brief Convert a ColumnStore file name to its oid, partition and segment
+     *
+     * @param fullFileName file name to parse
+     * @param oid (out) oid encoded in the file name
+     * @param partition (out) partition encoded in the file name
+     * @param segment (out) segment encoded in the file name
+     * @return NO_ERROR on success, any other value on failure
+     */
+    EXPORT static int fileName2Oid(const std::string &fullFileName,
+                                   uint32_t &oid, uint32_t &partition,
+                                   uint32_t &segment);
     /**
      * @brief Convert specified errno to associated error msg string
      *
@@ -139,7 +151,8 @@ private:
     struct dmFilePathArgs_t;
     static int dmOid2FPath(uint32_t oid, uint32_t partition, uint32_t segment,
                            dmFilePathArgs_t* pArgs);
-    
+    static int32_t dmFPath2Oid(dmFilePathArgs_t* pArgs, uint32_t& oid,
+                               uint32_t& partition, uint32_t& segment);
 };
 
 } //end of namespace