1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-08 14:22:09 +03:00

MCOL-4566: Add file2Oid function.

* This patch adds the file2Oid function (implemented as
  `Convertor::fileName2Oid`). The function is needed to map a ColumnStore
  file name to an oid, partition and segment.
* Tests added to check that this function works correctly.
* This patch is related to MCOL-4566, so it adds a new file with GTests.

Note: The description for the functions follows the description style
in the current file.
This commit is contained in:
Denis Khalikov
2021-03-04 22:13:03 +03:00
parent 2d6d8b901e
commit 797716ef13
4 changed files with 439 additions and 2 deletions

View File

@@ -36,3 +36,9 @@ if (WITH_SHARED_COMP_TESTS)
target_link_libraries(we_shared_components_tests ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS} cppunit)
install(TARGETS we_shared_components_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
endif()
# Optional GoogleTest-based unit tests for the file-name <-> oid conversion
# helpers (rebuild-em-tests.cpp). The target is only defined, linked and
# installed when WITH_REBUILD_EM_UT is enabled.
if (WITH_REBUILD_EM_UT)
add_executable(rebuild_em_tests rebuild-em-tests.cpp)
# Links against the GoogleTest libraries in addition to the engine libraries.
target_link_libraries(rebuild_em_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
# Installed next to the other engine binaries, in the columnstore-engine component.
install(TARGETS rebuild_em_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
endif()

158
tests/rebuild-em-tests.cpp Normal file
View File

@@ -0,0 +1,158 @@
/* Copyright (C) 2020 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "we_convertor.h"
// Fixture shared by the file-name <-> oid conversion tests. It provides a small
// value type describing one segment file and a helper that assembles an oid
// from the four numbers encoded in the directory names.
class RebuildEMTest : public ::testing::Test
{
  protected:
    // The (oid, partition, segment) triple that identifies a segment file.
    struct FileId
    {
        FileId() : oid(0), partition(0), segment(0) {}

        FileId(uint32_t oidValue, uint32_t partitionValue,
               uint32_t segmentValue)
            : oid(oidValue), partition(partitionValue), segment(segmentValue)
        {
        }

        uint32_t oid;
        uint32_t partition;
        uint32_t segment;
    };

    // Packs four values into one 32-bit oid: `a` lands in bits 24-31, `b` in
    // bits 16-23, `c` in bits 8-15 and `d` in bits 0-7.
    static uint32_t getOid(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
    {
        return (a << 24) | (b << 16) | (c << 8) | d;
    }
};
// Checks `Convertor::fileName2Oid` against hand-written file names: every
// well-formed name must decode to the expected (oid, partition, segment), and
// every malformed name must be rejected with a non-zero return code.
TEST_F(RebuildEMTest, File2OidCheckFileFormatTest)
{
    // Valid file names, each paired with the id it must decode to.
    const std::vector<std::pair<std::string, FileId>> expectedFileIds = {
        // Relative path without any prefix.
        std::make_pair(
            std::string("001.dir/002.dir/003.dir/004.dir/005.dir/FILE006.cdf"),
            FileId(getOid(1, 2, 3, 4), 5, 6)),
        // Leading directory separator.
        std::make_pair(
            std::string(
                "/011.dir/022.dir/033.dir/044.dir/055.dir/FILE066.cdf"),
            FileId(getOid(11, 22, 33, 44), 55, 66)),
        // Largest value allowed in every position.
        std::make_pair(
            std::string(
                "data1/255.dir/255.dir/255.dir/255.dir/255.dir/FILE255.cdf"),
            FileId(getOid(255, 255, 255, 255), 255, 255)),
        // Absolute path with a prefix directory.
        std::make_pair(
            std::string(
                "/data1/000.dir/000.dir/000.dir/001.dir/001.dir/FILE001.cdf"),
            FileId(getOid(0, 0, 0, 1), 1, 1)),
        // Prefix containing `..` components; only the last six components
        // are relevant for the conversion.
        std::make_pair(
            std::string("/data0/data1/data2/data3/data4/data5/data6/../../../"
                        "../../data1/000.dir/000.dir/000.dir/000.dir/"
                        "000.dir/FILE000.cdf"),
            FileId(getOid(0, 0, 0, 0), 0, 0)),
        // Long relative prefix, digits 8 and 9 must be parsed as decimal.
        std::make_pair(
            std::string("data1/data2/data3/data4/data5/data6/data7/000.dir/"
                        "000.dir/008.dir/028.dir/000.dir/FILE079.cdf"),
            FileId(getOid(0, 0, 8, 28), 0, 79))};

    for (const auto& expectedPair : expectedFileIds)
    {
        FileId calculated;
        const auto rc = WriteEngine::Convertor::fileName2Oid(
            expectedPair.first, calculated.oid, calculated.partition,
            calculated.segment);

        // The values are meaningless if the conversion failed, so stop here.
        ASSERT_EQ(rc, 0) << "file name: " << expectedPair.first;
        EXPECT_EQ(expectedPair.second.oid, calculated.oid)
            << "file name: " << expectedPair.first;
        EXPECT_EQ(expectedPair.second.partition, calculated.partition)
            << "file name: " << expectedPair.first;
        EXPECT_EQ(expectedPair.second.segment, calculated.segment)
            << "file name: " << expectedPair.first;
    }

    // Invalid file names. Each name violates exactly one rule, so a failing
    // case points at the specific check that stopped working.
    const std::vector<std::string> invalidFileNames = {
        // Dir exceeds 255.
        "256.dir/000.dir/001.dir/002.dir/003.dir/FILE004.cdf",
        // Segment exceeds 255.
        "000.dir/000.dir/001.dir/002.dir/003.dir/FILE256.cdf",
        // Just a random path.
        "/usr////bin//lib///",
        // Empty string.
        "",
        // Does not have A dir.
        "/data1/000.dir/001.dir/002.dir/003.dir/FILE000.cdf",
        // Invalid partition dir name (`.ir` instead of `.dir`).
        "/000.dir/000.dir/001.dir/002.dir/003.ir/FILE000.cdf",
        // Invalid segment name (two digits instead of three).
        "/000.dir/000.dir/001.dir/002.dir/003.dir/FILE00.cdf",
        // Invalid dir name (two digits instead of three).
        "/00.dir/000.dir/001.dir/002.dir/003.dir/FILE000.cdf",
        // Invalid amount of dirs.
        "/002.dir/003.dir/FILE000.cdf"};

    for (const auto& invalidFileName : invalidFileNames)
    {
        FileId calculated;
        const auto rc = WriteEngine::Convertor::fileName2Oid(
            invalidFileName, calculated.oid, calculated.partition,
            calculated.segment);

        // EXPECT rather than ASSERT: report every name that is wrongly
        // accepted instead of stopping at the first one.
        EXPECT_NE(rc, 0) << "file name: '" << invalidFileName << "'";
    }
}
// Round-trip check: build a file name from an (oid, partition, segment) triple
// with `oid2FileName`, decode it again with `fileName2Oid`, and verify that
// the decoded triple equals the one we started from.
TEST_F(RebuildEMTest, File2OidCalculationTest)
{
    // Scratch storage `oid2FileName` fills with the individual name parts.
    char dirNameParts[20][20];

    for (uint32_t value = 3; value <= 255; ++value)
    {
        // Use four different byte values so that a swapped byte order in the
        // oid would be detected.
        const FileId wanted(getOid(value, value - 1, value - 2, value - 3),
                            value, value);

        // Zero-filled buffer that receives the generated file name.
        char generatedName[64] = {0};

        // oid, partition, segment -> file name.
        int rc = WriteEngine::Convertor::oid2FileName(
            wanted.oid, generatedName, dirNameParts, wanted.partition,
            wanted.segment);
        ASSERT_EQ(rc, 0);

        // file name -> oid, partition, segment.
        FileId decoded;
        rc = WriteEngine::Convertor::fileName2Oid(
            generatedName, decoded.oid, decoded.partition, decoded.segment);
        ASSERT_EQ(rc, 0);

        EXPECT_EQ(wanted.oid, decoded.oid);
        EXPECT_EQ(wanted.partition, decoded.partition);
        EXPECT_EQ(wanted.segment, decoded.segment);
    }
}

View File

@@ -22,8 +22,10 @@
/** @file */
#include <unistd.h>
#include <fnmatch.h>
#include <limits>
#include <cstring>
#include <vector>
#ifdef _MSC_VER
#include <cstdio>
#endif
@@ -36,6 +38,14 @@ using namespace execplan;
namespace
{
const char DATE_TIME_FORMAT[] = "%04d-%02d-%02d %02d:%02d:%02d";
// ColumnStore file `full file name` format (an `fnmatch` pattern): an
// arbitrary prefix followed by five `NNN.dir` directories and the
// `FILENNN.cdf` file name, where every `N` is a decimal digit.
const char CS_FULL_FILENAME_FORMAT[] =
    "*[0-9][0-9][0-9].dir/[0-9][0-9][0-9].dir/[0-9][0-9][0-9].dir/"
    "[0-9][0-9][0-9].dir/[0-9][0-9][0-9].dir/FILE[0-9][0-9][0-9].cdf";
// ColumnStore file `directory name` format.
const char CS_DIR_FORMAT[] = "[0-9][0-9][0-9].dir";
// ColumnStore file `file name` format.
const char CS_FILE_FORMAT[] = "FILE[0-9][0-9][0-9].cdf";
/*******************************************************************************
* DESCRIPTION:
@@ -92,6 +102,75 @@ int _doFile(char* pBuffer, int blen, unsigned char val)
return rc;
}
/*******************************************************************************
 * DESCRIPTION:
 *    Takes a buffer in ColumnStore `directory` format (`NNN.dir`) and converts
 *    the leading three-digit number to an integer.
 * PARAMETERS:
 *    buffer(input) - a pointer to the input buffer, may be NULL.
 *    val  (output) - converted integer; written only on success.
 * RETURN:
 *    0 is returned on success, -1 is returned on error (NULL buffer, wrong
 *    format, or a number greater than 255).
 ******************************************************************************/
int32_t _fromDir(const char* buffer, uint32_t& val)
{
    // Number length in characters.
    const uint32_t numberLen = 3;

    // Check that buffer is in the correct `directory` format.
    if (!buffer || fnmatch(CS_DIR_FORMAT, buffer, 0) != 0)
    {
        return -1;
    }

    // Accumulate into a local so that `val` stays untouched on failure.
    uint32_t parsed = 0;

    for (uint32_t i = 0; i < numberLen; ++i)
    {
        const char ch = buffer[i];

        // The format check above should guarantee decimal digits; verify it
        // anyway so the arithmetic below never sees anything else.
        if (ch < '0' || ch > '9')
        {
            return -1;
        }

        parsed = parsed * 10 + static_cast<uint32_t>(ch - '0');
    }

    // The number cannot exceed 0xff.
    if (parsed > 0xff)
    {
        return -1;
    }

    val = parsed;
    return 0;
}
/*******************************************************************************
 * DESCRIPTION:
 *    Takes a buffer in ColumnStore `file` format (`FILENNN.cdf`) and converts
 *    the three-digit number that follows the `FILE` prefix to an integer.
 * PARAMETERS:
 *    buffer(input) - a pointer to the input buffer, may be NULL.
 *    val  (output) - converted integer; written only on success.
 * RETURN:
 *    0 is returned on success, -1 is returned on error (NULL buffer, wrong
 *    format, or a number greater than 255).
 ******************************************************************************/
int32_t _fromFile(const char* buffer, uint32_t& val)
{
    // Offset of the number from the beginning, e.g. `FILE000.cdf`.
    const uint32_t offset = 4;
    // Number length in characters.
    const uint32_t numberLen = 3;

    // Check that buffer is in the correct `file` format.
    if (!buffer || fnmatch(CS_FILE_FORMAT, buffer, 0) != 0)
    {
        return -1;
    }

    // Accumulate into a local so that `val` stays untouched on failure.
    uint32_t parsed = 0;

    for (uint32_t i = 0; i < numberLen; ++i)
    {
        const char ch = buffer[offset + i];

        // The format check above should guarantee decimal digits; verify it
        // anyway so the arithmetic below never sees anything else.
        if (ch < '0' || ch > '9')
        {
            return -1;
        }

        parsed = parsed * 10 + static_cast<uint32_t>(ch - '0');
    }

    // The number cannot exceed 0xff.
    if (parsed > 0xff)
    {
        return -1;
    }

    val = parsed;
    return 0;
}
}
namespace WriteEngine
@@ -285,7 +364,133 @@ int Convertor::oid2FileName(FID fid,
return NO_ERROR;
}
/*******************************************************************************
 * DESCRIPTION:
 *    Convert the given filename to an oid, segment and partition.
 *    Only the last six path components are examined (directories A-E and the
 *    file name); anything in front of them, e.g. a `DBRoot` prefix, is ignored.
 * PARAMETERS:
 *    fullFileName INPUT -- filename.
 *    oid          OUTPUT -- oid number from the given filename.
 *    partition    OUTPUT -- partition number from the given filename.
 *    segment      OUTPUT -- segment number from the given filename.
 * RETURN:
 *    NO_ERROR if success, other if fail.
 ******************************************************************************/
int Convertor::fileName2Oid(const std::string& fullFileName, uint32_t& oid,
                            uint32_t& partition, uint32_t& segment)
{
    // ColumnStore file directory separator.
    const char dirSep = '/';
    // The number of the directories in the ColumnStore file name.
    // Note: without `DBRoot` directory.
    const std::string::size_type dirNamesMaxSize = 6;

    // Verify the given `fullFileName`.
    // If not match `fnmatch` returns a result code which is not equal to zero.
    // TODO: Probably we should use `std::regex_match`, but currently
    // there are still parts of code which use legacy `char *`, so
    // `std::regex_match` is not applicable there without creating
    // additional `std::string` from `char *`.
    if (fullFileName.empty() ||
        fnmatch(CS_FULL_FILENAME_FORMAT, fullFileName.c_str(), 0) != 0)
    {
        return -1;
    }

    std::vector<std::string> dirNames;
    dirNames.reserve(dirNamesMaxSize);

    // Split `fullFileName` by the directory separator, walking from the end
    // towards the beginning. `end` is one past the last character of the
    // component that is extracted next.
    std::string::size_type end = fullFileName.size();

    while (dirNames.size() < dirNamesMaxSize)
    {
        // Position of the separator in front of the current component, or
        // `npos` when the component starts at the beginning of the string.
        std::string::size_type sepPos = std::string::npos;

        if (end > 0)
        {
            sepPos = fullFileName.rfind(dirSep, end - 1);
        }

        const std::string::size_type begin =
            (sepPos == std::string::npos) ? 0 : sepPos + 1;
        const std::string::size_type dirNameLen = end - begin;

        // We already checked the `fullFileName` format,
        // but this check is only intended to make sure that this algo works
        // correctly on any input, if something changes. It also guarantees
        // that every name fits into the fixed size buffers used below.
        if (dirNameLen == 0 ||
            dirNameLen >=
                static_cast<std::string::size_type>(MAX_DB_DIR_NAME_SIZE))
        {
            // Something wrong with filename, just return an error.
            return -1;
        }

        dirNames.push_back(fullFileName.substr(begin, dirNameLen));

        if (sepPos == std::string::npos)
        {
            // Reached the beginning of the string, nothing left to split.
            break;
        }

        // The next component ends right before this separator.
        end = sepPos;
    }

    // Make sure we parsed all the required instances.
    if (dirNames.size() != dirNamesMaxSize)
    {
        return -1;
    }

    // Initialize `dmFilePathArgs_t` struct.
    dmFilePathArgs_t args;
    char aBuff[MAX_DB_DIR_NAME_SIZE];
    char bBuff[MAX_DB_DIR_NAME_SIZE];
    char cBuff[MAX_DB_DIR_NAME_SIZE];
    char dBuff[MAX_DB_DIR_NAME_SIZE];
    char eBuff[MAX_DB_DIR_NAME_SIZE];
    char fnBuff[MAX_DB_DIR_NAME_SIZE];

    args.pDirA = aBuff;
    args.pDirB = bBuff;
    args.pDirC = cBuff;
    args.pDirD = dBuff;
    args.pDirE = eBuff;
    args.pFName = fnBuff;

    args.ALen = sizeof(aBuff);
    args.BLen = sizeof(bBuff);
    args.CLen = sizeof(cBuff);
    args.DLen = sizeof(dBuff);
    args.ELen = sizeof(eBuff);
    args.FNLen = sizeof(fnBuff);

    args.Arc = 0;
    args.Brc = 0;
    args.Crc = 0;
    args.Drc = 0;
    args.Erc = 0;
    args.FNrc = 0;

    // Populate `dmFilePathArgs_t` struct with the given names. The names were
    // collected back to front, so index 0 is the file name and index 5 is
    // directory A. `strcpy` cannot overflow: every name was verified above to
    // be shorter than `MAX_DB_DIR_NAME_SIZE`.
    strcpy(args.pFName, dirNames[0].c_str());
    strcpy(args.pDirE, dirNames[1].c_str());
    strcpy(args.pDirD, dirNames[2].c_str());
    strcpy(args.pDirC, dirNames[3].c_str());
    strcpy(args.pDirB, dirNames[4].c_str());
    strcpy(args.pDirA, dirNames[5].c_str());

    // FIXME: Currently used ERR_DM_CONVERT_OID, should we introduce new error
    // code?
    RETURN_ON_WE_ERROR(dmFPath2Oid(&args, oid, partition, segment),
                       ERR_DM_CONVERT_OID);

    return NO_ERROR;
}
/*******************************************************************************
* DESCRIPTION:
* Map specified errno to the associated error message string.
@@ -923,5 +1128,69 @@ int Convertor::dmOid2FPath(uint32_t oid, uint32_t partition, uint32_t segment,
return 0;
}
/*******************************************************************************
 * DESCRIPTION:
 *    Decodes the directory and file names stored in a populated
 *    `dmFilePathArgs_t` struct into an oid, partition and segment.
 *    Directories A, B, C and D supply the four bytes of the oid (A being the
 *    most significant one), directory E the partition and the file name the
 *    segment. The result code of every conversion step is stored in the
 *    matching `*rc` member of the struct.
 *
 * PARAMETERS:
 *    pArgs     INPUT  -- a pointer to `dmFilePathArgs_t` struct.
 *    oid       OUTPUT -- oid for the given file name.
 *    partition OUTPUT -- partition for the given file name.
 *    segment   OUTPUT -- segment for the given filename.
 *
 * RETURN:
 *    return 0 if everything went OK. -1 if an error occurred.
 ******************************************************************************/
int32_t Convertor::dmFPath2Oid(dmFilePathArgs_t* pArgs, uint32_t& oid,
                               uint32_t& partition, uint32_t& segment)
{
    // Receives the number decoded from the directory currently processed.
    uint32_t dirValue = 0;

    oid = 0;

    // Directory A: bits 24-31 of the oid.
    pArgs->Arc = _fromDir(pArgs->pDirA, dirValue);

    if (pArgs->Arc == -1)
    {
        return -1;
    }

    oid = dirValue << 24;

    // Directory B: bits 16-23 of the oid.
    pArgs->Brc = _fromDir(pArgs->pDirB, dirValue);

    if (pArgs->Brc == -1)
    {
        return -1;
    }

    oid |= dirValue << 16;

    // Directory C: bits 8-15 of the oid.
    pArgs->Crc = _fromDir(pArgs->pDirC, dirValue);

    if (pArgs->Crc == -1)
    {
        return -1;
    }

    oid |= dirValue << 8;

    // Directory D: bits 0-7 of the oid.
    pArgs->Drc = _fromDir(pArgs->pDirD, dirValue);

    if (pArgs->Drc == -1)
    {
        return -1;
    }

    oid |= dirValue;

    // Directory E holds the partition.
    pArgs->Erc = _fromDir(pArgs->pDirE, partition);

    if (pArgs->Erc == -1)
    {
        return -1;
    }

    // The file name holds the segment.
    pArgs->FNrc = _fromFile(pArgs->pFName, segment);

    if (pArgs->FNrc == -1)
    {
        return -1;
    }

    return 0;
}
} //end of namespace

View File

@@ -82,6 +82,9 @@ public:
char dbDirName[][MAX_DB_DIR_NAME_SIZE],
uint32_t partition, uint16_t segment);
/**
 * @brief Convert a ColumnStore file name to an oid, partition and segment
 *
 * @param fullFileName file name to convert; it must end with five
 *        `NNN.dir` directories followed by `FILENNN.cdf`
 * @param oid (out) oid number from the given file name
 * @param partition (out) partition number from the given file name
 * @param segment (out) segment number from the given file name
 * @return NO_ERROR if success, other if fail
 */
EXPORT static int fileName2Oid(const std::string &fullFileName,
uint32_t &oid, uint32_t &partition,
uint32_t &segment);
/**
* @brief Convert specified errno to associated error msg string
*
@@ -139,7 +142,8 @@ private:
struct dmFilePathArgs_t;
static int dmOid2FPath(uint32_t oid, uint32_t partition, uint32_t segment,
dmFilePathArgs_t* pArgs);
// Decodes the directory and file names held in a populated `pArgs` into an
// oid, partition and segment. Returns 0 on success, -1 on error.
static int32_t dmFPath2Oid(dmFilePathArgs_t* pArgs, uint32_t& oid,
uint32_t& partition, uint32_t& segment);
};
} //end of namespace