1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

[MCOL-5106] Add support to work with StorageManager.

This patch eliminates boost::filesystem from the `mcsRebuildEM` tool.
After this change we should be able to work with any filesystem,
even S3.
This commit is contained in:
Denis Khalikov
2022-05-30 20:15:46 +03:00
parent de522ed15a
commit fb1e23bb83
5 changed files with 154 additions and 89 deletions

View File

@ -18,7 +18,6 @@
#include <iostream>
#include <string>
#include <ftw.h>
#include <boost/filesystem.hpp>
#include <boost/algorithm/string/case_conv.hpp>
#include "configcpp.h"
@ -29,7 +28,7 @@
using namespace idbdatafile;
using namespace RebuildExtentMap;
static void usage(const string& pname)
static void usage(const std::string& pname)
{
std::cout << "usage: " << pname << " [-vdhs]" << std::endl;
std::cout << "rebuilds the extent map from the contents of the database file "
@ -41,6 +40,20 @@ static void usage(const string& pname)
std::cout << " -s show extent map and quit" << std::endl;
}
// Reads one whitespace-delimited token from standard input and reports
// whether it is an affirmative answer.
// Returns true only when the token, compared case-insensitively, is "y" or
// "yes"; returns false for any other token or when nothing was extracted.
static bool isYes()
{
  std::string answer;
  std::cin >> answer;

  // Nothing was extracted, so there is no answer to accept.
  if (answer.empty())
    return false;

  // Normalize the case so that "Y", "Yes", "YES", ... are accepted as well.
  boost::algorithm::to_lower(answer);
  return answer == "y" || answer == "yes";
}
int main(int argc, char** argv)
{
int32_t option;
@ -78,41 +91,28 @@ int main(int argc, char** argv)
// MCOL-4685
std::cout << "The launch of mcsRebuildEM tool must be sanctioned by MariaDB support. " << std::endl;
std::cout << "Requirement: all DBRoots must be on this node. " << std::endl;
std::cout << "Do you want to continue Y/N? ";
std::string confirmation;
cin >> confirmation;
if (confirmation.size() == 0)
return 0;
boost::algorithm::to_lower(confirmation);
if (!(confirmation == "y" || confirmation == "yes"))
if (!isYes())
return 0;
auto* config = config::Config::makeConfig();
// Check for storage type.
const auto DBRootStorageType = config->getConfig("Installation", "DBRootStorageType");
if (DBRootStorageType != "internal")
{
std::cout << "Only internal DBRootStorageType is supported, provided: " << DBRootStorageType << std::endl;
return 0;
}
const auto BRMSavesEM = config->getConfig("SystemConfig", "DBRMRoot") + "_em";
// Check whether the `BRM_saves_em` file is present.
// TODO: Should we add force option to remove file?
if (boost::filesystem::exists(BRMSavesEM))
if (IDBPolicy::exists(BRMSavesEM.c_str()))
{
std::cout << BRMSavesEM << " file exists. " << std::endl;
std::cout << "Please note: this tool is only suitable in situations "
"where there is no `BRM_saves_em` file. "
<< std::endl;
std::cout << "If `BRM_saves_em` "
"exists extent map will be restored from it. "
<< std::endl;
std::cout << "Exiting. " << std::endl;
return 0;
std::cout << "Do you want to delete this file Y/N? ";
if (!isYes())
return 0;
if (IDBPolicy::remove(BRMSavesEM.c_str()) == -1)
{
std::cout << "Cannot remove " << BRMSavesEM << std::endl;
std::cout << "Exiting. " << std::endl;
return 0;
}
}
// Initialize system extents from the binary blob.
@ -136,10 +136,11 @@ int main(int argc, char** argv)
auto dbRootPath = config->getConfig("SystemConfig", dbRootName);
emReBuilder.setDBRoot(dbRootNumber);
emReBuilder.collectExtents(dbRootPath.c_str());
emReBuilder.rebuildExtentMap();
emReBuilder.clear();
}
emReBuilder.rebuildExtentMap();
emReBuilder.clear();
// Save restored extent map.
emReBuilder.getEM().save(BRMSavesEM);
std::cout << "Completed." << std::endl;

View File

@ -16,7 +16,6 @@
MA 02110-1301, USA. */
#include <iostream>
#include <boost/filesystem.hpp>
#include <stdint.h>
#include "rebuildEM.h"
@ -36,16 +35,36 @@ using namespace idbdatafile;
namespace RebuildExtentMap
{
int32_t EMReBuilder::collectExtents(const string& dbRootPath)
// Recursively walks the tree below `currentPath + partialPath` and appends
// every leaf path found to `fileNames`.
//   partialPath - entry name appended to `currentPath` for this step.
//   currentPath - path prefix accumulated so far; taken by value, so each
//                 recursion level extends its own copy.
//   fileNames   - output container that receives the collected paths.
// A path is treated as a leaf when `IDBPolicy::listDirectory` leaves the
// listing empty for it.
// NOTE(review): any status `listDirectory` may report is not inspected, so a
// directory without entries is recorded like a file - confirm this is intended.
void EMReBuilder::collectFileNames(const std::string& partialPath, std::string currentPath,
                                   std::vector<std::string>& fileNames)
{
  currentPath += partialPath;

  std::list<std::string> entries;
  IDBPolicy::listDirectory(currentPath.c_str(), entries);

  if (!entries.empty())
  {
    // Descend into every listed entry, using this path plus a separator as
    // the prefix for the next level.
    currentPath += '/';
    for (const auto& entry : entries)
      collectFileNames(entry, currentPath, fileNames);
    return;
  }

  // Nothing is listed under this path: record it as a leaf.
  fileNames.push_back(currentPath);
}
int32_t EMReBuilder::collectExtents(const std::string& dbRootPath)
{
if (doVerbose())
{
std::cout << "Collect extents for the DBRoot " << dbRootPath << std::endl;
}
for (boost::filesystem::recursive_directory_iterator dirIt(dbRootPath), dirEnd; dirIt != dirEnd; ++dirIt)
std::vector<std::string> fileNames;
collectFileNames(dbRootPath, "", fileNames);
for (const auto& fileName : fileNames)
{
(void)collectExtent(dirIt->path().string());
(void)collectExtent(fileName);
}
return 0;
@ -141,8 +160,8 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
}
uint64_t hwm = 0;
rc = searchHWMInSegmentFile(oid, getDBRoot(), partition, segment, colDataType, colWidth, blockCount, isDict,
compressionType, hwm);
rc = searchHWMInSegmentFile(fullFileName, oid, getDBRoot(), partition, segment, colDataType, colWidth,
blockCount, isDict, compressionType, hwm);
if (rc != 0)
return rc;
@ -156,13 +175,13 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
for (uint32_t lbidIndex = 0; lbidIndex < lbidCount - 1; ++lbidIndex)
{
auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidIndex);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, /*hwm*/ 0, isDict);
FileId fileId(oid, partition, segment, getDBRoot(), colWidth, colDataType, lbid, /*hwm*/ 0, isDict);
extentMap.push_back(fileId);
}
// Last one has an actual HWM.
auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidCount - 1);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
FileId fileId(oid, partition, segment, getDBRoot(), colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId);
if (doVerbose())
@ -175,7 +194,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
{
// One extent per segment file.
auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, 0);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
FileId fileId(oid, partition, segment, getDBRoot(), colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId);
if (doVerbose())
@ -213,14 +232,14 @@ int32_t EMReBuilder::rebuildExtentMap()
{
// Create a dictionary extent for the given oid, partition,
// segment, dbroot.
getEM().createDictStoreExtent(fileId.oid, getDBRoot(), fileId.partition, fileId.segment, lbid,
getEM().createDictStoreExtent(fileId.oid, fileId.dbroot, fileId.partition, fileId.segment, lbid,
allocdSize);
}
else
{
// Create a column extent for the given oid, partition,
// segment, dbroot and column width.
getEM().createColumnExtentExactFile(fileId.oid, fileId.colWidth, getDBRoot(), fileId.partition,
getEM().createColumnExtentExactFile(fileId.oid, fileId.colWidth, fileId.dbroot, fileId.partition,
fileId.segment, fileId.colDataType, lbid, allocdSize,
startBlockOffset);
}
@ -263,8 +282,8 @@ int32_t EMReBuilder::rebuildExtentMap()
return 0;
}
int32_t EMReBuilder::searchHWMInSegmentFile(uint32_t oid, uint32_t dbRoot, uint32_t partition,
uint32_t segment,
int32_t EMReBuilder::searchHWMInSegmentFile(const std::string& fullFileName, uint32_t oid, uint32_t dbRoot,
uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth, uint64_t blockCount, bool isDict,
uint32_t compressionType, uint64_t& hwm)
@ -275,12 +294,12 @@ int32_t EMReBuilder::searchHWMInSegmentFile(uint32_t oid, uint32_t dbRoot, uint3
if (isDict)
{
chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperDict>(new ChunkManagerWrapperDict(
oid, dbRoot, partition, segment, colDataType, colWidth, compressionType));
fullFileName, oid, dbRoot, partition, segment, colDataType, colWidth, compressionType));
}
else
{
chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperColumn>(new ChunkManagerWrapperColumn(
oid, dbRoot, partition, segment, colDataType, colWidth, compressionType));
fullFileName, oid, dbRoot, partition, segment, colDataType, colWidth, compressionType));
}
}
catch (...)
@ -348,7 +367,8 @@ int32_t EMReBuilder::initializeSystemExtents()
return 0;
}
ChunkManagerWrapper::ChunkManagerWrapper(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
ChunkManagerWrapper::ChunkManagerWrapper(const std::string& filename, uint32_t oid, uint32_t dbRoot,
uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth)
: oid(oid)
@ -359,6 +379,7 @@ ChunkManagerWrapper::ChunkManagerWrapper(uint32_t oid, uint32_t dbRoot, uint32_t
, colWidth(colWidth)
, size(colWidth)
, pFileOp(nullptr)
, fileName(filename)
{
}
@ -370,19 +391,19 @@ int32_t ChunkManagerWrapper::readBlock(uint32_t blockNumber)
return 0;
}
ChunkManagerWrapperColumn::ChunkManagerWrapperColumn(uint32_t oid, uint32_t dbRoot, uint32_t partition,
uint32_t segment,
ChunkManagerWrapperColumn::ChunkManagerWrapperColumn(const std::string& filename, uint32_t oid,
uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth, uint32_t compressionType)
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType, colWidth)
: ChunkManagerWrapper(filename, oid, dbRoot, partition, segment, colDataType, colWidth)
{
pFileOp =
std::unique_ptr<WriteEngine::ColumnOpCompress1>(new WriteEngine::ColumnOpCompress1(compressionType));
chunkManager.fileOp(pFileOp.get());
// Open compressed column segment file. We will read block by block
// from the compressed chunks.
pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment, colDataType, colWidth, fileName,
"rb", size, false, false);
pFile = chunkManager.getFilePtrByName(fileName, oid, dbRoot, partition, segment, colDataType, colWidth,
"rb", size, false, false);
if (!pFile)
{
throw std::bad_alloc();
@ -425,17 +446,17 @@ bool ChunkManagerWrapperColumn::isEmptyValue(const uint8_t* value) const
return false;
}
ChunkManagerWrapperDict::ChunkManagerWrapperDict(uint32_t oid, uint32_t dbRoot, uint32_t partition,
uint32_t segment,
ChunkManagerWrapperDict::ChunkManagerWrapperDict(const std::string& filename, uint32_t oid, uint32_t dbRoot,
uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth, uint32_t compressionType)
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType, colWidth)
: ChunkManagerWrapper(filename, oid, dbRoot, partition, segment, colDataType, colWidth)
{
pFileOp = std::unique_ptr<WriteEngine::DctnryCompress1>(new WriteEngine::DctnryCompress1(compressionType));
chunkManager.fileOp(pFileOp.get());
// Open compressed dict segment file.
pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment, colDataType, colWidth, fileName,
"rb", size, false, true);
pFile = chunkManager.getFilePtrByName(fileName, oid, dbRoot, partition, segment, colDataType, colWidth,
"rb", size, false, true);
if (!pFile)
{
throw std::bad_alloc();

View File

@ -39,11 +39,12 @@ namespace RebuildExtentMap
// This struct represents a FileId. For internal purpose only.
struct FileId
{
FileId(uint32_t oid, uint32_t partition, uint32_t segment, uint32_t colWidth,
FileId(uint32_t oid, uint32_t partition, uint32_t segment, uint32_t dbroot, uint32_t colWidth,
execplan::CalpontSystemCatalog::ColDataType colDataType, int64_t lbid, uint64_t hwm, bool isDict)
: oid(oid)
, partition(partition)
, segment(segment)
, dbroot(dbroot)
, colWidth(colWidth)
, colDataType(colDataType)
, lbid(lbid)
@ -55,6 +56,7 @@ struct FileId
uint32_t oid;
uint32_t partition;
uint32_t segment;
uint32_t dbroot;
uint32_t colWidth;
execplan::CalpontSystemCatalog::ColDataType colDataType;
int64_t lbid;
@ -70,13 +72,17 @@ class EMReBuilder
EMReBuilder(bool verbose, bool display) : verbose(verbose), display(display)
{
// Initialize plugins.
IDBPolicy::init(true, false, "", 0);
IDBPolicy::configIDBPolicy();
}
~EMReBuilder() = default;
// Collects extents from the given DBRoot path.
int32_t collectExtents(const std::string& dbRootPath);
// Collects file names for the given `partialPath` directory.
void collectFileNames(const std::string& partialPath, std::string currentPath,
std::vector<std::string>& fileNames);
// Clears collected extents.
void clear()
{
@ -124,7 +130,8 @@ class EMReBuilder
int32_t rebuildExtentMap();
// Search HWM in the given segment file.
int32_t searchHWMInSegmentFile(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
int32_t searchHWMInSegmentFile(const std::string& fullFileName, uint32_t oid, uint32_t dbRoot,
uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t width,
uint64_t blocksCount, bool isDict, uint32_t compressionType, uint64_t& hwm);
@ -158,8 +165,9 @@ class EMReBuilder
class ChunkManagerWrapper
{
public:
ChunkManagerWrapper(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth);
ChunkManagerWrapper(const std::string& filename, uint32_t oid, uint32_t dbRoot, uint32_t partition,
uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth);
virtual ~ChunkManagerWrapper() = default;
ChunkManagerWrapper(const ChunkManagerWrapper& other) = delete;
@ -182,8 +190,8 @@ class ChunkManagerWrapper
execplan::CalpontSystemCatalog::ColDataType colDataType;
uint32_t colWidth;
int32_t size;
std::string fileName;
std::unique_ptr<WriteEngine::FileOp> pFileOp;
std::string fileName;
// Note: We cannot clear this pointer directly, because
// `ChunkManager` closes this file for us, otherwise we will get double
// free error.
@ -196,9 +204,9 @@ class ChunkManagerWrapper
class ChunkManagerWrapperColumn : public ChunkManagerWrapper
{
public:
ChunkManagerWrapperColumn(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
uint32_t compressionType);
ChunkManagerWrapperColumn(const std::string& filename, uint32_t oid, uint32_t dbRoot, uint32_t partition,
uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth, uint32_t compressionType);
~ChunkManagerWrapperColumn() = default;
ChunkManagerWrapperColumn(const ChunkManagerWrapperColumn& other) = delete;
@ -219,9 +227,9 @@ class ChunkManagerWrapperColumn : public ChunkManagerWrapper
class ChunkManagerWrapperDict : public ChunkManagerWrapper
{
public:
ChunkManagerWrapperDict(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
uint32_t compressionType);
ChunkManagerWrapperDict(const std::string& filename, uint32_t oid, uint32_t dbRoot, uint32_t partition,
uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth, uint32_t compressionType);
~ChunkManagerWrapperDict() = default;
ChunkManagerWrapperDict(const ChunkManagerWrapperDict& other) = delete;