diff --git a/tools/rebuildEM/main.cpp b/tools/rebuildEM/main.cpp index c59e8cb5f..72b576bf6 100644 --- a/tools/rebuildEM/main.cpp +++ b/tools/rebuildEM/main.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include "configcpp.h" @@ -29,7 +28,7 @@ using namespace idbdatafile; using namespace RebuildExtentMap; -static void usage(const string& pname) +static void usage(const std::string& pname) { std::cout << "usage: " << pname << " [-vdhs]" << std::endl; std::cout << "rebuilds the extent map from the contents of the database file " @@ -41,6 +40,20 @@ static void usage(const string& pname) std::cout << " -s show extent map and quit" << std::endl; } +static bool isYes() +{ + std::string confirmation; + std::cin >> confirmation; + if (confirmation.size() == 0) + return false; + + boost::algorithm::to_lower(confirmation); + if (!(confirmation == "y" || confirmation == "yes")) + return false; + + return true; +} + int main(int argc, char** argv) { int32_t option; @@ -78,41 +91,28 @@ int main(int argc, char** argv) // MCOL-4685 std::cout << "The launch of mcsRebuildEM tool must be sanctioned by MariaDB support. " << std::endl; - std::cout << "Requirement: all DBRoots must be on this node. " << std::endl; std::cout << "Do you want to continue Y/N? "; - std::string confirmation; - cin >> confirmation; - if (confirmation.size() == 0) - return 0; - - boost::algorithm::to_lower(confirmation); - if (!(confirmation == "y" || confirmation == "yes")) + if (!isYes()) return 0; auto* config = config::Config::makeConfig(); - - // Check for storage type. - const auto DBRootStorageType = config->getConfig("Installation", "DBRootStorageType"); - if (DBRootStorageType != "internal") - { - std::cout << "Only internal DBRootStorageType is supported, provided: " << DBRootStorageType << std::endl; - return 0; - } - const auto BRMSavesEM = config->getConfig("SystemConfig", "DBRMRoot") + "_em"; + // Check for `BRM_saves_em` file presents. // TODO: Should we add force option to remove file? - if (boost::filesystem::exists(BRMSavesEM)) + if (IDBPolicy::exists(BRMSavesEM.c_str())) { std::cout << BRMSavesEM << " file exists. " << std::endl; - std::cout << "Please note: this tool is only suitable in situations " - "where there is no `BRM_saves_em` file. " - << std::endl; - std::cout << "If `BRM_saves_em` " - "exists extent map will be restored from it. " - << std::endl; - std::cout << "Exiting. " << std::endl; - return 0; + std::cout << "Do you want to delete this file Y/N? "; + if (!isYes()) + return 0; + + if (IDBPolicy::remove(BRMSavesEM.c_str()) == -1) + { + std::cout << "Cannot remove " << BRMSavesEM << std::endl; + std::cout << "Exiting. " << std::endl; + return 0; + } } // Initialize system extents from the binary blob. @@ -136,10 +136,11 @@ int main(int argc, char** argv) auto dbRootPath = config->getConfig("SystemConfig", dbRootName); emReBuilder.setDBRoot(dbRootNumber); emReBuilder.collectExtents(dbRootPath.c_str()); - emReBuilder.rebuildExtentMap(); - emReBuilder.clear(); } + emReBuilder.rebuildExtentMap(); + emReBuilder.clear(); + // Save restored extent map. emReBuilder.getEM().save(BRMSavesEM); std::cout << "Completed." << std::endl; diff --git a/tools/rebuildEM/rebuildEM.cpp b/tools/rebuildEM/rebuildEM.cpp index 8dc5954d5..30861c458 100644 --- a/tools/rebuildEM/rebuildEM.cpp +++ b/tools/rebuildEM/rebuildEM.cpp @@ -16,7 +16,6 @@ MA 02110-1301, USA. */ #include -#include #include #include "rebuildEM.h" @@ -36,16 +35,36 @@ using namespace idbdatafile; namespace RebuildExtentMap { -int32_t EMReBuilder::collectExtents(const string& dbRootPath) +void EMReBuilder::collectFileNames(const std::string& partialPath, std::string currentPath, + std::vector& fileNames) +{ + currentPath.append(partialPath); + + std::list partialPathes; + IDBPolicy::listDirectory(currentPath.c_str(), partialPathes); + if (partialPathes.size() == 0) + { + fileNames.push_back(currentPath); + return; + } + + currentPath.push_back('/'); + for (const auto& partialPath : partialPathes) + collectFileNames(partialPath, currentPath, fileNames); +} + +int32_t EMReBuilder::collectExtents(const std::string& dbRootPath) { if (doVerbose()) { std::cout << "Collect extents for the DBRoot " << dbRootPath << std::endl; } - for (boost::filesystem::recursive_directory_iterator dirIt(dbRootPath), dirEnd; dirIt != dirEnd; ++dirIt) + std::vector fileNames; + collectFileNames(dbRootPath, "", fileNames); + for (const auto& fileName : fileNames) { - (void)collectExtent(dirIt->path().string()); + (void)collectExtent(fileName); } return 0; @@ -141,8 +160,8 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName) } uint64_t hwm = 0; - rc = searchHWMInSegmentFile(oid, getDBRoot(), partition, segment, colDataType, colWidth, blockCount, isDict, - compressionType, hwm); + rc = searchHWMInSegmentFile(fullFileName, oid, getDBRoot(), partition, segment, colDataType, colWidth, + blockCount, isDict, compressionType, hwm); if (rc != 0) return rc; @@ -156,13 +175,13 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName) for (uint32_t lbidIndex = 0; lbidIndex < lbidCount - 1; ++lbidIndex) { auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidIndex); - FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, /*hwm*/ 0, isDict); + FileId fileId(oid, partition, segment, getDBRoot(), colWidth, colDataType, lbid, /*hwm*/ 0, isDict); extentMap.push_back(fileId); } // Last one has an actual HWM. auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidCount - 1); - FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict); + FileId fileId(oid, partition, segment, getDBRoot(), colWidth, colDataType, lbid, hwm, isDict); extentMap.push_back(fileId); if (doVerbose()) @@ -175,7 +194,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName) { // One extent per segment file. auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, 0); - FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict); + FileId fileId(oid, partition, segment, getDBRoot(), colWidth, colDataType, lbid, hwm, isDict); extentMap.push_back(fileId); if (doVerbose()) @@ -213,14 +232,14 @@ int32_t EMReBuilder::rebuildExtentMap() { // Create a dictionary extent for the given oid, partition, // segment, dbroot. - getEM().createDictStoreExtent(fileId.oid, getDBRoot(), fileId.partition, fileId.segment, lbid, + getEM().createDictStoreExtent(fileId.oid, fileId.dbroot, fileId.partition, fileId.segment, lbid, allocdSize); } else { // Create a column extent for the given oid, partition, // segment, dbroot and column width. - getEM().createColumnExtentExactFile(fileId.oid, fileId.colWidth, getDBRoot(), fileId.partition, + getEM().createColumnExtentExactFile(fileId.oid, fileId.colWidth, fileId.dbroot, fileId.partition, fileId.segment, fileId.colDataType, lbid, allocdSize, startBlockOffset); } @@ -263,8 +282,8 @@ int32_t EMReBuilder::rebuildExtentMap() return 0; } -int32_t EMReBuilder::searchHWMInSegmentFile(uint32_t oid, uint32_t dbRoot, uint32_t partition, - uint32_t segment, +int32_t EMReBuilder::searchHWMInSegmentFile(const std::string& fullFileName, uint32_t oid, uint32_t dbRoot, + uint32_t partition, uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, uint64_t blockCount, bool isDict, uint32_t compressionType, uint64_t& hwm) @@ -275,12 +294,12 @@ int32_t EMReBuilder::searchHWMInSegmentFile(uint32_t oid, uint32_t dbRoot, uint3 if (isDict) { chunkManagerWrapper = std::unique_ptr(new ChunkManagerWrapperDict( - oid, dbRoot, partition, segment, colDataType, colWidth, compressionType)); + fullFileName, oid, dbRoot, partition, segment, colDataType, colWidth, compressionType)); } else { chunkManagerWrapper = std::unique_ptr(new ChunkManagerWrapperColumn( - oid, dbRoot, partition, segment, colDataType, colWidth, compressionType)); + fullFileName, oid, dbRoot, partition, segment, colDataType, colWidth, compressionType)); } } catch (...) @@ -348,7 +367,8 @@ int32_t EMReBuilder::initializeSystemExtents() return 0; } -ChunkManagerWrapper::ChunkManagerWrapper(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, +ChunkManagerWrapper::ChunkManagerWrapper(const std::string& filename, uint32_t oid, uint32_t dbRoot, + uint32_t partition, uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth) : oid(oid) @@ -359,6 +379,7 @@ ChunkManagerWrapper::ChunkManagerWrapper(uint32_t oid, uint32_t dbRoot, uint32_t , colWidth(colWidth) , size(colWidth) , pFileOp(nullptr) + , fileName(filename) { } @@ -370,19 +391,19 @@ int32_t ChunkManagerWrapper::readBlock(uint32_t blockNumber) return 0; } -ChunkManagerWrapperColumn::ChunkManagerWrapperColumn(uint32_t oid, uint32_t dbRoot, uint32_t partition, - uint32_t segment, +ChunkManagerWrapperColumn::ChunkManagerWrapperColumn(const std::string& filename, uint32_t oid, + uint32_t dbRoot, uint32_t partition, uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, uint32_t compressionType) - : ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType, colWidth) + : ChunkManagerWrapper(filename, oid, dbRoot, partition, segment, colDataType, colWidth) { pFileOp = std::unique_ptr(new WriteEngine::ColumnOpCompress1(compressionType)); chunkManager.fileOp(pFileOp.get()); // Open compressed column segment file. We will read block by block // from the compressed chunks. - pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment, colDataType, colWidth, fileName, - "rb", size, false, false); + pFile = chunkManager.getFilePtrByName(fileName, oid, dbRoot, partition, segment, colDataType, colWidth, + "rb", size, false, false); if (!pFile) { throw std::bad_alloc(); @@ -425,17 +446,17 @@ bool ChunkManagerWrapperColumn::isEmptyValue(const uint8_t* value) const return false; } -ChunkManagerWrapperDict::ChunkManagerWrapperDict(uint32_t oid, uint32_t dbRoot, uint32_t partition, - uint32_t segment, +ChunkManagerWrapperDict::ChunkManagerWrapperDict(const std::string& filename, uint32_t oid, uint32_t dbRoot, + uint32_t partition, uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, uint32_t compressionType) - : ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType, colWidth) + : ChunkManagerWrapper(filename, oid, dbRoot, partition, segment, colDataType, colWidth) { pFileOp = std::unique_ptr(new WriteEngine::DctnryCompress1(compressionType)); chunkManager.fileOp(pFileOp.get()); // Open compressed dict segment file. - pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment, colDataType, colWidth, fileName, - "rb", size, false, true); + pFile = chunkManager.getFilePtrByName(fileName, oid, dbRoot, partition, segment, colDataType, colWidth, + "rb", size, false, true); if (!pFile) { throw std::bad_alloc(); diff --git a/tools/rebuildEM/rebuildEM.h b/tools/rebuildEM/rebuildEM.h index 14f8159e5..6cbe2fcfd 100644 --- a/tools/rebuildEM/rebuildEM.h +++ b/tools/rebuildEM/rebuildEM.h @@ -39,11 +39,12 @@ namespace RebuildExtentMap // This struct represents a FileId. For internal purpose only. struct FileId { - FileId(uint32_t oid, uint32_t partition, uint32_t segment, uint32_t colWidth, + FileId(uint32_t oid, uint32_t partition, uint32_t segment, uint32_t dbroot, uint32_t colWidth, execplan::CalpontSystemCatalog::ColDataType colDataType, int64_t lbid, uint64_t hwm, bool isDict) : oid(oid) , partition(partition) , segment(segment) + , dbroot(dbroot) , colWidth(colWidth) , colDataType(colDataType) , lbid(lbid) @@ -55,6 +56,7 @@ struct FileId uint32_t oid; uint32_t partition; uint32_t segment; + uint32_t dbroot; uint32_t colWidth; execplan::CalpontSystemCatalog::ColDataType colDataType; int64_t lbid; @@ -70,13 +72,17 @@ class EMReBuilder EMReBuilder(bool verbose, bool display) : verbose(verbose), display(display) { // Initalize plugins. - IDBPolicy::init(true, false, "", 0); + IDBPolicy::configIDBPolicy(); } ~EMReBuilder() = default; // Collects extents from the given DBRoot path. int32_t collectExtents(const std::string& dbRootPath); + // Collects file names for the given `partialPath` direcotory. + void collectFileNames(const std::string& partialPath, std::string currentPath, + std::vector& fileNames); + // Clears collected extents. void clear() { @@ -124,7 +130,8 @@ class EMReBuilder int32_t rebuildExtentMap(); // Search HWM in the given segment file. - int32_t searchHWMInSegmentFile(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, + int32_t searchHWMInSegmentFile(const std::string& fullFileName, uint32_t oid, uint32_t dbRoot, + uint32_t partition, uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t width, uint64_t blocksCount, bool isDict, uint32_t compressionType, uint64_t& hwm); @@ -158,8 +165,9 @@ class EMReBuilder class ChunkManagerWrapper { public: - ChunkManagerWrapper(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, - execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth); + ChunkManagerWrapper(const std::string& filename, uint32_t oid, uint32_t dbRoot, uint32_t partition, + uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, + uint32_t colWidth); virtual ~ChunkManagerWrapper() = default; ChunkManagerWrapper(const ChunkManagerWrapper& other) = delete; @@ -182,8 +190,8 @@ class ChunkManagerWrapper execplan::CalpontSystemCatalog::ColDataType colDataType; uint32_t colWidth; int32_t size; - std::string fileName; std::unique_ptr pFileOp; + std::string fileName; // Note: We cannot clear this pointer directly, because // `ChunkManager` closes this file for us, otherwise we will get double // free error. @@ -196,9 +204,9 @@ class ChunkManagerWrapper class ChunkManagerWrapperColumn : public ChunkManagerWrapper { public: - ChunkManagerWrapperColumn(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, - execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, - uint32_t compressionType); + ChunkManagerWrapperColumn(const std::string& filename, uint32_t oid, uint32_t dbRoot, uint32_t partition, + uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, + uint32_t colWidth, uint32_t compressionType); ~ChunkManagerWrapperColumn() = default; ChunkManagerWrapperColumn(const ChunkManagerWrapperColumn& other) = delete; @@ -219,9 +227,9 @@ class ChunkManagerWrapperColumn : public ChunkManagerWrapper class ChunkManagerWrapperDict : public ChunkManagerWrapper { public: - ChunkManagerWrapperDict(uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, - execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, - uint32_t compressionType); + ChunkManagerWrapperDict(const std::string& filename, uint32_t oid, uint32_t dbRoot, uint32_t partition, + uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, + uint32_t colWidth, uint32_t compressionType); ~ChunkManagerWrapperDict() = default; ChunkManagerWrapperDict(const ChunkManagerWrapperDict& other) = delete; diff --git a/writeengine/shared/we_chunkmanager.cpp b/writeengine/shared/we_chunkmanager.cpp index 1254176ef..784f99902 100644 --- a/writeengine/shared/we_chunkmanager.cpp +++ b/writeengine/shared/we_chunkmanager.cpp @@ -280,19 +280,20 @@ IDBDataFile* ChunkManager::getFilePtr(const FID& fid, uint16_t root, uint32_t pa } //------------------------------------------------------------------------------ -// Get/Return IDBDataFile* for specified OID, root, partition, and segment. +// Get/Return IDBDataFile* by the given `filename`. +// OID, partition and segment are needed for a file cache. // Function is to be used to open column/dict segment file. // If the IDBDataFile* is not found, then a segment file will be opened using -// the mode (mode) and I/O buffer size (size) that is given. Name of the -// resulting file is returned in filename. +// the mode (mode) and I/O buffer size (size) that is given. //------------------------------------------------------------------------------ -IDBDataFile* ChunkManager::getSegmentFilePtr(FID& fid, uint16_t root, uint32_t partition, uint16_t segment, - execplan::CalpontSystemCatalog::ColDataType colDataType, - uint32_t colWidth, std::string& filename, const char* mode, - int32_t size, bool useTmpSuffix, bool isDict) const +IDBDataFile* ChunkManager::getFilePtrByName(const std::string& filename, FID& fid, uint16_t root, + uint32_t partition, uint16_t segment, + execplan::CalpontSystemCatalog::ColDataType colDataType, + uint32_t colWidth, const char* mode, int32_t size, + bool useTmpSuffix, bool isDict) const { - CompFileData* fileData = getFileData(fid, root, partition, segment, filename, mode, size, colDataType, - colWidth, useTmpSuffix, isDict); + CompFileData* fileData = getFileDataByName(filename, fid, root, partition, segment, mode, size, colDataType, + colWidth, useTmpSuffix, isDict); return (fileData ? fileData->fFilePtr : NULL); } @@ -328,12 +329,39 @@ CompFileData* ChunkManager::getFileData(const FID& fid, uint16_t root, uint32_t // New CompFileData pointer needs to be created char name[FILE_NAME_SIZE]; - if (fFileOp->getFileName(fid, name, root, partition, segment) != NO_ERROR) return NULL; - CompFileData* fileData = new CompFileData(fileID, fid, colDataType, colWidth); - fileData->fFileName = filename = name; + // Initialize the given `filename`. + filename = name; + return getFileData_(fileID, filename, mode, size, colDataType, colWidth, useTmpSuffix, dctnry); +} + +CompFileData* ChunkManager::getFileDataByName(const std::string& filename, const FID& fid, uint16_t root, + uint32_t partition, uint16_t segment, const char* mode, + int size, const CalpontSystemCatalog::ColDataType colDataType, + int colWidth, bool useTmpSuffix, bool dctnry) const +{ + FileID fileID(fid, root, partition, segment); + map::const_iterator mit = fFileMap.find(fileID); + + WE_COMP_DBG(cout << "getFileData: fid:" << fid << " root:" << root << " part:" << partition << " seg:" + << segment << " file* " << ((mit != fFileMap.end()) ? "" : "not ") << "found." << endl;) + + // Get CompFileData pointer for existing Column or Dictionary store file + if (mit != fFileMap.end()) + return mit->second; + + return getFileData_(fileID, filename, mode, size, colDataType, colWidth, useTmpSuffix, dctnry); +} + + +CompFileData* ChunkManager::getFileData_(const FileID& fileID, const string& filename, const char* mode, + int size, const CalpontSystemCatalog::ColDataType colDataType, + int colWidth, bool useTmpSuffix, bool dctnry) const +{ + CompFileData* fileData = new CompFileData(fileID, fileID.fFid, colDataType, colWidth); + fileData->fFileName = filename; if (openFile(fileData, mode, colWidth, useTmpSuffix, __LINE__) != NO_ERROR) { @@ -369,13 +397,12 @@ CompFileData* ChunkManager::getFileData(const FID& fid, uint16_t root, uint32_t return NULL; } - int headerSize = compress::CompressInterface::getHdrSize(fileData->fFileHeader.fControlData); - int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; + const int32_t headerSize = compress::CompressInterface::getHdrSize(fileData->fFileHeader.fControlData); + const int32_t ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; // Save segment file compression type. - uint32_t compressionType = + fileData->fCompressionType = compress::CompressInterface::getCompressionType(fileData->fFileHeader.fControlData); - fileData->fCompressionType = compressionType; if (ptrSecSize > COMPRESSED_FILE_HEADER_UNIT) { diff --git a/writeengine/shared/we_chunkmanager.h b/writeengine/shared/we_chunkmanager.h index 5dce0d1e7..9d9a082ec 100644 --- a/writeengine/shared/we_chunkmanager.h +++ b/writeengine/shared/we_chunkmanager.h @@ -189,12 +189,12 @@ class ChunkManager IDBDataFile* getFilePtr(const FID& fid, uint16_t root, uint32_t partition, uint16_t segment, std::string& filename, const char* mode, int size, bool useTmpSuffix) const; - // @brief Retrieve a file pointer in the chunk manager. + // @brief Retrieve a file pointer in the chunk manager by the given `filename`. // for column/dict segment file - IDBDataFile* getSegmentFilePtr(FID& fid, uint16_t root, uint32_t partition, uint16_t segment, - execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, - std::string& filename, const char* mode, int32_t size, bool useTmpSuffix, - bool isDict) const; + IDBDataFile* getFilePtrByName(const std::string& filename, FID& fid, uint16_t root, uint32_t partition, + uint16_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, + uint32_t colWidth, const char* mode, int32_t size, bool useTmpSuffix, + bool isDict) const; // @brief Create a compressed dictionary file with an appropriate header. IDBDataFile* createDctnryFile(const FID& fid, int64_t width, uint16_t root, uint32_t partition, @@ -284,6 +284,11 @@ class ChunkManager const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth, bool useTmpSuffix, bool dictnry = false) const; + CompFileData* getFileDataByName(const std::string& filename, const FID& fid, uint16_t root, + uint32_t partition, uint16_t segment, const char* mode, int size, + const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth, + bool useTmpSuffix, bool dctnry) const; + // @brief Retrieve a chunk of pFile from disk. int fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*& chunkData); @@ -366,6 +371,9 @@ class ChunkManager size_t COMPRESSED_CHUNK_SIZE; private: + CompFileData* getFileData_(const FileID& fid, const std::string& filename, const char* mode, int size, + const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth, + bool useTmpSuffix, bool dictnry = false) const; }; } // namespace WriteEngine