You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-01 06:46:55 +03:00
MCOL-4566: Add rebuildEM tool support to work with compressed files.
* This patch adds rebuildEM tool support to work with compressed files. * This patch increases a version of the file header. Note: Default version of the `rebuildEM` tool was using very old API, those functions are not present currently. So `rebuildEM` will not work with files created without compression, because we cannot deduce some info which are needed to create column extent.
This commit is contained in:
230
tools/rebuildEM/rebuildEM.h
Normal file
230
tools/rebuildEM/rebuildEM.h
Normal file
@ -0,0 +1,230 @@
|
||||
/* Copyright (C) 2021 MariaDB Corporation
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
#ifndef REBUILD_EM_H
|
||||
#define REBUILD_EM_H
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <ftw.h>
|
||||
|
||||
#include "calpontsystemcatalog.h"
|
||||
#include "extentmap.h"
|
||||
#include "IDBPolicy.h"
|
||||
#include "IDBFileSystem.h"
|
||||
#include "idbcompress.h"
|
||||
#include "blocksize.h"
|
||||
#include "we_convertor.h"
|
||||
#include "we_fileop.h"
|
||||
#include "IDBPolicy.h"
|
||||
#include "we_chunkmanager.h"
|
||||
|
||||
using namespace idbdatafile;
|
||||
|
||||
namespace RebuildExtentMap
|
||||
{
|
||||
// This struct represents a FileId. For internal purpose only.
|
||||
struct FileId
|
||||
{
|
||||
FileId(uint32_t oid, uint32_t partition, uint32_t segment,
|
||||
uint32_t colWidth,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
int64_t lbid, uint64_t hwm, bool isDict)
|
||||
: oid(oid), partition(partition), segment(segment), colWidth(colWidth),
|
||||
colDataType(colDataType), lbid(lbid), hwm(hwm), isDict(isDict)
|
||||
{
|
||||
}
|
||||
|
||||
uint32_t oid;
|
||||
uint32_t partition;
|
||||
uint32_t segment;
|
||||
uint32_t colWidth;
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType;
|
||||
int64_t lbid;
|
||||
uint64_t hwm;
|
||||
bool isDict;
|
||||
};
|
||||
std::ostream& operator<<(std::ostream& os, const FileId& fileID);
|
||||
|
||||
// This class represents extent map rebuilder.
|
||||
class EMReBuilder
|
||||
{
|
||||
public:
|
||||
EMReBuilder(bool verbose, bool display)
|
||||
: verbose(verbose), display(display)
|
||||
{
|
||||
// Initalize plugins.
|
||||
IDBPolicy::init(true, false, "", 0);
|
||||
}
|
||||
~EMReBuilder() = default;
|
||||
|
||||
// Collects extents from the given DBRoot path.
|
||||
int32_t collectExtents(const std::string& dbRootPath);
|
||||
|
||||
// Clears collected extents.
|
||||
void clear() { extentMap.clear(); }
|
||||
|
||||
// Specifies whether we need verbose to output.
|
||||
bool doVerbose() const { return verbose; }
|
||||
|
||||
// Specifies whether we need just display a pipeline, but not actually run
|
||||
// it.
|
||||
bool doDisplay() const { return display; }
|
||||
|
||||
// Returns the number of current DBRoot.
|
||||
uint32_t getDBRoot() const { return dbRoot; }
|
||||
|
||||
// Retunrs a reference to `ExtentMap` object.
|
||||
BRM::ExtentMap& getEM() { return em; }
|
||||
|
||||
// Checks if the given data specifies a dictionary file.
|
||||
static bool
|
||||
isDictFile(execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
uint64_t width);
|
||||
|
||||
// Initializes system extents from the binary blob.
|
||||
// This function solves the problem related to system segment files.
|
||||
// Currently those files do not have file header, so we cannot
|
||||
// get the data (like width, colType, lbid) to restore an extent for this
|
||||
// particular segment file. The current approach is to keep a binary blob
|
||||
// of initial state of the system extents.
|
||||
// Returns -1 on error.
|
||||
int32_t initializeSystemExtents();
|
||||
|
||||
// Rebuilds extent map from the collected map.
|
||||
int32_t rebuildExtentMap();
|
||||
|
||||
// Search HWM in the given segment file.
|
||||
int32_t searchHWMInSegmentFile(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
uint32_t width, uint64_t blocksCount, bool isDict, uint64_t& hwm);
|
||||
|
||||
// Sets the dbroot to the given `number`.
|
||||
void setDBRoot(uint32_t number) { dbRoot = number; }
|
||||
|
||||
// Shows the extent map.
|
||||
void showExtentMap();
|
||||
|
||||
private:
|
||||
EMReBuilder(const EMReBuilder&) = delete;
|
||||
EMReBuilder(EMReBuilder&&) = delete;
|
||||
EMReBuilder& operator=(const EMReBuilder&) = delete;
|
||||
EMReBuilder& operator=(EMReBuilder&&) = delete;
|
||||
|
||||
// Collects the information for extent from the given file and stores
|
||||
// it in `extentMap` set.
|
||||
int32_t collectExtent(const std::string& fullFileName);
|
||||
bool verbose;
|
||||
bool display;
|
||||
uint32_t dbRoot;
|
||||
BRM::ExtentMap em;
|
||||
std::vector<FileId> extentMap;
|
||||
};
|
||||
|
||||
// The base class aroud `ChunkManager` to read and write decompressed blocks
|
||||
// from segment file.
|
||||
class ChunkManagerWrapper
|
||||
{
|
||||
public:
|
||||
ChunkManagerWrapper(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
uint32_t colWidth);
|
||||
|
||||
virtual ~ChunkManagerWrapper() = default;
|
||||
ChunkManagerWrapper(const ChunkManagerWrapper& other) = delete;
|
||||
ChunkManagerWrapper& operator=(const ChunkManagerWrapper& other) = delete;
|
||||
ChunkManagerWrapper(ChunkManagerWrapper&& other) = delete;
|
||||
ChunkManagerWrapper& operator=(ChunkManagerWrapper&& other) = delete;
|
||||
|
||||
// Reads block, by given `blockNumber` from associated segment file and
|
||||
// populates internal block buffer.
|
||||
int32_t readBlock(uint32_t blockNumber);
|
||||
|
||||
// Checks that last read block is empty.
|
||||
virtual bool isEmptyBlock() = 0;
|
||||
|
||||
protected:
|
||||
uint32_t oid;
|
||||
uint32_t dbRoot;
|
||||
uint32_t partition;
|
||||
uint32_t segment;
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType;
|
||||
uint32_t colWidth;
|
||||
int32_t size;
|
||||
std::string fileName;
|
||||
std::unique_ptr<WriteEngine::FileOp> pFileOp;
|
||||
// Note: We cannot clear this pointer directly, because
|
||||
// `ChunkManager` closes this file for us, otherwise we will get double
|
||||
// free error.
|
||||
IDBDataFile* pFile;
|
||||
WriteEngine::ChunkManager chunkManager;
|
||||
uint8_t blockData[WriteEngine::BYTE_PER_BLOCK];
|
||||
};
|
||||
|
||||
// Class to read decompressed blocks from column segment files.
|
||||
class ChunkManagerWrapperColumn : public ChunkManagerWrapper
|
||||
{
|
||||
public:
|
||||
ChunkManagerWrapperColumn(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
uint32_t colWidth);
|
||||
|
||||
~ChunkManagerWrapperColumn() = default;
|
||||
ChunkManagerWrapperColumn(const ChunkManagerWrapperColumn& other) = delete;
|
||||
ChunkManagerWrapperColumn&
|
||||
operator=(const ChunkManagerWrapperColumn& other) = delete;
|
||||
ChunkManagerWrapperColumn(ChunkManagerWrapperColumn&& other) = delete;
|
||||
ChunkManagerWrapperColumn&
|
||||
operator=(ChunkManagerWrapperColumn&& other) = delete;
|
||||
|
||||
bool isEmptyBlock() override;
|
||||
bool isEmptyValue(const uint8_t* value) const;
|
||||
|
||||
private:
|
||||
const uint8_t* emptyValue;
|
||||
uint32_t midOffset;
|
||||
uint32_t endOffset;
|
||||
};
|
||||
|
||||
// Class to read decompressed blocks from dict segment files.
|
||||
class ChunkManagerWrapperDict : public ChunkManagerWrapper
|
||||
{
|
||||
public:
|
||||
ChunkManagerWrapperDict(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
uint32_t colWidth);
|
||||
|
||||
~ChunkManagerWrapperDict() = default;
|
||||
ChunkManagerWrapperDict(const ChunkManagerWrapperDict& other) = delete;
|
||||
ChunkManagerWrapperDict&
|
||||
operator=(const ChunkManagerWrapperDict& other) = delete;
|
||||
ChunkManagerWrapperDict(ChunkManagerWrapperDict&& other) = delete;
|
||||
ChunkManagerWrapperDict&
|
||||
operator=(ChunkManagerWrapperDict&& other) = delete;
|
||||
|
||||
bool isEmptyBlock() override;
|
||||
|
||||
private:
|
||||
uint32_t emptyBlock;
|
||||
};
|
||||
|
||||
} // namespace RebuildExtentMap
|
||||
#endif
|
Reference in New Issue
Block a user