1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-20 09:07:44 +03:00
2023-03-02 15:59:42 +00:00

373 lines
12 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
// $Id: we_chunkmanager.h 4726 2013-08-07 03:38:36Z bwilkinson $
/** @file */
#pragma once
#include <cstdio>
#include <map>
#include <list>
#include <string>
#include <boost/scoped_array.hpp>
#include "we_type.h"
#include "we_typeext.h"
#include "we_define.h"
#include "idbcompress.h"
#include "IDBFileSystem.h"
// EXPORT expands to nothing in this header. It is used as a prefix on some
// ChunkManager member declarations below and is #undef'd at the end of the file.
#define EXPORT
// Define IDB_COMP_DEBUG (e.g. by uncommenting the next line) to compile the
// statements passed to WE_COMP_DBG() into the build.
//#define IDB_COMP_DEBUG
#ifdef IDB_COMP_DEBUG
// Debug variant: WE_COMP_DBG(x) expands to a brace-enclosed block containing x.
// NOTE(review): the expansion is a bare compound statement rather than
// do { ... } while (0), so `if (c) WE_COMP_DBG(...); else ...` would not compile.
// Check how call sites terminate the macro before changing its shape.
#define WE_COMP_DBG(x) \
{ \
x \
}
#else
// Non-debug variant: the argument is discarded and only an empty block remains.
#define WE_COMP_DBG(x) \
{ \
}
#endif
// Forward declaration only: this header refers to logging::Logger through a
// pointer, so the full class definition is not required here.
// Logger (not we_log) is used for now.
namespace logging
{
class Logger;
}  // namespace logging
namespace WriteEngine
{
// FileOp is only referenced through a pointer in this header, so a forward
// declaration is enough.
class FileOp;

// Length (in chars) of the uncompressed buffer of one chunk, taken from the
// compression interface.
const int UNCOMPRESSED_CHUNK_SIZE = compress::CompressInterface::UNCOMPRESSED_INBUF_LEN;

// Length (in chars) of one header unit of a compressed file, taken from the
// compression interface.
const int COMPRESSED_FILE_HEADER_UNIT = compress::CompressInterface::HDR_BUF_LEN;

// Number of blocks that fit in one uncompressed chunk.
// assume UNCOMPRESSED_CHUNK_SIZE > 0xBFFF (49151), 8 * 1024 bytes padding
const int BLOCKS_IN_CHUNK = UNCOMPRESSED_CHUNK_SIZE / BYTE_PER_BLOCK;

// NOTE(review): 511 looks like (BLOCKS_IN_CHUNK - 1), i.e. the offset of the
// last block in a chunk, but it is hard-coded here. Confirm before relying on it.
const int MAXOFFSET_PER_CHUNK = 511 * BYTE_PER_BLOCK;

// chunk information: identifier type of a chunk
using ChunkId = int64_t;
// One uncompressed chunk held in memory, together with its bookkeeping.
// Instances are ordered by chunk id only.
struct ChunkData
{
  ChunkId fChunkId;                                // id of this chunk
  unsigned int fLenUnCompressed = 0;               // starts at 0
  char fBufUnCompressed[UNCOMPRESSED_CHUNK_SIZE];  // NOT initialized by the constructor
  bool fWriteToFile = false;                       // starts cleared

  // Builds an empty chunk record for the given id (0 when omitted).
  ChunkData(ChunkId id = 0) : fChunkId(id)
  {
  }

  // Strict ordering on the chunk id; buffer contents and flags are ignored.
  bool operator<(const ChunkData& rhs) const
  {
    return rhs.fChunkId > fChunkId;
  }
};
// Header information of a compressed DB file.
// fHeaderData holds two consecutive header units; the two raw pointers below
// point into this object's own fHeaderData buffer.
struct CompFileHeader
{
  char fHeaderData[COMPRESSED_FILE_HEADER_UNIT * 2];  // NOT initialized by the constructor
  // Start of the first header unit.
  char* fControlData = &fHeaderData[0];
  // Start of the second header unit.
  char* fPtrSection = &fHeaderData[COMPRESSED_FILE_HEADER_UNIT];
  // Separately allocated buffer; empty after construction.
  // NOTE(review): presumably used when the pointer section does not fit in
  // fHeaderData. Confirm against the implementation file.
  boost::scoped_array<char> fLongPtrSectData;

  // Kept user-provided so construction semantics match the previous definition.
  CompFileHeader()
  {
  }
};
// Unique ID of a DB file: the (fid, dbroot, partition, segment) tuple.
struct FileID
{
  FID fFid;
  uint32_t fDbRoot;
  uint32_t fPartition;
  uint32_t fSegment;

  FileID(FID f, uint32_t r, uint32_t p, uint32_t s) : fFid(f), fDbRoot(r), fPartition(p), fSegment(s)
  {
  }

  // Lexicographic ordering over (fFid, fDbRoot, fPartition, fSegment):
  // the first field that differs decides the result.
  bool operator<(const FileID& rhs) const
  {
    if (fFid != rhs.fFid)
      return fFid < rhs.fFid;

    if (fDbRoot != rhs.fDbRoot)
      return fDbRoot < rhs.fDbRoot;

    if (fPartition != rhs.fPartition)
      return fPartition < rhs.fPartition;

    return fSegment < rhs.fSegment;
  }

  // Two IDs are equal only when all four fields match.
  bool operator==(const FileID& rhs) const
  {
    const bool sameFile = (fFid == rhs.fFid) && (fDbRoot == rhs.fDbRoot);
    const bool sameSlice = (fPartition == rhs.fPartition) && (fSegment == rhs.fSegment);
    return sameFile && sameSlice;
  }
};
// Information about one compressed DB file: its identity, column description,
// file handle, header and the chunks attached to it.
// All state is protected; ChunkManager is a friend and accesses it directly.
class CompFileData
{
 public:
  // Stores the identity and column description of the file.
  // The remaining members start from their in-class defaults (no open file
  // handle, not a dictionary column, compression type 1).
  CompFileData(const FileID& id, const FID& fid,
               const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth,
               bool readOnly = false)
   : fFileID(id), fFid(fid), fColDataType(colDataType), fColWidth(colWidth), fReadOnly(readOnly)
  {
  }

  // Look up a chunk of this file by chunk id (defined out of line).
  // NOTE(review): presumably searches fChunkList and returns a null pointer when
  // the id is absent. Confirm against the implementation file.
  ChunkData* findChunk(int64_t cid) const;

 protected:
  FileID fFileID;
  FID fFid;
  execplan::CalpontSystemCatalog::ColDataType fColDataType;
  int fColWidth;
  bool fDctnryCol = false;
  IDBDataFile* fFilePtr = nullptr;
  std::string fFileName;
  CompFileHeader fFileHeader;
  std::list<ChunkData*> fChunkList;
  // NOTE(review): the meaning of compression type 1 is not visible in this header.
  uint32_t fCompressionType = 1;
  bool fReadOnly;

  friend class ChunkManager;
};
// Manages compressed DB files at chunk granularity: it hands out file pointers,
// reads and saves blocks through in-memory chunks, and writes chunks and file
// headers to disk.
//
// NOTE(review): the class holds raw pointers (fBufCompressed, fFileOp,
// fSysLogger, the CompFileData* values in the maps) and a reference member (fFs).
// Ownership of these is decided in the implementation file. The implicit copy
// constructor is still available, so confirm that instances are never copied.
class ChunkManager
{
 public:
  // @brief constructor
  EXPORT ChunkManager();

  // @brief destructor
  EXPORT virtual ~ChunkManager();

  // @brief Retrieve a file pointer in the chunk manager.
  //        for column file
  IDBDataFile* getFilePtr(const Column& column, uint16_t root, uint32_t partition, uint16_t segment,
                          std::string& filename, const char* mode, int size, bool useTmpSuffix,
                          bool isReadOnly = false) const;

  // @brief Retrieve a file pointer in the chunk manager.
  //        for dictionary file
  IDBDataFile* getFilePtr(const FID& fid, uint16_t root, uint32_t partition, uint16_t segment,
                          std::string& filename, const char* mode, int size, bool useTmpSuffix) const;

  // @brief Retrieve a file pointer in the chunk manager by the given `filename`.
  //        for column/dict segment file
  IDBDataFile* getFilePtrByName(const std::string& filename, FID& fid, uint16_t root, uint32_t partition,
                                uint16_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType,
                                uint32_t colWidth, const char* mode, int32_t size, bool useTmpSuffix,
                                bool isDict) const;

  // @brief Create a compressed dictionary file with an appropriate header.
  IDBDataFile* createDctnryFile(const FID& fid, int64_t width, uint16_t root, uint32_t partition,
                                uint16_t segment, const char* filename, const char* mode, int size,
                                int64_t lbid);

  // @brief Read a block from pFile at offset fbo.
  //        The data may be copied from memory if the chunk it belongs to is already available.
  int readBlock(IDBDataFile* pFile, unsigned char* readBuf, uint64_t fbo);

  // @brief Save a block to a chunk in pFile.
  //        The block is not written to disk immediately; the write is delayed until flush.
  int saveBlock(IDBDataFile* pFile, const unsigned char* writeBuf, uint64_t fbo);

  // @brief Write all active chunks to disk, and reset all repository.
  EXPORT int flushChunks(int rc, const std::map<FID, FID>& columOids);

  // @brief Reset all repository without writing anything to disk.
  void cleanUp(const std::map<FID, FID>& columOids);

  // @brief Expand an initial column, not dictionary, extent to a full extent.
  int expandAbbrevColumnExtent(IDBDataFile* pFile, const uint8_t* emptyVal, int width);

  // @brief Update column extent
  int updateColumnExtent(IDBDataFile* pFile, int addBlockCount, int64_t lbid);

  // @brief Update dictionary extent
  int updateDctnryExtent(IDBDataFile* pFile, int addBlockCount, int64_t lbid);

  // @brief Read in n continuous blocks to read buffer.
  //        for backing up blocks to version buffer
  int readBlocks(IDBDataFile* pFile, unsigned char* readBuf, uint64_t fbo, size_t n);

  // @brief Restore the data block at offset fbo from version buffer
  //        for rollback
  int restoreBlock(IDBDataFile* pFile, const unsigned char* writeBuf, uint64_t fbo);

  // @brief Retrieve the total block count of a DB file.
  int getBlockCount(IDBDataFile* pFile);

  // @brief Set FileOp pointer (for compression type, empty value, txnId, etc.)
  void fileOp(FileOp* fileOp);

  // @brief Control the number of active chunks being stored in memory
  void setMaxActiveChunkNum(unsigned int maxActiveChunkNum)
  {
    fMaxActiveChunkNum = maxActiveChunkNum;
  }

  // @brief Use this flag to avoid logging and backing up chunks, tmp files.
  void setBulkFlag(bool isBulkLoad)
  {
    fIsBulkLoad = isBulkLoad;
  }

  // @brief Use this flag to flush chunk when is full.
  void setIsInsert(bool isInsert)
  {
    fIsInsert = isInsert;
  }

  // @brief Return the flag last stored by setIsInsert().
  //        Declared const so it can be queried through a const ChunkManager.
  bool getIsInsert() const
  {
    return fIsInsert;
  }

  // @brief Store the transaction id in fTransId.
  void setTransId(const TxnID& transId)
  {
    fTransId = transId;
  }

  // @brief bug5504, Use non transactional DML for InfiniDB with HDFS
  EXPORT int startTransaction(const TxnID& transId) const;
  EXPORT int confirmTransaction(const TxnID& transId) const;
  EXPORT int endTransaction(const TxnID& transId, bool success) const;

  // @brief Use this flag to fix bad chunk.
  void setFixFlag(bool isFix)
  {
    fIsFix = isFix;
  }

  // NOTE(review): judging by the name, this checks (and, with the fix flag set,
  // repairs) the last chunk of a dictionary file. Confirm in the implementation.
  EXPORT int checkFixLastDictChunk(const FID& fid, uint16_t root, uint32_t partition, uint16_t segment);

 protected:
  // @brief Retrieve pointer to a compressed DB file.
  CompFileData* getFileData(const FID& fid, uint16_t root, uint32_t partition, uint16_t segment,
                            std::string& filename, const char* mode, int size,
                            const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth,
                            bool useTmpSuffix, bool dictnry = false, bool isReadOnly = false) const;

  // @brief Retrieve pointer to a compressed DB file, addressed by `filename`.
  CompFileData* getFileDataByName(const std::string& filename, const FID& fid, uint16_t root,
                                  uint32_t partition, uint16_t segment, const char* mode, int size,
                                  const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth,
                                  bool useTmpSuffix, bool dctnry) const;

  // @brief Retrieve a chunk of pFile from disk.
  int fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*& chunkData);

  // @brief Compress a chunk and write it to file.
  int writeChunkToFile(CompFileData* fileData, int64_t id);
  int writeChunkToFile(CompFileData* fileData, ChunkData* chunkData);

  // @brief Write the compressed data to file and log a recover entry.
  int writeCompressedChunk(CompFileData* fileData, int64_t offset, int64_t size);
  inline int writeCompressedChunk_(CompFileData* fileData, int64_t offset);

  // @brief Write the file header to disk.
  // NOTE(review): `ln` here and in the I/O helpers below is presumably the
  // caller's source line number used for logging. Confirm in the implementation.
  int writeHeader(CompFileData* fileData, int ln);
  inline int writeHeader_(CompFileData* fileData, int ptrSecSize);

  // @brief open a compressed DB file.
  int openFile(CompFileData* fileData, const char* mode, int colWidth, bool useTmpSuffix, int ln) const;

  // @brief set offset in a compressed DB file from beginning.
  int setFileOffset(IDBDataFile* pFile, const std::string& fileName, off64_t offset, int ln) const;

  // @brief read from a compressed DB file.
  int readFile(IDBDataFile* pFile, const std::string& fileName, void* buf, size_t size, int ln) const;

  // @brief write to a compressed DB file.
  int writeFile(IDBDataFile* pFile, const std::string& fileName, void* buf, size_t size, int ln) const;

  // @brief Close a compressed DB file.
  int closeFile(CompFileData* fileData);

  // @brief Set empty values to a chunk.
  void initializeColumnChunk(char* buf, CompFileData* fileData);
  void initializeDctnryChunk(char* buf, int size);

  // @brief Calculate the header size based on column width.
  int calculateHeaderSize(int width);

  // @brief Moving chunks as a result of expanding a chunk.
  int reallocateChunks(CompFileData* fileData);

  // @brief verify chunks in the file are OK
  int verifyChunksAfterRealloc(CompFileData* fileData);

  // @brief log a message to the syslog
  void logMessage(int code, int level, int lineNum, int fromLine = -1) const;
  void logMessage(const std::string& msg, int level) const;

  // @brief Write a DML recovery log
  int writeLog(TxnID txnId, std::string backUpFileType, std::string filename, std::string& aDMLLogFileName,
               int64_t size = 0, int64_t offset = 0) const;

  // @brief remove DML recovery logs
  int removeBackups(TxnID txnId);

  // @brief swap the src file to dest file
  int swapTmpFile(const std::string& src, const std::string& dest);

  // @brief construct a DML log file name
  int getDMLLogFileName(std::string& aDMLLogFileName, const TxnID& txnId) const;

  // The two lookup maps are mutable so that the const getFilePtr*/getFileData*
  // members are able to modify them.
  mutable std::map<FileID, CompFileData*> fFileMap;
  mutable std::map<IDBDataFile*, CompFileData*> fFilePtrMap;
  std::list<std::pair<FileID, ChunkData*> > fActiveChunks;
  unsigned int fMaxActiveChunkNum;  // max active chunks per file
  char* fBufCompressed;
  size_t fLenCompressed;
  size_t fMaxCompressedBufSize;
  size_t fUserPaddings;
  bool fIsBulkLoad;  // set through setBulkFlag()
  bool fDropFdCache;
  bool fIsInsert;  // set through setIsInsert()
  bool fIsHdfs;
  FileOp* fFileOp;
  compress::CompressorPool fCompressorPool;
  logging::Logger* fSysLogger;
  TxnID fTransId;  // set through setTransId()
  int fLocalModuleId;
  idbdatafile::IDBFileSystem& fFs;
  bool fIsFix;  // set through setFixFlag()
  // Despite the constant-style name this is a regular, per-instance data member.
  size_t COMPRESSED_CHUNK_SIZE;

 private:
  // NOTE(review): presumably the shared implementation behind getFileData() and
  // getFileDataByName(). Confirm in the implementation file.
  CompFileData* getFileData_(const FileID& fid, const std::string& filename, const char* mode, int size,
                             const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth,
                             bool useTmpSuffix, bool dictnry = false, bool isReadOnly = false) const;
};
} // namespace WriteEngine
#undef EXPORT