/* Copyright (C) 2014 InfiniDB, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ // $Id: we_dctnry.h 4726 2013-08-07 03:38:36Z bwilkinson $ /** @we_dctnry.h * Defines the Dctnry class * When a signature is given, the value will be stored in dictionary and * a token will be issued. Given a token, the signature in the dictionary * can be deleted */ #pragma once #include #include #include #include #include "we_dbfileop.h" #include "we_type.h" #include "we_brm.h" #include "bytestream.h" #include "nullstring.h" #define EXPORT /** Namespace WriteEngine */ namespace WriteEngine { //--------------------------------------------------------------------------- // Structure used to store signatures in string cache //--------------------------------------------------------------------------- typedef struct Signature { int size; unsigned char* signature; Token token; } Signature; struct sig_compare { bool operator()(const Signature& a, const Signature& b) const { if (a.size == b.size) { return memcmp(a.signature, b.signature, a.size) < 0; } else if (a.size < b.size) { return true; } else { return false; } } }; /** * @brief Class to interface with dictionary store files. */ class Dctnry : public DbFileOp { //-------------------------------------------------------------------------- // Public members //-------------------------------------------------------------------------- public: /** * @brief Dctnry Constructor */ EXPORT Dctnry(); /** * @brief Dctnry Destructor */ EXPORT virtual ~Dctnry(); /** * @brief Close the dictionary file handle. */ EXPORT int closeDctnry(bool realClose = true); /** * @brief Close the dictionary file handle without flushing the current blk * buffer or updating HWM to BRM. */ EXPORT int closeDctnryOnly(); /** * @brief Create a dictionary extent * * If 'flag' is true, a new file is created with an abbreviated extent. * If 'flag' is false, then function adds a full exent to an already open * file, basically assuming that the file already has 1 or more extents. * * @param dctnryOID - dictionary file OID * @param colWidth - dictionary string width (not the token width) * @param dbRoot - DBRoot for store file * @param partition - partition number for store file * @param segment - column segment number for store file * @param flag - indicates whether extent is added to new file (true) * @param startLbid - starting LBID for the newly allocated extent */ EXPORT int createDctnry(const OID& dctnryOID, int colWidth, const uint16_t dbRoot, const uint32_t partition, const uint16_t segment, BRM::LBID_t& startLbid, bool flag = true); /** * @brief Drop dictionary store * * @param dctnryOID- OID of dictionary store file to be deleted */ EXPORT int dropDctnry(const OID& dctnryOID); /** * @brief Accessors */ const std::string& getFileName() const { return m_segFileName; } HWM getHWM() const { return m_hwm; } EXPORT bool getTokenFromArray(Signature& sig); EXPORT uint64_t getCurLbid() { return m_curLbid; } const unsigned char* getDctnryHeader2() const { return m_dctnryHeader2; } /** * @brief Insert a signature value to a file block and return token/pointer. * (for DDL/DML use) * * @param sgnature_size - size of signature to be inserted * @param sgnature_value - signature to be inserted * @param token - (output) token associated with inserted signature */ EXPORT int insertDctnry(const int& sgnature_size, const unsigned char* sgnature_value, Token& token); /** * @brief Insert a signature value to a file block and return token/pointer * (for Bulk use) * * @param buf - bulk buffer containing strings to be parsed * @param pos - list of offsets into buf * @param totalRow - total number of rows in buf * @param col - the column to be parsed from buf * @param tokenBuf - (output) list of tokens for the parsed strings */ EXPORT int insertDctnry(const char* buf, ColPosPair** pos, const int totalRow, const int col, char* tokenBuf, long long& truncCount, const CHARSET_INFO* cs, const WriteEngine::ColType& weType); /** * @brief Update dictionary store with tokenized strings (for DDL/DML use) * * @param sigValue - signature value * @param sigSize - signature size * @param token - (output) token that was added */ EXPORT int updateDctnry(unsigned char* sigValue, int& sigSize, Token& token); /** * @brief open dictionary store * * @param dctnryOID - dictionary file OID * @param dbRoot - DBRoot for store file * @param partition - partition number for store file * @param segment - column segment number for store file * @param useTmpSuffix - for Bulk HDFS usage: use or not use *.tmp file suffix */ EXPORT int openDctnry(const OID& dctnryOID, const uint16_t dbRoot, const uint32_t partition, const uint16_t segment, const bool useTmpSuffix); /** * @brief copy the dictionary header to buffer */ void copyDctnryHeader(void* buf); /** * @brief Set logger that can be used for logging (primarily by bulk load) */ void setLogger(Log* logger) { m_logger = logger; } /** * @brief Set dictionary column width for this column */ void setColWidth(int colWidth) { m_colWidth = colWidth; } /** * @brief Set dictionary default for this column */ void setDefault(const utils::NullString& defVal) { m_defVal = defVal; } void setImportDataMode(ImportDataMode importMode) { m_importDataMode = importMode; } virtual int checkFixLastDictChunk() { return NO_ERROR; } /** * @brief Use this only in Unit Tests and not in prod */ virtual IDBDataFile* createDctnryFileUnit(const char* name, int width, const char* mode, int ioBuffSize) { return createDctnryFile(name, width, mode, ioBuffSize); } //------------------------------------------------------------------------------ // Protected members //------------------------------------------------------------------------------ protected: // // Add the specified signature (string) to the string cache // void addToStringCache(const Signature& newSig); // // Clear the dictionary store. // void clear() { m_dFile = NULL; m_dctnryOID = (OID)INVALID_NUM; } // Expand an abbreviated extent on disk. int expandDctnryExtent(); // Free memory consumed by strings in the string cache void freeStringCache(); // // Functions to read data: // getBlockOpCount - get the ordinal position (OP) count from the header // getEndOp - read OP of the end of header for specified fbo // void getBlockOpCount(const DataBlock& fileBlock, int& op_count); int getEndOp(IDBDataFile* dFile, int fbo, int& op); // // Initialization // int init(); // // Support functions for inserting values into dictionary. // insertDctnryHdr inserts the new value info into the header. // insertSgnture inserts the new value into the block. // int insertDctnry2(Signature& sig); void insertDctnryHdr(unsigned char* blockBuf, const int& size); void insertSgnture(unsigned char* blockBuf, const int& size, unsigned char* value); // // Preloads the strings from the specified DataBlock. Currently // used to preload the first block, of a store file having only 1 block. // void preLoadStringCache(const DataBlock& fileBlock); // methods to be overriden by compression classes // (width argument in createDctnryFile() is string width, not token width) virtual IDBDataFile* createDctnryFile(const char* name, int width, const char* mode, int ioBuffSize, BRM::LBID_t lbid = -1); virtual IDBDataFile* openDctnryFile(bool useTmpSuffix); virtual void closeDctnryFile(bool doFlush, std::map& oids); virtual int numOfBlocksInFile(); std::set m_sigArray; int m_arraySize; // num strings in m_sigArray // m_dctnryHeader used for hdr when readSubBlockEntry is used to read a blk // m_dctnryHeader2 contains filled in template used to initialize new blocks unsigned char m_dctnryHeader[DCTNRY_HEADER_SIZE]; // first 14 bytes of hdr unsigned char m_dctnryHeader2[DCTNRY_HEADER_SIZE]; // first 14 bytes of hdr uint64_t m_nextPtr; // next pointer // relate to different Dictionary file FID m_dctnryOID; // OID for the dctnry file IDBDataFile* m_dFile; // dictionary file uint32_t m_partition; // partition associated with OID uint16_t m_segment; // segment associated with OID uint16_t m_dbRoot; // DBRoot associated with OID std::string m_segFileName; // current column segment file int m_numBlocks; // num "raw" uncompressed blocks in file int m_lastFbo; HWM m_hwm; // Need to be initialized for different Dictionary file int m_newStartOffset; // start offset uint16_t m_freeSpace; // free space (bytes) within current block int m_curOp; // current ordinal pointer within m_curFbo int m_curFbo; // current "raw" (uncompressed) FBO BRM::LBID_t m_curLbid; // LBID associated with m_curFbo DataBlock m_curBlock; // current "raw" (uncompressed) data block Log* m_logger; // logger, mainly for bulk load int m_colWidth; // width of this dictionary column utils::NullString m_defVal; // optional default string value ImportDataMode m_importDataMode; // Import data in text or binary mode }; // end of class } // namespace WriteEngine #undef EXPORT