You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-30 07:25:34 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			522 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			522 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (C) 2014 InfiniDB, Inc.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or
 | |
|    modify it under the terms of the GNU General Public License
 | |
|    as published by the Free Software Foundation; version 2 of
 | |
|    the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | |
|    MA 02110-1301, USA. */
 | |
| 
 | |
| /** @file */
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
#include <sys/types.h>
#include <unistd.h>

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "calpontsystemcatalog.h"
 | |
| 
 | |
| #define EXPORT
 | |
| 
 | |
| namespace compress
 | |
| {
 | |
// One compressed chunk, described as an (offset, size) pair.
using CompChunkPtr = std::pair<uint64_t, uint64_t>;
// Ordered collection of chunk descriptors.
using CompChunkPtrList = std::vector<CompChunkPtr>;
 | |
| 
 | |
| class CompressInterface
 | |
| {
 | |
|  public:
 | |
|   static const unsigned int HDR_BUF_LEN = 4096;
 | |
|   static const unsigned int UNCOMPRESSED_INBUF_LEN = 512 * 1024 * 8;
 | |
|   static const uint32_t COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
 | |
| 
 | |
|   // error codes from uncompressBlock()
 | |
|   static const int ERR_OK = 0;
 | |
|   static const int ERR_CHECKSUM = -1;
 | |
|   static const int ERR_DECOMPRESS = -2;
 | |
|   static const int ERR_BADINPUT = -3;
 | |
|   static const int ERR_BADOUTSIZE = -4;
 | |
|   static const int ERR_COMPRESS = -5;
 | |
| 
 | |
|   /**
 | |
|    * When CompressInterface object is being used to compress a chunk, this
 | |
|    * construct can be used to specify the padding added by padCompressedChunks
 | |
|    */
 | |
|   EXPORT explicit CompressInterface(unsigned int numUserPaddingBytes = 0);
 | |
| 
 | |
|   /**
 | |
|    * dtor
 | |
|    */
 | |
|   EXPORT virtual ~CompressInterface() = default;
 | |
| 
 | |
|   /**
 | |
|    * see if the algo is available in this lib
 | |
|    */
 | |
|   EXPORT static bool isCompressionAvail(int compressionType = 0);
 | |
| 
 | |
|   /**
 | |
|    * Returns the maximum compressed size from all available compression
 | |
|    * types.
 | |
|    */
 | |
|   EXPORT static size_t getMaxCompressedSizeGeneric(size_t inLen);
 | |
| 
 | |
|   /**
 | |
|    * Compresses specified "in" buffer of length "inLen" bytes.
 | |
|    * Compressed data and size are returned in "out" and "outLen".
 | |
|    * "out" should be sized using maxCompressedSize() to allow for incompressible data.
 | |
|    * Returns 0 if success.
 | |
|    */
 | |
| 
 | |
|   EXPORT int compressBlock(const char* in, const size_t inLen, unsigned char* out, size_t& outLen) const;
 | |
| 
 | |
|   /**
 | |
|    * outLen must be initialized with the size of the out buffer before calling uncompressBlock.
 | |
|    * On return, outLen will have the number of bytes used in out.
 | |
|    */
 | |
|   EXPORT int uncompressBlock(const char* in, const size_t inLen, unsigned char* out, size_t& outLen) const;
 | |
| 
 | |
|   /**
 | |
|    * This fcn wraps whatever compression algorithm we're using at the time, and
 | |
|    * is not specific to blocks on disk.
 | |
|    */
 | |
|   EXPORT virtual int compress(const char* in, size_t inLen, char* out, size_t* outLen) const = 0;
 | |
| 
 | |
|   /**
 | |
|    * This fcn wraps whatever compression algorithm we're using at the time, and
 | |
|    * is not specific to blocks on disk.  The caller needs to make sure out is big
 | |
|    * enough to contain the output by using getUncompressedSize().
 | |
|    */
 | |
|   EXPORT virtual int uncompress(const char* in, size_t inLen, char* out, size_t* outLen) const = 0;
 | |
| 
 | |
|   /**
 | |
|    * Initialize header buffer at start of compressed db file.
 | |
|    *
 | |
|    * @warning hdrBuf must be at least HDR_BUF_LEN bytes
 | |
|    * @warning ptrBuf must be at least (hdrSize-HDR_BUF_LEN) bytes
 | |
|    */
 | |
|   EXPORT static void initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
 | |
|                              execplan::CalpontSystemCatalog::ColDataType columnType, int compressionType,
 | |
|                              int hdrSize);
 | |
|   /**
 | |
|    * Initialize header buffer at start of compressed db file.
 | |
|    *
 | |
|    * @warning hdrBuf must be at least HDR_BUF_LEN*2 bytes
 | |
|    */
 | |
|   EXPORT static void initHdr(void* hdrBuf, uint32_t columnWidth,
 | |
|                              execplan::CalpontSystemCatalog::ColDataType columnType, int compressionType);
 | |
| 
 | |
|   /**
 | |
|    * Verify the passed in buffer contains a compressed db file header.
 | |
|    */
 | |
|   EXPORT static int verifyHdr(const void* hdrBuf);
 | |
| 
 | |
|   /**
 | |
|    * Extracts list of compression pointers from the specified ptr buffer.
 | |
|    * ptrBuf points to the pointer section taken from the headers.
 | |
|    * chunkPtrs is a vector of offset, size pairs for the compressed chunks.
 | |
|    * Returns 0 if success.
 | |
|    */
 | |
|   EXPORT static int getPtrList(const char* ptrBuf, const int ptrBufSize, CompChunkPtrList& chunkPtrs);
 | |
| 
 | |
|   /**
 | |
|    * Extracts list of compression pointers from the specified header.
 | |
|    * hdrBuf points to start of 2 buffer headers from compressed db file.
 | |
|    * Overloaded for backward compatibility. For none dictionary columns.
 | |
|    * Note: the pointer passed in is the beginning of the header,
 | |
|    *       not the pointer section as above.
 | |
|    */
 | |
|   EXPORT static int getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs);
 | |
| 
 | |
|   /**
 | |
|    * Return the number of chunk pointers contained in the specified ptr buffer.
 | |
|    * ptrBuf points to the pointer section taken from the headers.
 | |
|    */
 | |
|   EXPORT static unsigned int getPtrCount(const char* ptrBuf, const int ptrBufSize);
 | |
| 
 | |
|   /**
 | |
|    * Return the number of chunk pointers contained in the specified header.
 | |
|    * hdrBuf points to start of 2 buffer headers from compressed db file.
 | |
|    * For non-dictionary columns.
 | |
|    */
 | |
|   EXPORT static unsigned int getPtrCount(const char* hdrBuf);
 | |
| 
 | |
|   /**
 | |
|    * Store vector of pointers into the specified buffer header's pointer section.
 | |
|    */
 | |
|   EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs, void* hdrBuf, int ptrSectionSize);
 | |
| 
 | |
|   /**
 | |
|    * Store vector of pointers into the specified buffer header.
 | |
|    * Overloaded for backward compatibility. For none dictionary columns.
 | |
|    * Note: the pointer passed in is the beginning of the header,
 | |
|    *       not the pointer section as above.
 | |
|    */
 | |
|   EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs, void* hdrBuf);
 | |
| 
 | |
|   /**
 | |
|    * Calculates the chunk, and the block offset within the chunk, for the
 | |
|    * specified block number.
 | |
|    */
 | |
|   EXPORT void locateBlock(unsigned int block, unsigned int& chunkIndex,
 | |
|                           unsigned int& blockOffsetWithinChunk) const;
 | |
| 
 | |
|   /**
 | |
|    * Pads the specified compressed chunk to the nearest compressed chunk
 | |
|    * increment, by padding buf with 0's, and updating len accordingly.
 | |
|    * maxLen is the maximum size for buf.  nonzero return code means the
 | |
|    * result output buffer length is > than maxLen.
 | |
|    */
 | |
|   EXPORT int padCompressedChunks(unsigned char* buf, size_t& len, unsigned int maxLen) const;
 | |
| 
 | |
|   /*
 | |
|    * Mutator methods for the block count in the file
 | |
|    */
 | |
|   /**
 | |
|    * getVersionNumber
 | |
|    */
 | |
|   EXPORT static uint64_t getVersionNumber(const void* hdrBuf);
 | |
| 
 | |
|   /**
 | |
|    * setBlockCount
 | |
|    */
 | |
|   EXPORT static void setBlockCount(void* hdrBuf, uint64_t count);
 | |
| 
 | |
|   /**
 | |
|    * getBlockCount
 | |
|    */
 | |
|   EXPORT static uint64_t getBlockCount(const void* hdrBuf);
 | |
| 
 | |
|   /**
 | |
|    * getCompressionType
 | |
|    */
 | |
|   EXPORT static uint64_t getCompressionType(const void* hdrBuf);
 | |
| 
 | |
|   /*
 | |
|    * Mutator methods for the overall header size
 | |
|    */
 | |
|   /**
 | |
|    * setHdrSize
 | |
|    */
 | |
|   EXPORT static void setHdrSize(void* hdrBuf, uint64_t size);
 | |
| 
 | |
|   /**
 | |
|    * getHdrSize
 | |
|    */
 | |
|   EXPORT static uint64_t getHdrSize(const void* hdrBuf);
 | |
| 
 | |
|   /**
 | |
|    * getColumnType
 | |
|    */
 | |
|   EXPORT static execplan::CalpontSystemCatalog::ColDataType getColDataType(const void* hdrBuf);
 | |
| 
 | |
|   /**
 | |
|    * getColumnWidth
 | |
|    */
 | |
|   EXPORT static uint64_t getColumnWidth(const void* hdrBuf);
 | |
| 
 | |
|   /**
 | |
|    * getLBIDByIndex
 | |
|    */
 | |
|   EXPORT static uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index);
 | |
| 
 | |
|   /**
 | |
|    * setLBIDByIndex
 | |
|    */
 | |
|   EXPORT static void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index);
 | |
| 
 | |
|   /**
 | |
|    * getLBIDCount
 | |
|    */
 | |
|   EXPORT static uint64_t getLBIDCount(void* hdrBuf);
 | |
| 
 | |
|   /**
 | |
|    * Mutator methods for the user padding bytes
 | |
|    */
 | |
|   /**
 | |
|    * set numUserPaddingBytes
 | |
|    */
 | |
|   EXPORT void numUserPaddingBytes(uint64_t num)
 | |
|   {
 | |
|     fNumUserPaddingBytes = num;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * get numUserPaddingBytes
 | |
|    */
 | |
|   EXPORT uint64_t numUserPaddingBytes() const
 | |
|   {
 | |
|     return fNumUserPaddingBytes;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Given an input, uncompressed block, what's the maximum possible output,
 | |
|    * compressed size?
 | |
|    */
 | |
|   EXPORT virtual size_t maxCompressedSize(size_t uncompSize) const = 0;
 | |
| 
 | |
|   /**
 | |
|    * Given a compressed block, returns the uncompressed size in outLen.
 | |
|    * Returns false on error, true on success.
 | |
|    */
 | |
|   EXPORT virtual bool getUncompressedSize(char* in, size_t inLen, size_t* outLen) const = 0;
 | |
| 
 | |
|  protected:
 | |
|   virtual uint8_t getChunkMagicNumber() const = 0;
 | |
| 
 | |
|  private:
 | |
|   // defaults okay
 | |
|   // CompressInterface(const CompressInterface& rhs);
 | |
|   // CompressInterface& operator=(const CompressInterface& rhs);
 | |
| 
 | |
|   unsigned int fNumUserPaddingBytes;  // Num bytes to pad compressed chunks
 | |
| };
 | |
| 
 | |
| class CompressInterfaceSnappy : public CompressInterface
 | |
| {
 | |
|  public:
 | |
|   EXPORT CompressInterfaceSnappy(uint32_t numUserPaddingBytes = 0);
 | |
|   EXPORT ~CompressInterfaceSnappy() = default;
 | |
|   /**
 | |
|    * Compress the given block using snappy compression API.
 | |
|    */
 | |
|   EXPORT int32_t compress(const char* in, size_t inLen, char* out, size_t* outLen) const override;
 | |
|   /**
 | |
|    * Uncompress the given block using snappy compression API.
 | |
|    */
 | |
|   EXPORT int32_t uncompress(const char* in, size_t inLen, char* out, size_t* outLen) const override;
 | |
|   /**
 | |
|    * Get max compressed size for the given `uncompSize` value using snappy
 | |
|    * compression API.
 | |
|    */
 | |
|   EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
 | |
| 
 | |
|   /**
 | |
|    * Get uncompressed size for the given block using snappy
 | |
|    * compression API.
 | |
|    */
 | |
|   EXPORT
 | |
|   bool getUncompressedSize(char* in, size_t inLen, size_t* outLen) const override;
 | |
| 
 | |
|  protected:
 | |
|   uint8_t getChunkMagicNumber() const override;
 | |
| 
 | |
|  private:
 | |
|   const uint8_t CHUNK_MAGIC_SNAPPY = 0xfd;
 | |
| };
 | |
| 
 | |
| class CompressInterfaceLZ4 : public CompressInterface
 | |
| {
 | |
|  public:
 | |
|   EXPORT CompressInterfaceLZ4(uint32_t numUserPaddingBytes = 0);
 | |
|   EXPORT ~CompressInterfaceLZ4() = default;
 | |
|   /**
 | |
|    * Compress the given block using LZ4 compression API.
 | |
|    */
 | |
|   EXPORT int32_t compress(const char* in, size_t inLen, char* out, size_t* outLen) const override;
 | |
|   /**
 | |
|    * Uncompress the given block using LZ4 compression API.
 | |
|    */
 | |
|   EXPORT int32_t uncompress(const char* in, size_t inLen, char* out, size_t* outLen) const override;
 | |
|   /**
 | |
|    * Get max compressed size for the given `uncompSize` value using LZ4
 | |
|    * compression API.
 | |
|    */
 | |
|   EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
 | |
| 
 | |
|   /**
 | |
|    * Get uncompressed size for the given block using LZ4
 | |
|    * compression API.
 | |
|    */
 | |
|   EXPORT
 | |
|   bool getUncompressedSize(char* in, size_t inLen, size_t* outLen) const override;
 | |
| 
 | |
|  protected:
 | |
|   uint8_t getChunkMagicNumber() const override;
 | |
| 
 | |
|  private:
 | |
|   const uint8_t CHUNK_MAGIC_LZ4 = 0xfc;
 | |
| };
 | |
| 
 | |
| using CompressorPool = std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>;
 | |
| 
 | |
| /**
 | |
|  *  Returns a pointer to the appropriate compression interface based on
 | |
|  * `compressionType`. `compressionType` must be greater than 0.
 | |
|  *  Note: caller is responsible for memory deallocation.
 | |
|  */
 | |
| EXPORT CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
 | |
|                                                      uint32_t numUserPaddingBytes = 0);
 | |
| 
 | |
| /**
 | |
|  *  Returns a pointer to the appropriate compression interface based on
 | |
|  * `compressionName`.
 | |
|  *  Note: caller is responsible for memory deallocation.
 | |
|  */
 | |
| EXPORT CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
 | |
|                                                      uint32_t numUserPaddingBytes = 0);
 | |
| 
 | |
| /**
 | |
|  *  Initializes a given `unordered_map` with all available compression
 | |
|  *  interfaces.
 | |
|  */
 | |
| EXPORT void initializeCompressorPool(CompressorPool& compressorPool, uint32_t numUserPaddingBytes = 0);
 | |
| 
 | |
| /**
 | |
|  *  Returns a `shared_ptr` to the appropriate compression interface.
 | |
|  */
 | |
| EXPORT std::shared_ptr<CompressInterface> getCompressorByType(CompressorPool& compressorPool,
 | |
|                                                               uint32_t compressionType);
 | |
| 
 | |
#ifdef SKIP_IDB_COMPRESSION
// Header-only stand-ins compiled when SKIP_IDB_COMPRESSION is defined.
// Each definition matches a declaration above exactly (same qualification,
// parameter types and cv-qualifiers) and is `inline`, so including this
// header from several translation units does not violate the one-definition
// rule. The stubs do no work: they return a failure code, zero, or nullptr.

// ---- CompressInterface -----------------------------------------------------

inline CompressInterface::CompressInterface(unsigned int numUserPaddingBytes)
 : fNumUserPaddingBytes(numUserPaddingBytes)
{
}

// Only compression type 0 is reported as available.
inline bool CompressInterface::isCompressionAvail(int c)
{
  return (c == 0);
}

// Input length is handed back unchanged.
inline size_t CompressInterface::getMaxCompressedSizeGeneric(size_t inLen)
{
  return inLen;
}

inline int CompressInterface::compressBlock(const char*, const size_t, unsigned char*, size_t&) const
{
  return -1;
}

inline int CompressInterface::uncompressBlock(const char*, const size_t, unsigned char*, size_t&) const
{
  return -1;
}

inline void CompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType,
                                       int, int)
{
}

inline void CompressInterface::initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int)
{
}

inline int CompressInterface::verifyHdr(const void*)
{
  return -1;
}

inline int CompressInterface::getPtrList(const char*, const int, CompChunkPtrList&)
{
  return -1;
}

inline int CompressInterface::getPtrList(const char*, CompChunkPtrList&)
{
  return -1;
}

inline unsigned int CompressInterface::getPtrCount(const char*, const int)
{
  return 0;
}

inline unsigned int CompressInterface::getPtrCount(const char*)
{
  return 0;
}

inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int)
{
}

inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*)
{
}

// Both output references are left untouched.
inline void CompressInterface::locateBlock(unsigned int, unsigned int&, unsigned int&) const
{
}

inline int CompressInterface::padCompressedChunks(unsigned char*, size_t&, unsigned int) const
{
  return -1;
}

inline uint64_t CompressInterface::getVersionNumber(const void*)
{
  return 0;
}

inline void CompressInterface::setBlockCount(void*, uint64_t)
{
}

inline uint64_t CompressInterface::getBlockCount(const void*)
{
  return 0;
}

inline uint64_t CompressInterface::getCompressionType(const void*)
{
  return 0;
}

inline void CompressInterface::setHdrSize(void*, uint64_t)
{
}

inline uint64_t CompressInterface::getHdrSize(const void*)
{
  return 0;
}

inline execplan::CalpontSystemCatalog::ColDataType CompressInterface::getColDataType(const void*)
{
  return execplan::CalpontSystemCatalog::ColDataType::UNDEFINED;
}

inline uint64_t CompressInterface::getColumnWidth(const void*)
{
  return 0;
}

inline uint64_t CompressInterface::getLBIDByIndex(const void*, uint64_t)
{
  return 0;
}

inline void CompressInterface::setLBIDByIndex(void*, uint64_t, uint64_t)
{
}

inline uint64_t CompressInterface::getLBIDCount(void*)
{
  return 0;
}

// ---- CompressInterfaceSnappy -----------------------------------------------

inline CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
 : CompressInterface(numUserPaddingBytes)
{
}

inline int32_t CompressInterfaceSnappy::compress(const char*, size_t, char*, size_t*) const
{
  return -1;
}

inline int32_t CompressInterfaceSnappy::uncompress(const char*, size_t, char*, size_t*) const
{
  return -1;
}

inline size_t CompressInterfaceSnappy::maxCompressedSize(size_t uncompSize) const
{
  return uncompSize;
}

inline bool CompressInterfaceSnappy::getUncompressedSize(char*, size_t, size_t*) const
{
  return false;
}

inline uint8_t CompressInterfaceSnappy::getChunkMagicNumber() const
{
  return 0;
}

// ---- CompressInterfaceLZ4 --------------------------------------------------

inline CompressInterfaceLZ4::CompressInterfaceLZ4(uint32_t numUserPaddingBytes)
 : CompressInterface(numUserPaddingBytes)
{
}

inline int32_t CompressInterfaceLZ4::compress(const char*, size_t, char*, size_t*) const
{
  return -1;
}

inline int32_t CompressInterfaceLZ4::uncompress(const char*, size_t, char*, size_t*) const
{
  return -1;
}

inline size_t CompressInterfaceLZ4::maxCompressedSize(size_t uncompSize) const
{
  return uncompSize;
}

inline bool CompressInterfaceLZ4::getUncompressedSize(char*, size_t, size_t*) const
{
  return false;
}

inline uint8_t CompressInterfaceLZ4::getChunkMagicNumber() const
{
  return 0;
}

// ---- Free functions --------------------------------------------------------

inline CompressInterface* getCompressInterfaceByType(uint32_t, uint32_t)
{
  return nullptr;
}

inline CompressInterface* getCompressInterfaceByName(const std::string&, uint32_t)
{
  return nullptr;
}

// The pool is left exactly as the caller passed it in.
inline void initializeCompressorPool(CompressorPool&, uint32_t)
{
}

inline std::shared_ptr<CompressInterface> getCompressorByType(CompressorPool&, uint32_t)
{
  return nullptr;
}
#endif
 | |
| }  // namespace compress
 | |
| 
 | |
| #undef EXPORT
 |