You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
Merge pull request #1842 from denis0x0D/MCOL-987_LZ
MCOL-987 LZ4 compression support.
This commit is contained in:
@ -10,7 +10,7 @@ add_definitions(-DNDEBUG)
|
||||
|
||||
add_library(compress SHARED ${compress_LIB_SRCS})
|
||||
|
||||
target_link_libraries(compress ${SNAPPY_LIBRARIES})
|
||||
target_link_libraries(compress ${SNAPPY_LIBRARIES} ${LZ4_LIBRARIES})
|
||||
|
||||
install(TARGETS compress DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine)
|
||||
|
||||
|
@ -22,12 +22,14 @@
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
using namespace std;
|
||||
|
||||
#include "blocksize.h"
|
||||
#include "logger.h"
|
||||
#include "snappy.h"
|
||||
#include "hasher.h"
|
||||
#include "lz4.h"
|
||||
|
||||
#define IDBCOMP_DLLEXPORT
|
||||
#include "idbcompress.h"
|
||||
@ -39,8 +41,7 @@ const uint64_t MAGIC_NUMBER = 0xfdc119a384d0778eULL;
|
||||
const uint64_t VERSION_NUM1 = 1;
|
||||
const uint64_t VERSION_NUM2 = 2;
|
||||
const uint64_t VERSION_NUM3 = 3;
|
||||
const int COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
|
||||
const int PTR_SECTION_OFFSET = compress::IDBCompressInterface::HDR_BUF_LEN;
|
||||
const int PTR_SECTION_OFFSET = compress::CompressInterface::HDR_BUF_LEN;
|
||||
|
||||
// version 1.1 of the chunk data has a short header
|
||||
// QuickLZ compressed data never has the high bit set on the first byte
|
||||
@ -83,7 +84,7 @@ struct CompressedDBFileHeader
|
||||
union CompressedDBFileHeaderBlock
|
||||
{
|
||||
CompressedDBFileHeader fHeader;
|
||||
char fDummy[compress::IDBCompressInterface::HDR_BUF_LEN];
|
||||
char fDummy[compress::CompressInterface::HDR_BUF_LEN];
|
||||
};
|
||||
|
||||
void initCompressedDBFileHeader(
|
||||
@ -110,53 +111,57 @@ namespace compress
|
||||
{
|
||||
#ifndef SKIP_IDB_COMPRESSION
|
||||
|
||||
IDBCompressInterface::IDBCompressInterface(unsigned int numUserPaddingBytes) :
|
||||
CompressInterface::CompressInterface(unsigned int numUserPaddingBytes) :
|
||||
fNumUserPaddingBytes(numUserPaddingBytes)
|
||||
{ }
|
||||
|
||||
IDBCompressInterface::~IDBCompressInterface()
|
||||
{ }
|
||||
|
||||
/* V1 is really only available for decompression, we kill any DDL using V1 by hand.
|
||||
* Maybe should have a new api, isDecompressionAvail() ? Any request to compress
|
||||
* using V1 will silently be changed to V2.
|
||||
*/
|
||||
bool IDBCompressInterface::isCompressionAvail(int compressionType) const
|
||||
/*static*/
|
||||
bool CompressInterface::isCompressionAvail(int compressionType)
|
||||
{
|
||||
if ( (compressionType == 0) ||
|
||||
(compressionType == 1) ||
|
||||
(compressionType == 2) )
|
||||
return true;
|
||||
return ((compressionType == 0) || (compressionType == 1) ||
|
||||
(compressionType == 2) || (compressionType == 3));
|
||||
}
|
||||
|
||||
return false;
|
||||
size_t CompressInterface::getMaxCompressedSizeGeneric(size_t inLen)
|
||||
{
|
||||
return std::max(snappy::MaxCompressedLength(inLen),
|
||||
LZ4_COMPRESSBOUND(inLen)) +
|
||||
HEADER_SIZE;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Compress a block of data
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::compressBlock(const char* in,
|
||||
const size_t inLen,
|
||||
unsigned char* out,
|
||||
unsigned int& outLen) const
|
||||
int CompressInterface::compressBlock(const char* in, const size_t inLen,
|
||||
unsigned char* out, size_t& outLen) const
|
||||
{
|
||||
size_t snaplen = 0;
|
||||
utils::Hasher128 hasher;
|
||||
|
||||
// loose input checking.
|
||||
if (outLen < snappy::MaxCompressedLength(inLen) + HEADER_SIZE)
|
||||
if (outLen < maxCompressedSize(inLen))
|
||||
{
|
||||
cerr << "got outLen = " << outLen << " for inLen = " << inLen << ", needed " <<
|
||||
(snappy::MaxCompressedLength(inLen) + HEADER_SIZE) << endl;
|
||||
cerr << "got outLen = " << outLen << " for inLen = " << inLen
|
||||
<< ", needed " << (maxCompressedSize(inLen)) << endl;
|
||||
return ERR_BADOUTSIZE;
|
||||
}
|
||||
|
||||
//apparently this never fails?
|
||||
snappy::RawCompress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]), &snaplen);
|
||||
auto rc = compress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]),
|
||||
&outLen);
|
||||
if (rc != ERR_OK)
|
||||
{
|
||||
return rc;
|
||||
}
|
||||
|
||||
snaplen = outLen;
|
||||
uint8_t* signature = (uint8_t*) &out[SIG_OFFSET];
|
||||
uint32_t* checksum = (uint32_t*) &out[CHECKSUM_OFFSET];
|
||||
uint32_t* len = (uint32_t*) &out[LEN_OFFSET];
|
||||
*signature = CHUNK_MAGIC3;
|
||||
*signature = getChunkMagicNumber();
|
||||
*checksum = hasher((char*) &out[HEADER_SIZE], snaplen);
|
||||
*len = snaplen;
|
||||
|
||||
@ -171,51 +176,47 @@ int IDBCompressInterface::compressBlock(const char* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Decompress a block of data
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
|
||||
unsigned int& outLen) const
|
||||
int CompressInterface::uncompressBlock(const char* in, const size_t inLen,
|
||||
unsigned char* out,
|
||||
size_t& outLen) const
|
||||
{
|
||||
bool comprc = false;
|
||||
size_t ol = 0;
|
||||
|
||||
uint32_t realChecksum;
|
||||
uint32_t storedChecksum;
|
||||
uint32_t storedLen;
|
||||
uint8_t storedMagic;
|
||||
utils::Hasher128 hasher;
|
||||
|
||||
auto tmpOutLen = outLen;
|
||||
outLen = 0;
|
||||
|
||||
if (inLen < 1)
|
||||
{
|
||||
return ERR_BADINPUT;
|
||||
}
|
||||
|
||||
storedMagic = *((uint8_t*) &in[SIG_OFFSET]);
|
||||
|
||||
if (storedMagic == CHUNK_MAGIC3)
|
||||
if (storedMagic == getChunkMagicNumber())
|
||||
{
|
||||
if (inLen < HEADER_SIZE)
|
||||
{
|
||||
return ERR_BADINPUT;
|
||||
}
|
||||
|
||||
storedChecksum = *((uint32_t*) &in[CHECKSUM_OFFSET]);
|
||||
storedLen = *((uint32_t*) (&in[LEN_OFFSET]));
|
||||
|
||||
if (inLen < storedLen + HEADER_SIZE)
|
||||
{
|
||||
return ERR_BADINPUT;
|
||||
}
|
||||
|
||||
realChecksum = hasher(&in[HEADER_SIZE], storedLen);
|
||||
|
||||
if (storedChecksum != realChecksum)
|
||||
{
|
||||
return ERR_CHECKSUM;
|
||||
|
||||
auto rc = uncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out), &tmpOutLen);
|
||||
if (rc != ERR_OK)
|
||||
{
|
||||
cerr << "uncompressBlock failed!" << endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
comprc = snappy::GetUncompressedLength(&in[HEADER_SIZE], storedLen, &ol) &&
|
||||
snappy::RawUncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out));
|
||||
outLen = tmpOutLen;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -223,13 +224,6 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
|
||||
return ERR_BADINPUT;
|
||||
}
|
||||
|
||||
if (!comprc)
|
||||
{
|
||||
cerr << "decomp failed!" << endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
outLen = ol;
|
||||
//cerr << "ub: " << inLen << " : " << outLen << endl;
|
||||
|
||||
return ERR_OK;
|
||||
@ -238,7 +232,7 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
|
||||
//------------------------------------------------------------------------------
|
||||
// Verify the passed in buffer contains a valid compression file header.
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
|
||||
int CompressInterface::verifyHdr(const void* hdrBuf)
|
||||
{
|
||||
const CompressedDBFileHeader* hdr = reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf);
|
||||
|
||||
@ -255,9 +249,8 @@ int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
|
||||
// Extract compression pointer information out of the pointer buffer that is
|
||||
// passed in. ptrBuf points to the pointer section of the compression hdr.
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::getPtrList(const char* ptrBuf,
|
||||
const int ptrBufSize,
|
||||
CompChunkPtrList& chunkPtrs ) const
|
||||
int CompressInterface::getPtrList(const char* ptrBuf, const int ptrBufSize,
|
||||
CompChunkPtrList& chunkPtrs)
|
||||
{
|
||||
int rc = 0;
|
||||
chunkPtrs.clear();
|
||||
@ -285,7 +278,7 @@ int IDBCompressInterface::getPtrList(const char* ptrBuf,
|
||||
// one for the file header, and one for the list of pointers.
|
||||
// Wrapper of above method for backward compatibility.
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs ) const
|
||||
int CompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs )
|
||||
{
|
||||
return getPtrList(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN, chunkPtrs);
|
||||
}
|
||||
@ -293,8 +286,8 @@ int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunk
|
||||
//------------------------------------------------------------------------------
|
||||
// Count the number of chunk pointers in the pointer header(s)
|
||||
//------------------------------------------------------------------------------
|
||||
unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
|
||||
const int ptrBufSize) const
|
||||
unsigned int CompressInterface::getPtrCount(const char* ptrBuf,
|
||||
const int ptrBufSize)
|
||||
{
|
||||
unsigned int chunkCount = 0;
|
||||
|
||||
@ -318,7 +311,7 @@ unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
|
||||
// This should not be used for compressed dictionary files which could have
|
||||
// more compression chunk headers.
|
||||
//------------------------------------------------------------------------------
|
||||
unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
|
||||
unsigned int CompressInterface::getPtrCount(const char* hdrBuf)
|
||||
{
|
||||
return getPtrCount(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN);
|
||||
}
|
||||
@ -326,9 +319,8 @@ unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Store list of compression pointers into the specified header.
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* ptrBuf,
|
||||
int ptrSectionSize) const
|
||||
void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* ptrBuf, int ptrSectionSize)
|
||||
{
|
||||
memset((ptrBuf), 0, ptrSectionSize); // reset the pointer section to 0
|
||||
uint64_t* hdrPtrs = reinterpret_cast<uint64_t*>(ptrBuf);
|
||||
@ -342,7 +334,7 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
//------------------------------------------------------------------------------
|
||||
// Wrapper of above method for backward compatibility
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf) const
|
||||
void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf)
|
||||
{
|
||||
storePtrs(ptrs, reinterpret_cast<char*>(ptrBuf) + HDR_BUF_LEN, HDR_BUF_LEN);
|
||||
}
|
||||
@ -350,10 +342,10 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* pt
|
||||
//------------------------------------------------------------------------------
|
||||
// Initialize the header blocks to be written at the start of a dictionary file.
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::initHdr(
|
||||
void CompressInterface::initHdr(
|
||||
void* hdrBuf, void* ptrBuf, uint32_t colWidth,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType, int hdrSize) const
|
||||
int compressionType, int hdrSize)
|
||||
{
|
||||
memset(hdrBuf, 0, HDR_BUF_LEN);
|
||||
memset(ptrBuf, 0, hdrSize - HDR_BUF_LEN);
|
||||
@ -364,10 +356,10 @@ void IDBCompressInterface::initHdr(
|
||||
//------------------------------------------------------------------------------
|
||||
// Initialize the header blocks to be written at the start of a column file.
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::initHdr(
|
||||
void CompressInterface::initHdr(
|
||||
void* hdrBuf, uint32_t columnWidth,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType) const
|
||||
int compressionType)
|
||||
{
|
||||
memset(hdrBuf, 0, HDR_BUF_LEN * 2);
|
||||
initCompressedDBFileHeader(hdrBuf, columnWidth, columnType,
|
||||
@ -377,7 +369,7 @@ void IDBCompressInterface::initHdr(
|
||||
//------------------------------------------------------------------------------
|
||||
// Get the header's version number
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
|
||||
uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
|
||||
{
|
||||
return (
|
||||
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fVersionNum);
|
||||
@ -386,7 +378,7 @@ uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Set the file's block count
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
|
||||
void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count)
|
||||
{
|
||||
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fBlockCount = count;
|
||||
}
|
||||
@ -394,15 +386,24 @@ void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get the file's block count
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
|
||||
uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
|
||||
{
|
||||
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fBlockCount);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Get the file's compression type
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
|
||||
{
|
||||
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)
|
||||
->fCompressionType);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Set the overall header size
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
|
||||
void CompressInterface::setHdrSize(void* hdrBuf, uint64_t size)
|
||||
{
|
||||
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fHeaderSize = size;
|
||||
}
|
||||
@ -410,7 +411,7 @@ void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get the overall header size
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
|
||||
uint64_t CompressInterface::getHdrSize(const void* hdrBuf)
|
||||
{
|
||||
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fHeaderSize);
|
||||
}
|
||||
@ -419,7 +420,7 @@ uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
|
||||
// Get column type
|
||||
//-----------------------------------------------------------------------------
|
||||
execplan::CalpontSystemCatalog::ColDataType
|
||||
IDBCompressInterface::getColDataType(const void* hdrBuf) const
|
||||
CompressInterface::getColDataType(const void* hdrBuf)
|
||||
{
|
||||
return (
|
||||
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColDataType);
|
||||
@ -428,7 +429,7 @@ IDBCompressInterface::getColDataType(const void* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get column width
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
|
||||
uint64_t CompressInterface::getColumnWidth(const void* hdrBuf)
|
||||
{
|
||||
return (
|
||||
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColumnWidth);
|
||||
@ -437,7 +438,7 @@ uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get LBID by index
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index) const
|
||||
uint64_t CompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index)
|
||||
{
|
||||
if (index < LBID_MAX_SIZE)
|
||||
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDS[index]);
|
||||
@ -447,7 +448,7 @@ uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index
|
||||
//------------------------------------------------------------------------------
|
||||
// Set LBID by index
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const
|
||||
void CompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index)
|
||||
{
|
||||
if (lbid && index < LBID_MAX_SIZE)
|
||||
{
|
||||
@ -457,7 +458,10 @@ void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get LBID count
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t CompressInterface::getLBIDCount(void* hdrBuf)
|
||||
{
|
||||
return reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDCount;
|
||||
}
|
||||
@ -466,9 +470,9 @@ uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
|
||||
// Calculates the chunk and block offset within the chunk for the specified
|
||||
// block number.
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::locateBlock(unsigned int block,
|
||||
unsigned int& chunkIndex,
|
||||
unsigned int& blockOffsetWithinChunk) const
|
||||
void CompressInterface::locateBlock(unsigned int block,
|
||||
unsigned int& chunkIndex,
|
||||
unsigned int& blockOffsetWithinChunk) const
|
||||
{
|
||||
const uint64_t BUFLEN = UNCOMPRESSED_INBUF_LEN;
|
||||
|
||||
@ -485,9 +489,8 @@ void IDBCompressInterface::locateBlock(unsigned int block,
|
||||
// also expand to allow for user requested padding. Lastly, initialize padding
|
||||
// bytes to 0.
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
|
||||
unsigned int& len,
|
||||
unsigned int maxLen) const
|
||||
int CompressInterface::padCompressedChunks(unsigned char* buf, size_t& len,
|
||||
unsigned int maxLen) const
|
||||
{
|
||||
int nPaddingBytes = 0;
|
||||
int nRem = len % COMPRESSED_CHUNK_INCREMENT_SIZE;
|
||||
@ -511,30 +514,203 @@ int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* static */
|
||||
uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
|
||||
// Snappy
|
||||
CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
|
||||
: CompressInterface(numUserPaddingBytes)
|
||||
{
|
||||
}
|
||||
|
||||
int32_t CompressInterfaceSnappy::compress(const char* in, size_t inLen,
|
||||
char* out, size_t* outLen) const
|
||||
{
|
||||
snappy::RawCompress(in, inLen, out, outLen);
|
||||
|
||||
#ifdef DEBUG_COMPRESSION
|
||||
std::cout << "Snappy::compress: inLen " << inLen << ", outLen " << *outLen
|
||||
<< std::endl;
|
||||
#endif
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
int32_t CompressInterfaceSnappy::uncompress(const char* in, size_t inLen,
|
||||
char* out, size_t* outLen) const
|
||||
{
|
||||
size_t realOutLen = 0;
|
||||
auto rc = snappy::GetUncompressedLength(in, inLen, &realOutLen);
|
||||
|
||||
if (!rc || realOutLen > *outLen)
|
||||
{
|
||||
cerr << "snappy::GetUncompressedLength failed. InLen: " << inLen
|
||||
<< ", outLen: " << *outLen << ", realOutLen: " << realOutLen
|
||||
<< endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
rc = snappy::RawUncompress(in, inLen, out);
|
||||
|
||||
if (!rc)
|
||||
{
|
||||
cerr << "snappy::RawUnompress failed. InLen: " << inLen
|
||||
<< ", outLen: " << *outLen << endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_COMPRESSION
|
||||
std::cout << "Snappy::uncompress: inLen " << inLen << ", outLen "
|
||||
<< *outLen << std::endl;
|
||||
#endif
|
||||
*outLen = realOutLen;
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
size_t CompressInterfaceSnappy::maxCompressedSize(size_t uncompSize) const
|
||||
{
|
||||
return (snappy::MaxCompressedLength(uncompSize) + HEADER_SIZE);
|
||||
}
|
||||
|
||||
int IDBCompressInterface::compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const
|
||||
{
|
||||
snappy::RawCompress(in, inLen, out, outLen);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
|
||||
{
|
||||
return !(snappy::RawUncompress(in, inLen, out));
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, size_t* outLen)
|
||||
bool CompressInterfaceSnappy::getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const
|
||||
{
|
||||
return snappy::GetUncompressedLength(in, inLen, outLen);
|
||||
}
|
||||
|
||||
uint8_t CompressInterfaceSnappy::getChunkMagicNumber() const
|
||||
{
|
||||
return CHUNK_MAGIC_SNAPPY;
|
||||
}
|
||||
|
||||
// LZ4
|
||||
CompressInterfaceLZ4::CompressInterfaceLZ4(uint32_t numUserPaddingBytes)
|
||||
: CompressInterface(numUserPaddingBytes)
|
||||
{
|
||||
}
|
||||
|
||||
int32_t CompressInterfaceLZ4::compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const
|
||||
{
|
||||
auto compressedLen = LZ4_compress_default(in, out, inLen, *outLen);
|
||||
|
||||
if (!compressedLen)
|
||||
{
|
||||
cerr << "LZ_compress_default failed. InLen: " << inLen
|
||||
<< ", compressedLen: " << compressedLen << endl;
|
||||
return ERR_COMPRESS;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_COMPRESSION
|
||||
std::cout << "LZ4::compress: inLen " << inLen << ", comressedLen "
|
||||
<< compressedLen << std::endl;
|
||||
#endif
|
||||
|
||||
*outLen = compressedLen;
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
int32_t CompressInterfaceLZ4::uncompress(const char* in, size_t inLen,
|
||||
char* out, size_t* outLen) const
|
||||
{
|
||||
auto decompressedLen = LZ4_decompress_safe(in, out, inLen, *outLen);
|
||||
|
||||
if (decompressedLen < 0)
|
||||
{
|
||||
cerr << "LZ_decompress_safe failed with error code " << decompressedLen
|
||||
<< endl;
|
||||
cerr << "InLen: " << inLen << ", outLen: " << *outLen << endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
*outLen = decompressedLen;
|
||||
|
||||
#ifdef DEBUG_COMPRESSION
|
||||
std::cout << "LZ4::uncompress: inLen " << inLen << ", outLen " << *outLen
|
||||
<< std::endl;
|
||||
#endif
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
size_t CompressInterfaceLZ4::maxCompressedSize(size_t uncompSize) const
|
||||
{
|
||||
return (LZ4_COMPRESSBOUND(uncompSize) + HEADER_SIZE);
|
||||
}
|
||||
|
||||
bool CompressInterfaceLZ4::getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const
|
||||
{
|
||||
// LZ4 does not have such function.
|
||||
idbassert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t CompressInterfaceLZ4::getChunkMagicNumber() const
|
||||
{
|
||||
return CHUNK_MAGIC_LZ4;
|
||||
}
|
||||
|
||||
CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
|
||||
uint32_t numUserPaddingBytes)
|
||||
{
|
||||
switch (compressionType)
|
||||
{
|
||||
case 1:
|
||||
case 2:
|
||||
return new CompressInterfaceSnappy(numUserPaddingBytes);
|
||||
case 3:
|
||||
return new CompressInterfaceLZ4(numUserPaddingBytes);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
|
||||
uint32_t numUserPaddingBytes)
|
||||
{
|
||||
if (compressionName == "SNAPPY")
|
||||
return new CompressInterfaceSnappy(numUserPaddingBytes);
|
||||
else if (compressionName == "LZ4")
|
||||
return new CompressInterfaceLZ4(numUserPaddingBytes);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void initializeCompressorPool(
|
||||
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
|
||||
compressorPool,
|
||||
uint32_t numUserPaddingBytes)
|
||||
{
|
||||
compressorPool = {
|
||||
make_pair(2, std::shared_ptr<CompressInterface>(
|
||||
new CompressInterfaceSnappy(numUserPaddingBytes))),
|
||||
make_pair(3, std::shared_ptr<CompressInterface>(
|
||||
new CompressInterfaceLZ4(numUserPaddingBytes)))};
|
||||
}
|
||||
|
||||
std::shared_ptr<CompressInterface> getCompressorByType(
|
||||
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
|
||||
compressorPool,
|
||||
uint32_t compressionType)
|
||||
{
|
||||
switch (compressionType)
|
||||
{
|
||||
case 1:
|
||||
case 2:
|
||||
if (!compressorPool.count(2))
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
return compressorPool[2];
|
||||
case 3:
|
||||
if (!compressorPool.count(3))
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
return compressorPool[3];
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace compress
|
||||
|
@ -26,6 +26,7 @@
|
||||
#endif
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "calpontsystemcatalog.h"
|
||||
|
||||
@ -41,11 +42,12 @@ namespace compress
|
||||
typedef std::pair<uint64_t, uint64_t> CompChunkPtr;
|
||||
typedef std::vector<CompChunkPtr> CompChunkPtrList;
|
||||
|
||||
class IDBCompressInterface
|
||||
class CompressInterface
|
||||
{
|
||||
public:
|
||||
static const unsigned int HDR_BUF_LEN = 4096;
|
||||
static const unsigned int UNCOMPRESSED_INBUF_LEN = 512 * 1024 * 8;
|
||||
static const uint32_t COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
|
||||
|
||||
// error codes from uncompressBlock()
|
||||
static const int ERR_OK = 0;
|
||||
@ -53,22 +55,29 @@ public:
|
||||
static const int ERR_DECOMPRESS = -2;
|
||||
static const int ERR_BADINPUT = -3;
|
||||
static const int ERR_BADOUTSIZE = -4;
|
||||
static const int ERR_COMPRESS = -5;
|
||||
|
||||
/**
|
||||
* When IDBCompressInterface object is being used to compress a chunk, this
|
||||
* When CompressInterface object is being used to compress a chunk, this
|
||||
* construct can be used to specify the padding added by padCompressedChunks
|
||||
*/
|
||||
EXPORT explicit IDBCompressInterface(unsigned int numUserPaddingBytes = 0);
|
||||
EXPORT explicit CompressInterface(unsigned int numUserPaddingBytes = 0);
|
||||
|
||||
/**
|
||||
* dtor
|
||||
*/
|
||||
EXPORT virtual ~IDBCompressInterface();
|
||||
EXPORT virtual ~CompressInterface() = default;
|
||||
|
||||
/**
|
||||
* see if the algo is available in this lib
|
||||
*/
|
||||
EXPORT bool isCompressionAvail(int compressionType = 0) const;
|
||||
EXPORT static bool isCompressionAvail(int compressionType = 0);
|
||||
|
||||
/**
|
||||
* Returns the maximum compressed size from all available compression
|
||||
* types.
|
||||
*/
|
||||
EXPORT static size_t getMaxCompressedSizeGeneric(size_t inLen);
|
||||
|
||||
/**
|
||||
* Compresses specified "in" buffer of length "inLen" bytes.
|
||||
@ -76,30 +85,31 @@ public:
|
||||
* "out" should be sized using maxCompressedSize() to allow for incompressible data.
|
||||
* Returns 0 if success.
|
||||
*/
|
||||
EXPORT int compressBlock(const char* in,
|
||||
const size_t inLen,
|
||||
unsigned char* out,
|
||||
unsigned int& outLen) const;
|
||||
|
||||
EXPORT int compressBlock(const char* in, const size_t inLen,
|
||||
unsigned char* out, size_t& outLen) const;
|
||||
|
||||
/**
|
||||
* outLen must be initialized with the size of the out buffer before calling uncompressBlock.
|
||||
* On return, outLen will have the number of bytes used in out.
|
||||
*/
|
||||
EXPORT int uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
|
||||
unsigned int& outLen) const;
|
||||
EXPORT int uncompressBlock(const char* in, const size_t inLen,
|
||||
unsigned char* out, size_t& outLen) const;
|
||||
|
||||
/**
|
||||
* This fcn wraps whatever compression algorithm we're using at the time, and
|
||||
* is not specific to blocks on disk.
|
||||
*/
|
||||
EXPORT int compress(const char* in, size_t inLen, char* out, size_t* outLen) const;
|
||||
EXPORT virtual int compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const = 0;
|
||||
|
||||
/**
|
||||
* This fcn wraps whatever compression algorithm we're using at the time, and
|
||||
* is not specific to blocks on disk. The caller needs to make sure out is big
|
||||
* enough to contain the output by using getUncompressedSize().
|
||||
*/
|
||||
EXPORT int uncompress(const char* in, size_t inLen, char* out) const;
|
||||
EXPORT virtual int uncompress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const = 0;
|
||||
|
||||
/**
|
||||
* Initialize header buffer at start of compressed db file.
|
||||
@ -107,23 +117,24 @@ public:
|
||||
* @warning hdrBuf must be at least HDR_BUF_LEN bytes
|
||||
* @warning ptrBuf must be at least (hdrSize-HDR_BUF_LEN) bytes
|
||||
*/
|
||||
EXPORT void initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType, int hdrSize) const;
|
||||
|
||||
EXPORT static void
|
||||
initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType, int hdrSize);
|
||||
/**
|
||||
* Initialize header buffer at start of compressed db file.
|
||||
*
|
||||
* @warning hdrBuf must be at least HDR_BUF_LEN*2 bytes
|
||||
*/
|
||||
EXPORT void initHdr(void* hdrBuf, uint32_t columnWidth,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType) const;
|
||||
EXPORT static void
|
||||
initHdr(void* hdrBuf, uint32_t columnWidth,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType);
|
||||
|
||||
/**
|
||||
* Verify the passed in buffer contains a compressed db file header.
|
||||
*/
|
||||
EXPORT int verifyHdr(const void* hdrBuf) const;
|
||||
EXPORT static int verifyHdr(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* Extracts list of compression pointers from the specified ptr buffer.
|
||||
@ -131,9 +142,8 @@ public:
|
||||
* chunkPtrs is a vector of offset, size pairs for the compressed chunks.
|
||||
* Returns 0 if success.
|
||||
*/
|
||||
EXPORT int getPtrList(const char* ptrBuf,
|
||||
const int ptrBufSize,
|
||||
CompChunkPtrList& chunkPtrs) const;
|
||||
EXPORT static int getPtrList(const char* ptrBuf, const int ptrBufSize,
|
||||
CompChunkPtrList& chunkPtrs);
|
||||
|
||||
/**
|
||||
* Extracts list of compression pointers from the specified header.
|
||||
@ -142,28 +152,28 @@ public:
|
||||
* Note: the pointer passed in is the beginning of the header,
|
||||
* not the pointer section as above.
|
||||
*/
|
||||
EXPORT int getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs) const;
|
||||
EXPORT static int getPtrList(const char* hdrBuf,
|
||||
CompChunkPtrList& chunkPtrs);
|
||||
|
||||
/**
|
||||
* Return the number of chunk pointers contained in the specified ptr buffer.
|
||||
* ptrBuf points to the pointer section taken from the headers.
|
||||
*/
|
||||
EXPORT unsigned int getPtrCount(const char* ptrBuf,
|
||||
const int ptrBufSize) const;
|
||||
EXPORT static unsigned int getPtrCount(const char* ptrBuf,
|
||||
const int ptrBufSize);
|
||||
|
||||
/**
|
||||
* Return the number of chunk pointers contained in the specified header.
|
||||
* hdrBuf points to start of 2 buffer headers from compressed db file.
|
||||
* For non-dictionary columns.
|
||||
*/
|
||||
EXPORT unsigned int getPtrCount(const char* hdrBuf) const;
|
||||
EXPORT static unsigned int getPtrCount(const char* hdrBuf);
|
||||
|
||||
/**
|
||||
* Store vector of pointers into the specified buffer header's pointer section.
|
||||
*/
|
||||
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* hdrBuf,
|
||||
int ptrSectionSize) const;
|
||||
EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* hdrBuf, int ptrSectionSize);
|
||||
|
||||
/**
|
||||
* Store vector of pointers into the specified buffer header.
|
||||
@ -171,14 +181,14 @@ public:
|
||||
* Note: the pointer passed in is the beginning of the header,
|
||||
* not the pointer section as above.
|
||||
*/
|
||||
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs, void* hdrBuf) const;
|
||||
EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* hdrBuf);
|
||||
|
||||
/**
|
||||
* Calculates the chunk, and the block offset within the chunk, for the
|
||||
* specified block number.
|
||||
*/
|
||||
EXPORT void locateBlock(unsigned int block,
|
||||
unsigned int& chunkIndex,
|
||||
EXPORT void locateBlock(unsigned int block, unsigned int& chunkIndex,
|
||||
unsigned int& blockOffsetWithinChunk) const;
|
||||
|
||||
/**
|
||||
@ -187,9 +197,8 @@ public:
|
||||
* maxLen is the maximum size for buf. nonzero return code means the
|
||||
* result output buffer length is > than maxLen.
|
||||
*/
|
||||
EXPORT int padCompressedChunks(unsigned char* buf,
|
||||
unsigned int& len,
|
||||
unsigned int maxLen ) const;
|
||||
EXPORT int padCompressedChunks(unsigned char* buf, size_t& len,
|
||||
unsigned int maxLen) const;
|
||||
|
||||
/*
|
||||
* Mutator methods for the block count in the file
|
||||
@ -197,17 +206,22 @@ public:
|
||||
/**
|
||||
* getVersionNumber
|
||||
*/
|
||||
EXPORT uint64_t getVersionNumber(const void* hdrBuf) const;
|
||||
EXPORT static uint64_t getVersionNumber(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* setBlockCount
|
||||
*/
|
||||
EXPORT void setBlockCount(void* hdrBuf, uint64_t count) const;
|
||||
EXPORT static void setBlockCount(void* hdrBuf, uint64_t count);
|
||||
|
||||
/**
|
||||
* getBlockCount
|
||||
*/
|
||||
EXPORT uint64_t getBlockCount(const void* hdrBuf) const;
|
||||
EXPORT static uint64_t getBlockCount(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* getCompressionType
|
||||
*/
|
||||
EXPORT static uint64_t getCompressionType(const void* hdrBuf);
|
||||
|
||||
/*
|
||||
* Mutator methods for the overall header size
|
||||
@ -215,38 +229,38 @@ public:
|
||||
/**
|
||||
* setHdrSize
|
||||
*/
|
||||
EXPORT void setHdrSize(void* hdrBuf, uint64_t size) const;
|
||||
EXPORT static void setHdrSize(void* hdrBuf, uint64_t size);
|
||||
|
||||
/**
|
||||
* getHdrSize
|
||||
*/
|
||||
EXPORT uint64_t getHdrSize(const void* hdrBuf) const;
|
||||
EXPORT static uint64_t getHdrSize(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* getColumnType
|
||||
*/
|
||||
EXPORT execplan::CalpontSystemCatalog::ColDataType
|
||||
getColDataType(const void* hdrBuf) const;
|
||||
EXPORT static execplan::CalpontSystemCatalog::ColDataType
|
||||
getColDataType(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* getColumnWidth
|
||||
*/
|
||||
EXPORT uint64_t getColumnWidth(const void* hdrBuf) const;
|
||||
EXPORT static uint64_t getColumnWidth(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* getLBIDByIndex
|
||||
*/
|
||||
EXPORT uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index) const;
|
||||
EXPORT static uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index);
|
||||
|
||||
/**
|
||||
* setLBIDByIndex
|
||||
*/
|
||||
EXPORT void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const;
|
||||
EXPORT static void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index);
|
||||
|
||||
/**
|
||||
* getLBIDCount
|
||||
*/
|
||||
EXPORT uint64_t getLBIDCount(void* hdrBuf) const;
|
||||
EXPORT static uint64_t getLBIDCount(void* hdrBuf);
|
||||
|
||||
/**
|
||||
* Mutator methods for the user padding bytes
|
||||
@ -271,97 +285,213 @@ public:
|
||||
* Given an input, uncompressed block, what's the maximum possible output,
|
||||
* compressed size?
|
||||
*/
|
||||
EXPORT static uint64_t maxCompressedSize(uint64_t uncompSize);
|
||||
EXPORT virtual size_t maxCompressedSize(size_t uncompSize) const = 0;
|
||||
|
||||
/**
|
||||
* Given a compressed block, returns the uncompressed size in outLen.
|
||||
* Returns false on error, true on success.
|
||||
*/
|
||||
EXPORT static bool getUncompressedSize(char* in, size_t inLen, size_t* outLen);
|
||||
EXPORT virtual bool getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const = 0;
|
||||
|
||||
protected:
|
||||
protected:
|
||||
virtual uint8_t getChunkMagicNumber() const = 0;
|
||||
|
||||
private:
|
||||
private:
|
||||
//defaults okay
|
||||
//IDBCompressInterface(const IDBCompressInterface& rhs);
|
||||
//IDBCompressInterface& operator=(const IDBCompressInterface& rhs);
|
||||
//CompressInterface(const CompressInterface& rhs);
|
||||
//CompressInterface& operator=(const CompressInterface& rhs);
|
||||
|
||||
unsigned int fNumUserPaddingBytes; // Num bytes to pad compressed chunks
|
||||
};
|
||||
|
||||
class CompressInterfaceSnappy : public CompressInterface
|
||||
{
|
||||
public:
|
||||
EXPORT CompressInterfaceSnappy(uint32_t numUserPaddingBytes = 0);
|
||||
EXPORT ~CompressInterfaceSnappy() = default;
|
||||
/**
|
||||
* Compress the given block using snappy compression API.
|
||||
*/
|
||||
EXPORT int32_t compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const override;
|
||||
/**
|
||||
* Uncompress the given block using snappy compression API.
|
||||
*/
|
||||
EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const override;
|
||||
/**
|
||||
* Get max compressed size for the given `uncompSize` value using snappy
|
||||
* compression API.
|
||||
*/
|
||||
EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
|
||||
|
||||
/**
|
||||
* Get uncompressed size for the given block using snappy
|
||||
* compression API.
|
||||
*/
|
||||
EXPORT
|
||||
bool getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const override;
|
||||
|
||||
protected:
|
||||
uint8_t getChunkMagicNumber() const override;
|
||||
|
||||
private:
|
||||
const uint8_t CHUNK_MAGIC_SNAPPY = 0xfd;
|
||||
};
|
||||
|
||||
class CompressInterfaceLZ4 : public CompressInterface
|
||||
{
|
||||
public:
|
||||
EXPORT CompressInterfaceLZ4(uint32_t numUserPaddingBytes = 0);
|
||||
EXPORT ~CompressInterfaceLZ4() = default;
|
||||
/**
|
||||
* Compress the given block using LZ4 compression API.
|
||||
*/
|
||||
EXPORT int32_t compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const override;
|
||||
/**
|
||||
* Uncompress the given block using LZ4 compression API.
|
||||
*/
|
||||
EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const override;
|
||||
/**
|
||||
* Get max compressed size for the given `uncompSize` value using LZ4
|
||||
* compression API.
|
||||
*/
|
||||
EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
|
||||
|
||||
/**
|
||||
* Get uncompressed size for the given block using LZ4
|
||||
* compression API.
|
||||
*/
|
||||
EXPORT
|
||||
bool getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const override;
|
||||
|
||||
protected:
|
||||
uint8_t getChunkMagicNumber() const override;
|
||||
|
||||
private:
|
||||
const uint8_t CHUNK_MAGIC_LZ4 = 0xfc;
|
||||
};
|
||||
|
||||
using CompressorPool =
|
||||
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>;
|
||||
|
||||
/**
|
||||
* Returns a pointer to the appropriate compression interface based on
|
||||
* `compressionType`. `compressionType` must be greater than 0.
|
||||
* Note: caller is responsible for memory deallocation.
|
||||
*/
|
||||
EXPORT CompressInterface*
|
||||
getCompressInterfaceByType(uint32_t compressionType,
|
||||
uint32_t numUserPaddingBytes = 0);
|
||||
|
||||
/**
|
||||
* Returns a pointer to the appropriate compression interface based on
|
||||
* `compressionName`.
|
||||
* Note: caller is responsible for memory deallocation.
|
||||
*/
|
||||
EXPORT CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
|
||||
uint32_t numUserPaddingBytes = 0);
|
||||
|
||||
/**
|
||||
* Initializes a given `unordered_map` with all available compression
|
||||
* interfaces.
|
||||
*/
|
||||
EXPORT void initializeCompressorPool(CompressorPool& compressorPool,
|
||||
uint32_t numUserPaddingBytes = 0);
|
||||
|
||||
/**
|
||||
* Returns a `shared_ptr` to the appropriate compression interface.
|
||||
*/
|
||||
EXPORT std::shared_ptr<CompressInterface>
|
||||
getCompressorByType(CompressorPool& compressorPool, uint32_t compressionType);
|
||||
|
||||
#ifdef SKIP_IDB_COMPRESSION
|
||||
inline IDBCompressInterface::IDBCompressInterface(unsigned int /*numUserPaddingBytes*/) {}
|
||||
inline IDBCompressInterface::~IDBCompressInterface() {}
|
||||
inline bool IDBCompressInterface::isCompressionAvail(int c) const
|
||||
inline CompressInterface::CompressInterface(unsigned int /*numUserPaddingBytes*/) {}
|
||||
inline bool CompressInterface::isCompressionAvail(int c)
|
||||
{
|
||||
return (c == 0);
|
||||
}
|
||||
inline int IDBCompressInterface::compressBlock(const char*, const size_t, unsigned char*, unsigned int&) const
|
||||
inline int CompressInterface::compressBlock(const char*, const size_t, unsigned char*, size_t&) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out, unsigned int& outLen) const
|
||||
inline int CompressInterface::uncompressBlock(const char* in,
|
||||
const size_t inLen,
|
||||
unsigned char* out,
|
||||
size_t& outLen) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::compress(const char* in, size_t inLen, char* out, size_t* outLen) const
|
||||
inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}
|
||||
inline int CompressInterface::verifyHdr(const void*)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
|
||||
inline void CompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) {}
|
||||
inline void CompressInterface::initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {}
|
||||
inline int CompressInterface::getPtrList(const char*, const int, CompChunkPtrList&)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline unsigned int CompressInterface::getPtrCount(const char*, const int)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline void IDBCompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) const {}
|
||||
inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {}
|
||||
inline int IDBCompressInterface::verifyHdr(const void*) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::getPtrList(const char*, const int, CompChunkPtrList&) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::getPtrList(const char*, CompChunkPtrList&) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline unsigned int IDBCompressInterface::getPtrCount(const char*, const int) const
|
||||
inline unsigned int CompressInterface::getPtrCount(const char*)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline unsigned int IDBCompressInterface::getPtrCount(const char*) const
|
||||
inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) {}
|
||||
inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*) {}
|
||||
inline void
|
||||
CompressInterface::locateBlock(unsigned int block, unsigned int& chunkIndex,
|
||||
unsigned int& blockOffsetWithinChunk) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) const {}
|
||||
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*) const {}
|
||||
inline void IDBCompressInterface::locateBlock(unsigned int block,
|
||||
unsigned int& chunkIndex, unsigned int& blockOffsetWithinChunk) const {}
|
||||
inline int IDBCompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
|
||||
inline int CompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline uint64_t
|
||||
IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
|
||||
inline uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const {}
|
||||
inline uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
|
||||
inline void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count) {}
|
||||
inline uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline void IDBCompressInterface::setHdrSize(void*, uint64_t) const {}
|
||||
inline uint64_t IDBCompressInterface::getHdrSize(const void*) const
|
||||
inline uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline execplan::CalpontSystemCatalog::ColDataType
|
||||
IDBCompressInterface::getColDataType(const void* hdrBuf) const
|
||||
CompressInterface::getColDataType(const void* hdrBuf)
|
||||
{
|
||||
return execplan::CalpontSystemCatalog::ColDataType::UNDEFINED;
|
||||
}
|
||||
inline uint64_t CompressInterface::getColumnWidth(const void* hdrBuf) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline uint64_t getLBID0(const void* hdrBuf) { return 0; }
|
||||
void setLBID0(void* hdrBuf, uint64_t lbid) {}
|
||||
inline uint64_t getLBID1(const void* hdrBuf) { return 0; }
|
||||
void setLBID1(void* hdrBuf, uint64_t lbid) {}
|
||||
inline void CompressInterface::setHdrSize(void*, uint64_t) {}
|
||||
inline uint64_t CompressInterface::getHdrSize(const void*)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
|
||||
: CompressInterface(numUserPaddingBytes)
|
||||
{
|
||||
}
|
||||
inline uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const { return 0; }
|
||||
inline uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
|
||||
{
|
||||
@ -377,8 +507,13 @@ inline bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, si
|
||||
{
|
||||
return false;
|
||||
}
|
||||
uint8_t getChunkMagicNumber() const { return 0; }
|
||||
CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
|
||||
uint32_t numUserPaddingBytes)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#undef EXPORT
|
||||
|
@ -176,25 +176,24 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
|
||||
return -1;
|
||||
}
|
||||
|
||||
compress::IDBCompressInterface decompressor;
|
||||
char hdr1[compress::CompressInterface::HDR_BUF_LEN];
|
||||
nBytes = readFillBuffer( pFile, hdr1, compress::CompressInterface::HDR_BUF_LEN);
|
||||
|
||||
char hdr1[compress::IDBCompressInterface::HDR_BUF_LEN];
|
||||
nBytes = readFillBuffer( pFile, hdr1, compress::IDBCompressInterface::HDR_BUF_LEN);
|
||||
|
||||
if ( nBytes != compress::IDBCompressInterface::HDR_BUF_LEN )
|
||||
if ( nBytes != compress::CompressInterface::HDR_BUF_LEN )
|
||||
{
|
||||
delete pFile;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Verify we are a compressed file
|
||||
if (decompressor.verifyHdr(hdr1) < 0)
|
||||
if (compress::CompressInterface::verifyHdr(hdr1) < 0)
|
||||
{
|
||||
delete pFile;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - compress::IDBCompressInterface::HDR_BUF_LEN;
|
||||
int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
|
||||
compress::CompressInterface::HDR_BUF_LEN;
|
||||
char* hdr2 = new char[ptrSecSize];
|
||||
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
|
||||
|
||||
@ -206,7 +205,8 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
|
||||
}
|
||||
|
||||
compress::CompChunkPtrList chunkPtrs;
|
||||
int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs);
|
||||
int rc = compress::CompressInterface::getPtrList(hdr2, ptrSecSize,
|
||||
chunkPtrs);
|
||||
delete[] hdr2;
|
||||
|
||||
if (rc != 0)
|
||||
|
@ -50,7 +50,10 @@ namespace joiner
|
||||
|
||||
uint64_t uniqueNums = 0;
|
||||
|
||||
JoinPartition::JoinPartition() { }
|
||||
JoinPartition::JoinPartition()
|
||||
{
|
||||
compressor.reset(new compress::CompressInterfaceSnappy());
|
||||
}
|
||||
|
||||
/* This is the ctor used by THJS */
|
||||
JoinPartition::JoinPartition(const RowGroup& lRG,
|
||||
@ -103,6 +106,22 @@ JoinPartition::JoinPartition(const RowGroup& lRG,
|
||||
|
||||
for (int i = 0; i < (int) bucketCount; i++)
|
||||
buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false)));
|
||||
|
||||
string compressionType;
|
||||
try
|
||||
{
|
||||
compressionType =
|
||||
config->getConfig("HashJoin", "TempFileCompressionType");
|
||||
} catch (...) {}
|
||||
|
||||
if (compressionType == "LZ4")
|
||||
{
|
||||
compressor.reset(new compress::CompressInterfaceLZ4());
|
||||
}
|
||||
else
|
||||
{
|
||||
compressor.reset(new compress::CompressInterfaceSnappy());
|
||||
}
|
||||
}
|
||||
|
||||
/* Ctor used by JoinPartition on expansion, creates JP's in filemode */
|
||||
@ -151,6 +170,8 @@ JoinPartition::JoinPartition(const JoinPartition& jp, bool splitMode) :
|
||||
smallRG.setData(&buffer);
|
||||
smallRG.resetRowGroup(0);
|
||||
smallRG.getRow(0, &smallRow);
|
||||
|
||||
compressor = jp.compressor;
|
||||
}
|
||||
|
||||
|
||||
@ -694,6 +715,7 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
|
||||
|
||||
fs.seekg(offset);
|
||||
fs.read((char*) &len, sizeof(len));
|
||||
|
||||
saveErrno = errno;
|
||||
|
||||
if (!fs)
|
||||
@ -735,12 +757,14 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
|
||||
else
|
||||
{
|
||||
size_t uncompressedSize;
|
||||
fs.read((char*) &uncompressedSize, sizeof(uncompressedSize));
|
||||
|
||||
boost::scoped_array<char> buf(new char[len]);
|
||||
|
||||
fs.read(buf.get(), len);
|
||||
saveErrno = errno;
|
||||
|
||||
if (!fs)
|
||||
if (!fs || !uncompressedSize)
|
||||
{
|
||||
fs.close();
|
||||
ostringstream os;
|
||||
@ -749,9 +773,9 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
|
||||
}
|
||||
|
||||
totalBytesRead += len;
|
||||
compressor.getUncompressedSize(buf.get(), len, &uncompressedSize);
|
||||
bs->needAtLeast(uncompressedSize);
|
||||
compressor.uncompress(buf.get(), len, (char*) bs->getInputPtr());
|
||||
compressor->uncompress(buf.get(), len, (char*) bs->getInputPtr(),
|
||||
&uncompressedSize);
|
||||
bs->advanceInputPtr(uncompressedSize);
|
||||
}
|
||||
|
||||
@ -801,13 +825,15 @@ uint64_t JoinPartition::writeByteStream(int which, ByteStream& bs)
|
||||
}
|
||||
else
|
||||
{
|
||||
uint64_t maxSize = compressor.maxCompressedSize(len);
|
||||
size_t actualSize;
|
||||
size_t maxSize = compressor->maxCompressedSize(len);
|
||||
size_t actualSize = maxSize;
|
||||
boost::scoped_array<uint8_t> compressed(new uint8_t[maxSize]);
|
||||
|
||||
compressor.compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
|
||||
ret = actualSize + 4;
|
||||
compressor->compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
|
||||
ret = actualSize + 4 + 8; // sizeof (size_t) == 8. Why 4?
|
||||
fs.write((char*) &actualSize, sizeof(actualSize));
|
||||
// Save uncompressed len.
|
||||
fs.write((char*) &len, sizeof(len));
|
||||
fs.write((char*) compressed.get(), actualSize);
|
||||
saveErrno = errno;
|
||||
|
||||
|
@ -164,7 +164,7 @@ private:
|
||||
|
||||
/* Compression support */
|
||||
bool useCompression;
|
||||
compress::IDBCompressInterface compressor;
|
||||
std::shared_ptr<compress::CompressInterface> compressor;
|
||||
/* TBD: do the reading/writing in one thread, compression/decompression in another */
|
||||
|
||||
/* Some stats for reporting */
|
||||
|
@ -64,6 +64,7 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
|
||||
{
|
||||
config::Config* config = config::Config::makeConfig();
|
||||
string val;
|
||||
string compressionType;
|
||||
|
||||
try
|
||||
{
|
||||
@ -75,6 +76,19 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
|
||||
useCompression = true;
|
||||
else
|
||||
useCompression = false;
|
||||
|
||||
try
|
||||
{
|
||||
compressionType =
|
||||
config->getConfig("NetworkCompression", "NetworkCompression");
|
||||
}
|
||||
catch (...) { }
|
||||
|
||||
auto* compressInterface = compress::getCompressInterfaceByName(compressionType);
|
||||
if (!compressInterface)
|
||||
compressInterface = new compress::CompressInterfaceSnappy();
|
||||
|
||||
alg.reset(compressInterface);
|
||||
}
|
||||
|
||||
Socket* CompressedInetStreamSocket::clone() const
|
||||
@ -87,20 +101,25 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
|
||||
{
|
||||
SBS readBS, ret;
|
||||
size_t uncompressedSize;
|
||||
bool err;
|
||||
|
||||
readBS = InetStreamSocket::read(timeout, isTimeOut, stats);
|
||||
|
||||
if (readBS->length() == 0 || fMagicBuffer == BYTESTREAM_MAGIC)
|
||||
return readBS;
|
||||
|
||||
err = alg.getUncompressedSize((char*) readBS->buf(), readBS->length(), &uncompressedSize);
|
||||
// Read stored len, first 4 bytes.
|
||||
uint32_t storedLen = *(uint32_t*) readBS->buf();
|
||||
|
||||
if (!err)
|
||||
if (!storedLen)
|
||||
return SBS(new ByteStream(0));
|
||||
|
||||
uncompressedSize = storedLen;
|
||||
ret.reset(new ByteStream(uncompressedSize));
|
||||
alg.uncompress((char*) readBS->buf(), readBS->length(), (char*) ret->getInputPtr());
|
||||
|
||||
alg->uncompress((char*) readBS->buf() + HEADER_SIZE,
|
||||
readBS->length() - HEADER_SIZE, (char*) ret->getInputPtr(),
|
||||
&uncompressedSize);
|
||||
|
||||
ret->advanceInputPtr(uncompressedSize);
|
||||
|
||||
return ret;
|
||||
@ -108,15 +127,18 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
|
||||
|
||||
void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats)
|
||||
{
|
||||
size_t outLen = 0;
|
||||
uint32_t len = msg.length();
|
||||
size_t len = msg.length();
|
||||
|
||||
if (useCompression && (len > 512))
|
||||
{
|
||||
ByteStream smsg(alg.maxCompressedSize(len));
|
||||
size_t outLen = alg->maxCompressedSize(len) + HEADER_SIZE;
|
||||
ByteStream smsg(outLen);
|
||||
|
||||
alg.compress((char*) msg.buf(), len, (char*) smsg.getInputPtr(), &outLen);
|
||||
smsg.advanceInputPtr(outLen);
|
||||
alg->compress((char*) msg.buf(), len,
|
||||
(char*) smsg.getInputPtr() + HEADER_SIZE, &outLen);
|
||||
// Save original len.
|
||||
*(uint32_t*) smsg.getInputPtr() = len;
|
||||
smsg.advanceInputPtr(outLen + HEADER_SIZE);
|
||||
|
||||
if (outLen < len)
|
||||
do_write(smsg, COMPRESSED_BYTESTREAM_MAGIC, stats);
|
||||
|
@ -54,8 +54,9 @@ public:
|
||||
virtual const IOSocket accept(const struct timespec* timeout);
|
||||
virtual void connect(const sockaddr* addr);
|
||||
private:
|
||||
compress::IDBCompressInterface alg;
|
||||
std::shared_ptr<compress::CompressInterface> alg;
|
||||
bool useCompression;
|
||||
static const uint32_t HEADER_SIZE = 4;
|
||||
};
|
||||
|
||||
} //namespace messageqcpp
|
||||
|
Reference in New Issue
Block a user