1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

Merge pull request #1842 from denis0x0D/MCOL-987_LZ

MCOL-987 LZ4 compression support.
This commit is contained in:
Roman Nozdrin
2021-07-07 13:13:18 +03:00
committed by GitHub
45 changed files with 1311 additions and 549 deletions

View File

@ -10,7 +10,7 @@ add_definitions(-DNDEBUG)
add_library(compress SHARED ${compress_LIB_SRCS})
target_link_libraries(compress ${SNAPPY_LIBRARIES})
target_link_libraries(compress ${SNAPPY_LIBRARIES} ${LZ4_LIBRARIES})
install(TARGETS compress DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine)

View File

@ -22,12 +22,14 @@
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <unordered_map>
using namespace std;
#include "blocksize.h"
#include "logger.h"
#include "snappy.h"
#include "hasher.h"
#include "lz4.h"
#define IDBCOMP_DLLEXPORT
#include "idbcompress.h"
@ -39,8 +41,7 @@ const uint64_t MAGIC_NUMBER = 0xfdc119a384d0778eULL;
const uint64_t VERSION_NUM1 = 1;
const uint64_t VERSION_NUM2 = 2;
const uint64_t VERSION_NUM3 = 3;
const int COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
const int PTR_SECTION_OFFSET = compress::IDBCompressInterface::HDR_BUF_LEN;
const int PTR_SECTION_OFFSET = compress::CompressInterface::HDR_BUF_LEN;
// version 1.1 of the chunk data has a short header
// QuickLZ compressed data never has the high bit set on the first byte
@ -83,7 +84,7 @@ struct CompressedDBFileHeader
union CompressedDBFileHeaderBlock
{
CompressedDBFileHeader fHeader;
char fDummy[compress::IDBCompressInterface::HDR_BUF_LEN];
char fDummy[compress::CompressInterface::HDR_BUF_LEN];
};
void initCompressedDBFileHeader(
@ -110,53 +111,57 @@ namespace compress
{
#ifndef SKIP_IDB_COMPRESSION
IDBCompressInterface::IDBCompressInterface(unsigned int numUserPaddingBytes) :
CompressInterface::CompressInterface(unsigned int numUserPaddingBytes) :
fNumUserPaddingBytes(numUserPaddingBytes)
{ }
IDBCompressInterface::~IDBCompressInterface()
{ }
/* V1 is really only available for decompression, we kill any DDL using V1 by hand.
* Maybe should have a new api, isDecompressionAvail() ? Any request to compress
* using V1 will silently be changed to V2.
*/
bool IDBCompressInterface::isCompressionAvail(int compressionType) const
/*static*/
bool CompressInterface::isCompressionAvail(int compressionType)
{
if ( (compressionType == 0) ||
(compressionType == 1) ||
(compressionType == 2) )
return true;
return ((compressionType == 0) || (compressionType == 1) ||
(compressionType == 2) || (compressionType == 3));
}
return false;
size_t CompressInterface::getMaxCompressedSizeGeneric(size_t inLen)
{
return std::max(snappy::MaxCompressedLength(inLen),
LZ4_COMPRESSBOUND(inLen)) +
HEADER_SIZE;
}
//------------------------------------------------------------------------------
// Compress a block of data
//------------------------------------------------------------------------------
int IDBCompressInterface::compressBlock(const char* in,
const size_t inLen,
unsigned char* out,
unsigned int& outLen) const
int CompressInterface::compressBlock(const char* in, const size_t inLen,
unsigned char* out, size_t& outLen) const
{
size_t snaplen = 0;
utils::Hasher128 hasher;
// loose input checking.
if (outLen < snappy::MaxCompressedLength(inLen) + HEADER_SIZE)
if (outLen < maxCompressedSize(inLen))
{
cerr << "got outLen = " << outLen << " for inLen = " << inLen << ", needed " <<
(snappy::MaxCompressedLength(inLen) + HEADER_SIZE) << endl;
cerr << "got outLen = " << outLen << " for inLen = " << inLen
<< ", needed " << (maxCompressedSize(inLen)) << endl;
return ERR_BADOUTSIZE;
}
//apparently this never fails?
snappy::RawCompress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]), &snaplen);
auto rc = compress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]),
&outLen);
if (rc != ERR_OK)
{
return rc;
}
snaplen = outLen;
uint8_t* signature = (uint8_t*) &out[SIG_OFFSET];
uint32_t* checksum = (uint32_t*) &out[CHECKSUM_OFFSET];
uint32_t* len = (uint32_t*) &out[LEN_OFFSET];
*signature = CHUNK_MAGIC3;
*signature = getChunkMagicNumber();
*checksum = hasher((char*) &out[HEADER_SIZE], snaplen);
*len = snaplen;
@ -171,51 +176,47 @@ int IDBCompressInterface::compressBlock(const char* in,
//------------------------------------------------------------------------------
// Decompress a block of data
//------------------------------------------------------------------------------
int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
unsigned int& outLen) const
int CompressInterface::uncompressBlock(const char* in, const size_t inLen,
unsigned char* out,
size_t& outLen) const
{
bool comprc = false;
size_t ol = 0;
uint32_t realChecksum;
uint32_t storedChecksum;
uint32_t storedLen;
uint8_t storedMagic;
utils::Hasher128 hasher;
auto tmpOutLen = outLen;
outLen = 0;
if (inLen < 1)
{
return ERR_BADINPUT;
}
storedMagic = *((uint8_t*) &in[SIG_OFFSET]);
if (storedMagic == CHUNK_MAGIC3)
if (storedMagic == getChunkMagicNumber())
{
if (inLen < HEADER_SIZE)
{
return ERR_BADINPUT;
}
storedChecksum = *((uint32_t*) &in[CHECKSUM_OFFSET]);
storedLen = *((uint32_t*) (&in[LEN_OFFSET]));
if (inLen < storedLen + HEADER_SIZE)
{
return ERR_BADINPUT;
}
realChecksum = hasher(&in[HEADER_SIZE], storedLen);
if (storedChecksum != realChecksum)
{
return ERR_CHECKSUM;
auto rc = uncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out), &tmpOutLen);
if (rc != ERR_OK)
{
cerr << "uncompressBlock failed!" << endl;
return ERR_DECOMPRESS;
}
comprc = snappy::GetUncompressedLength(&in[HEADER_SIZE], storedLen, &ol) &&
snappy::RawUncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out));
outLen = tmpOutLen;
}
else
{
@ -223,13 +224,6 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
return ERR_BADINPUT;
}
if (!comprc)
{
cerr << "decomp failed!" << endl;
return ERR_DECOMPRESS;
}
outLen = ol;
//cerr << "ub: " << inLen << " : " << outLen << endl;
return ERR_OK;
@ -238,7 +232,7 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
//------------------------------------------------------------------------------
// Verify the passed in buffer contains a valid compression file header.
//------------------------------------------------------------------------------
int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
int CompressInterface::verifyHdr(const void* hdrBuf)
{
const CompressedDBFileHeader* hdr = reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf);
@ -255,9 +249,8 @@ int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
// Extract compression pointer information out of the pointer buffer that is
// passed in. ptrBuf points to the pointer section of the compression hdr.
//------------------------------------------------------------------------------
int IDBCompressInterface::getPtrList(const char* ptrBuf,
const int ptrBufSize,
CompChunkPtrList& chunkPtrs ) const
int CompressInterface::getPtrList(const char* ptrBuf, const int ptrBufSize,
CompChunkPtrList& chunkPtrs)
{
int rc = 0;
chunkPtrs.clear();
@ -285,7 +278,7 @@ int IDBCompressInterface::getPtrList(const char* ptrBuf,
// one for the file header, and one for the list of pointers.
// Wrapper of above method for backward compatibility.
//------------------------------------------------------------------------------
int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs ) const
int CompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs )
{
return getPtrList(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN, chunkPtrs);
}
@ -293,8 +286,8 @@ int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunk
//------------------------------------------------------------------------------
// Count the number of chunk pointers in the pointer header(s)
//------------------------------------------------------------------------------
unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
const int ptrBufSize) const
unsigned int CompressInterface::getPtrCount(const char* ptrBuf,
const int ptrBufSize)
{
unsigned int chunkCount = 0;
@ -318,7 +311,7 @@ unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
// This should not be used for compressed dictionary files which could have
// more compression chunk headers.
//------------------------------------------------------------------------------
unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
unsigned int CompressInterface::getPtrCount(const char* hdrBuf)
{
return getPtrCount(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN);
}
@ -326,9 +319,8 @@ unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
//------------------------------------------------------------------------------
// Store list of compression pointers into the specified header.
//------------------------------------------------------------------------------
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
void* ptrBuf,
int ptrSectionSize) const
void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
void* ptrBuf, int ptrSectionSize)
{
memset((ptrBuf), 0, ptrSectionSize); // reset the pointer section to 0
uint64_t* hdrPtrs = reinterpret_cast<uint64_t*>(ptrBuf);
@ -342,7 +334,7 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
//------------------------------------------------------------------------------
// Wrapper of above method for backward compatibility
//------------------------------------------------------------------------------
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf) const
void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf)
{
storePtrs(ptrs, reinterpret_cast<char*>(ptrBuf) + HDR_BUF_LEN, HDR_BUF_LEN);
}
@ -350,10 +342,10 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* pt
//------------------------------------------------------------------------------
// Initialize the header blocks to be written at the start of a dictionary file.
//------------------------------------------------------------------------------
void IDBCompressInterface::initHdr(
void CompressInterface::initHdr(
void* hdrBuf, void* ptrBuf, uint32_t colWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType, int hdrSize) const
int compressionType, int hdrSize)
{
memset(hdrBuf, 0, HDR_BUF_LEN);
memset(ptrBuf, 0, hdrSize - HDR_BUF_LEN);
@ -364,10 +356,10 @@ void IDBCompressInterface::initHdr(
//------------------------------------------------------------------------------
// Initialize the header blocks to be written at the start of a column file.
//------------------------------------------------------------------------------
void IDBCompressInterface::initHdr(
void CompressInterface::initHdr(
void* hdrBuf, uint32_t columnWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType) const
int compressionType)
{
memset(hdrBuf, 0, HDR_BUF_LEN * 2);
initCompressedDBFileHeader(hdrBuf, columnWidth, columnType,
@ -377,7 +369,7 @@ void IDBCompressInterface::initHdr(
//------------------------------------------------------------------------------
// Get the header's version number
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
{
return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fVersionNum);
@ -386,7 +378,7 @@ uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
//------------------------------------------------------------------------------
// Set the file's block count
//------------------------------------------------------------------------------
void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count)
{
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fBlockCount = count;
}
@ -394,15 +386,24 @@ void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
//------------------------------------------------------------------------------
// Get the file's block count
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
{
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fBlockCount);
}
//------------------------------------------------------------------------------
// Get the file's compression type
//------------------------------------------------------------------------------
uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
{
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)
->fCompressionType);
}
//------------------------------------------------------------------------------
// Set the overall header size
//------------------------------------------------------------------------------
void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
void CompressInterface::setHdrSize(void* hdrBuf, uint64_t size)
{
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fHeaderSize = size;
}
@ -410,7 +411,7 @@ void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
//------------------------------------------------------------------------------
// Get the overall header size
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
uint64_t CompressInterface::getHdrSize(const void* hdrBuf)
{
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fHeaderSize);
}
@ -419,7 +420,7 @@ uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
// Get column type
//-----------------------------------------------------------------------------
execplan::CalpontSystemCatalog::ColDataType
IDBCompressInterface::getColDataType(const void* hdrBuf) const
CompressInterface::getColDataType(const void* hdrBuf)
{
return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColDataType);
@ -428,7 +429,7 @@ IDBCompressInterface::getColDataType(const void* hdrBuf) const
//------------------------------------------------------------------------------
// Get column width
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
uint64_t CompressInterface::getColumnWidth(const void* hdrBuf)
{
return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColumnWidth);
@ -437,7 +438,7 @@ uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
//------------------------------------------------------------------------------
// Get LBID by index
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index) const
uint64_t CompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index)
{
if (index < LBID_MAX_SIZE)
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDS[index]);
@ -447,7 +448,7 @@ uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index
//------------------------------------------------------------------------------
// Set LBID by index
//------------------------------------------------------------------------------
void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const
void CompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index)
{
if (lbid && index < LBID_MAX_SIZE)
{
@ -457,7 +458,10 @@ void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t
}
}
uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
//------------------------------------------------------------------------------
// Get LBID count
//------------------------------------------------------------------------------
uint64_t CompressInterface::getLBIDCount(void* hdrBuf)
{
return reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDCount;
}
@ -466,9 +470,9 @@ uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
// Calculates the chunk and block offset within the chunk for the specified
// block number.
//------------------------------------------------------------------------------
void IDBCompressInterface::locateBlock(unsigned int block,
unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const
void CompressInterface::locateBlock(unsigned int block,
unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const
{
const uint64_t BUFLEN = UNCOMPRESSED_INBUF_LEN;
@ -485,9 +489,8 @@ void IDBCompressInterface::locateBlock(unsigned int block,
// also expand to allow for user requested padding. Lastly, initialize padding
// bytes to 0.
//------------------------------------------------------------------------------
int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
unsigned int& len,
unsigned int maxLen) const
int CompressInterface::padCompressedChunks(unsigned char* buf, size_t& len,
unsigned int maxLen) const
{
int nPaddingBytes = 0;
int nRem = len % COMPRESSED_CHUNK_INCREMENT_SIZE;
@ -511,30 +514,203 @@ int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
return 0;
}
/* static */
uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
// Snappy
CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
: CompressInterface(numUserPaddingBytes)
{
}
int32_t CompressInterfaceSnappy::compress(const char* in, size_t inLen,
char* out, size_t* outLen) const
{
snappy::RawCompress(in, inLen, out, outLen);
#ifdef DEBUG_COMPRESSION
std::cout << "Snappy::compress: inLen " << inLen << ", outLen " << *outLen
<< std::endl;
#endif
return ERR_OK;
}
int32_t CompressInterfaceSnappy::uncompress(const char* in, size_t inLen,
char* out, size_t* outLen) const
{
size_t realOutLen = 0;
auto rc = snappy::GetUncompressedLength(in, inLen, &realOutLen);
if (!rc || realOutLen > *outLen)
{
cerr << "snappy::GetUncompressedLength failed. InLen: " << inLen
<< ", outLen: " << *outLen << ", realOutLen: " << realOutLen
<< endl;
return ERR_DECOMPRESS;
}
rc = snappy::RawUncompress(in, inLen, out);
if (!rc)
{
cerr << "snappy::RawUnompress failed. InLen: " << inLen
<< ", outLen: " << *outLen << endl;
return ERR_DECOMPRESS;
}
#ifdef DEBUG_COMPRESSION
std::cout << "Snappy::uncompress: inLen " << inLen << ", outLen "
<< *outLen << std::endl;
#endif
*outLen = realOutLen;
return ERR_OK;
}
size_t CompressInterfaceSnappy::maxCompressedSize(size_t uncompSize) const
{
return (snappy::MaxCompressedLength(uncompSize) + HEADER_SIZE);
}
int IDBCompressInterface::compress(const char* in, size_t inLen, char* out,
size_t* outLen) const
{
snappy::RawCompress(in, inLen, out, outLen);
return 0;
}
int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
{
return !(snappy::RawUncompress(in, inLen, out));
}
/* static */
bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, size_t* outLen)
bool CompressInterfaceSnappy::getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const
{
return snappy::GetUncompressedLength(in, inLen, outLen);
}
uint8_t CompressInterfaceSnappy::getChunkMagicNumber() const
{
return CHUNK_MAGIC_SNAPPY;
}
// LZ4
CompressInterfaceLZ4::CompressInterfaceLZ4(uint32_t numUserPaddingBytes)
: CompressInterface(numUserPaddingBytes)
{
}
int32_t CompressInterfaceLZ4::compress(const char* in, size_t inLen, char* out,
size_t* outLen) const
{
auto compressedLen = LZ4_compress_default(in, out, inLen, *outLen);
if (!compressedLen)
{
cerr << "LZ_compress_default failed. InLen: " << inLen
<< ", compressedLen: " << compressedLen << endl;
return ERR_COMPRESS;
}
#ifdef DEBUG_COMPRESSION
std::cout << "LZ4::compress: inLen " << inLen << ", comressedLen "
<< compressedLen << std::endl;
#endif
*outLen = compressedLen;
return ERR_OK;
}
int32_t CompressInterfaceLZ4::uncompress(const char* in, size_t inLen,
char* out, size_t* outLen) const
{
auto decompressedLen = LZ4_decompress_safe(in, out, inLen, *outLen);
if (decompressedLen < 0)
{
cerr << "LZ_decompress_safe failed with error code " << decompressedLen
<< endl;
cerr << "InLen: " << inLen << ", outLen: " << *outLen << endl;
return ERR_DECOMPRESS;
}
*outLen = decompressedLen;
#ifdef DEBUG_COMPRESSION
std::cout << "LZ4::uncompress: inLen " << inLen << ", outLen " << *outLen
<< std::endl;
#endif
return ERR_OK;
}
size_t CompressInterfaceLZ4::maxCompressedSize(size_t uncompSize) const
{
return (LZ4_COMPRESSBOUND(uncompSize) + HEADER_SIZE);
}
bool CompressInterfaceLZ4::getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const
{
// LZ4 does not have such function.
idbassert(false);
return false;
}
uint8_t CompressInterfaceLZ4::getChunkMagicNumber() const
{
return CHUNK_MAGIC_LZ4;
}
CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
uint32_t numUserPaddingBytes)
{
switch (compressionType)
{
case 1:
case 2:
return new CompressInterfaceSnappy(numUserPaddingBytes);
case 3:
return new CompressInterfaceLZ4(numUserPaddingBytes);
}
return nullptr;
}
CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
uint32_t numUserPaddingBytes)
{
if (compressionName == "SNAPPY")
return new CompressInterfaceSnappy(numUserPaddingBytes);
else if (compressionName == "LZ4")
return new CompressInterfaceLZ4(numUserPaddingBytes);
return nullptr;
}
void initializeCompressorPool(
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
compressorPool,
uint32_t numUserPaddingBytes)
{
compressorPool = {
make_pair(2, std::shared_ptr<CompressInterface>(
new CompressInterfaceSnappy(numUserPaddingBytes))),
make_pair(3, std::shared_ptr<CompressInterface>(
new CompressInterfaceLZ4(numUserPaddingBytes)))};
}
std::shared_ptr<CompressInterface> getCompressorByType(
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
compressorPool,
uint32_t compressionType)
{
switch (compressionType)
{
case 1:
case 2:
if (!compressorPool.count(2))
{
return nullptr;
}
return compressorPool[2];
case 3:
if (!compressorPool.count(3))
{
return nullptr;
}
return compressorPool[3];
}
return nullptr;
}
#endif
} // namespace compress

View File

@ -26,6 +26,7 @@
#endif
#include <vector>
#include <utility>
#include <unordered_map>
#include "calpontsystemcatalog.h"
@ -41,11 +42,12 @@ namespace compress
typedef std::pair<uint64_t, uint64_t> CompChunkPtr;
typedef std::vector<CompChunkPtr> CompChunkPtrList;
class IDBCompressInterface
class CompressInterface
{
public:
static const unsigned int HDR_BUF_LEN = 4096;
static const unsigned int UNCOMPRESSED_INBUF_LEN = 512 * 1024 * 8;
static const uint32_t COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
// error codes from uncompressBlock()
static const int ERR_OK = 0;
@ -53,22 +55,29 @@ public:
static const int ERR_DECOMPRESS = -2;
static const int ERR_BADINPUT = -3;
static const int ERR_BADOUTSIZE = -4;
static const int ERR_COMPRESS = -5;
/**
* When IDBCompressInterface object is being used to compress a chunk, this
* When CompressInterface object is being used to compress a chunk, this
* construct can be used to specify the padding added by padCompressedChunks
*/
EXPORT explicit IDBCompressInterface(unsigned int numUserPaddingBytes = 0);
EXPORT explicit CompressInterface(unsigned int numUserPaddingBytes = 0);
/**
* dtor
*/
EXPORT virtual ~IDBCompressInterface();
EXPORT virtual ~CompressInterface() = default;
/**
* see if the algo is available in this lib
*/
EXPORT bool isCompressionAvail(int compressionType = 0) const;
EXPORT static bool isCompressionAvail(int compressionType = 0);
/**
* Returns the maximum compressed size from all available compression
* types.
*/
EXPORT static size_t getMaxCompressedSizeGeneric(size_t inLen);
/**
* Compresses specified "in" buffer of length "inLen" bytes.
@ -76,30 +85,31 @@ public:
* "out" should be sized using maxCompressedSize() to allow for incompressible data.
* Returns 0 if success.
*/
EXPORT int compressBlock(const char* in,
const size_t inLen,
unsigned char* out,
unsigned int& outLen) const;
EXPORT int compressBlock(const char* in, const size_t inLen,
unsigned char* out, size_t& outLen) const;
/**
* outLen must be initialized with the size of the out buffer before calling uncompressBlock.
* On return, outLen will have the number of bytes used in out.
*/
EXPORT int uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
unsigned int& outLen) const;
EXPORT int uncompressBlock(const char* in, const size_t inLen,
unsigned char* out, size_t& outLen) const;
/**
* This fcn wraps whatever compression algorithm we're using at the time, and
* is not specific to blocks on disk.
*/
EXPORT int compress(const char* in, size_t inLen, char* out, size_t* outLen) const;
EXPORT virtual int compress(const char* in, size_t inLen, char* out,
size_t* outLen) const = 0;
/**
* This fcn wraps whatever compression algorithm we're using at the time, and
* is not specific to blocks on disk. The caller needs to make sure out is big
* enough to contain the output by using getUncompressedSize().
*/
EXPORT int uncompress(const char* in, size_t inLen, char* out) const;
EXPORT virtual int uncompress(const char* in, size_t inLen, char* out,
size_t* outLen) const = 0;
/**
* Initialize header buffer at start of compressed db file.
@ -107,23 +117,24 @@ public:
* @warning hdrBuf must be at least HDR_BUF_LEN bytes
* @warning ptrBuf must be at least (hdrSize-HDR_BUF_LEN) bytes
*/
EXPORT void initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType, int hdrSize) const;
EXPORT static void
initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType, int hdrSize);
/**
* Initialize header buffer at start of compressed db file.
*
* @warning hdrBuf must be at least HDR_BUF_LEN*2 bytes
*/
EXPORT void initHdr(void* hdrBuf, uint32_t columnWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType) const;
EXPORT static void
initHdr(void* hdrBuf, uint32_t columnWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType);
/**
* Verify the passed in buffer contains a compressed db file header.
*/
EXPORT int verifyHdr(const void* hdrBuf) const;
EXPORT static int verifyHdr(const void* hdrBuf);
/**
* Extracts list of compression pointers from the specified ptr buffer.
@ -131,9 +142,8 @@ public:
* chunkPtrs is a vector of offset, size pairs for the compressed chunks.
* Returns 0 if success.
*/
EXPORT int getPtrList(const char* ptrBuf,
const int ptrBufSize,
CompChunkPtrList& chunkPtrs) const;
EXPORT static int getPtrList(const char* ptrBuf, const int ptrBufSize,
CompChunkPtrList& chunkPtrs);
/**
* Extracts list of compression pointers from the specified header.
@ -142,28 +152,28 @@ public:
* Note: the pointer passed in is the beginning of the header,
* not the pointer section as above.
*/
EXPORT int getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs) const;
EXPORT static int getPtrList(const char* hdrBuf,
CompChunkPtrList& chunkPtrs);
/**
* Return the number of chunk pointers contained in the specified ptr buffer.
* ptrBuf points to the pointer section taken from the headers.
*/
EXPORT unsigned int getPtrCount(const char* ptrBuf,
const int ptrBufSize) const;
EXPORT static unsigned int getPtrCount(const char* ptrBuf,
const int ptrBufSize);
/**
* Return the number of chunk pointers contained in the specified header.
* hdrBuf points to start of 2 buffer headers from compressed db file.
* For non-dictionary columns.
*/
EXPORT unsigned int getPtrCount(const char* hdrBuf) const;
EXPORT static unsigned int getPtrCount(const char* hdrBuf);
/**
* Store vector of pointers into the specified buffer header's pointer section.
*/
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs,
void* hdrBuf,
int ptrSectionSize) const;
EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
void* hdrBuf, int ptrSectionSize);
/**
* Store vector of pointers into the specified buffer header.
@ -171,14 +181,14 @@ public:
* Note: the pointer passed in is the beginning of the header,
* not the pointer section as above.
*/
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs, void* hdrBuf) const;
EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
void* hdrBuf);
/**
* Calculates the chunk, and the block offset within the chunk, for the
* specified block number.
*/
EXPORT void locateBlock(unsigned int block,
unsigned int& chunkIndex,
EXPORT void locateBlock(unsigned int block, unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const;
/**
@ -187,9 +197,8 @@ public:
* maxLen is the maximum size for buf. nonzero return code means the
* result output buffer length is > than maxLen.
*/
EXPORT int padCompressedChunks(unsigned char* buf,
unsigned int& len,
unsigned int maxLen ) const;
EXPORT int padCompressedChunks(unsigned char* buf, size_t& len,
unsigned int maxLen) const;
/*
* Mutator methods for the block count in the file
@ -197,17 +206,22 @@ public:
/**
* getVersionNumber
*/
EXPORT uint64_t getVersionNumber(const void* hdrBuf) const;
EXPORT static uint64_t getVersionNumber(const void* hdrBuf);
/**
* setBlockCount
*/
EXPORT void setBlockCount(void* hdrBuf, uint64_t count) const;
EXPORT static void setBlockCount(void* hdrBuf, uint64_t count);
/**
* getBlockCount
*/
EXPORT uint64_t getBlockCount(const void* hdrBuf) const;
EXPORT static uint64_t getBlockCount(const void* hdrBuf);
/**
* getCompressionType
*/
EXPORT static uint64_t getCompressionType(const void* hdrBuf);
/*
* Mutator methods for the overall header size
@ -215,38 +229,38 @@ public:
/**
* setHdrSize
*/
EXPORT void setHdrSize(void* hdrBuf, uint64_t size) const;
EXPORT static void setHdrSize(void* hdrBuf, uint64_t size);
/**
* getHdrSize
*/
EXPORT uint64_t getHdrSize(const void* hdrBuf) const;
EXPORT static uint64_t getHdrSize(const void* hdrBuf);
/**
* getColumnType
*/
EXPORT execplan::CalpontSystemCatalog::ColDataType
getColDataType(const void* hdrBuf) const;
EXPORT static execplan::CalpontSystemCatalog::ColDataType
getColDataType(const void* hdrBuf);
/**
* getColumnWidth
*/
EXPORT uint64_t getColumnWidth(const void* hdrBuf) const;
EXPORT static uint64_t getColumnWidth(const void* hdrBuf);
/**
* getLBIDByIndex
*/
EXPORT uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index) const;
EXPORT static uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index);
/**
* setLBIDByIndex
*/
EXPORT void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const;
EXPORT static void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index);
/**
* getLBIDCount
*/
EXPORT uint64_t getLBIDCount(void* hdrBuf) const;
EXPORT static uint64_t getLBIDCount(void* hdrBuf);
/**
* Mutator methods for the user padding bytes
@ -271,97 +285,213 @@ public:
* Given an input, uncompressed block, what's the maximum possible output,
* compressed size?
*/
EXPORT static uint64_t maxCompressedSize(uint64_t uncompSize);
EXPORT virtual size_t maxCompressedSize(size_t uncompSize) const = 0;
/**
* Given a compressed block, returns the uncompressed size in outLen.
* Returns false on error, true on success.
*/
EXPORT static bool getUncompressedSize(char* in, size_t inLen, size_t* outLen);
EXPORT virtual bool getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const = 0;
protected:
protected:
virtual uint8_t getChunkMagicNumber() const = 0;
private:
private:
//defaults okay
//IDBCompressInterface(const IDBCompressInterface& rhs);
//IDBCompressInterface& operator=(const IDBCompressInterface& rhs);
//CompressInterface(const CompressInterface& rhs);
//CompressInterface& operator=(const CompressInterface& rhs);
unsigned int fNumUserPaddingBytes; // Num bytes to pad compressed chunks
};
class CompressInterfaceSnappy : public CompressInterface
{
public:
EXPORT CompressInterfaceSnappy(uint32_t numUserPaddingBytes = 0);
EXPORT ~CompressInterfaceSnappy() = default;
/**
* Compress the given block using snappy compression API.
*/
EXPORT int32_t compress(const char* in, size_t inLen, char* out,
size_t* outLen) const override;
/**
* Uncompress the given block using snappy compression API.
*/
EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
size_t* outLen) const override;
/**
* Get max compressed size for the given `uncompSize` value using snappy
* compression API.
*/
EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
/**
* Get uncompressed size for the given block using snappy
* compression API.
*/
EXPORT
bool getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const override;
protected:
uint8_t getChunkMagicNumber() const override;
private:
const uint8_t CHUNK_MAGIC_SNAPPY = 0xfd;
};
class CompressInterfaceLZ4 : public CompressInterface
{
public:
EXPORT CompressInterfaceLZ4(uint32_t numUserPaddingBytes = 0);
EXPORT ~CompressInterfaceLZ4() = default;
/**
* Compress the given block using LZ4 compression API.
*/
EXPORT int32_t compress(const char* in, size_t inLen, char* out,
size_t* outLen) const override;
/**
* Uncompress the given block using LZ4 compression API.
*/
EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
size_t* outLen) const override;
/**
* Get max compressed size for the given `uncompSize` value using LZ4
* compression API.
*/
EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
/**
* Get uncompressed size for the given block using LZ4
* compression API.
*/
EXPORT
bool getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const override;
protected:
uint8_t getChunkMagicNumber() const override;
private:
const uint8_t CHUNK_MAGIC_LZ4 = 0xfc;
};
using CompressorPool =
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>;
/**
* Returns a pointer to the appropriate compression interface based on
* `compressionType`. `compressionType` must be greater than 0.
* Note: caller is responsible for memory deallocation.
*/
EXPORT CompressInterface*
getCompressInterfaceByType(uint32_t compressionType,
uint32_t numUserPaddingBytes = 0);
/**
* Returns a pointer to the appropriate compression interface based on
* `compressionName`.
* Note: caller is responsible for memory deallocation.
*/
EXPORT CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
uint32_t numUserPaddingBytes = 0);
/**
* Initializes a given `unordered_map` with all available compression
* interfaces.
*/
EXPORT void initializeCompressorPool(CompressorPool& compressorPool,
uint32_t numUserPaddingBytes = 0);
/**
* Returns a `shared_ptr` to the appropriate compression interface.
*/
EXPORT std::shared_ptr<CompressInterface>
getCompressorByType(CompressorPool& compressorPool, uint32_t compressionType);
#ifdef SKIP_IDB_COMPRESSION
inline IDBCompressInterface::IDBCompressInterface(unsigned int /*numUserPaddingBytes*/) {}
inline IDBCompressInterface::~IDBCompressInterface() {}
inline bool IDBCompressInterface::isCompressionAvail(int c) const
inline CompressInterface::CompressInterface(unsigned int /*numUserPaddingBytes*/) {}
inline bool CompressInterface::isCompressionAvail(int c)
{
return (c == 0);
}
inline int IDBCompressInterface::compressBlock(const char*, const size_t, unsigned char*, unsigned int&) const
inline int CompressInterface::compressBlock(const char*, const size_t, unsigned char*, size_t&) const
{
return -1;
}
inline int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out, unsigned int& outLen) const
inline int CompressInterface::uncompressBlock(const char* in,
const size_t inLen,
unsigned char* out,
size_t& outLen) const
{
return -1;
}
inline int IDBCompressInterface::compress(const char* in, size_t inLen, char* out, size_t* outLen) const
inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}
inline int CompressInterface::verifyHdr(const void*)
{
return -1;
}
inline int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
inline void CompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) {}
inline void CompressInterface::initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {}
inline int CompressInterface::getPtrList(const char*, const int, CompChunkPtrList&)
{
return -1;
}
inline unsigned int CompressInterface::getPtrCount(const char*, const int)
{
return 0;
}
inline void IDBCompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) const {}
inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {}
inline int IDBCompressInterface::verifyHdr(const void*) const
{
return -1;
}
inline int IDBCompressInterface::getPtrList(const char*, const int, CompChunkPtrList&) const
{
return -1;
}
inline int IDBCompressInterface::getPtrList(const char*, CompChunkPtrList&) const
{
return -1;
}
inline unsigned int IDBCompressInterface::getPtrCount(const char*, const int) const
inline unsigned int CompressInterface::getPtrCount(const char*)
{
return 0;
}
inline unsigned int IDBCompressInterface::getPtrCount(const char*) const
inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) {}
inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*) {}
inline void
CompressInterface::locateBlock(unsigned int block, unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const
{
return 0;
}
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) const {}
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*) const {}
inline void IDBCompressInterface::locateBlock(unsigned int block,
unsigned int& chunkIndex, unsigned int& blockOffsetWithinChunk) const {}
inline int IDBCompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
inline int CompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
{
return -1;
}
inline uint64_t
IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
inline uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
{
return 0;
}
inline void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const {}
inline uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
inline void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count) {}
inline uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
{
return 0;
}
inline void IDBCompressInterface::setHdrSize(void*, uint64_t) const {}
inline uint64_t IDBCompressInterface::getHdrSize(const void*) const
inline uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
{
return 0;
}
inline execplan::CalpontSystemCatalog::ColDataType
IDBCompressInterface::getColDataType(const void* hdrBuf) const
CompressInterface::getColDataType(const void* hdrBuf)
{
return execplan::CalpontSystemCatalog::ColDataType::UNDEFINED;
}
inline uint64_t CompressInterface::getColumnWidth(const void* hdrBuf) const
{
return 0;
}
inline uint64_t getLBID0(const void* hdrBuf) { return 0; }
void setLBID0(void* hdrBuf, uint64_t lbid) {}
inline uint64_t getLBID1(const void* hdrBuf) { return 0; }
void setLBID1(void* hdrBuf, uint64_t lbid) {}
inline void CompressInterface::setHdrSize(void*, uint64_t) {}
inline uint64_t CompressInterface::getHdrSize(const void*)
{
return 0;
}
CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
: CompressInterface(numUserPaddingBytes)
{
}
inline uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const { return 0; }
inline uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
{
@ -377,8 +507,13 @@ inline bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, si
{
return false;
}
uint8_t getChunkMagicNumber() const { return 0; }
CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
uint32_t numUserPaddingBytes)
{
return nullptr;
}
#endif
}
#undef EXPORT

View File

@ -176,25 +176,24 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
return -1;
}
compress::IDBCompressInterface decompressor;
char hdr1[compress::CompressInterface::HDR_BUF_LEN];
nBytes = readFillBuffer( pFile, hdr1, compress::CompressInterface::HDR_BUF_LEN);
char hdr1[compress::IDBCompressInterface::HDR_BUF_LEN];
nBytes = readFillBuffer( pFile, hdr1, compress::IDBCompressInterface::HDR_BUF_LEN);
if ( nBytes != compress::IDBCompressInterface::HDR_BUF_LEN )
if ( nBytes != compress::CompressInterface::HDR_BUF_LEN )
{
delete pFile;
return -1;
}
// Verify we are a compressed file
if (decompressor.verifyHdr(hdr1) < 0)
if (compress::CompressInterface::verifyHdr(hdr1) < 0)
{
delete pFile;
return -1;
}
int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - compress::IDBCompressInterface::HDR_BUF_LEN;
int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
compress::CompressInterface::HDR_BUF_LEN;
char* hdr2 = new char[ptrSecSize];
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
@ -206,7 +205,8 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
}
compress::CompChunkPtrList chunkPtrs;
int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs);
int rc = compress::CompressInterface::getPtrList(hdr2, ptrSecSize,
chunkPtrs);
delete[] hdr2;
if (rc != 0)

View File

@ -50,7 +50,10 @@ namespace joiner
uint64_t uniqueNums = 0;
JoinPartition::JoinPartition() { }
JoinPartition::JoinPartition()
{
compressor.reset(new compress::CompressInterfaceSnappy());
}
/* This is the ctor used by THJS */
JoinPartition::JoinPartition(const RowGroup& lRG,
@ -103,6 +106,22 @@ JoinPartition::JoinPartition(const RowGroup& lRG,
for (int i = 0; i < (int) bucketCount; i++)
buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false)));
string compressionType;
try
{
compressionType =
config->getConfig("HashJoin", "TempFileCompressionType");
} catch (...) {}
if (compressionType == "LZ4")
{
compressor.reset(new compress::CompressInterfaceLZ4());
}
else
{
compressor.reset(new compress::CompressInterfaceSnappy());
}
}
/* Ctor used by JoinPartition on expansion, creates JP's in filemode */
@ -151,6 +170,8 @@ JoinPartition::JoinPartition(const JoinPartition& jp, bool splitMode) :
smallRG.setData(&buffer);
smallRG.resetRowGroup(0);
smallRG.getRow(0, &smallRow);
compressor = jp.compressor;
}
@ -694,6 +715,7 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
fs.seekg(offset);
fs.read((char*) &len, sizeof(len));
saveErrno = errno;
if (!fs)
@ -735,12 +757,14 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
else
{
size_t uncompressedSize;
fs.read((char*) &uncompressedSize, sizeof(uncompressedSize));
boost::scoped_array<char> buf(new char[len]);
fs.read(buf.get(), len);
saveErrno = errno;
if (!fs)
if (!fs || !uncompressedSize)
{
fs.close();
ostringstream os;
@ -749,9 +773,9 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
}
totalBytesRead += len;
compressor.getUncompressedSize(buf.get(), len, &uncompressedSize);
bs->needAtLeast(uncompressedSize);
compressor.uncompress(buf.get(), len, (char*) bs->getInputPtr());
compressor->uncompress(buf.get(), len, (char*) bs->getInputPtr(),
&uncompressedSize);
bs->advanceInputPtr(uncompressedSize);
}
@ -801,13 +825,15 @@ uint64_t JoinPartition::writeByteStream(int which, ByteStream& bs)
}
else
{
uint64_t maxSize = compressor.maxCompressedSize(len);
size_t actualSize;
size_t maxSize = compressor->maxCompressedSize(len);
size_t actualSize = maxSize;
boost::scoped_array<uint8_t> compressed(new uint8_t[maxSize]);
compressor.compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
ret = actualSize + 4;
compressor->compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
ret = actualSize + 4 + 8; // sizeof (size_t) == 8. Why 4?
fs.write((char*) &actualSize, sizeof(actualSize));
// Save uncompressed len.
fs.write((char*) &len, sizeof(len));
fs.write((char*) compressed.get(), actualSize);
saveErrno = errno;

View File

@ -164,7 +164,7 @@ private:
/* Compression support */
bool useCompression;
compress::IDBCompressInterface compressor;
std::shared_ptr<compress::CompressInterface> compressor;
/* TBD: do the reading/writing in one thread, compression/decompression in another */
/* Some stats for reporting */

View File

@ -64,6 +64,7 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
{
config::Config* config = config::Config::makeConfig();
string val;
string compressionType;
try
{
@ -75,6 +76,19 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
useCompression = true;
else
useCompression = false;
try
{
compressionType =
config->getConfig("NetworkCompression", "NetworkCompression");
}
catch (...) { }
auto* compressInterface = compress::getCompressInterfaceByName(compressionType);
if (!compressInterface)
compressInterface = new compress::CompressInterfaceSnappy();
alg.reset(compressInterface);
}
Socket* CompressedInetStreamSocket::clone() const
@ -87,20 +101,25 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
{
SBS readBS, ret;
size_t uncompressedSize;
bool err;
readBS = InetStreamSocket::read(timeout, isTimeOut, stats);
if (readBS->length() == 0 || fMagicBuffer == BYTESTREAM_MAGIC)
return readBS;
err = alg.getUncompressedSize((char*) readBS->buf(), readBS->length(), &uncompressedSize);
// Read stored len, first 4 bytes.
uint32_t storedLen = *(uint32_t*) readBS->buf();
if (!err)
if (!storedLen)
return SBS(new ByteStream(0));
uncompressedSize = storedLen;
ret.reset(new ByteStream(uncompressedSize));
alg.uncompress((char*) readBS->buf(), readBS->length(), (char*) ret->getInputPtr());
alg->uncompress((char*) readBS->buf() + HEADER_SIZE,
readBS->length() - HEADER_SIZE, (char*) ret->getInputPtr(),
&uncompressedSize);
ret->advanceInputPtr(uncompressedSize);
return ret;
@ -108,15 +127,18 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats)
{
size_t outLen = 0;
uint32_t len = msg.length();
size_t len = msg.length();
if (useCompression && (len > 512))
{
ByteStream smsg(alg.maxCompressedSize(len));
size_t outLen = alg->maxCompressedSize(len) + HEADER_SIZE;
ByteStream smsg(outLen);
alg.compress((char*) msg.buf(), len, (char*) smsg.getInputPtr(), &outLen);
smsg.advanceInputPtr(outLen);
alg->compress((char*) msg.buf(), len,
(char*) smsg.getInputPtr() + HEADER_SIZE, &outLen);
// Save original len.
*(uint32_t*) smsg.getInputPtr() = len;
smsg.advanceInputPtr(outLen + HEADER_SIZE);
if (outLen < len)
do_write(smsg, COMPRESSED_BYTESTREAM_MAGIC, stats);

View File

@ -54,8 +54,9 @@ public:
virtual const IOSocket accept(const struct timespec* timeout);
virtual void connect(const sockaddr* addr);
private:
compress::IDBCompressInterface alg;
std::shared_ptr<compress::CompressInterface> alg;
bool useCompression;
static const uint32_t HEADER_SIZE = 4;
};
} //namespace messageqcpp