1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00

MCOL-987 Add LZ4 compression.

* Adds CompressInterfaceLZ4 which uses LZ4 API for compress/uncompress.
* Adds CMake machinery to locate LZ4 on the build host.
* All methods which use static data and do not modify any internal data become `static`,
  so we can call them without creating a specific object. This is possible because
  the header specification has not been modified: we still use 2 sections in the header, the first
  one with file metadata, the second one with pointers to the compressed chunks.
* Methods `compress`, `uncompress`, `maxCompressedSize`, `getUncompressedSize` become
  pure virtual, so we can override them for other compression algorithms.
* Adds method `getChunkMagicNumber`, so we can verify chunk magic number
  for each compression algo.
* Renames "s/IDBCompressInterface/CompressInterface/g" according to requirement.
This commit is contained in:
Denis Khalikov 2021-04-01 17:26:38 +03:00
parent dd12bd3cd0
commit cc1c3629c5
45 changed files with 1311 additions and 549 deletions

View File

@ -36,9 +36,9 @@ local deb_build_deps = 'apt update && apt install --yes --no-install-recommends
local platformMap(platform) =
local platform_map = {
'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package',
'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package',
'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package',
'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran liblz4-devel && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package',
'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' lz4-devel && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package',
'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' lz4-devel cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package',
'debian:9': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=stretch' debian/autobake-deb.sh",
'debian:10': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=buster' debian/autobake-deb.sh",
'ubuntu:18.04': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=bionic' debian/autobake-deb.sh",

View File

@ -163,6 +163,12 @@ if(NOT AWK_EXECUTABLE)
return()
endif()
FIND_PACKAGE(LZ4)
if (NOT LZ4_FOUND)
MESSAGE_ONCE(CS_NO_LZ4 "lz4 not found")
return()
endif()
IF (NOT INSTALL_LAYOUT)
INCLUDE(check_compiler_flag)

25
cmake/FindLZ4.cmake Normal file
View File

@ -0,0 +1,25 @@
# FindLZ4.cmake - locate the LZ4 library and its header.
#
# Variables populated by this module:
#   LZ4_ROOT_DIR     - prefix under which include/lz4.h was found
#   LZ4_LIBRARIES    - path to the lz4 library
#   LZ4_INCLUDE_DIR  - directory containing lz4.h
#   LZ4_FOUND        - set by find_package_handle_standard_args when both
#                      LZ4_LIBRARIES and LZ4_INCLUDE_DIR were found
find_path(LZ4_ROOT_DIR
    NAMES include/lz4.h
)
find_library(LZ4_LIBRARIES
    NAMES lz4
    HINTS ${LZ4_ROOT_DIR}/lib
)
find_path(LZ4_INCLUDE_DIR
    NAMES lz4.h
    HINTS ${LZ4_ROOT_DIR}/include
)
include(FindPackageHandleStandardArgs)
# The name given here must match the name used in FIND_PACKAGE(LZ4): the
# result variable is derived from it (<name>_FOUND), and newer CMake releases
# warn when the two names differ.
find_package_handle_standard_args(LZ4 DEFAULT_MSG
    LZ4_LIBRARIES
    LZ4_INCLUDE_DIR
)
mark_as_advanced(
    LZ4_ROOT_DIR
    LZ4_LIBRARIES
    LZ4_INCLUDE_DIR
)

View File

@ -146,9 +146,7 @@ pColStep::pColStep(
if (fOid < 1000)
throw runtime_error("pColStep: invalid column");
compress::IDBCompressInterface cmpif;
if (!cmpif.isCompressionAvail(fColType.compressionType))
if (!compress::CompressInterface::isCompressionAvail(fColType.compressionType))
{
ostringstream oss;
oss << "Unsupported compression type " << fColType.compressionType;

View File

@ -95,7 +95,11 @@ DROP PROCEDURE IF EXISTS `compression_ratio` //
CREATE PROCEDURE compression_ratio() SQL SECURITY INVOKER
BEGIN
SELECT CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files WHERE compressed_data_size IS NOT NULL), ':1') COMPRESSION_RATIO;
SELECT 'Snappy' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='Snappy' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') compression_ratio
UNION ALL
SELECT 'LZ4' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='LZ4') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='LZ4' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') as compression_ratio;
END //
create or replace procedure columnstore_upgrade() SQL SECURITY INVOKER

View File

@ -777,7 +777,6 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
parser.setDefaultSchema(schema);
parser.setDefaultCharset(default_table_charset);
int rc = 0;
IDBCompressInterface idbCompress;
parser.Parse(ddlStatement.c_str());
if (get_fe_conn_info_ptr() == NULL)
@ -981,7 +980,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
if (compressionType == 1) compressionType = 2;
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
if ((compressionType > 0) &&
!(compress::CompressInterface::isCompressionAvail(
compressionType)))
{
rc = 1;
ci->alterTableState = cal_connection_info::NOT_ALTER;
@ -1368,7 +1369,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
return rc;
}
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
if ((compressionType > 0) &&
!(compress::CompressInterface::isCompressionAvail(
compressionType)))
{
rc = 1;
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
@ -1713,7 +1716,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
return rc;
}
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
if ((compressionType > 0) &&
!(compress::CompressInterface::isCompressionAvail(
compressionType)))
{
rc = 1;
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
@ -1842,7 +1847,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
return rc;
}
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
if ((compressionType > 0) &&
!(compress::CompressInterface::isCompressionAvail(
compressionType)))
{
rc = 1;
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
@ -2364,9 +2371,8 @@ int ha_mcs_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* crea
if (compressiontype == 1) compressiontype = 2;
IDBCompressInterface idbCompress;
if ( ( compressiontype > 0 ) && !(idbCompress.isCompressionAvail( compressiontype )) )
if ((compressiontype > 0) &&
!(compress::CompressInterface::isCompressionAvail(compressiontype)))
{
string emsg = IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE);
setError(thd, ER_INTERNAL_ERROR, emsg);

View File

@ -21,8 +21,10 @@
#include "ha_mcs_sysvars.h"
const char* mcs_compression_type_names[] = {
"SNAPPY",
"SNAPPY",
"SNAPPY", // 0
"SNAPPY", // 1
"SNAPPY", // 2
"LZ4", // 3
NullS
};
@ -39,7 +41,8 @@ static MYSQL_THDVAR_ENUM(
PLUGIN_VAR_RQCMDARG,
"Controls compression algorithm for create tables. Possible values are: "
"NO_COMPRESSION segment files aren't compressed; "
"SNAPPY segment files are Snappy compressed (default);",
"SNAPPY segment files are Snappy compressed (default);"
"LZ4 segment files are LZ4 compressed;",
NULL, // check
NULL, // update
1, //default

View File

@ -30,7 +30,8 @@ extern char cs_commit_hash[];
// compression_type
enum mcs_compression_type_t {
NO_COMPRESSION = 0,
SNAPPY = 2
SNAPPY = 2,
LZ4 = 3
};
// use_import_for_batchinsert mode

View File

@ -183,6 +183,10 @@ static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond)
compression_type = "Snappy";
break;
case 3:
compression_type = "LZ4";
break;
default:
compression_type = "Unknown";
break;

View File

@ -492,6 +492,7 @@
<CPUniqueLimit>100</CPUniqueLimit>
<AllowDiskBasedJoin>N</AllowDiskBasedJoin>
<TempFileCompression>Y</TempFileCompression>
<TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
</HashJoin>
<JobList>
<FlushInterval>16K</FlushInterval>
@ -539,6 +540,7 @@
</UserPriority>
<NetworkCompression>
<Enabled>Y</Enabled>
<NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
</NetworkCompression>
<QueryTele>
<Host>127.0.0.1</Host>

View File

@ -308,7 +308,7 @@ void waitForRetry(long count)
//Must hold the FD cache lock!
int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterface& decompressor)
static int updateptrs(char* ptr, FdCacheType_t::iterator fdit)
{
ssize_t i;
uint32_t progress;
@ -357,7 +357,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa
fdit->second->cmpMTime = mtime;
int gplRc = 0;
gplRc = decompressor.getPtrList(&ptr[4096], 4096, fdit->second->ptrList);
gplRc = compress::CompressInterface::getPtrList(&ptr[4096], 4096,
fdit->second->ptrList);
if (gplRc != 0)
return -5; // go for a retry.
@ -391,7 +392,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa
return -8;
CompChunkPtrList nextPtrList;
gplRc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
gplRc = compress::CompressInterface::getPtrList(
&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
if (gplRc != 0)
return -7; // go for a retry.
@ -445,7 +447,6 @@ void* thr_popper(ioManager* arg)
double rqst3;
bool locked = false;
SPFdEntry_t fe;
IDBCompressInterface decompressor;
vector<CacheInsert_t> cacheInsertOps;
bool copyLocked = false;
@ -463,8 +464,10 @@ void* thr_popper(ioManager* arg)
FdCacheType_t::iterator fdit;
IDBDataFile* fp = 0;
uint32_t maxCompSz = IDBCompressInterface::maxCompressedSize(iom->blocksPerRead * BLOCK_SIZE);
uint32_t readBufferSz = maxCompSz + pageSize;
size_t maxCompSz =
compress::CompressInterface::getMaxCompressedSizeGeneric(
iom->blocksPerRead * BLOCK_SIZE);
size_t readBufferSz = maxCompSz + pageSize;
realbuff.reset(new char[readBufferSz]);
@ -863,7 +866,7 @@ retryReadHeaders:
cur_mtime = fp_mtime;
if (decompRetryCount > 0 || retryReadHeadersCount > 0 || cur_mtime > fdit->second->cmpMTime)
updatePtrsRc = updateptrs(&alignedbuff[0], fdit, decompressor);
updatePtrsRc = updateptrs(&alignedbuff[0], fdit);
fdMapMutex.unlock();
@ -1052,7 +1055,7 @@ retryReadHeaders:
#ifdef _MSC_VER
unsigned int blen = 4 * 1024 * 1024 + 4;
#else
uint32_t blen = 4 * 1024 * 1024 + 4;
size_t blen = 4 * 1024 * 1024 + 4;
#endif
#ifdef IDB_COMP_POC_DEBUG
{
@ -1060,7 +1063,18 @@ retryReadHeaders:
cout << "decompress(0x" << hex << (ptrdiff_t)&alignedbuff[0] << dec << ", " << fdit->second->ptrList[cmpOffFact.quot].second << ", 0x" << hex << (ptrdiff_t)uCmpBuf << dec << ", " << blen << ")" << endl;
}
#endif
int dcrc = decompressor.uncompressBlock(&alignedbuff[0],
std::unique_ptr<compress::CompressInterface> decompressor(
compress::getCompressInterfaceByType(
static_cast<uint32_t>(fdit->second->compType)));
if (!decompressor)
{
// Use default?
decompressor.reset(
new compress::CompressInterfaceSnappy());
}
int dcrc = decompressor->uncompressBlock(&alignedbuff[0],
fdit->second->ptrList[cmpOffFact.quot].second, uCmpBuf, blen);
if (dcrc != 0)

View File

@ -696,13 +696,25 @@ blockReadRetry:
i = fp->pread( &cmpHdrBuf[0], 0, 4096 * 3);
CompChunkPtrList ptrList;
IDBCompressInterface decompressor;
std::unique_ptr<CompressInterface> decompressor(
compress::getCompressInterfaceByType(
compress::CompressInterface::getCompressionType(
&cmpHdrBuf[0])));
if (!decompressor)
{
// Use default?
decompressor.reset(
new compress::CompressInterfaceSnappy());
}
int dcrc = 0;
if (i == 4096 * 3)
{
uint64_t numHdrs = 0; // extra headers
dcrc = decompressor.getPtrList(&cmpHdrBuf[4096], 4096, ptrList);
dcrc = compress::CompressInterface::getPtrList(
&cmpHdrBuf[4096], 4096, ptrList);
if (dcrc == 0 && ptrList.size() > 0)
numHdrs = ptrList[0].first / 4096ULL - 2ULL;
@ -723,7 +735,8 @@ blockReadRetry:
i = fp->pread( &nextHdrBufPtr[0], 4096 * 2, numHdrs * 4096 );
CompChunkPtrList nextPtrList;
dcrc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
dcrc = compress::CompressInterface::getPtrList(
&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
if (dcrc == 0)
ptrList.insert(ptrList.end(), nextPtrList.begin(), nextPtrList.end());
@ -777,11 +790,11 @@ blockReadRetry:
cmpBuf = (char*) alignedBuffer;
}
unsigned blen = 4 * 1024 * 1024;
size_t blen = 4 * 1024 * 1024;
i = fp->pread( cmpBuf, cmpBufOff, cmpBufSz );
dcrc = decompressor.uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen);
dcrc = decompressor->uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen);
if (dcrc == 0)
{

View File

@ -42,3 +42,9 @@ if (WITH_REBUILD_EM_UT)
target_link_libraries(rebuild_em_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
install(TARGETS rebuild_em_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
endif()
if (WITH_COMPRESSION_UT)
add_executable(compression_tests compression-tests.cpp)
target_link_libraries(compression_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
install(TARGETS compression_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
endif()

126
tests/compression-tests.cpp Normal file
View File

@ -0,0 +1,126 @@
/* Copyright (C) 2021 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "idbcompress.h"
// Test fixture providing a generator of large input buffers for the
// compression tests below.
class CompressionTest : public ::testing::Test
{
 protected:
    // Returns the concatenation of every ordering of the characters of
    // `data` (data.size()! orderings of data.size() bytes each). Positions
    // are permuted, so repeated characters in `data` yield repeated
    // orderings in the result. `data` is reordered while generating and is
    // back in its original order when the function returns.
    std::string genPermutations(std::string& data)
    {
        std::string generated;
        generate(data, 0, generated);
        return generated;
    }

 private:
    // Recursive helper: positions [0, i) are fixed; every remaining
    // character is tried at position `i`. Once all positions are fixed the
    // current ordering is appended to `generated`.
    void generate(std::string& data, std::size_t i, std::string& generated)
    {
        const std::size_t size = data.size();

        if (i == size)
        {
            generated.append(data);
            return;
        }

        for (std::size_t k = i; k < size; ++k)
        {
            std::swap(data[i], data[k]);
            generate(data, i + 1, generated);
            // Undo the swap so the next iteration starts from the same state.
            std::swap(data[i], data[k]);
        }
    }
};
// Round-trip check: data compressed with the LZ4 interface must decompress
// to exactly the original bytes.
TEST_F(CompressionTest, LZ4CanCompress)
{
    std::string originalData =
        "This program is free software; you can redistribute it and/or"
        "modify it under the terms of the GNU General Public License"
        "as published by the Free Software Foundation; version 2 of"
        "the License.";

    std::unique_ptr<compress::CompressInterface> compressor(
        new compress::CompressInterfaceLZ4());

    const size_t originalSize = originalData.size();
    size_t compressedSize = compressor->maxCompressedSize(originalSize);
    std::unique_ptr<char[]> compressedData(new char[compressedSize]);
    std::memset(compressedData.get(), 0, compressedSize);

    auto rc = compressor->compress(originalData.data(), originalSize,
                                   compressedData.get(), &compressedSize);
    ASSERT_EQ(rc, 0);

    // In/out argument: holds the capacity of the output buffer on entry.
    // NOTE(review): presumably updated to the number of bytes produced —
    // confirm against CompressInterfaceLZ4::uncompress.
    size_t uncompressedSize = originalSize;
    std::unique_ptr<char[]> uncompressedData(new char[originalSize]);
    rc = compressor->uncompress(compressedData.get(), compressedSize,
                                uncompressedData.get(), &uncompressedSize);
    ASSERT_EQ(rc, 0);
    EXPECT_EQ(uncompressedSize, originalSize);

    // The output buffer is exactly originalSize bytes and is NOT
    // NUL-terminated, so the string must be built with an explicit length;
    // the single-pointer constructor would read past the allocation.
    std::string result(uncompressedData.get(), originalSize);
    EXPECT_EQ(originalData, result);
}
// Compresses the same generated buffers with LZ4 and Snappy and prints the
// compression ratio achieved by each. Only the return codes are asserted;
// the ratios are informational.
TEST_F(CompressionTest, LZvsSnappyUnique)
{
    std::unique_ptr<compress::CompressInterface> lz4Compressor(
        new compress::CompressInterfaceLZ4());
    std::unique_ptr<compress::CompressInterface> snappyCompressor(
        new compress::CompressInterfaceSnappy());

    // Prints "<label><raw / packed>" followed by a newline and a flush.
    const auto reportRatio = [](const char* label, size_t rawSize,
                                size_t packedSize)
    {
        const float ratio =
            static_cast<float>(rawSize) / static_cast<float>(packedSize);
        std::cout << label << ratio << std::endl;
    };

    // Seeds for the permutation generator.
    // 9! * 9 == 3265920 (closer to current chunk size)
    std::vector<std::string> dataPool{"abcdefghi", "aaadefghi", "aaaaafghi",
                                      "aaaaaaahi", "aaaaaaaaj"};

    for (auto& seed : dataPool)
    {
        std::cout << "Permutations generated for: " << seed << std::endl;

        const std::string input = genPermutations(seed);
        const size_t inputSize = input.size();

        // Start from the worst-case sizes; compress() receives these by
        // pointer and they are used afterwards as the compressed sizes.
        size_t lz4Size = lz4Compressor->maxCompressedSize(inputSize);
        size_t snappySize = snappyCompressor->maxCompressedSize(inputSize);

        std::unique_ptr<char[]> lz4Buffer(new char[lz4Size]);
        auto rc = lz4Compressor->compress(input.data(), inputSize,
                                          lz4Buffer.get(), &lz4Size);
        ASSERT_EQ(rc, 0);

        std::unique_ptr<char[]> snappyBuffer(new char[snappySize]);
        rc = snappyCompressor->compress(input.data(), inputSize,
                                        snappyBuffer.get(), &snappySize);
        ASSERT_EQ(rc, 0);

        reportRatio("LZ ratio: ", inputSize, lz4Size);
        reportRatio("Snappy ratio: ", inputSize, snappySize);
    }
}

View File

@ -383,7 +383,7 @@ public:
BlockOp blockOp;
char fileName[20];
int rc;
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
printf("\nRunning testCreateDeleteFile \n");
idbdatafile::IDBPolicy::init(true, false, "", 0);
@ -966,7 +966,7 @@ public:
BlockOp blockOp;
char fileName[20];
int rc;
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
int dbRoot = 1;
printf("\nRunning testExtensionWOPrealloc \n");
@ -1085,7 +1085,7 @@ public:
int dbRoot = 1;
int colWidth = 65535;
DctnryCompress1 m_Dctnry;
DctnryCompress1 m_Dctnry(/*compressionType=*/1);
// This is the magic for the stub in FileOp::oid2FileName
int oId = 42;
@ -1565,7 +1565,7 @@ public:
BlockOp blockOp;
char fileName[20];
int rc;
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
int dbRoot = 1;
idbdatafile::IDBPolicy::init(true, false, "", 0);

View File

@ -89,7 +89,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
}
// Read and verify header.
char fileHeader[compress::IDBCompressInterface::HDR_BUF_LEN * 2];
char fileHeader[compress::CompressInterface::HDR_BUF_LEN * 2];
rc = fileOp.readHeaders(dbFile.get(), fileHeader);
if (rc != 0)
{
@ -116,8 +116,8 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
}
// Read the `colDataType` and `colWidth` from the given header.
compress::IDBCompressInterface compressor;
const auto versionNumber = compressor.getVersionNumber(fileHeader);
const auto versionNumber =
compress::CompressInterface::getVersionNumber(fileHeader);
// Verify header number.
if (versionNumber < 3)
{
@ -129,10 +129,11 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
return -1;
}
auto colDataType = compressor.getColDataType(fileHeader);
auto colWidth = compressor.getColumnWidth(fileHeader);
auto blockCount = compressor.getBlockCount(fileHeader);
auto lbidCount = compressor.getLBIDCount(fileHeader);
auto colDataType = compress::CompressInterface::getColDataType(fileHeader);
auto colWidth = compress::CompressInterface::getColumnWidth(fileHeader);
auto blockCount = compress::CompressInterface::getBlockCount(fileHeader);
auto lbidCount = compress::CompressInterface::getLBIDCount(fileHeader);
auto compressionType = compress::CompressInterface::getCompressionType(fileHeader);
if (colDataType == execplan::CalpontSystemCatalog::UNDEFINED)
{
@ -155,7 +156,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
uint64_t hwm = 0;
rc = searchHWMInSegmentFile(oid, getDBRoot(), partition, segment, colDataType, colWidth,
blockCount, isDict, hwm);
blockCount, isDict, compressionType, hwm);
if (rc != 0)
{
return rc;
@ -172,13 +173,13 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
{
for (uint32_t lbidIndex = 0; lbidIndex < lbidCount - 1; ++lbidIndex)
{
auto lbid = compressor.getLBIDByIndex(fileHeader, lbidIndex);
auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidIndex);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, /*hwm*/ 0, isDict);
extentMap.push_back(fileId);
}
// Last one has an actual HWM.
auto lbid = compressor.getLBIDByIndex(fileHeader, lbidCount - 1);
auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidCount - 1);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId);
@ -192,7 +193,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
else
{
// One extent per segment file.
auto lbid = compressor.getLBIDByIndex(fileHeader, 0);
auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, 0);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId);
@ -293,7 +294,7 @@ int32_t EMReBuilder::rebuildExtentMap()
int32_t EMReBuilder::searchHWMInSegmentFile(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
uint64_t blockCount, bool isDict, uint64_t& hwm)
uint64_t blockCount, bool isDict, uint32_t compressionType, uint64_t& hwm)
{
std::unique_ptr<ChunkManagerWrapper> chunkManagerWrapper;
try
@ -302,13 +303,15 @@ int32_t EMReBuilder::searchHWMInSegmentFile(
{
chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperDict>(
new ChunkManagerWrapperDict(oid, dbRoot, partition, segment,
colDataType, colWidth));
colDataType, colWidth,
compressionType));
}
else
{
chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperColumn>(
new ChunkManagerWrapperColumn(oid, dbRoot, partition, segment,
colDataType, colWidth));
colDataType, colWidth,
compressionType));
}
}
catch (...)
@ -401,12 +404,13 @@ int32_t ChunkManagerWrapper::readBlock(uint32_t blockNumber)
ChunkManagerWrapperColumn::ChunkManagerWrapperColumn(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth)
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
uint32_t compressionType)
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType,
colWidth)
{
pFileOp = std::unique_ptr<WriteEngine::ColumnOpCompress1>(
new WriteEngine::ColumnOpCompress1());
new WriteEngine::ColumnOpCompress1(compressionType));
chunkManager.fileOp(pFileOp.get());
// Open compressed column segment file. We will read block by block
// from the compressed chunks.
@ -463,12 +467,13 @@ bool ChunkManagerWrapperColumn::isEmptyValue(const uint8_t* value) const
ChunkManagerWrapperDict::ChunkManagerWrapperDict(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth)
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
uint32_t compressionType)
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType,
colWidth)
{
pFileOp = std::unique_ptr<WriteEngine::DctnryCompress1>(
new WriteEngine::DctnryCompress1());
new WriteEngine::DctnryCompress1(compressionType));
chunkManager.fileOp(pFileOp.get());
// Open compressed dict segment file.
pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment,

View File

@ -112,7 +112,8 @@ class EMReBuilder
int32_t searchHWMInSegmentFile(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t width, uint64_t blocksCount, bool isDict, uint64_t& hwm);
uint32_t width, uint64_t blocksCount, bool isDict,
uint32_t compressionType, uint64_t& hwm);
// Sets the dbroot to the given `number`.
void setDBRoot(uint32_t number) { dbRoot = number; }
@ -184,7 +185,7 @@ class ChunkManagerWrapperColumn : public ChunkManagerWrapper
ChunkManagerWrapperColumn(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth);
uint32_t colWidth, uint32_t compressionType);
~ChunkManagerWrapperColumn() = default;
ChunkManagerWrapperColumn(const ChunkManagerWrapperColumn& other) = delete;
@ -210,7 +211,7 @@ class ChunkManagerWrapperDict : public ChunkManagerWrapper
ChunkManagerWrapperDict(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth);
uint32_t colWidth, uint32_t compressionType);
~ChunkManagerWrapperDict() = default;
ChunkManagerWrapperDict(const ChunkManagerWrapperDict& other) = delete;

View File

@ -10,7 +10,7 @@ add_definitions(-DNDEBUG)
add_library(compress SHARED ${compress_LIB_SRCS})
target_link_libraries(compress ${SNAPPY_LIBRARIES})
target_link_libraries(compress ${SNAPPY_LIBRARIES} ${LZ4_LIBRARIES})
install(TARGETS compress DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine)

View File

@ -22,12 +22,14 @@
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <unordered_map>
using namespace std;
#include "blocksize.h"
#include "logger.h"
#include "snappy.h"
#include "hasher.h"
#include "lz4.h"
#define IDBCOMP_DLLEXPORT
#include "idbcompress.h"
@ -39,8 +41,7 @@ const uint64_t MAGIC_NUMBER = 0xfdc119a384d0778eULL;
const uint64_t VERSION_NUM1 = 1;
const uint64_t VERSION_NUM2 = 2;
const uint64_t VERSION_NUM3 = 3;
const int COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
const int PTR_SECTION_OFFSET = compress::IDBCompressInterface::HDR_BUF_LEN;
const int PTR_SECTION_OFFSET = compress::CompressInterface::HDR_BUF_LEN;
// version 1.1 of the chunk data has a short header
// QuickLZ compressed data never has the high bit set on the first byte
@ -83,7 +84,7 @@ struct CompressedDBFileHeader
union CompressedDBFileHeaderBlock
{
CompressedDBFileHeader fHeader;
char fDummy[compress::IDBCompressInterface::HDR_BUF_LEN];
char fDummy[compress::CompressInterface::HDR_BUF_LEN];
};
void initCompressedDBFileHeader(
@ -110,53 +111,57 @@ namespace compress
{
#ifndef SKIP_IDB_COMPRESSION
IDBCompressInterface::IDBCompressInterface(unsigned int numUserPaddingBytes) :
CompressInterface::CompressInterface(unsigned int numUserPaddingBytes) :
fNumUserPaddingBytes(numUserPaddingBytes)
{ }
IDBCompressInterface::~IDBCompressInterface()
{ }
/* V1 is really only available for decompression, we kill any DDL using V1 by hand.
* Maybe should have a new api, isDecompressionAvail() ? Any request to compress
* using V1 will silently be changed to V2.
*/
bool IDBCompressInterface::isCompressionAvail(int compressionType) const
/*static*/
bool CompressInterface::isCompressionAvail(int compressionType)
{
if ( (compressionType == 0) ||
(compressionType == 1) ||
(compressionType == 2) )
return true;
return ((compressionType == 0) || (compressionType == 1) ||
(compressionType == 2) || (compressionType == 3));
}
return false;
size_t CompressInterface::getMaxCompressedSizeGeneric(size_t inLen)
{
return std::max(snappy::MaxCompressedLength(inLen),
LZ4_COMPRESSBOUND(inLen)) +
HEADER_SIZE;
}
//------------------------------------------------------------------------------
// Compress a block of data
//------------------------------------------------------------------------------
int IDBCompressInterface::compressBlock(const char* in,
const size_t inLen,
unsigned char* out,
unsigned int& outLen) const
int CompressInterface::compressBlock(const char* in, const size_t inLen,
unsigned char* out, size_t& outLen) const
{
size_t snaplen = 0;
utils::Hasher128 hasher;
// loose input checking.
if (outLen < snappy::MaxCompressedLength(inLen) + HEADER_SIZE)
if (outLen < maxCompressedSize(inLen))
{
cerr << "got outLen = " << outLen << " for inLen = " << inLen << ", needed " <<
(snappy::MaxCompressedLength(inLen) + HEADER_SIZE) << endl;
cerr << "got outLen = " << outLen << " for inLen = " << inLen
<< ", needed " << (maxCompressedSize(inLen)) << endl;
return ERR_BADOUTSIZE;
}
//apparently this never fails?
snappy::RawCompress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]), &snaplen);
auto rc = compress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]),
&outLen);
if (rc != ERR_OK)
{
return rc;
}
snaplen = outLen;
uint8_t* signature = (uint8_t*) &out[SIG_OFFSET];
uint32_t* checksum = (uint32_t*) &out[CHECKSUM_OFFSET];
uint32_t* len = (uint32_t*) &out[LEN_OFFSET];
*signature = CHUNK_MAGIC3;
*signature = getChunkMagicNumber();
*checksum = hasher((char*) &out[HEADER_SIZE], snaplen);
*len = snaplen;
@ -171,51 +176,47 @@ int IDBCompressInterface::compressBlock(const char* in,
//------------------------------------------------------------------------------
// Decompress a block of data
//------------------------------------------------------------------------------
int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
unsigned int& outLen) const
int CompressInterface::uncompressBlock(const char* in, const size_t inLen,
unsigned char* out,
size_t& outLen) const
{
bool comprc = false;
size_t ol = 0;
uint32_t realChecksum;
uint32_t storedChecksum;
uint32_t storedLen;
uint8_t storedMagic;
utils::Hasher128 hasher;
auto tmpOutLen = outLen;
outLen = 0;
if (inLen < 1)
{
return ERR_BADINPUT;
}
storedMagic = *((uint8_t*) &in[SIG_OFFSET]);
if (storedMagic == CHUNK_MAGIC3)
if (storedMagic == getChunkMagicNumber())
{
if (inLen < HEADER_SIZE)
{
return ERR_BADINPUT;
}
storedChecksum = *((uint32_t*) &in[CHECKSUM_OFFSET]);
storedLen = *((uint32_t*) (&in[LEN_OFFSET]));
if (inLen < storedLen + HEADER_SIZE)
{
return ERR_BADINPUT;
}
realChecksum = hasher(&in[HEADER_SIZE], storedLen);
if (storedChecksum != realChecksum)
{
return ERR_CHECKSUM;
auto rc = uncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out), &tmpOutLen);
if (rc != ERR_OK)
{
cerr << "uncompressBlock failed!" << endl;
return ERR_DECOMPRESS;
}
comprc = snappy::GetUncompressedLength(&in[HEADER_SIZE], storedLen, &ol) &&
snappy::RawUncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out));
outLen = tmpOutLen;
}
else
{
@ -223,13 +224,6 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
return ERR_BADINPUT;
}
if (!comprc)
{
cerr << "decomp failed!" << endl;
return ERR_DECOMPRESS;
}
outLen = ol;
//cerr << "ub: " << inLen << " : " << outLen << endl;
return ERR_OK;
@ -238,7 +232,7 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
//------------------------------------------------------------------------------
// Verify the passed in buffer contains a valid compression file header.
//------------------------------------------------------------------------------
int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
int CompressInterface::verifyHdr(const void* hdrBuf)
{
const CompressedDBFileHeader* hdr = reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf);
@ -255,9 +249,8 @@ int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
// Extract compression pointer information out of the pointer buffer that is
// passed in. ptrBuf points to the pointer section of the compression hdr.
//------------------------------------------------------------------------------
int IDBCompressInterface::getPtrList(const char* ptrBuf,
const int ptrBufSize,
CompChunkPtrList& chunkPtrs ) const
int CompressInterface::getPtrList(const char* ptrBuf, const int ptrBufSize,
CompChunkPtrList& chunkPtrs)
{
int rc = 0;
chunkPtrs.clear();
@ -285,7 +278,7 @@ int IDBCompressInterface::getPtrList(const char* ptrBuf,
// one for the file header, and one for the list of pointers.
// Wrapper of above method for backward compatibility.
//------------------------------------------------------------------------------
int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs ) const
int CompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs )
{
return getPtrList(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN, chunkPtrs);
}
@ -293,8 +286,8 @@ int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunk
//------------------------------------------------------------------------------
// Count the number of chunk pointers in the pointer header(s)
//------------------------------------------------------------------------------
unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
const int ptrBufSize) const
unsigned int CompressInterface::getPtrCount(const char* ptrBuf,
const int ptrBufSize)
{
unsigned int chunkCount = 0;
@ -318,7 +311,7 @@ unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
// This should not be used for compressed dictionary files which could have
// more compression chunk headers.
//------------------------------------------------------------------------------
unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
unsigned int CompressInterface::getPtrCount(const char* hdrBuf)
{
return getPtrCount(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN);
}
@ -326,9 +319,8 @@ unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
//------------------------------------------------------------------------------
// Store list of compression pointers into the specified header.
//------------------------------------------------------------------------------
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
void* ptrBuf,
int ptrSectionSize) const
void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
void* ptrBuf, int ptrSectionSize)
{
memset((ptrBuf), 0, ptrSectionSize); // reset the pointer section to 0
uint64_t* hdrPtrs = reinterpret_cast<uint64_t*>(ptrBuf);
@ -342,7 +334,7 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
//------------------------------------------------------------------------------
// Wrapper of above method for backward compatibility
//------------------------------------------------------------------------------
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf) const
void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf)
{
storePtrs(ptrs, reinterpret_cast<char*>(ptrBuf) + HDR_BUF_LEN, HDR_BUF_LEN);
}
@ -350,10 +342,10 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* pt
//------------------------------------------------------------------------------
// Initialize the header blocks to be written at the start of a dictionary file.
//------------------------------------------------------------------------------
void IDBCompressInterface::initHdr(
void CompressInterface::initHdr(
void* hdrBuf, void* ptrBuf, uint32_t colWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType, int hdrSize) const
int compressionType, int hdrSize)
{
memset(hdrBuf, 0, HDR_BUF_LEN);
memset(ptrBuf, 0, hdrSize - HDR_BUF_LEN);
@ -364,10 +356,10 @@ void IDBCompressInterface::initHdr(
//------------------------------------------------------------------------------
// Initialize the header blocks to be written at the start of a column file.
//------------------------------------------------------------------------------
void IDBCompressInterface::initHdr(
void CompressInterface::initHdr(
void* hdrBuf, uint32_t columnWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType) const
int compressionType)
{
memset(hdrBuf, 0, HDR_BUF_LEN * 2);
initCompressedDBFileHeader(hdrBuf, columnWidth, columnType,
@ -377,7 +369,7 @@ void IDBCompressInterface::initHdr(
//------------------------------------------------------------------------------
// Get the header's version number
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
{
return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fVersionNum);
@ -386,7 +378,7 @@ uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
//------------------------------------------------------------------------------
// Set the file's block count
//------------------------------------------------------------------------------
void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count)
{
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fBlockCount = count;
}
@ -394,15 +386,24 @@ void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
//------------------------------------------------------------------------------
// Get the file's block count
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
{
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fBlockCount);
}
//------------------------------------------------------------------------------
// Get the file's compression type
//------------------------------------------------------------------------------
uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
{
    // View the raw header bytes through the on-disk header layout and read
    // the compression type field straight out of it.
    const auto* fileHeader =
        reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf);

    return fileHeader->fCompressionType;
}
//------------------------------------------------------------------------------
// Set the overall header size
//------------------------------------------------------------------------------
void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
void CompressInterface::setHdrSize(void* hdrBuf, uint64_t size)
{
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fHeaderSize = size;
}
@ -410,7 +411,7 @@ void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
//------------------------------------------------------------------------------
// Get the overall header size
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
uint64_t CompressInterface::getHdrSize(const void* hdrBuf)
{
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fHeaderSize);
}
@ -419,7 +420,7 @@ uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
// Get column type
//-----------------------------------------------------------------------------
execplan::CalpontSystemCatalog::ColDataType
IDBCompressInterface::getColDataType(const void* hdrBuf) const
CompressInterface::getColDataType(const void* hdrBuf)
{
return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColDataType);
@ -428,7 +429,7 @@ IDBCompressInterface::getColDataType(const void* hdrBuf) const
//------------------------------------------------------------------------------
// Get column width
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
uint64_t CompressInterface::getColumnWidth(const void* hdrBuf)
{
return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColumnWidth);
@ -437,7 +438,7 @@ uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
//------------------------------------------------------------------------------
// Get LBID by index
//------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index) const
uint64_t CompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index)
{
if (index < LBID_MAX_SIZE)
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDS[index]);
@ -447,7 +448,7 @@ uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index
//------------------------------------------------------------------------------
// Set LBID by index
//------------------------------------------------------------------------------
void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const
void CompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index)
{
if (lbid && index < LBID_MAX_SIZE)
{
@ -457,7 +458,10 @@ void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t
}
}
uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
//------------------------------------------------------------------------------
// Get LBID count
//------------------------------------------------------------------------------
uint64_t CompressInterface::getLBIDCount(void* hdrBuf)
{
return reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDCount;
}
@ -466,9 +470,9 @@ uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
// Calculates the chunk and block offset within the chunk for the specified
// block number.
//------------------------------------------------------------------------------
void IDBCompressInterface::locateBlock(unsigned int block,
unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const
void CompressInterface::locateBlock(unsigned int block,
unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const
{
const uint64_t BUFLEN = UNCOMPRESSED_INBUF_LEN;
@ -485,9 +489,8 @@ void IDBCompressInterface::locateBlock(unsigned int block,
// also expand to allow for user requested padding. Lastly, initialize padding
// bytes to 0.
//------------------------------------------------------------------------------
int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
unsigned int& len,
unsigned int maxLen) const
int CompressInterface::padCompressedChunks(unsigned char* buf, size_t& len,
unsigned int maxLen) const
{
int nPaddingBytes = 0;
int nRem = len % COMPRESSED_CHUNK_INCREMENT_SIZE;
@ -511,30 +514,203 @@ int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
return 0;
}
/* static */
uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
// Snappy
// Construct a Snappy compressor; numUserPaddingBytes is forwarded unchanged
// to the CompressInterface base constructor.
CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
: CompressInterface(numUserPaddingBytes)
{
}
// Compress inLen bytes from `in` into `out` with snappy::RawCompress, which
// reports the number of bytes produced through outLen.
// Always returns ERR_OK: the result of RawCompress is not checked here.
// NOTE(review): the incoming value of *outLen is not consulted by this
// function, so `out` is presumably expected to be sized with
// maxCompressedSize() by the caller - confirm against callers.
int32_t CompressInterfaceSnappy::compress(const char* in, size_t inLen,
char* out, size_t* outLen) const
{
snappy::RawCompress(in, inLen, out, outLen);
#ifdef DEBUG_COMPRESSION
// Optional trace of input and compressed sizes.
std::cout << "Snappy::compress: inLen " << inLen << ", outLen " << *outLen
<< std::endl;
#endif
return ERR_OK;
}
// Uncompress a Snappy stream of inLen bytes from `in` into `out`.
//
// On entry *outLen holds the capacity of `out`. On success *outLen is set to
// the number of bytes produced and ERR_OK is returned. ERR_DECOMPRESS is
// returned (and *outLen left untouched) when the stream's length prefix cannot
// be read, when the decoded size would not fit into `out`, or when the
// payload itself fails to decode.
int32_t CompressInterfaceSnappy::uncompress(const char* in, size_t inLen,
                                            char* out, size_t* outLen) const
{
    size_t realOutLen = 0;

    // Read the decoded size first so an undersized buffer is rejected before
    // RawUncompress writes anything into it.
    const bool lengthKnown =
        snappy::GetUncompressedLength(in, inLen, &realOutLen);

    if (!lengthKnown || realOutLen > *outLen)
    {
        cerr << "snappy::GetUncompressedLength failed. InLen: " << inLen
             << ", outLen: " << *outLen << ", realOutLen: " << realOutLen
             << endl;
        return ERR_DECOMPRESS;
    }

    if (!snappy::RawUncompress(in, inLen, out))
    {
        cerr << "snappy::RawUncompress failed. InLen: " << inLen
             << ", outLen: " << *outLen << endl;
        return ERR_DECOMPRESS;
    }

    // Publish the real size before tracing so the debug output shows the
    // decoded length rather than the caller's buffer capacity.
    *outLen = realOutLen;

#ifdef DEBUG_COMPRESSION
    std::cout << "Snappy::uncompress: inLen " << inLen << ", outLen "
              << *outLen << std::endl;
#endif

    return ERR_OK;
}
// Upper bound on the size of a compressed chunk for uncompSize input bytes:
// Snappy's own worst-case payload size plus HEADER_SIZE.
size_t CompressInterfaceSnappy::maxCompressedSize(size_t uncompSize) const
{
    const size_t payloadBound = snappy::MaxCompressedLength(uncompSize);

    return payloadBound + HEADER_SIZE;
}
int IDBCompressInterface::compress(const char* in, size_t inLen, char* out,
size_t* outLen) const
{
snappy::RawCompress(in, inLen, out, outLen);
return 0;
}
int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
{
return !(snappy::RawUncompress(in, inLen, out));
}
/* static */
bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, size_t* outLen)
// Ask Snappy for the decoded size of the stream in `in`; the size is written
// to outLen. Returns whatever snappy::GetUncompressedLength reports.
bool CompressInterfaceSnappy::getUncompressedSize(char* in, size_t inLen,
                                                  size_t* outLen) const
{
    const bool lengthRead = snappy::GetUncompressedLength(in, inLen, outLen);

    return lengthRead;
}
// Magic byte identifying a Snappy chunk; uncompressBlock() compares the byte
// stored at SIG_OFFSET against this value before decoding.
uint8_t CompressInterfaceSnappy::getChunkMagicNumber() const
{
return CHUNK_MAGIC_SNAPPY;
}
// LZ4
// Construct an LZ4 compressor; numUserPaddingBytes is forwarded unchanged to
// the CompressInterface base constructor.
CompressInterfaceLZ4::CompressInterfaceLZ4(uint32_t numUserPaddingBytes)
: CompressInterface(numUserPaddingBytes)
{
}
int32_t CompressInterfaceLZ4::compress(const char* in, size_t inLen, char* out,
size_t* outLen) const
{
auto compressedLen = LZ4_compress_default(in, out, inLen, *outLen);
if (!compressedLen)
{
cerr << "LZ_compress_default failed. InLen: " << inLen
<< ", compressedLen: " << compressedLen << endl;
return ERR_COMPRESS;
}
#ifdef DEBUG_COMPRESSION
std::cout << "LZ4::compress: inLen " << inLen << ", comressedLen "
<< compressedLen << std::endl;
#endif
*outLen = compressedLen;
return ERR_OK;
}
int32_t CompressInterfaceLZ4::uncompress(const char* in, size_t inLen,
char* out, size_t* outLen) const
{
auto decompressedLen = LZ4_decompress_safe(in, out, inLen, *outLen);
if (decompressedLen < 0)
{
cerr << "LZ_decompress_safe failed with error code " << decompressedLen
<< endl;
cerr << "InLen: " << inLen << ", outLen: " << *outLen << endl;
return ERR_DECOMPRESS;
}
*outLen = decompressedLen;
#ifdef DEBUG_COMPRESSION
std::cout << "LZ4::uncompress: inLen " << inLen << ", outLen " << *outLen
<< std::endl;
#endif
return ERR_OK;
}
// Upper bound on the size of a compressed chunk for uncompSize input bytes:
// the LZ4_COMPRESSBOUND payload estimate plus HEADER_SIZE.
size_t CompressInterfaceLZ4::maxCompressedSize(size_t uncompSize) const
{
    const size_t payloadBound = LZ4_COMPRESSBOUND(uncompSize);

    return payloadBound + HEADER_SIZE;
}
// Not supported for LZ4: this override exists only to satisfy the pure
// virtual in CompressInterface. It trips idbassert(false) and, if execution
// continues past the assertion, returns false without touching outLen.
bool CompressInterfaceLZ4::getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const
{
// LZ4 does not provide a function for this, so reaching this point is
// treated as a programming error.
idbassert(false);
return false;
}
// Magic byte identifying an LZ4 chunk; uncompressBlock() compares the byte
// stored at SIG_OFFSET against this value before decoding.
uint8_t CompressInterfaceLZ4::getChunkMagicNumber() const
{
return CHUNK_MAGIC_LZ4;
}
// Create a compressor for the given numeric compression type.
//
// Types 1 and 2 both yield a Snappy compressor and type 3 yields an LZ4
// compressor; any other value yields nullptr. The returned object is
// allocated with `new` and must be released by the caller.
CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
                                              uint32_t numUserPaddingBytes)
{
    if (compressionType == 1 || compressionType == 2)
        return new CompressInterfaceSnappy(numUserPaddingBytes);

    if (compressionType == 3)
        return new CompressInterfaceLZ4(numUserPaddingBytes);

    // Unknown compression type.
    return nullptr;
}
// Create a compressor from its textual name.
//
// "SNAPPY" and "LZ4" are recognised (exact, case-sensitive match); any other
// name yields nullptr. The returned object is allocated with `new` and must
// be released by the caller.
CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
                                              uint32_t numUserPaddingBytes)
{
    CompressInterface* created = nullptr;

    if (compressionName == "SNAPPY")
    {
        created = new CompressInterfaceSnappy(numUserPaddingBytes);
    }
    else if (compressionName == "LZ4")
    {
        created = new CompressInterfaceLZ4(numUserPaddingBytes);
    }

    return created;
}
void initializeCompressorPool(
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
compressorPool,
uint32_t numUserPaddingBytes)
{
compressorPool = {
make_pair(2, std::shared_ptr<CompressInterface>(
new CompressInterfaceSnappy(numUserPaddingBytes))),
make_pair(3, std::shared_ptr<CompressInterface>(
new CompressInterfaceLZ4(numUserPaddingBytes)))};
}
std::shared_ptr<CompressInterface> getCompressorByType(
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
compressorPool,
uint32_t compressionType)
{
switch (compressionType)
{
case 1:
case 2:
if (!compressorPool.count(2))
{
return nullptr;
}
return compressorPool[2];
case 3:
if (!compressorPool.count(3))
{
return nullptr;
}
return compressorPool[3];
}
return nullptr;
}
#endif
} // namespace compress

View File

@ -26,6 +26,7 @@
#endif
#include <vector>
#include <utility>
#include <unordered_map>
#include "calpontsystemcatalog.h"
@ -41,11 +42,12 @@ namespace compress
typedef std::pair<uint64_t, uint64_t> CompChunkPtr;
typedef std::vector<CompChunkPtr> CompChunkPtrList;
class IDBCompressInterface
class CompressInterface
{
public:
static const unsigned int HDR_BUF_LEN = 4096;
static const unsigned int UNCOMPRESSED_INBUF_LEN = 512 * 1024 * 8;
static const uint32_t COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
// error codes from uncompressBlock()
static const int ERR_OK = 0;
@ -53,22 +55,29 @@ public:
static const int ERR_DECOMPRESS = -2;
static const int ERR_BADINPUT = -3;
static const int ERR_BADOUTSIZE = -4;
static const int ERR_COMPRESS = -5;
/**
* When IDBCompressInterface object is being used to compress a chunk, this
* When CompressInterface object is being used to compress a chunk, this
* construct can be used to specify the padding added by padCompressedChunks
*/
EXPORT explicit IDBCompressInterface(unsigned int numUserPaddingBytes = 0);
EXPORT explicit CompressInterface(unsigned int numUserPaddingBytes = 0);
/**
* dtor
*/
EXPORT virtual ~IDBCompressInterface();
EXPORT virtual ~CompressInterface() = default;
/**
* see if the algo is available in this lib
*/
EXPORT bool isCompressionAvail(int compressionType = 0) const;
EXPORT static bool isCompressionAvail(int compressionType = 0);
/**
* Returns the maximum compressed size from all available compression
* types.
*/
EXPORT static size_t getMaxCompressedSizeGeneric(size_t inLen);
/**
* Compresses specified "in" buffer of length "inLen" bytes.
@ -76,30 +85,31 @@ public:
* "out" should be sized using maxCompressedSize() to allow for incompressible data.
* Returns 0 if success.
*/
EXPORT int compressBlock(const char* in,
const size_t inLen,
unsigned char* out,
unsigned int& outLen) const;
EXPORT int compressBlock(const char* in, const size_t inLen,
unsigned char* out, size_t& outLen) const;
/**
* outLen must be initialized with the size of the out buffer before calling uncompressBlock.
* On return, outLen will have the number of bytes used in out.
*/
EXPORT int uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
unsigned int& outLen) const;
EXPORT int uncompressBlock(const char* in, const size_t inLen,
unsigned char* out, size_t& outLen) const;
/**
* This fcn wraps whatever compression algorithm we're using at the time, and
* is not specific to blocks on disk.
*/
EXPORT int compress(const char* in, size_t inLen, char* out, size_t* outLen) const;
EXPORT virtual int compress(const char* in, size_t inLen, char* out,
size_t* outLen) const = 0;
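/**
* This fcn wraps whatever compression algorithm we're using at the time, and
* is not specific to blocks on disk. On entry *outLen must hold the capacity
* of out; on success it is set to the number of bytes written to out.
* The caller needs to make sure out is big enough to contain the output, for
* example by using getUncompressedSize() where the algorithm supports it
* (the LZ4 implementation does not).
*/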
EXPORT int uncompress(const char* in, size_t inLen, char* out) const;
EXPORT virtual int uncompress(const char* in, size_t inLen, char* out,
size_t* outLen) const = 0;
/**
* Initialize header buffer at start of compressed db file.
@ -107,23 +117,24 @@ public:
* @warning hdrBuf must be at least HDR_BUF_LEN bytes
* @warning ptrBuf must be at least (hdrSize-HDR_BUF_LEN) bytes
*/
EXPORT void initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType, int hdrSize) const;
EXPORT static void
initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType, int hdrSize);
/**
* Initialize header buffer at start of compressed db file.
*
* @warning hdrBuf must be at least HDR_BUF_LEN*2 bytes
*/
EXPORT void initHdr(void* hdrBuf, uint32_t columnWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType) const;
EXPORT static void
initHdr(void* hdrBuf, uint32_t columnWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType);
/**
* Verify the passed in buffer contains a compressed db file header.
*/
EXPORT int verifyHdr(const void* hdrBuf) const;
EXPORT static int verifyHdr(const void* hdrBuf);
/**
* Extracts list of compression pointers from the specified ptr buffer.
@ -131,9 +142,8 @@ public:
* chunkPtrs is a vector of offset, size pairs for the compressed chunks.
* Returns 0 if success.
*/
EXPORT int getPtrList(const char* ptrBuf,
const int ptrBufSize,
CompChunkPtrList& chunkPtrs) const;
EXPORT static int getPtrList(const char* ptrBuf, const int ptrBufSize,
CompChunkPtrList& chunkPtrs);
/**
* Extracts list of compression pointers from the specified header.
@ -142,28 +152,28 @@ public:
* Note: the pointer passed in is the beginning of the header,
* not the pointer section as above.
*/
EXPORT int getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs) const;
EXPORT static int getPtrList(const char* hdrBuf,
CompChunkPtrList& chunkPtrs);
/**
* Return the number of chunk pointers contained in the specified ptr buffer.
* ptrBuf points to the pointer section taken from the headers.
*/
EXPORT unsigned int getPtrCount(const char* ptrBuf,
const int ptrBufSize) const;
EXPORT static unsigned int getPtrCount(const char* ptrBuf,
const int ptrBufSize);
/**
* Return the number of chunk pointers contained in the specified header.
* hdrBuf points to start of 2 buffer headers from compressed db file.
* For non-dictionary columns.
*/
EXPORT unsigned int getPtrCount(const char* hdrBuf) const;
EXPORT static unsigned int getPtrCount(const char* hdrBuf);
/**
* Store vector of pointers into the specified buffer header's pointer section.
*/
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs,
void* hdrBuf,
int ptrSectionSize) const;
EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
void* hdrBuf, int ptrSectionSize);
/**
* Store vector of pointers into the specified buffer header.
@ -171,14 +181,14 @@ public:
* Note: the pointer passed in is the beginning of the header,
* not the pointer section as above.
*/
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs, void* hdrBuf) const;
EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
void* hdrBuf);
/**
* Calculates the chunk, and the block offset within the chunk, for the
* specified block number.
*/
EXPORT void locateBlock(unsigned int block,
unsigned int& chunkIndex,
EXPORT void locateBlock(unsigned int block, unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const;
/**
@ -187,9 +197,8 @@ public:
* maxLen is the maximum size for buf. nonzero return code means the
* result output buffer length is > than maxLen.
*/
EXPORT int padCompressedChunks(unsigned char* buf,
unsigned int& len,
unsigned int maxLen ) const;
EXPORT int padCompressedChunks(unsigned char* buf, size_t& len,
unsigned int maxLen) const;
/*
* Mutator methods for the block count in the file
@ -197,17 +206,22 @@ public:
/**
* getVersionNumber
*/
EXPORT uint64_t getVersionNumber(const void* hdrBuf) const;
EXPORT static uint64_t getVersionNumber(const void* hdrBuf);
/**
* setBlockCount
*/
EXPORT void setBlockCount(void* hdrBuf, uint64_t count) const;
EXPORT static void setBlockCount(void* hdrBuf, uint64_t count);
/**
* getBlockCount
*/
EXPORT uint64_t getBlockCount(const void* hdrBuf) const;
EXPORT static uint64_t getBlockCount(const void* hdrBuf);
/**
* getCompressionType
*/
EXPORT static uint64_t getCompressionType(const void* hdrBuf);
/*
* Mutator methods for the overall header size
@ -215,38 +229,38 @@ public:
/**
* setHdrSize
*/
EXPORT void setHdrSize(void* hdrBuf, uint64_t size) const;
EXPORT static void setHdrSize(void* hdrBuf, uint64_t size);
/**
* getHdrSize
*/
EXPORT uint64_t getHdrSize(const void* hdrBuf) const;
EXPORT static uint64_t getHdrSize(const void* hdrBuf);
/**
* getColumnType
*/
EXPORT execplan::CalpontSystemCatalog::ColDataType
getColDataType(const void* hdrBuf) const;
EXPORT static execplan::CalpontSystemCatalog::ColDataType
getColDataType(const void* hdrBuf);
/**
* getColumnWidth
*/
EXPORT uint64_t getColumnWidth(const void* hdrBuf) const;
EXPORT static uint64_t getColumnWidth(const void* hdrBuf);
/**
* getLBIDByIndex
*/
EXPORT uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index) const;
EXPORT static uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index);
/**
* setLBIDByIndex
*/
EXPORT void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const;
EXPORT static void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index);
/**
* getLBIDCount
*/
EXPORT uint64_t getLBIDCount(void* hdrBuf) const;
EXPORT static uint64_t getLBIDCount(void* hdrBuf);
/**
* Mutator methods for the user padding bytes
@ -271,97 +285,213 @@ public:
* Given an input, uncompressed block, what's the maximum possible output,
* compressed size?
*/
EXPORT static uint64_t maxCompressedSize(uint64_t uncompSize);
EXPORT virtual size_t maxCompressedSize(size_t uncompSize) const = 0;
/**
* Given a compressed block, returns the uncompressed size in outLen.
* Returns false on error, true on success.
*/
EXPORT static bool getUncompressedSize(char* in, size_t inLen, size_t* outLen);
EXPORT virtual bool getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const = 0;
protected:
protected:
virtual uint8_t getChunkMagicNumber() const = 0;
private:
private:
//defaults okay
//IDBCompressInterface(const IDBCompressInterface& rhs);
//IDBCompressInterface& operator=(const IDBCompressInterface& rhs);
//CompressInterface(const CompressInterface& rhs);
//CompressInterface& operator=(const CompressInterface& rhs);
unsigned int fNumUserPaddingBytes; // Num bytes to pad compressed chunks
};
/**
 * CompressInterface implementation backed by the snappy compression API.
 */
class CompressInterfaceSnappy : public CompressInterface
{
public:
    /**
     * numUserPaddingBytes is forwarded to the CompressInterface constructor.
     */
    EXPORT CompressInterfaceSnappy(uint32_t numUserPaddingBytes = 0);
    EXPORT ~CompressInterfaceSnappy() = default;

    /**
     * Compress the given block using snappy compression API.
     * The compressed size is reported through outLen.
     */
    EXPORT int32_t compress(const char* in, size_t inLen, char* out,
                            size_t* outLen) const override;

    /**
     * Uncompress the given block using snappy compression API.
     * On entry *outLen is the capacity of out; on success it is set to the
     * uncompressed size.
     */
    EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
                              size_t* outLen) const override;

    /**
     * Get max compressed size for the given `uncompSize` value using snappy
     * compression API.
     */
    EXPORT size_t maxCompressedSize(size_t uncompSize) const override;

    /**
     * Get uncompressed size for the given block using snappy
     * compression API.
     */
    EXPORT
    bool getUncompressedSize(char* in, size_t inLen,
                             size_t* outLen) const override;

protected:
    /**
     * Magic byte that marks a chunk as snappy-compressed.
     */
    uint8_t getChunkMagicNumber() const override;

private:
    // Shared by all instances: a static constant avoids per-object storage
    // and keeps the class copy-assignable, which a non-static const data
    // member would prevent.
    static constexpr uint8_t CHUNK_MAGIC_SNAPPY = 0xfd;
};
/**
 * CompressInterface implementation backed by the LZ4 compression API.
 */
class CompressInterfaceLZ4 : public CompressInterface
{
public:
    /**
     * numUserPaddingBytes is forwarded to the CompressInterface constructor.
     */
    EXPORT CompressInterfaceLZ4(uint32_t numUserPaddingBytes = 0);
    EXPORT ~CompressInterfaceLZ4() = default;

    /**
     * Compress the given block using LZ4 compression API.
     * On entry *outLen is the capacity of out; on success it is set to the
     * compressed size.
     */
    EXPORT int32_t compress(const char* in, size_t inLen, char* out,
                            size_t* outLen) const override;

    /**
     * Uncompress the given block using LZ4 compression API.
     * On entry *outLen is the capacity of out; on success it is set to the
     * uncompressed size.
     */
    EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
                              size_t* outLen) const override;

    /**
     * Get max compressed size for the given `uncompSize` value using LZ4
     * compression API.
     */
    EXPORT size_t maxCompressedSize(size_t uncompSize) const override;

    /**
     * Not supported: LZ4 has no function that reports the uncompressed size
     * of a block, so the implementation asserts and returns false.
     */
    EXPORT
    bool getUncompressedSize(char* in, size_t inLen,
                             size_t* outLen) const override;

protected:
    /**
     * Magic byte that marks a chunk as LZ4-compressed.
     */
    uint8_t getChunkMagicNumber() const override;

private:
    // Shared by all instances: a static constant avoids per-object storage
    // and keeps the class copy-assignable, which a non-static const data
    // member would prevent.
    static constexpr uint8_t CHUNK_MAGIC_LZ4 = 0xfc;
};
using CompressorPool =
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>;
/**
* Returns a pointer to the appropriate compression interface based on
* `compressionType`. `compressionType` must be greater than 0.
* Note: caller is responsible for memory deallocation.
*/
EXPORT CompressInterface*
getCompressInterfaceByType(uint32_t compressionType,
uint32_t numUserPaddingBytes = 0);
/**
* Returns a pointer to the appropriate compression interface based on
* `compressionName`.
* Note: caller is responsible for memory deallocation.
*/
EXPORT CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
uint32_t numUserPaddingBytes = 0);
/**
* Initializes a given `unordered_map` with all available compression
* interfaces.
*/
EXPORT void initializeCompressorPool(CompressorPool& compressorPool,
uint32_t numUserPaddingBytes = 0);
/**
* Returns a `shared_ptr` to the appropriate compression interface.
*/
EXPORT std::shared_ptr<CompressInterface>
getCompressorByType(CompressorPool& compressorPool, uint32_t compressionType);
#ifdef SKIP_IDB_COMPRESSION
inline IDBCompressInterface::IDBCompressInterface(unsigned int /*numUserPaddingBytes*/) {}
inline IDBCompressInterface::~IDBCompressInterface() {}
inline bool IDBCompressInterface::isCompressionAvail(int c) const
inline CompressInterface::CompressInterface(unsigned int /*numUserPaddingBytes*/) {}
inline bool CompressInterface::isCompressionAvail(int c)
{
return (c == 0);
}
inline int IDBCompressInterface::compressBlock(const char*, const size_t, unsigned char*, unsigned int&) const
inline int CompressInterface::compressBlock(const char*, const size_t, unsigned char*, size_t&) const
{
return -1;
}
inline int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out, unsigned int& outLen) const
inline int CompressInterface::uncompressBlock(const char* in,
const size_t inLen,
unsigned char* out,
size_t& outLen) const
{
return -1;
}
inline int IDBCompressInterface::compress(const char* in, size_t inLen, char* out, size_t* outLen) const
inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}
inline int CompressInterface::verifyHdr(const void*)
{
return -1;
}
inline int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
inline void CompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) {}
// No-op stub used when compression support is compiled out. initHdr is a
// static member, so the definition must not carry a const qualifier.
inline void CompressInterface::initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}
inline int CompressInterface::getPtrList(const char*, const int, CompChunkPtrList&)
{
return -1;
}
inline unsigned int CompressInterface::getPtrCount(const char*, const int)
{
return 0;
}
inline void IDBCompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) const {}
inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {}
inline int IDBCompressInterface::verifyHdr(const void*) const
{
return -1;
}
inline int IDBCompressInterface::getPtrList(const char*, const int, CompChunkPtrList&) const
{
return -1;
}
inline int IDBCompressInterface::getPtrList(const char*, CompChunkPtrList&) const
{
return -1;
}
inline unsigned int IDBCompressInterface::getPtrCount(const char*, const int) const
inline unsigned int CompressInterface::getPtrCount(const char*)
{
return 0;
}
inline unsigned int IDBCompressInterface::getPtrCount(const char*) const
inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) {}
inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*) {}
inline void
CompressInterface::locateBlock(unsigned int block, unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const
{
return 0;
}
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) const {}
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*) const {}
inline void IDBCompressInterface::locateBlock(unsigned int block,
unsigned int& chunkIndex, unsigned int& blockOffsetWithinChunk) const {}
inline int IDBCompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
// Stub: no padding is applied; `buf` and `len` are left untouched and -1
// (non-zero == error for callers) is always returned.
inline int CompressInterface::padCompressedChunks(unsigned char* buf,
                                                  unsigned int& len,
                                                  unsigned int maxLen) const
{
    return -1;
}
inline uint64_t
IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
// Stub: the header buffer is not read; the version number is always 0.
inline uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
{
    return 0;
}
inline void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const {}
inline uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
inline void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count) {}
// Stub: the header buffer is not read; the block count is always 0.
inline uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
{
    return 0;
}
inline void IDBCompressInterface::setHdrSize(void*, uint64_t) const {}
inline uint64_t IDBCompressInterface::getHdrSize(const void*) const
// Stub: the header buffer is not read; the compression type is always 0.
inline uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
{
    return 0;
}
inline execplan::CalpontSystemCatalog::ColDataType
IDBCompressInterface::getColDataType(const void* hdrBuf) const
CompressInterface::getColDataType(const void* hdrBuf)
{
return execplan::CalpontSystemCatalog::ColDataType::UNDEFINED;
}
// Stub: the header buffer is not read; the column width is always 0.
inline uint64_t CompressInterface::getColumnWidth(const void* hdrBuf) const
{
    return 0;
}
// Stub LBID accessors: the getters ignore the header buffer and return 0,
// the setters ignore both arguments and do nothing.
//
// All four are marked `inline`, matching the surrounding stub definitions,
// so that including this code from several translation units does not
// produce multiple-definition link errors for the setters.
inline uint64_t getLBID0(const void* hdrBuf) { return 0; }
inline void setLBID0(void* hdrBuf, uint64_t lbid) {}
inline uint64_t getLBID1(const void* hdrBuf) { return 0; }
inline void setLBID1(void* hdrBuf, uint64_t lbid) {}
inline void CompressInterface::setHdrSize(void*, uint64_t) {}
// Stub: the header buffer is not read; the header size is always 0.
inline uint64_t CompressInterface::getHdrSize(const void* /*hdrBuf*/)
{
    return 0;
}
// Stub constructor: forwards `numUserPaddingBytes` to the CompressInterface
// base and does nothing else.
//
// Marked `inline` like the neighbouring stub definitions so that the
// definition can appear in more than one translation unit.
// NOTE(review): assumes this section lives in a header (it is followed by
// `#endif` / `#undef EXPORT`) -- confirm.
inline CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
    : CompressInterface(numUserPaddingBytes)
{
}
inline uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const { return 0; }
inline uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
{
@ -377,8 +507,13 @@ inline bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, si
{
return false;
}
// Stub: always reports 0 as the chunk magic number.
// NOTE(review): as written this is a non-member function carrying a `const`
// qualifier, which is ill-formed; it presumably needs to be qualified with
// the class that declares it (and marked `inline`) -- confirm against the
// class declaration.
uint8_t getChunkMagicNumber() const { return 0; }
// Stub factory: no compressor is ever created; both arguments are ignored
// and nullptr is returned for every compression type.
//
// Marked `inline` like the neighbouring stub definitions so that the
// definition can appear in more than one translation unit.
// NOTE(review): assumes this section lives in a header (it is followed by
// `#endif` / `#undef EXPORT`) -- confirm.
inline CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
                                                     uint32_t numUserPaddingBytes)
{
    return nullptr;
}
#endif
}
#undef EXPORT

View File

@ -176,25 +176,24 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
return -1;
}
compress::IDBCompressInterface decompressor;
char hdr1[compress::CompressInterface::HDR_BUF_LEN];
nBytes = readFillBuffer( pFile, hdr1, compress::CompressInterface::HDR_BUF_LEN);
char hdr1[compress::IDBCompressInterface::HDR_BUF_LEN];
nBytes = readFillBuffer( pFile, hdr1, compress::IDBCompressInterface::HDR_BUF_LEN);
if ( nBytes != compress::IDBCompressInterface::HDR_BUF_LEN )
if ( nBytes != compress::CompressInterface::HDR_BUF_LEN )
{
delete pFile;
return -1;
}
// Verify we are a compressed file
if (decompressor.verifyHdr(hdr1) < 0)
if (compress::CompressInterface::verifyHdr(hdr1) < 0)
{
delete pFile;
return -1;
}
int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - compress::IDBCompressInterface::HDR_BUF_LEN;
int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
compress::CompressInterface::HDR_BUF_LEN;
char* hdr2 = new char[ptrSecSize];
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
@ -206,7 +205,8 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
}
compress::CompChunkPtrList chunkPtrs;
int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs);
int rc = compress::CompressInterface::getPtrList(hdr2, ptrSecSize,
chunkPtrs);
delete[] hdr2;
if (rc != 0)

View File

@ -50,7 +50,10 @@ namespace joiner
uint64_t uniqueNums = 0;
JoinPartition::JoinPartition() { }
// Default constructor: installs a Snappy compressor as this partition's
// compressor.
JoinPartition::JoinPartition()
{
    compressor = std::make_shared<compress::CompressInterfaceSnappy>();
}
/* This is the ctor used by THJS */
JoinPartition::JoinPartition(const RowGroup& lRG,
@ -103,6 +106,22 @@ JoinPartition::JoinPartition(const RowGroup& lRG,
for (int i = 0; i < (int) bucketCount; i++)
buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false)));
string compressionType;
try
{
compressionType =
config->getConfig("HashJoin", "TempFileCompressionType");
} catch (...) {}
if (compressionType == "LZ4")
{
compressor.reset(new compress::CompressInterfaceLZ4());
}
else
{
compressor.reset(new compress::CompressInterfaceSnappy());
}
}
/* Ctor used by JoinPartition on expansion, creates JP's in filemode */
@ -151,6 +170,8 @@ JoinPartition::JoinPartition(const JoinPartition& jp, bool splitMode) :
smallRG.setData(&buffer);
smallRG.resetRowGroup(0);
smallRG.getRow(0, &smallRow);
compressor = jp.compressor;
}
@ -694,6 +715,7 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
fs.seekg(offset);
fs.read((char*) &len, sizeof(len));
saveErrno = errno;
if (!fs)
@ -735,12 +757,14 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
else
{
size_t uncompressedSize;
fs.read((char*) &uncompressedSize, sizeof(uncompressedSize));
boost::scoped_array<char> buf(new char[len]);
fs.read(buf.get(), len);
saveErrno = errno;
if (!fs)
if (!fs || !uncompressedSize)
{
fs.close();
ostringstream os;
@ -749,9 +773,9 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
}
totalBytesRead += len;
compressor.getUncompressedSize(buf.get(), len, &uncompressedSize);
bs->needAtLeast(uncompressedSize);
compressor.uncompress(buf.get(), len, (char*) bs->getInputPtr());
compressor->uncompress(buf.get(), len, (char*) bs->getInputPtr(),
&uncompressedSize);
bs->advanceInputPtr(uncompressedSize);
}
@ -801,13 +825,15 @@ uint64_t JoinPartition::writeByteStream(int which, ByteStream& bs)
}
else
{
uint64_t maxSize = compressor.maxCompressedSize(len);
size_t actualSize;
size_t maxSize = compressor->maxCompressedSize(len);
size_t actualSize = maxSize;
boost::scoped_array<uint8_t> compressed(new uint8_t[maxSize]);
compressor.compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
ret = actualSize + 4;
compressor->compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
ret = actualSize + 4 + 8; // sizeof (size_t) == 8. Why 4?
fs.write((char*) &actualSize, sizeof(actualSize));
// Save uncompressed len.
fs.write((char*) &len, sizeof(len));
fs.write((char*) compressed.get(), actualSize);
saveErrno = errno;

View File

@ -164,7 +164,7 @@ private:
/* Compression support */
bool useCompression;
compress::IDBCompressInterface compressor;
std::shared_ptr<compress::CompressInterface> compressor;
/* TBD: do the reading/writing in one thread, compression/decompression in another */
/* Some stats for reporting */

View File

@ -64,6 +64,7 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
{
config::Config* config = config::Config::makeConfig();
string val;
string compressionType;
try
{
@ -75,6 +76,19 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
useCompression = true;
else
useCompression = false;
try
{
compressionType =
config->getConfig("NetworkCompression", "NetworkCompression");
}
catch (...) { }
auto* compressInterface = compress::getCompressInterfaceByName(compressionType);
if (!compressInterface)
compressInterface = new compress::CompressInterfaceSnappy();
alg.reset(compressInterface);
}
Socket* CompressedInetStreamSocket::clone() const
@ -87,20 +101,25 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
{
SBS readBS, ret;
size_t uncompressedSize;
bool err;
readBS = InetStreamSocket::read(timeout, isTimeOut, stats);
if (readBS->length() == 0 || fMagicBuffer == BYTESTREAM_MAGIC)
return readBS;
err = alg.getUncompressedSize((char*) readBS->buf(), readBS->length(), &uncompressedSize);
// Read stored len, first 4 bytes.
uint32_t storedLen = *(uint32_t*) readBS->buf();
if (!err)
if (!storedLen)
return SBS(new ByteStream(0));
uncompressedSize = storedLen;
ret.reset(new ByteStream(uncompressedSize));
alg.uncompress((char*) readBS->buf(), readBS->length(), (char*) ret->getInputPtr());
alg->uncompress((char*) readBS->buf() + HEADER_SIZE,
readBS->length() - HEADER_SIZE, (char*) ret->getInputPtr(),
&uncompressedSize);
ret->advanceInputPtr(uncompressedSize);
return ret;
@ -108,15 +127,18 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats)
{
size_t outLen = 0;
uint32_t len = msg.length();
size_t len = msg.length();
if (useCompression && (len > 512))
{
ByteStream smsg(alg.maxCompressedSize(len));
size_t outLen = alg->maxCompressedSize(len) + HEADER_SIZE;
ByteStream smsg(outLen);
alg.compress((char*) msg.buf(), len, (char*) smsg.getInputPtr(), &outLen);
smsg.advanceInputPtr(outLen);
alg->compress((char*) msg.buf(), len,
(char*) smsg.getInputPtr() + HEADER_SIZE, &outLen);
// Save original len.
*(uint32_t*) smsg.getInputPtr() = len;
smsg.advanceInputPtr(outLen + HEADER_SIZE);
if (outLen < len)
do_write(smsg, COMPRESSED_BYTESTREAM_MAGIC, stats);

View File

@ -54,8 +54,9 @@ public:
virtual const IOSocket accept(const struct timespec* timeout);
virtual void connect(const sockaddr* addr);
private:
compress::IDBCompressInterface alg;
std::shared_ptr<compress::CompressInterface> alg;
bool useCompression;
static const uint32_t HEADER_SIZE = 4;
};
} //namespace messageqcpp

View File

@ -337,15 +337,12 @@ int BulkLoad::loadJobInfo(
}
}
// Validate that specified compression type is available
compress::IDBCompressInterface compressor;
for (unsigned kT = 0; kT < curJob.jobTableList.size(); kT++)
{
for (unsigned kC = 0; kC < curJob.jobTableList[kT].colList.size(); kC++)
{
if ( !compressor.isCompressionAvail(
curJob.jobTableList[kT].colList[kC].compressionType) )
if (!compress::CompressInterface::isCompressionAvail(
curJob.jobTableList[kT].colList[kC].compressionType))
{
std::ostringstream oss;
oss << "Specified compression type (" <<

View File

@ -60,12 +60,11 @@ ColumnBufferCompressed::ColumnBufferCompressed( ColumnInfo* pColInfo,
fToBeCompressedBuffer(0),
fToBeCompressedCapacity(0),
fNumBytes(0),
fCompressor(0),
fPreLoadHWMChunk(true),
fFlushedStartHwmChunk(false)
{
fUserPaddingBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
fCompressor = new compress::IDBCompressInterface( fUserPaddingBytes );
compress::initializeCompressorPool(fCompressorPool, fUserPaddingBytes);
}
//------------------------------------------------------------------------------
@ -79,7 +78,6 @@ ColumnBufferCompressed::~ColumnBufferCompressed()
fToBeCompressedBuffer = 0;
fToBeCompressedCapacity = 0;
fNumBytes = 0;
delete fCompressor;
}
//------------------------------------------------------------------------------
@ -91,9 +89,7 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char*
fFile = f;
fStartingHwm = startHwm;
IDBCompressInterface compressor;
if (compressor.getPtrList(hdrs, fChunkPtrs) != 0)
if (compress::CompressInterface::getPtrList(hdrs, fChunkPtrs) != 0)
{
return ERR_COMP_PARSE_HDRS;
}
@ -102,7 +98,15 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char*
// rollback), that fall after the HWM, then drop those trailing ptrs.
unsigned int chunkIndex = 0;
unsigned int blockOffsetWithinChunk = 0;
fCompressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk);
auto compressor = compress::getCompressorByType(
fCompressorPool, fColInfo->column.compressionType);
if (!compressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
compressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk);
if ((chunkIndex + 1) < fChunkPtrs.size())
{
@ -127,11 +131,11 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf(
if (!fToBeCompressedBuffer)
{
fToBeCompressedBuffer =
new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN];
new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN];
}
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal,
fColInfo->column.width );
@ -147,10 +151,10 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf(
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
}
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
// Set file offset past end of last chunk
startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2;
startFileOffset = CompressInterface::HDR_BUF_LEN * 2;
if (fChunkPtrs.size() > 0)
startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first +
@ -223,7 +227,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
// Expand the compression buffer size if working with an abbrev extent, and
// the bytes we are about to add will overflow the abbreviated extent.
if ((fToBeCompressedCapacity < IDBCompressInterface::UNCOMPRESSED_INBUF_LEN) &&
if ((fToBeCompressedCapacity < CompressInterface::UNCOMPRESSED_INBUF_LEN) &&
((fNumBytes + writeSize + fillUpWEmptiesWriteSize) > fToBeCompressedCapacity) )
{
std::ostringstream oss;
@ -233,7 +237,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
"; part-" << fColInfo->curCol.dataFile.fPartition <<
"; seg-" << fColInfo->curCol.dataFile.fSegment;
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
}
if ((fNumBytes + writeSize + fillUpWEmptiesWriteSize) <= fToBeCompressedCapacity)
@ -316,12 +320,12 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
// Start over again loading a new to-be-compressed buffer
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal,
fColInfo->column.width );
fToBeCompressedCapacity =
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
CompressInterface::UNCOMPRESSED_INBUF_LEN;
bufOffset = fToBeCompressedBuffer;
fNumBytes = 0;
@ -377,21 +381,31 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
//------------------------------------------------------------------------------
int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile )
{
const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(fToBeCompressedCapacity) +
fUserPaddingBytes;
auto compressor = compress::getCompressorByType(
fCompressorPool, fColInfo->column.compressionType);
if (!compressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
const size_t OUTPUT_BUFFER_SIZE =
compressor->maxCompressedSize(fToBeCompressedCapacity) +
fUserPaddingBytes +
// Padded len = len + COMPRESSED_SIZE_INCREMENT_CHUNK - (len %
// COMPRESSED_SIZE_INCREMENT_CHUNK) + usePadding
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
unsigned char* compressedOutBuf = new unsigned char[ OUTPUT_BUFFER_SIZE ];
boost::scoped_array<unsigned char> compressedOutBufPtr(compressedOutBuf);
unsigned int outputLen = OUTPUT_BUFFER_SIZE;
size_t outputLen = OUTPUT_BUFFER_SIZE;
#ifdef PROFILE
Stats::startParseEvent(WE_STATS_COMPRESS_COL_COMPRESS);
#endif
int rc = fCompressor->compressBlock(
reinterpret_cast<char*>(fToBeCompressedBuffer),
fToBeCompressedCapacity,
compressedOutBuf,
outputLen );
int rc = compressor->compressBlock(
reinterpret_cast<char*>(fToBeCompressedBuffer),
fToBeCompressedCapacity, compressedOutBuf, outputLen);
if (rc != 0)
{
@ -399,7 +413,7 @@ int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile )
}
// Round up the compressed chunk size
rc = fCompressor->padCompressedChunks( compressedOutBuf,
rc = compressor->padCompressedChunks( compressedOutBuf,
outputLen, OUTPUT_BUFFER_SIZE );
if (rc != 0)
@ -581,26 +595,24 @@ int ColumnBufferCompressed::finishFile(bool bTruncFile)
int ColumnBufferCompressed::saveCompressionHeaders( )
{
// Construct the header records
char hdrBuf[IDBCompressInterface::HDR_BUF_LEN * 2];
char hdrBuf[CompressInterface::HDR_BUF_LEN * 2];
RETURN_ON_ERROR(fColInfo->colOp->readHeaders(fFile, hdrBuf));
BRM::LBID_t lbid = fCompressor->getLBIDByIndex(hdrBuf, 0);
fCompressor->initHdr(hdrBuf, fColInfo->column.width,
fColInfo->column.dataType,
fColInfo->column.compressionType);
fCompressor->setBlockCount(hdrBuf,
(fColInfo->getFileSize() / BYTE_PER_BLOCK) );
BRM::LBID_t lbid = compress::CompressInterface::getLBIDByIndex(hdrBuf, 0);
compress::CompressInterface::initHdr(hdrBuf, fColInfo->column.width, fColInfo->column.dataType,
fColInfo->column.compressionType);
compress::CompressInterface::setBlockCount(hdrBuf, (fColInfo->getFileSize() / BYTE_PER_BLOCK));
// If lbid written in the header is not 0 and not equal to `lastupdatedlbid` - we are running
// for the next extent for column segment file.
const auto lastUpdatedLbid = fColInfo->getLastUpdatedLBID();
if (lbid && lastUpdatedLbid != lbid)
{
// Write back lbid, after header initialization.
fCompressor->setLBIDByIndex(hdrBuf, lbid, 0);
fCompressor->setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1);
compress::CompressInterface::setLBIDByIndex(hdrBuf, lbid, 0);
compress::CompressInterface::setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1);
}
else
fCompressor->setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0);
compress::CompressInterface::setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0);
std::vector<uint64_t> ptrs;
@ -611,7 +623,7 @@ int ColumnBufferCompressed::saveCompressionHeaders( )
unsigned lastIdx = fChunkPtrs.size() - 1;
ptrs.push_back( fChunkPtrs[lastIdx].first + fChunkPtrs[lastIdx].second );
fCompressor->storePtrs( ptrs, hdrBuf );
compress::CompressInterface::storePtrs(ptrs, hdrBuf);
// Write out the header records
//char resp;
@ -641,9 +653,9 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
if (!fToBeCompressedBuffer)
{
fToBeCompressedBuffer =
new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN];
new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN];
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal,
fColInfo->column.width );
bNewBuffer = true;
@ -656,12 +668,19 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
unsigned int blockOffsetWithinChunk = 0;
bool bSkipStartingBlks = false;
auto compressor = compress::getCompressorByType(
fCompressorPool, fColInfo->column.compressionType);
if (!compressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
if (fPreLoadHWMChunk)
{
if (fChunkPtrs.size() > 0)
{
fCompressor->locateBlock(fStartingHwm,
chunkIndex, blockOffsetWithinChunk);
compressor->locateBlock(fStartingHwm, chunkIndex,
blockOffsetWithinChunk);
if (chunkIndex < fChunkPtrs.size())
startFileOffset = fChunkPtrs[chunkIndex].first;
@ -718,8 +737,8 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
}
// Uncompress the chunk into our 4MB buffer
unsigned int outLen = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
int rc = fCompressor->uncompressBlock(
size_t outLen = CompressInterface::UNCOMPRESSED_INBUF_LEN;
int rc = compressor->uncompressBlock(
compressedOutBuf,
fChunkPtrs[chunkIndex].second,
fToBeCompressedBuffer,
@ -758,7 +777,7 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
if (!bNewBuffer)
{
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal,
fColInfo->column.width );
}
@ -775,10 +794,10 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
}
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
// Set file offset to start after last current chunk
startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2;
startFileOffset = CompressInterface::HDR_BUF_LEN * 2;
if (fChunkPtrs.size() > 0)
startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first +
@ -796,5 +815,4 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
return NO_ERROR;
}
}

View File

@ -107,8 +107,7 @@ private:
// should always be 4MB, unless
// working with abbrev extent.
size_t fNumBytes; // num Bytes in comp buffer
compress::IDBCompressInterface*
fCompressor; // data compression object
compress::CompressorPool fCompressorPool; // data compression object pool
compress::CompChunkPtrList
fChunkPtrs; // col file header information
bool fPreLoadHWMChunk; // preload 1st HWM chunk only

View File

@ -450,7 +450,7 @@ int ColumnInfo::createDelayedFileIfNeeded( const std::string& tableName )
if (column.dctnry.fCompressionType != 0)
{
DctnryCompress1* tempD1;
tempD1 = new DctnryCompress1;
tempD1 = new DctnryCompress1(column.dctnry.fCompressionType);
tempD1->setMaxActiveChunkNum(1);
tempD1->setBulkFlag(true);
tempD = tempD1;
@ -668,7 +668,7 @@ int ColumnInfo::extendColumnNewExtent(
uint16_t segmentNew = 0;
BRM::LBID_t startLbid;
char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
// Extend the column by adding an extent to the next
// DBRoot, partition, and segment file in the rotation
@ -1684,7 +1684,8 @@ int ColumnInfo::openDctnryStore( bool bMustExist )
if ( column.dctnry.fCompressionType != 0)
{
DctnryCompress1* dctnryCompress1 = new DctnryCompress1;
DctnryCompress1* dctnryCompress1 =
new DctnryCompress1(column.dctnry.fCompressionType);
dctnryCompress1->setMaxActiveChunkNum(1);
dctnryCompress1->setBulkFlag(true);
fStore = dctnryCompress1;

View File

@ -108,7 +108,7 @@ int ColumnInfoCompressed::closeColumnFile(bool bCompletingExtent, bool bAbort)
//------------------------------------------------------------------------------
int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm )
{
char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
RETURN_ON_ERROR( colOp->readHeaders(curCol.dataFile.pFile, hdr) );
// Initialize the output buffer manager for the column.
@ -129,10 +129,9 @@ int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm )
fColBufferMgr = mgr;
IDBCompressInterface compressor;
int abbrevFlag =
( compressor.getBlockCount(hdr) ==
uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width / BYTE_PER_BLOCK) );
int abbrevFlag = (compress::CompressInterface::getBlockCount(hdr) ==
uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width /
BYTE_PER_BLOCK));
setFileSize( hwm, abbrevFlag );
// See if dealing with abbreviated extent that will need expanding.
@ -324,9 +323,9 @@ int ColumnInfoCompressed::truncateDctnryStore(
return rc;
}
char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ];
char controlHdr[ CompressInterface::HDR_BUF_LEN ];
rc = fTruncateDctnryFileOp.readFile( dFile,
(unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN);
(unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN);
if (rc != NO_ERROR)
{
@ -345,8 +344,7 @@ int ColumnInfoCompressed::truncateDctnryStore(
return rc;
}
IDBCompressInterface compressor;
int rc1 = compressor.verifyHdr( controlHdr );
int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
if (rc1 != 0)
{
@ -372,7 +370,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
// actually grow the file (something we don't want to do), because we have
// not yet reserved a full extent (on disk) for this dictionary store file.
const int PSEUDO_COL_WIDTH = 8;
uint64_t numBlocks = compressor.getBlockCount( controlHdr );
uint64_t numBlocks =
compress::CompressInterface::getBlockCount(controlHdr);
if ( numBlocks == uint64_t
(INITIAL_EXTENT_ROWS_TO_DISK * PSEUDO_COL_WIDTH / BYTE_PER_BLOCK) )
@ -390,8 +389,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
return NO_ERROR;
}
uint64_t hdrSize = compressor.getHdrSize(controlHdr);
uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN;
uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
char* pointerHdr = new char[ptrHdrSize];
rc = fTruncateDctnryFileOp.readFile(dFile,
@ -416,7 +415,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
}
CompChunkPtrList chunkPtrs;
rc1 = compressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs );
rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
chunkPtrs);
delete[] pointerHdr;
if (rc1 != 0)

View File

@ -96,7 +96,7 @@ size_t readFillBuffer(
return totalBytesRead;
}
off64_t getCompressedDataSize(string& fileName)
static off64_t getCompressedDataSize(string& fileName)
{
off64_t dataSize = 0;
IDBDataFile* pFile = 0;
@ -119,21 +119,21 @@ off64_t getCompressedDataSize(string& fileName)
throw std::runtime_error(oss.str());
}
IDBCompressInterface decompressor;
//--------------------------------------------------------------------------
// Read headers and extract compression pointers
//--------------------------------------------------------------------------
char hdr1[IDBCompressInterface::HDR_BUF_LEN];
nBytes = readFillBuffer( pFile, hdr1, IDBCompressInterface::HDR_BUF_LEN);
char hdr1[CompressInterface::HDR_BUF_LEN];
nBytes = readFillBuffer( pFile, hdr1, CompressInterface::HDR_BUF_LEN);
if ( nBytes != IDBCompressInterface::HDR_BUF_LEN )
if ( nBytes != CompressInterface::HDR_BUF_LEN )
{
std::ostringstream oss;
oss << "Error reading first header from file " << fileName;
throw std::runtime_error(oss.str());
}
int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - IDBCompressInterface::HDR_BUF_LEN;
int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
CompressInterface::HDR_BUF_LEN;
char* hdr2 = new char[ptrSecSize];
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
@ -145,7 +145,8 @@ off64_t getCompressedDataSize(string& fileName)
}
CompChunkPtrList chunkPtrs;
int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs);
int rc =
compress::CompressInterface::getPtrList(hdr2, ptrSecSize, chunkPtrs);
delete[] hdr2;
if (rc != 0)

View File

@ -51,6 +51,7 @@ namespace WriteEngine
// Constructs the BulkRollbackFile base from `mgr`, then hands this object's
// fCompressorPool to compress::initializeCompressorPool().
BulkRollbackFileCompressed::BulkRollbackFileCompressed(BulkRollbackMgr* mgr)
    : BulkRollbackFile(mgr)
{
    compress::initializeCompressorPool(fCompressorPool);
}
//------------------------------------------------------------------------------
@ -104,7 +105,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
}
// Read and parse the header pointers
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];;
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];;
CompChunkPtrList chunkPtrs;
std::string errMsg;
int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg);
@ -127,7 +128,20 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
unsigned int blockOffset = fileSizeBlocks - 1;
unsigned int chunkIndex = 0;
unsigned int blkOffsetInChunk = 0;
fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
auto fCompressor = compress::getCompressorByType(
fCompressorPool,
compress::CompressInterface::getCompressionType(hdrs));
if (!fCompressor)
{
std::ostringstream oss;
oss << "Error, wrong compression type for segment file"
<< ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-"
<< partNum << "; segment-" << segNum << ";";
throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
}
fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
// Truncate the extra extents that are to be aborted
if (chunkIndex < chunkPtrs.size())
@ -145,7 +159,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
logging::M0075, columnOID, msgText2.str() );
// Drop off any trailing pointers (that point beyond the last block)
fCompressor.setBlockCount( hdrs, fileSizeBlocks );
compress::CompressInterface::setBlockCount(hdrs, fileSizeBlocks);
std::vector<uint64_t> ptrs;
for (unsigned i = 0; i <= chunkIndex; i++)
@ -155,7 +169,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
ptrs.push_back( chunkPtrs[chunkIndex].first +
chunkPtrs[chunkIndex].second );
fCompressor.storePtrs( ptrs, hdrs );
compress::CompressInterface::storePtrs(ptrs, hdrs);
rc = fDbFile.writeHeaders( pFile, hdrs );
@ -252,7 +266,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
}
// Read and parse the header pointers
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
CompChunkPtrList chunkPtrs;
std::string errMsg;
int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg);
@ -275,7 +289,20 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
unsigned int blockOffset = startOffsetBlk - 1;
unsigned int chunkIndex = 0;
unsigned int blkOffsetInChunk = 0;
fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
auto fCompressor = compress::getCompressorByType(
fCompressorPool,
compress::CompressInterface::getCompressionType(hdrs));
if (!fCompressor)
{
std::ostringstream oss;
oss << "Error, wrong compression type for segment file"
<< ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-"
<< partNum << "; segment-" << segNum << ";";
throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
}
fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
if (chunkIndex < chunkPtrs.size())
{
@ -401,7 +428,8 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
// Watch for the special case where we are restoring a db file as an
// empty file (chunkindex=0 and restoredChunkLen=0); in this case we
// just restore the first pointer (set to 8192).
fCompressor.setBlockCount( hdrs, (startOffsetBlk + nBlocks) );
compress::CompressInterface::setBlockCount(hdrs,
(startOffsetBlk + nBlocks));
std::vector<uint64_t> newPtrs;
if ((chunkIndex > 0) || (restoredChunkLen > 0))
@ -413,7 +441,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
}
newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen );
fCompressor.storePtrs( newPtrs, hdrs );
compress::CompressInterface::storePtrs(newPtrs, hdrs);
rc = fDbFile.writeHeaders( pFile, hdrs );
@ -482,7 +510,7 @@ int BulkRollbackFileCompressed::loadColumnHdrPtrs(
}
// Parse the header pointers
int rc1 = fCompressor.getPtrList( hdrs, chunkPtrs );
int rc1 = compress::CompressInterface::getPtrList(hdrs, chunkPtrs);
if (rc1 != 0)
{
@ -548,7 +576,7 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
throw WeException( oss.str(), ERR_FILE_OPEN );
}
char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ];
char controlHdr[ CompressInterface::HDR_BUF_LEN ];
CompChunkPtrList chunkPtrs;
uint64_t ptrHdrSize;
std::string errMsg;
@ -572,7 +600,20 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
unsigned int blockOffset = startOffsetBlk - 1;
unsigned int chunkIndex = 0;
unsigned int blkOffsetInChunk = 0;
fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
auto fCompressor = compress::getCompressorByType(
fCompressorPool,
compress::CompressInterface::getCompressionType(controlHdr));
if (!fCompressor)
{
std::ostringstream oss;
oss << "Error, wrong compression type for segment file"
<< ": OID-" << dStoreOID << "; DbRoot-" << dbRoot << "; partition-"
<< partNum << "; segment-" << segNum << ";";
throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
}
fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
if (chunkIndex < chunkPtrs.size())
{
@ -686,7 +727,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
// Watch for the special case where we are restoring a db file as an
// empty file (chunkindex=0 and restoredChunkLen=0); in this case we
// just restore the first pointer (set to 8192).
fCompressor.setBlockCount( controlHdr, (startOffsetBlk + nBlocks) );
compress::CompressInterface::setBlockCount(controlHdr,
(startOffsetBlk + nBlocks));
std::vector<uint64_t> newPtrs;
if ((chunkIndex > 0) || (restoredChunkLen > 0))
@ -699,7 +741,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen );
char* pointerHdr = new char[ptrHdrSize];
fCompressor.storePtrs( newPtrs, pointerHdr, ptrHdrSize );
compress::CompressInterface::storePtrs(newPtrs, pointerHdr,
ptrHdrSize);
rc = fDbFile.writeHeaders( pFile, controlHdr, pointerHdr, ptrHdrSize );
delete[] pointerHdr;
@ -759,7 +802,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
std::string& errMsg) const
{
int rc = fDbFile.readFile(
pFile, (unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN);
pFile, (unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN);
if (rc != NO_ERROR)
{
@ -771,7 +814,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
return rc;
}
int rc1 = fCompressor.verifyHdr( controlHdr );
int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
if (rc1 != 0)
{
@ -786,8 +829,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
return rc;
}
uint64_t hdrSize = fCompressor.getHdrSize(controlHdr);
ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN;
uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
char* pointerHdr = new char[ptrHdrSize];
rc = fDbFile.readFile(pFile, (unsigned char*)pointerHdr, ptrHdrSize);
@ -804,7 +847,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
}
// Parse the header pointers
rc1 = fCompressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs );
rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
chunkPtrs);
delete[] pointerHdr;
if (rc1 != 0)
@ -1033,5 +1077,4 @@ size_t BulkRollbackFileCompressed::readFillBuffer(
return totalBytesRead;
}
} //end of namespace

View File

@ -28,6 +28,7 @@
#include <cstdio>
#include <cstring>
#include <unordered_map>
#include "we_define.h"
#include "we_type.h"
@ -148,7 +149,7 @@ private:
uint64_t& ptrHdrSize,
std::string& errMsg ) const;
compress::IDBCompressInterface fCompressor;
compress::CompressorPool fCompressorPool;
};
} //end of namespace

View File

@ -67,8 +67,6 @@ namespace WriteEngine
extern int NUM_BLOCKS_PER_INITIAL_EXTENT; // defined in we_dctnry.cpp
extern WErrorCodes ec; // defined in we_log.cpp
const int COMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::maxCompressedSize(UNCOMPRESSED_CHUNK_SIZE) + 64 + 3 + 8 * 1024;
//------------------------------------------------------------------------------
// Search for the specified chunk in fChunkList.
//------------------------------------------------------------------------------
@ -91,18 +89,24 @@ ChunkData* CompFileData::findChunk(int64_t id) const
//------------------------------------------------------------------------------
// ChunkManager constructor
//------------------------------------------------------------------------------
ChunkManager::ChunkManager() : fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false),
fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()),
fFileOp(0), fSysLogger(NULL), fTransId(-1),
fLocalModuleId(Config::getLocalModuleID()),
fFs(fIsHdfs ?
IDBFileSystem::getFs(IDBDataFile::HDFS) :
IDBPolicy::useCloud() ?
IDBFileSystem::getFs(IDBDataFile::CLOUD) :
IDBFileSystem::getFs(IDBDataFile::BUFFERED))
ChunkManager::ChunkManager()
: fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false),
fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()),
fFileOp(0), fSysLogger(NULL), fTransId(-1),
fLocalModuleId(Config::getLocalModuleID()),
fFs(fIsHdfs ? IDBFileSystem::getFs(IDBDataFile::HDFS)
: IDBPolicy::useCloud()
? IDBFileSystem::getFs(IDBDataFile::CLOUD)
: IDBFileSystem::getFs(IDBDataFile::BUFFERED))
{
fUserPaddings = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
fCompressor.numUserPaddingBytes(fUserPaddings);
compress::initializeCompressorPool(fCompressorPool, fUserPaddings);
COMPRESSED_CHUNK_SIZE =
compress::CompressInterface::getMaxCompressedSizeGeneric(
UNCOMPRESSED_CHUNK_SIZE) +
64 + 3 + 8 * 1024;
fMaxCompressedBufSize = COMPRESSED_CHUNK_SIZE + fUserPaddings;
fBufCompressed = new char[fMaxCompressedBufSize];
fSysLogger = new logging::Logger(SUBSYSTEM_ID_WE);
@ -383,16 +387,22 @@ CompFileData* ChunkManager::getFileData(const FID& fid,
}
// make sure the header is valid
if (fCompressor.verifyHdr(fileData->fFileHeader.fControlData) != 0)
if (compress::CompressInterface::verifyHdr(fileData->fFileHeader.fControlData) != 0)
{
WE_COMP_DBG(cout << "Invalid header." << endl;)
delete fileData;
return NULL;
}
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
// Save segment file compression type.
uint32_t compressionType = compress::CompressInterface::getCompressionType(
fileData->fFileHeader.fControlData);
fileData->fCompressionType = compressionType;
if (ptrSecSize > COMPRESSED_FILE_HEADER_UNIT)
{
// >8K header, dictionary width > 128
@ -462,11 +472,12 @@ IDBDataFile* ChunkManager::createDctnryFile(const FID& fid,
// Dictionary store extent width == 0. See more details in function
// `createDictStoreExtent`.
fCompressor.initHdr(fileData->fFileHeader.fControlData,
fileData->fFileHeader.fPtrSection,
/*colWidth=*/0, fileData->fColDataType,
fFileOp->compressionType(), hdrSize);
fCompressor.setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0);
compress::CompressInterface::initHdr(
fileData->fFileHeader.fControlData, fileData->fFileHeader.fPtrSection,
/*colWidth=*/0, fileData->fColDataType, fFileOp->compressionType(), hdrSize);
compress::CompressInterface::setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0);
// Save compression type.
fileData->fCompressionType = fFileOp->compressionType();
if (writeHeader(fileData, __LINE__) != NO_ERROR)
{
@ -771,9 +782,16 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
}
// uncompress the read in buffer
unsigned int dataLen = sizeof(chunkData->fBufUnCompressed);
size_t dataLen = sizeof(chunkData->fBufUnCompressed);
if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize,
auto fCompressor = compress::getCompressorByType(
fCompressorPool, fileData->fCompressionType);
if (!fCompressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
(unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0)
{
if (fIsFix)
@ -784,7 +802,7 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
{
char* hdr = fileData->fFileHeader.fControlData;
if (fCompressor.getBlockCount(hdr) < 512)
if (compress::CompressInterface::getBlockCount(hdr) < 512)
blocks = 256;
}
@ -820,7 +838,8 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
{
if (id == 0 && ptrs[id] == 0) // if the 1st ptr is not set for new extent
{
ptrs[0] = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
ptrs[0] = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
}
// load the uncompressed buffer with empty values.
@ -907,10 +926,17 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
// compress the chunk before writing it to file
fLenCompressed = fMaxCompressedBufSize;
if (fCompressor.compressBlock((char*)chunkData->fBufUnCompressed,
chunkData->fLenUnCompressed,
(unsigned char*)fBufCompressed,
fLenCompressed) != 0)
auto fCompressor = compress::getCompressorByType(
fCompressorPool, fileData->fCompressionType);
if (!fCompressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
if (fCompressor->compressBlock((char*) chunkData->fBufUnCompressed,
chunkData->fLenUnCompressed,
(unsigned char*) fBufCompressed,
fLenCompressed) != 0)
{
logMessage(ERR_COMP_COMPRESS, logging::LOG_TYPE_ERROR, __LINE__);
return ERR_COMP_COMPRESS;
@ -941,7 +967,8 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
// [chunkId+0] is the start offset of current chunk.
// [chunkId+1] is the start offset of next chunk, the offset diff is current chunk size.
// Whether [chunkId+2] is 0 indicates if the next chunk exists (0 means it does not).
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
int64_t usablePtrIds = (ptrSecSize / sizeof(uint64_t)) - 2;
@ -968,7 +995,7 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
else if (lastChunk)
{
// add padding space if the chunk is written first time
if (fCompressor.padCompressedChunks(
if (fCompressor->padCompressedChunks(
(unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize) != 0)
{
WE_COMP_DBG(cout << "Last chunk:" << chunkId << ", padding failed." << endl;)
@ -1272,7 +1299,8 @@ int ChunkManager::closeFile(CompFileData* fileData)
int ChunkManager::writeHeader(CompFileData* fileData, int ln)
{
int rc = NO_ERROR;
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
if (!fIsHdfs && !fIsBulkLoad)
@ -1422,8 +1450,10 @@ int ChunkManager::updateColumnExtent(IDBDataFile* pFile, int addBlockCount, int6
int rc = NO_ERROR;
char* hdr = pFileData->fFileHeader.fControlData;
fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount);
fCompressor.setLBIDByIndex(hdr, lbid, 1);
compress::CompressInterface::setBlockCount(
hdr, compress::CompressInterface::getBlockCount(hdr) + addBlockCount);
compress::CompressInterface::setLBIDByIndex(hdr, lbid, 1);
ChunkData* chunkData = (pFileData)->findChunk(0);
if (chunkData != NULL)
@ -1475,7 +1505,7 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount,
char* hdr = i->second->fFileHeader.fControlData;
char* uncompressedBuf = chunkData->fBufUnCompressed;
int currentBlockCount = fCompressor.getBlockCount(hdr);
int currentBlockCount = compress::CompressInterface::getBlockCount(hdr);
// Bug 3203, write out the compressed initial extent.
if (currentBlockCount == 0)
@ -1511,13 +1541,15 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount,
}
if (rc == NO_ERROR)
fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount);
compress::CompressInterface::setBlockCount(
hdr,
compress::CompressInterface::getBlockCount(hdr) + addBlockCount);
if (currentBlockCount)
{
// Append to the end.
uint64_t lbidCount = fCompressor.getLBIDCount(hdr);
fCompressor.setLBIDByIndex(hdr, lbid, lbidCount);
uint64_t lbidCount = compress::CompressInterface::getLBIDCount(hdr);
compress::CompressInterface::setLBIDByIndex(hdr, lbid, lbidCount);
}
return rc;
}
@ -1684,7 +1716,8 @@ int ChunkManager::getBlockCount(IDBDataFile* pFile)
map<IDBDataFile*, CompFileData*>::iterator fpIt = fFilePtrMap.find(pFile);
idbassert(fpIt != fFilePtrMap.end());
return fCompressor.getBlockCount(fpIt->second->fFileHeader.fControlData);
return compress::CompressInterface::getBlockCount(
fpIt->second->fFileHeader.fControlData);
}
//------------------------------------------------------------------------------
@ -1758,11 +1791,13 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
origFilePtr->flush();
// back out the current pointers
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
compress::CompChunkPtrList origPtrs;
if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0)
if (compress::CompressInterface::getPtrList(
fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0)
{
ostringstream oss;
oss << "Chunk shifting failed, file:" << origFileName << " -- invalid header.";
@ -1876,7 +1911,14 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
ChunkData* chunkData = chunksTouched[k];
fLenCompressed = fMaxCompressedBufSize;
if ((rc = fCompressor.compressBlock((char*)chunkData->fBufUnCompressed,
auto fCompressor = compress::getCompressorByType(
fCompressorPool, fileData->fCompressionType);
if (!fCompressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
if ((rc = fCompressor->compressBlock((char*)chunkData->fBufUnCompressed,
chunkData->fLenUnCompressed,
(unsigned char*)fBufCompressed,
fLenCompressed)) != 0)
@ -1894,7 +1936,7 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
<< fLenCompressed;)
// shifting chunk, add padding space
if ((rc = fCompressor.padCompressedChunks(
if ((rc = fCompressor->padCompressedChunks(
(unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize)) != 0)
{
WE_COMP_DBG(cout << ", but padding failed." << endl;)
@ -2245,7 +2287,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
}
// make sure the header is valid
if ((rc = fCompressor.verifyHdr(fileData->fFileHeader.fControlData)) != 0)
if ((rc = compress::CompressInterface::verifyHdr(
fileData->fFileHeader.fControlData)) != 0)
{
ostringstream oss;
oss << "Invalid header in new " << fileData->fFileName << ", roll back";
@ -2254,7 +2297,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
return rc;
}
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
// read in the pointer section in header
@ -2270,7 +2314,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
// get pointer list
compress::CompChunkPtrList ptrs;
if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
if (compress::CompressInterface::getPtrList(
fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
{
ostringstream oss;
oss << "Failed to parse pointer list from new " << fileData->fFileName << "@" << __LINE__;
@ -2282,6 +2327,13 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
ChunkData chunkData;
int numOfChunks = ptrs.size(); // number of chunks in the file
auto fCompressor = compress::getCompressorByType(
fCompressorPool, fileData->fCompressionType);
if (!fCompressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
for (int i = 0; i < numOfChunks && rc == NO_ERROR; i++)
{
unsigned int chunkSize = ptrs[i].second;
@ -2304,9 +2356,9 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
}
// uncompress the read in buffer
unsigned int dataLen = sizeof(chunkData.fBufUnCompressed);
size_t dataLen = sizeof(chunkData.fBufUnCompressed);
if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize,
if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
(unsigned char*)chunkData.fBufUnCompressed, dataLen) != 0)
{
ostringstream oss;
@ -2624,13 +2676,15 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
if (mit != fFileMap.end())
{
int headerSize = fCompressor.getHdrSize(mit->second->fFileHeader.fControlData);
int headerSize = compress::CompressInterface::getHdrSize(
mit->second->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
// get pointer list
compress::CompChunkPtrList ptrs;
if (fCompressor.getPtrList(mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
if (compress::CompressInterface::getPtrList(
mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
{
ostringstream oss;
oss << "Failed to parse pointer list from new " << mit->second->fFileName << "@" << __LINE__;
@ -2662,9 +2716,16 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
// uncompress the read in buffer
chunkData = new ChunkData(numOfChunks - 1);
unsigned int dataLen = sizeof(chunkData->fBufUnCompressed);
size_t dataLen = sizeof(chunkData->fBufUnCompressed);
if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize,
auto fCompressor = compress::getCompressorByType(
fCompressorPool, mit->second->fCompressionType);
if (!fCompressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
(unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0)
{
mit->second->fChunkList.push_back(chunkData);
@ -2676,7 +2737,7 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
{
char* hdr = mit->second->fFileHeader.fControlData;
if (fCompressor.getBlockCount(hdr) < 512)
if (compress::CompressInterface::getBlockCount(hdr) < 512)
blocks = 256;
}
@ -2693,7 +2754,6 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
return rc;
}
}
// vim:ts=4 sw=4:

View File

@ -64,8 +64,8 @@ namespace WriteEngine
// forward reference
class FileOp;
const int UNCOMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
const int COMPRESSED_FILE_HEADER_UNIT = compress::IDBCompressInterface::HDR_BUF_LEN;
const int UNCOMPRESSED_CHUNK_SIZE = compress::CompressInterface::UNCOMPRESSED_INBUF_LEN;
const int COMPRESSED_FILE_HEADER_UNIT = compress::CompressInterface::HDR_BUF_LEN;
// assume UNCOMPRESSED_CHUNK_SIZE > 0xBFFF (49151), 8 * 1024 bytes padding
@ -136,7 +136,7 @@ class CompFileData
public:
CompFileData(const FileID& id, const FID& fid, const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth) :
fFileID(id), fFid(fid), fColDataType(colDataType), fColWidth(colWidth), fDctnryCol(false),
fFilePtr(NULL), fIoBSize(0) {}
fFilePtr(NULL), fIoBSize(0), fCompressionType(1) {}
ChunkData* findChunk(int64_t cid) const;
@ -152,6 +152,7 @@ protected:
std::list<ChunkData*> fChunkList;
boost::scoped_array<char> fIoBuffer;
size_t fIoBSize;
uint32_t fCompressionType;
friend class ChunkManager;
};
@ -369,22 +370,23 @@ protected:
std::list<std::pair<FileID, ChunkData*> > fActiveChunks;
unsigned int fMaxActiveChunkNum; // max active chunks per file
char* fBufCompressed;
unsigned int fLenCompressed;
unsigned int fMaxCompressedBufSize;
unsigned int fUserPaddings;
size_t fLenCompressed;
size_t fMaxCompressedBufSize;
size_t fUserPaddings;
bool fIsBulkLoad;
bool fDropFdCache;
bool fIsInsert;
bool fIsHdfs;
FileOp* fFileOp;
compress::IDBCompressInterface fCompressor;
compress::CompressorPool fCompressorPool;
logging::Logger* fSysLogger;
TxnID fTransId;
int fLocalModuleId;
idbdatafile::IDBFileSystem& fFs;
bool fIsFix;
size_t COMPRESSED_CHUNK_SIZE;
private:
private:
};
}

View File

@ -348,6 +348,7 @@ const int ERR_COMP_READ_FILE = ERR_COMPBASE + 16;// Failed to read from a
const int ERR_COMP_WRITE_FILE = ERR_COMPBASE + 17;// Failed to write to a compressed data file
const int ERR_COMP_CLOSE_FILE = ERR_COMPBASE + 18;// Failed to close a compressed data file
const int ERR_COMP_TRUNCATE_ZERO = ERR_COMPBASE + 19;// Invalid attempt to truncate file to 0 bytes
const int ERR_COMP_WRONG_COMP_TYPE = ERR_COMPBASE + 20;// Invalid compression type.
//--------------------------------------------------------------------------
// Auto-increment error

View File

@ -652,14 +652,19 @@ int FileOp::extendFile(
// @bug 5349: check that new extent's fbo is not past current EOF
if (m_compressionType)
{
char hdrsIn[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
char hdrsIn[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
RETURN_ON_ERROR( readHeaders(pFile, hdrsIn) );
IDBCompressInterface compressor;
unsigned int ptrCount = compressor.getPtrCount(hdrsIn);
std::unique_ptr<compress::CompressInterface> compressor(
compress::getCompressInterfaceByType(
compress::CompressInterface::getCompressionType(hdrsIn)));
unsigned int ptrCount =
compress::CompressInterface::getPtrCount(hdrsIn);
unsigned int chunkIndex = 0;
unsigned int blockOffsetWithinChunk = 0;
compressor.locateBlock((hwm - 1), chunkIndex, blockOffsetWithinChunk);
compressor->locateBlock((hwm - 1), chunkIndex,
blockOffsetWithinChunk);
//std::ostringstream oss1;
//oss1 << "Extending compressed column file"<<
@ -816,9 +821,8 @@ int FileOp::extendFile(
if ((m_compressionType) && (hdrs))
{
IDBCompressInterface compressor;
compressor.initHdr(hdrs, width, colDataType, m_compressionType);
compressor.setLBIDByIndex(hdrs, startLbid, 0);
compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0);
}
}
@ -976,9 +980,8 @@ int FileOp::addExtentExactFile(
if ((m_compressionType) && (hdrs))
{
IDBCompressInterface compressor;
compressor.initHdr(hdrs, width, colDataType, m_compressionType);
compressor.setLBIDByIndex(hdrs, startLbid, 0);
compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0);
}
}
@ -1064,13 +1067,11 @@ int FileOp::initColumnExtent(
{
if ((bNewFile) && (m_compressionType))
{
char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2];
IDBCompressInterface compressor;
compressor.initHdr(hdrs, width, colDataType, m_compressionType);
compressor.setLBIDByIndex(hdrs, lbid, 0);
char hdrs[CompressInterface::HDR_BUF_LEN * 2];
compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
compress::CompressInterface::setLBIDByIndex(hdrs, lbid, 0);
if (bAbbrevExtent)
compressor.setBlockCount(hdrs, nBlocks);
compress::CompressInterface::setBlockCount(hdrs, nBlocks);
RETURN_ON_ERROR(writeHeaders(pFile, hdrs));
}
@ -1262,7 +1263,7 @@ int FileOp::initAbbrevCompColumnExtent(
Stats::startParseEvent(WE_STATS_COMPRESS_COL_INIT_ABBREV_EXT);
#endif
char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2];
char hdrs[CompressInterface::HDR_BUF_LEN * 2];
rc = writeInitialCompColumnChunk( pFile,
nBlocks,
INITIAL_EXTENT_ROWS_TO_DISK,
@ -1308,24 +1309,30 @@ int FileOp::writeInitialCompColumnChunk(
execplan::CalpontSystemCatalog::ColDataType colDataType,
char* hdrs)
{
const int INPUT_BUFFER_SIZE = nRows * width;
const size_t INPUT_BUFFER_SIZE = nRows * width;
char* toBeCompressedInput = new char[INPUT_BUFFER_SIZE];
unsigned int userPaddingBytes = Config::getNumCompressedPadBlks() *
BYTE_PER_BLOCK;
const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(INPUT_BUFFER_SIZE) +
userPaddingBytes;
// Compress an initialized abbreviated extent
// Initially m_compressionType == 0, but this function is used under
// the condition where m_compressionType > 0.
std::unique_ptr<CompressInterface> compressor(
compress::getCompressInterfaceByType(m_compressionType,
userPaddingBytes));
const size_t OUTPUT_BUFFER_SIZE =
compressor->maxCompressedSize(INPUT_BUFFER_SIZE) + userPaddingBytes +
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
unsigned char* compressedOutput = new unsigned char[OUTPUT_BUFFER_SIZE];
unsigned int outputLen = OUTPUT_BUFFER_SIZE;
size_t outputLen = OUTPUT_BUFFER_SIZE;
boost::scoped_array<char> toBeCompressedInputPtr( toBeCompressedInput );
boost::scoped_array<unsigned char> compressedOutputPtr(compressedOutput);
setEmptyBuf( (unsigned char*)toBeCompressedInput,
INPUT_BUFFER_SIZE, emptyVal, width);
// Compress an initialized abbreviated extent
IDBCompressInterface compressor( userPaddingBytes );
int rc = compressor.compressBlock(toBeCompressedInput,
INPUT_BUFFER_SIZE, compressedOutput, outputLen );
int rc = compressor->compressBlock(toBeCompressedInput, INPUT_BUFFER_SIZE,
compressedOutput, outputLen);
if (rc != 0)
{
@ -1333,8 +1340,8 @@ int FileOp::writeInitialCompColumnChunk(
}
// Round up the compressed chunk size
rc = compressor.padCompressedChunks( compressedOutput,
outputLen, OUTPUT_BUFFER_SIZE );
rc = compressor->padCompressedChunks(compressedOutput, outputLen,
OUTPUT_BUFFER_SIZE);
if (rc != 0)
{
@ -1347,23 +1354,22 @@ int FileOp::writeInitialCompColumnChunk(
// "; blkAllocCnt: " << nBlocksAllocated <<
// "; compressedByteCnt: " << outputLen << std::endl;
compressor.initHdr(hdrs, width, colDataType, m_compressionType);
compressor.setBlockCount(hdrs, nBlocksAllocated);
compressor.setLBIDByIndex(hdrs, startLBID, 0);
compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
compress::CompressInterface::setBlockCount(hdrs, nBlocksAllocated);
compress::CompressInterface::setLBIDByIndex(hdrs, startLBID, 0);
// Store compression pointers in the header
std::vector<uint64_t> ptrs;
ptrs.push_back( IDBCompressInterface::HDR_BUF_LEN * 2 );
ptrs.push_back( outputLen + (IDBCompressInterface::HDR_BUF_LEN * 2) );
compressor.storePtrs(ptrs, hdrs);
ptrs.push_back( CompressInterface::HDR_BUF_LEN * 2 );
ptrs.push_back( outputLen + (CompressInterface::HDR_BUF_LEN * 2) );
compress::CompressInterface::storePtrs(ptrs, hdrs);
RETURN_ON_ERROR( writeHeaders(pFile, hdrs) );
// Write the compressed data
if ( pFile->write( compressedOutput, outputLen ) != outputLen )
{
size_t writtenLen = pFile->write(compressedOutput, outputLen);
if (writtenLen != outputLen)
return ERR_FILE_WRITE;
}
return NO_ERROR;
}
@ -1421,7 +1427,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
return ERR_FILE_OPEN;
}
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
rc = readHeaders( pFile, hdrs );
if (rc != NO_ERROR)
@ -1432,9 +1438,14 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
}
int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
IDBCompressInterface compressor( userPadBytes );
std::unique_ptr<CompressInterface> compressor(
compress::getCompressInterfaceByType(
compress::CompressInterface::getCompressionType(hdrs),
userPadBytes));
CompChunkPtrList chunkPtrs;
int rcComp = compressor.getPtrList( hdrs, chunkPtrs );
int rcComp = compress::CompressInterface::getPtrList(hdrs, chunkPtrs);
if (rcComp != 0)
{
@ -1444,7 +1455,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
}
// Nothing to do if the proposed HWM is < the current block count
uint64_t blkCount = compressor.getBlockCount(hdrs);
uint64_t blkCount = compress::CompressInterface::getBlockCount(hdrs);
if (blkCount > (hwm + 1))
{
@ -1455,7 +1466,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
const unsigned int ROWS_PER_EXTENT =
BRMWrapper::getInstance()->getInstance()->getExtentRows();
const unsigned int ROWS_PER_CHUNK =
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth;
CompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth;
const unsigned int CHUNKS_PER_EXTENT = ROWS_PER_EXTENT / ROWS_PER_CHUNK;
// If this is an abbreviated extent, we first expand to a full extent
@ -1493,7 +1504,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
CompChunkPtr chunkOutPtr;
rc = expandAbbrevColumnChunk( pFile, emptyVal, colWidth,
chunkPtrs[0], chunkOutPtr );
chunkPtrs[0], chunkOutPtr, hdrs );
if (rc != NO_ERROR)
{
@ -1515,7 +1526,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
// Update block count to reflect a full extent
blkCount = (ROWS_PER_EXTENT * colWidth) / BYTE_PER_BLOCK;
compressor.setBlockCount( hdrs, blkCount );
compress::CompressInterface::setBlockCount(hdrs, blkCount);
}
// Calculate the number of empty chunks we need to add to fill this extent
@ -1532,7 +1543,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
compressor.getBlockCount(hdrs) << std::endl;
std::cout << "Pointer Header Size (in bytes): " <<
(compressor.getHdrSize(hdrs) -
IDBCompressInterface::HDR_BUF_LEN) << std::endl;
CompressInterface::HDR_BUF_LEN) << std::endl;
std::cout << "Chunk Pointers (offset,length): " << std::endl;
for (unsigned k = 0; k < chunkPtrs.size(); k++)
@ -1551,8 +1562,10 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
// Fill in or add necessary remaining empty chunks
if (numChunksToFill > 0)
{
const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes;
const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN;
const int OUT_BUF_LEN =
compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes +
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
// Allocate buffer, and store in scoped_array to ensure its deletion.
// Create scope {...} to manage deletion of buffers
@ -1566,9 +1579,9 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
// Compress and then pad the compressed chunk
setEmptyBuf( (unsigned char*)toBeCompressedBuf,
IN_BUF_LEN, emptyVal, colWidth );
unsigned int outputLen = OUT_BUF_LEN;
rcComp = compressor.compressBlock( toBeCompressedBuf,
IN_BUF_LEN, compressedBuf, outputLen );
size_t outputLen = OUT_BUF_LEN;
rcComp = compressor->compressBlock(toBeCompressedBuf, IN_BUF_LEN,
compressedBuf, outputLen);
if (rcComp != 0)
{
@ -1579,8 +1592,8 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
toBeCompressedInputPtr.reset(); // release memory
rcComp = compressor.padCompressedChunks( compressedBuf,
outputLen, OUT_BUF_LEN );
rcComp = compressor->padCompressedChunks(compressedBuf, outputLen,
OUT_BUF_LEN);
if (rcComp != 0)
{
@ -1639,7 +1652,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
ptrs.push_back( chunkPtrs[chunkPtrs.size() - 1].first +
chunkPtrs[chunkPtrs.size() - 1].second );
compressor.storePtrs( ptrs, hdrs );
compress::CompressInterface::storePtrs(ptrs, hdrs);
rc = writeHeaders( pFile, hdrs );
@ -1697,11 +1710,24 @@ int FileOp::expandAbbrevColumnChunk(
const uint8_t* emptyVal,
int colWidth,
const CompChunkPtr& chunkInPtr,
CompChunkPtr& chunkOutPtr )
CompChunkPtr& chunkOutPtr,
const char *hdrs )
{
int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes;
auto realCompressionType = m_compressionType;
if (hdrs)
{
realCompressionType =
compress::CompressInterface::getCompressionType(hdrs);
}
std::unique_ptr<CompressInterface> compressor(
compress::getCompressInterfaceByType(realCompressionType,
userPadBytes));
const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN;
const int OUT_BUF_LEN =
compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes +
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
char* toBeCompressedBuf = new char[ IN_BUF_LEN ];
boost::scoped_array<char> toBeCompressedPtr(toBeCompressedBuf);
@ -1717,13 +1743,10 @@ int FileOp::expandAbbrevColumnChunk(
chunkInPtr.second) );
// Uncompress an "abbreviated" chunk into our 4MB buffer
unsigned int outputLen = IN_BUF_LEN;
IDBCompressInterface compressor( userPadBytes );
int rc = compressor.uncompressBlock(
compressedInBuf,
chunkInPtr.second,
(unsigned char*)toBeCompressedBuf,
outputLen);
size_t outputLen = IN_BUF_LEN;
int rc = compressor->uncompressBlock(compressedInBuf, chunkInPtr.second,
(unsigned char*) toBeCompressedBuf,
outputLen);
if (rc != 0)
{
@ -1739,11 +1762,8 @@ int FileOp::expandAbbrevColumnChunk(
// Compress the data we just read, as a "full" 4MB chunk
outputLen = OUT_BUF_LEN;
rc = compressor.compressBlock(
reinterpret_cast<char*>(toBeCompressedBuf),
IN_BUF_LEN,
compressedOutBuf,
outputLen );
rc = compressor->compressBlock(reinterpret_cast<char*>(toBeCompressedBuf),
IN_BUF_LEN, compressedOutBuf, outputLen);
if (rc != 0)
{
@ -1751,8 +1771,8 @@ int FileOp::expandAbbrevColumnChunk(
}
// Round up the compressed chunk size
rc = compressor.padCompressedChunks( compressedOutBuf,
outputLen, OUT_BUF_LEN );
rc = compressor->padCompressedChunks(compressedOutBuf, outputLen,
OUT_BUF_LEN);
if (rc != 0)
{
@ -1782,7 +1802,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* hdr) const
RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) );
// Write the headers
if (pFile->write( hdr, IDBCompressInterface::HDR_BUF_LEN * 2 ) != IDBCompressInterface::HDR_BUF_LEN * 2)
if (pFile->write( hdr, CompressInterface::HDR_BUF_LEN * 2 ) != CompressInterface::HDR_BUF_LEN * 2)
{
return ERR_FILE_WRITE;
}
@ -1808,7 +1828,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) );
// Write the control header
if (pFile->write( controlHdr, IDBCompressInterface::HDR_BUF_LEN ) != IDBCompressInterface::HDR_BUF_LEN)
if (pFile->write( controlHdr, CompressInterface::HDR_BUF_LEN ) != CompressInterface::HDR_BUF_LEN)
{
return ERR_FILE_WRITE;
}
@ -2651,9 +2671,8 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdrs ) const
{
RETURN_ON_ERROR( setFileOffset(pFile, 0) );
RETURN_ON_ERROR( readFile( pFile, reinterpret_cast<unsigned char*>(hdrs),
(IDBCompressInterface::HDR_BUF_LEN * 2) ) );
IDBCompressInterface compressor;
int rc = compressor.verifyHdr( hdrs );
(CompressInterface::HDR_BUF_LEN * 2) ) );
int rc = compress::CompressInterface::verifyHdr(hdrs);
if (rc != 0)
{
@ -2671,11 +2690,10 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdr1, char* hdr2 ) const
unsigned char* hdrPtr = reinterpret_cast<unsigned char*>(hdr1);
RETURN_ON_ERROR( setFileOffset(pFile, 0) );
RETURN_ON_ERROR( readFile( pFile, hdrPtr,
IDBCompressInterface::HDR_BUF_LEN ));
CompressInterface::HDR_BUF_LEN ));
IDBCompressInterface compressor;
int ptrSecSize = compressor.getHdrSize(hdrPtr) -
IDBCompressInterface::HDR_BUF_LEN;
int ptrSecSize = compress::CompressInterface::getHdrSize(hdrPtr) -
CompressInterface::HDR_BUF_LEN;
return readFile( pFile, reinterpret_cast<unsigned char*>(hdr2),
ptrSecSize );
}

View File

@ -529,11 +529,11 @@ private:
FileOp(const FileOp& rhs);
FileOp& operator=(const FileOp& rhs);
// Private declaration of expandAbbrevColumnChunk. This diff view lists the
// pre-change signature first and the post-change signature second; the only
// difference in parameters is the trailing `hdrs` pointer, which defaults to
// nullptr so existing five-argument calls still compile.
// NOTE(review): presumably `hdrs` carries the compressed-file header buffer
// used to pick the compressor - confirm against the definition.
int expandAbbrevColumnChunk( IDBDataFile* pFile,
const uint8_t* emptyVal,
int colWidth,
const compress::CompChunkPtr& chunkInPtr,
compress::CompChunkPtr& chunkOutPt);
int expandAbbrevColumnChunk(IDBDataFile* pFile, const uint8_t* emptyVal,
int colWidth,
const compress::CompChunkPtr& chunkInPtr,
compress::CompChunkPtr& chunkOutPt,
const char* hdrs = nullptr);
int initAbbrevCompColumnExtent(
IDBDataFile* pFile, uint16_t dbRoot, int nBlocks,

View File

@ -1007,9 +1007,9 @@ void RBMetaWriter::backupHWMChunk(
}
// Read Control header
char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ];
char controlHdr[ CompressInterface::HDR_BUF_LEN ];
rc = fileOp.readFile( dbFile, (unsigned char*)controlHdr,
IDBCompressInterface::HDR_BUF_LEN );
CompressInterface::HDR_BUF_LEN );
if (rc != NO_ERROR)
{
@ -1025,8 +1025,7 @@ void RBMetaWriter::backupHWMChunk(
throw WeException( oss.str(), rc );
}
IDBCompressInterface compressor;
int rc1 = compressor.verifyHdr( controlHdr );
int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
if (rc1 != 0)
{
@ -1045,9 +1044,23 @@ void RBMetaWriter::backupHWMChunk(
throw WeException( oss.str(), rc );
}
auto compressionType =
compress::CompressInterface::getCompressionType(controlHdr);
std::unique_ptr<compress::CompressInterface> compressor(
compress::getCompressInterfaceByType(compressionType));
if (!compressor)
{
WErrorCodes ec;
std::ostringstream oss;
oss << "Ivalid compression type " << compressionType;
fileOp.closeFile( dbFile );
throw WeException(oss.str(), rc);
}
// Read Pointer header data
uint64_t hdrSize = compressor.getHdrSize(controlHdr);
uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN;
uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
char* pointerHdr = new char[ptrHdrSize];
rc = fileOp.readFile( dbFile, (unsigned char*)pointerHdr, ptrHdrSize );
@ -1067,7 +1080,8 @@ void RBMetaWriter::backupHWMChunk(
}
CompChunkPtrList chunkPtrs;
rc = compressor.getPtrList(pointerHdr, ptrHdrSize, chunkPtrs );
rc = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
chunkPtrs);
delete[] pointerHdr;
if (rc != 0)
@ -1087,7 +1101,7 @@ void RBMetaWriter::backupHWMChunk(
unsigned int blockOffsetWithinChunk = 0;
unsigned char* buffer = 0;
uint64_t chunkSize = 0;
compressor.locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk);
compressor->locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk);
if (chunkIndex < chunkPtrs.size())
{

View File

@ -121,9 +121,9 @@ int ColumnOpCompress0::saveBlock(IDBDataFile* pFile, const unsigned char* writeB
* Constructor
*/
ColumnOpCompress1::ColumnOpCompress1(Log* logger)
ColumnOpCompress1::ColumnOpCompress1(uint32_t compressionType, Log* logger)
{
m_compressionType = 1;
m_compressionType = compressionType;
m_chunkManager = new ChunkManager();
if (logger)
@ -164,11 +164,7 @@ bool ColumnOpCompress1::abbreviatedExtent(IDBDataFile* pFile, int colWidth) cons
// Returns the block count of the compressed column file `pFile`.
// Pre-change lines: read both header sections into a local CompFileHeader
// (the readHeaders() result is not checked) and ask an IDBCompressInterface
// instance for the block count stored in the control header.
// Post-change line: delegate the whole lookup to the chunk manager.
int ColumnOpCompress1::blocksInFile(IDBDataFile* pFile) const
{
CompFileHeader compFileHeader;
readHeaders(pFile, compFileHeader.fControlData, compFileHeader.fPtrSection);
compress::IDBCompressInterface compressor;
return compressor.getBlockCount(compFileHeader.fControlData);
return m_chunkManager->getBlockCount(pFile);
}

View File

@ -97,7 +97,7 @@ public:
/**
* @brief Constructor
*
* The pre-change declaration takes only an optional logger; the post-change
* declaration adds a leading, mandatory compression type.
*
* @param compressionType compression type id stored in m_compressionType
* @param logger optional logger; defaults to 0 (none)
*/
EXPORT ColumnOpCompress1(Log* logger = 0);
EXPORT ColumnOpCompress1(uint32_t compressionType, Log* logger = 0);
/**
* @brief Default Destructor

View File

@ -67,9 +67,9 @@ DctnryCompress0::~DctnryCompress0()
/**
* Constructor
*/
DctnryCompress1::DctnryCompress1(Log* logger)
DctnryCompress1::DctnryCompress1(uint32_t compressionType, Log* logger)
{
m_compressionType = 1;
m_compressionType = compressionType;
m_chunkManager = new ChunkManager();
if (logger)

View File

@ -62,7 +62,7 @@ public:
/**
* @brief Constructor
*
* The pre-change declaration takes only an optional logger; the post-change
* declaration adds a leading, mandatory compression type.
*
* @param compressionType compression type id stored in m_compressionType
* @param logger optional logger; defaults to 0 (none)
*/
EXPORT DctnryCompress1(Log* logger = 0);
EXPORT DctnryCompress1(uint32_t compressionType, Log* logger = 0);
/**
* @brief Default Destructor

View File

@ -76,19 +76,25 @@ StopWatch timer;
// Default constructor: starts with m_opType = NOOP and allocates one column
// operator and one dictionary operator for every compression slot.
// This diff view lists the pre-change lines (single COMPRESSED_OP slot)
// alongside the post-change lines (COMPRESSED_OP_1 and COMPRESSED_OP_2).
WriteEngineWrapper::WriteEngineWrapper() : m_opType(NOOP)
{
m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0;
m_colOp[COMPRESSED_OP] = new ColumnOpCompress1;
m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0;
m_dctnry[COMPRESSED_OP] = new DctnryCompress1;
// Slot COMPRESSED_OP_1 is built with compression type 1.
m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*compressionType=*/1);
m_dctnry[COMPRESSED_OP_1] = new DctnryCompress1(/*compressionType=*/1);
// Slot COMPRESSED_OP_2 is built with compression type 3.
m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*compressionType=*/3);
m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3);
}
// Copy constructor: only m_opType is taken from `rhs`. The column and
// dictionary operators are not copied; fresh ones are allocated for every
// compression slot, exactly as in the default constructor.
// This diff view lists the pre-change lines (single COMPRESSED_OP slot)
// alongside the post-change lines (COMPRESSED_OP_1 and COMPRESSED_OP_2).
WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opType(rhs.m_opType)
{
m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0;
m_colOp[COMPRESSED_OP] = new ColumnOpCompress1;
m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0;
m_dctnry[COMPRESSED_OP] = new DctnryCompress1;
// Slot COMPRESSED_OP_1 is built with compression type 1.
m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*compressionType=*/1);
m_dctnry[COMPRESSED_OP_1] = new DctnryCompress1(/*compressionType=*/1);
// Slot COMPRESSED_OP_2 is built with compression type 3.
m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*compressionType=*/3);
m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3);
}
/**@brief WriteEngineWrapper Constructor
@ -96,9 +102,13 @@ WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opTyp
// Destructor: releases the column and dictionary operators that the
// constructors allocated with `new`, one pair per compression slot.
// This diff view lists the pre-change lines (single COMPRESSED_OP slot)
// alongside the post-change lines (COMPRESSED_OP_1 and COMPRESSED_OP_2).
WriteEngineWrapper::~WriteEngineWrapper()
{
delete m_colOp[UN_COMPRESSED_OP];
delete m_colOp[COMPRESSED_OP];
delete m_dctnry[UN_COMPRESSED_OP];
delete m_dctnry[COMPRESSED_OP];
delete m_colOp[COMPRESSED_OP_1];
delete m_dctnry[COMPRESSED_OP_1];
delete m_colOp[COMPRESSED_OP_2];
delete m_dctnry[COMPRESSED_OP_2];
}
/**@brief Perform upfront initialization

View File

@ -58,9 +58,10 @@ namespace WriteEngine
{
//... Compression operation slots: uncompressed, compressed.
// These values are used as indexes into the m_colOp / m_dctnry arrays.
// The first three lines are the pre-change set (one compressed slot, two
// slots in total); the last four are the post-change set (two compressed
// slots, three slots in total).
const int UN_COMPRESSED_OP = 0;
const int COMPRESSED_OP = 1;
const int TOTAL_COMPRESS_OP = 2;
const int UN_COMPRESSED_OP = 0;
const int COMPRESSED_OP_1 = 1;
const int COMPRESSED_OP_2 = 2;
// NOTE(review): presumably the number of slots in m_colOp / m_dctnry - confirm
// against the array declarations.
const int TOTAL_COMPRESS_OP = 3;
//...Forward class declarations
class Log;
@ -446,8 +447,10 @@ public:
*/
// Propagates the insert flag to the chunk managers of the compressed slots.
// Column chunk managers receive `bIsInsert`; dictionary chunk managers are
// always set to true regardless of the argument.
// The first two statements are the pre-change lines (single COMPRESSED_OP
// slot); the remaining four are the post-change lines covering both slots.
void setIsInsert(bool bIsInsert)
{
m_colOp[COMPRESSED_OP]->chunkManager()->setIsInsert(bIsInsert);
m_dctnry[COMPRESSED_OP]->chunkManager()->setIsInsert(true);
m_colOp[COMPRESSED_OP_1]->chunkManager()->setIsInsert(bIsInsert);
m_dctnry[COMPRESSED_OP_1]->chunkManager()->setIsInsert(true);
m_colOp[COMPRESSED_OP_2]->chunkManager()->setIsInsert(bIsInsert);
m_dctnry[COMPRESSED_OP_2]->chunkManager()->setIsInsert(true);
}
/**
@ -458,7 +461,7 @@ public:
*/
// Returns the insert flag held by a column chunk manager.
// Pre-change line reads slot COMPRESSED_OP; post-change line reads slot
// COMPRESSED_OP_1 only (COMPRESSED_OP_2 is not consulted here).
bool getIsInsert()
{
return m_colOp[COMPRESSED_OP]->chunkManager()->getIsInsert();
return m_colOp[COMPRESSED_OP_1]->chunkManager()->getIsInsert();
}
std::tr1::unordered_map<TxnID, SP_TxnLBIDRec_t>& getTxnMap()
@ -475,10 +478,23 @@ public:
*/
// Flushes the chunk managers of the compressed column and dictionary
// operators, forwarding `rc` and `columOids` to each flushChunks() call.
// Returns the first non-NO_ERROR result, or NO_ERROR when every flush
// succeeded.
// This diff view interleaves the pre-change body (rtn1 / rtn2) with the
// post-change body (loop over both compressed slots).
// NOTE(review): the pre-change body always invoked both flushes before
// choosing a return value; the post-change loop returns on the first
// failure, so later chunk managers are not flushed in that case - confirm
// this is intended.
int flushChunks(int rc, const std::map<FID, FID>& columOids)
{
int rtn1 = m_colOp[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids);
int rtn2 = m_dctnry[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids);
// Slots whose operators own a chunk manager that must be flushed.
std::vector<int32_t> compressedOpIds = {COMPRESSED_OP_1,
COMPRESSED_OP_2};
return (rtn1 != NO_ERROR ? rtn1 : rtn2);
for (const auto compressedOpId : compressedOpIds)
{
// Column chunks first, then dictionary chunks, for each slot.
auto rtn = m_colOp[compressedOpId]->chunkManager()->flushChunks(
rc, columOids);
if (rtn != NO_ERROR)
return rtn;
rtn = m_dctnry[compressedOpId]->chunkManager()->flushChunks(
rc, columOids);
if (rtn != NO_ERROR)
return rtn;
}
return NO_ERROR;
}
/**
@ -524,7 +540,7 @@ public:
// Forwards startTransaction(txnid) to a column chunk manager and returns its
// result. Pre-change line targets slot COMPRESSED_OP; post-change line
// targets slot COMPRESSED_OP_1. The dictionary call is commented out.
// NOTE(review): the chunk manager of COMPRESSED_OP_2 is not notified here -
// confirm whether that slot needs the call as well.
int startTransaction(const TxnID& txnid)
{
int rc = 0;
rc = m_colOp[COMPRESSED_OP]->chunkManager()->startTransaction(txnid);
rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->startTransaction(txnid);
//if ( rc == 0)
// rc = m_dctnry[COMPRESSED_OP]->chunkManager()->startTransaction(txnid);
return rc;
@ -537,7 +553,8 @@ public:
// Forwards confirmTransaction(txnid) to a column chunk manager and returns
// its result. Pre-change line targets slot COMPRESSED_OP; post-change lines
// target slot COMPRESSED_OP_1.
// NOTE(review): the chunk manager of COMPRESSED_OP_2 is not notified here -
// confirm whether that slot needs the call as well.
int confirmTransaction (const TxnID& txnid)
{
int rc = 0;
rc = m_colOp[COMPRESSED_OP]->chunkManager()->confirmTransaction (txnid);
rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->confirmTransaction(
txnid);
return rc;
}
@ -549,7 +566,8 @@ public:
// Forwards endTransaction(txnid, success) to a column chunk manager and
// returns its result. Pre-change line targets slot COMPRESSED_OP;
// post-change lines target slot COMPRESSED_OP_1. The dictionary call is
// commented out.
// NOTE(review): the chunk manager of COMPRESSED_OP_2 is not notified here -
// confirm whether that slot needs the call as well.
int endTransaction(const TxnID& txnid, bool success)
{
int rc = 0;
rc = m_colOp[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success);
rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->endTransaction(txnid,
success);
//if ( rc == 0)
// rc = m_dctnry[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success);
return rc;
@ -785,7 +803,16 @@ private:
// Maps a compression type id to the slot index used for m_colOp / m_dctnry.
// Pre-change line: any positive type selects COMPRESSED_OP, otherwise
// UN_COMPRESSED_OP.
// Post-change lines: types 1 and 2 select COMPRESSED_OP_1, type 3 selects
// COMPRESSED_OP_2, and every other value yields 0 (the value of
// UN_COMPRESSED_OP).
// NOTE(review): positive types other than 1-3 used to select the compressed
// slot and now fall through to index 0 - confirm this is intended.
int op(int compressionType)
{
return (compressionType > 0 ? COMPRESSED_OP : UN_COMPRESSED_OP);
switch (compressionType)
{
case 1:
case 2:
return COMPRESSED_OP_1;
case 3:
return COMPRESSED_OP_2;
}
// No matching case: fall back to slot 0.
return 0;
}