1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

Merge pull request #1842 from denis0x0D/MCOL-987_LZ

MCOL-987 LZ4 compression support.
This commit is contained in:
Roman Nozdrin
2021-07-07 13:13:18 +03:00
committed by GitHub
45 changed files with 1311 additions and 549 deletions

View File

@ -36,9 +36,9 @@ local deb_build_deps = 'apt update && apt install --yes --no-install-recommends
local platformMap(platform) = local platformMap(platform) =
local platform_map = { local platform_map = {
'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package', 'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran liblz4-devel && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package',
'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package', 'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' lz4-devel && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package',
'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package', 'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' lz4-devel cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package',
'debian:9': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=stretch' debian/autobake-deb.sh", 'debian:9': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=stretch' debian/autobake-deb.sh",
'debian:10': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=buster' debian/autobake-deb.sh", 'debian:10': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=buster' debian/autobake-deb.sh",
'ubuntu:18.04': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=bionic' debian/autobake-deb.sh", 'ubuntu:18.04': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=bionic' debian/autobake-deb.sh",

View File

@ -163,6 +163,12 @@ if(NOT AWK_EXECUTABLE)
return() return()
endif() endif()
FIND_PACKAGE(LZ4)
if (NOT LZ4_FOUND)
MESSAGE_ONCE(CS_NO_LZ4 "lz4 not found")
return()
endif()
IF (NOT INSTALL_LAYOUT) IF (NOT INSTALL_LAYOUT)
INCLUDE(check_compiler_flag) INCLUDE(check_compiler_flag)

25
cmake/FindLZ4.cmake Normal file
View File

@ -0,0 +1,25 @@
# Find module for the LZ4 compression library.
#
# Variables produced:
#   LZ4_FOUND        - set by find_package_handle_standard_args() once both
#                      LZ4_LIBRARIES and LZ4_INCLUDE_DIR have been located
#   LZ4_LIBRARIES    - full path of the lz4 library
#   LZ4_INCLUDE_DIR  - directory that contains lz4.h
#   LZ4_ROOT_DIR     - prefix under which include/lz4.h was found; only used
#                      as a search hint for the two lookups below

# Locate an installation prefix first so that a library and header coming
# from the same tree are preferred.
find_path(LZ4_ROOT_DIR
    NAMES include/lz4.h
)

find_library(LZ4_LIBRARIES
    NAMES lz4
    HINTS ${LZ4_ROOT_DIR}/lib
)

find_path(LZ4_INCLUDE_DIR
    NAMES lz4.h
    HINTS ${LZ4_ROOT_DIR}/include
)

include(FindPackageHandleStandardArgs)
# The name given here must match the name used in find_package(LZ4): newer
# CMake releases warn about a mismatch, and <Name>_FOUND is derived from it.
find_package_handle_standard_args(LZ4 DEFAULT_MSG
    LZ4_LIBRARIES
    LZ4_INCLUDE_DIR
)

# Keep the cache entries out of the default (non-advanced) GUI view.
mark_as_advanced(
    LZ4_ROOT_DIR
    LZ4_LIBRARIES
    LZ4_INCLUDE_DIR
)

View File

@ -145,9 +145,7 @@ pColStep::pColStep(
if (fOid < 1000) if (fOid < 1000)
throw runtime_error("pColStep: invalid column"); throw runtime_error("pColStep: invalid column");
compress::IDBCompressInterface cmpif; if (!compress::CompressInterface::isCompressionAvail(fColType.compressionType))
if (!cmpif.isCompressionAvail(fColType.compressionType))
{ {
ostringstream oss; ostringstream oss;
oss << "Unsupported compression type " << fColType.compressionType; oss << "Unsupported compression type " << fColType.compressionType;

View File

@ -95,7 +95,11 @@ DROP PROCEDURE IF EXISTS `compression_ratio` //
CREATE PROCEDURE compression_ratio() SQL SECURITY INVOKER CREATE PROCEDURE compression_ratio() SQL SECURITY INVOKER
BEGIN BEGIN
SELECT CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files WHERE compressed_data_size IS NOT NULL), ':1') COMPRESSION_RATIO;
SELECT 'Snappy' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='Snappy' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') compression_ratio
UNION ALL
SELECT 'LZ4' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='LZ4') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='LZ4' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') as compression_ratio;
END // END //
create or replace procedure columnstore_upgrade() SQL SECURITY INVOKER create or replace procedure columnstore_upgrade() SQL SECURITY INVOKER

View File

@ -777,7 +777,6 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
parser.setDefaultSchema(schema); parser.setDefaultSchema(schema);
parser.setDefaultCharset(default_table_charset); parser.setDefaultCharset(default_table_charset);
int rc = 0; int rc = 0;
IDBCompressInterface idbCompress;
parser.Parse(ddlStatement.c_str()); parser.Parse(ddlStatement.c_str());
if (get_fe_conn_info_ptr() == NULL) if (get_fe_conn_info_ptr() == NULL)
@ -981,7 +980,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
if (compressionType == 1) compressionType = 2; if (compressionType == 1) compressionType = 2;
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType ))) if ((compressionType > 0) &&
!(compress::CompressInterface::isCompressionAvail(
compressionType)))
{ {
rc = 1; rc = 1;
ci->alterTableState = cal_connection_info::NOT_ALTER; ci->alterTableState = cal_connection_info::NOT_ALTER;
@ -1368,7 +1369,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
return rc; return rc;
} }
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType ))) if ((compressionType > 0) &&
!(compress::CompressInterface::isCompressionAvail(
compressionType)))
{ {
rc = 1; rc = 1;
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str()); thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
@ -1713,7 +1716,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
return rc; return rc;
} }
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType ))) if ((compressionType > 0) &&
!(compress::CompressInterface::isCompressionAvail(
compressionType)))
{ {
rc = 1; rc = 1;
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str()); thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
@ -1842,7 +1847,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
return rc; return rc;
} }
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType ))) if ((compressionType > 0) &&
!(compress::CompressInterface::isCompressionAvail(
compressionType)))
{ {
rc = 1; rc = 1;
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str()); thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
@ -2364,9 +2371,8 @@ int ha_mcs_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* crea
if (compressiontype == 1) compressiontype = 2; if (compressiontype == 1) compressiontype = 2;
IDBCompressInterface idbCompress; if ((compressiontype > 0) &&
!(compress::CompressInterface::isCompressionAvail(compressiontype)))
if ( ( compressiontype > 0 ) && !(idbCompress.isCompressionAvail( compressiontype )) )
{ {
string emsg = IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE); string emsg = IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE);
setError(thd, ER_INTERNAL_ERROR, emsg); setError(thd, ER_INTERNAL_ERROR, emsg);

View File

@ -21,8 +21,10 @@
#include "ha_mcs_sysvars.h" #include "ha_mcs_sysvars.h"
const char* mcs_compression_type_names[] = { const char* mcs_compression_type_names[] = {
"SNAPPY", "SNAPPY", // 0
"SNAPPY", "SNAPPY", // 1
"SNAPPY", // 2
"LZ4", // 3
NullS NullS
}; };
@ -39,7 +41,8 @@ static MYSQL_THDVAR_ENUM(
PLUGIN_VAR_RQCMDARG, PLUGIN_VAR_RQCMDARG,
"Controls compression algorithm for create tables. Possible values are: " "Controls compression algorithm for create tables. Possible values are: "
"NO_COMPRESSION segment files aren't compressed; " "NO_COMPRESSION segment files aren't compressed; "
"SNAPPY segment files are Snappy compressed (default);", "SNAPPY segment files are Snappy compressed (default);"
"LZ4 segment files are LZ4 compressed;",
NULL, // check NULL, // check
NULL, // update NULL, // update
1, //default 1, //default

View File

@ -30,7 +30,8 @@ extern char cs_commit_hash[];
// compression_type // compression_type
enum mcs_compression_type_t { enum mcs_compression_type_t {
NO_COMPRESSION = 0, NO_COMPRESSION = 0,
SNAPPY = 2 SNAPPY = 2,
LZ4 = 3
}; };
// use_import_for_batchinsert mode // use_import_for_batchinsert mode

View File

@ -183,6 +183,10 @@ static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond)
compression_type = "Snappy"; compression_type = "Snappy";
break; break;
case 3:
compression_type = "LZ4";
break;
default: default:
compression_type = "Unknown"; compression_type = "Unknown";
break; break;

View File

@ -492,6 +492,7 @@
<CPUniqueLimit>100</CPUniqueLimit> <CPUniqueLimit>100</CPUniqueLimit>
<AllowDiskBasedJoin>N</AllowDiskBasedJoin> <AllowDiskBasedJoin>N</AllowDiskBasedJoin>
<TempFileCompression>Y</TempFileCompression> <TempFileCompression>Y</TempFileCompression>
<TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
</HashJoin> </HashJoin>
<JobList> <JobList>
<FlushInterval>16K</FlushInterval> <FlushInterval>16K</FlushInterval>
@ -539,6 +540,7 @@
</UserPriority> </UserPriority>
<NetworkCompression> <NetworkCompression>
<Enabled>Y</Enabled> <Enabled>Y</Enabled>
<NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
</NetworkCompression> </NetworkCompression>
<QueryTele> <QueryTele>
<Host>127.0.0.1</Host> <Host>127.0.0.1</Host>

View File

@ -308,7 +308,7 @@ void waitForRetry(long count)
//Must hold the FD cache lock! //Must hold the FD cache lock!
int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterface& decompressor) static int updateptrs(char* ptr, FdCacheType_t::iterator fdit)
{ {
ssize_t i; ssize_t i;
uint32_t progress; uint32_t progress;
@ -357,7 +357,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa
fdit->second->cmpMTime = mtime; fdit->second->cmpMTime = mtime;
int gplRc = 0; int gplRc = 0;
gplRc = decompressor.getPtrList(&ptr[4096], 4096, fdit->second->ptrList); gplRc = compress::CompressInterface::getPtrList(&ptr[4096], 4096,
fdit->second->ptrList);
if (gplRc != 0) if (gplRc != 0)
return -5; // go for a retry. return -5; // go for a retry.
@ -391,7 +392,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa
return -8; return -8;
CompChunkPtrList nextPtrList; CompChunkPtrList nextPtrList;
gplRc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList); gplRc = compress::CompressInterface::getPtrList(
&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
if (gplRc != 0) if (gplRc != 0)
return -7; // go for a retry. return -7; // go for a retry.
@ -445,7 +447,6 @@ void* thr_popper(ioManager* arg)
double rqst3; double rqst3;
bool locked = false; bool locked = false;
SPFdEntry_t fe; SPFdEntry_t fe;
IDBCompressInterface decompressor;
vector<CacheInsert_t> cacheInsertOps; vector<CacheInsert_t> cacheInsertOps;
bool copyLocked = false; bool copyLocked = false;
@ -463,8 +464,10 @@ void* thr_popper(ioManager* arg)
FdCacheType_t::iterator fdit; FdCacheType_t::iterator fdit;
IDBDataFile* fp = 0; IDBDataFile* fp = 0;
uint32_t maxCompSz = IDBCompressInterface::maxCompressedSize(iom->blocksPerRead * BLOCK_SIZE); size_t maxCompSz =
uint32_t readBufferSz = maxCompSz + pageSize; compress::CompressInterface::getMaxCompressedSizeGeneric(
iom->blocksPerRead * BLOCK_SIZE);
size_t readBufferSz = maxCompSz + pageSize;
realbuff.reset(new char[readBufferSz]); realbuff.reset(new char[readBufferSz]);
@ -863,7 +866,7 @@ retryReadHeaders:
cur_mtime = fp_mtime; cur_mtime = fp_mtime;
if (decompRetryCount > 0 || retryReadHeadersCount > 0 || cur_mtime > fdit->second->cmpMTime) if (decompRetryCount > 0 || retryReadHeadersCount > 0 || cur_mtime > fdit->second->cmpMTime)
updatePtrsRc = updateptrs(&alignedbuff[0], fdit, decompressor); updatePtrsRc = updateptrs(&alignedbuff[0], fdit);
fdMapMutex.unlock(); fdMapMutex.unlock();
@ -1052,7 +1055,7 @@ retryReadHeaders:
#ifdef _MSC_VER #ifdef _MSC_VER
unsigned int blen = 4 * 1024 * 1024 + 4; unsigned int blen = 4 * 1024 * 1024 + 4;
#else #else
uint32_t blen = 4 * 1024 * 1024 + 4; size_t blen = 4 * 1024 * 1024 + 4;
#endif #endif
#ifdef IDB_COMP_POC_DEBUG #ifdef IDB_COMP_POC_DEBUG
{ {
@ -1060,7 +1063,18 @@ retryReadHeaders:
cout << "decompress(0x" << hex << (ptrdiff_t)&alignedbuff[0] << dec << ", " << fdit->second->ptrList[cmpOffFact.quot].second << ", 0x" << hex << (ptrdiff_t)uCmpBuf << dec << ", " << blen << ")" << endl; cout << "decompress(0x" << hex << (ptrdiff_t)&alignedbuff[0] << dec << ", " << fdit->second->ptrList[cmpOffFact.quot].second << ", 0x" << hex << (ptrdiff_t)uCmpBuf << dec << ", " << blen << ")" << endl;
} }
#endif #endif
int dcrc = decompressor.uncompressBlock(&alignedbuff[0],
std::unique_ptr<compress::CompressInterface> decompressor(
compress::getCompressInterfaceByType(
static_cast<uint32_t>(fdit->second->compType)));
if (!decompressor)
{
// Use default?
decompressor.reset(
new compress::CompressInterfaceSnappy());
}
int dcrc = decompressor->uncompressBlock(&alignedbuff[0],
fdit->second->ptrList[cmpOffFact.quot].second, uCmpBuf, blen); fdit->second->ptrList[cmpOffFact.quot].second, uCmpBuf, blen);
if (dcrc != 0) if (dcrc != 0)

View File

@ -696,13 +696,25 @@ blockReadRetry:
i = fp->pread( &cmpHdrBuf[0], 0, 4096 * 3); i = fp->pread( &cmpHdrBuf[0], 0, 4096 * 3);
CompChunkPtrList ptrList; CompChunkPtrList ptrList;
IDBCompressInterface decompressor; std::unique_ptr<CompressInterface> decompressor(
compress::getCompressInterfaceByType(
compress::CompressInterface::getCompressionType(
&cmpHdrBuf[0])));
if (!decompressor)
{
// Use default?
decompressor.reset(
new compress::CompressInterfaceSnappy());
}
int dcrc = 0; int dcrc = 0;
if (i == 4096 * 3) if (i == 4096 * 3)
{ {
uint64_t numHdrs = 0; // extra headers uint64_t numHdrs = 0; // extra headers
dcrc = decompressor.getPtrList(&cmpHdrBuf[4096], 4096, ptrList); dcrc = compress::CompressInterface::getPtrList(
&cmpHdrBuf[4096], 4096, ptrList);
if (dcrc == 0 && ptrList.size() > 0) if (dcrc == 0 && ptrList.size() > 0)
numHdrs = ptrList[0].first / 4096ULL - 2ULL; numHdrs = ptrList[0].first / 4096ULL - 2ULL;
@ -723,7 +735,8 @@ blockReadRetry:
i = fp->pread( &nextHdrBufPtr[0], 4096 * 2, numHdrs * 4096 ); i = fp->pread( &nextHdrBufPtr[0], 4096 * 2, numHdrs * 4096 );
CompChunkPtrList nextPtrList; CompChunkPtrList nextPtrList;
dcrc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList); dcrc = compress::CompressInterface::getPtrList(
&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
if (dcrc == 0) if (dcrc == 0)
ptrList.insert(ptrList.end(), nextPtrList.begin(), nextPtrList.end()); ptrList.insert(ptrList.end(), nextPtrList.begin(), nextPtrList.end());
@ -777,11 +790,11 @@ blockReadRetry:
cmpBuf = (char*) alignedBuffer; cmpBuf = (char*) alignedBuffer;
} }
unsigned blen = 4 * 1024 * 1024; size_t blen = 4 * 1024 * 1024;
i = fp->pread( cmpBuf, cmpBufOff, cmpBufSz ); i = fp->pread( cmpBuf, cmpBufOff, cmpBufSz );
dcrc = decompressor.uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen); dcrc = decompressor->uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen);
if (dcrc == 0) if (dcrc == 0)
{ {

View File

@ -42,3 +42,9 @@ if (WITH_REBUILD_EM_UT)
target_link_libraries(rebuild_em_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS}) target_link_libraries(rebuild_em_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
install(TARGETS rebuild_em_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) install(TARGETS rebuild_em_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
endif() endif()
if (WITH_COMPRESSION_UT)
add_executable(compression_tests compression-tests.cpp)
target_link_libraries(compression_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
install(TARGETS compression_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
endif()

126
tests/compression-tests.cpp Normal file
View File

@ -0,0 +1,126 @@
/* Copyright (C) 2021 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "idbcompress.h"
// Test fixture that provides a generator of highly repetitive input data
// for the compression tests.
class CompressionTest : public ::testing::Test
{
  protected:
    // Returns every permutation of the characters of `data` (by position,
    // so repeated characters yield repeated permutations) concatenated into
    // one string of data.size()! * data.size() characters.
    //
    // `data` is permuted in place while generating; every swap is undone,
    // so it holds its original contents again when the call returns.
    std::string genPermutations(std::string& data)
    {
        std::string generated;
        generate(data, 0, generated);
        return generated;
    }

  private:
    // Recursive helper: fixes position `i` to each remaining character in
    // turn and recurses on the tail. When `i` reaches the end, the current
    // arrangement of `data` is appended to `generated`.
    void generate(std::string& data, std::size_t i, std::string& generated)
    {
        if (i == data.size())
        {
            generated.append(data);
            return;
        }

        for (std::size_t k = i, e = data.size(); k < e; ++k)
        {
            std::swap(data[i], data[k]);
            generate(data, i + 1, generated);
            // Undo the swap so the next iteration starts from the same state.
            std::swap(data[i], data[k]);
        }
    }
};
// Round trip through the LZ4 implementation: compress a short text,
// uncompress it again and expect to get the original bytes back.
TEST_F(CompressionTest, LZ4CanCompress)
{
    std::string originalData =
        "This program is free software; you can redistribute it and/or"
        "modify it under the terms of the GNU General Public License"
        "as published by the Free Software Foundation; version 2 of"
        "the License.";

    std::unique_ptr<compress::CompressInterface> compressor(
        new compress::CompressInterfaceLZ4());

    size_t originalSize = originalData.size();
    // Starts out as the buffer capacity; it is handed to compress() by
    // pointer, presumably so the callee can report the real compressed size.
    size_t compressedSize = compressor->maxCompressedSize(originalSize);
    std::unique_ptr<char[]> compressedData(new char[compressedSize]);
    std::memset(compressedData.get(), 0, compressedSize);

    auto rc = compressor->compress(originalData.data(), originalSize,
                                   compressedData.get(), &compressedSize);
    ASSERT_EQ(rc, 0);

    std::unique_ptr<char[]> uncompressedData(new char[originalSize]);
    rc = compressor->uncompress(compressedData.get(), compressedSize,
                                uncompressedData.get(), &originalSize);
    ASSERT_EQ(rc, 0);

    // The output buffer holds exactly `originalSize` bytes and carries no
    // terminating NUL, so the length must be given explicitly; building the
    // string from the bare pointer would read past the end of the buffer.
    std::string result(uncompressedData.get(), originalSize);
    EXPECT_EQ(originalData, result);
}
// Compresses the same generated inputs with LZ4 and with Snappy and prints
// the ratio each one achieves. Only the return codes are asserted; the
// ratios themselves are informational.
TEST_F(CompressionTest, LZvsSnappyUnique)
{
    std::unique_ptr<compress::CompressInterface> lz4(
        new compress::CompressInterfaceLZ4());
    std::unique_ptr<compress::CompressInterface> snappy(
        new compress::CompressInterfaceSnappy());

    // Seeds for the permutation generator, from all-distinct characters to
    // almost all-equal ones.
    // 9! * 9 == 3265920 (closer to current chunk size)
    std::vector<std::string> seeds{"abcdefghi", "aaadefghi", "aaaaafghi",
                                   "aaaaaaahi", "aaaaaaaaj"};

    for (auto& seed : seeds)
    {
        std::cout << "Permutations generated for: " << seed << std::endl;

        auto input = genPermutations(seed);
        auto inputSize = input.size();

        // Worst-case output sizes; the compress() calls below receive them
        // by pointer.
        auto lz4Size = lz4->maxCompressedSize(inputSize);
        auto snappySize = snappy->maxCompressedSize(inputSize);

        std::unique_ptr<char[]> lz4Out(new char[lz4Size]);
        auto rc =
            lz4->compress(input.data(), inputSize, lz4Out.get(), &lz4Size);
        ASSERT_EQ(rc, 0);

        std::unique_ptr<char[]> snappyOut(new char[snappySize]);
        rc = snappy->compress(input.data(), inputSize, snappyOut.get(),
                              &snappySize);
        ASSERT_EQ(rc, 0);

        const float lz4Ratio =
            static_cast<float>(inputSize) / static_cast<float>(lz4Size);
        const float snappyRatio =
            static_cast<float>(inputSize) / static_cast<float>(snappySize);

        std::cout << "LZ ratio: " << lz4Ratio << std::endl;
        std::cout << "Snappy ratio: " << snappyRatio << std::endl;
    }
}

View File

@ -383,7 +383,7 @@ public:
BlockOp blockOp; BlockOp blockOp;
char fileName[20]; char fileName[20];
int rc; int rc;
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
printf("\nRunning testCreateDeleteFile \n"); printf("\nRunning testCreateDeleteFile \n");
idbdatafile::IDBPolicy::init(true, false, "", 0); idbdatafile::IDBPolicy::init(true, false, "", 0);
@ -966,7 +966,7 @@ public:
BlockOp blockOp; BlockOp blockOp;
char fileName[20]; char fileName[20];
int rc; int rc;
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
int dbRoot = 1; int dbRoot = 1;
printf("\nRunning testExtensionWOPrealloc \n"); printf("\nRunning testExtensionWOPrealloc \n");
@ -1085,7 +1085,7 @@ public:
int dbRoot = 1; int dbRoot = 1;
int colWidth = 65535; int colWidth = 65535;
DctnryCompress1 m_Dctnry; DctnryCompress1 m_Dctnry(/*compressionType=*/1);
// This is the magic for the stub in FileOp::oid2FileName // This is the magic for the stub in FileOp::oid2FileName
int oId = 42; int oId = 42;
@ -1565,7 +1565,7 @@ public:
BlockOp blockOp; BlockOp blockOp;
char fileName[20]; char fileName[20];
int rc; int rc;
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
int dbRoot = 1; int dbRoot = 1;
idbdatafile::IDBPolicy::init(true, false, "", 0); idbdatafile::IDBPolicy::init(true, false, "", 0);

View File

@ -89,7 +89,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
} }
// Read and verify header. // Read and verify header.
char fileHeader[compress::IDBCompressInterface::HDR_BUF_LEN * 2]; char fileHeader[compress::CompressInterface::HDR_BUF_LEN * 2];
rc = fileOp.readHeaders(dbFile.get(), fileHeader); rc = fileOp.readHeaders(dbFile.get(), fileHeader);
if (rc != 0) if (rc != 0)
{ {
@ -116,8 +116,8 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
} }
// Read the `colDataType` and `colWidth` from the given header. // Read the `colDataType` and `colWidth` from the given header.
compress::IDBCompressInterface compressor; const auto versionNumber =
const auto versionNumber = compressor.getVersionNumber(fileHeader); compress::CompressInterface::getVersionNumber(fileHeader);
// Verify header number. // Verify header number.
if (versionNumber < 3) if (versionNumber < 3)
{ {
@ -129,10 +129,11 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
return -1; return -1;
} }
auto colDataType = compressor.getColDataType(fileHeader); auto colDataType = compress::CompressInterface::getColDataType(fileHeader);
auto colWidth = compressor.getColumnWidth(fileHeader); auto colWidth = compress::CompressInterface::getColumnWidth(fileHeader);
auto blockCount = compressor.getBlockCount(fileHeader); auto blockCount = compress::CompressInterface::getBlockCount(fileHeader);
auto lbidCount = compressor.getLBIDCount(fileHeader); auto lbidCount = compress::CompressInterface::getLBIDCount(fileHeader);
auto compressionType = compress::CompressInterface::getCompressionType(fileHeader);
if (colDataType == execplan::CalpontSystemCatalog::UNDEFINED) if (colDataType == execplan::CalpontSystemCatalog::UNDEFINED)
{ {
@ -155,7 +156,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
uint64_t hwm = 0; uint64_t hwm = 0;
rc = searchHWMInSegmentFile(oid, getDBRoot(), partition, segment, colDataType, colWidth, rc = searchHWMInSegmentFile(oid, getDBRoot(), partition, segment, colDataType, colWidth,
blockCount, isDict, hwm); blockCount, isDict, compressionType, hwm);
if (rc != 0) if (rc != 0)
{ {
return rc; return rc;
@ -172,13 +173,13 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
{ {
for (uint32_t lbidIndex = 0; lbidIndex < lbidCount - 1; ++lbidIndex) for (uint32_t lbidIndex = 0; lbidIndex < lbidCount - 1; ++lbidIndex)
{ {
auto lbid = compressor.getLBIDByIndex(fileHeader, lbidIndex); auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidIndex);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, /*hwm*/ 0, isDict); FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, /*hwm*/ 0, isDict);
extentMap.push_back(fileId); extentMap.push_back(fileId);
} }
// Last one has an actual HWM. // Last one has an actual HWM.
auto lbid = compressor.getLBIDByIndex(fileHeader, lbidCount - 1); auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidCount - 1);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict); FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId); extentMap.push_back(fileId);
@ -192,7 +193,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
else else
{ {
// One extent per segment file. // One extent per segment file.
auto lbid = compressor.getLBIDByIndex(fileHeader, 0); auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, 0);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict); FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId); extentMap.push_back(fileId);
@ -293,7 +294,7 @@ int32_t EMReBuilder::rebuildExtentMap()
int32_t EMReBuilder::searchHWMInSegmentFile( int32_t EMReBuilder::searchHWMInSegmentFile(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
uint64_t blockCount, bool isDict, uint64_t& hwm) uint64_t blockCount, bool isDict, uint32_t compressionType, uint64_t& hwm)
{ {
std::unique_ptr<ChunkManagerWrapper> chunkManagerWrapper; std::unique_ptr<ChunkManagerWrapper> chunkManagerWrapper;
try try
@ -302,13 +303,15 @@ int32_t EMReBuilder::searchHWMInSegmentFile(
{ {
chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperDict>( chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperDict>(
new ChunkManagerWrapperDict(oid, dbRoot, partition, segment, new ChunkManagerWrapperDict(oid, dbRoot, partition, segment,
colDataType, colWidth)); colDataType, colWidth,
compressionType));
} }
else else
{ {
chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperColumn>( chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperColumn>(
new ChunkManagerWrapperColumn(oid, dbRoot, partition, segment, new ChunkManagerWrapperColumn(oid, dbRoot, partition, segment,
colDataType, colWidth)); colDataType, colWidth,
compressionType));
} }
} }
catch (...) catch (...)
@ -401,12 +404,13 @@ int32_t ChunkManagerWrapper::readBlock(uint32_t blockNumber)
ChunkManagerWrapperColumn::ChunkManagerWrapperColumn( ChunkManagerWrapperColumn::ChunkManagerWrapperColumn(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth) execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
uint32_t compressionType)
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType, : ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType,
colWidth) colWidth)
{ {
pFileOp = std::unique_ptr<WriteEngine::ColumnOpCompress1>( pFileOp = std::unique_ptr<WriteEngine::ColumnOpCompress1>(
new WriteEngine::ColumnOpCompress1()); new WriteEngine::ColumnOpCompress1(compressionType));
chunkManager.fileOp(pFileOp.get()); chunkManager.fileOp(pFileOp.get());
// Open compressed column segment file. We will read block by block // Open compressed column segment file. We will read block by block
// from the compressed chunks. // from the compressed chunks.
@ -463,12 +467,13 @@ bool ChunkManagerWrapperColumn::isEmptyValue(const uint8_t* value) const
ChunkManagerWrapperDict::ChunkManagerWrapperDict( ChunkManagerWrapperDict::ChunkManagerWrapperDict(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth) execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
uint32_t compressionType)
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType, : ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType,
colWidth) colWidth)
{ {
pFileOp = std::unique_ptr<WriteEngine::DctnryCompress1>( pFileOp = std::unique_ptr<WriteEngine::DctnryCompress1>(
new WriteEngine::DctnryCompress1()); new WriteEngine::DctnryCompress1(compressionType));
chunkManager.fileOp(pFileOp.get()); chunkManager.fileOp(pFileOp.get());
// Open compressed dict segment file. // Open compressed dict segment file.
pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment, pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment,

View File

@ -112,7 +112,8 @@ class EMReBuilder
int32_t searchHWMInSegmentFile( int32_t searchHWMInSegmentFile(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t width, uint64_t blocksCount, bool isDict, uint64_t& hwm); uint32_t width, uint64_t blocksCount, bool isDict,
uint32_t compressionType, uint64_t& hwm);
// Sets the dbroot to the given `number`. // Sets the dbroot to the given `number`.
void setDBRoot(uint32_t number) { dbRoot = number; } void setDBRoot(uint32_t number) { dbRoot = number; }
@ -184,7 +185,7 @@ class ChunkManagerWrapperColumn : public ChunkManagerWrapper
ChunkManagerWrapperColumn( ChunkManagerWrapperColumn(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth); uint32_t colWidth, uint32_t compressionType);
~ChunkManagerWrapperColumn() = default; ~ChunkManagerWrapperColumn() = default;
ChunkManagerWrapperColumn(const ChunkManagerWrapperColumn& other) = delete; ChunkManagerWrapperColumn(const ChunkManagerWrapperColumn& other) = delete;
@ -210,7 +211,7 @@ class ChunkManagerWrapperDict : public ChunkManagerWrapper
ChunkManagerWrapperDict( ChunkManagerWrapperDict(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, execplan::CalpontSystemCatalog::ColDataType colDataType,
uint32_t colWidth); uint32_t colWidth, uint32_t compressionType);
~ChunkManagerWrapperDict() = default; ~ChunkManagerWrapperDict() = default;
ChunkManagerWrapperDict(const ChunkManagerWrapperDict& other) = delete; ChunkManagerWrapperDict(const ChunkManagerWrapperDict& other) = delete;

View File

@ -10,7 +10,7 @@ add_definitions(-DNDEBUG)
add_library(compress SHARED ${compress_LIB_SRCS}) add_library(compress SHARED ${compress_LIB_SRCS})
target_link_libraries(compress ${SNAPPY_LIBRARIES}) target_link_libraries(compress ${SNAPPY_LIBRARIES} ${LZ4_LIBRARIES})
install(TARGETS compress DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine) install(TARGETS compress DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine)

View File

@ -22,12 +22,14 @@
#include <cstring> #include <cstring>
#include <iostream> #include <iostream>
#include <stdexcept> #include <stdexcept>
#include <unordered_map>
using namespace std; using namespace std;
#include "blocksize.h" #include "blocksize.h"
#include "logger.h" #include "logger.h"
#include "snappy.h" #include "snappy.h"
#include "hasher.h" #include "hasher.h"
#include "lz4.h"
#define IDBCOMP_DLLEXPORT #define IDBCOMP_DLLEXPORT
#include "idbcompress.h" #include "idbcompress.h"
@ -39,8 +41,7 @@ const uint64_t MAGIC_NUMBER = 0xfdc119a384d0778eULL;
const uint64_t VERSION_NUM1 = 1; const uint64_t VERSION_NUM1 = 1;
const uint64_t VERSION_NUM2 = 2; const uint64_t VERSION_NUM2 = 2;
const uint64_t VERSION_NUM3 = 3; const uint64_t VERSION_NUM3 = 3;
const int COMPRESSED_CHUNK_INCREMENT_SIZE = 8192; const int PTR_SECTION_OFFSET = compress::CompressInterface::HDR_BUF_LEN;
const int PTR_SECTION_OFFSET = compress::IDBCompressInterface::HDR_BUF_LEN;
// version 1.1 of the chunk data has a short header // version 1.1 of the chunk data has a short header
// QuickLZ compressed data never has the high bit set on the first byte // QuickLZ compressed data never has the high bit set on the first byte
@ -83,7 +84,7 @@ struct CompressedDBFileHeader
union CompressedDBFileHeaderBlock union CompressedDBFileHeaderBlock
{ {
CompressedDBFileHeader fHeader; CompressedDBFileHeader fHeader;
char fDummy[compress::IDBCompressInterface::HDR_BUF_LEN]; char fDummy[compress::CompressInterface::HDR_BUF_LEN];
}; };
void initCompressedDBFileHeader( void initCompressedDBFileHeader(
@ -110,53 +111,57 @@ namespace compress
{ {
#ifndef SKIP_IDB_COMPRESSION #ifndef SKIP_IDB_COMPRESSION
IDBCompressInterface::IDBCompressInterface(unsigned int numUserPaddingBytes) : CompressInterface::CompressInterface(unsigned int numUserPaddingBytes) :
fNumUserPaddingBytes(numUserPaddingBytes) fNumUserPaddingBytes(numUserPaddingBytes)
{ } { }
IDBCompressInterface::~IDBCompressInterface()
{ }
/* V1 is really only available for decompression, we kill any DDL using V1 by hand. /* V1 is really only available for decompression, we kill any DDL using V1 by hand.
* Maybe should have a new api, isDecompressionAvail() ? Any request to compress * Maybe should have a new api, isDecompressionAvail() ? Any request to compress
* using V1 will silently be changed to V2. * using V1 will silently be changed to V2.
*/ */
bool IDBCompressInterface::isCompressionAvail(int compressionType) const /*static*/
bool CompressInterface::isCompressionAvail(int compressionType)
{ {
if ( (compressionType == 0) || return ((compressionType == 0) || (compressionType == 1) ||
(compressionType == 1) || (compressionType == 2) || (compressionType == 3));
(compressionType == 2) ) }
return true;
return false; size_t CompressInterface::getMaxCompressedSizeGeneric(size_t inLen)
{
return std::max(snappy::MaxCompressedLength(inLen),
LZ4_COMPRESSBOUND(inLen)) +
HEADER_SIZE;
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Compress a block of data // Compress a block of data
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
int IDBCompressInterface::compressBlock(const char* in, int CompressInterface::compressBlock(const char* in, const size_t inLen,
const size_t inLen, unsigned char* out, size_t& outLen) const
unsigned char* out,
unsigned int& outLen) const
{ {
size_t snaplen = 0; size_t snaplen = 0;
utils::Hasher128 hasher; utils::Hasher128 hasher;
// loose input checking. // loose input checking.
if (outLen < snappy::MaxCompressedLength(inLen) + HEADER_SIZE) if (outLen < maxCompressedSize(inLen))
{ {
cerr << "got outLen = " << outLen << " for inLen = " << inLen << ", needed " << cerr << "got outLen = " << outLen << " for inLen = " << inLen
(snappy::MaxCompressedLength(inLen) + HEADER_SIZE) << endl; << ", needed " << (maxCompressedSize(inLen)) << endl;
return ERR_BADOUTSIZE; return ERR_BADOUTSIZE;
} }
//apparently this never fails? auto rc = compress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]),
snappy::RawCompress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]), &snaplen); &outLen);
if (rc != ERR_OK)
{
return rc;
}
snaplen = outLen;
uint8_t* signature = (uint8_t*) &out[SIG_OFFSET]; uint8_t* signature = (uint8_t*) &out[SIG_OFFSET];
uint32_t* checksum = (uint32_t*) &out[CHECKSUM_OFFSET]; uint32_t* checksum = (uint32_t*) &out[CHECKSUM_OFFSET];
uint32_t* len = (uint32_t*) &out[LEN_OFFSET]; uint32_t* len = (uint32_t*) &out[LEN_OFFSET];
*signature = CHUNK_MAGIC3; *signature = getChunkMagicNumber();
*checksum = hasher((char*) &out[HEADER_SIZE], snaplen); *checksum = hasher((char*) &out[HEADER_SIZE], snaplen);
*len = snaplen; *len = snaplen;
@ -171,51 +176,47 @@ int IDBCompressInterface::compressBlock(const char* in,
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Decompress a block of data // Decompress a block of data
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out, int CompressInterface::uncompressBlock(const char* in, const size_t inLen,
unsigned int& outLen) const unsigned char* out,
size_t& outLen) const
{ {
bool comprc = false;
size_t ol = 0;
uint32_t realChecksum; uint32_t realChecksum;
uint32_t storedChecksum; uint32_t storedChecksum;
uint32_t storedLen; uint32_t storedLen;
uint8_t storedMagic; uint8_t storedMagic;
utils::Hasher128 hasher; utils::Hasher128 hasher;
auto tmpOutLen = outLen;
outLen = 0; outLen = 0;
if (inLen < 1) if (inLen < 1)
{
return ERR_BADINPUT; return ERR_BADINPUT;
}
storedMagic = *((uint8_t*) &in[SIG_OFFSET]); storedMagic = *((uint8_t*) &in[SIG_OFFSET]);
if (storedMagic == CHUNK_MAGIC3) if (storedMagic == getChunkMagicNumber())
{ {
if (inLen < HEADER_SIZE) if (inLen < HEADER_SIZE)
{
return ERR_BADINPUT; return ERR_BADINPUT;
}
storedChecksum = *((uint32_t*) &in[CHECKSUM_OFFSET]); storedChecksum = *((uint32_t*) &in[CHECKSUM_OFFSET]);
storedLen = *((uint32_t*) (&in[LEN_OFFSET])); storedLen = *((uint32_t*) (&in[LEN_OFFSET]));
if (inLen < storedLen + HEADER_SIZE) if (inLen < storedLen + HEADER_SIZE)
{
return ERR_BADINPUT; return ERR_BADINPUT;
}
realChecksum = hasher(&in[HEADER_SIZE], storedLen); realChecksum = hasher(&in[HEADER_SIZE], storedLen);
if (storedChecksum != realChecksum) if (storedChecksum != realChecksum)
{
return ERR_CHECKSUM; return ERR_CHECKSUM;
auto rc = uncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out), &tmpOutLen);
if (rc != ERR_OK)
{
cerr << "uncompressBlock failed!" << endl;
return ERR_DECOMPRESS;
} }
comprc = snappy::GetUncompressedLength(&in[HEADER_SIZE], storedLen, &ol) && outLen = tmpOutLen;
snappy::RawUncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out));
} }
else else
{ {
@ -223,13 +224,6 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
return ERR_BADINPUT; return ERR_BADINPUT;
} }
if (!comprc)
{
cerr << "decomp failed!" << endl;
return ERR_DECOMPRESS;
}
outLen = ol;
//cerr << "ub: " << inLen << " : " << outLen << endl; //cerr << "ub: " << inLen << " : " << outLen << endl;
return ERR_OK; return ERR_OK;
@ -238,7 +232,7 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Verify the passed in buffer contains a valid compression file header. // Verify the passed in buffer contains a valid compression file header.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
int IDBCompressInterface::verifyHdr(const void* hdrBuf) const int CompressInterface::verifyHdr(const void* hdrBuf)
{ {
const CompressedDBFileHeader* hdr = reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf); const CompressedDBFileHeader* hdr = reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf);
@ -255,9 +249,8 @@ int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
// Extract compression pointer information out of the pointer buffer that is // Extract compression pointer information out of the pointer buffer that is
// passed in. ptrBuf points to the pointer section of the compression hdr. // passed in. ptrBuf points to the pointer section of the compression hdr.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
int IDBCompressInterface::getPtrList(const char* ptrBuf, int CompressInterface::getPtrList(const char* ptrBuf, const int ptrBufSize,
const int ptrBufSize, CompChunkPtrList& chunkPtrs)
CompChunkPtrList& chunkPtrs ) const
{ {
int rc = 0; int rc = 0;
chunkPtrs.clear(); chunkPtrs.clear();
@ -285,7 +278,7 @@ int IDBCompressInterface::getPtrList(const char* ptrBuf,
// one for the file header, and one for the list of pointers. // one for the file header, and one for the list of pointers.
// Wrapper of above method for backward compatibility. // Wrapper of above method for backward compatibility.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs ) const int CompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs )
{ {
return getPtrList(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN, chunkPtrs); return getPtrList(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN, chunkPtrs);
} }
@ -293,8 +286,8 @@ int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunk
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Count the number of chunk pointers in the pointer header(s) // Count the number of chunk pointers in the pointer header(s)
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf, unsigned int CompressInterface::getPtrCount(const char* ptrBuf,
const int ptrBufSize) const const int ptrBufSize)
{ {
unsigned int chunkCount = 0; unsigned int chunkCount = 0;
@ -318,7 +311,7 @@ unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
// This should not be used for compressed dictionary files which could have // This should not be used for compressed dictionary files which could have
// more compression chunk headers. // more compression chunk headers.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const unsigned int CompressInterface::getPtrCount(const char* hdrBuf)
{ {
return getPtrCount(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN); return getPtrCount(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN);
} }
@ -326,9 +319,8 @@ unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Store list of compression pointers into the specified header. // Store list of compression pointers into the specified header.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
void* ptrBuf, void* ptrBuf, int ptrSectionSize)
int ptrSectionSize) const
{ {
memset((ptrBuf), 0, ptrSectionSize); // reset the pointer section to 0 memset((ptrBuf), 0, ptrSectionSize); // reset the pointer section to 0
uint64_t* hdrPtrs = reinterpret_cast<uint64_t*>(ptrBuf); uint64_t* hdrPtrs = reinterpret_cast<uint64_t*>(ptrBuf);
@ -342,7 +334,7 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Wrapper of above method for backward compatibility // Wrapper of above method for backward compatibility
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf) const void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf)
{ {
storePtrs(ptrs, reinterpret_cast<char*>(ptrBuf) + HDR_BUF_LEN, HDR_BUF_LEN); storePtrs(ptrs, reinterpret_cast<char*>(ptrBuf) + HDR_BUF_LEN, HDR_BUF_LEN);
} }
@ -350,10 +342,10 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* pt
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Initialize the header blocks to be written at the start of a dictionary file. // Initialize the header blocks to be written at the start of a dictionary file.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
void IDBCompressInterface::initHdr( void CompressInterface::initHdr(
void* hdrBuf, void* ptrBuf, uint32_t colWidth, void* hdrBuf, void* ptrBuf, uint32_t colWidth,
execplan::CalpontSystemCatalog::ColDataType columnType, execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType, int hdrSize) const int compressionType, int hdrSize)
{ {
memset(hdrBuf, 0, HDR_BUF_LEN); memset(hdrBuf, 0, HDR_BUF_LEN);
memset(ptrBuf, 0, hdrSize - HDR_BUF_LEN); memset(ptrBuf, 0, hdrSize - HDR_BUF_LEN);
@ -364,10 +356,10 @@ void IDBCompressInterface::initHdr(
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Initialize the header blocks to be written at the start of a column file. // Initialize the header blocks to be written at the start of a column file.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
void IDBCompressInterface::initHdr( void CompressInterface::initHdr(
void* hdrBuf, uint32_t columnWidth, void* hdrBuf, uint32_t columnWidth,
execplan::CalpontSystemCatalog::ColDataType columnType, execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType) const int compressionType)
{ {
memset(hdrBuf, 0, HDR_BUF_LEN * 2); memset(hdrBuf, 0, HDR_BUF_LEN * 2);
initCompressedDBFileHeader(hdrBuf, columnWidth, columnType, initCompressedDBFileHeader(hdrBuf, columnWidth, columnType,
@ -377,7 +369,7 @@ void IDBCompressInterface::initHdr(
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Get the header's version number // Get the header's version number
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
{ {
return ( return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fVersionNum); reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fVersionNum);
@ -386,7 +378,7 @@ uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Set the file's block count // Set the file's block count
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count)
{ {
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fBlockCount = count; reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fBlockCount = count;
} }
@ -394,15 +386,24 @@ void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Get the file's block count // Get the file's block count
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
{ {
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fBlockCount); return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fBlockCount);
} }
//------------------------------------------------------------------------------
// Get the file's compression type
//------------------------------------------------------------------------------
// Interprets hdrBuf as a CompressedDBFileHeader and returns its
// fCompressionType field.
uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
{
    const auto* fileHeader =
        reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf);
    return fileHeader->fCompressionType;
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Set the overall header size // Set the overall header size
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const void CompressInterface::setHdrSize(void* hdrBuf, uint64_t size)
{ {
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fHeaderSize = size; reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fHeaderSize = size;
} }
@ -410,7 +411,7 @@ void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Get the overall header size // Get the overall header size
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const uint64_t CompressInterface::getHdrSize(const void* hdrBuf)
{ {
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fHeaderSize); return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fHeaderSize);
} }
@ -419,7 +420,7 @@ uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
// Get column type // Get column type
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
execplan::CalpontSystemCatalog::ColDataType execplan::CalpontSystemCatalog::ColDataType
IDBCompressInterface::getColDataType(const void* hdrBuf) const CompressInterface::getColDataType(const void* hdrBuf)
{ {
return ( return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColDataType); reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColDataType);
@ -428,7 +429,7 @@ IDBCompressInterface::getColDataType(const void* hdrBuf) const
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Get column width // Get column width
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const uint64_t CompressInterface::getColumnWidth(const void* hdrBuf)
{ {
return ( return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColumnWidth); reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColumnWidth);
@ -437,7 +438,7 @@ uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Get LBID by index // Get LBID by index
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index) const uint64_t CompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index)
{ {
if (index < LBID_MAX_SIZE) if (index < LBID_MAX_SIZE)
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDS[index]); return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDS[index]);
@ -447,7 +448,7 @@ uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Set LBID by index // Set LBID by index
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const void CompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index)
{ {
if (lbid && index < LBID_MAX_SIZE) if (lbid && index < LBID_MAX_SIZE)
{ {
@ -457,7 +458,10 @@ void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t
} }
} }
uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const //------------------------------------------------------------------------------
// Get LBID count
//------------------------------------------------------------------------------
uint64_t CompressInterface::getLBIDCount(void* hdrBuf)
{ {
return reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDCount; return reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDCount;
} }
@ -466,9 +470,9 @@ uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
// Calculates the chunk and block offset within the chunk for the specified // Calculates the chunk and block offset within the chunk for the specified
// block number. // block number.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
void IDBCompressInterface::locateBlock(unsigned int block, void CompressInterface::locateBlock(unsigned int block,
unsigned int& chunkIndex, unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const unsigned int& blockOffsetWithinChunk) const
{ {
const uint64_t BUFLEN = UNCOMPRESSED_INBUF_LEN; const uint64_t BUFLEN = UNCOMPRESSED_INBUF_LEN;
@ -485,9 +489,8 @@ void IDBCompressInterface::locateBlock(unsigned int block,
// also expand to allow for user requested padding. Lastly, initialize padding // also expand to allow for user requested padding. Lastly, initialize padding
// bytes to 0. // bytes to 0.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
int IDBCompressInterface::padCompressedChunks(unsigned char* buf, int CompressInterface::padCompressedChunks(unsigned char* buf, size_t& len,
unsigned int& len, unsigned int maxLen) const
unsigned int maxLen) const
{ {
int nPaddingBytes = 0; int nPaddingBytes = 0;
int nRem = len % COMPRESSED_CHUNK_INCREMENT_SIZE; int nRem = len % COMPRESSED_CHUNK_INCREMENT_SIZE;
@ -511,30 +514,203 @@ int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
return 0; return 0;
} }
/* static */ // Snappy
uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize) CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
: CompressInterface(numUserPaddingBytes)
{
}
// Snappy implementation of the raw compress hook.
//
// Hands the input straight to snappy::RawCompress(), which reports the
// compressed size through outLen. The value *outLen holds on entry is not
// read here. Always returns ERR_OK.
int32_t CompressInterfaceSnappy::compress(const char* in, size_t inLen,
                                          char* out, size_t* outLen) const
{
    snappy::RawCompress(in, inLen, out, outLen);

#ifdef DEBUG_COMPRESSION
    std::cout << "Snappy::compress: inLen " << inLen;
    std::cout << ", outLen " << *outLen << std::endl;
#endif

    return ERR_OK;
}
// Snappy implementation of the raw uncompress hook.
//
// On entry *outLen must hold the capacity of `out` in bytes. On success it
// is overwritten with the uncompressed size taken from the snappy stream.
// Returns ERR_OK on success and ERR_DECOMPRESS when the stream length cannot
// be read, when `out` is too small for the decoded data, or when
// snappy::RawUncompress() rejects the input. *outLen is left untouched on
// failure.
int32_t CompressInterfaceSnappy::uncompress(const char* in, size_t inLen,
                                            char* out, size_t* outLen) const
{
    size_t realOutLen = 0;

    if (!snappy::GetUncompressedLength(in, inLen, &realOutLen))
    {
        cerr << "snappy::GetUncompressedLength failed. InLen: " << inLen
             << ", outLen: " << *outLen << ", realOutLen: " << realOutLen
             << endl;
        return ERR_DECOMPRESS;
    }

    // Report a short output buffer separately so the log does not blame
    // GetUncompressedLength() for a caller-side sizing problem.
    if (realOutLen > *outLen)
    {
        cerr << "Snappy::uncompress: output buffer is too small. InLen: "
             << inLen << ", outLen: " << *outLen
             << ", realOutLen: " << realOutLen << endl;
        return ERR_DECOMPRESS;
    }

    if (!snappy::RawUncompress(in, inLen, out))
    {
        cerr << "snappy::RawUncompress failed. InLen: " << inLen
             << ", outLen: " << *outLen << endl;
        return ERR_DECOMPRESS;
    }

    // Publish the decoded size before tracing so the debug line shows the
    // number of bytes produced rather than the buffer capacity.
    *outLen = realOutLen;

#ifdef DEBUG_COMPRESSION
    std::cout << "Snappy::uncompress: inLen " << inLen << ", outLen "
              << *outLen << std::endl;
#endif

    return ERR_OK;
}
size_t CompressInterfaceSnappy::maxCompressedSize(size_t uncompSize) const
{ {
return (snappy::MaxCompressedLength(uncompSize) + HEADER_SIZE); return (snappy::MaxCompressedLength(uncompSize) + HEADER_SIZE);
} }
int IDBCompressInterface::compress(const char* in, size_t inLen, char* out, bool CompressInterfaceSnappy::getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const size_t* outLen) const
{
snappy::RawCompress(in, inLen, out, outLen);
return 0;
}
int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
{
return !(snappy::RawUncompress(in, inLen, out));
}
/* static */
bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, size_t* outLen)
{ {
return snappy::GetUncompressedLength(in, inLen, outLen); return snappy::GetUncompressedLength(in, inLen, outLen);
} }
// Returns the one-byte chunk signature used for Snappy-compressed chunks
// (CHUNK_MAGIC_SNAPPY).
uint8_t CompressInterfaceSnappy::getChunkMagicNumber() const
{
return CHUNK_MAGIC_SNAPPY;
}
// LZ4
// Constructs the LZ4 compressor; numUserPaddingBytes is forwarded unchanged
// to the CompressInterface base constructor. No LZ4-specific state is set up.
CompressInterfaceLZ4::CompressInterfaceLZ4(uint32_t numUserPaddingBytes)
: CompressInterface(numUserPaddingBytes)
{
}
int32_t CompressInterfaceLZ4::compress(const char* in, size_t inLen, char* out,
size_t* outLen) const
{
auto compressedLen = LZ4_compress_default(in, out, inLen, *outLen);
if (!compressedLen)
{
cerr << "LZ_compress_default failed. InLen: " << inLen
<< ", compressedLen: " << compressedLen << endl;
return ERR_COMPRESS;
}
#ifdef DEBUG_COMPRESSION
std::cout << "LZ4::compress: inLen " << inLen << ", comressedLen "
<< compressedLen << std::endl;
#endif
*outLen = compressedLen;
return ERR_OK;
}
int32_t CompressInterfaceLZ4::uncompress(const char* in, size_t inLen,
char* out, size_t* outLen) const
{
auto decompressedLen = LZ4_decompress_safe(in, out, inLen, *outLen);
if (decompressedLen < 0)
{
cerr << "LZ_decompress_safe failed with error code " << decompressedLen
<< endl;
cerr << "InLen: " << inLen << ", outLen: " << *outLen << endl;
return ERR_DECOMPRESS;
}
*outLen = decompressedLen;
#ifdef DEBUG_COMPRESSION
std::cout << "LZ4::uncompress: inLen " << inLen << ", outLen " << *outLen
<< std::endl;
#endif
return ERR_OK;
}
// Upper bound on the bytes needed to hold a compressed chunk of uncompSize
// input bytes: LZ4's LZ4_COMPRESSBOUND() for the payload plus HEADER_SIZE
// for the chunk header.
size_t CompressInterfaceLZ4::maxCompressedSize(size_t uncompSize) const
{
    const size_t payloadBound = LZ4_COMPRESSBOUND(uncompSize);
    return payloadBound + HEADER_SIZE;
}
// Not supported for LZ4. None of the arguments are read: the call trips
// idbassert(false) and, if execution continues past it, returns false.
bool CompressInterfaceLZ4::getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const
{
// LZ4 does not have such a function.
idbassert(false);
return false;
}
// Returns the one-byte chunk signature used for LZ4-compressed chunks
// (CHUNK_MAGIC_LZ4).
uint8_t CompressInterfaceLZ4::getChunkMagicNumber() const
{
return CHUNK_MAGIC_LZ4;
}
// Creates a compressor for the given numeric compression type.
//
// Types 1 and 2 yield a Snappy compressor, type 3 an LZ4 compressor; any
// other value yields nullptr. The object is allocated with `new` and this
// function keeps no reference to it, so releasing it is up to the caller.
CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
                                              uint32_t numUserPaddingBytes)
{
    if (compressionType == 1 || compressionType == 2)
    {
        return new CompressInterfaceSnappy(numUserPaddingBytes);
    }

    if (compressionType == 3)
    {
        return new CompressInterfaceLZ4(numUserPaddingBytes);
    }

    return nullptr;
}
// Creates a compressor selected by name.
//
// "SNAPPY" yields a Snappy compressor and "LZ4" an LZ4 compressor; the
// comparison is an exact string match. Any other name yields nullptr. The
// object is allocated with `new` and this function keeps no reference to
// it, so releasing it is up to the caller.
CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
                                              uint32_t numUserPaddingBytes)
{
    CompressInterface* compressor = nullptr;

    if (compressionName == "SNAPPY")
    {
        compressor = new CompressInterfaceSnappy(numUserPaddingBytes);
    }
    else if (compressionName == "LZ4")
    {
        compressor = new CompressInterfaceLZ4(numUserPaddingBytes);
    }

    return compressor;
}
void initializeCompressorPool(
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
compressorPool,
uint32_t numUserPaddingBytes)
{
compressorPool = {
make_pair(2, std::shared_ptr<CompressInterface>(
new CompressInterfaceSnappy(numUserPaddingBytes))),
make_pair(3, std::shared_ptr<CompressInterface>(
new CompressInterfaceLZ4(numUserPaddingBytes)))};
}
std::shared_ptr<CompressInterface> getCompressorByType(
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
compressorPool,
uint32_t compressionType)
{
switch (compressionType)
{
case 1:
case 2:
if (!compressorPool.count(2))
{
return nullptr;
}
return compressorPool[2];
case 3:
if (!compressorPool.count(3))
{
return nullptr;
}
return compressorPool[3];
}
return nullptr;
}
#endif #endif
} // namespace compress } // namespace compress

View File

@ -26,6 +26,7 @@
#endif #endif
#include <vector> #include <vector>
#include <utility> #include <utility>
#include <unordered_map>
#include "calpontsystemcatalog.h" #include "calpontsystemcatalog.h"
@ -41,11 +42,12 @@ namespace compress
typedef std::pair<uint64_t, uint64_t> CompChunkPtr; typedef std::pair<uint64_t, uint64_t> CompChunkPtr;
typedef std::vector<CompChunkPtr> CompChunkPtrList; typedef std::vector<CompChunkPtr> CompChunkPtrList;
class IDBCompressInterface class CompressInterface
{ {
public: public:
static const unsigned int HDR_BUF_LEN = 4096; static const unsigned int HDR_BUF_LEN = 4096;
static const unsigned int UNCOMPRESSED_INBUF_LEN = 512 * 1024 * 8; static const unsigned int UNCOMPRESSED_INBUF_LEN = 512 * 1024 * 8;
static const uint32_t COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
// error codes from uncompressBlock() // error codes from uncompressBlock()
static const int ERR_OK = 0; static const int ERR_OK = 0;
@ -53,22 +55,29 @@ public:
static const int ERR_DECOMPRESS = -2; static const int ERR_DECOMPRESS = -2;
static const int ERR_BADINPUT = -3; static const int ERR_BADINPUT = -3;
static const int ERR_BADOUTSIZE = -4; static const int ERR_BADOUTSIZE = -4;
static const int ERR_COMPRESS = -5;
/** /**
* When IDBCompressInterface object is being used to compress a chunk, this * When CompressInterface object is being used to compress a chunk, this
* construct can be used to specify the padding added by padCompressedChunks * construct can be used to specify the padding added by padCompressedChunks
*/ */
EXPORT explicit IDBCompressInterface(unsigned int numUserPaddingBytes = 0); EXPORT explicit CompressInterface(unsigned int numUserPaddingBytes = 0);
/** /**
* dtor * dtor
*/ */
EXPORT virtual ~IDBCompressInterface(); EXPORT virtual ~CompressInterface() = default;
/** /**
* see if the algo is available in this lib * see if the algo is available in this lib
*/ */
EXPORT bool isCompressionAvail(int compressionType = 0) const; EXPORT static bool isCompressionAvail(int compressionType = 0);
/**
* Returns the maximum compressed size from all available compression
* types.
*/
EXPORT static size_t getMaxCompressedSizeGeneric(size_t inLen);
/** /**
* Compresses specified "in" buffer of length "inLen" bytes. * Compresses specified "in" buffer of length "inLen" bytes.
@ -76,30 +85,31 @@ public:
* "out" should be sized using maxCompressedSize() to allow for incompressible data. * "out" should be sized using maxCompressedSize() to allow for incompressible data.
* Returns 0 if success. * Returns 0 if success.
*/ */
EXPORT int compressBlock(const char* in,
const size_t inLen, EXPORT int compressBlock(const char* in, const size_t inLen,
unsigned char* out, unsigned char* out, size_t& outLen) const;
unsigned int& outLen) const;
/** /**
* outLen must be initialized with the size of the out buffer before calling uncompressBlock. * outLen must be initialized with the size of the out buffer before calling uncompressBlock.
* On return, outLen will have the number of bytes used in out. * On return, outLen will have the number of bytes used in out.
*/ */
EXPORT int uncompressBlock(const char* in, const size_t inLen, unsigned char* out, EXPORT int uncompressBlock(const char* in, const size_t inLen,
unsigned int& outLen) const; unsigned char* out, size_t& outLen) const;
/** /**
* This fcn wraps whatever compression algorithm we're using at the time, and * This fcn wraps whatever compression algorithm we're using at the time, and
* is not specific to blocks on disk. * is not specific to blocks on disk.
*/ */
EXPORT int compress(const char* in, size_t inLen, char* out, size_t* outLen) const; EXPORT virtual int compress(const char* in, size_t inLen, char* out,
size_t* outLen) const = 0;
/** /**
* This fcn wraps whatever compression algorithm we're using at the time, and * This fcn wraps whatever compression algorithm we're using at the time, and
* is not specific to blocks on disk. The caller needs to make sure out is big * is not specific to blocks on disk. The caller needs to make sure out is big
* enough to contain the output by using getUncompressedSize(). * enough to contain the output by using getUncompressedSize().
*/ */
EXPORT int uncompress(const char* in, size_t inLen, char* out) const; EXPORT virtual int uncompress(const char* in, size_t inLen, char* out,
size_t* outLen) const = 0;
/** /**
* Initialize header buffer at start of compressed db file. * Initialize header buffer at start of compressed db file.
@ -107,23 +117,24 @@ public:
* @warning hdrBuf must be at least HDR_BUF_LEN bytes * @warning hdrBuf must be at least HDR_BUF_LEN bytes
* @warning ptrBuf must be at least (hdrSize-HDR_BUF_LEN) bytes * @warning ptrBuf must be at least (hdrSize-HDR_BUF_LEN) bytes
*/ */
EXPORT void initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht, EXPORT static void
execplan::CalpontSystemCatalog::ColDataType columnType, initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
int compressionType, int hdrSize) const; execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType, int hdrSize);
/** /**
* Initialize header buffer at start of compressed db file. * Initialize header buffer at start of compressed db file.
* *
* @warning hdrBuf must be at least HDR_BUF_LEN*2 bytes * @warning hdrBuf must be at least HDR_BUF_LEN*2 bytes
*/ */
EXPORT void initHdr(void* hdrBuf, uint32_t columnWidth, EXPORT static void
execplan::CalpontSystemCatalog::ColDataType columnType, initHdr(void* hdrBuf, uint32_t columnWidth,
int compressionType) const; execplan::CalpontSystemCatalog::ColDataType columnType,
int compressionType);
/** /**
* Verify the passed in buffer contains a compressed db file header. * Verify the passed in buffer contains a compressed db file header.
*/ */
EXPORT int verifyHdr(const void* hdrBuf) const; EXPORT static int verifyHdr(const void* hdrBuf);
/** /**
* Extracts list of compression pointers from the specified ptr buffer. * Extracts list of compression pointers from the specified ptr buffer.
@ -131,9 +142,8 @@ public:
* chunkPtrs is a vector of offset, size pairs for the compressed chunks. * chunkPtrs is a vector of offset, size pairs for the compressed chunks.
* Returns 0 if success. * Returns 0 if success.
*/ */
EXPORT int getPtrList(const char* ptrBuf, EXPORT static int getPtrList(const char* ptrBuf, const int ptrBufSize,
const int ptrBufSize, CompChunkPtrList& chunkPtrs);
CompChunkPtrList& chunkPtrs) const;
/** /**
* Extracts list of compression pointers from the specified header. * Extracts list of compression pointers from the specified header.
@ -142,28 +152,28 @@ public:
* Note: the pointer passed in is the beginning of the header, * Note: the pointer passed in is the beginning of the header,
* not the pointer section as above. * not the pointer section as above.
*/ */
EXPORT int getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs) const; EXPORT static int getPtrList(const char* hdrBuf,
CompChunkPtrList& chunkPtrs);
/** /**
* Return the number of chunk pointers contained in the specified ptr buffer. * Return the number of chunk pointers contained in the specified ptr buffer.
* ptrBuf points to the pointer section taken from the headers. * ptrBuf points to the pointer section taken from the headers.
*/ */
EXPORT unsigned int getPtrCount(const char* ptrBuf, EXPORT static unsigned int getPtrCount(const char* ptrBuf,
const int ptrBufSize) const; const int ptrBufSize);
/** /**
* Return the number of chunk pointers contained in the specified header. * Return the number of chunk pointers contained in the specified header.
* hdrBuf points to start of 2 buffer headers from compressed db file. * hdrBuf points to start of 2 buffer headers from compressed db file.
* For non-dictionary columns. * For non-dictionary columns.
*/ */
EXPORT unsigned int getPtrCount(const char* hdrBuf) const; EXPORT static unsigned int getPtrCount(const char* hdrBuf);
/** /**
* Store vector of pointers into the specified buffer header's pointer section. * Store vector of pointers into the specified buffer header's pointer section.
*/ */
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs, EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
void* hdrBuf, void* hdrBuf, int ptrSectionSize);
int ptrSectionSize) const;
/** /**
* Store vector of pointers into the specified buffer header. * Store vector of pointers into the specified buffer header.
@ -171,14 +181,14 @@ public:
* Note: the pointer passed in is the beginning of the header, * Note: the pointer passed in is the beginning of the header,
* not the pointer section as above. * not the pointer section as above.
*/ */
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs, void* hdrBuf) const; EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
void* hdrBuf);
/** /**
* Calculates the chunk, and the block offset within the chunk, for the * Calculates the chunk, and the block offset within the chunk, for the
* specified block number. * specified block number.
*/ */
EXPORT void locateBlock(unsigned int block, EXPORT void locateBlock(unsigned int block, unsigned int& chunkIndex,
unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const; unsigned int& blockOffsetWithinChunk) const;
/** /**
@ -187,9 +197,8 @@ public:
* maxLen is the maximum size for buf. nonzero return code means the * maxLen is the maximum size for buf. nonzero return code means the
* result output buffer length is > than maxLen. * result output buffer length is > than maxLen.
*/ */
EXPORT int padCompressedChunks(unsigned char* buf, EXPORT int padCompressedChunks(unsigned char* buf, size_t& len,
unsigned int& len, unsigned int maxLen) const;
unsigned int maxLen ) const;
/* /*
* Mutator methods for the block count in the file * Mutator methods for the block count in the file
@ -197,17 +206,22 @@ public:
/** /**
* getVersionNumber * getVersionNumber
*/ */
EXPORT uint64_t getVersionNumber(const void* hdrBuf) const; EXPORT static uint64_t getVersionNumber(const void* hdrBuf);
/** /**
* setBlockCount * setBlockCount
*/ */
EXPORT void setBlockCount(void* hdrBuf, uint64_t count) const; EXPORT static void setBlockCount(void* hdrBuf, uint64_t count);
/** /**
* getBlockCount * getBlockCount
*/ */
EXPORT uint64_t getBlockCount(const void* hdrBuf) const; EXPORT static uint64_t getBlockCount(const void* hdrBuf);
/**
* getCompressionType
*/
EXPORT static uint64_t getCompressionType(const void* hdrBuf);
/* /*
* Mutator methods for the overall header size * Mutator methods for the overall header size
@ -215,38 +229,38 @@ public:
/** /**
* setHdrSize * setHdrSize
*/ */
EXPORT void setHdrSize(void* hdrBuf, uint64_t size) const; EXPORT static void setHdrSize(void* hdrBuf, uint64_t size);
/** /**
* getHdrSize * getHdrSize
*/ */
EXPORT uint64_t getHdrSize(const void* hdrBuf) const; EXPORT static uint64_t getHdrSize(const void* hdrBuf);
/** /**
* getColumnType * getColumnType
*/ */
EXPORT execplan::CalpontSystemCatalog::ColDataType EXPORT static execplan::CalpontSystemCatalog::ColDataType
getColDataType(const void* hdrBuf) const; getColDataType(const void* hdrBuf);
/** /**
* getColumnWidth * getColumnWidth
*/ */
EXPORT uint64_t getColumnWidth(const void* hdrBuf) const; EXPORT static uint64_t getColumnWidth(const void* hdrBuf);
/** /**
* getLBIDByIndex * getLBIDByIndex
*/ */
EXPORT uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index) const; EXPORT static uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index);
/** /**
* setLBIDByIndex * setLBIDByIndex
*/ */
EXPORT void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const; EXPORT static void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index);
/** /**
* getLBIDCount * getLBIDCount
*/ */
EXPORT uint64_t getLBIDCount(void* hdrBuf) const; EXPORT static uint64_t getLBIDCount(void* hdrBuf);
/** /**
* Mutator methods for the user padding bytes * Mutator methods for the user padding bytes
@ -271,97 +285,213 @@ public:
* Given an input, uncompressed block, what's the maximum possible output, * Given an input, uncompressed block, what's the maximum possible output,
* compressed size? * compressed size?
*/ */
EXPORT static uint64_t maxCompressedSize(uint64_t uncompSize); EXPORT virtual size_t maxCompressedSize(size_t uncompSize) const = 0;
/** /**
* Given a compressed block, returns the uncompressed size in outLen. * Given a compressed block, returns the uncompressed size in outLen.
* Returns false on error, true on success. * Returns false on error, true on success.
*/ */
EXPORT static bool getUncompressedSize(char* in, size_t inLen, size_t* outLen); EXPORT virtual bool getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const = 0;
protected: protected:
virtual uint8_t getChunkMagicNumber() const = 0;
private: private:
//defaults okay //defaults okay
//IDBCompressInterface(const IDBCompressInterface& rhs); //CompressInterface(const CompressInterface& rhs);
//IDBCompressInterface& operator=(const IDBCompressInterface& rhs); //CompressInterface& operator=(const CompressInterface& rhs);
unsigned int fNumUserPaddingBytes; // Num bytes to pad compressed chunks unsigned int fNumUserPaddingBytes; // Num bytes to pad compressed chunks
}; };
/**
* CompressInterface implementation whose members are documented as using the
* snappy compression API. It overrides each pure virtual member declared by
* CompressInterface: compress, uncompress, maxCompressedSize,
* getUncompressedSize and getChunkMagicNumber.
*/
class CompressInterfaceSnappy : public CompressInterface
{
public:
/**
* @param numUserPaddingBytes padding byte count; presumably forwarded to the
* CompressInterface constructor -- the real definition is not part of this
* header section, TODO confirm.
*/
EXPORT CompressInterfaceSnappy(uint32_t numUserPaddingBytes = 0);
EXPORT ~CompressInterfaceSnappy() = default;
/**
* Compress the given block using snappy compression API.
* NOTE(review): declared with int32_t while the base class declares int;
* `override` only compiles where the two are the same type.
*/
EXPORT int32_t compress(const char* in, size_t inLen, char* out,
size_t* outLen) const override;
/**
* Uncompress the given block using snappy compression API.
* NOTE(review): same int32_t / int return type remark as compress().
*/
EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
size_t* outLen) const override;
/**
* Get max compressed size for the given `uncompSize` value using snappy
* compression API.
*/
EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
/**
* Get uncompressed size for the given block using snappy
* compression API.
* The base class documents this member as returning false on error and true
* on success, with the size written to `outLen`.
*/
EXPORT
bool getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const override;
protected:
// Overrides the pure virtual hook of CompressInterface; presumably returns
// CHUNK_MAGIC_SNAPPY -- definition not visible here, TODO confirm.
uint8_t getChunkMagicNumber() const override;
private:
// Non-static const data member, so every instance carries its own copy.
const uint8_t CHUNK_MAGIC_SNAPPY = 0xfd;
};
/**
* CompressInterface implementation whose members are documented as using the
* LZ4 compression API. It overrides each pure virtual member declared by
* CompressInterface: compress, uncompress, maxCompressedSize,
* getUncompressedSize and getChunkMagicNumber.
*/
class CompressInterfaceLZ4 : public CompressInterface
{
public:
/**
* @param numUserPaddingBytes padding byte count; presumably forwarded to the
* CompressInterface constructor -- the definition is not visible in this
* header section, TODO confirm.
*/
EXPORT CompressInterfaceLZ4(uint32_t numUserPaddingBytes = 0);
EXPORT ~CompressInterfaceLZ4() = default;
/**
* Compress the given block using LZ4 compression API.
* NOTE(review): declared with int32_t while the base class declares int;
* `override` only compiles where the two are the same type.
*/
EXPORT int32_t compress(const char* in, size_t inLen, char* out,
size_t* outLen) const override;
/**
* Uncompress the given block using LZ4 compression API.
* NOTE(review): same int32_t / int return type remark as compress().
*/
EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
size_t* outLen) const override;
/**
* Get max compressed size for the given `uncompSize` value using LZ4
* compression API.
*/
EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
/**
* Get uncompressed size for the given block using LZ4
* compression API.
* The base class documents this member as returning false on error and true
* on success, with the size written to `outLen`.
*/
EXPORT
bool getUncompressedSize(char* in, size_t inLen,
size_t* outLen) const override;
protected:
// Overrides the pure virtual hook of CompressInterface; presumably returns
// CHUNK_MAGIC_LZ4 -- definition not visible here, TODO confirm.
uint8_t getChunkMagicNumber() const override;
private:
// Non-static const data member, so every instance carries its own copy.
const uint8_t CHUNK_MAGIC_LZ4 = 0xfc;
};
/**
* Pool of shared compression interfaces indexed by an integer key.
* initializeCompressorPool() stores the Snappy interface under key 2 and the
* LZ4 interface under key 3.
*/
using CompressorPool =
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>;
/**
* Returns a pointer to the appropriate compression interface based on
* `compressionType`. `compressionType` must be greater than 0.
* Note: caller is responsible for memory deallocation.
*/
EXPORT CompressInterface*
getCompressInterfaceByType(uint32_t compressionType,
uint32_t numUserPaddingBytes = 0);
/**
* Returns a pointer to the appropriate compression interface based on
* `compressionName`. The names "SNAPPY" and "LZ4" are recognised (exact
* match); any other name yields nullptr.
* Note: caller is responsible for memory deallocation.
*/
EXPORT CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
uint32_t numUserPaddingBytes = 0);
/**
* Initializes a given `unordered_map` with all available compression
* interfaces. Any entries already present in `compressorPool` are replaced.
*/
EXPORT void initializeCompressorPool(CompressorPool& compressorPool,
uint32_t numUserPaddingBytes = 0);
/**
* Returns a `shared_ptr` to the appropriate compression interface.
* Compression types 1 and 2 both resolve to the pool entry with key 2, type 3
* resolves to the entry with key 3. An empty pointer is returned for any
* other type, or when the pool holds no entry for the resolved key.
*/
EXPORT std::shared_ptr<CompressInterface>
getCompressorByType(CompressorPool& compressorPool, uint32_t compressionType);
#ifdef SKIP_IDB_COMPRESSION #ifdef SKIP_IDB_COMPRESSION
inline IDBCompressInterface::IDBCompressInterface(unsigned int /*numUserPaddingBytes*/) {} inline CompressInterface::CompressInterface(unsigned int /*numUserPaddingBytes*/) {}
inline IDBCompressInterface::~IDBCompressInterface() {} inline bool CompressInterface::isCompressionAvail(int c)
inline bool IDBCompressInterface::isCompressionAvail(int c) const
{ {
return (c == 0); return (c == 0);
} }
inline int IDBCompressInterface::compressBlock(const char*, const size_t, unsigned char*, unsigned int&) const inline int CompressInterface::compressBlock(const char*, const size_t, unsigned char*, size_t&) const
{ {
return -1; return -1;
} }
inline int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out, unsigned int& outLen) const inline int CompressInterface::uncompressBlock(const char* in,
const size_t inLen,
unsigned char* out,
size_t& outLen) const
{ {
return -1; return -1;
} }
inline int IDBCompressInterface::compress(const char* in, size_t inLen, char* out, size_t* outLen) const inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}
inline int CompressInterface::verifyHdr(const void*)
{ {
return -1; return -1;
} }
inline int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const inline void CompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) {}
// No-op stub of the single-buffer initHdr overload. The class declares this
// overload `static`, and a static member function cannot be cv-qualified, so
// the definition must not carry a trailing `const`.
inline void CompressInterface::initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}
inline int CompressInterface::getPtrList(const char*, const int, CompChunkPtrList&)
{
return -1;
}
inline unsigned int CompressInterface::getPtrCount(const char*, const int)
{ {
return 0; return 0;
} }
inline void IDBCompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) const {} inline unsigned int CompressInterface::getPtrCount(const char*)
inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {}
inline int IDBCompressInterface::verifyHdr(const void*) const
{
return -1;
}
inline int IDBCompressInterface::getPtrList(const char*, const int, CompChunkPtrList&) const
{
return -1;
}
inline int IDBCompressInterface::getPtrList(const char*, CompChunkPtrList&) const
{
return -1;
}
inline unsigned int IDBCompressInterface::getPtrCount(const char*, const int) const
{ {
return 0; return 0;
} }
inline unsigned int IDBCompressInterface::getPtrCount(const char*) const inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) {}
inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*) {}
inline void
CompressInterface::locateBlock(unsigned int block, unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const
{ {
return 0;
} }
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) const {} inline int CompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*) const {}
inline void IDBCompressInterface::locateBlock(unsigned int block,
unsigned int& chunkIndex, unsigned int& blockOffsetWithinChunk) const {}
inline int IDBCompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
{ {
return -1; return -1;
} }
inline uint64_t inline uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
{ {
return 0; return 0;
} }
inline void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const {} inline void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count) {}
inline uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const inline uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
{ {
return 0; return 0;
} }
inline void IDBCompressInterface::setHdrSize(void*, uint64_t) const {} inline uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
inline uint64_t IDBCompressInterface::getHdrSize(const void*) const
{ {
return 0; return 0;
} }
inline execplan::CalpontSystemCatalog::ColDataType inline execplan::CalpontSystemCatalog::ColDataType
IDBCompressInterface::getColDataType(const void* hdrBuf) const CompressInterface::getColDataType(const void* hdrBuf)
{ {
return execplan::CalpontSystemCatalog::ColDataType::UNDEFINED; return execplan::CalpontSystemCatalog::ColDataType::UNDEFINED;
} }
// Stub of getColumnWidth: ignores the header buffer and reports a width of 0.
// The class declares this member `static`, and a static member function
// cannot be cv-qualified, so the definition must not carry a trailing `const`.
inline uint64_t CompressInterface::getColumnWidth(const void* hdrBuf)
{
    return 0;
}
// Stub LBID accessors: the getters always return 0 and the setters do
// nothing. All four are `inline` so that including this header from several
// translation units does not produce duplicate definitions at link time.
inline uint64_t getLBID0(const void* hdrBuf) { return 0; }
inline void setLBID0(void* hdrBuf, uint64_t lbid) {}
inline uint64_t getLBID1(const void* hdrBuf) { return 0; }
inline void setLBID1(void* hdrBuf, uint64_t lbid) {}
// No-op stub of setHdrSize: both arguments are ignored. It appears inside the
// SKIP_IDB_COMPRESSION section of this header.
inline void CompressInterface::setHdrSize(void*, uint64_t) {}
// Stub of getHdrSize: ignores the header buffer and always reports 0. It
// appears inside the SKIP_IDB_COMPRESSION section of this header.
inline uint64_t CompressInterface::getHdrSize(const void*)
{
return 0;
}
// Stub constructor: forwards the padding byte count to the CompressInterface
// base. Marked `inline` because it is defined in a header; a non-inline
// definition would be emitted by every translation unit including it.
inline CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
    : CompressInterface(numUserPaddingBytes)
{
}
inline uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const { return 0; } inline uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const { return 0; }
inline uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize) inline uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
{ {
@ -377,8 +507,13 @@ inline bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, si
{ {
return false; return false;
} }
// NOTE(review): this definition has no class qualifier, yet it carries a
// trailing `const`; a cv-qualifier is only valid on a non-static member
// function, so this line is expected to be rejected when the
// SKIP_IDB_COMPRESSION section is compiled. Presumably it was meant as the
// stub for CompressInterfaceSnappy::getChunkMagicNumber and/or
// CompressInterfaceLZ4::getChunkMagicNumber -- confirm. It is also not
// declared `inline` although it is defined in a header.
uint8_t getChunkMagicNumber() const { return 0; }
/**
 * Stub of getCompressInterfaceByType: no interface is ever created, so the
 * result is always nullptr and both arguments are ignored.
 *
 * Marked `inline` because it is defined in a header; a non-inline definition
 * would be emitted by every translation unit that includes it.
 */
inline CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
                                                     uint32_t numUserPaddingBytes)
{
    return nullptr;
}
#endif #endif
} }
#undef EXPORT #undef EXPORT

View File

@ -176,25 +176,24 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
return -1; return -1;
} }
compress::IDBCompressInterface decompressor; char hdr1[compress::CompressInterface::HDR_BUF_LEN];
nBytes = readFillBuffer( pFile, hdr1, compress::CompressInterface::HDR_BUF_LEN);
char hdr1[compress::IDBCompressInterface::HDR_BUF_LEN]; if ( nBytes != compress::CompressInterface::HDR_BUF_LEN )
nBytes = readFillBuffer( pFile, hdr1, compress::IDBCompressInterface::HDR_BUF_LEN);
if ( nBytes != compress::IDBCompressInterface::HDR_BUF_LEN )
{ {
delete pFile; delete pFile;
return -1; return -1;
} }
// Verify we are a compressed file // Verify we are a compressed file
if (decompressor.verifyHdr(hdr1) < 0) if (compress::CompressInterface::verifyHdr(hdr1) < 0)
{ {
delete pFile; delete pFile;
return -1; return -1;
} }
int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - compress::IDBCompressInterface::HDR_BUF_LEN; int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
compress::CompressInterface::HDR_BUF_LEN;
char* hdr2 = new char[ptrSecSize]; char* hdr2 = new char[ptrSecSize];
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize); nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
@ -206,7 +205,8 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
} }
compress::CompChunkPtrList chunkPtrs; compress::CompChunkPtrList chunkPtrs;
int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs); int rc = compress::CompressInterface::getPtrList(hdr2, ptrSecSize,
chunkPtrs);
delete[] hdr2; delete[] hdr2;
if (rc != 0) if (rc != 0)

View File

@ -50,7 +50,10 @@ namespace joiner
uint64_t uniqueNums = 0; uint64_t uniqueNums = 0;
JoinPartition::JoinPartition() { } JoinPartition::JoinPartition()
{
compressor.reset(new compress::CompressInterfaceSnappy());
}
/* This is the ctor used by THJS */ /* This is the ctor used by THJS */
JoinPartition::JoinPartition(const RowGroup& lRG, JoinPartition::JoinPartition(const RowGroup& lRG,
@ -103,6 +106,22 @@ JoinPartition::JoinPartition(const RowGroup& lRG,
for (int i = 0; i < (int) bucketCount; i++) for (int i = 0; i < (int) bucketCount; i++)
buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false))); buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false)));
string compressionType;
try
{
compressionType =
config->getConfig("HashJoin", "TempFileCompressionType");
} catch (...) {}
if (compressionType == "LZ4")
{
compressor.reset(new compress::CompressInterfaceLZ4());
}
else
{
compressor.reset(new compress::CompressInterfaceSnappy());
}
} }
/* Ctor used by JoinPartition on expansion, creates JP's in filemode */ /* Ctor used by JoinPartition on expansion, creates JP's in filemode */
@ -151,6 +170,8 @@ JoinPartition::JoinPartition(const JoinPartition& jp, bool splitMode) :
smallRG.setData(&buffer); smallRG.setData(&buffer);
smallRG.resetRowGroup(0); smallRG.resetRowGroup(0);
smallRG.getRow(0, &smallRow); smallRG.getRow(0, &smallRow);
compressor = jp.compressor;
} }
@ -694,6 +715,7 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
fs.seekg(offset); fs.seekg(offset);
fs.read((char*) &len, sizeof(len)); fs.read((char*) &len, sizeof(len));
saveErrno = errno; saveErrno = errno;
if (!fs) if (!fs)
@ -735,12 +757,14 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
else else
{ {
size_t uncompressedSize; size_t uncompressedSize;
fs.read((char*) &uncompressedSize, sizeof(uncompressedSize));
boost::scoped_array<char> buf(new char[len]); boost::scoped_array<char> buf(new char[len]);
fs.read(buf.get(), len); fs.read(buf.get(), len);
saveErrno = errno; saveErrno = errno;
if (!fs) if (!fs || !uncompressedSize)
{ {
fs.close(); fs.close();
ostringstream os; ostringstream os;
@ -749,9 +773,9 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
} }
totalBytesRead += len; totalBytesRead += len;
compressor.getUncompressedSize(buf.get(), len, &uncompressedSize);
bs->needAtLeast(uncompressedSize); bs->needAtLeast(uncompressedSize);
compressor.uncompress(buf.get(), len, (char*) bs->getInputPtr()); compressor->uncompress(buf.get(), len, (char*) bs->getInputPtr(),
&uncompressedSize);
bs->advanceInputPtr(uncompressedSize); bs->advanceInputPtr(uncompressedSize);
} }
@ -801,13 +825,15 @@ uint64_t JoinPartition::writeByteStream(int which, ByteStream& bs)
} }
else else
{ {
uint64_t maxSize = compressor.maxCompressedSize(len); size_t maxSize = compressor->maxCompressedSize(len);
size_t actualSize; size_t actualSize = maxSize;
boost::scoped_array<uint8_t> compressed(new uint8_t[maxSize]); boost::scoped_array<uint8_t> compressed(new uint8_t[maxSize]);
compressor.compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize); compressor->compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
ret = actualSize + 4; ret = actualSize + 4 + 8; // sizeof (size_t) == 8. Why 4?
fs.write((char*) &actualSize, sizeof(actualSize)); fs.write((char*) &actualSize, sizeof(actualSize));
// Save uncompressed len.
fs.write((char*) &len, sizeof(len));
fs.write((char*) compressed.get(), actualSize); fs.write((char*) compressed.get(), actualSize);
saveErrno = errno; saveErrno = errno;

View File

@ -164,7 +164,7 @@ private:
/* Compression support */ /* Compression support */
bool useCompression; bool useCompression;
compress::IDBCompressInterface compressor; std::shared_ptr<compress::CompressInterface> compressor;
/* TBD: do the reading/writing in one thread, compression/decompression in another */ /* TBD: do the reading/writing in one thread, compression/decompression in another */
/* Some stats for reporting */ /* Some stats for reporting */

View File

@ -64,6 +64,7 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
{ {
config::Config* config = config::Config::makeConfig(); config::Config* config = config::Config::makeConfig();
string val; string val;
string compressionType;
try try
{ {
@ -75,6 +76,19 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
useCompression = true; useCompression = true;
else else
useCompression = false; useCompression = false;
try
{
compressionType =
config->getConfig("NetworkCompression", "NetworkCompression");
}
catch (...) { }
auto* compressInterface = compress::getCompressInterfaceByName(compressionType);
if (!compressInterface)
compressInterface = new compress::CompressInterfaceSnappy();
alg.reset(compressInterface);
} }
Socket* CompressedInetStreamSocket::clone() const Socket* CompressedInetStreamSocket::clone() const
@ -87,20 +101,25 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
{ {
SBS readBS, ret; SBS readBS, ret;
size_t uncompressedSize; size_t uncompressedSize;
bool err;
readBS = InetStreamSocket::read(timeout, isTimeOut, stats); readBS = InetStreamSocket::read(timeout, isTimeOut, stats);
if (readBS->length() == 0 || fMagicBuffer == BYTESTREAM_MAGIC) if (readBS->length() == 0 || fMagicBuffer == BYTESTREAM_MAGIC)
return readBS; return readBS;
err = alg.getUncompressedSize((char*) readBS->buf(), readBS->length(), &uncompressedSize); // Read stored len, first 4 bytes.
uint32_t storedLen = *(uint32_t*) readBS->buf();
if (!err) if (!storedLen)
return SBS(new ByteStream(0)); return SBS(new ByteStream(0));
uncompressedSize = storedLen;
ret.reset(new ByteStream(uncompressedSize)); ret.reset(new ByteStream(uncompressedSize));
alg.uncompress((char*) readBS->buf(), readBS->length(), (char*) ret->getInputPtr());
alg->uncompress((char*) readBS->buf() + HEADER_SIZE,
readBS->length() - HEADER_SIZE, (char*) ret->getInputPtr(),
&uncompressedSize);
ret->advanceInputPtr(uncompressedSize); ret->advanceInputPtr(uncompressedSize);
return ret; return ret;
@ -108,15 +127,18 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats) void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats)
{ {
size_t outLen = 0; size_t len = msg.length();
uint32_t len = msg.length();
if (useCompression && (len > 512)) if (useCompression && (len > 512))
{ {
ByteStream smsg(alg.maxCompressedSize(len)); size_t outLen = alg->maxCompressedSize(len) + HEADER_SIZE;
ByteStream smsg(outLen);
alg.compress((char*) msg.buf(), len, (char*) smsg.getInputPtr(), &outLen); alg->compress((char*) msg.buf(), len,
smsg.advanceInputPtr(outLen); (char*) smsg.getInputPtr() + HEADER_SIZE, &outLen);
// Save original len.
*(uint32_t*) smsg.getInputPtr() = len;
smsg.advanceInputPtr(outLen + HEADER_SIZE);
if (outLen < len) if (outLen < len)
do_write(smsg, COMPRESSED_BYTESTREAM_MAGIC, stats); do_write(smsg, COMPRESSED_BYTESTREAM_MAGIC, stats);

View File

@ -54,8 +54,9 @@ public:
virtual const IOSocket accept(const struct timespec* timeout); virtual const IOSocket accept(const struct timespec* timeout);
virtual void connect(const sockaddr* addr); virtual void connect(const sockaddr* addr);
private: private:
compress::IDBCompressInterface alg; std::shared_ptr<compress::CompressInterface> alg;
bool useCompression; bool useCompression;
static const uint32_t HEADER_SIZE = 4;
}; };
} //namespace messageqcpp } //namespace messageqcpp

View File

@ -337,15 +337,12 @@ int BulkLoad::loadJobInfo(
} }
} }
// Validate that specified compression type is available
compress::IDBCompressInterface compressor;
for (unsigned kT = 0; kT < curJob.jobTableList.size(); kT++) for (unsigned kT = 0; kT < curJob.jobTableList.size(); kT++)
{ {
for (unsigned kC = 0; kC < curJob.jobTableList[kT].colList.size(); kC++) for (unsigned kC = 0; kC < curJob.jobTableList[kT].colList.size(); kC++)
{ {
if ( !compressor.isCompressionAvail( if (!compress::CompressInterface::isCompressionAvail(
curJob.jobTableList[kT].colList[kC].compressionType) ) curJob.jobTableList[kT].colList[kC].compressionType))
{ {
std::ostringstream oss; std::ostringstream oss;
oss << "Specified compression type (" << oss << "Specified compression type (" <<

View File

@ -60,12 +60,11 @@ ColumnBufferCompressed::ColumnBufferCompressed( ColumnInfo* pColInfo,
fToBeCompressedBuffer(0), fToBeCompressedBuffer(0),
fToBeCompressedCapacity(0), fToBeCompressedCapacity(0),
fNumBytes(0), fNumBytes(0),
fCompressor(0),
fPreLoadHWMChunk(true), fPreLoadHWMChunk(true),
fFlushedStartHwmChunk(false) fFlushedStartHwmChunk(false)
{ {
fUserPaddingBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK; fUserPaddingBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
fCompressor = new compress::IDBCompressInterface( fUserPaddingBytes ); compress::initializeCompressorPool(fCompressorPool, fUserPaddingBytes);
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -79,7 +78,6 @@ ColumnBufferCompressed::~ColumnBufferCompressed()
fToBeCompressedBuffer = 0; fToBeCompressedBuffer = 0;
fToBeCompressedCapacity = 0; fToBeCompressedCapacity = 0;
fNumBytes = 0; fNumBytes = 0;
delete fCompressor;
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -91,9 +89,7 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char*
fFile = f; fFile = f;
fStartingHwm = startHwm; fStartingHwm = startHwm;
IDBCompressInterface compressor; if (compress::CompressInterface::getPtrList(hdrs, fChunkPtrs) != 0)
if (compressor.getPtrList(hdrs, fChunkPtrs) != 0)
{ {
return ERR_COMP_PARSE_HDRS; return ERR_COMP_PARSE_HDRS;
} }
@ -102,7 +98,15 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char*
// rollback), that fall after the HWM, then drop those trailing ptrs. // rollback), that fall after the HWM, then drop those trailing ptrs.
unsigned int chunkIndex = 0; unsigned int chunkIndex = 0;
unsigned int blockOffsetWithinChunk = 0; unsigned int blockOffsetWithinChunk = 0;
fCompressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk);
auto compressor = compress::getCompressorByType(
fCompressorPool, fColInfo->column.compressionType);
if (!compressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
compressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk);
if ((chunkIndex + 1) < fChunkPtrs.size()) if ((chunkIndex + 1) < fChunkPtrs.size())
{ {
@ -127,11 +131,11 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf(
if (!fToBeCompressedBuffer) if (!fToBeCompressedBuffer)
{ {
fToBeCompressedBuffer = fToBeCompressedBuffer =
new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN]; new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN];
} }
BlockOp::setEmptyBuf( fToBeCompressedBuffer, BlockOp::setEmptyBuf( fToBeCompressedBuffer,
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal, fColInfo->column.emptyVal,
fColInfo->column.width ); fColInfo->column.width );
@ -147,10 +151,10 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf(
fLog->logMsg( oss.str(), MSGLVL_INFO2 ); fLog->logMsg( oss.str(), MSGLVL_INFO2 );
} }
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
// Set file offset past end of last chunk // Set file offset past end of last chunk
startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2; startFileOffset = CompressInterface::HDR_BUF_LEN * 2;
if (fChunkPtrs.size() > 0) if (fChunkPtrs.size() > 0)
startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first + startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first +
@ -223,7 +227,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
// Expand the compression buffer size if working with an abbrev extent, and // Expand the compression buffer size if working with an abbrev extent, and
// the bytes we are about to add will overflow the abbreviated extent. // the bytes we are about to add will overflow the abbreviated extent.
if ((fToBeCompressedCapacity < IDBCompressInterface::UNCOMPRESSED_INBUF_LEN) && if ((fToBeCompressedCapacity < CompressInterface::UNCOMPRESSED_INBUF_LEN) &&
((fNumBytes + writeSize + fillUpWEmptiesWriteSize) > fToBeCompressedCapacity) ) ((fNumBytes + writeSize + fillUpWEmptiesWriteSize) > fToBeCompressedCapacity) )
{ {
std::ostringstream oss; std::ostringstream oss;
@ -233,7 +237,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
"; part-" << fColInfo->curCol.dataFile.fPartition << "; part-" << fColInfo->curCol.dataFile.fPartition <<
"; seg-" << fColInfo->curCol.dataFile.fSegment; "; seg-" << fColInfo->curCol.dataFile.fSegment;
fLog->logMsg( oss.str(), MSGLVL_INFO2 ); fLog->logMsg( oss.str(), MSGLVL_INFO2 );
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
} }
if ((fNumBytes + writeSize + fillUpWEmptiesWriteSize) <= fToBeCompressedCapacity) if ((fNumBytes + writeSize + fillUpWEmptiesWriteSize) <= fToBeCompressedCapacity)
@ -316,12 +320,12 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
// Start over again loading a new to-be-compressed buffer // Start over again loading a new to-be-compressed buffer
BlockOp::setEmptyBuf( fToBeCompressedBuffer, BlockOp::setEmptyBuf( fToBeCompressedBuffer,
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal, fColInfo->column.emptyVal,
fColInfo->column.width ); fColInfo->column.width );
fToBeCompressedCapacity = fToBeCompressedCapacity =
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; CompressInterface::UNCOMPRESSED_INBUF_LEN;
bufOffset = fToBeCompressedBuffer; bufOffset = fToBeCompressedBuffer;
fNumBytes = 0; fNumBytes = 0;
@ -377,21 +381,31 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile ) int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile )
{ {
const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(fToBeCompressedCapacity) + auto compressor = compress::getCompressorByType(
fUserPaddingBytes; fCompressorPool, fColInfo->column.compressionType);
if (!compressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
const size_t OUTPUT_BUFFER_SIZE =
compressor->maxCompressedSize(fToBeCompressedCapacity) +
fUserPaddingBytes +
// Padded len = len + COMPRESSED_CHUNK_INCREMENT_SIZE - (len %
// COMPRESSED_CHUNK_INCREMENT_SIZE) + fUserPaddingBytes
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
unsigned char* compressedOutBuf = new unsigned char[ OUTPUT_BUFFER_SIZE ]; unsigned char* compressedOutBuf = new unsigned char[ OUTPUT_BUFFER_SIZE ];
boost::scoped_array<unsigned char> compressedOutBufPtr(compressedOutBuf); boost::scoped_array<unsigned char> compressedOutBufPtr(compressedOutBuf);
unsigned int outputLen = OUTPUT_BUFFER_SIZE; size_t outputLen = OUTPUT_BUFFER_SIZE;
#ifdef PROFILE #ifdef PROFILE
Stats::startParseEvent(WE_STATS_COMPRESS_COL_COMPRESS); Stats::startParseEvent(WE_STATS_COMPRESS_COL_COMPRESS);
#endif #endif
int rc = fCompressor->compressBlock( int rc = compressor->compressBlock(
reinterpret_cast<char*>(fToBeCompressedBuffer), reinterpret_cast<char*>(fToBeCompressedBuffer),
fToBeCompressedCapacity, fToBeCompressedCapacity, compressedOutBuf, outputLen);
compressedOutBuf,
outputLen );
if (rc != 0) if (rc != 0)
{ {
@ -399,7 +413,7 @@ int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile )
} }
// Round up the compressed chunk size // Round up the compressed chunk size
rc = fCompressor->padCompressedChunks( compressedOutBuf, rc = compressor->padCompressedChunks( compressedOutBuf,
outputLen, OUTPUT_BUFFER_SIZE ); outputLen, OUTPUT_BUFFER_SIZE );
if (rc != 0) if (rc != 0)
@ -581,26 +595,24 @@ int ColumnBufferCompressed::finishFile(bool bTruncFile)
int ColumnBufferCompressed::saveCompressionHeaders( ) int ColumnBufferCompressed::saveCompressionHeaders( )
{ {
// Construct the header records // Construct the header records
char hdrBuf[IDBCompressInterface::HDR_BUF_LEN * 2]; char hdrBuf[CompressInterface::HDR_BUF_LEN * 2];
RETURN_ON_ERROR(fColInfo->colOp->readHeaders(fFile, hdrBuf)); RETURN_ON_ERROR(fColInfo->colOp->readHeaders(fFile, hdrBuf));
BRM::LBID_t lbid = fCompressor->getLBIDByIndex(hdrBuf, 0); BRM::LBID_t lbid = compress::CompressInterface::getLBIDByIndex(hdrBuf, 0);
fCompressor->initHdr(hdrBuf, fColInfo->column.width, compress::CompressInterface::initHdr(hdrBuf, fColInfo->column.width, fColInfo->column.dataType,
fColInfo->column.dataType, fColInfo->column.compressionType);
fColInfo->column.compressionType); compress::CompressInterface::setBlockCount(hdrBuf, (fColInfo->getFileSize() / BYTE_PER_BLOCK));
fCompressor->setBlockCount(hdrBuf,
(fColInfo->getFileSize() / BYTE_PER_BLOCK) );
// If lbid written in the header is not 0 and not equal to `lastupdatedlbid` - we are running // If lbid written in the header is not 0 and not equal to `lastupdatedlbid` - we are running
// for the next extent for column segment file. // for the next extent for column segment file.
const auto lastUpdatedLbid = fColInfo->getLastUpdatedLBID(); const auto lastUpdatedLbid = fColInfo->getLastUpdatedLBID();
if (lbid && lastUpdatedLbid != lbid) if (lbid && lastUpdatedLbid != lbid)
{ {
// Write back lbid, after header initialization. // Write back lbid, after header initialization.
fCompressor->setLBIDByIndex(hdrBuf, lbid, 0); compress::CompressInterface::setLBIDByIndex(hdrBuf, lbid, 0);
fCompressor->setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1); compress::CompressInterface::setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1);
} }
else else
fCompressor->setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0); compress::CompressInterface::setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0);
std::vector<uint64_t> ptrs; std::vector<uint64_t> ptrs;
@ -611,7 +623,7 @@ int ColumnBufferCompressed::saveCompressionHeaders( )
unsigned lastIdx = fChunkPtrs.size() - 1; unsigned lastIdx = fChunkPtrs.size() - 1;
ptrs.push_back( fChunkPtrs[lastIdx].first + fChunkPtrs[lastIdx].second ); ptrs.push_back( fChunkPtrs[lastIdx].first + fChunkPtrs[lastIdx].second );
fCompressor->storePtrs( ptrs, hdrBuf ); compress::CompressInterface::storePtrs(ptrs, hdrBuf);
// Write out the header records // Write out the header records
//char resp; //char resp;
@ -641,9 +653,9 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
if (!fToBeCompressedBuffer) if (!fToBeCompressedBuffer)
{ {
fToBeCompressedBuffer = fToBeCompressedBuffer =
new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN]; new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN];
BlockOp::setEmptyBuf( fToBeCompressedBuffer, BlockOp::setEmptyBuf( fToBeCompressedBuffer,
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal, fColInfo->column.emptyVal,
fColInfo->column.width ); fColInfo->column.width );
bNewBuffer = true; bNewBuffer = true;
@ -656,12 +668,19 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
unsigned int blockOffsetWithinChunk = 0; unsigned int blockOffsetWithinChunk = 0;
bool bSkipStartingBlks = false; bool bSkipStartingBlks = false;
auto compressor = compress::getCompressorByType(
fCompressorPool, fColInfo->column.compressionType);
if (!compressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
if (fPreLoadHWMChunk) if (fPreLoadHWMChunk)
{ {
if (fChunkPtrs.size() > 0) if (fChunkPtrs.size() > 0)
{ {
fCompressor->locateBlock(fStartingHwm, compressor->locateBlock(fStartingHwm, chunkIndex,
chunkIndex, blockOffsetWithinChunk); blockOffsetWithinChunk);
if (chunkIndex < fChunkPtrs.size()) if (chunkIndex < fChunkPtrs.size())
startFileOffset = fChunkPtrs[chunkIndex].first; startFileOffset = fChunkPtrs[chunkIndex].first;
@ -718,8 +737,8 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
} }
// Uncompress the chunk into our 4MB buffer // Uncompress the chunk into our 4MB buffer
unsigned int outLen = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; size_t outLen = CompressInterface::UNCOMPRESSED_INBUF_LEN;
int rc = fCompressor->uncompressBlock( int rc = compressor->uncompressBlock(
compressedOutBuf, compressedOutBuf,
fChunkPtrs[chunkIndex].second, fChunkPtrs[chunkIndex].second,
fToBeCompressedBuffer, fToBeCompressedBuffer,
@ -758,7 +777,7 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
if (!bNewBuffer) if (!bNewBuffer)
{ {
BlockOp::setEmptyBuf( fToBeCompressedBuffer, BlockOp::setEmptyBuf( fToBeCompressedBuffer,
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal, fColInfo->column.emptyVal,
fColInfo->column.width ); fColInfo->column.width );
} }
@ -775,10 +794,10 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
fLog->logMsg( oss.str(), MSGLVL_INFO2 ); fLog->logMsg( oss.str(), MSGLVL_INFO2 );
} }
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
// Set file offset to start after last current chunk // Set file offset to start after last current chunk
startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2; startFileOffset = CompressInterface::HDR_BUF_LEN * 2;
if (fChunkPtrs.size() > 0) if (fChunkPtrs.size() > 0)
startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first + startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first +
@ -796,5 +815,4 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
return NO_ERROR; return NO_ERROR;
} }
} }

View File

@ -107,8 +107,7 @@ private:
// should always be 4MB, unless // should always be 4MB, unless
// working with abbrev extent. // working with abbrev extent.
size_t fNumBytes; // num Bytes in comp buffer size_t fNumBytes; // num Bytes in comp buffer
compress::IDBCompressInterface* compress::CompressorPool fCompressorPool; // data compression object pool
fCompressor; // data compression object
compress::CompChunkPtrList compress::CompChunkPtrList
fChunkPtrs; // col file header information fChunkPtrs; // col file header information
bool fPreLoadHWMChunk; // preload 1st HWM chunk only bool fPreLoadHWMChunk; // preload 1st HWM chunk only

View File

@ -450,7 +450,7 @@ int ColumnInfo::createDelayedFileIfNeeded( const std::string& tableName )
if (column.dctnry.fCompressionType != 0) if (column.dctnry.fCompressionType != 0)
{ {
DctnryCompress1* tempD1; DctnryCompress1* tempD1;
tempD1 = new DctnryCompress1; tempD1 = new DctnryCompress1(column.dctnry.fCompressionType);
tempD1->setMaxActiveChunkNum(1); tempD1->setMaxActiveChunkNum(1);
tempD1->setBulkFlag(true); tempD1->setBulkFlag(true);
tempD = tempD1; tempD = tempD1;
@ -668,7 +668,7 @@ int ColumnInfo::extendColumnNewExtent(
uint16_t segmentNew = 0; uint16_t segmentNew = 0;
BRM::LBID_t startLbid; BRM::LBID_t startLbid;
char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ]; char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
// Extend the column by adding an extent to the next // Extend the column by adding an extent to the next
// DBRoot, partition, and segment file in the rotation // DBRoot, partition, and segment file in the rotation
@ -1684,7 +1684,8 @@ int ColumnInfo::openDctnryStore( bool bMustExist )
if ( column.dctnry.fCompressionType != 0) if ( column.dctnry.fCompressionType != 0)
{ {
DctnryCompress1* dctnryCompress1 = new DctnryCompress1; DctnryCompress1* dctnryCompress1 =
new DctnryCompress1(column.dctnry.fCompressionType);
dctnryCompress1->setMaxActiveChunkNum(1); dctnryCompress1->setMaxActiveChunkNum(1);
dctnryCompress1->setBulkFlag(true); dctnryCompress1->setBulkFlag(true);
fStore = dctnryCompress1; fStore = dctnryCompress1;

View File

@ -108,7 +108,7 @@ int ColumnInfoCompressed::closeColumnFile(bool bCompletingExtent, bool bAbort)
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm ) int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm )
{ {
char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ]; char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
RETURN_ON_ERROR( colOp->readHeaders(curCol.dataFile.pFile, hdr) ); RETURN_ON_ERROR( colOp->readHeaders(curCol.dataFile.pFile, hdr) );
// Initialize the output buffer manager for the column. // Initialize the output buffer manager for the column.
@ -129,10 +129,9 @@ int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm )
fColBufferMgr = mgr; fColBufferMgr = mgr;
IDBCompressInterface compressor; int abbrevFlag = (compress::CompressInterface::getBlockCount(hdr) ==
int abbrevFlag = uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width /
( compressor.getBlockCount(hdr) == BYTE_PER_BLOCK));
uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width / BYTE_PER_BLOCK) );
setFileSize( hwm, abbrevFlag ); setFileSize( hwm, abbrevFlag );
// See if dealing with abbreviated extent that will need expanding. // See if dealing with abbreviated extent that will need expanding.
@ -324,9 +323,9 @@ int ColumnInfoCompressed::truncateDctnryStore(
return rc; return rc;
} }
char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ]; char controlHdr[ CompressInterface::HDR_BUF_LEN ];
rc = fTruncateDctnryFileOp.readFile( dFile, rc = fTruncateDctnryFileOp.readFile( dFile,
(unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN); (unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN);
if (rc != NO_ERROR) if (rc != NO_ERROR)
{ {
@ -345,8 +344,7 @@ int ColumnInfoCompressed::truncateDctnryStore(
return rc; return rc;
} }
IDBCompressInterface compressor; int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
int rc1 = compressor.verifyHdr( controlHdr );
if (rc1 != 0) if (rc1 != 0)
{ {
@ -372,7 +370,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
// actually grow the file (something we don't want to do), because we have // actually grow the file (something we don't want to do), because we have
// not yet reserved a full extent (on disk) for this dictionary store file. // not yet reserved a full extent (on disk) for this dictionary store file.
const int PSEUDO_COL_WIDTH = 8; const int PSEUDO_COL_WIDTH = 8;
uint64_t numBlocks = compressor.getBlockCount( controlHdr ); uint64_t numBlocks =
compress::CompressInterface::getBlockCount(controlHdr);
if ( numBlocks == uint64_t if ( numBlocks == uint64_t
(INITIAL_EXTENT_ROWS_TO_DISK * PSEUDO_COL_WIDTH / BYTE_PER_BLOCK) ) (INITIAL_EXTENT_ROWS_TO_DISK * PSEUDO_COL_WIDTH / BYTE_PER_BLOCK) )
@ -390,8 +389,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
return NO_ERROR; return NO_ERROR;
} }
uint64_t hdrSize = compressor.getHdrSize(controlHdr); uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN; uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
char* pointerHdr = new char[ptrHdrSize]; char* pointerHdr = new char[ptrHdrSize];
rc = fTruncateDctnryFileOp.readFile(dFile, rc = fTruncateDctnryFileOp.readFile(dFile,
@ -416,7 +415,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
} }
CompChunkPtrList chunkPtrs; CompChunkPtrList chunkPtrs;
rc1 = compressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs ); rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
chunkPtrs);
delete[] pointerHdr; delete[] pointerHdr;
if (rc1 != 0) if (rc1 != 0)

View File

@ -96,7 +96,7 @@ size_t readFillBuffer(
return totalBytesRead; return totalBytesRead;
} }
off64_t getCompressedDataSize(string& fileName) static off64_t getCompressedDataSize(string& fileName)
{ {
off64_t dataSize = 0; off64_t dataSize = 0;
IDBDataFile* pFile = 0; IDBDataFile* pFile = 0;
@ -119,21 +119,21 @@ off64_t getCompressedDataSize(string& fileName)
throw std::runtime_error(oss.str()); throw std::runtime_error(oss.str());
} }
IDBCompressInterface decompressor;
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
// Read headers and extract compression pointers // Read headers and extract compression pointers
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
char hdr1[IDBCompressInterface::HDR_BUF_LEN]; char hdr1[CompressInterface::HDR_BUF_LEN];
nBytes = readFillBuffer( pFile, hdr1, IDBCompressInterface::HDR_BUF_LEN); nBytes = readFillBuffer( pFile, hdr1, CompressInterface::HDR_BUF_LEN);
if ( nBytes != IDBCompressInterface::HDR_BUF_LEN ) if ( nBytes != CompressInterface::HDR_BUF_LEN )
{ {
std::ostringstream oss; std::ostringstream oss;
oss << "Error reading first header from file " << fileName; oss << "Error reading first header from file " << fileName;
throw std::runtime_error(oss.str()); throw std::runtime_error(oss.str());
} }
int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - IDBCompressInterface::HDR_BUF_LEN; int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
CompressInterface::HDR_BUF_LEN;
char* hdr2 = new char[ptrSecSize]; char* hdr2 = new char[ptrSecSize];
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize); nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
@ -145,7 +145,8 @@ off64_t getCompressedDataSize(string& fileName)
} }
CompChunkPtrList chunkPtrs; CompChunkPtrList chunkPtrs;
int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs); int rc =
compress::CompressInterface::getPtrList(hdr2, ptrSecSize, chunkPtrs);
delete[] hdr2; delete[] hdr2;
if (rc != 0) if (rc != 0)

View File

@ -51,6 +51,7 @@ namespace WriteEngine
// BulkRollbackFileCompressed constructor.
// Forwards `mgr` to the BulkRollbackFile base-class constructor, then calls
// compress::initializeCompressorPool() on the fCompressorPool member.
// NOTE(review): no padding argument is passed here, unlike other visible
// call sites of initializeCompressorPool -- presumably a default applies;
// confirm against its declaration.
BulkRollbackFileCompressed::BulkRollbackFileCompressed(BulkRollbackMgr* mgr) : BulkRollbackFileCompressed::BulkRollbackFileCompressed(BulkRollbackMgr* mgr) :
BulkRollbackFile(mgr) BulkRollbackFile(mgr)
{ {
compress::initializeCompressorPool(fCompressorPool);
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -104,7 +105,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
} }
// Read and parse the header pointers // Read and parse the header pointers
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];; char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];;
CompChunkPtrList chunkPtrs; CompChunkPtrList chunkPtrs;
std::string errMsg; std::string errMsg;
int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg); int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg);
@ -127,7 +128,20 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
unsigned int blockOffset = fileSizeBlocks - 1; unsigned int blockOffset = fileSizeBlocks - 1;
unsigned int chunkIndex = 0; unsigned int chunkIndex = 0;
unsigned int blkOffsetInChunk = 0; unsigned int blkOffsetInChunk = 0;
fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
auto fCompressor = compress::getCompressorByType(
fCompressorPool,
compress::CompressInterface::getCompressionType(hdrs));
if (!fCompressor)
{
std::ostringstream oss;
oss << "Error, wrong compression type for segment file"
<< ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-"
<< partNum << "; segment-" << segNum << ";";
throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
}
fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
// Truncate the extra extents that are to be aborted // Truncate the extra extents that are to be aborted
if (chunkIndex < chunkPtrs.size()) if (chunkIndex < chunkPtrs.size())
@ -145,7 +159,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
logging::M0075, columnOID, msgText2.str() ); logging::M0075, columnOID, msgText2.str() );
// Drop off any trailing pointers (that point beyond the last block) // Drop off any trailing pointers (that point beyond the last block)
fCompressor.setBlockCount( hdrs, fileSizeBlocks ); compress::CompressInterface::setBlockCount(hdrs, fileSizeBlocks);
std::vector<uint64_t> ptrs; std::vector<uint64_t> ptrs;
for (unsigned i = 0; i <= chunkIndex; i++) for (unsigned i = 0; i <= chunkIndex; i++)
@ -155,7 +169,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
ptrs.push_back( chunkPtrs[chunkIndex].first + ptrs.push_back( chunkPtrs[chunkIndex].first +
chunkPtrs[chunkIndex].second ); chunkPtrs[chunkIndex].second );
fCompressor.storePtrs( ptrs, hdrs ); compress::CompressInterface::storePtrs(ptrs, hdrs);
rc = fDbFile.writeHeaders( pFile, hdrs ); rc = fDbFile.writeHeaders( pFile, hdrs );
@ -252,7 +266,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
} }
// Read and parse the header pointers // Read and parse the header pointers
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
CompChunkPtrList chunkPtrs; CompChunkPtrList chunkPtrs;
std::string errMsg; std::string errMsg;
int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg); int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg);
@ -275,7 +289,20 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
unsigned int blockOffset = startOffsetBlk - 1; unsigned int blockOffset = startOffsetBlk - 1;
unsigned int chunkIndex = 0; unsigned int chunkIndex = 0;
unsigned int blkOffsetInChunk = 0; unsigned int blkOffsetInChunk = 0;
fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
auto fCompressor = compress::getCompressorByType(
fCompressorPool,
compress::CompressInterface::getCompressionType(hdrs));
if (!fCompressor)
{
std::ostringstream oss;
oss << "Error, wrong compression type for segment file"
<< ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-"
<< partNum << "; segment-" << segNum << ";";
throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
}
fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
if (chunkIndex < chunkPtrs.size()) if (chunkIndex < chunkPtrs.size())
{ {
@ -401,7 +428,8 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
// Watch for the special case where we are restoring a db file as an // Watch for the special case where we are restoring a db file as an
// empty file (chunkindex=0 and restoredChunkLen=0); in this case we // empty file (chunkindex=0 and restoredChunkLen=0); in this case we
// just restore the first pointer (set to 8192). // just restore the first pointer (set to 8192).
fCompressor.setBlockCount( hdrs, (startOffsetBlk + nBlocks) ); compress::CompressInterface::setBlockCount(hdrs,
(startOffsetBlk + nBlocks));
std::vector<uint64_t> newPtrs; std::vector<uint64_t> newPtrs;
if ((chunkIndex > 0) || (restoredChunkLen > 0)) if ((chunkIndex > 0) || (restoredChunkLen > 0))
@ -413,7 +441,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
} }
newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen ); newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen );
fCompressor.storePtrs( newPtrs, hdrs ); compress::CompressInterface::storePtrs(newPtrs, hdrs);
rc = fDbFile.writeHeaders( pFile, hdrs ); rc = fDbFile.writeHeaders( pFile, hdrs );
@ -482,7 +510,7 @@ int BulkRollbackFileCompressed::loadColumnHdrPtrs(
} }
// Parse the header pointers // Parse the header pointers
int rc1 = fCompressor.getPtrList( hdrs, chunkPtrs ); int rc1 = compress::CompressInterface::getPtrList(hdrs, chunkPtrs);
if (rc1 != 0) if (rc1 != 0)
{ {
@ -548,7 +576,7 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
throw WeException( oss.str(), ERR_FILE_OPEN ); throw WeException( oss.str(), ERR_FILE_OPEN );
} }
char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ]; char controlHdr[ CompressInterface::HDR_BUF_LEN ];
CompChunkPtrList chunkPtrs; CompChunkPtrList chunkPtrs;
uint64_t ptrHdrSize; uint64_t ptrHdrSize;
std::string errMsg; std::string errMsg;
@ -572,7 +600,20 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
unsigned int blockOffset = startOffsetBlk - 1; unsigned int blockOffset = startOffsetBlk - 1;
unsigned int chunkIndex = 0; unsigned int chunkIndex = 0;
unsigned int blkOffsetInChunk = 0; unsigned int blkOffsetInChunk = 0;
fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
auto fCompressor = compress::getCompressorByType(
fCompressorPool,
compress::CompressInterface::getCompressionType(controlHdr));
if (!fCompressor)
{
std::ostringstream oss;
oss << "Error, wrong compression type for segment file"
<< ": OID-" << dStoreOID << "; DbRoot-" << dbRoot << "; partition-"
<< partNum << "; segment-" << segNum << ";";
throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
}
fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
if (chunkIndex < chunkPtrs.size()) if (chunkIndex < chunkPtrs.size())
{ {
@ -686,7 +727,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
// Watch for the special case where we are restoring a db file as an // Watch for the special case where we are restoring a db file as an
// empty file (chunkindex=0 and restoredChunkLen=0); in this case we // empty file (chunkindex=0 and restoredChunkLen=0); in this case we
// just restore the first pointer (set to 8192). // just restore the first pointer (set to 8192).
fCompressor.setBlockCount( controlHdr, (startOffsetBlk + nBlocks) ); compress::CompressInterface::setBlockCount(controlHdr,
(startOffsetBlk + nBlocks));
std::vector<uint64_t> newPtrs; std::vector<uint64_t> newPtrs;
if ((chunkIndex > 0) || (restoredChunkLen > 0)) if ((chunkIndex > 0) || (restoredChunkLen > 0))
@ -699,7 +741,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen ); newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen );
char* pointerHdr = new char[ptrHdrSize]; char* pointerHdr = new char[ptrHdrSize];
fCompressor.storePtrs( newPtrs, pointerHdr, ptrHdrSize ); compress::CompressInterface::storePtrs(newPtrs, pointerHdr,
ptrHdrSize);
rc = fDbFile.writeHeaders( pFile, controlHdr, pointerHdr, ptrHdrSize ); rc = fDbFile.writeHeaders( pFile, controlHdr, pointerHdr, ptrHdrSize );
delete[] pointerHdr; delete[] pointerHdr;
@ -759,7 +802,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
std::string& errMsg) const std::string& errMsg) const
{ {
int rc = fDbFile.readFile( int rc = fDbFile.readFile(
pFile, (unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN); pFile, (unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN);
if (rc != NO_ERROR) if (rc != NO_ERROR)
{ {
@ -771,7 +814,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
return rc; return rc;
} }
int rc1 = fCompressor.verifyHdr( controlHdr ); int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
if (rc1 != 0) if (rc1 != 0)
{ {
@ -786,8 +829,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
return rc; return rc;
} }
uint64_t hdrSize = fCompressor.getHdrSize(controlHdr); uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN; ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
char* pointerHdr = new char[ptrHdrSize]; char* pointerHdr = new char[ptrHdrSize];
rc = fDbFile.readFile(pFile, (unsigned char*)pointerHdr, ptrHdrSize); rc = fDbFile.readFile(pFile, (unsigned char*)pointerHdr, ptrHdrSize);
@ -804,7 +847,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
} }
// Parse the header pointers // Parse the header pointers
rc1 = fCompressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs ); rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
chunkPtrs);
delete[] pointerHdr; delete[] pointerHdr;
if (rc1 != 0) if (rc1 != 0)
@ -1033,5 +1077,4 @@ size_t BulkRollbackFileCompressed::readFillBuffer(
return totalBytesRead; return totalBytesRead;
} }
} //end of namespace } //end of namespace

View File

@ -28,6 +28,7 @@
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <unordered_map>
#include "we_define.h" #include "we_define.h"
#include "we_type.h" #include "we_type.h"
@ -148,7 +149,7 @@ private:
uint64_t& ptrHdrSize, uint64_t& ptrHdrSize,
std::string& errMsg ) const; std::string& errMsg ) const;
compress::IDBCompressInterface fCompressor; compress::CompressorPool fCompressorPool;
}; };
} //end of namespace } //end of namespace

View File

@ -67,8 +67,6 @@ namespace WriteEngine
extern int NUM_BLOCKS_PER_INITIAL_EXTENT; // defined in we_dctnry.cpp extern int NUM_BLOCKS_PER_INITIAL_EXTENT; // defined in we_dctnry.cpp
extern WErrorCodes ec; // defined in we_log.cpp extern WErrorCodes ec; // defined in we_log.cpp
const int COMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::maxCompressedSize(UNCOMPRESSED_CHUNK_SIZE) + 64 + 3 + 8 * 1024;
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Search for the specified chunk in fChunkList. // Search for the specified chunk in fChunkList.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -91,18 +89,24 @@ ChunkData* CompFileData::findChunk(int64_t id) const
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// ChunkManager constructor // ChunkManager constructor
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
ChunkManager::ChunkManager() : fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false), ChunkManager::ChunkManager()
fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()), : fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false),
fFileOp(0), fSysLogger(NULL), fTransId(-1), fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()),
fLocalModuleId(Config::getLocalModuleID()), fFileOp(0), fSysLogger(NULL), fTransId(-1),
fFs(fIsHdfs ? fLocalModuleId(Config::getLocalModuleID()),
IDBFileSystem::getFs(IDBDataFile::HDFS) : fFs(fIsHdfs ? IDBFileSystem::getFs(IDBDataFile::HDFS)
IDBPolicy::useCloud() ? : IDBPolicy::useCloud()
IDBFileSystem::getFs(IDBDataFile::CLOUD) : ? IDBFileSystem::getFs(IDBDataFile::CLOUD)
IDBFileSystem::getFs(IDBDataFile::BUFFERED)) : IDBFileSystem::getFs(IDBDataFile::BUFFERED))
{ {
fUserPaddings = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK; fUserPaddings = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
fCompressor.numUserPaddingBytes(fUserPaddings); compress::initializeCompressorPool(fCompressorPool, fUserPaddings);
COMPRESSED_CHUNK_SIZE =
compress::CompressInterface::getMaxCompressedSizeGeneric(
UNCOMPRESSED_CHUNK_SIZE) +
64 + 3 + 8 * 1024;
fMaxCompressedBufSize = COMPRESSED_CHUNK_SIZE + fUserPaddings; fMaxCompressedBufSize = COMPRESSED_CHUNK_SIZE + fUserPaddings;
fBufCompressed = new char[fMaxCompressedBufSize]; fBufCompressed = new char[fMaxCompressedBufSize];
fSysLogger = new logging::Logger(SUBSYSTEM_ID_WE); fSysLogger = new logging::Logger(SUBSYSTEM_ID_WE);
@ -383,16 +387,22 @@ CompFileData* ChunkManager::getFileData(const FID& fid,
} }
// make sure the header is valid // make sure the header is valid
if (fCompressor.verifyHdr(fileData->fFileHeader.fControlData) != 0) if (compress::CompressInterface::verifyHdr(fileData->fFileHeader.fControlData) != 0)
{ {
WE_COMP_DBG(cout << "Invalid header." << endl;) WE_COMP_DBG(cout << "Invalid header." << endl;)
delete fileData; delete fileData;
return NULL; return NULL;
} }
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
// Save segment file compression type.
uint32_t compressionType = compress::CompressInterface::getCompressionType(
fileData->fFileHeader.fControlData);
fileData->fCompressionType = compressionType;
if (ptrSecSize > COMPRESSED_FILE_HEADER_UNIT) if (ptrSecSize > COMPRESSED_FILE_HEADER_UNIT)
{ {
// >8K header, dictionary width > 128 // >8K header, dictionary width > 128
@ -462,11 +472,12 @@ IDBDataFile* ChunkManager::createDctnryFile(const FID& fid,
// Dictionary store extent width == 0. See more details in function // Dictionary store extent width == 0. See more details in function
// `createDictStoreExtent`. // `createDictStoreExtent`.
fCompressor.initHdr(fileData->fFileHeader.fControlData, compress::CompressInterface::initHdr(
fileData->fFileHeader.fPtrSection, fileData->fFileHeader.fControlData, fileData->fFileHeader.fPtrSection,
/*colWidth=*/0, fileData->fColDataType, /*colWidth=*/0, fileData->fColDataType, fFileOp->compressionType(), hdrSize);
fFileOp->compressionType(), hdrSize); compress::CompressInterface::setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0);
fCompressor.setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0); // Save compression type.
fileData->fCompressionType = fFileOp->compressionType();
if (writeHeader(fileData, __LINE__) != NO_ERROR) if (writeHeader(fileData, __LINE__) != NO_ERROR)
{ {
@ -771,9 +782,16 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
} }
// uncompress the read in buffer // uncompress the read in buffer
unsigned int dataLen = sizeof(chunkData->fBufUnCompressed); size_t dataLen = sizeof(chunkData->fBufUnCompressed);
if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize, auto fCompressor = compress::getCompressorByType(
fCompressorPool, fileData->fCompressionType);
if (!fCompressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
(unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0) (unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0)
{ {
if (fIsFix) if (fIsFix)
@ -784,7 +802,7 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
{ {
char* hdr = fileData->fFileHeader.fControlData; char* hdr = fileData->fFileHeader.fControlData;
if (fCompressor.getBlockCount(hdr) < 512) if (compress::CompressInterface::getBlockCount(hdr) < 512)
blocks = 256; blocks = 256;
} }
@ -820,7 +838,8 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
{ {
if (id == 0 && ptrs[id] == 0) // if the 1st ptr is not set for new extent if (id == 0 && ptrs[id] == 0) // if the 1st ptr is not set for new extent
{ {
ptrs[0] = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); ptrs[0] = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
} }
// load the uncompressed buffer with empty values. // load the uncompressed buffer with empty values.
@ -907,10 +926,17 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
// compress the chunk before writing it to file // compress the chunk before writing it to file
fLenCompressed = fMaxCompressedBufSize; fLenCompressed = fMaxCompressedBufSize;
if (fCompressor.compressBlock((char*)chunkData->fBufUnCompressed, auto fCompressor = compress::getCompressorByType(
chunkData->fLenUnCompressed, fCompressorPool, fileData->fCompressionType);
(unsigned char*)fBufCompressed, if (!fCompressor)
fLenCompressed) != 0) {
return ERR_COMP_WRONG_COMP_TYPE;
}
if (fCompressor->compressBlock((char*) chunkData->fBufUnCompressed,
chunkData->fLenUnCompressed,
(unsigned char*) fBufCompressed,
fLenCompressed) != 0)
{ {
logMessage(ERR_COMP_COMPRESS, logging::LOG_TYPE_ERROR, __LINE__); logMessage(ERR_COMP_COMPRESS, logging::LOG_TYPE_ERROR, __LINE__);
return ERR_COMP_COMPRESS; return ERR_COMP_COMPRESS;
@ -941,7 +967,8 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
// [chunkId+0] is the start offset of current chunk. // [chunkId+0] is the start offset of current chunk.
// [chunkId+1] is the start offset of next chunk, the offset diff is current chunk size. // [chunkId+1] is the start offset of next chunk, the offset diff is current chunk size.
// [chunkId+2] is 0 or not indicates if the next chunk exists. // [chunkId+2] is 0 or not indicates if the next chunk exists.
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
int64_t usablePtrIds = (ptrSecSize / sizeof(uint64_t)) - 2; int64_t usablePtrIds = (ptrSecSize / sizeof(uint64_t)) - 2;
@ -968,7 +995,7 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
else if (lastChunk) else if (lastChunk)
{ {
// add padding space if the chunk is written first time // add padding space if the chunk is written first time
if (fCompressor.padCompressedChunks( if (fCompressor->padCompressedChunks(
(unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize) != 0) (unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize) != 0)
{ {
WE_COMP_DBG(cout << "Last chunk:" << chunkId << ", padding failed." << endl;) WE_COMP_DBG(cout << "Last chunk:" << chunkId << ", padding failed." << endl;)
@ -1272,7 +1299,8 @@ int ChunkManager::closeFile(CompFileData* fileData)
int ChunkManager::writeHeader(CompFileData* fileData, int ln) int ChunkManager::writeHeader(CompFileData* fileData, int ln)
{ {
int rc = NO_ERROR; int rc = NO_ERROR;
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
if (!fIsHdfs && !fIsBulkLoad) if (!fIsHdfs && !fIsBulkLoad)
@ -1422,8 +1450,10 @@ int ChunkManager::updateColumnExtent(IDBDataFile* pFile, int addBlockCount, int6
int rc = NO_ERROR; int rc = NO_ERROR;
char* hdr = pFileData->fFileHeader.fControlData; char* hdr = pFileData->fFileHeader.fControlData;
fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount); compress::CompressInterface::setBlockCount(
fCompressor.setLBIDByIndex(hdr, lbid, 1); hdr, compress::CompressInterface::getBlockCount(hdr) + addBlockCount);
compress::CompressInterface::setLBIDByIndex(hdr, lbid, 1);
ChunkData* chunkData = (pFileData)->findChunk(0); ChunkData* chunkData = (pFileData)->findChunk(0);
if (chunkData != NULL) if (chunkData != NULL)
@ -1475,7 +1505,7 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount,
char* hdr = i->second->fFileHeader.fControlData; char* hdr = i->second->fFileHeader.fControlData;
char* uncompressedBuf = chunkData->fBufUnCompressed; char* uncompressedBuf = chunkData->fBufUnCompressed;
int currentBlockCount = fCompressor.getBlockCount(hdr); int currentBlockCount = compress::CompressInterface::getBlockCount(hdr);
// Bug 3203, write out the compressed initial extent. // Bug 3203, write out the compressed initial extent.
if (currentBlockCount == 0) if (currentBlockCount == 0)
@ -1511,13 +1541,15 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount,
} }
if (rc == NO_ERROR) if (rc == NO_ERROR)
fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount); compress::CompressInterface::setBlockCount(
hdr,
compress::CompressInterface::getBlockCount(hdr) + addBlockCount);
if (currentBlockCount) if (currentBlockCount)
{ {
// Append to the end. // Append to the end.
uint64_t lbidCount = fCompressor.getLBIDCount(hdr); uint64_t lbidCount = compress::CompressInterface::getLBIDCount(hdr);
fCompressor.setLBIDByIndex(hdr, lbid, lbidCount); compress::CompressInterface::setLBIDByIndex(hdr, lbid, lbidCount);
} }
return rc; return rc;
} }
@ -1684,7 +1716,8 @@ int ChunkManager::getBlockCount(IDBDataFile* pFile)
map<IDBDataFile*, CompFileData*>::iterator fpIt = fFilePtrMap.find(pFile); map<IDBDataFile*, CompFileData*>::iterator fpIt = fFilePtrMap.find(pFile);
idbassert(fpIt != fFilePtrMap.end()); idbassert(fpIt != fFilePtrMap.end());
return fCompressor.getBlockCount(fpIt->second->fFileHeader.fControlData); return compress::CompressInterface::getBlockCount(
fpIt->second->fFileHeader.fControlData);
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -1758,11 +1791,13 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
origFilePtr->flush(); origFilePtr->flush();
// back out the current pointers // back out the current pointers
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
compress::CompChunkPtrList origPtrs; compress::CompChunkPtrList origPtrs;
if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0) if (compress::CompressInterface::getPtrList(
fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0)
{ {
ostringstream oss; ostringstream oss;
oss << "Chunk shifting failed, file:" << origFileName << " -- invalid header."; oss << "Chunk shifting failed, file:" << origFileName << " -- invalid header.";
@ -1876,7 +1911,14 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
ChunkData* chunkData = chunksTouched[k]; ChunkData* chunkData = chunksTouched[k];
fLenCompressed = fMaxCompressedBufSize; fLenCompressed = fMaxCompressedBufSize;
if ((rc = fCompressor.compressBlock((char*)chunkData->fBufUnCompressed, auto fCompressor = compress::getCompressorByType(
fCompressorPool, fileData->fCompressionType);
if (!fCompressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
if ((rc = fCompressor->compressBlock((char*)chunkData->fBufUnCompressed,
chunkData->fLenUnCompressed, chunkData->fLenUnCompressed,
(unsigned char*)fBufCompressed, (unsigned char*)fBufCompressed,
fLenCompressed)) != 0) fLenCompressed)) != 0)
@ -1894,7 +1936,7 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
<< fLenCompressed;) << fLenCompressed;)
// shifting chunk, add padding space // shifting chunk, add padding space
if ((rc = fCompressor.padCompressedChunks( if ((rc = fCompressor->padCompressedChunks(
(unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize)) != 0) (unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize)) != 0)
{ {
WE_COMP_DBG(cout << ", but padding failed." << endl;) WE_COMP_DBG(cout << ", but padding failed." << endl;)
@ -2245,7 +2287,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
} }
// make sure the header is valid // make sure the header is valid
if ((rc = fCompressor.verifyHdr(fileData->fFileHeader.fControlData)) != 0) if ((rc = compress::CompressInterface::verifyHdr(
fileData->fFileHeader.fControlData)) != 0)
{ {
ostringstream oss; ostringstream oss;
oss << "Invalid header in new " << fileData->fFileName << ", roll back"; oss << "Invalid header in new " << fileData->fFileName << ", roll back";
@ -2254,7 +2297,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
return rc; return rc;
} }
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); int headerSize = compress::CompressInterface::getHdrSize(
fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
// read in the pointer section in header // read in the pointer section in header
@ -2270,7 +2314,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
// get pointer list // get pointer list
compress::CompChunkPtrList ptrs; compress::CompChunkPtrList ptrs;
if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0) if (compress::CompressInterface::getPtrList(
fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
{ {
ostringstream oss; ostringstream oss;
oss << "Failed to parse pointer list from new " << fileData->fFileName << "@" << __LINE__; oss << "Failed to parse pointer list from new " << fileData->fFileName << "@" << __LINE__;
@ -2282,6 +2327,13 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
ChunkData chunkData; ChunkData chunkData;
int numOfChunks = ptrs.size(); // number of chunks in the file int numOfChunks = ptrs.size(); // number of chunks in the file
auto fCompressor = compress::getCompressorByType(
fCompressorPool, fileData->fCompressionType);
if (!fCompressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
for (int i = 0; i < numOfChunks && rc == NO_ERROR; i++) for (int i = 0; i < numOfChunks && rc == NO_ERROR; i++)
{ {
unsigned int chunkSize = ptrs[i].second; unsigned int chunkSize = ptrs[i].second;
@ -2304,9 +2356,9 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
} }
// uncompress the read in buffer // uncompress the read in buffer
unsigned int dataLen = sizeof(chunkData.fBufUnCompressed); size_t dataLen = sizeof(chunkData.fBufUnCompressed);
if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize, if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
(unsigned char*)chunkData.fBufUnCompressed, dataLen) != 0) (unsigned char*)chunkData.fBufUnCompressed, dataLen) != 0)
{ {
ostringstream oss; ostringstream oss;
@ -2624,13 +2676,15 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
if (mit != fFileMap.end()) if (mit != fFileMap.end())
{ {
int headerSize = fCompressor.getHdrSize(mit->second->fFileHeader.fControlData); int headerSize = compress::CompressInterface::getHdrSize(
mit->second->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
// get pointer list // get pointer list
compress::CompChunkPtrList ptrs; compress::CompChunkPtrList ptrs;
if (fCompressor.getPtrList(mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0) if (compress::CompressInterface::getPtrList(
mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
{ {
ostringstream oss; ostringstream oss;
oss << "Failed to parse pointer list from new " << mit->second->fFileName << "@" << __LINE__; oss << "Failed to parse pointer list from new " << mit->second->fFileName << "@" << __LINE__;
@ -2662,9 +2716,16 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
// uncompress the read in buffer // uncompress the read in buffer
chunkData = new ChunkData(numOfChunks - 1); chunkData = new ChunkData(numOfChunks - 1);
unsigned int dataLen = sizeof(chunkData->fBufUnCompressed); size_t dataLen = sizeof(chunkData->fBufUnCompressed);
if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize, auto fCompressor = compress::getCompressorByType(
fCompressorPool, mit->second->fCompressionType);
if (!fCompressor)
{
return ERR_COMP_WRONG_COMP_TYPE;
}
if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
(unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0) (unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0)
{ {
mit->second->fChunkList.push_back(chunkData); mit->second->fChunkList.push_back(chunkData);
@ -2676,7 +2737,7 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
{ {
char* hdr = mit->second->fFileHeader.fControlData; char* hdr = mit->second->fFileHeader.fControlData;
if (fCompressor.getBlockCount(hdr) < 512) if (compress::CompressInterface::getBlockCount(hdr) < 512)
blocks = 256; blocks = 256;
} }
@ -2693,7 +2754,6 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
return rc; return rc;
} }
} }
// vim:ts=4 sw=4: // vim:ts=4 sw=4:

View File

@ -64,8 +64,8 @@ namespace WriteEngine
// forward reference // forward reference
class FileOp; class FileOp;
const int UNCOMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; const int UNCOMPRESSED_CHUNK_SIZE = compress::CompressInterface::UNCOMPRESSED_INBUF_LEN;
const int COMPRESSED_FILE_HEADER_UNIT = compress::IDBCompressInterface::HDR_BUF_LEN; const int COMPRESSED_FILE_HEADER_UNIT = compress::CompressInterface::HDR_BUF_LEN;
// assume UNCOMPRESSED_CHUNK_SIZE > 0xBFFF (49151), 8 * 1024 bytes padding // assume UNCOMPRESSED_CHUNK_SIZE > 0xBFFF (49151), 8 * 1024 bytes padding
@ -136,7 +136,7 @@ class CompFileData
public: public:
CompFileData(const FileID& id, const FID& fid, const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth) : CompFileData(const FileID& id, const FID& fid, const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth) :
fFileID(id), fFid(fid), fColDataType(colDataType), fColWidth(colWidth), fDctnryCol(false), fFileID(id), fFid(fid), fColDataType(colDataType), fColWidth(colWidth), fDctnryCol(false),
fFilePtr(NULL), fIoBSize(0) {} fFilePtr(NULL), fIoBSize(0), fCompressionType(1) {}
ChunkData* findChunk(int64_t cid) const; ChunkData* findChunk(int64_t cid) const;
@ -152,6 +152,7 @@ protected:
std::list<ChunkData*> fChunkList; std::list<ChunkData*> fChunkList;
boost::scoped_array<char> fIoBuffer; boost::scoped_array<char> fIoBuffer;
size_t fIoBSize; size_t fIoBSize;
uint32_t fCompressionType;
friend class ChunkManager; friend class ChunkManager;
}; };
@ -369,22 +370,23 @@ protected:
std::list<std::pair<FileID, ChunkData*> > fActiveChunks; std::list<std::pair<FileID, ChunkData*> > fActiveChunks;
unsigned int fMaxActiveChunkNum; // max active chunks per file unsigned int fMaxActiveChunkNum; // max active chunks per file
char* fBufCompressed; char* fBufCompressed;
unsigned int fLenCompressed; size_t fLenCompressed;
unsigned int fMaxCompressedBufSize; size_t fMaxCompressedBufSize;
unsigned int fUserPaddings; size_t fUserPaddings;
bool fIsBulkLoad; bool fIsBulkLoad;
bool fDropFdCache; bool fDropFdCache;
bool fIsInsert; bool fIsInsert;
bool fIsHdfs; bool fIsHdfs;
FileOp* fFileOp; FileOp* fFileOp;
compress::IDBCompressInterface fCompressor; compress::CompressorPool fCompressorPool;
logging::Logger* fSysLogger; logging::Logger* fSysLogger;
TxnID fTransId; TxnID fTransId;
int fLocalModuleId; int fLocalModuleId;
idbdatafile::IDBFileSystem& fFs; idbdatafile::IDBFileSystem& fFs;
bool fIsFix; bool fIsFix;
size_t COMPRESSED_CHUNK_SIZE;
private: private:
}; };
} }

View File

@ -348,6 +348,7 @@ const int ERR_COMP_READ_FILE = ERR_COMPBASE + 16;// Failed to read from a
const int ERR_COMP_WRITE_FILE = ERR_COMPBASE + 17;// Failed to write to a compresssed data file const int ERR_COMP_WRITE_FILE = ERR_COMPBASE + 17;// Failed to write to a compresssed data file
const int ERR_COMP_CLOSE_FILE = ERR_COMPBASE + 18;// Failed to close a compressed data file const int ERR_COMP_CLOSE_FILE = ERR_COMPBASE + 18;// Failed to close a compressed data file
const int ERR_COMP_TRUNCATE_ZERO = ERR_COMPBASE + 19;// Invalid attempt to truncate file to 0 bytes const int ERR_COMP_TRUNCATE_ZERO = ERR_COMPBASE + 19;// Invalid attempt to truncate file to 0 bytes
const int ERR_COMP_WRONG_COMP_TYPE = ERR_COMPBASE + 20;// Invalid compression type.
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
// Auto-increment error // Auto-increment error

View File

@ -652,14 +652,19 @@ int FileOp::extendFile(
// @bug 5349: check that new extent's fbo is not past current EOF // @bug 5349: check that new extent's fbo is not past current EOF
if (m_compressionType) if (m_compressionType)
{ {
char hdrsIn[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ]; char hdrsIn[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
RETURN_ON_ERROR( readHeaders(pFile, hdrsIn) ); RETURN_ON_ERROR( readHeaders(pFile, hdrsIn) );
IDBCompressInterface compressor; std::unique_ptr<compress::CompressInterface> compressor(
unsigned int ptrCount = compressor.getPtrCount(hdrsIn); compress::getCompressInterfaceByType(
compress::CompressInterface::getCompressionType(hdrsIn)));
unsigned int ptrCount =
compress::CompressInterface::getPtrCount(hdrsIn);
unsigned int chunkIndex = 0; unsigned int chunkIndex = 0;
unsigned int blockOffsetWithinChunk = 0; unsigned int blockOffsetWithinChunk = 0;
compressor.locateBlock((hwm - 1), chunkIndex, blockOffsetWithinChunk); compressor->locateBlock((hwm - 1), chunkIndex,
blockOffsetWithinChunk);
//std::ostringstream oss1; //std::ostringstream oss1;
//oss1 << "Extending compressed column file"<< //oss1 << "Extending compressed column file"<<
@ -816,9 +821,8 @@ int FileOp::extendFile(
if ((m_compressionType) && (hdrs)) if ((m_compressionType) && (hdrs))
{ {
IDBCompressInterface compressor; compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
compressor.initHdr(hdrs, width, colDataType, m_compressionType); compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0);
compressor.setLBIDByIndex(hdrs, startLbid, 0);
} }
} }
@ -976,9 +980,8 @@ int FileOp::addExtentExactFile(
if ((m_compressionType) && (hdrs)) if ((m_compressionType) && (hdrs))
{ {
IDBCompressInterface compressor; compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
compressor.initHdr(hdrs, width, colDataType, m_compressionType); compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0);
compressor.setLBIDByIndex(hdrs, startLbid, 0);
} }
} }
@ -1064,13 +1067,11 @@ int FileOp::initColumnExtent(
{ {
if ((bNewFile) && (m_compressionType)) if ((bNewFile) && (m_compressionType))
{ {
char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2]; char hdrs[CompressInterface::HDR_BUF_LEN * 2];
IDBCompressInterface compressor; compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
compressor.initHdr(hdrs, width, colDataType, m_compressionType); compress::CompressInterface::setLBIDByIndex(hdrs, lbid, 0);
compressor.setLBIDByIndex(hdrs, lbid, 0);
if (bAbbrevExtent) if (bAbbrevExtent)
compressor.setBlockCount(hdrs, nBlocks); compress::CompressInterface::setBlockCount(hdrs, nBlocks);
RETURN_ON_ERROR(writeHeaders(pFile, hdrs)); RETURN_ON_ERROR(writeHeaders(pFile, hdrs));
} }
@ -1262,7 +1263,7 @@ int FileOp::initAbbrevCompColumnExtent(
Stats::startParseEvent(WE_STATS_COMPRESS_COL_INIT_ABBREV_EXT); Stats::startParseEvent(WE_STATS_COMPRESS_COL_INIT_ABBREV_EXT);
#endif #endif
char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2]; char hdrs[CompressInterface::HDR_BUF_LEN * 2];
rc = writeInitialCompColumnChunk( pFile, rc = writeInitialCompColumnChunk( pFile,
nBlocks, nBlocks,
INITIAL_EXTENT_ROWS_TO_DISK, INITIAL_EXTENT_ROWS_TO_DISK,
@ -1308,24 +1309,30 @@ int FileOp::writeInitialCompColumnChunk(
execplan::CalpontSystemCatalog::ColDataType colDataType, execplan::CalpontSystemCatalog::ColDataType colDataType,
char* hdrs) char* hdrs)
{ {
const int INPUT_BUFFER_SIZE = nRows * width; const size_t INPUT_BUFFER_SIZE = nRows * width;
char* toBeCompressedInput = new char[INPUT_BUFFER_SIZE]; char* toBeCompressedInput = new char[INPUT_BUFFER_SIZE];
unsigned int userPaddingBytes = Config::getNumCompressedPadBlks() * unsigned int userPaddingBytes = Config::getNumCompressedPadBlks() *
BYTE_PER_BLOCK; BYTE_PER_BLOCK;
const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(INPUT_BUFFER_SIZE) + // Compress an initialized abbreviated extent
userPaddingBytes; // Initially m_compressionType == 0, but this function is used under
// condition where m_compressionType > 0.
std::unique_ptr<CompressInterface> compressor(
compress::getCompressInterfaceByType(m_compressionType,
userPaddingBytes));
const size_t OUTPUT_BUFFER_SIZE =
compressor->maxCompressedSize(INPUT_BUFFER_SIZE) + userPaddingBytes +
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
unsigned char* compressedOutput = new unsigned char[OUTPUT_BUFFER_SIZE]; unsigned char* compressedOutput = new unsigned char[OUTPUT_BUFFER_SIZE];
unsigned int outputLen = OUTPUT_BUFFER_SIZE; size_t outputLen = OUTPUT_BUFFER_SIZE;
boost::scoped_array<char> toBeCompressedInputPtr( toBeCompressedInput ); boost::scoped_array<char> toBeCompressedInputPtr( toBeCompressedInput );
boost::scoped_array<unsigned char> compressedOutputPtr(compressedOutput); boost::scoped_array<unsigned char> compressedOutputPtr(compressedOutput);
setEmptyBuf( (unsigned char*)toBeCompressedInput, setEmptyBuf( (unsigned char*)toBeCompressedInput,
INPUT_BUFFER_SIZE, emptyVal, width); INPUT_BUFFER_SIZE, emptyVal, width);
// Compress an initialized abbreviated extent int rc = compressor->compressBlock(toBeCompressedInput, INPUT_BUFFER_SIZE,
IDBCompressInterface compressor( userPaddingBytes ); compressedOutput, outputLen);
int rc = compressor.compressBlock(toBeCompressedInput,
INPUT_BUFFER_SIZE, compressedOutput, outputLen );
if (rc != 0) if (rc != 0)
{ {
@ -1333,8 +1340,8 @@ int FileOp::writeInitialCompColumnChunk(
} }
// Round up the compressed chunk size // Round up the compressed chunk size
rc = compressor.padCompressedChunks( compressedOutput, rc = compressor->padCompressedChunks(compressedOutput, outputLen,
outputLen, OUTPUT_BUFFER_SIZE ); OUTPUT_BUFFER_SIZE);
if (rc != 0) if (rc != 0)
{ {
@ -1347,23 +1354,22 @@ int FileOp::writeInitialCompColumnChunk(
// "; blkAllocCnt: " << nBlocksAllocated << // "; blkAllocCnt: " << nBlocksAllocated <<
// "; compressedByteCnt: " << outputLen << std::endl; // "; compressedByteCnt: " << outputLen << std::endl;
compressor.initHdr(hdrs, width, colDataType, m_compressionType); compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
compressor.setBlockCount(hdrs, nBlocksAllocated); compress::CompressInterface::setBlockCount(hdrs, nBlocksAllocated);
compressor.setLBIDByIndex(hdrs, startLBID, 0); compress::CompressInterface::setLBIDByIndex(hdrs, startLBID, 0);
// Store compression pointers in the header // Store compression pointers in the header
std::vector<uint64_t> ptrs; std::vector<uint64_t> ptrs;
ptrs.push_back( IDBCompressInterface::HDR_BUF_LEN * 2 ); ptrs.push_back( CompressInterface::HDR_BUF_LEN * 2 );
ptrs.push_back( outputLen + (IDBCompressInterface::HDR_BUF_LEN * 2) ); ptrs.push_back( outputLen + (CompressInterface::HDR_BUF_LEN * 2) );
compressor.storePtrs(ptrs, hdrs); compress::CompressInterface::storePtrs(ptrs, hdrs);
RETURN_ON_ERROR( writeHeaders(pFile, hdrs) ); RETURN_ON_ERROR( writeHeaders(pFile, hdrs) );
// Write the compressed data // Write the compressed data
if ( pFile->write( compressedOutput, outputLen ) != outputLen ) size_t writtenLen = pFile->write(compressedOutput, outputLen);
{ if (writtenLen != outputLen)
return ERR_FILE_WRITE; return ERR_FILE_WRITE;
}
return NO_ERROR; return NO_ERROR;
} }
@ -1421,7 +1427,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
return ERR_FILE_OPEN; return ERR_FILE_OPEN;
} }
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
rc = readHeaders( pFile, hdrs ); rc = readHeaders( pFile, hdrs );
if (rc != NO_ERROR) if (rc != NO_ERROR)
@ -1432,9 +1438,14 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
} }
int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK; int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
IDBCompressInterface compressor( userPadBytes );
std::unique_ptr<CompressInterface> compressor(
compress::getCompressInterfaceByType(
compress::CompressInterface::getCompressionType(hdrs),
userPadBytes));
CompChunkPtrList chunkPtrs; CompChunkPtrList chunkPtrs;
int rcComp = compressor.getPtrList( hdrs, chunkPtrs ); int rcComp = compress::CompressInterface::getPtrList(hdrs, chunkPtrs);
if (rcComp != 0) if (rcComp != 0)
{ {
@ -1444,7 +1455,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
} }
// Nothing to do if the proposed HWM is < the current block count // Nothing to do if the proposed HWM is < the current block count
uint64_t blkCount = compressor.getBlockCount(hdrs); uint64_t blkCount = compress::CompressInterface::getBlockCount(hdrs);
if (blkCount > (hwm + 1)) if (blkCount > (hwm + 1))
{ {
@ -1455,7 +1466,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
const unsigned int ROWS_PER_EXTENT = const unsigned int ROWS_PER_EXTENT =
BRMWrapper::getInstance()->getInstance()->getExtentRows(); BRMWrapper::getInstance()->getInstance()->getExtentRows();
const unsigned int ROWS_PER_CHUNK = const unsigned int ROWS_PER_CHUNK =
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth; CompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth;
const unsigned int CHUNKS_PER_EXTENT = ROWS_PER_EXTENT / ROWS_PER_CHUNK; const unsigned int CHUNKS_PER_EXTENT = ROWS_PER_EXTENT / ROWS_PER_CHUNK;
// If this is an abbreviated extent, we first expand to a full extent // If this is an abbreviated extent, we first expand to a full extent
@ -1493,7 +1504,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
CompChunkPtr chunkOutPtr; CompChunkPtr chunkOutPtr;
rc = expandAbbrevColumnChunk( pFile, emptyVal, colWidth, rc = expandAbbrevColumnChunk( pFile, emptyVal, colWidth,
chunkPtrs[0], chunkOutPtr ); chunkPtrs[0], chunkOutPtr, hdrs );
if (rc != NO_ERROR) if (rc != NO_ERROR)
{ {
@ -1515,7 +1526,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
// Update block count to reflect a full extent // Update block count to reflect a full extent
blkCount = (ROWS_PER_EXTENT * colWidth) / BYTE_PER_BLOCK; blkCount = (ROWS_PER_EXTENT * colWidth) / BYTE_PER_BLOCK;
compressor.setBlockCount( hdrs, blkCount ); compress::CompressInterface::setBlockCount(hdrs, blkCount);
} }
// Calculate the number of empty chunks we need to add to fill this extent // Calculate the number of empty chunks we need to add to fill this extent
@ -1532,7 +1543,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
compressor.getBlockCount(hdrs) << std::endl; compressor.getBlockCount(hdrs) << std::endl;
std::cout << "Pointer Header Size (in bytes): " << std::cout << "Pointer Header Size (in bytes): " <<
(compressor.getHdrSize(hdrs) - (compressor.getHdrSize(hdrs) -
IDBCompressInterface::HDR_BUF_LEN) << std::endl; CompressInterface::HDR_BUF_LEN) << std::endl;
std::cout << "Chunk Pointers (offset,length): " << std::endl; std::cout << "Chunk Pointers (offset,length): " << std::endl;
for (unsigned k = 0; k < chunkPtrs.size(); k++) for (unsigned k = 0; k < chunkPtrs.size(); k++)
@ -1551,8 +1562,10 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
// Fill in or add necessary remaining empty chunks // Fill in or add necessary remaining empty chunks
if (numChunksToFill > 0) if (numChunksToFill > 0)
{ {
const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN;
const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes; const int OUT_BUF_LEN =
compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes +
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
// Allocate buffer, and store in scoped_array to ensure its deletion. // Allocate buffer, and store in scoped_array to ensure its deletion.
// Create scope {...} to manage deletion of buffers // Create scope {...} to manage deletion of buffers
@ -1566,9 +1579,9 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
// Compress and then pad the compressed chunk // Compress and then pad the compressed chunk
setEmptyBuf( (unsigned char*)toBeCompressedBuf, setEmptyBuf( (unsigned char*)toBeCompressedBuf,
IN_BUF_LEN, emptyVal, colWidth ); IN_BUF_LEN, emptyVal, colWidth );
unsigned int outputLen = OUT_BUF_LEN; size_t outputLen = OUT_BUF_LEN;
rcComp = compressor.compressBlock( toBeCompressedBuf, rcComp = compressor->compressBlock(toBeCompressedBuf, IN_BUF_LEN,
IN_BUF_LEN, compressedBuf, outputLen ); compressedBuf, outputLen);
if (rcComp != 0) if (rcComp != 0)
{ {
@ -1579,8 +1592,8 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
toBeCompressedInputPtr.reset(); // release memory toBeCompressedInputPtr.reset(); // release memory
rcComp = compressor.padCompressedChunks( compressedBuf, rcComp = compressor->padCompressedChunks(compressedBuf, outputLen,
outputLen, OUT_BUF_LEN ); OUT_BUF_LEN);
if (rcComp != 0) if (rcComp != 0)
{ {
@ -1639,7 +1652,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
ptrs.push_back( chunkPtrs[chunkPtrs.size() - 1].first + ptrs.push_back( chunkPtrs[chunkPtrs.size() - 1].first +
chunkPtrs[chunkPtrs.size() - 1].second ); chunkPtrs[chunkPtrs.size() - 1].second );
compressor.storePtrs( ptrs, hdrs ); compress::CompressInterface::storePtrs(ptrs, hdrs);
rc = writeHeaders( pFile, hdrs ); rc = writeHeaders( pFile, hdrs );
@ -1697,11 +1710,24 @@ int FileOp::expandAbbrevColumnChunk(
const uint8_t* emptyVal, const uint8_t* emptyVal,
int colWidth, int colWidth,
const CompChunkPtr& chunkInPtr, const CompChunkPtr& chunkInPtr,
CompChunkPtr& chunkOutPtr ) CompChunkPtr& chunkOutPtr,
const char *hdrs )
{ {
int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK; int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; auto realCompressionType = m_compressionType;
const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes; if (hdrs)
{
realCompressionType =
compress::CompressInterface::getCompressionType(hdrs);
}
std::unique_ptr<CompressInterface> compressor(
compress::getCompressInterfaceByType(realCompressionType,
userPadBytes));
const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN;
const int OUT_BUF_LEN =
compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes +
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
char* toBeCompressedBuf = new char[ IN_BUF_LEN ]; char* toBeCompressedBuf = new char[ IN_BUF_LEN ];
boost::scoped_array<char> toBeCompressedPtr(toBeCompressedBuf); boost::scoped_array<char> toBeCompressedPtr(toBeCompressedBuf);
@ -1717,13 +1743,10 @@ int FileOp::expandAbbrevColumnChunk(
chunkInPtr.second) ); chunkInPtr.second) );
// Uncompress an "abbreviated" chunk into our 4MB buffer // Uncompress an "abbreviated" chunk into our 4MB buffer
unsigned int outputLen = IN_BUF_LEN; size_t outputLen = IN_BUF_LEN;
IDBCompressInterface compressor( userPadBytes ); int rc = compressor->uncompressBlock(compressedInBuf, chunkInPtr.second,
int rc = compressor.uncompressBlock( (unsigned char*) toBeCompressedBuf,
compressedInBuf, outputLen);
chunkInPtr.second,
(unsigned char*)toBeCompressedBuf,
outputLen);
if (rc != 0) if (rc != 0)
{ {
@ -1739,11 +1762,8 @@ int FileOp::expandAbbrevColumnChunk(
// Compress the data we just read, as a "full" 4MB chunk // Compress the data we just read, as a "full" 4MB chunk
outputLen = OUT_BUF_LEN; outputLen = OUT_BUF_LEN;
rc = compressor.compressBlock( rc = compressor->compressBlock(reinterpret_cast<char*>(toBeCompressedBuf),
reinterpret_cast<char*>(toBeCompressedBuf), IN_BUF_LEN, compressedOutBuf, outputLen);
IN_BUF_LEN,
compressedOutBuf,
outputLen );
if (rc != 0) if (rc != 0)
{ {
@ -1751,8 +1771,8 @@ int FileOp::expandAbbrevColumnChunk(
} }
// Round up the compressed chunk size // Round up the compressed chunk size
rc = compressor.padCompressedChunks( compressedOutBuf, rc = compressor->padCompressedChunks(compressedOutBuf, outputLen,
outputLen, OUT_BUF_LEN ); OUT_BUF_LEN);
if (rc != 0) if (rc != 0)
{ {
@ -1782,7 +1802,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* hdr) const
RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) ); RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) );
// Write the headers // Write the headers
if (pFile->write( hdr, IDBCompressInterface::HDR_BUF_LEN * 2 ) != IDBCompressInterface::HDR_BUF_LEN * 2) if (pFile->write( hdr, CompressInterface::HDR_BUF_LEN * 2 ) != CompressInterface::HDR_BUF_LEN * 2)
{ {
return ERR_FILE_WRITE; return ERR_FILE_WRITE;
} }
@ -1808,7 +1828,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) ); RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) );
// Write the control header // Write the control header
if (pFile->write( controlHdr, IDBCompressInterface::HDR_BUF_LEN ) != IDBCompressInterface::HDR_BUF_LEN) if (pFile->write( controlHdr, CompressInterface::HDR_BUF_LEN ) != CompressInterface::HDR_BUF_LEN)
{ {
return ERR_FILE_WRITE; return ERR_FILE_WRITE;
} }
@ -2651,9 +2671,8 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdrs ) const
{ {
RETURN_ON_ERROR( setFileOffset(pFile, 0) ); RETURN_ON_ERROR( setFileOffset(pFile, 0) );
RETURN_ON_ERROR( readFile( pFile, reinterpret_cast<unsigned char*>(hdrs), RETURN_ON_ERROR( readFile( pFile, reinterpret_cast<unsigned char*>(hdrs),
(IDBCompressInterface::HDR_BUF_LEN * 2) ) ); (CompressInterface::HDR_BUF_LEN * 2) ) );
IDBCompressInterface compressor; int rc = compress::CompressInterface::verifyHdr(hdrs);
int rc = compressor.verifyHdr( hdrs );
if (rc != 0) if (rc != 0)
{ {
@ -2671,11 +2690,10 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdr1, char* hdr2 ) const
unsigned char* hdrPtr = reinterpret_cast<unsigned char*>(hdr1); unsigned char* hdrPtr = reinterpret_cast<unsigned char*>(hdr1);
RETURN_ON_ERROR( setFileOffset(pFile, 0) ); RETURN_ON_ERROR( setFileOffset(pFile, 0) );
RETURN_ON_ERROR( readFile( pFile, hdrPtr, RETURN_ON_ERROR( readFile( pFile, hdrPtr,
IDBCompressInterface::HDR_BUF_LEN )); CompressInterface::HDR_BUF_LEN ));
IDBCompressInterface compressor; int ptrSecSize = compress::CompressInterface::getHdrSize(hdrPtr) -
int ptrSecSize = compressor.getHdrSize(hdrPtr) - CompressInterface::HDR_BUF_LEN;
IDBCompressInterface::HDR_BUF_LEN;
return readFile( pFile, reinterpret_cast<unsigned char*>(hdr2), return readFile( pFile, reinterpret_cast<unsigned char*>(hdr2),
ptrSecSize ); ptrSecSize );
} }

View File

@ -529,11 +529,11 @@ private:
FileOp(const FileOp& rhs); FileOp(const FileOp& rhs);
FileOp& operator=(const FileOp& rhs); FileOp& operator=(const FileOp& rhs);
int expandAbbrevColumnChunk( IDBDataFile* pFile, int expandAbbrevColumnChunk(IDBDataFile* pFile, const uint8_t* emptyVal,
const uint8_t* emptyVal, int colWidth,
int colWidth, const compress::CompChunkPtr& chunkInPtr,
const compress::CompChunkPtr& chunkInPtr, compress::CompChunkPtr& chunkOutPt,
compress::CompChunkPtr& chunkOutPt); const char* hdrs = nullptr);
int initAbbrevCompColumnExtent( int initAbbrevCompColumnExtent(
IDBDataFile* pFile, uint16_t dbRoot, int nBlocks, IDBDataFile* pFile, uint16_t dbRoot, int nBlocks,

View File

@ -1007,9 +1007,9 @@ void RBMetaWriter::backupHWMChunk(
} }
// Read Control header // Read Control header
char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ]; char controlHdr[ CompressInterface::HDR_BUF_LEN ];
rc = fileOp.readFile( dbFile, (unsigned char*)controlHdr, rc = fileOp.readFile( dbFile, (unsigned char*)controlHdr,
IDBCompressInterface::HDR_BUF_LEN ); CompressInterface::HDR_BUF_LEN );
if (rc != NO_ERROR) if (rc != NO_ERROR)
{ {
@ -1025,8 +1025,7 @@ void RBMetaWriter::backupHWMChunk(
throw WeException( oss.str(), rc ); throw WeException( oss.str(), rc );
} }
IDBCompressInterface compressor; int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
int rc1 = compressor.verifyHdr( controlHdr );
if (rc1 != 0) if (rc1 != 0)
{ {
@ -1045,9 +1044,23 @@ void RBMetaWriter::backupHWMChunk(
throw WeException( oss.str(), rc ); throw WeException( oss.str(), rc );
} }
auto compressionType =
compress::CompressInterface::getCompressionType(controlHdr);
std::unique_ptr<compress::CompressInterface> compressor(
compress::getCompressInterfaceByType(compressionType));
if (!compressor)
{
WErrorCodes ec;
std::ostringstream oss;
oss << "Ivalid compression type " << compressionType;
fileOp.closeFile( dbFile );
throw WeException(oss.str(), rc);
}
// Read Pointer header data // Read Pointer header data
uint64_t hdrSize = compressor.getHdrSize(controlHdr); uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN; uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
char* pointerHdr = new char[ptrHdrSize]; char* pointerHdr = new char[ptrHdrSize];
rc = fileOp.readFile( dbFile, (unsigned char*)pointerHdr, ptrHdrSize ); rc = fileOp.readFile( dbFile, (unsigned char*)pointerHdr, ptrHdrSize );
@ -1067,7 +1080,8 @@ void RBMetaWriter::backupHWMChunk(
} }
CompChunkPtrList chunkPtrs; CompChunkPtrList chunkPtrs;
rc = compressor.getPtrList(pointerHdr, ptrHdrSize, chunkPtrs ); rc = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
chunkPtrs);
delete[] pointerHdr; delete[] pointerHdr;
if (rc != 0) if (rc != 0)
@ -1087,7 +1101,7 @@ void RBMetaWriter::backupHWMChunk(
unsigned int blockOffsetWithinChunk = 0; unsigned int blockOffsetWithinChunk = 0;
unsigned char* buffer = 0; unsigned char* buffer = 0;
uint64_t chunkSize = 0; uint64_t chunkSize = 0;
compressor.locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk); compressor->locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk);
if (chunkIndex < chunkPtrs.size()) if (chunkIndex < chunkPtrs.size())
{ {

View File

@ -121,9 +121,9 @@ int ColumnOpCompress0::saveBlock(IDBDataFile* pFile, const unsigned char* writeB
* Constructor * Constructor
*/ */
ColumnOpCompress1::ColumnOpCompress1(Log* logger) ColumnOpCompress1::ColumnOpCompress1(uint32_t compressionType, Log* logger)
{ {
m_compressionType = 1; m_compressionType = compressionType;
m_chunkManager = new ChunkManager(); m_chunkManager = new ChunkManager();
if (logger) if (logger)
@ -164,11 +164,7 @@ bool ColumnOpCompress1::abbreviatedExtent(IDBDataFile* pFile, int colWidth) cons
int ColumnOpCompress1::blocksInFile(IDBDataFile* pFile) const int ColumnOpCompress1::blocksInFile(IDBDataFile* pFile) const
{ {
CompFileHeader compFileHeader; return m_chunkManager->getBlockCount(pFile);
readHeaders(pFile, compFileHeader.fControlData, compFileHeader.fPtrSection);
compress::IDBCompressInterface compressor;
return compressor.getBlockCount(compFileHeader.fControlData);
} }

View File

@ -97,7 +97,7 @@ public:
/** /**
* @brief Constructor * @brief Constructor
*/ */
EXPORT ColumnOpCompress1(Log* logger = 0); EXPORT ColumnOpCompress1(uint32_t compressionType, Log* logger = 0);
/** /**
* @brief Default Destructor * @brief Default Destructor

View File

@ -67,9 +67,9 @@ DctnryCompress0::~DctnryCompress0()
/** /**
* Constructor * Constructor
*/ */
DctnryCompress1::DctnryCompress1(Log* logger) DctnryCompress1::DctnryCompress1(uint32_t compressionType, Log* logger)
{ {
m_compressionType = 1; m_compressionType = compressionType;
m_chunkManager = new ChunkManager(); m_chunkManager = new ChunkManager();
if (logger) if (logger)

View File

@ -62,7 +62,7 @@ public:
/** /**
* @brief Constructor * @brief Constructor
*/ */
EXPORT DctnryCompress1(Log* logger = 0); EXPORT DctnryCompress1(uint32_t compressionType, Log* logger = 0);
/** /**
* @brief Default Destructor * @brief Default Destructor

View File

@ -76,19 +76,25 @@ StopWatch timer;
WriteEngineWrapper::WriteEngineWrapper() : m_opType(NOOP) WriteEngineWrapper::WriteEngineWrapper() : m_opType(NOOP)
{ {
m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0; m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0;
m_colOp[COMPRESSED_OP] = new ColumnOpCompress1;
m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0; m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0;
m_dctnry[COMPRESSED_OP] = new DctnryCompress1;
m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*compressionType=*/1);
m_dctnry[COMPRESSED_OP_1] = new DctnryCompress1(/*compressionType=*/1);
m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*compressionType=*/3);
m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3);
} }
WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opType(rhs.m_opType) WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opType(rhs.m_opType)
{ {
m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0; m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0;
m_colOp[COMPRESSED_OP] = new ColumnOpCompress1;
m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0; m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0;
m_dctnry[COMPRESSED_OP] = new DctnryCompress1;
m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*compressionType=*/1);
m_dctnry[COMPRESSED_OP_1] = new DctnryCompress1(/*compressionType=*/1);
m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*compressionType=*/3);
m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3);
} }
/**@brief WriteEngineWrapper Constructor /**@brief WriteEngineWrapper Constructor
@ -96,9 +102,13 @@ WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opTyp
WriteEngineWrapper::~WriteEngineWrapper() WriteEngineWrapper::~WriteEngineWrapper()
{ {
delete m_colOp[UN_COMPRESSED_OP]; delete m_colOp[UN_COMPRESSED_OP];
delete m_colOp[COMPRESSED_OP];
delete m_dctnry[UN_COMPRESSED_OP]; delete m_dctnry[UN_COMPRESSED_OP];
delete m_dctnry[COMPRESSED_OP];
delete m_colOp[COMPRESSED_OP_1];
delete m_dctnry[COMPRESSED_OP_1];
delete m_colOp[COMPRESSED_OP_2];
delete m_dctnry[COMPRESSED_OP_2];
} }
/**@brief Perform upfront initialization /**@brief Perform upfront initialization

View File

@ -58,9 +58,10 @@ namespace WriteEngine
{ {
//... Total compression operation: un_compressed, compressed //... Total compression operation: un_compressed, compressed
const int UN_COMPRESSED_OP = 0; const int UN_COMPRESSED_OP = 0;
const int COMPRESSED_OP = 1; const int COMPRESSED_OP_1 = 1;
const int TOTAL_COMPRESS_OP = 2; const int COMPRESSED_OP_2 = 2;
const int TOTAL_COMPRESS_OP = 3;
//...Forward class declarations //...Forward class declarations
class Log; class Log;
@ -446,8 +447,10 @@ public:
*/ */
void setIsInsert(bool bIsInsert) void setIsInsert(bool bIsInsert)
{ {
m_colOp[COMPRESSED_OP]->chunkManager()->setIsInsert(bIsInsert); m_colOp[COMPRESSED_OP_1]->chunkManager()->setIsInsert(bIsInsert);
m_dctnry[COMPRESSED_OP]->chunkManager()->setIsInsert(true); m_dctnry[COMPRESSED_OP_1]->chunkManager()->setIsInsert(true);
m_colOp[COMPRESSED_OP_2]->chunkManager()->setIsInsert(bIsInsert);
m_dctnry[COMPRESSED_OP_2]->chunkManager()->setIsInsert(true);
} }
/** /**
@ -458,7 +461,7 @@ public:
*/ */
bool getIsInsert() bool getIsInsert()
{ {
return m_colOp[COMPRESSED_OP]->chunkManager()->getIsInsert(); return m_colOp[COMPRESSED_OP_1]->chunkManager()->getIsInsert();
} }
std::tr1::unordered_map<TxnID, SP_TxnLBIDRec_t>& getTxnMap() std::tr1::unordered_map<TxnID, SP_TxnLBIDRec_t>& getTxnMap()
@ -475,10 +478,23 @@ public:
*/ */
int flushChunks(int rc, const std::map<FID, FID>& columOids) int flushChunks(int rc, const std::map<FID, FID>& columOids)
{ {
int rtn1 = m_colOp[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids); std::vector<int32_t> compressedOpIds = {COMPRESSED_OP_1,
int rtn2 = m_dctnry[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids); COMPRESSED_OP_2};
return (rtn1 != NO_ERROR ? rtn1 : rtn2); for (const auto compressedOpId : compressedOpIds)
{
auto rtn = m_colOp[compressedOpId]->chunkManager()->flushChunks(
rc, columOids);
if (rtn != NO_ERROR)
return rtn;
rtn = m_dctnry[compressedOpId]->chunkManager()->flushChunks(
rc, columOids);
if (rtn != NO_ERROR)
return rtn;
}
return NO_ERROR;
} }
/** /**
@ -524,7 +540,7 @@ public:
int startTransaction(const TxnID& txnid) int startTransaction(const TxnID& txnid)
{ {
int rc = 0; int rc = 0;
rc = m_colOp[COMPRESSED_OP]->chunkManager()->startTransaction(txnid); rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->startTransaction(txnid);
//if ( rc == 0) //if ( rc == 0)
// rc = m_dctnry[COMPRESSED_OP]->chunkManager()->startTransaction(txnid); // rc = m_dctnry[COMPRESSED_OP]->chunkManager()->startTransaction(txnid);
return rc; return rc;
@ -537,7 +553,8 @@ public:
int confirmTransaction (const TxnID& txnid) int confirmTransaction (const TxnID& txnid)
{ {
int rc = 0; int rc = 0;
rc = m_colOp[COMPRESSED_OP]->chunkManager()->confirmTransaction (txnid); rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->confirmTransaction(
txnid);
return rc; return rc;
} }
@ -549,7 +566,8 @@ public:
int endTransaction(const TxnID& txnid, bool success) int endTransaction(const TxnID& txnid, bool success)
{ {
int rc = 0; int rc = 0;
rc = m_colOp[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success); rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->endTransaction(txnid,
success);
//if ( rc == 0) //if ( rc == 0)
// rc = m_dctnry[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success); // rc = m_dctnry[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success);
return rc; return rc;
@ -785,7 +803,16 @@ private:
int op(int compressionType) int op(int compressionType)
{ {
return (compressionType > 0 ? COMPRESSED_OP : UN_COMPRESSED_OP); switch (compressionType)
{
case 1:
case 2:
return COMPRESSED_OP_1;
case 3:
return COMPRESSED_OP_2;
}
return 0;
} }