diff --git a/.drone.jsonnet b/.drone.jsonnet
index 91277de6e..0b9a02485 100644
--- a/.drone.jsonnet
+++ b/.drone.jsonnet
@@ -36,9 +36,9 @@ local deb_build_deps = 'apt update && apt install --yes --no-install-recommends
local platformMap(platform) =
local platform_map = {
- 'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package',
- 'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package',
- 'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package',
+ 'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran liblz4-devel && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package',
+ 'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' lz4-devel && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package',
+ 'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' lz4-devel cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package',
'debian:9': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=stretch' debian/autobake-deb.sh",
'debian:10': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=buster' debian/autobake-deb.sh",
'ubuntu:18.04': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=bionic' debian/autobake-deb.sh",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4bbdfe3fb..efe54d301 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -163,6 +163,12 @@ if(NOT AWK_EXECUTABLE)
return()
endif()
+FIND_PACKAGE(LZ4)  # LZ4 is required by this build; the check below bails out if it is missing
+if (NOT LZ4_FOUND)
+ MESSAGE_ONCE(CS_NO_LZ4 "lz4 not found")
+ return()  # stop processing this CMakeLists.txt, same as the AWK check above
+endif()
+
IF (NOT INSTALL_LAYOUT)
INCLUDE(check_compiler_flag)
diff --git a/cmake/FindLZ4.cmake b/cmake/FindLZ4.cmake
new file mode 100644
index 000000000..7e2ca66e9
--- /dev/null
+++ b/cmake/FindLZ4.cmake
@@ -0,0 +1,25 @@
+# Find module for LZ4: defines LZ4_FOUND, LZ4_LIBRARIES and LZ4_INCLUDE_DIR.
+# LZ4_ROOT_DIR is only a search hint for the two lookups below.
+find_path(LZ4_ROOT_DIR NAMES include/lz4.h)
+
+find_library(LZ4_LIBRARIES
+    NAMES lz4
+    HINTS ${LZ4_ROOT_DIR}/lib
+)
+
+find_path(LZ4_INCLUDE_DIR
+    NAMES lz4.h
+    HINTS ${LZ4_ROOT_DIR}/include
+)
+
+include(FindPackageHandleStandardArgs)
+# Pass the package name exactly as spelled in FIND_PACKAGE(LZ4); a mismatched
+# name ("lz4") makes newer CMake releases warn about this module.
+find_package_handle_standard_args(LZ4 DEFAULT_MSG
+    LZ4_LIBRARIES
+    LZ4_INCLUDE_DIR
+)
+
+mark_as_advanced(
+    LZ4_ROOT_DIR LZ4_LIBRARIES LZ4_INCLUDE_DIR
+)
diff --git a/dbcon/joblist/pcolstep.cpp b/dbcon/joblist/pcolstep.cpp
index 6b625d1e8..e2619d22e 100644
--- a/dbcon/joblist/pcolstep.cpp
+++ b/dbcon/joblist/pcolstep.cpp
@@ -145,9 +145,7 @@ pColStep::pColStep(
if (fOid < 1000)
throw runtime_error("pColStep: invalid column");
- compress::IDBCompressInterface cmpif;
-
- if (!cmpif.isCompressionAvail(fColType.compressionType))
+ if (!compress::CompressInterface::isCompressionAvail(fColType.compressionType))
{
ostringstream oss;
oss << "Unsupported compression type " << fColType.compressionType;
diff --git a/dbcon/mysql/columnstore_info.sql b/dbcon/mysql/columnstore_info.sql
index 476819aad..8a79f98ff 100644
--- a/dbcon/mysql/columnstore_info.sql
+++ b/dbcon/mysql/columnstore_info.sql
@@ -95,7 +95,11 @@ DROP PROCEDURE IF EXISTS `compression_ratio` //
CREATE PROCEDURE compression_ratio() SQL SECURITY INVOKER
BEGIN
-SELECT CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files WHERE compressed_data_size IS NOT NULL), ':1') COMPRESSION_RATIO;
+-- Returns one row per compression method: SUM(data_size) / SUM(compressed_data_size), rendered as '<ratio>:1'.
+SELECT 'Snappy' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='Snappy' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') compression_ratio
+UNION ALL
+SELECT 'LZ4' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='LZ4') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='LZ4' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') as compression_ratio;
+-- NOTE(review): both divisors above join columnstore_files to columnstore_extents on object_id only; if one object has several extent rows, each compressed_data_size is summed once per extent. TODO confirm.
END //
create or replace procedure columnstore_upgrade() SQL SECURITY INVOKER
diff --git a/dbcon/mysql/ha_mcs_ddl.cpp b/dbcon/mysql/ha_mcs_ddl.cpp
index 339b43750..c6d56757b 100644
--- a/dbcon/mysql/ha_mcs_ddl.cpp
+++ b/dbcon/mysql/ha_mcs_ddl.cpp
@@ -777,7 +777,6 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
parser.setDefaultSchema(schema);
parser.setDefaultCharset(default_table_charset);
int rc = 0;
- IDBCompressInterface idbCompress;
parser.Parse(ddlStatement.c_str());
if (get_fe_conn_info_ptr() == NULL)
@@ -981,7 +980,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
if (compressionType == 1) compressionType = 2;
- if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
+ if ((compressionType > 0) &&
+ !(compress::CompressInterface::isCompressionAvail(
+ compressionType)))
{
rc = 1;
ci->alterTableState = cal_connection_info::NOT_ALTER;
@@ -1368,7 +1369,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
return rc;
}
- if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
+ if ((compressionType > 0) &&
+ !(compress::CompressInterface::isCompressionAvail(
+ compressionType)))
{
rc = 1;
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
@@ -1713,7 +1716,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
return rc;
}
- if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
+ if ((compressionType > 0) &&
+ !(compress::CompressInterface::isCompressionAvail(
+ compressionType)))
{
rc = 1;
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
@@ -1842,7 +1847,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
return rc;
}
- if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
+ if ((compressionType > 0) &&
+ !(compress::CompressInterface::isCompressionAvail(
+ compressionType)))
{
rc = 1;
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
@@ -2364,9 +2371,8 @@ int ha_mcs_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* crea
if (compressiontype == 1) compressiontype = 2;
- IDBCompressInterface idbCompress;
-
- if ( ( compressiontype > 0 ) && !(idbCompress.isCompressionAvail( compressiontype )) )
+ if ((compressiontype > 0) &&
+ !(compress::CompressInterface::isCompressionAvail(compressiontype)))
{
string emsg = IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE);
setError(thd, ER_INTERNAL_ERROR, emsg);
diff --git a/dbcon/mysql/ha_mcs_sysvars.cpp b/dbcon/mysql/ha_mcs_sysvars.cpp
index b60dd3427..234fe69ae 100644
--- a/dbcon/mysql/ha_mcs_sysvars.cpp
+++ b/dbcon/mysql/ha_mcs_sysvars.cpp
@@ -21,8 +21,10 @@
#include "ha_mcs_sysvars.h"
const char* mcs_compression_type_names[] = {
- "SNAPPY",
- "SNAPPY",
+ "SNAPPY", // 0 NOTE(review): mcs_compression_type_t names 0 NO_COMPRESSION, yet the label here is SNAPPY; confirm intended
+ "SNAPPY", // 1 (the DDL paths in ha_mcs_ddl.cpp rewrite compression type 1 to 2)
+ "SNAPPY", // 2
+ "LZ4", // 3
NullS
};
@@ -39,7 +41,8 @@ static MYSQL_THDVAR_ENUM(
PLUGIN_VAR_RQCMDARG,
"Controls compression algorithm for create tables. Possible values are: "
"NO_COMPRESSION segment files aren't compressed; "
- "SNAPPY segment files are Snappy compressed (default);",
+ "SNAPPY segment files are Snappy compressed (default); "  // keep the trailing space: adjacent literals are concatenated as-is
+ "LZ4 segment files are LZ4 compressed;",
NULL, // check
NULL, // update
1, //default
diff --git a/dbcon/mysql/ha_mcs_sysvars.h b/dbcon/mysql/ha_mcs_sysvars.h
index faeed3880..a1c9afe9f 100644
--- a/dbcon/mysql/ha_mcs_sysvars.h
+++ b/dbcon/mysql/ha_mcs_sysvars.h
@@ -30,7 +30,8 @@ extern char cs_commit_hash[];
// compression_type
enum mcs_compression_type_t {
NO_COMPRESSION = 0,
- SNAPPY = 2
+ SNAPPY = 2, // 1 has no enumerator here; the DDL code in ha_mcs_ddl.cpp rewrites type 1 to 2
+ LZ4 = 3 // same value that is_columnstore_columns_fill reports as "LZ4"
};
// use_import_for_batchinsert mode
diff --git a/dbcon/mysql/is_columnstore_columns.cpp b/dbcon/mysql/is_columnstore_columns.cpp
index 437360489..43ff15d08 100644
--- a/dbcon/mysql/is_columnstore_columns.cpp
+++ b/dbcon/mysql/is_columnstore_columns.cpp
@@ -183,6 +183,10 @@ static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond)
compression_type = "Snappy";
break;
+ case 3:
+ compression_type = "LZ4";
+ break;
+
default:
compression_type = "Unknown";
break;
diff --git a/oam/etc/Columnstore.xml b/oam/etc/Columnstore.xml
index df95e40ad..378c5b24b 100644
--- a/oam/etc/Columnstore.xml
+++ b/oam/etc/Columnstore.xml
@@ -492,6 +492,7 @@
100
N
Y
+ Snappy
16K
@@ -539,6 +540,7 @@
Y
+ Snappy
127.0.0.1
diff --git a/primitives/blockcache/iomanager.cpp b/primitives/blockcache/iomanager.cpp
index 70b40f63f..401caabd0 100644
--- a/primitives/blockcache/iomanager.cpp
+++ b/primitives/blockcache/iomanager.cpp
@@ -308,7 +308,7 @@ void waitForRetry(long count)
//Must hold the FD cache lock!
-int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterface& decompressor)
+static int updateptrs(char* ptr, FdCacheType_t::iterator fdit)
{
ssize_t i;
uint32_t progress;
@@ -357,7 +357,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa
fdit->second->cmpMTime = mtime;
int gplRc = 0;
- gplRc = decompressor.getPtrList(&ptr[4096], 4096, fdit->second->ptrList);
+ gplRc = compress::CompressInterface::getPtrList(&ptr[4096], 4096,
+ fdit->second->ptrList);
if (gplRc != 0)
return -5; // go for a retry.
@@ -391,7 +392,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa
return -8;
CompChunkPtrList nextPtrList;
- gplRc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
+ gplRc = compress::CompressInterface::getPtrList(
+ &nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
if (gplRc != 0)
return -7; // go for a retry.
@@ -445,7 +447,6 @@ void* thr_popper(ioManager* arg)
double rqst3;
bool locked = false;
SPFdEntry_t fe;
- IDBCompressInterface decompressor;
vector cacheInsertOps;
bool copyLocked = false;
@@ -463,8 +464,10 @@ void* thr_popper(ioManager* arg)
FdCacheType_t::iterator fdit;
IDBDataFile* fp = 0;
- uint32_t maxCompSz = IDBCompressInterface::maxCompressedSize(iom->blocksPerRead * BLOCK_SIZE);
- uint32_t readBufferSz = maxCompSz + pageSize;
+ size_t maxCompSz =
+ compress::CompressInterface::getMaxCompressedSizeGeneric(
+ iom->blocksPerRead * BLOCK_SIZE);
+ size_t readBufferSz = maxCompSz + pageSize;
realbuff.reset(new char[readBufferSz]);
@@ -863,7 +866,7 @@ retryReadHeaders:
cur_mtime = fp_mtime;
if (decompRetryCount > 0 || retryReadHeadersCount > 0 || cur_mtime > fdit->second->cmpMTime)
- updatePtrsRc = updateptrs(&alignedbuff[0], fdit, decompressor);
+ updatePtrsRc = updateptrs(&alignedbuff[0], fdit);
fdMapMutex.unlock();
@@ -1052,7 +1055,7 @@ retryReadHeaders:
#ifdef _MSC_VER
unsigned int blen = 4 * 1024 * 1024 + 4;
#else
- uint32_t blen = 4 * 1024 * 1024 + 4;
+ size_t blen = 4 * 1024 * 1024 + 4;
#endif
#ifdef IDB_COMP_POC_DEBUG
{
@@ -1060,7 +1063,18 @@ retryReadHeaders:
cout << "decompress(0x" << hex << (ptrdiff_t)&alignedbuff[0] << dec << ", " << fdit->second->ptrList[cmpOffFact.quot].second << ", 0x" << hex << (ptrdiff_t)uCmpBuf << dec << ", " << blen << ")" << endl;
}
#endif
- int dcrc = decompressor.uncompressBlock(&alignedbuff[0],
+
+ std::unique_ptr decompressor(
+ compress::getCompressInterfaceByType(
+ static_cast(fdit->second->compType)));
+ if (!decompressor)
+ {
+ // Use default?
+ decompressor.reset(
+ new compress::CompressInterfaceSnappy());
+ }
+
+ int dcrc = decompressor->uncompressBlock(&alignedbuff[0],
fdit->second->ptrList[cmpOffFact.quot].second, uCmpBuf, blen);
if (dcrc != 0)
diff --git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp
index 719893abb..e6e9e65c0 100644
--- a/primitives/primproc/primitiveserver.cpp
+++ b/primitives/primproc/primitiveserver.cpp
@@ -696,13 +696,25 @@ blockReadRetry:
i = fp->pread( &cmpHdrBuf[0], 0, 4096 * 3);
CompChunkPtrList ptrList;
- IDBCompressInterface decompressor;
+ std::unique_ptr decompressor(
+ compress::getCompressInterfaceByType(
+ compress::CompressInterface::getCompressionType(
+ &cmpHdrBuf[0])));
+
+ if (!decompressor)
+ {
+ // Use default?
+ decompressor.reset(
+ new compress::CompressInterfaceSnappy());
+ }
+
int dcrc = 0;
if (i == 4096 * 3)
{
uint64_t numHdrs = 0; // extra headers
- dcrc = decompressor.getPtrList(&cmpHdrBuf[4096], 4096, ptrList);
+ dcrc = compress::CompressInterface::getPtrList(
+ &cmpHdrBuf[4096], 4096, ptrList);
if (dcrc == 0 && ptrList.size() > 0)
numHdrs = ptrList[0].first / 4096ULL - 2ULL;
@@ -723,7 +735,8 @@ blockReadRetry:
i = fp->pread( &nextHdrBufPtr[0], 4096 * 2, numHdrs * 4096 );
CompChunkPtrList nextPtrList;
- dcrc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
+ dcrc = compress::CompressInterface::getPtrList(
+ &nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
if (dcrc == 0)
ptrList.insert(ptrList.end(), nextPtrList.begin(), nextPtrList.end());
@@ -777,11 +790,11 @@ blockReadRetry:
cmpBuf = (char*) alignedBuffer;
}
- unsigned blen = 4 * 1024 * 1024;
+ size_t blen = 4 * 1024 * 1024;
i = fp->pread( cmpBuf, cmpBufOff, cmpBufSz );
- dcrc = decompressor.uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen);
+ dcrc = decompressor->uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen);
if (dcrc == 0)
{
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index b637e9b03..746d77124 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -42,3 +42,9 @@ if (WITH_REBUILD_EM_UT)
target_link_libraries(rebuild_em_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
install(TARGETS rebuild_em_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
endif()
+
+if (WITH_COMPRESSION_UT)  # opt-in unit test target, mirrors the WITH_REBUILD_EM_UT block above
+ add_executable(compression_tests compression-tests.cpp)
+ target_link_libraries(compression_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
+ install(TARGETS compression_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
+endif()
diff --git a/tests/compression-tests.cpp b/tests/compression-tests.cpp
new file mode 100644
index 000000000..b0d0868b4
--- /dev/null
+++ b/tests/compression-tests.cpp
@@ -0,0 +1,126 @@
+/* Copyright (C) 2021 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; version 2 of
+ the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include
+#include
+#include
+
+#include "idbcompress.h"
+
+class CompressionTest : public ::testing::Test
+{
+  protected:
+    // Returns the concatenation of all data.size()! orderings of `data` that
+    // the swap enumeration visits; `data` itself is restored before returning.
+    std::string genPermutations(std::string& data)
+    {
+        std::string generated;
+        generate(data, 0, generated);
+        return generated;
+    }
+
+  private:
+    void generate(std::string& data, size_t i, std::string& generated)
+    {
+        if (i == data.size())
+        {
+            generated.append(data);  // one complete ordering
+            return;
+        }
+        for (size_t k = i, e = data.size(); k < e; ++k)
+        {
+            std::swap(data[i], data[k]);       // place data[k] in slot i
+            generate(data, i + 1, generated);  // enumerate the remaining tail
+            std::swap(data[i], data[k]);       // undo so the next k starts clean
+        }
+    }
+};
+
+TEST_F(CompressionTest, LZ4CanCompress)
+{
+    // Round-trips a short text through the LZ4 codec and expects it unchanged.
+    std::string originalData =
+        "This program is free software; you can redistribute it and/or"
+        "modify it under the terms of the GNU General Public License"
+        "as published by the Free Software Foundation; version 2 of"
+        "the License.";
+
+    std::unique_ptr<compress::CompressInterface> compressor(
+        new compress::CompressInterfaceLZ4());
+    size_t originalSize = originalData.size();
+    size_t compressedSize = compressor->maxCompressedSize(originalSize);
+    std::unique_ptr<char[]> compressedData(new char[compressedSize]);
+    std::memset(compressedData.get(), 0, compressedSize);
+
+    auto rc = compressor->compress(originalData.data(), originalSize,
+                                   compressedData.get(), &compressedSize);
+    ASSERT_EQ(rc, 0);
+
+    std::unique_ptr<char[]> uncompressedData(new char[originalSize]);
+    rc = compressor->uncompress(compressedData.get(), compressedSize,
+                                uncompressedData.get(), &originalSize);
+    ASSERT_EQ(rc, 0);
+    std::string result(uncompressedData.get(), originalSize);  // buffer has no NUL terminator: pass the length
+    EXPECT_EQ(originalData, result);
+}
+
+TEST_F(CompressionTest, LZvsSnappyUnique)
+{
+    // Compresses the same permutation-generated inputs with both codecs and
+    // prints the achieved ratios; only the return codes are asserted.
+    std::unique_ptr<compress::CompressInterface> lz4Compressor(
+        new compress::CompressInterfaceLZ4());
+    std::unique_ptr<compress::CompressInterface> snappyCompressor(
+        new compress::CompressInterfaceSnappy());
+    // Generate permutations.
+    // 9! * 9 == 3265920 (closer to current chunk size)
+    std::vector<std::string> dataPool{"abcdefghi", "aaadefghi", "aaaaafghi",
+                                      "aaaaaaahi", "aaaaaaaaj"};
+
+    for (auto& data : dataPool)
+    {
+        std::cout << "Permutations generated for: " << data << std::endl;
+        auto generated = genPermutations(data);
+        auto generatedSize = generated.size();
+
+        // compress() replaces each capacity below with the bytes actually written.
+        auto compressedSizeLZ4 = lz4Compressor->maxCompressedSize(generatedSize);
+        auto compressedSizeSnappy =
+            snappyCompressor->maxCompressedSize(generatedSize);
+
+        std::unique_ptr<char[]> lz4CompressedData(new char[compressedSizeLZ4]);
+        auto rc = lz4Compressor->compress(generated.data(), generatedSize,
+                                          lz4CompressedData.get(),
+                                          &compressedSizeLZ4);
+        ASSERT_EQ(rc, 0);
+
+        std::unique_ptr<char[]> snappyCompressedData(
+            new char[compressedSizeSnappy]);
+        rc = snappyCompressor->compress(generated.data(), generatedSize,
+                                        snappyCompressedData.get(),
+                                        &compressedSizeSnappy);
+        ASSERT_EQ(rc, 0);
+
+        const float inputSize = static_cast<float>(generatedSize);
+        std::cout << "LZ ratio: "
+                  << inputSize / static_cast<float>(compressedSizeLZ4)
+                  << std::endl;
+        std::cout << "Snappy ratio: "
+                  << inputSize / static_cast<float>(compressedSizeSnappy)
+                  << std::endl;
+    }
+}
diff --git a/tests/shared_components_tests.cpp b/tests/shared_components_tests.cpp
index d747ee569..7f302599b 100644
--- a/tests/shared_components_tests.cpp
+++ b/tests/shared_components_tests.cpp
@@ -383,7 +383,7 @@ public:
BlockOp blockOp;
char fileName[20];
int rc;
- char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
+ char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
printf("\nRunning testCreateDeleteFile \n");
idbdatafile::IDBPolicy::init(true, false, "", 0);
@@ -966,7 +966,7 @@ public:
BlockOp blockOp;
char fileName[20];
int rc;
- char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
+ char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
int dbRoot = 1;
printf("\nRunning testExtensionWOPrealloc \n");
@@ -1085,7 +1085,7 @@ public:
int dbRoot = 1;
int colWidth = 65535;
- DctnryCompress1 m_Dctnry;
+ DctnryCompress1 m_Dctnry(/*compressionType=*/1);
// This is the magic for the stub in FileOp::oid2FileName
int oId = 42;
@@ -1565,7 +1565,7 @@ public:
BlockOp blockOp;
char fileName[20];
int rc;
- char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
+ char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
int dbRoot = 1;
idbdatafile::IDBPolicy::init(true, false, "", 0);
diff --git a/tools/rebuildEM/rebuildEM.cpp b/tools/rebuildEM/rebuildEM.cpp
index 2895d40b4..cb0d20b40 100644
--- a/tools/rebuildEM/rebuildEM.cpp
+++ b/tools/rebuildEM/rebuildEM.cpp
@@ -89,7 +89,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
}
// Read and verify header.
- char fileHeader[compress::IDBCompressInterface::HDR_BUF_LEN * 2];
+ char fileHeader[compress::CompressInterface::HDR_BUF_LEN * 2];
rc = fileOp.readHeaders(dbFile.get(), fileHeader);
if (rc != 0)
{
@@ -116,8 +116,8 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
}
// Read the `colDataType` and `colWidth` from the given header.
- compress::IDBCompressInterface compressor;
- const auto versionNumber = compressor.getVersionNumber(fileHeader);
+ const auto versionNumber =
+ compress::CompressInterface::getVersionNumber(fileHeader);
// Verify header number.
if (versionNumber < 3)
{
@@ -129,10 +129,11 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
return -1;
}
- auto colDataType = compressor.getColDataType(fileHeader);
- auto colWidth = compressor.getColumnWidth(fileHeader);
- auto blockCount = compressor.getBlockCount(fileHeader);
- auto lbidCount = compressor.getLBIDCount(fileHeader);
+ auto colDataType = compress::CompressInterface::getColDataType(fileHeader);
+ auto colWidth = compress::CompressInterface::getColumnWidth(fileHeader);
+ auto blockCount = compress::CompressInterface::getBlockCount(fileHeader);
+ auto lbidCount = compress::CompressInterface::getLBIDCount(fileHeader);
+ auto compressionType = compress::CompressInterface::getCompressionType(fileHeader);
if (colDataType == execplan::CalpontSystemCatalog::UNDEFINED)
{
@@ -155,7 +156,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
uint64_t hwm = 0;
rc = searchHWMInSegmentFile(oid, getDBRoot(), partition, segment, colDataType, colWidth,
- blockCount, isDict, hwm);
+ blockCount, isDict, compressionType, hwm);
if (rc != 0)
{
return rc;
@@ -172,13 +173,13 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
{
for (uint32_t lbidIndex = 0; lbidIndex < lbidCount - 1; ++lbidIndex)
{
- auto lbid = compressor.getLBIDByIndex(fileHeader, lbidIndex);
+ auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidIndex);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, /*hwm*/ 0, isDict);
extentMap.push_back(fileId);
}
// Last one has an actual HWM.
- auto lbid = compressor.getLBIDByIndex(fileHeader, lbidCount - 1);
+ auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidCount - 1);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId);
@@ -192,7 +193,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
else
{
// One extent per segment file.
- auto lbid = compressor.getLBIDByIndex(fileHeader, 0);
+ auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, 0);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId);
@@ -293,7 +294,7 @@ int32_t EMReBuilder::rebuildExtentMap()
int32_t EMReBuilder::searchHWMInSegmentFile(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
- uint64_t blockCount, bool isDict, uint64_t& hwm)
+ uint64_t blockCount, bool isDict, uint32_t compressionType, uint64_t& hwm)
{
std::unique_ptr chunkManagerWrapper;
try
@@ -302,13 +303,15 @@ int32_t EMReBuilder::searchHWMInSegmentFile(
{
chunkManagerWrapper = std::unique_ptr(
new ChunkManagerWrapperDict(oid, dbRoot, partition, segment,
- colDataType, colWidth));
+ colDataType, colWidth,
+ compressionType));
}
else
{
chunkManagerWrapper = std::unique_ptr(
new ChunkManagerWrapperColumn(oid, dbRoot, partition, segment,
- colDataType, colWidth));
+ colDataType, colWidth,
+ compressionType));
}
}
catch (...)
@@ -401,12 +404,13 @@ int32_t ChunkManagerWrapper::readBlock(uint32_t blockNumber)
ChunkManagerWrapperColumn::ChunkManagerWrapperColumn(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
- execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth)
+ execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
+ uint32_t compressionType)
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType,
colWidth)
{
pFileOp = std::unique_ptr(
- new WriteEngine::ColumnOpCompress1());
+ new WriteEngine::ColumnOpCompress1(compressionType));
chunkManager.fileOp(pFileOp.get());
// Open compressed column segment file. We will read block by block
// from the compressed chunks.
@@ -463,12 +467,13 @@ bool ChunkManagerWrapperColumn::isEmptyValue(const uint8_t* value) const
ChunkManagerWrapperDict::ChunkManagerWrapperDict(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
- execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth)
+ execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
+ uint32_t compressionType)
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType,
colWidth)
{
pFileOp = std::unique_ptr(
- new WriteEngine::DctnryCompress1());
+ new WriteEngine::DctnryCompress1(compressionType));
chunkManager.fileOp(pFileOp.get());
// Open compressed dict segment file.
pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment,
diff --git a/tools/rebuildEM/rebuildEM.h b/tools/rebuildEM/rebuildEM.h
index 03db4896d..481a2e102 100644
--- a/tools/rebuildEM/rebuildEM.h
+++ b/tools/rebuildEM/rebuildEM.h
@@ -112,7 +112,8 @@ class EMReBuilder
int32_t searchHWMInSegmentFile(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
- uint32_t width, uint64_t blocksCount, bool isDict, uint64_t& hwm);
+ uint32_t width, uint64_t blocksCount, bool isDict,
+ uint32_t compressionType, uint64_t& hwm);
// Sets the dbroot to the given `number`.
void setDBRoot(uint32_t number) { dbRoot = number; }
@@ -184,7 +185,7 @@ class ChunkManagerWrapperColumn : public ChunkManagerWrapper
ChunkManagerWrapperColumn(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
- uint32_t colWidth);
+ uint32_t colWidth, uint32_t compressionType);
~ChunkManagerWrapperColumn() = default;
ChunkManagerWrapperColumn(const ChunkManagerWrapperColumn& other) = delete;
@@ -210,7 +211,7 @@ class ChunkManagerWrapperDict : public ChunkManagerWrapper
ChunkManagerWrapperDict(
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
execplan::CalpontSystemCatalog::ColDataType colDataType,
- uint32_t colWidth);
+ uint32_t colWidth, uint32_t compressionType);
~ChunkManagerWrapperDict() = default;
ChunkManagerWrapperDict(const ChunkManagerWrapperDict& other) = delete;
diff --git a/utils/compress/CMakeLists.txt b/utils/compress/CMakeLists.txt
index 4156531e4..b3dc2b068 100644
--- a/utils/compress/CMakeLists.txt
+++ b/utils/compress/CMakeLists.txt
@@ -10,7 +10,7 @@ add_definitions(-DNDEBUG)
add_library(compress SHARED ${compress_LIB_SRCS})
-target_link_libraries(compress ${SNAPPY_LIBRARIES})
+target_link_libraries(compress ${SNAPPY_LIBRARIES} ${LZ4_LIBRARIES})
install(TARGETS compress DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine)
diff --git a/utils/compress/idbcompress.cpp b/utils/compress/idbcompress.cpp
index a434f19f5..79f812fbd 100644
--- a/utils/compress/idbcompress.cpp
+++ b/utils/compress/idbcompress.cpp
@@ -22,12 +22,14 @@
#include
#include
#include
+#include
using namespace std;
#include "blocksize.h"
#include "logger.h"
#include "snappy.h"
#include "hasher.h"
+#include "lz4.h"
#define IDBCOMP_DLLEXPORT
#include "idbcompress.h"
@@ -39,8 +41,7 @@ const uint64_t MAGIC_NUMBER = 0xfdc119a384d0778eULL;
const uint64_t VERSION_NUM1 = 1;
const uint64_t VERSION_NUM2 = 2;
const uint64_t VERSION_NUM3 = 3;
-const int COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
-const int PTR_SECTION_OFFSET = compress::IDBCompressInterface::HDR_BUF_LEN;
+const int PTR_SECTION_OFFSET = compress::CompressInterface::HDR_BUF_LEN;
// version 1.1 of the chunk data has a short header
// QuickLZ compressed data never has the high bit set on the first byte
@@ -83,7 +84,7 @@ struct CompressedDBFileHeader
union CompressedDBFileHeaderBlock
{
CompressedDBFileHeader fHeader;
- char fDummy[compress::IDBCompressInterface::HDR_BUF_LEN];
+ char fDummy[compress::CompressInterface::HDR_BUF_LEN];
};
void initCompressedDBFileHeader(
@@ -110,53 +111,57 @@ namespace compress
{
#ifndef SKIP_IDB_COMPRESSION
-IDBCompressInterface::IDBCompressInterface(unsigned int numUserPaddingBytes) :
+CompressInterface::CompressInterface(unsigned int numUserPaddingBytes) :
fNumUserPaddingBytes(numUserPaddingBytes)
{ }
-IDBCompressInterface::~IDBCompressInterface()
-{ }
-
/* V1 is really only available for decompression, we kill any DDL using V1 by hand.
* Maybe should have a new api, isDecompressionAvail() ? Any request to compress
* using V1 will silently be changed to V2.
*/
-bool IDBCompressInterface::isCompressionAvail(int compressionType) const
+/*static*/
+bool CompressInterface::isCompressionAvail(int compressionType)
{
- if ( (compressionType == 0) ||
- (compressionType == 1) ||
- (compressionType == 2) )
- return true;
+    // Accepted type ids form the contiguous range 0..3.
+    return (compressionType >= 0) && (compressionType <= 3);
+}
- return false;
+size_t CompressInterface::getMaxCompressedSizeGeneric(size_t inLen)
+{
+    const size_t snappyBound = snappy::MaxCompressedLength(inLen);
+    const size_t lz4Bound = LZ4_COMPRESSBOUND(inLen);  // macro from lz4.h
+    return std::max(snappyBound, lz4Bound) + HEADER_SIZE;  // larger codec bound + HEADER_SIZE
}
//------------------------------------------------------------------------------
// Compress a block of data
//------------------------------------------------------------------------------
-int IDBCompressInterface::compressBlock(const char* in,
- const size_t inLen,
- unsigned char* out,
- unsigned int& outLen) const
+int CompressInterface::compressBlock(const char* in, const size_t inLen,
+ unsigned char* out, size_t& outLen) const
{
size_t snaplen = 0;
utils::Hasher128 hasher;
// loose input checking.
- if (outLen < snappy::MaxCompressedLength(inLen) + HEADER_SIZE)
+ if (outLen < maxCompressedSize(inLen))
{
- cerr << "got outLen = " << outLen << " for inLen = " << inLen << ", needed " <<
- (snappy::MaxCompressedLength(inLen) + HEADER_SIZE) << endl;
+ cerr << "got outLen = " << outLen << " for inLen = " << inLen
+ << ", needed " << (maxCompressedSize(inLen)) << endl;
return ERR_BADOUTSIZE;
}
- //apparently this never fails?
- snappy::RawCompress(in, inLen, reinterpret_cast(&out[HEADER_SIZE]), &snaplen);
+ auto rc = compress(in, inLen, reinterpret_cast(&out[HEADER_SIZE]),
+ &outLen);
+ if (rc != ERR_OK)
+ {
+ return rc;
+ }
+ snaplen = outLen;
uint8_t* signature = (uint8_t*) &out[SIG_OFFSET];
uint32_t* checksum = (uint32_t*) &out[CHECKSUM_OFFSET];
uint32_t* len = (uint32_t*) &out[LEN_OFFSET];
- *signature = CHUNK_MAGIC3;
+ *signature = getChunkMagicNumber();
*checksum = hasher((char*) &out[HEADER_SIZE], snaplen);
*len = snaplen;
@@ -171,51 +176,47 @@ int IDBCompressInterface::compressBlock(const char* in,
//------------------------------------------------------------------------------
// Decompress a block of data
//------------------------------------------------------------------------------
-int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
- unsigned int& outLen) const
+int CompressInterface::uncompressBlock(const char* in, const size_t inLen,
+ unsigned char* out,
+ size_t& outLen) const
{
- bool comprc = false;
- size_t ol = 0;
-
uint32_t realChecksum;
uint32_t storedChecksum;
uint32_t storedLen;
uint8_t storedMagic;
utils::Hasher128 hasher;
-
+ auto tmpOutLen = outLen;
outLen = 0;
if (inLen < 1)
- {
return ERR_BADINPUT;
- }
storedMagic = *((uint8_t*) &in[SIG_OFFSET]);
- if (storedMagic == CHUNK_MAGIC3)
+ if (storedMagic == getChunkMagicNumber())
{
if (inLen < HEADER_SIZE)
- {
return ERR_BADINPUT;
- }
storedChecksum = *((uint32_t*) &in[CHECKSUM_OFFSET]);
storedLen = *((uint32_t*) (&in[LEN_OFFSET]));
if (inLen < storedLen + HEADER_SIZE)
- {
return ERR_BADINPUT;
- }
realChecksum = hasher(&in[HEADER_SIZE], storedLen);
if (storedChecksum != realChecksum)
- {
return ERR_CHECKSUM;
+
+ auto rc = uncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out), &tmpOutLen);
+ if (rc != ERR_OK)
+ {
+ cerr << "uncompressBlock failed!" << endl;
+ return ERR_DECOMPRESS;
}
- comprc = snappy::GetUncompressedLength(&in[HEADER_SIZE], storedLen, &ol) &&
- snappy::RawUncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out));
+ outLen = tmpOutLen;
}
else
{
@@ -223,13 +224,6 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
return ERR_BADINPUT;
}
- if (!comprc)
- {
- cerr << "decomp failed!" << endl;
- return ERR_DECOMPRESS;
- }
-
- outLen = ol;
//cerr << "ub: " << inLen << " : " << outLen << endl;
return ERR_OK;
@@ -238,7 +232,7 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
//------------------------------------------------------------------------------
// Verify the passed in buffer contains a valid compression file header.
//------------------------------------------------------------------------------
-int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
+int CompressInterface::verifyHdr(const void* hdrBuf)
{
const CompressedDBFileHeader* hdr = reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf);
@@ -255,9 +249,8 @@ int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
// Extract compression pointer information out of the pointer buffer that is
// passed in. ptrBuf points to the pointer section of the compression hdr.
//------------------------------------------------------------------------------
-int IDBCompressInterface::getPtrList(const char* ptrBuf,
- const int ptrBufSize,
- CompChunkPtrList& chunkPtrs ) const
+int CompressInterface::getPtrList(const char* ptrBuf, const int ptrBufSize,
+ CompChunkPtrList& chunkPtrs)
{
int rc = 0;
chunkPtrs.clear();
@@ -285,7 +278,7 @@ int IDBCompressInterface::getPtrList(const char* ptrBuf,
// one for the file header, and one for the list of pointers.
// Wrapper of above method for backward compatibility.
//------------------------------------------------------------------------------
-int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs ) const
+int CompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs )
{
return getPtrList(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN, chunkPtrs);
}
@@ -293,8 +286,8 @@ int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunk
//------------------------------------------------------------------------------
// Count the number of chunk pointers in the pointer header(s)
//------------------------------------------------------------------------------
-unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
- const int ptrBufSize) const
+unsigned int CompressInterface::getPtrCount(const char* ptrBuf,
+ const int ptrBufSize)
{
unsigned int chunkCount = 0;
@@ -318,7 +311,7 @@ unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
// This should not be used for compressed dictionary files which could have
// more compression chunk headers.
//------------------------------------------------------------------------------
-unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
+unsigned int CompressInterface::getPtrCount(const char* hdrBuf)
{
return getPtrCount(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN);
}
@@ -326,9 +319,8 @@ unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
//------------------------------------------------------------------------------
// Store list of compression pointers into the specified header.
//------------------------------------------------------------------------------
-void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
- void* ptrBuf,
- int ptrSectionSize) const
+void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
+ void* ptrBuf, int ptrSectionSize)
{
memset((ptrBuf), 0, ptrSectionSize); // reset the pointer section to 0
uint64_t* hdrPtrs = reinterpret_cast<uint64_t*>(ptrBuf);
@@ -342,7 +334,7 @@ void IDBCompressInterface::storePtrs(const std::vector& ptrs,
//------------------------------------------------------------------------------
// Wrapper of above method for backward compatibility
//------------------------------------------------------------------------------
-void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf) const
+void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf)
{
storePtrs(ptrs, reinterpret_cast<char*>(ptrBuf) + HDR_BUF_LEN, HDR_BUF_LEN);
}
@@ -350,10 +342,10 @@ void IDBCompressInterface::storePtrs(const std::vector& ptrs, void* pt
//------------------------------------------------------------------------------
// Initialize the header blocks to be written at the start of a dictionary file.
//------------------------------------------------------------------------------
-void IDBCompressInterface::initHdr(
+void CompressInterface::initHdr(
void* hdrBuf, void* ptrBuf, uint32_t colWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
- int compressionType, int hdrSize) const
+ int compressionType, int hdrSize)
{
memset(hdrBuf, 0, HDR_BUF_LEN);
memset(ptrBuf, 0, hdrSize - HDR_BUF_LEN);
@@ -364,10 +356,10 @@ void IDBCompressInterface::initHdr(
//------------------------------------------------------------------------------
// Initialize the header blocks to be written at the start of a column file.
//------------------------------------------------------------------------------
-void IDBCompressInterface::initHdr(
+void CompressInterface::initHdr(
void* hdrBuf, uint32_t columnWidth,
execplan::CalpontSystemCatalog::ColDataType columnType,
- int compressionType) const
+ int compressionType)
{
memset(hdrBuf, 0, HDR_BUF_LEN * 2);
initCompressedDBFileHeader(hdrBuf, columnWidth, columnType,
@@ -377,7 +369,7 @@ void IDBCompressInterface::initHdr(
//------------------------------------------------------------------------------
// Get the header's version number
//------------------------------------------------------------------------------
-uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
+uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
{
return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fVersionNum);
@@ -386,7 +378,7 @@ uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
//------------------------------------------------------------------------------
// Set the file's block count
//------------------------------------------------------------------------------
-void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
+void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count)
{
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fBlockCount = count;
}
@@ -394,15 +386,24 @@ void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
//------------------------------------------------------------------------------
// Get the file's block count
//------------------------------------------------------------------------------
-uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
+uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
{
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fBlockCount);
}
+//------------------------------------------------------------------------------
+// Get the file's compression type
+//------------------------------------------------------------------------------
+uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
+{
+ return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)
+ ->fCompressionType);
+}
+
//------------------------------------------------------------------------------
// Set the overall header size
//------------------------------------------------------------------------------
-void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
+void CompressInterface::setHdrSize(void* hdrBuf, uint64_t size)
{
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fHeaderSize = size;
}
@@ -410,7 +411,7 @@ void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
//------------------------------------------------------------------------------
// Get the overall header size
//------------------------------------------------------------------------------
-uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
+uint64_t CompressInterface::getHdrSize(const void* hdrBuf)
{
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fHeaderSize);
}
@@ -419,7 +420,7 @@ uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
// Get column type
//-----------------------------------------------------------------------------
execplan::CalpontSystemCatalog::ColDataType
-IDBCompressInterface::getColDataType(const void* hdrBuf) const
+CompressInterface::getColDataType(const void* hdrBuf)
{
return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColDataType);
@@ -428,7 +429,7 @@ IDBCompressInterface::getColDataType(const void* hdrBuf) const
//------------------------------------------------------------------------------
// Get column width
//------------------------------------------------------------------------------
-uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
+uint64_t CompressInterface::getColumnWidth(const void* hdrBuf)
{
return (
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColumnWidth);
@@ -437,7 +438,7 @@ uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
//------------------------------------------------------------------------------
// Get LBID by index
//------------------------------------------------------------------------------
-uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index) const
+uint64_t CompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index)
{
if (index < LBID_MAX_SIZE)
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDS[index]);
@@ -447,7 +448,7 @@ uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index
//------------------------------------------------------------------------------
// Set LBID by index
//------------------------------------------------------------------------------
-void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const
+void CompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index)
{
if (lbid && index < LBID_MAX_SIZE)
{
@@ -457,7 +458,10 @@ void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t
}
}
-uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
+//------------------------------------------------------------------------------
+// Get LBID count
+//------------------------------------------------------------------------------
+uint64_t CompressInterface::getLBIDCount(void* hdrBuf)
{
return reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDCount;
}
@@ -466,9 +470,9 @@ uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
// Calculates the chunk and block offset within the chunk for the specified
// block number.
//------------------------------------------------------------------------------
-void IDBCompressInterface::locateBlock(unsigned int block,
- unsigned int& chunkIndex,
- unsigned int& blockOffsetWithinChunk) const
+void CompressInterface::locateBlock(unsigned int block,
+ unsigned int& chunkIndex,
+ unsigned int& blockOffsetWithinChunk) const
{
const uint64_t BUFLEN = UNCOMPRESSED_INBUF_LEN;
@@ -485,9 +489,8 @@ void IDBCompressInterface::locateBlock(unsigned int block,
// also expand to allow for user requested padding. Lastly, initialize padding
// bytes to 0.
//------------------------------------------------------------------------------
-int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
- unsigned int& len,
- unsigned int maxLen) const
+int CompressInterface::padCompressedChunks(unsigned char* buf, size_t& len,
+ unsigned int maxLen) const
{
int nPaddingBytes = 0;
int nRem = len % COMPRESSED_CHUNK_INCREMENT_SIZE;
@@ -511,30 +514,203 @@ int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
return 0;
}
-/* static */
-uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
+// Snappy
+CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
+ : CompressInterface(numUserPaddingBytes)
+{
+}
+
+int32_t CompressInterfaceSnappy::compress(const char* in, size_t inLen,
+ char* out, size_t* outLen) const
+{
+ snappy::RawCompress(in, inLen, out, outLen);
+
+#ifdef DEBUG_COMPRESSION
+ std::cout << "Snappy::compress: inLen " << inLen << ", outLen " << *outLen
+ << std::endl;
+#endif
+
+ return ERR_OK;
+}
+
+int32_t CompressInterfaceSnappy::uncompress(const char* in, size_t inLen,
+ char* out, size_t* outLen) const
+{
+ size_t realOutLen = 0;
+ auto rc = snappy::GetUncompressedLength(in, inLen, &realOutLen);
+
+ if (!rc || realOutLen > *outLen)
+ {
+ cerr << "snappy::GetUncompressedLength failed. InLen: " << inLen
+ << ", outLen: " << *outLen << ", realOutLen: " << realOutLen
+ << endl;
+ return ERR_DECOMPRESS;
+ }
+
+ rc = snappy::RawUncompress(in, inLen, out);
+
+ if (!rc)
+ {
+ cerr << "snappy::RawUncompress failed. InLen: " << inLen
+ << ", outLen: " << *outLen << endl;
+ return ERR_DECOMPRESS;
+ }
+
+#ifdef DEBUG_COMPRESSION
+ std::cout << "Snappy::uncompress: inLen " << inLen << ", outLen "
+ << *outLen << std::endl;
+#endif
+ *outLen = realOutLen;
+
+ return ERR_OK;
+}
+
+size_t CompressInterfaceSnappy::maxCompressedSize(size_t uncompSize) const
{
return (snappy::MaxCompressedLength(uncompSize) + HEADER_SIZE);
}
-int IDBCompressInterface::compress(const char* in, size_t inLen, char* out,
- size_t* outLen) const
-{
- snappy::RawCompress(in, inLen, out, outLen);
- return 0;
-}
-
-int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
-{
- return !(snappy::RawUncompress(in, inLen, out));
-}
-
-/* static */
-bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, size_t* outLen)
+bool CompressInterfaceSnappy::getUncompressedSize(char* in, size_t inLen,
+ size_t* outLen) const
{
return snappy::GetUncompressedLength(in, inLen, outLen);
}
+uint8_t CompressInterfaceSnappy::getChunkMagicNumber() const
+{
+ return CHUNK_MAGIC_SNAPPY;
+}
+
+// LZ4
+CompressInterfaceLZ4::CompressInterfaceLZ4(uint32_t numUserPaddingBytes)
+ : CompressInterface(numUserPaddingBytes)
+{
+}
+
+int32_t CompressInterfaceLZ4::compress(const char* in, size_t inLen, char* out,
+ size_t* outLen) const
+{
+ auto compressedLen = LZ4_compress_default(in, out, inLen, *outLen);
+
+ if (!compressedLen)
+ {
+ cerr << "LZ4_compress_default failed. InLen: " << inLen
+ << ", compressedLen: " << compressedLen << endl;
+ return ERR_COMPRESS;
+ }
+
+#ifdef DEBUG_COMPRESSION
+ std::cout << "LZ4::compress: inLen " << inLen << ", compressedLen "
+ << compressedLen << std::endl;
+#endif
+
+ *outLen = compressedLen;
+ return ERR_OK;
+}
+
+int32_t CompressInterfaceLZ4::uncompress(const char* in, size_t inLen,
+ char* out, size_t* outLen) const
+{
+ auto decompressedLen = LZ4_decompress_safe(in, out, inLen, *outLen);
+
+ if (decompressedLen < 0)
+ {
+ cerr << "LZ4_decompress_safe failed with error code " << decompressedLen
+ << endl;
+ cerr << "InLen: " << inLen << ", outLen: " << *outLen << endl;
+ return ERR_DECOMPRESS;
+ }
+
+ *outLen = decompressedLen;
+
+#ifdef DEBUG_COMPRESSION
+ std::cout << "LZ4::uncompress: inLen " << inLen << ", outLen " << *outLen
+ << std::endl;
+#endif
+
+ return ERR_OK;
+}
+
+size_t CompressInterfaceLZ4::maxCompressedSize(size_t uncompSize) const
+{
+ return (LZ4_COMPRESSBOUND(uncompSize) + HEADER_SIZE);
+}
+
+bool CompressInterfaceLZ4::getUncompressedSize(char* in, size_t inLen,
+ size_t* outLen) const
+{
+ // LZ4 does not have such function.
+ idbassert(false);
+ return false;
+}
+
+uint8_t CompressInterfaceLZ4::getChunkMagicNumber() const
+{
+ return CHUNK_MAGIC_LZ4;
+}
+
+CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
+ uint32_t numUserPaddingBytes)
+{
+ switch (compressionType)
+ {
+ case 1:
+ case 2:
+ return new CompressInterfaceSnappy(numUserPaddingBytes);
+ case 3:
+ return new CompressInterfaceLZ4(numUserPaddingBytes);
+ }
+
+ return nullptr;
+}
+
+CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
+ uint32_t numUserPaddingBytes)
+{
+ if (compressionName == "SNAPPY")
+ return new CompressInterfaceSnappy(numUserPaddingBytes);
+ else if (compressionName == "LZ4")
+ return new CompressInterfaceLZ4(numUserPaddingBytes);
+ return nullptr;
+}
+
+void initializeCompressorPool(
+ std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
+ compressorPool,
+ uint32_t numUserPaddingBytes)
+{
+ compressorPool = {
+ make_pair(2, std::shared_ptr<CompressInterface>(
+ new CompressInterfaceSnappy(numUserPaddingBytes))),
+ make_pair(3, std::shared_ptr<CompressInterface>(
+ new CompressInterfaceLZ4(numUserPaddingBytes)))};
+}
+
+std::shared_ptr<CompressInterface> getCompressorByType(
+ std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
+ compressorPool,
+ uint32_t compressionType)
+{
+ switch (compressionType)
+ {
+ case 1:
+ case 2:
+ if (!compressorPool.count(2))
+ {
+ return nullptr;
+ }
+ return compressorPool[2];
+ case 3:
+ if (!compressorPool.count(3))
+ {
+ return nullptr;
+ }
+ return compressorPool[3];
+ }
+
+ return nullptr;
+}
+
#endif
} // namespace compress
diff --git a/utils/compress/idbcompress.h b/utils/compress/idbcompress.h
index 03d327a41..23c02f966 100644
--- a/utils/compress/idbcompress.h
+++ b/utils/compress/idbcompress.h
@@ -26,6 +26,7 @@
#endif
#include <vector>
#include <utility>
+#include <unordered_map>
#include "calpontsystemcatalog.h"
@@ -41,11 +42,12 @@ namespace compress
typedef std::pair<uint64_t, uint64_t> CompChunkPtr;
typedef std::vector<CompChunkPtr> CompChunkPtrList;
-class IDBCompressInterface
+class CompressInterface
{
public:
static const unsigned int HDR_BUF_LEN = 4096;
static const unsigned int UNCOMPRESSED_INBUF_LEN = 512 * 1024 * 8;
+ static const uint32_t COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
// error codes from uncompressBlock()
static const int ERR_OK = 0;
@@ -53,22 +55,29 @@ public:
static const int ERR_DECOMPRESS = -2;
static const int ERR_BADINPUT = -3;
static const int ERR_BADOUTSIZE = -4;
+ static const int ERR_COMPRESS = -5;
/**
- * When IDBCompressInterface object is being used to compress a chunk, this
+ * When CompressInterface object is being used to compress a chunk, this
* construct can be used to specify the padding added by padCompressedChunks
*/
- EXPORT explicit IDBCompressInterface(unsigned int numUserPaddingBytes = 0);
+ EXPORT explicit CompressInterface(unsigned int numUserPaddingBytes = 0);
/**
* dtor
*/
- EXPORT virtual ~IDBCompressInterface();
+ EXPORT virtual ~CompressInterface() = default;
/**
* see if the algo is available in this lib
*/
- EXPORT bool isCompressionAvail(int compressionType = 0) const;
+ EXPORT static bool isCompressionAvail(int compressionType = 0);
+
+ /**
+ * Returns the maximum compressed size from all available compression
+ * types.
+ */
+ EXPORT static size_t getMaxCompressedSizeGeneric(size_t inLen);
/**
* Compresses specified "in" buffer of length "inLen" bytes.
@@ -76,30 +85,31 @@ public:
* "out" should be sized using maxCompressedSize() to allow for incompressible data.
* Returns 0 if success.
*/
- EXPORT int compressBlock(const char* in,
- const size_t inLen,
- unsigned char* out,
- unsigned int& outLen) const;
+
+ EXPORT int compressBlock(const char* in, const size_t inLen,
+ unsigned char* out, size_t& outLen) const;
/**
* outLen must be initialized with the size of the out buffer before calling uncompressBlock.
* On return, outLen will have the number of bytes used in out.
*/
- EXPORT int uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
- unsigned int& outLen) const;
+ EXPORT int uncompressBlock(const char* in, const size_t inLen,
+ unsigned char* out, size_t& outLen) const;
/**
* This fcn wraps whatever compression algorithm we're using at the time, and
* is not specific to blocks on disk.
*/
- EXPORT int compress(const char* in, size_t inLen, char* out, size_t* outLen) const;
+ EXPORT virtual int compress(const char* in, size_t inLen, char* out,
+ size_t* outLen) const = 0;
/**
* This fcn wraps whatever compression algorithm we're using at the time, and
* is not specific to blocks on disk. The caller needs to make sure out is big
* enough to contain the output by using getUncompressedSize().
*/
- EXPORT int uncompress(const char* in, size_t inLen, char* out) const;
+ EXPORT virtual int uncompress(const char* in, size_t inLen, char* out,
+ size_t* outLen) const = 0;
/**
* Initialize header buffer at start of compressed db file.
@@ -107,23 +117,24 @@ public:
* @warning hdrBuf must be at least HDR_BUF_LEN bytes
* @warning ptrBuf must be at least (hdrSize-HDR_BUF_LEN) bytes
*/
- EXPORT void initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
- execplan::CalpontSystemCatalog::ColDataType columnType,
- int compressionType, int hdrSize) const;
-
+ EXPORT static void
+ initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidth,
+ execplan::CalpontSystemCatalog::ColDataType columnType,
+ int compressionType, int hdrSize);
/**
* Initialize header buffer at start of compressed db file.
*
* @warning hdrBuf must be at least HDR_BUF_LEN*2 bytes
*/
- EXPORT void initHdr(void* hdrBuf, uint32_t columnWidth,
- execplan::CalpontSystemCatalog::ColDataType columnType,
- int compressionType) const;
+ EXPORT static void
+ initHdr(void* hdrBuf, uint32_t columnWidth,
+ execplan::CalpontSystemCatalog::ColDataType columnType,
+ int compressionType);
/**
* Verify the passed in buffer contains a compressed db file header.
*/
- EXPORT int verifyHdr(const void* hdrBuf) const;
+ EXPORT static int verifyHdr(const void* hdrBuf);
/**
* Extracts list of compression pointers from the specified ptr buffer.
@@ -131,9 +142,8 @@ public:
* chunkPtrs is a vector of offset, size pairs for the compressed chunks.
* Returns 0 if success.
*/
- EXPORT int getPtrList(const char* ptrBuf,
- const int ptrBufSize,
- CompChunkPtrList& chunkPtrs) const;
+ EXPORT static int getPtrList(const char* ptrBuf, const int ptrBufSize,
+ CompChunkPtrList& chunkPtrs);
/**
* Extracts list of compression pointers from the specified header.
@@ -142,28 +152,28 @@ public:
* Note: the pointer passed in is the beginning of the header,
* not the pointer section as above.
*/
- EXPORT int getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs) const;
+ EXPORT static int getPtrList(const char* hdrBuf,
+ CompChunkPtrList& chunkPtrs);
/**
* Return the number of chunk pointers contained in the specified ptr buffer.
* ptrBuf points to the pointer section taken from the headers.
*/
- EXPORT unsigned int getPtrCount(const char* ptrBuf,
- const int ptrBufSize) const;
+ EXPORT static unsigned int getPtrCount(const char* ptrBuf,
+ const int ptrBufSize);
/**
* Return the number of chunk pointers contained in the specified header.
* hdrBuf points to start of 2 buffer headers from compressed db file.
* For non-dictionary columns.
*/
- EXPORT unsigned int getPtrCount(const char* hdrBuf) const;
+ EXPORT static unsigned int getPtrCount(const char* hdrBuf);
/**
* Store vector of pointers into the specified buffer header's pointer section.
*/
- EXPORT void storePtrs(const std::vector<uint64_t>& ptrs,
- void* hdrBuf,
- int ptrSectionSize) const;
+ EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
+ void* hdrBuf, int ptrSectionSize);
/**
* Store vector of pointers into the specified buffer header.
@@ -171,14 +181,14 @@ public:
* Note: the pointer passed in is the beginning of the header,
* not the pointer section as above.
*/
- EXPORT void storePtrs(const std::vector<uint64_t>& ptrs, void* hdrBuf) const;
+ EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
+ void* hdrBuf);
/**
* Calculates the chunk, and the block offset within the chunk, for the
* specified block number.
*/
- EXPORT void locateBlock(unsigned int block,
- unsigned int& chunkIndex,
+ EXPORT void locateBlock(unsigned int block, unsigned int& chunkIndex,
unsigned int& blockOffsetWithinChunk) const;
/**
@@ -187,9 +197,8 @@ public:
* maxLen is the maximum size for buf. nonzero return code means the
* result output buffer length is > than maxLen.
*/
- EXPORT int padCompressedChunks(unsigned char* buf,
- unsigned int& len,
- unsigned int maxLen ) const;
+ EXPORT int padCompressedChunks(unsigned char* buf, size_t& len,
+ unsigned int maxLen) const;
/*
* Mutator methods for the block count in the file
@@ -197,17 +206,22 @@ public:
/**
* getVersionNumber
*/
- EXPORT uint64_t getVersionNumber(const void* hdrBuf) const;
+ EXPORT static uint64_t getVersionNumber(const void* hdrBuf);
/**
* setBlockCount
*/
- EXPORT void setBlockCount(void* hdrBuf, uint64_t count) const;
+ EXPORT static void setBlockCount(void* hdrBuf, uint64_t count);
/**
* getBlockCount
*/
- EXPORT uint64_t getBlockCount(const void* hdrBuf) const;
+ EXPORT static uint64_t getBlockCount(const void* hdrBuf);
+
+ /**
+ * getCompressionType
+ */
+ EXPORT static uint64_t getCompressionType(const void* hdrBuf);
/*
* Mutator methods for the overall header size
@@ -215,38 +229,38 @@ public:
/**
* setHdrSize
*/
- EXPORT void setHdrSize(void* hdrBuf, uint64_t size) const;
+ EXPORT static void setHdrSize(void* hdrBuf, uint64_t size);
/**
* getHdrSize
*/
- EXPORT uint64_t getHdrSize(const void* hdrBuf) const;
+ EXPORT static uint64_t getHdrSize(const void* hdrBuf);
/**
* getColumnType
*/
- EXPORT execplan::CalpontSystemCatalog::ColDataType
- getColDataType(const void* hdrBuf) const;
+ EXPORT static execplan::CalpontSystemCatalog::ColDataType
+ getColDataType(const void* hdrBuf);
/**
* getColumnWidth
*/
- EXPORT uint64_t getColumnWidth(const void* hdrBuf) const;
+ EXPORT static uint64_t getColumnWidth(const void* hdrBuf);
/**
* getLBIDByIndex
*/
- EXPORT uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index) const;
+ EXPORT static uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index);
/**
* setLBIDByIndex
*/
- EXPORT void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const;
+ EXPORT static void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index);
/**
* getLBIDCount
*/
- EXPORT uint64_t getLBIDCount(void* hdrBuf) const;
+ EXPORT static uint64_t getLBIDCount(void* hdrBuf);
/**
* Mutator methods for the user padding bytes
@@ -271,97 +285,213 @@ public:
* Given an input, uncompressed block, what's the maximum possible output,
* compressed size?
*/
- EXPORT static uint64_t maxCompressedSize(uint64_t uncompSize);
+ EXPORT virtual size_t maxCompressedSize(size_t uncompSize) const = 0;
/**
* Given a compressed block, returns the uncompressed size in outLen.
* Returns false on error, true on success.
*/
- EXPORT static bool getUncompressedSize(char* in, size_t inLen, size_t* outLen);
+ EXPORT virtual bool getUncompressedSize(char* in, size_t inLen,
+ size_t* outLen) const = 0;
-protected:
+ protected:
+ virtual uint8_t getChunkMagicNumber() const = 0;
-private:
+ private:
//defaults okay
- //IDBCompressInterface(const IDBCompressInterface& rhs);
- //IDBCompressInterface& operator=(const IDBCompressInterface& rhs);
+ //CompressInterface(const CompressInterface& rhs);
+ //CompressInterface& operator=(const CompressInterface& rhs);
unsigned int fNumUserPaddingBytes; // Num bytes to pad compressed chunks
};
+class CompressInterfaceSnappy : public CompressInterface
+{
+ public:
+ EXPORT CompressInterfaceSnappy(uint32_t numUserPaddingBytes = 0);
+ EXPORT ~CompressInterfaceSnappy() = default;
+ /**
+ * Compress the given block using snappy compression API.
+ */
+ EXPORT int32_t compress(const char* in, size_t inLen, char* out,
+ size_t* outLen) const override;
+ /**
+ * Uncompress the given block using snappy compression API.
+ */
+ EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
+ size_t* outLen) const override;
+ /**
+ * Get max compressed size for the given `uncompSize` value using snappy
+ * compression API.
+ */
+ EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
+
+ /**
+ * Get uncompressed size for the given block using snappy
+ * compression API.
+ */
+ EXPORT
+ bool getUncompressedSize(char* in, size_t inLen,
+ size_t* outLen) const override;
+
+ protected:
+ uint8_t getChunkMagicNumber() const override;
+
+ private:
+ const uint8_t CHUNK_MAGIC_SNAPPY = 0xfd;
+};
+
+class CompressInterfaceLZ4 : public CompressInterface
+{
+ public:
+ EXPORT CompressInterfaceLZ4(uint32_t numUserPaddingBytes = 0);
+ EXPORT ~CompressInterfaceLZ4() = default;
+ /**
+ * Compress the given block using LZ4 compression API.
+ */
+ EXPORT int32_t compress(const char* in, size_t inLen, char* out,
+ size_t* outLen) const override;
+ /**
+ * Uncompress the given block using LZ4 compression API.
+ */
+ EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
+ size_t* outLen) const override;
+ /**
+ * Get max compressed size for the given `uncompSize` value using LZ4
+ * compression API.
+ */
+ EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
+
+ /**
+ * Get uncompressed size for the given block using LZ4
+ * compression API.
+ */
+ EXPORT
+ bool getUncompressedSize(char* in, size_t inLen,
+ size_t* outLen) const override;
+
+ protected:
+ uint8_t getChunkMagicNumber() const override;
+
+ private:
+ const uint8_t CHUNK_MAGIC_LZ4 = 0xfc;
+};
+
+using CompressorPool =
+ std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>;
+
+/**
+ * Returns a pointer to the appropriate compression interface based on
+ * `compressionType`. `compressionType` must be greater than 0.
+ * Note: caller is responsible for memory deallocation.
+ */
+EXPORT CompressInterface*
+getCompressInterfaceByType(uint32_t compressionType,
+ uint32_t numUserPaddingBytes = 0);
+
+/**
+ * Returns a pointer to the appropriate compression interface based on
+ * `compressionName`.
+ * Note: caller is responsible for memory deallocation.
+ */
+EXPORT CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
+ uint32_t numUserPaddingBytes = 0);
+
+/**
+ * Initializes a given `unordered_map` with all available compression
+ * interfaces.
+ */
+EXPORT void initializeCompressorPool(CompressorPool& compressorPool,
+ uint32_t numUserPaddingBytes = 0);
+
+/**
+ * Returns a `shared_ptr` to the appropriate compression interface.
+ */
+EXPORT std::shared_ptr<CompressInterface>
+getCompressorByType(CompressorPool& compressorPool, uint32_t compressionType);
+
#ifdef SKIP_IDB_COMPRESSION
-inline IDBCompressInterface::IDBCompressInterface(unsigned int /*numUserPaddingBytes*/) {}
-inline IDBCompressInterface::~IDBCompressInterface() {}
-inline bool IDBCompressInterface::isCompressionAvail(int c) const
+inline CompressInterface::CompressInterface(unsigned int /*numUserPaddingBytes*/) {}
+inline bool CompressInterface::isCompressionAvail(int c)
{
return (c == 0);
}
-inline int IDBCompressInterface::compressBlock(const char*, const size_t, unsigned char*, unsigned int&) const
+inline int CompressInterface::compressBlock(const char*, const size_t, unsigned char*, size_t&) const
{
return -1;
}
-inline int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out, unsigned int& outLen) const
+inline int CompressInterface::uncompressBlock(const char* in,
+ const size_t inLen,
+ unsigned char* out,
+ size_t& outLen) const
{
return -1;
}
-inline int IDBCompressInterface::compress(const char* in, size_t inLen, char* out, size_t* outLen) const
+inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}
+inline int CompressInterface::verifyHdr(const void*)
{
return -1;
}
-inline int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
+inline void CompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) {}
+inline void CompressInterface::initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}  // no-op stub when compression is compiled out; not const-qualified because call sites invoke initHdr without an object
+inline int CompressInterface::getPtrList(const char*, const int, CompChunkPtrList&)
+{
+ return -1;
+}
+inline unsigned int CompressInterface::getPtrCount(const char*, const int)
{
return 0;
}
-inline void IDBCompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) const {}
-inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {}
-inline int IDBCompressInterface::verifyHdr(const void*) const
-{
- return -1;
-}
-inline int IDBCompressInterface::getPtrList(const char*, const int, CompChunkPtrList&) const
-{
- return -1;
-}
-inline int IDBCompressInterface::getPtrList(const char*, CompChunkPtrList&) const
-{
- return -1;
-}
-inline unsigned int IDBCompressInterface::getPtrCount(const char*, const int) const
+inline unsigned int CompressInterface::getPtrCount(const char*)
{
return 0;
}
-inline unsigned int IDBCompressInterface::getPtrCount(const char*) const
+inline void CompressInterface::storePtrs(const std::vector&, void*, int) {}
+inline void CompressInterface::storePtrs(const std::vector&, void*) {}
+inline void
+CompressInterface::locateBlock(unsigned int block, unsigned int& chunkIndex,
+ unsigned int& blockOffsetWithinChunk) const
{
- return 0;
}
-inline void IDBCompressInterface::storePtrs(const std::vector&, void*, int) const {}
-inline void IDBCompressInterface::storePtrs(const std::vector&, void*) const {}
-inline void IDBCompressInterface::locateBlock(unsigned int block,
- unsigned int& chunkIndex, unsigned int& blockOffsetWithinChunk) const {}
-inline int IDBCompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
+inline int CompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
{
return -1;
}
-inline uint64_t
-IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
+inline uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
{
return 0;
}
-inline void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const {}
-inline uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
+inline void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count) {}
+inline uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
{
return 0;
}
-inline void IDBCompressInterface::setHdrSize(void*, uint64_t) const {}
-inline uint64_t IDBCompressInterface::getHdrSize(const void*) const
+inline uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
{
return 0;
}
inline execplan::CalpontSystemCatalog::ColDataType
-IDBCompressInterface::getColDataType(const void* hdrBuf) const
+CompressInterface::getColDataType(const void* hdrBuf)
{
return execplan::CalpontSystemCatalog::ColDataType::UNDEFINED;
}
+inline uint64_t CompressInterface::getColumnWidth(const void* hdrBuf) const
+{
+ return 0;
+}
+inline uint64_t getLBID0(const void* hdrBuf) { return 0; }  // stub: always reports LBID 0 when compression is compiled out
+inline void setLBID0(void* hdrBuf, uint64_t lbid) {}  // no-op stub; inline like its sibling stubs so multiple includes do not clash at link time
+inline uint64_t getLBID1(const void* hdrBuf) { return 0; }  // stub: always reports LBID 0 when compression is compiled out
+inline void setLBID1(void* hdrBuf, uint64_t lbid) {}  // no-op stub; inline like its sibling stubs so multiple includes do not clash at link time
+inline void CompressInterface::setHdrSize(void*, uint64_t) {}
+inline uint64_t CompressInterface::getHdrSize(const void*)
+{
+ return 0;
+}
+inline CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)  // inline: defined among the other inline stubs, so it must tolerate multiple inclusion
+ : CompressInterface(numUserPaddingBytes)  // the base stub ctor ignores the padding value
+{
+}
inline uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const { return 0; }
inline uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
{
@@ -377,8 +507,13 @@ inline bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, si
{
return false;
}
+uint8_t getChunkMagicNumber() const { return 0; }  // NOTE(review): 'const' on a non-member function is ill-formed; presumably meant as a class-qualified member stub — confirm the owning class
+inline CompressInterface* getCompressInterfaceByType(uint32_t compressionType,  // inline: defined among the other inline stubs, so it must tolerate multiple inclusion
+ uint32_t numUserPaddingBytes)
+{
+ return nullptr;  // stub: no compression interface is available when compression is compiled out
+}
#endif
-
}
#undef EXPORT
diff --git a/utils/idbdatafile/PosixFileSystem.cpp b/utils/idbdatafile/PosixFileSystem.cpp
index 4ebdeb4ae..b9bb9644e 100644
--- a/utils/idbdatafile/PosixFileSystem.cpp
+++ b/utils/idbdatafile/PosixFileSystem.cpp
@@ -176,25 +176,24 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
return -1;
}
- compress::IDBCompressInterface decompressor;
+ char hdr1[compress::CompressInterface::HDR_BUF_LEN];
+ nBytes = readFillBuffer( pFile, hdr1, compress::CompressInterface::HDR_BUF_LEN);
- char hdr1[compress::IDBCompressInterface::HDR_BUF_LEN];
- nBytes = readFillBuffer( pFile, hdr1, compress::IDBCompressInterface::HDR_BUF_LEN);
-
- if ( nBytes != compress::IDBCompressInterface::HDR_BUF_LEN )
+ if ( nBytes != compress::CompressInterface::HDR_BUF_LEN )
{
delete pFile;
return -1;
}
// Verify we are a compressed file
- if (decompressor.verifyHdr(hdr1) < 0)
+ if (compress::CompressInterface::verifyHdr(hdr1) < 0)
{
delete pFile;
return -1;
}
- int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - compress::IDBCompressInterface::HDR_BUF_LEN;
+ int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
+ compress::CompressInterface::HDR_BUF_LEN;
char* hdr2 = new char[ptrSecSize];
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
@@ -206,7 +205,8 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
}
compress::CompChunkPtrList chunkPtrs;
- int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs);
+ int rc = compress::CompressInterface::getPtrList(hdr2, ptrSecSize,
+ chunkPtrs);
delete[] hdr2;
if (rc != 0)
diff --git a/utils/joiner/joinpartition.cpp b/utils/joiner/joinpartition.cpp
index 0b102cd70..4b796979c 100644
--- a/utils/joiner/joinpartition.cpp
+++ b/utils/joiner/joinpartition.cpp
@@ -50,7 +50,10 @@ namespace joiner
uint64_t uniqueNums = 0;
-JoinPartition::JoinPartition() { }
+JoinPartition::JoinPartition()  // default ctor: installs the Snappy compressor
+{
+ compressor = std::make_shared<compress::CompressInterfaceSnappy>();  // make_shared: no naked new, single allocation
+}
/* This is the ctor used by THJS */
JoinPartition::JoinPartition(const RowGroup& lRG,
@@ -103,6 +106,22 @@ JoinPartition::JoinPartition(const RowGroup& lRG,
for (int i = 0; i < (int) bucketCount; i++)
buckets.push_back(boost::shared_ptr(new JoinPartition(*this, false)));
+
+ string compressionType;
+ try
+ {
+ compressionType =
+ config->getConfig("HashJoin", "TempFileCompressionType");
+ } catch (...) {}
+
+ if (compressionType == "LZ4")
+ {
+ compressor.reset(new compress::CompressInterfaceLZ4());
+ }
+ else
+ {
+ compressor.reset(new compress::CompressInterfaceSnappy());
+ }
}
/* Ctor used by JoinPartition on expansion, creates JP's in filemode */
@@ -151,6 +170,8 @@ JoinPartition::JoinPartition(const JoinPartition& jp, bool splitMode) :
smallRG.setData(&buffer);
smallRG.resetRowGroup(0);
smallRG.getRow(0, &smallRow);
+
+ compressor = jp.compressor;
}
@@ -694,6 +715,7 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
fs.seekg(offset);
fs.read((char*) &len, sizeof(len));
+
saveErrno = errno;
if (!fs)
@@ -735,12 +757,14 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
else
{
size_t uncompressedSize;
+ fs.read((char*) &uncompressedSize, sizeof(uncompressedSize));
+
boost::scoped_array buf(new char[len]);
fs.read(buf.get(), len);
saveErrno = errno;
- if (!fs)
+ if (!fs || !uncompressedSize)
{
fs.close();
ostringstream os;
@@ -749,9 +773,9 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
}
totalBytesRead += len;
- compressor.getUncompressedSize(buf.get(), len, &uncompressedSize);
bs->needAtLeast(uncompressedSize);
- compressor.uncompress(buf.get(), len, (char*) bs->getInputPtr());
+ compressor->uncompress(buf.get(), len, (char*) bs->getInputPtr(),
+ &uncompressedSize);
bs->advanceInputPtr(uncompressedSize);
}
@@ -801,13 +825,15 @@ uint64_t JoinPartition::writeByteStream(int which, ByteStream& bs)
}
else
{
- uint64_t maxSize = compressor.maxCompressedSize(len);
- size_t actualSize;
+ size_t maxSize = compressor->maxCompressedSize(len);
+ size_t actualSize = maxSize;
boost::scoped_array compressed(new uint8_t[maxSize]);
- compressor.compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
- ret = actualSize + 4;
+ compressor->compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
+ ret = actualSize + 4 + 8; // sizeof (size_t) == 8. Why 4?
fs.write((char*) &actualSize, sizeof(actualSize));
+ // Save uncompressed len.
+ fs.write((char*) &len, sizeof(len));
fs.write((char*) compressed.get(), actualSize);
saveErrno = errno;
diff --git a/utils/joiner/joinpartition.h b/utils/joiner/joinpartition.h
index 7e7ae5d6d..9c33d8e28 100644
--- a/utils/joiner/joinpartition.h
+++ b/utils/joiner/joinpartition.h
@@ -164,7 +164,7 @@ private:
/* Compression support */
bool useCompression;
- compress::IDBCompressInterface compressor;
+ std::shared_ptr compressor;
/* TBD: do the reading/writing in one thread, compression/decompression in another */
/* Some stats for reporting */
diff --git a/utils/messageqcpp/compressed_iss.cpp b/utils/messageqcpp/compressed_iss.cpp
index 26af982e3..585b3a3f5 100644
--- a/utils/messageqcpp/compressed_iss.cpp
+++ b/utils/messageqcpp/compressed_iss.cpp
@@ -64,6 +64,7 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
{
config::Config* config = config::Config::makeConfig();
string val;
+ string compressionType;
try
{
@@ -75,6 +76,19 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
useCompression = true;
else
useCompression = false;
+
+ try
+ {
+ compressionType =
+ config->getConfig("NetworkCompression", "NetworkCompression");
+ }
+ catch (...) { }
+
+ auto* compressInterface = compress::getCompressInterfaceByName(compressionType);
+ if (!compressInterface)
+ compressInterface = new compress::CompressInterfaceSnappy();
+
+ alg.reset(compressInterface);
}
Socket* CompressedInetStreamSocket::clone() const
@@ -87,20 +101,25 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
{
SBS readBS, ret;
size_t uncompressedSize;
- bool err;
readBS = InetStreamSocket::read(timeout, isTimeOut, stats);
if (readBS->length() == 0 || fMagicBuffer == BYTESTREAM_MAGIC)
return readBS;
- err = alg.getUncompressedSize((char*) readBS->buf(), readBS->length(), &uncompressedSize);
+ // Read stored len, first 4 bytes.
+ uint32_t storedLen = *(uint32_t*) readBS->buf();
- if (!err)
+ if (!storedLen)
return SBS(new ByteStream(0));
+ uncompressedSize = storedLen;
ret.reset(new ByteStream(uncompressedSize));
- alg.uncompress((char*) readBS->buf(), readBS->length(), (char*) ret->getInputPtr());
+
+ alg->uncompress((char*) readBS->buf() + HEADER_SIZE,
+ readBS->length() - HEADER_SIZE, (char*) ret->getInputPtr(),
+ &uncompressedSize);
+
ret->advanceInputPtr(uncompressedSize);
return ret;
@@ -108,15 +127,18 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats)
{
- size_t outLen = 0;
- uint32_t len = msg.length();
+ size_t len = msg.length();
if (useCompression && (len > 512))
{
- ByteStream smsg(alg.maxCompressedSize(len));
+ size_t outLen = alg->maxCompressedSize(len) + HEADER_SIZE;
+ ByteStream smsg(outLen);
- alg.compress((char*) msg.buf(), len, (char*) smsg.getInputPtr(), &outLen);
- smsg.advanceInputPtr(outLen);
+ alg->compress((char*) msg.buf(), len,
+ (char*) smsg.getInputPtr() + HEADER_SIZE, &outLen);
+ // Save original len.
+ *(uint32_t*) smsg.getInputPtr() = len;
+ smsg.advanceInputPtr(outLen + HEADER_SIZE);
if (outLen < len)
do_write(smsg, COMPRESSED_BYTESTREAM_MAGIC, stats);
diff --git a/utils/messageqcpp/compressed_iss.h b/utils/messageqcpp/compressed_iss.h
index 2eabfb3fa..2514195da 100644
--- a/utils/messageqcpp/compressed_iss.h
+++ b/utils/messageqcpp/compressed_iss.h
@@ -54,8 +54,9 @@ public:
virtual const IOSocket accept(const struct timespec* timeout);
virtual void connect(const sockaddr* addr);
private:
- compress::IDBCompressInterface alg;
+ std::shared_ptr alg;
bool useCompression;
+ static const uint32_t HEADER_SIZE = 4;
};
} //namespace messageqcpp
diff --git a/writeengine/bulk/we_bulkload.cpp b/writeengine/bulk/we_bulkload.cpp
index ef612f380..fae55bff8 100644
--- a/writeengine/bulk/we_bulkload.cpp
+++ b/writeengine/bulk/we_bulkload.cpp
@@ -337,15 +337,12 @@ int BulkLoad::loadJobInfo(
}
}
- // Validate that specified compression type is available
- compress::IDBCompressInterface compressor;
-
for (unsigned kT = 0; kT < curJob.jobTableList.size(); kT++)
{
for (unsigned kC = 0; kC < curJob.jobTableList[kT].colList.size(); kC++)
{
- if ( !compressor.isCompressionAvail(
- curJob.jobTableList[kT].colList[kC].compressionType) )
+ if (!compress::CompressInterface::isCompressionAvail(
+ curJob.jobTableList[kT].colList[kC].compressionType))
{
std::ostringstream oss;
oss << "Specified compression type (" <<
diff --git a/writeengine/bulk/we_colbufcompressed.cpp b/writeengine/bulk/we_colbufcompressed.cpp
index e5d004226..9131d9ea7 100644
--- a/writeengine/bulk/we_colbufcompressed.cpp
+++ b/writeengine/bulk/we_colbufcompressed.cpp
@@ -60,12 +60,11 @@ ColumnBufferCompressed::ColumnBufferCompressed( ColumnInfo* pColInfo,
fToBeCompressedBuffer(0),
fToBeCompressedCapacity(0),
fNumBytes(0),
- fCompressor(0),
fPreLoadHWMChunk(true),
fFlushedStartHwmChunk(false)
{
fUserPaddingBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
- fCompressor = new compress::IDBCompressInterface( fUserPaddingBytes );
+ compress::initializeCompressorPool(fCompressorPool, fUserPaddingBytes);
}
//------------------------------------------------------------------------------
@@ -79,7 +78,6 @@ ColumnBufferCompressed::~ColumnBufferCompressed()
fToBeCompressedBuffer = 0;
fToBeCompressedCapacity = 0;
fNumBytes = 0;
- delete fCompressor;
}
//------------------------------------------------------------------------------
@@ -91,9 +89,7 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char*
fFile = f;
fStartingHwm = startHwm;
- IDBCompressInterface compressor;
-
- if (compressor.getPtrList(hdrs, fChunkPtrs) != 0)
+ if (compress::CompressInterface::getPtrList(hdrs, fChunkPtrs) != 0)
{
return ERR_COMP_PARSE_HDRS;
}
@@ -102,7 +98,15 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char*
// rollback), that fall after the HWM, then drop those trailing ptrs.
unsigned int chunkIndex = 0;
unsigned int blockOffsetWithinChunk = 0;
- fCompressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk);
+
+ auto compressor = compress::getCompressorByType(
+ fCompressorPool, fColInfo->column.compressionType);
+ if (!compressor)
+ {
+ return ERR_COMP_WRONG_COMP_TYPE;
+ }
+
+ compressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk);
if ((chunkIndex + 1) < fChunkPtrs.size())
{
@@ -127,11 +131,11 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf(
if (!fToBeCompressedBuffer)
{
fToBeCompressedBuffer =
- new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN];
+ new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN];
}
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
- IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
+ CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal,
fColInfo->column.width );
@@ -147,10 +151,10 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf(
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
}
- fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
+ fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
// Set file offset past end of last chunk
- startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2;
+ startFileOffset = CompressInterface::HDR_BUF_LEN * 2;
if (fChunkPtrs.size() > 0)
startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first +
@@ -223,7 +227,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
// Expand the compression buffer size if working with an abbrev extent, and
// the bytes we are about to add will overflow the abbreviated extent.
- if ((fToBeCompressedCapacity < IDBCompressInterface::UNCOMPRESSED_INBUF_LEN) &&
+ if ((fToBeCompressedCapacity < CompressInterface::UNCOMPRESSED_INBUF_LEN) &&
((fNumBytes + writeSize + fillUpWEmptiesWriteSize) > fToBeCompressedCapacity) )
{
std::ostringstream oss;
@@ -233,7 +237,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
"; part-" << fColInfo->curCol.dataFile.fPartition <<
"; seg-" << fColInfo->curCol.dataFile.fSegment;
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
- fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
+ fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
}
if ((fNumBytes + writeSize + fillUpWEmptiesWriteSize) <= fToBeCompressedCapacity)
@@ -316,12 +320,12 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
// Start over again loading a new to-be-compressed buffer
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
- IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
+ CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal,
fColInfo->column.width );
fToBeCompressedCapacity =
- IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
+ CompressInterface::UNCOMPRESSED_INBUF_LEN;
bufOffset = fToBeCompressedBuffer;
fNumBytes = 0;
@@ -377,21 +381,31 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
//------------------------------------------------------------------------------
int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile )
{
- const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(fToBeCompressedCapacity) +
- fUserPaddingBytes;
+ auto compressor = compress::getCompressorByType(
+ fCompressorPool, fColInfo->column.compressionType);
+ if (!compressor)
+ {
+ return ERR_COMP_WRONG_COMP_TYPE;
+ }
+
+ const size_t OUTPUT_BUFFER_SIZE =
+ compressor->maxCompressedSize(fToBeCompressedCapacity) +
+ fUserPaddingBytes +
+ // Padded len = len + COMPRESSED_CHUNK_INCREMENT_SIZE - (len %
+ // COMPRESSED_CHUNK_INCREMENT_SIZE) + userPadding
+ compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
+
unsigned char* compressedOutBuf = new unsigned char[ OUTPUT_BUFFER_SIZE ];
boost::scoped_array compressedOutBufPtr(compressedOutBuf);
- unsigned int outputLen = OUTPUT_BUFFER_SIZE;
+ size_t outputLen = OUTPUT_BUFFER_SIZE;
#ifdef PROFILE
Stats::startParseEvent(WE_STATS_COMPRESS_COL_COMPRESS);
#endif
- int rc = fCompressor->compressBlock(
- reinterpret_cast(fToBeCompressedBuffer),
- fToBeCompressedCapacity,
- compressedOutBuf,
- outputLen );
+ int rc = compressor->compressBlock(
+ reinterpret_cast(fToBeCompressedBuffer),
+ fToBeCompressedCapacity, compressedOutBuf, outputLen);
if (rc != 0)
{
@@ -399,7 +413,7 @@ int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile )
}
// Round up the compressed chunk size
- rc = fCompressor->padCompressedChunks( compressedOutBuf,
+ rc = compressor->padCompressedChunks( compressedOutBuf,
outputLen, OUTPUT_BUFFER_SIZE );
if (rc != 0)
@@ -581,26 +595,24 @@ int ColumnBufferCompressed::finishFile(bool bTruncFile)
int ColumnBufferCompressed::saveCompressionHeaders( )
{
// Construct the header records
- char hdrBuf[IDBCompressInterface::HDR_BUF_LEN * 2];
+ char hdrBuf[CompressInterface::HDR_BUF_LEN * 2];
RETURN_ON_ERROR(fColInfo->colOp->readHeaders(fFile, hdrBuf));
- BRM::LBID_t lbid = fCompressor->getLBIDByIndex(hdrBuf, 0);
- fCompressor->initHdr(hdrBuf, fColInfo->column.width,
- fColInfo->column.dataType,
- fColInfo->column.compressionType);
- fCompressor->setBlockCount(hdrBuf,
- (fColInfo->getFileSize() / BYTE_PER_BLOCK) );
+ BRM::LBID_t lbid = compress::CompressInterface::getLBIDByIndex(hdrBuf, 0);
+ compress::CompressInterface::initHdr(hdrBuf, fColInfo->column.width, fColInfo->column.dataType,
+ fColInfo->column.compressionType);
+ compress::CompressInterface::setBlockCount(hdrBuf, (fColInfo->getFileSize() / BYTE_PER_BLOCK));
// If lbid written in the header is not 0 and not equal to `lastupdatedlbid` - we are running
// for the next extent for column segment file.
const auto lastUpdatedLbid = fColInfo->getLastUpdatedLBID();
if (lbid && lastUpdatedLbid != lbid)
{
// Write back lbid, after header initialization.
- fCompressor->setLBIDByIndex(hdrBuf, lbid, 0);
- fCompressor->setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1);
+ compress::CompressInterface::setLBIDByIndex(hdrBuf, lbid, 0);
+ compress::CompressInterface::setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1);
}
else
- fCompressor->setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0);
+ compress::CompressInterface::setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0);
std::vector ptrs;
@@ -611,7 +623,7 @@ int ColumnBufferCompressed::saveCompressionHeaders( )
unsigned lastIdx = fChunkPtrs.size() - 1;
ptrs.push_back( fChunkPtrs[lastIdx].first + fChunkPtrs[lastIdx].second );
- fCompressor->storePtrs( ptrs, hdrBuf );
+ compress::CompressInterface::storePtrs(ptrs, hdrBuf);
// Write out the header records
//char resp;
@@ -641,9 +653,9 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
if (!fToBeCompressedBuffer)
{
fToBeCompressedBuffer =
- new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN];
+ new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN];
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
- IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
+ CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal,
fColInfo->column.width );
bNewBuffer = true;
@@ -656,12 +668,19 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
unsigned int blockOffsetWithinChunk = 0;
bool bSkipStartingBlks = false;
+ auto compressor = compress::getCompressorByType(
+ fCompressorPool, fColInfo->column.compressionType);
+ if (!compressor)
+ {
+ return ERR_COMP_WRONG_COMP_TYPE;
+ }
+
if (fPreLoadHWMChunk)
{
if (fChunkPtrs.size() > 0)
{
- fCompressor->locateBlock(fStartingHwm,
- chunkIndex, blockOffsetWithinChunk);
+ compressor->locateBlock(fStartingHwm, chunkIndex,
+ blockOffsetWithinChunk);
if (chunkIndex < fChunkPtrs.size())
startFileOffset = fChunkPtrs[chunkIndex].first;
@@ -718,8 +737,8 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
}
// Uncompress the chunk into our 4MB buffer
- unsigned int outLen = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
- int rc = fCompressor->uncompressBlock(
+ size_t outLen = CompressInterface::UNCOMPRESSED_INBUF_LEN;
+ int rc = compressor->uncompressBlock(
compressedOutBuf,
fChunkPtrs[chunkIndex].second,
fToBeCompressedBuffer,
@@ -758,7 +777,7 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
if (!bNewBuffer)
{
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
- IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
+ CompressInterface::UNCOMPRESSED_INBUF_LEN,
fColInfo->column.emptyVal,
fColInfo->column.width );
}
@@ -775,10 +794,10 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
}
- fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
+ fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
// Set file offset to start after last current chunk
- startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2;
+ startFileOffset = CompressInterface::HDR_BUF_LEN * 2;
if (fChunkPtrs.size() > 0)
startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first +
@@ -796,5 +815,4 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
return NO_ERROR;
}
-
}
diff --git a/writeengine/bulk/we_colbufcompressed.h b/writeengine/bulk/we_colbufcompressed.h
index 5c4cccffc..057ae65aa 100644
--- a/writeengine/bulk/we_colbufcompressed.h
+++ b/writeengine/bulk/we_colbufcompressed.h
@@ -107,8 +107,7 @@ private:
// should always be 4MB, unless
// working with abbrev extent.
size_t fNumBytes; // num Bytes in comp buffer
- compress::IDBCompressInterface*
- fCompressor; // data compression object
+ compress::CompressorPool fCompressorPool; // data compression object pool
compress::CompChunkPtrList
fChunkPtrs; // col file header information
bool fPreLoadHWMChunk; // preload 1st HWM chunk only
diff --git a/writeengine/bulk/we_columninfo.cpp b/writeengine/bulk/we_columninfo.cpp
index 69bf37fef..e1e883a39 100644
--- a/writeengine/bulk/we_columninfo.cpp
+++ b/writeengine/bulk/we_columninfo.cpp
@@ -450,7 +450,7 @@ int ColumnInfo::createDelayedFileIfNeeded( const std::string& tableName )
if (column.dctnry.fCompressionType != 0)
{
DctnryCompress1* tempD1;
- tempD1 = new DctnryCompress1;
+ tempD1 = new DctnryCompress1(column.dctnry.fCompressionType);
tempD1->setMaxActiveChunkNum(1);
tempD1->setBulkFlag(true);
tempD = tempD1;
@@ -668,7 +668,7 @@ int ColumnInfo::extendColumnNewExtent(
uint16_t segmentNew = 0;
BRM::LBID_t startLbid;
- char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
+ char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
// Extend the column by adding an extent to the next
// DBRoot, partition, and segment file in the rotation
@@ -1684,7 +1684,8 @@ int ColumnInfo::openDctnryStore( bool bMustExist )
if ( column.dctnry.fCompressionType != 0)
{
- DctnryCompress1* dctnryCompress1 = new DctnryCompress1;
+ DctnryCompress1* dctnryCompress1 =
+ new DctnryCompress1(column.dctnry.fCompressionType);
dctnryCompress1->setMaxActiveChunkNum(1);
dctnryCompress1->setBulkFlag(true);
fStore = dctnryCompress1;
diff --git a/writeengine/bulk/we_columninfocompressed.cpp b/writeengine/bulk/we_columninfocompressed.cpp
index e30e74382..e412e62d8 100644
--- a/writeengine/bulk/we_columninfocompressed.cpp
+++ b/writeengine/bulk/we_columninfocompressed.cpp
@@ -108,7 +108,7 @@ int ColumnInfoCompressed::closeColumnFile(bool bCompletingExtent, bool bAbort)
//------------------------------------------------------------------------------
int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm )
{
- char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
+ char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
RETURN_ON_ERROR( colOp->readHeaders(curCol.dataFile.pFile, hdr) );
// Initialize the output buffer manager for the column.
@@ -129,10 +129,9 @@ int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm )
fColBufferMgr = mgr;
- IDBCompressInterface compressor;
- int abbrevFlag =
- ( compressor.getBlockCount(hdr) ==
- uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width / BYTE_PER_BLOCK) );
+ int abbrevFlag = (compress::CompressInterface::getBlockCount(hdr) ==
+ uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width /
+ BYTE_PER_BLOCK));
setFileSize( hwm, abbrevFlag );
// See if dealing with abbreviated extent that will need expanding.
@@ -324,9 +323,9 @@ int ColumnInfoCompressed::truncateDctnryStore(
return rc;
}
- char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ];
+ char controlHdr[ CompressInterface::HDR_BUF_LEN ];
rc = fTruncateDctnryFileOp.readFile( dFile,
- (unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN);
+ (unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN);
if (rc != NO_ERROR)
{
@@ -345,8 +344,7 @@ int ColumnInfoCompressed::truncateDctnryStore(
return rc;
}
- IDBCompressInterface compressor;
- int rc1 = compressor.verifyHdr( controlHdr );
+ int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
if (rc1 != 0)
{
@@ -372,7 +370,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
// actually grow the file (something we don't want to do), because we have
// not yet reserved a full extent (on disk) for this dictionary store file.
const int PSEUDO_COL_WIDTH = 8;
- uint64_t numBlocks = compressor.getBlockCount( controlHdr );
+ uint64_t numBlocks =
+ compress::CompressInterface::getBlockCount(controlHdr);
if ( numBlocks == uint64_t
(INITIAL_EXTENT_ROWS_TO_DISK * PSEUDO_COL_WIDTH / BYTE_PER_BLOCK) )
@@ -390,8 +389,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
return NO_ERROR;
}
- uint64_t hdrSize = compressor.getHdrSize(controlHdr);
- uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN;
+ uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
+ uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
char* pointerHdr = new char[ptrHdrSize];
rc = fTruncateDctnryFileOp.readFile(dFile,
@@ -416,7 +415,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
}
CompChunkPtrList chunkPtrs;
- rc1 = compressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs );
+ rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
+ chunkPtrs);
delete[] pointerHdr;
if (rc1 != 0)
diff --git a/writeengine/server/we_getfilesizes.cpp b/writeengine/server/we_getfilesizes.cpp
index 3a597ff69..aa374755f 100644
--- a/writeengine/server/we_getfilesizes.cpp
+++ b/writeengine/server/we_getfilesizes.cpp
@@ -96,7 +96,7 @@ size_t readFillBuffer(
return totalBytesRead;
}
-off64_t getCompressedDataSize(string& fileName)
+static off64_t getCompressedDataSize(string& fileName)
{
off64_t dataSize = 0;
IDBDataFile* pFile = 0;
@@ -119,21 +119,21 @@ off64_t getCompressedDataSize(string& fileName)
throw std::runtime_error(oss.str());
}
- IDBCompressInterface decompressor;
//--------------------------------------------------------------------------
// Read headers and extract compression pointers
//--------------------------------------------------------------------------
- char hdr1[IDBCompressInterface::HDR_BUF_LEN];
- nBytes = readFillBuffer( pFile, hdr1, IDBCompressInterface::HDR_BUF_LEN);
+ char hdr1[CompressInterface::HDR_BUF_LEN];
+ nBytes = readFillBuffer( pFile, hdr1, CompressInterface::HDR_BUF_LEN);
- if ( nBytes != IDBCompressInterface::HDR_BUF_LEN )
+ if ( nBytes != CompressInterface::HDR_BUF_LEN )
{
std::ostringstream oss;
oss << "Error reading first header from file " << fileName;
throw std::runtime_error(oss.str());
}
- int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - IDBCompressInterface::HDR_BUF_LEN;
+ int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
+ CompressInterface::HDR_BUF_LEN;
char* hdr2 = new char[ptrSecSize];
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
@@ -145,7 +145,8 @@ off64_t getCompressedDataSize(string& fileName)
}
CompChunkPtrList chunkPtrs;
- int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs);
+ int rc =
+ compress::CompressInterface::getPtrList(hdr2, ptrSecSize, chunkPtrs);
delete[] hdr2;
if (rc != 0)
diff --git a/writeengine/shared/we_bulkrollbackfilecompressed.cpp b/writeengine/shared/we_bulkrollbackfilecompressed.cpp
index c149bde71..f15b090e9 100644
--- a/writeengine/shared/we_bulkrollbackfilecompressed.cpp
+++ b/writeengine/shared/we_bulkrollbackfilecompressed.cpp
@@ -51,6 +51,7 @@ namespace WriteEngine
BulkRollbackFileCompressed::BulkRollbackFileCompressed(BulkRollbackMgr* mgr) :
BulkRollbackFile(mgr)
{
+ compress::initializeCompressorPool(fCompressorPool);
}
//------------------------------------------------------------------------------
@@ -104,7 +105,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
}
// Read and parse the header pointers
- char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];;
+ char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];;
CompChunkPtrList chunkPtrs;
std::string errMsg;
int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg);
@@ -127,7 +128,20 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
unsigned int blockOffset = fileSizeBlocks - 1;
unsigned int chunkIndex = 0;
unsigned int blkOffsetInChunk = 0;
- fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
+
+ auto fCompressor = compress::getCompressorByType(
+ fCompressorPool,
+ compress::CompressInterface::getCompressionType(hdrs));
+ if (!fCompressor)
+ {
+ std::ostringstream oss;
+ oss << "Error, wrong compression type for segment file"
+ << ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-"
+ << partNum << "; segment-" << segNum << ";";
+ throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
+ }
+
+ fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
// Truncate the extra extents that are to be aborted
if (chunkIndex < chunkPtrs.size())
@@ -145,7 +159,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
logging::M0075, columnOID, msgText2.str() );
// Drop off any trailing pointers (that point beyond the last block)
- fCompressor.setBlockCount( hdrs, fileSizeBlocks );
+ compress::CompressInterface::setBlockCount(hdrs, fileSizeBlocks);
std::vector ptrs;
for (unsigned i = 0; i <= chunkIndex; i++)
@@ -155,7 +169,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
ptrs.push_back( chunkPtrs[chunkIndex].first +
chunkPtrs[chunkIndex].second );
- fCompressor.storePtrs( ptrs, hdrs );
+ compress::CompressInterface::storePtrs(ptrs, hdrs);
rc = fDbFile.writeHeaders( pFile, hdrs );
@@ -252,7 +266,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
}
// Read and parse the header pointers
- char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
+ char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
CompChunkPtrList chunkPtrs;
std::string errMsg;
int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg);
@@ -275,7 +289,20 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
unsigned int blockOffset = startOffsetBlk - 1;
unsigned int chunkIndex = 0;
unsigned int blkOffsetInChunk = 0;
- fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
+
+ auto fCompressor = compress::getCompressorByType(
+ fCompressorPool,
+ compress::CompressInterface::getCompressionType(hdrs));
+ if (!fCompressor)
+ {
+ std::ostringstream oss;
+ oss << "Error, wrong compression type for segment file"
+ << ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-"
+ << partNum << "; segment-" << segNum << ";";
+ throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
+ }
+
+ fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
if (chunkIndex < chunkPtrs.size())
{
@@ -401,7 +428,8 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
// Watch for the special case where we are restoring a db file as an
// empty file (chunkindex=0 and restoredChunkLen=0); in this case we
// just restore the first pointer (set to 8192).
- fCompressor.setBlockCount( hdrs, (startOffsetBlk + nBlocks) );
+ compress::CompressInterface::setBlockCount(hdrs,
+ (startOffsetBlk + nBlocks));
std::vector newPtrs;
if ((chunkIndex > 0) || (restoredChunkLen > 0))
@@ -413,7 +441,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
}
newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen );
- fCompressor.storePtrs( newPtrs, hdrs );
+ compress::CompressInterface::storePtrs(newPtrs, hdrs);
rc = fDbFile.writeHeaders( pFile, hdrs );
@@ -482,7 +510,7 @@ int BulkRollbackFileCompressed::loadColumnHdrPtrs(
}
// Parse the header pointers
- int rc1 = fCompressor.getPtrList( hdrs, chunkPtrs );
+ int rc1 = compress::CompressInterface::getPtrList(hdrs, chunkPtrs);
if (rc1 != 0)
{
@@ -548,7 +576,7 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
throw WeException( oss.str(), ERR_FILE_OPEN );
}
- char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ];
+ char controlHdr[ CompressInterface::HDR_BUF_LEN ];
CompChunkPtrList chunkPtrs;
uint64_t ptrHdrSize;
std::string errMsg;
@@ -572,7 +600,20 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
unsigned int blockOffset = startOffsetBlk - 1;
unsigned int chunkIndex = 0;
unsigned int blkOffsetInChunk = 0;
- fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
+
+ auto fCompressor = compress::getCompressorByType(
+ fCompressorPool,
+ compress::CompressInterface::getCompressionType(controlHdr));
+ if (!fCompressor)
+ {
+ std::ostringstream oss;
+ oss << "Error, wrong compression type for segment file"
+ << ": OID-" << dStoreOID << "; DbRoot-" << dbRoot << "; partition-"
+ << partNum << "; segment-" << segNum << ";";
+ throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
+ }
+
+ fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
if (chunkIndex < chunkPtrs.size())
{
@@ -686,7 +727,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
// Watch for the special case where we are restoring a db file as an
// empty file (chunkindex=0 and restoredChunkLen=0); in this case we
// just restore the first pointer (set to 8192).
- fCompressor.setBlockCount( controlHdr, (startOffsetBlk + nBlocks) );
+ compress::CompressInterface::setBlockCount(controlHdr,
+ (startOffsetBlk + nBlocks));
std::vector newPtrs;
if ((chunkIndex > 0) || (restoredChunkLen > 0))
@@ -699,7 +741,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen );
char* pointerHdr = new char[ptrHdrSize];
- fCompressor.storePtrs( newPtrs, pointerHdr, ptrHdrSize );
+ compress::CompressInterface::storePtrs(newPtrs, pointerHdr,
+ ptrHdrSize);
rc = fDbFile.writeHeaders( pFile, controlHdr, pointerHdr, ptrHdrSize );
delete[] pointerHdr;
@@ -759,7 +802,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
std::string& errMsg) const
{
int rc = fDbFile.readFile(
- pFile, (unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN);
+ pFile, (unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN);
if (rc != NO_ERROR)
{
@@ -771,7 +814,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
return rc;
}
- int rc1 = fCompressor.verifyHdr( controlHdr );
+ int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
if (rc1 != 0)
{
@@ -786,8 +829,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
return rc;
}
- uint64_t hdrSize = fCompressor.getHdrSize(controlHdr);
- ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN;
+ uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
+ ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
char* pointerHdr = new char[ptrHdrSize];
rc = fDbFile.readFile(pFile, (unsigned char*)pointerHdr, ptrHdrSize);
@@ -804,7 +847,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
}
// Parse the header pointers
- rc1 = fCompressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs );
+ rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
+ chunkPtrs);
delete[] pointerHdr;
if (rc1 != 0)
@@ -1033,5 +1077,4 @@ size_t BulkRollbackFileCompressed::readFillBuffer(
return totalBytesRead;
}
-
} //end of namespace
diff --git a/writeengine/shared/we_bulkrollbackfilecompressed.h b/writeengine/shared/we_bulkrollbackfilecompressed.h
index 7b7357fc5..ab9e8439c 100644
--- a/writeengine/shared/we_bulkrollbackfilecompressed.h
+++ b/writeengine/shared/we_bulkrollbackfilecompressed.h
@@ -28,6 +28,7 @@
#include
#include
+#include
#include "we_define.h"
#include "we_type.h"
@@ -148,7 +149,7 @@ private:
uint64_t& ptrHdrSize,
std::string& errMsg ) const;
- compress::IDBCompressInterface fCompressor;
+ compress::CompressorPool fCompressorPool;
};
} //end of namespace
diff --git a/writeengine/shared/we_chunkmanager.cpp b/writeengine/shared/we_chunkmanager.cpp
index 8ee736c73..ea100d5de 100644
--- a/writeengine/shared/we_chunkmanager.cpp
+++ b/writeengine/shared/we_chunkmanager.cpp
@@ -67,8 +67,6 @@ namespace WriteEngine
extern int NUM_BLOCKS_PER_INITIAL_EXTENT; // defined in we_dctnry.cpp
extern WErrorCodes ec; // defined in we_log.cpp
-const int COMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::maxCompressedSize(UNCOMPRESSED_CHUNK_SIZE) + 64 + 3 + 8 * 1024;
-
//------------------------------------------------------------------------------
// Search for the specified chunk in fChunkList.
//------------------------------------------------------------------------------
@@ -91,18 +89,24 @@ ChunkData* CompFileData::findChunk(int64_t id) const
//------------------------------------------------------------------------------
// ChunkManager constructor
//------------------------------------------------------------------------------
-ChunkManager::ChunkManager() : fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false),
- fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()),
- fFileOp(0), fSysLogger(NULL), fTransId(-1),
- fLocalModuleId(Config::getLocalModuleID()),
- fFs(fIsHdfs ?
- IDBFileSystem::getFs(IDBDataFile::HDFS) :
- IDBPolicy::useCloud() ?
- IDBFileSystem::getFs(IDBDataFile::CLOUD) :
- IDBFileSystem::getFs(IDBDataFile::BUFFERED))
+ChunkManager::ChunkManager()
+ : fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false),
+ fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()),
+ fFileOp(0), fSysLogger(NULL), fTransId(-1),
+ fLocalModuleId(Config::getLocalModuleID()),
+ fFs(fIsHdfs ? IDBFileSystem::getFs(IDBDataFile::HDFS)
+ : IDBPolicy::useCloud()
+ ? IDBFileSystem::getFs(IDBDataFile::CLOUD)
+ : IDBFileSystem::getFs(IDBDataFile::BUFFERED))
{
fUserPaddings = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
- fCompressor.numUserPaddingBytes(fUserPaddings);
+ compress::initializeCompressorPool(fCompressorPool, fUserPaddings);
+
+ COMPRESSED_CHUNK_SIZE =
+ compress::CompressInterface::getMaxCompressedSizeGeneric(
+ UNCOMPRESSED_CHUNK_SIZE) +
+ 64 + 3 + 8 * 1024;
+
fMaxCompressedBufSize = COMPRESSED_CHUNK_SIZE + fUserPaddings;
fBufCompressed = new char[fMaxCompressedBufSize];
fSysLogger = new logging::Logger(SUBSYSTEM_ID_WE);
@@ -383,16 +387,22 @@ CompFileData* ChunkManager::getFileData(const FID& fid,
}
// make sure the header is valid
- if (fCompressor.verifyHdr(fileData->fFileHeader.fControlData) != 0)
+ if (compress::CompressInterface::verifyHdr(fileData->fFileHeader.fControlData) != 0)
{
WE_COMP_DBG(cout << "Invalid header." << endl;)
delete fileData;
return NULL;
}
- int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
+ int headerSize = compress::CompressInterface::getHdrSize(
+ fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
+ // Save segment file compression type.
+ uint32_t compressionType = compress::CompressInterface::getCompressionType(
+ fileData->fFileHeader.fControlData);
+ fileData->fCompressionType = compressionType;
+
if (ptrSecSize > COMPRESSED_FILE_HEADER_UNIT)
{
// >8K header, dictionary width > 128
@@ -462,11 +472,12 @@ IDBDataFile* ChunkManager::createDctnryFile(const FID& fid,
// Dictionary store extent width == 0. See more details in function
// `createDictStoreExtent`.
- fCompressor.initHdr(fileData->fFileHeader.fControlData,
- fileData->fFileHeader.fPtrSection,
- /*colWidth=*/0, fileData->fColDataType,
- fFileOp->compressionType(), hdrSize);
- fCompressor.setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0);
+ compress::CompressInterface::initHdr(
+ fileData->fFileHeader.fControlData, fileData->fFileHeader.fPtrSection,
+ /*colWidth=*/0, fileData->fColDataType, fFileOp->compressionType(), hdrSize);
+ compress::CompressInterface::setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0);
+ // Save compression type.
+ fileData->fCompressionType = fFileOp->compressionType();
if (writeHeader(fileData, __LINE__) != NO_ERROR)
{
@@ -771,9 +782,16 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
}
// uncompress the read in buffer
- unsigned int dataLen = sizeof(chunkData->fBufUnCompressed);
+ size_t dataLen = sizeof(chunkData->fBufUnCompressed);
- if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize,
+ auto fCompressor = compress::getCompressorByType(
+ fCompressorPool, fileData->fCompressionType);
+ if (!fCompressor)
+ {
+ return ERR_COMP_WRONG_COMP_TYPE;
+ }
+
+ if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
(unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0)
{
if (fIsFix)
@@ -784,7 +802,7 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
{
char* hdr = fileData->fFileHeader.fControlData;
- if (fCompressor.getBlockCount(hdr) < 512)
+ if (compress::CompressInterface::getBlockCount(hdr) < 512)
blocks = 256;
}
@@ -820,7 +838,8 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
{
if (id == 0 && ptrs[id] == 0) // if the 1st ptr is not set for new extent
{
- ptrs[0] = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
+ ptrs[0] = compress::CompressInterface::getHdrSize(
+ fileData->fFileHeader.fControlData);
}
// load the uncompressed buffer with empty values.
@@ -907,10 +926,17 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
// compress the chunk before writing it to file
fLenCompressed = fMaxCompressedBufSize;
- if (fCompressor.compressBlock((char*)chunkData->fBufUnCompressed,
- chunkData->fLenUnCompressed,
- (unsigned char*)fBufCompressed,
- fLenCompressed) != 0)
+ auto fCompressor = compress::getCompressorByType(
+ fCompressorPool, fileData->fCompressionType);
+ if (!fCompressor)
+ {
+ return ERR_COMP_WRONG_COMP_TYPE;
+ }
+
+ if (fCompressor->compressBlock((char*) chunkData->fBufUnCompressed,
+ chunkData->fLenUnCompressed,
+ (unsigned char*) fBufCompressed,
+ fLenCompressed) != 0)
{
logMessage(ERR_COMP_COMPRESS, logging::LOG_TYPE_ERROR, __LINE__);
return ERR_COMP_COMPRESS;
@@ -941,7 +967,8 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
// [chunkId+0] is the start offset of current chunk.
// [chunkId+1] is the start offset of next chunk, the offset diff is current chunk size.
// [chunkId+2] is 0 or not indicates if the next chunk exists.
- int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
+ int headerSize = compress::CompressInterface::getHdrSize(
+ fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
int64_t usablePtrIds = (ptrSecSize / sizeof(uint64_t)) - 2;
@@ -968,7 +995,7 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
else if (lastChunk)
{
// add padding space if the chunk is written first time
- if (fCompressor.padCompressedChunks(
+ if (fCompressor->padCompressedChunks(
(unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize) != 0)
{
WE_COMP_DBG(cout << "Last chunk:" << chunkId << ", padding failed." << endl;)
@@ -1272,7 +1299,8 @@ int ChunkManager::closeFile(CompFileData* fileData)
int ChunkManager::writeHeader(CompFileData* fileData, int ln)
{
int rc = NO_ERROR;
- int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
+ int headerSize = compress::CompressInterface::getHdrSize(
+ fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
if (!fIsHdfs && !fIsBulkLoad)
@@ -1422,8 +1450,10 @@ int ChunkManager::updateColumnExtent(IDBDataFile* pFile, int addBlockCount, int6
int rc = NO_ERROR;
char* hdr = pFileData->fFileHeader.fControlData;
- fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount);
- fCompressor.setLBIDByIndex(hdr, lbid, 1);
+ compress::CompressInterface::setBlockCount(
+ hdr, compress::CompressInterface::getBlockCount(hdr) + addBlockCount);
+ compress::CompressInterface::setLBIDByIndex(hdr, lbid, 1);
+
ChunkData* chunkData = (pFileData)->findChunk(0);
if (chunkData != NULL)
@@ -1475,7 +1505,7 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount,
char* hdr = i->second->fFileHeader.fControlData;
char* uncompressedBuf = chunkData->fBufUnCompressed;
- int currentBlockCount = fCompressor.getBlockCount(hdr);
+ int currentBlockCount = compress::CompressInterface::getBlockCount(hdr);
// Bug 3203, write out the compressed initial extent.
if (currentBlockCount == 0)
@@ -1511,13 +1541,15 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount,
}
if (rc == NO_ERROR)
- fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount);
+ compress::CompressInterface::setBlockCount(
+ hdr,
+ compress::CompressInterface::getBlockCount(hdr) + addBlockCount);
if (currentBlockCount)
{
// Append to the end.
- uint64_t lbidCount = fCompressor.getLBIDCount(hdr);
- fCompressor.setLBIDByIndex(hdr, lbid, lbidCount);
+ uint64_t lbidCount = compress::CompressInterface::getLBIDCount(hdr);
+ compress::CompressInterface::setLBIDByIndex(hdr, lbid, lbidCount);
}
return rc;
}
@@ -1684,7 +1716,8 @@ int ChunkManager::getBlockCount(IDBDataFile* pFile)
map::iterator fpIt = fFilePtrMap.find(pFile);
idbassert(fpIt != fFilePtrMap.end());
- return fCompressor.getBlockCount(fpIt->second->fFileHeader.fControlData);
+ return compress::CompressInterface::getBlockCount(
+ fpIt->second->fFileHeader.fControlData);
}
//------------------------------------------------------------------------------
@@ -1758,11 +1791,13 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
origFilePtr->flush();
// back out the current pointers
- int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
+ int headerSize = compress::CompressInterface::getHdrSize(
+ fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
compress::CompChunkPtrList origPtrs;
- if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0)
+ if (compress::CompressInterface::getPtrList(
+ fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0)
{
ostringstream oss;
oss << "Chunk shifting failed, file:" << origFileName << " -- invalid header.";
@@ -1876,7 +1911,14 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
ChunkData* chunkData = chunksTouched[k];
fLenCompressed = fMaxCompressedBufSize;
- if ((rc = fCompressor.compressBlock((char*)chunkData->fBufUnCompressed,
+ auto fCompressor = compress::getCompressorByType(
+ fCompressorPool, fileData->fCompressionType);
+ if (!fCompressor)
+ {
+ return ERR_COMP_WRONG_COMP_TYPE;
+ }
+
+ if ((rc = fCompressor->compressBlock((char*)chunkData->fBufUnCompressed,
chunkData->fLenUnCompressed,
(unsigned char*)fBufCompressed,
fLenCompressed)) != 0)
@@ -1894,7 +1936,7 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
<< fLenCompressed;)
// shifting chunk, add padding space
- if ((rc = fCompressor.padCompressedChunks(
+ if ((rc = fCompressor->padCompressedChunks(
(unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize)) != 0)
{
WE_COMP_DBG(cout << ", but padding failed." << endl;)
@@ -2245,7 +2287,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
}
// make sure the header is valid
- if ((rc = fCompressor.verifyHdr(fileData->fFileHeader.fControlData)) != 0)
+ if ((rc = compress::CompressInterface::verifyHdr(
+ fileData->fFileHeader.fControlData)) != 0)
{
ostringstream oss;
oss << "Invalid header in new " << fileData->fFileName << ", roll back";
@@ -2254,7 +2297,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
return rc;
}
- int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
+ int headerSize = compress::CompressInterface::getHdrSize(
+ fileData->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
// read in the pointer section in header
@@ -2270,7 +2314,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
// get pointer list
compress::CompChunkPtrList ptrs;
- if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
+ if (compress::CompressInterface::getPtrList(
+ fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
{
ostringstream oss;
oss << "Failed to parse pointer list from new " << fileData->fFileName << "@" << __LINE__;
@@ -2282,6 +2327,13 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
ChunkData chunkData;
int numOfChunks = ptrs.size(); // number of chunks in the file
+ auto fCompressor = compress::getCompressorByType(
+ fCompressorPool, fileData->fCompressionType);
+ if (!fCompressor)
+ {
+ return ERR_COMP_WRONG_COMP_TYPE;
+ }
+
for (int i = 0; i < numOfChunks && rc == NO_ERROR; i++)
{
unsigned int chunkSize = ptrs[i].second;
@@ -2304,9 +2356,9 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
}
// uncompress the read in buffer
- unsigned int dataLen = sizeof(chunkData.fBufUnCompressed);
+ size_t dataLen = sizeof(chunkData.fBufUnCompressed);
- if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize,
+ if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
(unsigned char*)chunkData.fBufUnCompressed, dataLen) != 0)
{
ostringstream oss;
@@ -2624,13 +2676,15 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
if (mit != fFileMap.end())
{
- int headerSize = fCompressor.getHdrSize(mit->second->fFileHeader.fControlData);
+ int headerSize = compress::CompressInterface::getHdrSize(
+ mit->second->fFileHeader.fControlData);
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
// get pointer list
compress::CompChunkPtrList ptrs;
- if (fCompressor.getPtrList(mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
+ if (compress::CompressInterface::getPtrList(
+ mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
{
ostringstream oss;
oss << "Failed to parse pointer list from new " << mit->second->fFileName << "@" << __LINE__;
@@ -2662,9 +2716,16 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
// uncompress the read in buffer
chunkData = new ChunkData(numOfChunks - 1);
- unsigned int dataLen = sizeof(chunkData->fBufUnCompressed);
+ size_t dataLen = sizeof(chunkData->fBufUnCompressed);
- if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize,
+ auto fCompressor = compress::getCompressorByType(
+ fCompressorPool, mit->second->fCompressionType);
+ if (!fCompressor)
+ {
+ return ERR_COMP_WRONG_COMP_TYPE;
+ }
+
+ if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
(unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0)
{
mit->second->fChunkList.push_back(chunkData);
@@ -2676,7 +2737,7 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
{
char* hdr = mit->second->fFileHeader.fControlData;
- if (fCompressor.getBlockCount(hdr) < 512)
+ if (compress::CompressInterface::getBlockCount(hdr) < 512)
blocks = 256;
}
@@ -2693,7 +2754,6 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
return rc;
}
-
}
// vim:ts=4 sw=4:
diff --git a/writeengine/shared/we_chunkmanager.h b/writeengine/shared/we_chunkmanager.h
index b79a9b377..198c67cce 100644
--- a/writeengine/shared/we_chunkmanager.h
+++ b/writeengine/shared/we_chunkmanager.h
@@ -64,8 +64,8 @@ namespace WriteEngine
// forward reference
class FileOp;
-const int UNCOMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
-const int COMPRESSED_FILE_HEADER_UNIT = compress::IDBCompressInterface::HDR_BUF_LEN;
+const int UNCOMPRESSED_CHUNK_SIZE = compress::CompressInterface::UNCOMPRESSED_INBUF_LEN;
+const int COMPRESSED_FILE_HEADER_UNIT = compress::CompressInterface::HDR_BUF_LEN;
// assume UNCOMPRESSED_CHUNK_SIZE > 0xBFFF (49151), 8 * 1024 bytes padding
@@ -136,7 +136,7 @@ class CompFileData
public:
CompFileData(const FileID& id, const FID& fid, const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth) :
fFileID(id), fFid(fid), fColDataType(colDataType), fColWidth(colWidth), fDctnryCol(false),
- fFilePtr(NULL), fIoBSize(0) {}
+ fFilePtr(NULL), fIoBSize(0), fCompressionType(1) {}
ChunkData* findChunk(int64_t cid) const;
@@ -152,6 +152,7 @@ protected:
std::list fChunkList;
boost::scoped_array fIoBuffer;
size_t fIoBSize;
+ uint32_t fCompressionType;
friend class ChunkManager;
};
@@ -369,22 +370,23 @@ protected:
std::list > fActiveChunks;
unsigned int fMaxActiveChunkNum; // max active chunks per file
char* fBufCompressed;
- unsigned int fLenCompressed;
- unsigned int fMaxCompressedBufSize;
- unsigned int fUserPaddings;
+ size_t fLenCompressed;
+ size_t fMaxCompressedBufSize;
+ size_t fUserPaddings;
bool fIsBulkLoad;
bool fDropFdCache;
bool fIsInsert;
bool fIsHdfs;
FileOp* fFileOp;
- compress::IDBCompressInterface fCompressor;
+ compress::CompressorPool fCompressorPool;
logging::Logger* fSysLogger;
TxnID fTransId;
int fLocalModuleId;
idbdatafile::IDBFileSystem& fFs;
bool fIsFix;
+ size_t COMPRESSED_CHUNK_SIZE;
-private:
+ private:
};
}
diff --git a/writeengine/shared/we_define.h b/writeengine/shared/we_define.h
index 97ae3a1b2..bf987c31c 100644
--- a/writeengine/shared/we_define.h
+++ b/writeengine/shared/we_define.h
@@ -348,6 +348,7 @@ const int ERR_COMP_READ_FILE = ERR_COMPBASE + 16;// Failed to read from a
const int ERR_COMP_WRITE_FILE = ERR_COMPBASE + 17;// Failed to write to a compresssed data file
const int ERR_COMP_CLOSE_FILE = ERR_COMPBASE + 18;// Failed to close a compressed data file
const int ERR_COMP_TRUNCATE_ZERO = ERR_COMPBASE + 19;// Invalid attempt to truncate file to 0 bytes
+const int ERR_COMP_WRONG_COMP_TYPE = ERR_COMPBASE + 20;// Invalid compression type.
//--------------------------------------------------------------------------
// Auto-increment error
diff --git a/writeengine/shared/we_fileop.cpp b/writeengine/shared/we_fileop.cpp
index 52785a013..f8046deed 100644
--- a/writeengine/shared/we_fileop.cpp
+++ b/writeengine/shared/we_fileop.cpp
@@ -652,14 +652,19 @@ int FileOp::extendFile(
// @bug 5349: check that new extent's fbo is not past current EOF
if (m_compressionType)
{
- char hdrsIn[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
+ char hdrsIn[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
RETURN_ON_ERROR( readHeaders(pFile, hdrsIn) );
- IDBCompressInterface compressor;
- unsigned int ptrCount = compressor.getPtrCount(hdrsIn);
+ std::unique_ptr compressor(
+ compress::getCompressInterfaceByType(
+ compress::CompressInterface::getCompressionType(hdrsIn)));
+
+ unsigned int ptrCount =
+ compress::CompressInterface::getPtrCount(hdrsIn);
unsigned int chunkIndex = 0;
unsigned int blockOffsetWithinChunk = 0;
- compressor.locateBlock((hwm - 1), chunkIndex, blockOffsetWithinChunk);
+ compressor->locateBlock((hwm - 1), chunkIndex,
+ blockOffsetWithinChunk);
//std::ostringstream oss1;
//oss1 << "Extending compressed column file"<<
@@ -816,9 +821,8 @@ int FileOp::extendFile(
if ((m_compressionType) && (hdrs))
{
- IDBCompressInterface compressor;
- compressor.initHdr(hdrs, width, colDataType, m_compressionType);
- compressor.setLBIDByIndex(hdrs, startLbid, 0);
+ compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
+ compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0);
}
}
@@ -976,9 +980,8 @@ int FileOp::addExtentExactFile(
if ((m_compressionType) && (hdrs))
{
- IDBCompressInterface compressor;
- compressor.initHdr(hdrs, width, colDataType, m_compressionType);
- compressor.setLBIDByIndex(hdrs, startLbid, 0);
+ compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
+ compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0);
}
}
@@ -1064,13 +1067,11 @@ int FileOp::initColumnExtent(
{
if ((bNewFile) && (m_compressionType))
{
- char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2];
- IDBCompressInterface compressor;
- compressor.initHdr(hdrs, width, colDataType, m_compressionType);
- compressor.setLBIDByIndex(hdrs, lbid, 0);
-
+ char hdrs[CompressInterface::HDR_BUF_LEN * 2];
+ compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
+ compress::CompressInterface::setLBIDByIndex(hdrs, lbid, 0);
if (bAbbrevExtent)
- compressor.setBlockCount(hdrs, nBlocks);
+ compress::CompressInterface::setBlockCount(hdrs, nBlocks);
RETURN_ON_ERROR(writeHeaders(pFile, hdrs));
}
@@ -1262,7 +1263,7 @@ int FileOp::initAbbrevCompColumnExtent(
Stats::startParseEvent(WE_STATS_COMPRESS_COL_INIT_ABBREV_EXT);
#endif
- char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2];
+ char hdrs[CompressInterface::HDR_BUF_LEN * 2];
rc = writeInitialCompColumnChunk( pFile,
nBlocks,
INITIAL_EXTENT_ROWS_TO_DISK,
@@ -1308,24 +1309,30 @@ int FileOp::writeInitialCompColumnChunk(
execplan::CalpontSystemCatalog::ColDataType colDataType,
char* hdrs)
{
- const int INPUT_BUFFER_SIZE = nRows * width;
+ const size_t INPUT_BUFFER_SIZE = nRows * width;
char* toBeCompressedInput = new char[INPUT_BUFFER_SIZE];
unsigned int userPaddingBytes = Config::getNumCompressedPadBlks() *
BYTE_PER_BLOCK;
- const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(INPUT_BUFFER_SIZE) +
- userPaddingBytes;
+ // Compress an initialized abbreviated extent
+ // Initially m_compressionType == 0, but this function is used under
+ // the condition that m_compressionType > 0.
+ std::unique_ptr compressor(
+ compress::getCompressInterfaceByType(m_compressionType,
+ userPaddingBytes));
+ const size_t OUTPUT_BUFFER_SIZE =
+ compressor->maxCompressedSize(INPUT_BUFFER_SIZE) + userPaddingBytes +
+ compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
+
unsigned char* compressedOutput = new unsigned char[OUTPUT_BUFFER_SIZE];
- unsigned int outputLen = OUTPUT_BUFFER_SIZE;
+ size_t outputLen = OUTPUT_BUFFER_SIZE;
boost::scoped_array toBeCompressedInputPtr( toBeCompressedInput );
boost::scoped_array compressedOutputPtr(compressedOutput);
setEmptyBuf( (unsigned char*)toBeCompressedInput,
INPUT_BUFFER_SIZE, emptyVal, width);
- // Compress an initialized abbreviated extent
- IDBCompressInterface compressor( userPaddingBytes );
- int rc = compressor.compressBlock(toBeCompressedInput,
- INPUT_BUFFER_SIZE, compressedOutput, outputLen );
+ int rc = compressor->compressBlock(toBeCompressedInput, INPUT_BUFFER_SIZE,
+ compressedOutput, outputLen);
if (rc != 0)
{
@@ -1333,8 +1340,8 @@ int FileOp::writeInitialCompColumnChunk(
}
// Round up the compressed chunk size
- rc = compressor.padCompressedChunks( compressedOutput,
- outputLen, OUTPUT_BUFFER_SIZE );
+ rc = compressor->padCompressedChunks(compressedOutput, outputLen,
+ OUTPUT_BUFFER_SIZE);
if (rc != 0)
{
@@ -1347,23 +1354,22 @@ int FileOp::writeInitialCompColumnChunk(
// "; blkAllocCnt: " << nBlocksAllocated <<
// "; compressedByteCnt: " << outputLen << std::endl;
- compressor.initHdr(hdrs, width, colDataType, m_compressionType);
- compressor.setBlockCount(hdrs, nBlocksAllocated);
- compressor.setLBIDByIndex(hdrs, startLBID, 0);
+ compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
+ compress::CompressInterface::setBlockCount(hdrs, nBlocksAllocated);
+ compress::CompressInterface::setLBIDByIndex(hdrs, startLBID, 0);
// Store compression pointers in the header
std::vector ptrs;
- ptrs.push_back( IDBCompressInterface::HDR_BUF_LEN * 2 );
- ptrs.push_back( outputLen + (IDBCompressInterface::HDR_BUF_LEN * 2) );
- compressor.storePtrs(ptrs, hdrs);
+ ptrs.push_back( CompressInterface::HDR_BUF_LEN * 2 );
+ ptrs.push_back( outputLen + (CompressInterface::HDR_BUF_LEN * 2) );
+ compress::CompressInterface::storePtrs(ptrs, hdrs);
RETURN_ON_ERROR( writeHeaders(pFile, hdrs) );
// Write the compressed data
- if ( pFile->write( compressedOutput, outputLen ) != outputLen )
- {
+ size_t writtenLen = pFile->write(compressedOutput, outputLen);
+ if (writtenLen != outputLen)
return ERR_FILE_WRITE;
- }
return NO_ERROR;
}
@@ -1421,7 +1427,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
return ERR_FILE_OPEN;
}
- char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
+ char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
rc = readHeaders( pFile, hdrs );
if (rc != NO_ERROR)
@@ -1432,9 +1438,14 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
}
int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
- IDBCompressInterface compressor( userPadBytes );
+
+ std::unique_ptr compressor(
+ compress::getCompressInterfaceByType(
+ compress::CompressInterface::getCompressionType(hdrs),
+ userPadBytes));
+
CompChunkPtrList chunkPtrs;
- int rcComp = compressor.getPtrList( hdrs, chunkPtrs );
+ int rcComp = compress::CompressInterface::getPtrList(hdrs, chunkPtrs);
if (rcComp != 0)
{
@@ -1444,7 +1455,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
}
// Nothing to do if the proposed HWM is < the current block count
- uint64_t blkCount = compressor.getBlockCount(hdrs);
+ uint64_t blkCount = compress::CompressInterface::getBlockCount(hdrs);
if (blkCount > (hwm + 1))
{
@@ -1455,7 +1466,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
const unsigned int ROWS_PER_EXTENT =
BRMWrapper::getInstance()->getInstance()->getExtentRows();
const unsigned int ROWS_PER_CHUNK =
- IDBCompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth;
+ CompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth;
const unsigned int CHUNKS_PER_EXTENT = ROWS_PER_EXTENT / ROWS_PER_CHUNK;
// If this is an abbreviated extent, we first expand to a full extent
@@ -1493,7 +1504,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
CompChunkPtr chunkOutPtr;
rc = expandAbbrevColumnChunk( pFile, emptyVal, colWidth,
- chunkPtrs[0], chunkOutPtr );
+ chunkPtrs[0], chunkOutPtr, hdrs );
if (rc != NO_ERROR)
{
@@ -1515,7 +1526,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
// Update block count to reflect a full extent
blkCount = (ROWS_PER_EXTENT * colWidth) / BYTE_PER_BLOCK;
- compressor.setBlockCount( hdrs, blkCount );
+ compress::CompressInterface::setBlockCount(hdrs, blkCount);
}
// Calculate the number of empty chunks we need to add to fill this extent
@@ -1532,7 +1543,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
compressor.getBlockCount(hdrs) << std::endl;
std::cout << "Pointer Header Size (in bytes): " <<
(compressor.getHdrSize(hdrs) -
- IDBCompressInterface::HDR_BUF_LEN) << std::endl;
+ CompressInterface::HDR_BUF_LEN) << std::endl;
std::cout << "Chunk Pointers (offset,length): " << std::endl;
for (unsigned k = 0; k < chunkPtrs.size(); k++)
@@ -1551,8 +1562,10 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
// Fill in or add necessary remaining empty chunks
if (numChunksToFill > 0)
{
- const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
- const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes;
+ const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN;
+ const int OUT_BUF_LEN =
+ compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes +
+ compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
// Allocate buffer, and store in scoped_array to insure it's deletion.
// Create scope {...} to manage deletion of buffers
@@ -1566,9 +1579,9 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
// Compress and then pad the compressed chunk
setEmptyBuf( (unsigned char*)toBeCompressedBuf,
IN_BUF_LEN, emptyVal, colWidth );
- unsigned int outputLen = OUT_BUF_LEN;
- rcComp = compressor.compressBlock( toBeCompressedBuf,
- IN_BUF_LEN, compressedBuf, outputLen );
+ size_t outputLen = OUT_BUF_LEN;
+ rcComp = compressor->compressBlock(toBeCompressedBuf, IN_BUF_LEN,
+ compressedBuf, outputLen);
if (rcComp != 0)
{
@@ -1579,8 +1592,8 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
toBeCompressedInputPtr.reset(); // release memory
- rcComp = compressor.padCompressedChunks( compressedBuf,
- outputLen, OUT_BUF_LEN );
+ rcComp = compressor->padCompressedChunks(compressedBuf, outputLen,
+ OUT_BUF_LEN);
if (rcComp != 0)
{
@@ -1639,7 +1652,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
ptrs.push_back( chunkPtrs[chunkPtrs.size() - 1].first +
chunkPtrs[chunkPtrs.size() - 1].second );
- compressor.storePtrs( ptrs, hdrs );
+ compress::CompressInterface::storePtrs(ptrs, hdrs);
rc = writeHeaders( pFile, hdrs );
@@ -1697,11 +1710,24 @@ int FileOp::expandAbbrevColumnChunk(
const uint8_t* emptyVal,
int colWidth,
const CompChunkPtr& chunkInPtr,
- CompChunkPtr& chunkOutPtr )
+ CompChunkPtr& chunkOutPtr,
+ const char *hdrs )
{
int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
- const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
- const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes;
+ auto realCompressionType = m_compressionType;
+ if (hdrs)
+ {
+ realCompressionType =
+ compress::CompressInterface::getCompressionType(hdrs);
+ }
+ std::unique_ptr compressor(
+ compress::getCompressInterfaceByType(realCompressionType,
+ userPadBytes));
+
+ const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN;
+ const int OUT_BUF_LEN =
+ compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes +
+ compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
char* toBeCompressedBuf = new char[ IN_BUF_LEN ];
boost::scoped_array toBeCompressedPtr(toBeCompressedBuf);
@@ -1717,13 +1743,10 @@ int FileOp::expandAbbrevColumnChunk(
chunkInPtr.second) );
// Uncompress an "abbreviated" chunk into our 4MB buffer
- unsigned int outputLen = IN_BUF_LEN;
- IDBCompressInterface compressor( userPadBytes );
- int rc = compressor.uncompressBlock(
- compressedInBuf,
- chunkInPtr.second,
- (unsigned char*)toBeCompressedBuf,
- outputLen);
+ size_t outputLen = IN_BUF_LEN;
+ int rc = compressor->uncompressBlock(compressedInBuf, chunkInPtr.second,
+ (unsigned char*) toBeCompressedBuf,
+ outputLen);
if (rc != 0)
{
@@ -1739,11 +1762,8 @@ int FileOp::expandAbbrevColumnChunk(
// Compress the data we just read, as a "full" 4MB chunk
outputLen = OUT_BUF_LEN;
- rc = compressor.compressBlock(
- reinterpret_cast(toBeCompressedBuf),
- IN_BUF_LEN,
- compressedOutBuf,
- outputLen );
+ rc = compressor->compressBlock(reinterpret_cast(toBeCompressedBuf),
+ IN_BUF_LEN, compressedOutBuf, outputLen);
if (rc != 0)
{
@@ -1751,8 +1771,8 @@ int FileOp::expandAbbrevColumnChunk(
}
// Round up the compressed chunk size
- rc = compressor.padCompressedChunks( compressedOutBuf,
- outputLen, OUT_BUF_LEN );
+ rc = compressor->padCompressedChunks(compressedOutBuf, outputLen,
+ OUT_BUF_LEN);
if (rc != 0)
{
@@ -1782,7 +1802,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* hdr) const
RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) );
// Write the headers
- if (pFile->write( hdr, IDBCompressInterface::HDR_BUF_LEN * 2 ) != IDBCompressInterface::HDR_BUF_LEN * 2)
+ if (pFile->write( hdr, CompressInterface::HDR_BUF_LEN * 2 ) != CompressInterface::HDR_BUF_LEN * 2)
{
return ERR_FILE_WRITE;
}
@@ -1808,7 +1828,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) );
// Write the control header
- if (pFile->write( controlHdr, IDBCompressInterface::HDR_BUF_LEN ) != IDBCompressInterface::HDR_BUF_LEN)
+ if (pFile->write( controlHdr, CompressInterface::HDR_BUF_LEN ) != CompressInterface::HDR_BUF_LEN)
{
return ERR_FILE_WRITE;
}
@@ -2651,9 +2671,8 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdrs ) const
{
RETURN_ON_ERROR( setFileOffset(pFile, 0) );
RETURN_ON_ERROR( readFile( pFile, reinterpret_cast(hdrs),
- (IDBCompressInterface::HDR_BUF_LEN * 2) ) );
- IDBCompressInterface compressor;
- int rc = compressor.verifyHdr( hdrs );
+ (CompressInterface::HDR_BUF_LEN * 2) ) );
+ int rc = compress::CompressInterface::verifyHdr(hdrs);
if (rc != 0)
{
@@ -2671,11 +2690,10 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdr1, char* hdr2 ) const
unsigned char* hdrPtr = reinterpret_cast(hdr1);
RETURN_ON_ERROR( setFileOffset(pFile, 0) );
RETURN_ON_ERROR( readFile( pFile, hdrPtr,
- IDBCompressInterface::HDR_BUF_LEN ));
+ CompressInterface::HDR_BUF_LEN ));
- IDBCompressInterface compressor;
- int ptrSecSize = compressor.getHdrSize(hdrPtr) -
- IDBCompressInterface::HDR_BUF_LEN;
+ int ptrSecSize = compress::CompressInterface::getHdrSize(hdrPtr) -
+ CompressInterface::HDR_BUF_LEN;
return readFile( pFile, reinterpret_cast(hdr2),
ptrSecSize );
}
diff --git a/writeengine/shared/we_fileop.h b/writeengine/shared/we_fileop.h
index a136d4528..267bff6d4 100644
--- a/writeengine/shared/we_fileop.h
+++ b/writeengine/shared/we_fileop.h
@@ -529,11 +529,11 @@ private:
FileOp(const FileOp& rhs);
FileOp& operator=(const FileOp& rhs);
- int expandAbbrevColumnChunk( IDBDataFile* pFile,
- const uint8_t* emptyVal,
- int colWidth,
- const compress::CompChunkPtr& chunkInPtr,
- compress::CompChunkPtr& chunkOutPt);
+ int expandAbbrevColumnChunk(IDBDataFile* pFile, const uint8_t* emptyVal,
+ int colWidth,
+ const compress::CompChunkPtr& chunkInPtr,
+ compress::CompChunkPtr& chunkOutPt,
+ const char* hdrs = nullptr);
int initAbbrevCompColumnExtent(
IDBDataFile* pFile, uint16_t dbRoot, int nBlocks,
diff --git a/writeengine/shared/we_rbmetawriter.cpp b/writeengine/shared/we_rbmetawriter.cpp
index ee9ff7c46..8cc2bd7a1 100644
--- a/writeengine/shared/we_rbmetawriter.cpp
+++ b/writeengine/shared/we_rbmetawriter.cpp
@@ -1007,9 +1007,9 @@ void RBMetaWriter::backupHWMChunk(
}
// Read Control header
- char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ];
+ char controlHdr[ CompressInterface::HDR_BUF_LEN ];
rc = fileOp.readFile( dbFile, (unsigned char*)controlHdr,
- IDBCompressInterface::HDR_BUF_LEN );
+ CompressInterface::HDR_BUF_LEN );
if (rc != NO_ERROR)
{
@@ -1025,8 +1025,7 @@ void RBMetaWriter::backupHWMChunk(
throw WeException( oss.str(), rc );
}
- IDBCompressInterface compressor;
- int rc1 = compressor.verifyHdr( controlHdr );
+ int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
if (rc1 != 0)
{
@@ -1045,9 +1044,23 @@ void RBMetaWriter::backupHWMChunk(
throw WeException( oss.str(), rc );
}
+ auto compressionType =
+ compress::CompressInterface::getCompressionType(controlHdr);
+ std::unique_ptr compressor(
+ compress::getCompressInterfaceByType(compressionType));
+
+ if (!compressor)
+ {
+ WErrorCodes ec;
+ std::ostringstream oss;
+ oss << "Ivalid compression type " << compressionType;
+ fileOp.closeFile( dbFile );
+ throw WeException(oss.str(), rc);
+ }
+
// Read Pointer header data
- uint64_t hdrSize = compressor.getHdrSize(controlHdr);
- uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN;
+ uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
+ uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
char* pointerHdr = new char[ptrHdrSize];
rc = fileOp.readFile( dbFile, (unsigned char*)pointerHdr, ptrHdrSize );
@@ -1067,7 +1080,8 @@ void RBMetaWriter::backupHWMChunk(
}
CompChunkPtrList chunkPtrs;
- rc = compressor.getPtrList(pointerHdr, ptrHdrSize, chunkPtrs );
+ rc = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
+ chunkPtrs);
delete[] pointerHdr;
if (rc != 0)
@@ -1087,7 +1101,7 @@ void RBMetaWriter::backupHWMChunk(
unsigned int blockOffsetWithinChunk = 0;
unsigned char* buffer = 0;
uint64_t chunkSize = 0;
- compressor.locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk);
+ compressor->locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk);
if (chunkIndex < chunkPtrs.size())
{
diff --git a/writeengine/wrapper/we_colopcompress.cpp b/writeengine/wrapper/we_colopcompress.cpp
index ae5659c03..cdf186e43 100644
--- a/writeengine/wrapper/we_colopcompress.cpp
+++ b/writeengine/wrapper/we_colopcompress.cpp
@@ -121,9 +121,9 @@ int ColumnOpCompress0::saveBlock(IDBDataFile* pFile, const unsigned char* writeB
* Constructor
*/
-ColumnOpCompress1::ColumnOpCompress1(Log* logger)
+ColumnOpCompress1::ColumnOpCompress1(uint32_t compressionType, Log* logger)
{
- m_compressionType = 1;
+ m_compressionType = compressionType;
m_chunkManager = new ChunkManager();
if (logger)
@@ -164,11 +164,7 @@ bool ColumnOpCompress1::abbreviatedExtent(IDBDataFile* pFile, int colWidth) cons
int ColumnOpCompress1::blocksInFile(IDBDataFile* pFile) const
{
- CompFileHeader compFileHeader;
- readHeaders(pFile, compFileHeader.fControlData, compFileHeader.fPtrSection);
-
- compress::IDBCompressInterface compressor;
- return compressor.getBlockCount(compFileHeader.fControlData);
+ return m_chunkManager->getBlockCount(pFile);
}
diff --git a/writeengine/wrapper/we_colopcompress.h b/writeengine/wrapper/we_colopcompress.h
index 681d911f8..33da38646 100644
--- a/writeengine/wrapper/we_colopcompress.h
+++ b/writeengine/wrapper/we_colopcompress.h
@@ -97,7 +97,7 @@ public:
/**
* @brief Constructor
*/
- EXPORT ColumnOpCompress1(Log* logger = 0);
+ EXPORT ColumnOpCompress1(uint32_t compressionType, Log* logger = 0);
/**
* @brief Default Destructor
diff --git a/writeengine/wrapper/we_dctnrycompress.cpp b/writeengine/wrapper/we_dctnrycompress.cpp
index 22677b491..e352165ce 100644
--- a/writeengine/wrapper/we_dctnrycompress.cpp
+++ b/writeengine/wrapper/we_dctnrycompress.cpp
@@ -67,9 +67,9 @@ DctnryCompress0::~DctnryCompress0()
/**
* Constructor
*/
-DctnryCompress1::DctnryCompress1(Log* logger)
+DctnryCompress1::DctnryCompress1(uint32_t compressionType, Log* logger)
{
- m_compressionType = 1;
+ m_compressionType = compressionType;
m_chunkManager = new ChunkManager();
if (logger)
diff --git a/writeengine/wrapper/we_dctnrycompress.h b/writeengine/wrapper/we_dctnrycompress.h
index 968253d45..8ec3ad3ac 100644
--- a/writeengine/wrapper/we_dctnrycompress.h
+++ b/writeengine/wrapper/we_dctnrycompress.h
@@ -62,7 +62,7 @@ public:
/**
* @brief Constructor
*/
- EXPORT DctnryCompress1(Log* logger = 0);
+ EXPORT DctnryCompress1(uint32_t compressionType, Log* logger = 0);
/**
* @brief Default Destructor
diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp
index b1cb6b2f8..fe4aeff49 100644
--- a/writeengine/wrapper/writeengine.cpp
+++ b/writeengine/wrapper/writeengine.cpp
@@ -76,19 +76,25 @@ StopWatch timer;
WriteEngineWrapper::WriteEngineWrapper() : m_opType(NOOP)
{
m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0;
- m_colOp[COMPRESSED_OP] = new ColumnOpCompress1;
-
m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0;
- m_dctnry[COMPRESSED_OP] = new DctnryCompress1;
+
+ m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*compressionType=*/1);
+ m_dctnry[COMPRESSED_OP_1] = new DctnryCompress1(/*compressionType=*/1);
+
+ m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*compressionType=*/3);
+ m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3);
}
WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opType(rhs.m_opType)
{
m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0;
- m_colOp[COMPRESSED_OP] = new ColumnOpCompress1;
-
m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0;
- m_dctnry[COMPRESSED_OP] = new DctnryCompress1;
+
+ m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*compressionType=*/1);
+ m_dctnry[COMPRESSED_OP_1] = new DctnryCompress1(/*compressionType=*/1);
+
+ m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*compressionType=*/3);
+ m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3);
}
/**@brief WriteEngineWrapper Constructor
@@ -96,9 +102,13 @@ WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opTyp
WriteEngineWrapper::~WriteEngineWrapper()
{
delete m_colOp[UN_COMPRESSED_OP];
- delete m_colOp[COMPRESSED_OP];
delete m_dctnry[UN_COMPRESSED_OP];
- delete m_dctnry[COMPRESSED_OP];
+
+ delete m_colOp[COMPRESSED_OP_1];
+ delete m_dctnry[COMPRESSED_OP_1];
+
+ delete m_colOp[COMPRESSED_OP_2];
+ delete m_dctnry[COMPRESSED_OP_2];
}
/**@brief Perform upfront initialization
diff --git a/writeengine/wrapper/writeengine.h b/writeengine/wrapper/writeengine.h
index cdc7d472d..5423a7361 100644
--- a/writeengine/wrapper/writeengine.h
+++ b/writeengine/wrapper/writeengine.h
@@ -58,9 +58,10 @@ namespace WriteEngine
{
//... Total compression operation: un_compresssed, compressed
-const int UN_COMPRESSED_OP = 0;
-const int COMPRESSED_OP = 1;
-const int TOTAL_COMPRESS_OP = 2;
+const int UN_COMPRESSED_OP = 0;
+const int COMPRESSED_OP_1 = 1;
+const int COMPRESSED_OP_2 = 2;
+const int TOTAL_COMPRESS_OP = 3;
//...Forward class declarations
class Log;
@@ -446,8 +447,10 @@ public:
*/
void setIsInsert(bool bIsInsert)
{
- m_colOp[COMPRESSED_OP]->chunkManager()->setIsInsert(bIsInsert);
- m_dctnry[COMPRESSED_OP]->chunkManager()->setIsInsert(true);
+ m_colOp[COMPRESSED_OP_1]->chunkManager()->setIsInsert(bIsInsert);
+ m_dctnry[COMPRESSED_OP_1]->chunkManager()->setIsInsert(true);
+ m_colOp[COMPRESSED_OP_2]->chunkManager()->setIsInsert(bIsInsert);
+ m_dctnry[COMPRESSED_OP_2]->chunkManager()->setIsInsert(true);
}
/**
@@ -458,7 +461,7 @@ public:
*/
bool getIsInsert()
{
- return m_colOp[COMPRESSED_OP]->chunkManager()->getIsInsert();
+ return m_colOp[COMPRESSED_OP_1]->chunkManager()->getIsInsert();
}
std::tr1::unordered_map& getTxnMap()
@@ -475,10 +478,23 @@ public:
*/
int flushChunks(int rc, const std::map& columOids)
{
- int rtn1 = m_colOp[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids);
- int rtn2 = m_dctnry[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids);
+ std::vector compressedOpIds = {COMPRESSED_OP_1,
+ COMPRESSED_OP_2};
- return (rtn1 != NO_ERROR ? rtn1 : rtn2);
+ for (const auto compressedOpId : compressedOpIds)
+ {
+ auto rtn = m_colOp[compressedOpId]->chunkManager()->flushChunks(
+ rc, columOids);
+ if (rtn != NO_ERROR)
+ return rtn;
+
+ rtn = m_dctnry[compressedOpId]->chunkManager()->flushChunks(
+ rc, columOids);
+ if (rtn != NO_ERROR)
+ return rtn;
+ }
+
+ return NO_ERROR;
}
/**
@@ -524,7 +540,7 @@ public:
int startTransaction(const TxnID& txnid)
{
int rc = 0;
- rc = m_colOp[COMPRESSED_OP]->chunkManager()->startTransaction(txnid);
+ rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->startTransaction(txnid);
//if ( rc == 0)
// rc = m_dctnry[COMPRESSED_OP]->chunkManager()->startTransaction(txnid);
return rc;
@@ -537,7 +553,8 @@ public:
int confirmTransaction (const TxnID& txnid)
{
int rc = 0;
- rc = m_colOp[COMPRESSED_OP]->chunkManager()->confirmTransaction (txnid);
+ rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->confirmTransaction(
+ txnid);
return rc;
}
@@ -549,7 +566,8 @@ public:
int endTransaction(const TxnID& txnid, bool success)
{
int rc = 0;
- rc = m_colOp[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success);
+ rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->endTransaction(txnid,
+ success);
//if ( rc == 0)
// rc = m_dctnry[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success);
return rc;
@@ -785,7 +803,16 @@ private:
int op(int compressionType)
{
- return (compressionType > 0 ? COMPRESSED_OP : UN_COMPRESSED_OP);
+ switch (compressionType)
+ {
+ case 1:
+ case 2:
+ return COMPRESSED_OP_1;
+ case 3:
+ return COMPRESSED_OP_2;
+ }
+
+ return 0;
}