diff --git a/.drone.jsonnet b/.drone.jsonnet index 91277de6e..0b9a02485 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -36,9 +36,9 @@ local deb_build_deps = 'apt update && apt install --yes --no-install-recommends local platformMap(platform) = local platform_map = { - 'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package', - 'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package', - 'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package', + 'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran liblz4-devel && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package', + 'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' lz4-devel && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package', + 'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' lz4-devel cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package', 'debian:9': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=stretch' debian/autobake-deb.sh", 'debian:10': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=buster' debian/autobake-deb.sh", 'ubuntu:18.04': 
deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=bionic' debian/autobake-deb.sh", diff --git a/CMakeLists.txt b/CMakeLists.txt index 4bbdfe3fb..efe54d301 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -163,6 +163,12 @@ if(NOT AWK_EXECUTABLE) return() endif() +FIND_PACKAGE(LZ4) +if (NOT LZ4_FOUND) + MESSAGE_ONCE(CS_NO_LZ4 "lz4 not found") + return() +endif() + IF (NOT INSTALL_LAYOUT) INCLUDE(check_compiler_flag) diff --git a/cmake/FindLZ4.cmake b/cmake/FindLZ4.cmake new file mode 100644 index 000000000..7e2ca66e9 --- /dev/null +++ b/cmake/FindLZ4.cmake @@ -0,0 +1,25 @@ +find_path(LZ4_ROOT_DIR + NAMES include/lz4.h +) + +find_library(LZ4_LIBRARIES + NAMES lz4 + HINTS ${LZ4_ROOT_DIR}/lib +) + +find_path(LZ4_INCLUDE_DIR + NAMES lz4.h + HINTS ${LZ4_ROOT_DIR}/include +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LZ4 DEFAULT_MSG + LZ4_LIBRARIES + LZ4_INCLUDE_DIR +) + +mark_as_advanced( + LZ4_ROOT_DIR + LZ4_LIBRARIES + LZ4_INCLUDE_DIR +) diff --git a/dbcon/joblist/pcolstep.cpp b/dbcon/joblist/pcolstep.cpp index 6b625d1e8..e2619d22e 100644 --- a/dbcon/joblist/pcolstep.cpp +++ b/dbcon/joblist/pcolstep.cpp @@ -145,9 +145,7 @@ pColStep::pColStep( if (fOid < 1000) throw runtime_error("pColStep: invalid column"); - compress::IDBCompressInterface cmpif; - - if (!cmpif.isCompressionAvail(fColType.compressionType)) + if (!compress::CompressInterface::isCompressionAvail(fColType.compressionType)) { ostringstream oss; oss << "Unsupported compression type " << fColType.compressionType; diff --git a/dbcon/mysql/columnstore_info.sql b/dbcon/mysql/columnstore_info.sql index 476819aad..8a79f98ff 100644 --- a/dbcon/mysql/columnstore_info.sql +++ b/dbcon/mysql/columnstore_info.sql @@ -95,7 +95,11 @@ DROP PROCEDURE IF EXISTS `compression_ratio` // CREATE PROCEDURE compression_ratio() SQL SECURITY INVOKER BEGIN -SELECT CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns 
cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files WHERE compressed_data_size IS NOT NULL), ':1') COMPRESSION_RATIO; + +SELECT 'Snappy' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='Snappy' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') compression_ratio +UNION ALL +SELECT 'LZ4' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='LZ4') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='LZ4' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') as compression_ratio; + END // create or replace procedure columnstore_upgrade() SQL SECURITY INVOKER diff --git a/dbcon/mysql/ha_mcs_ddl.cpp b/dbcon/mysql/ha_mcs_ddl.cpp index 339b43750..c6d56757b 100644 --- a/dbcon/mysql/ha_mcs_ddl.cpp +++ b/dbcon/mysql/ha_mcs_ddl.cpp @@ -777,7 +777,6 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl 
parser.setDefaultSchema(schema); parser.setDefaultCharset(default_table_charset); int rc = 0; - IDBCompressInterface idbCompress; parser.Parse(ddlStatement.c_str()); if (get_fe_conn_info_ptr() == NULL) @@ -981,7 +980,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl if (compressionType == 1) compressionType = 2; - if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType ))) + if ((compressionType > 0) && + !(compress::CompressInterface::isCompressionAvail( + compressionType))) { rc = 1; ci->alterTableState = cal_connection_info::NOT_ALTER; @@ -1368,7 +1369,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl return rc; } - if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType ))) + if ((compressionType > 0) && + !(compress::CompressInterface::isCompressionAvail( + compressionType))) { rc = 1; thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str()); @@ -1713,7 +1716,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl return rc; } - if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType ))) + if ((compressionType > 0) && + !(compress::CompressInterface::isCompressionAvail( + compressionType))) { rc = 1; thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str()); @@ -1842,7 +1847,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl return rc; } - if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType ))) + if ((compressionType > 0) && + !(compress::CompressInterface::isCompressionAvail( + compressionType))) { rc = 1; thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str()); @@ -2364,9 +2371,8 @@ int ha_mcs_impl_create_(const char* name, TABLE* 
table_arg, HA_CREATE_INFO* crea if (compressiontype == 1) compressiontype = 2; - IDBCompressInterface idbCompress; - - if ( ( compressiontype > 0 ) && !(idbCompress.isCompressionAvail( compressiontype )) ) + if ((compressiontype > 0) && + !(compress::CompressInterface::isCompressionAvail(compressiontype))) { string emsg = IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE); setError(thd, ER_INTERNAL_ERROR, emsg); diff --git a/dbcon/mysql/ha_mcs_sysvars.cpp b/dbcon/mysql/ha_mcs_sysvars.cpp index b60dd3427..234fe69ae 100644 --- a/dbcon/mysql/ha_mcs_sysvars.cpp +++ b/dbcon/mysql/ha_mcs_sysvars.cpp @@ -21,8 +21,10 @@ #include "ha_mcs_sysvars.h" const char* mcs_compression_type_names[] = { - "SNAPPY", - "SNAPPY", + "SNAPPY", // 0 + "SNAPPY", // 1 + "SNAPPY", // 2 + "LZ4", // 3 NullS }; @@ -39,7 +41,8 @@ static MYSQL_THDVAR_ENUM( PLUGIN_VAR_RQCMDARG, "Controls compression algorithm for create tables. Possible values are: " "NO_COMPRESSION segment files aren't compressed; " - "SNAPPY segment files are Snappy compressed (default);", + "SNAPPY segment files are Snappy compressed (default); " + "LZ4 segment files are LZ4 compressed;", NULL, // check NULL, // update 1, //default diff --git a/dbcon/mysql/ha_mcs_sysvars.h b/dbcon/mysql/ha_mcs_sysvars.h index faeed3880..a1c9afe9f 100644 --- a/dbcon/mysql/ha_mcs_sysvars.h +++ b/dbcon/mysql/ha_mcs_sysvars.h @@ -30,7 +30,8 @@ extern char cs_commit_hash[]; // compression_type enum mcs_compression_type_t { NO_COMPRESSION = 0, - SNAPPY = 2 + SNAPPY = 2, + LZ4 = 3 }; // use_import_for_batchinsert mode diff --git a/dbcon/mysql/is_columnstore_columns.cpp b/dbcon/mysql/is_columnstore_columns.cpp index 437360489..43ff15d08 100644 --- a/dbcon/mysql/is_columnstore_columns.cpp +++ b/dbcon/mysql/is_columnstore_columns.cpp @@ -183,6 +183,10 @@ static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond) compression_type = "Snappy"; break; + case 3: + compression_type = "LZ4"; + break; + default: compression_type 
= "Unknown"; break; diff --git a/oam/etc/Columnstore.xml b/oam/etc/Columnstore.xml index df95e40ad..378c5b24b 100644 --- a/oam/etc/Columnstore.xml +++ b/oam/etc/Columnstore.xml @@ -492,6 +492,7 @@ 100 N Y + Snappy 16K @@ -539,6 +540,7 @@ Y + Snappy 127.0.0.1 diff --git a/primitives/blockcache/iomanager.cpp b/primitives/blockcache/iomanager.cpp index 70b40f63f..401caabd0 100644 --- a/primitives/blockcache/iomanager.cpp +++ b/primitives/blockcache/iomanager.cpp @@ -308,7 +308,7 @@ void waitForRetry(long count) //Must hold the FD cache lock! -int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterface& decompressor) +static int updateptrs(char* ptr, FdCacheType_t::iterator fdit) { ssize_t i; uint32_t progress; @@ -357,7 +357,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa fdit->second->cmpMTime = mtime; int gplRc = 0; - gplRc = decompressor.getPtrList(&ptr[4096], 4096, fdit->second->ptrList); + gplRc = compress::CompressInterface::getPtrList(&ptr[4096], 4096, + fdit->second->ptrList); if (gplRc != 0) return -5; // go for a retry. @@ -391,7 +392,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa return -8; CompChunkPtrList nextPtrList; - gplRc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList); + gplRc = compress::CompressInterface::getPtrList( + &nextHdrBufPtr[0], numHdrs * 4096, nextPtrList); if (gplRc != 0) return -7; // go for a retry. 
@@ -445,7 +447,6 @@ void* thr_popper(ioManager* arg) double rqst3; bool locked = false; SPFdEntry_t fe; - IDBCompressInterface decompressor; vector cacheInsertOps; bool copyLocked = false; @@ -463,8 +464,10 @@ void* thr_popper(ioManager* arg) FdCacheType_t::iterator fdit; IDBDataFile* fp = 0; - uint32_t maxCompSz = IDBCompressInterface::maxCompressedSize(iom->blocksPerRead * BLOCK_SIZE); - uint32_t readBufferSz = maxCompSz + pageSize; + size_t maxCompSz = + compress::CompressInterface::getMaxCompressedSizeGeneric( + iom->blocksPerRead * BLOCK_SIZE); + size_t readBufferSz = maxCompSz + pageSize; realbuff.reset(new char[readBufferSz]); @@ -863,7 +866,7 @@ retryReadHeaders: cur_mtime = fp_mtime; if (decompRetryCount > 0 || retryReadHeadersCount > 0 || cur_mtime > fdit->second->cmpMTime) - updatePtrsRc = updateptrs(&alignedbuff[0], fdit, decompressor); + updatePtrsRc = updateptrs(&alignedbuff[0], fdit); fdMapMutex.unlock(); @@ -1052,7 +1055,7 @@ retryReadHeaders: #ifdef _MSC_VER unsigned int blen = 4 * 1024 * 1024 + 4; #else - uint32_t blen = 4 * 1024 * 1024 + 4; + size_t blen = 4 * 1024 * 1024 + 4; #endif #ifdef IDB_COMP_POC_DEBUG { @@ -1060,7 +1063,18 @@ retryReadHeaders: cout << "decompress(0x" << hex << (ptrdiff_t)&alignedbuff[0] << dec << ", " << fdit->second->ptrList[cmpOffFact.quot].second << ", 0x" << hex << (ptrdiff_t)uCmpBuf << dec << ", " << blen << ")" << endl; } #endif - int dcrc = decompressor.uncompressBlock(&alignedbuff[0], + + std::unique_ptr decompressor( + compress::getCompressInterfaceByType( + static_cast(fdit->second->compType))); + if (!decompressor) + { + // Use default? 
+ decompressor.reset( + new compress::CompressInterfaceSnappy()); + } + + int dcrc = decompressor->uncompressBlock(&alignedbuff[0], fdit->second->ptrList[cmpOffFact.quot].second, uCmpBuf, blen); if (dcrc != 0) diff --git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp index 719893abb..e6e9e65c0 100644 --- a/primitives/primproc/primitiveserver.cpp +++ b/primitives/primproc/primitiveserver.cpp @@ -696,13 +696,25 @@ blockReadRetry: i = fp->pread( &cmpHdrBuf[0], 0, 4096 * 3); CompChunkPtrList ptrList; - IDBCompressInterface decompressor; + std::unique_ptr decompressor( + compress::getCompressInterfaceByType( + compress::CompressInterface::getCompressionType( + &cmpHdrBuf[0]))); + + if (!decompressor) + { + // Use default? + decompressor.reset( + new compress::CompressInterfaceSnappy()); + } + int dcrc = 0; if (i == 4096 * 3) { uint64_t numHdrs = 0; // extra headers - dcrc = decompressor.getPtrList(&cmpHdrBuf[4096], 4096, ptrList); + dcrc = compress::CompressInterface::getPtrList( + &cmpHdrBuf[4096], 4096, ptrList); if (dcrc == 0 && ptrList.size() > 0) numHdrs = ptrList[0].first / 4096ULL - 2ULL; @@ -723,7 +735,8 @@ blockReadRetry: i = fp->pread( &nextHdrBufPtr[0], 4096 * 2, numHdrs * 4096 ); CompChunkPtrList nextPtrList; - dcrc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList); + dcrc = compress::CompressInterface::getPtrList( + &nextHdrBufPtr[0], numHdrs * 4096, nextPtrList); if (dcrc == 0) ptrList.insert(ptrList.end(), nextPtrList.begin(), nextPtrList.end()); @@ -777,11 +790,11 @@ blockReadRetry: cmpBuf = (char*) alignedBuffer; } - unsigned blen = 4 * 1024 * 1024; + size_t blen = 4 * 1024 * 1024; i = fp->pread( cmpBuf, cmpBufOff, cmpBufSz ); - dcrc = decompressor.uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen); + dcrc = decompressor->uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen); if (dcrc == 0) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b637e9b03..746d77124 100644 --- 
a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -42,3 +42,9 @@ if (WITH_REBUILD_EM_UT) target_link_libraries(rebuild_em_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS}) install(TARGETS rebuild_em_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) endif() + +if (WITH_COMPRESSION_UT) + add_executable(compression_tests compression-tests.cpp) + target_link_libraries(compression_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS}) + install(TARGETS compression_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) +endif() diff --git a/tests/compression-tests.cpp b/tests/compression-tests.cpp new file mode 100644 index 000000000..b0d0868b4 --- /dev/null +++ b/tests/compression-tests.cpp @@ -0,0 +1,126 @@ +/* Copyright (C) 2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include + +#include "idbcompress.h" + +class CompressionTest : public ::testing::Test +{ + + protected: + std::string genPermutations(std::string& data) + { + std::string generated; + generate(data, 0, generated); + return generated; + } + + private: + void generate(std::string& data, uint32_t i, std::string& generated) + { + if (i == data.size()) + { + generated.append(data); + return; + } + + for (uint32_t k = i, e = data.size(); k < e; ++k) + { + std::swap(data[i], data[k]); + generate(data, i + 1, generated); + std::swap(data[i], data[k]); + } + } +}; + +TEST_F(CompressionTest, LZ4CanCompress) +{ + std::string originalData = + "This program is free software; you can redistribute it and/or" + "modify it under the terms of the GNU General Public License" + "as published by the Free Software Foundation; version 2 of" + "the License."; + + std::unique_ptr compressor( + new compress::CompressInterfaceLZ4()); + + size_t originalSize = originalData.size(); + size_t compressedSize = compressor->maxCompressedSize(originalSize); + std::unique_ptr compressedData(new char[compressedSize]); + std::memset(compressedData.get(), 0, compressedSize); + + auto rc = compressor->compress(originalData.data(), originalSize, + compressedData.get(), &compressedSize); + ASSERT_EQ(rc, 0); + + std::unique_ptr uncompressedData(new char[originalSize]); + rc = compressor->uncompress(compressedData.get(), compressedSize, + uncompressedData.get(), &originalSize); + ASSERT_EQ(rc, 0); + std::string result(uncompressedData.get(), originalSize); // buffer is not NUL-terminated + EXPECT_EQ(originalData, result); +} + +TEST_F(CompressionTest, LZvsSnappyUnique) +{ + std::unique_ptr lz4Compressor( + new compress::CompressInterfaceLZ4()); + std::unique_ptr snappyCompressor( + new compress::CompressInterfaceSnappy()); + // Generate permutations. + // 9! 
* 9 == 3265920 (closer to current chunk size) + std::vector dataPool{"abcdefghi", "aaadefghi", "aaaaafghi", + "aaaaaaahi", "aaaaaaaaj"}; + + for (auto& data : dataPool) + { + std::cout << "Permutations generated for: " << data << std::endl; + auto generated = genPermutations(data); + auto generatedSize = generated.size(); + + auto compressedSizeLZ4 = + lz4Compressor->maxCompressedSize(generatedSize); + auto compressedSizeSnappy = + snappyCompressor->maxCompressedSize(generatedSize); + + std::unique_ptr lz4CompressedData(new char[compressedSizeLZ4]); + auto rc = lz4Compressor->compress(generated.data(), generatedSize, + lz4CompressedData.get(), + &compressedSizeLZ4); + ASSERT_EQ(rc, 0); + + std::unique_ptr snappyCompressedData( + new char[compressedSizeSnappy]); + rc = snappyCompressor->compress(generated.data(), generatedSize, + snappyCompressedData.get(), + &compressedSizeSnappy); + ASSERT_EQ(rc, 0); + + std::cout << "LZ ratio: " + << (float) ((float) generatedSize / + (float) compressedSizeLZ4) + << std::endl; + + std::cout << "Snappy ratio: " + << (float) ((float) generatedSize / + (float) compressedSizeSnappy) + << std::endl; + } +} diff --git a/tests/shared_components_tests.cpp b/tests/shared_components_tests.cpp index d747ee569..7f302599b 100644 --- a/tests/shared_components_tests.cpp +++ b/tests/shared_components_tests.cpp @@ -383,7 +383,7 @@ public: BlockOp blockOp; char fileName[20]; int rc; - char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; + char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ]; printf("\nRunning testCreateDeleteFile \n"); idbdatafile::IDBPolicy::init(true, false, "", 0); @@ -966,7 +966,7 @@ public: BlockOp blockOp; char fileName[20]; int rc; - char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; + char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ]; int dbRoot = 1; printf("\nRunning testExtensionWOPrealloc \n"); @@ -1085,7 +1085,7 @@ public: int dbRoot = 1; int colWidth = 65535; - DctnryCompress1 m_Dctnry; + DctnryCompress1 
m_Dctnry(/*compressionType=*/1); // This is the magic for the stub in FileOp::oid2FileName int oId = 42; @@ -1565,7 +1565,7 @@ public: BlockOp blockOp; char fileName[20]; int rc; - char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; + char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ]; int dbRoot = 1; idbdatafile::IDBPolicy::init(true, false, "", 0); diff --git a/tools/rebuildEM/rebuildEM.cpp b/tools/rebuildEM/rebuildEM.cpp index 2895d40b4..cb0d20b40 100644 --- a/tools/rebuildEM/rebuildEM.cpp +++ b/tools/rebuildEM/rebuildEM.cpp @@ -89,7 +89,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName) } // Read and verify header. - char fileHeader[compress::IDBCompressInterface::HDR_BUF_LEN * 2]; + char fileHeader[compress::CompressInterface::HDR_BUF_LEN * 2]; rc = fileOp.readHeaders(dbFile.get(), fileHeader); if (rc != 0) { @@ -116,8 +116,8 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName) } // Read the `colDataType` and `colWidth` from the given header. - compress::IDBCompressInterface compressor; - const auto versionNumber = compressor.getVersionNumber(fileHeader); + const auto versionNumber = + compress::CompressInterface::getVersionNumber(fileHeader); // Verify header number. 
if (versionNumber < 3) { @@ -129,10 +129,11 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName) return -1; } - auto colDataType = compressor.getColDataType(fileHeader); - auto colWidth = compressor.getColumnWidth(fileHeader); - auto blockCount = compressor.getBlockCount(fileHeader); - auto lbidCount = compressor.getLBIDCount(fileHeader); + auto colDataType = compress::CompressInterface::getColDataType(fileHeader); + auto colWidth = compress::CompressInterface::getColumnWidth(fileHeader); + auto blockCount = compress::CompressInterface::getBlockCount(fileHeader); + auto lbidCount = compress::CompressInterface::getLBIDCount(fileHeader); + auto compressionType = compress::CompressInterface::getCompressionType(fileHeader); if (colDataType == execplan::CalpontSystemCatalog::UNDEFINED) { @@ -155,7 +156,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName) uint64_t hwm = 0; rc = searchHWMInSegmentFile(oid, getDBRoot(), partition, segment, colDataType, colWidth, - blockCount, isDict, hwm); + blockCount, isDict, compressionType, hwm); if (rc != 0) { return rc; @@ -172,13 +173,13 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName) { for (uint32_t lbidIndex = 0; lbidIndex < lbidCount - 1; ++lbidIndex) { - auto lbid = compressor.getLBIDByIndex(fileHeader, lbidIndex); + auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidIndex); FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, /*hwm*/ 0, isDict); extentMap.push_back(fileId); } // Last one has an actual HWM. - auto lbid = compressor.getLBIDByIndex(fileHeader, lbidCount - 1); + auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidCount - 1); FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict); extentMap.push_back(fileId); @@ -192,7 +193,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName) else { // One extent per segment file. 
- auto lbid = compressor.getLBIDByIndex(fileHeader, 0); + auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, 0); FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict); extentMap.push_back(fileId); @@ -293,7 +294,7 @@ int32_t EMReBuilder::rebuildExtentMap() int32_t EMReBuilder::searchHWMInSegmentFile( uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, - uint64_t blockCount, bool isDict, uint64_t& hwm) + uint64_t blockCount, bool isDict, uint32_t compressionType, uint64_t& hwm) { std::unique_ptr chunkManagerWrapper; try @@ -302,13 +303,15 @@ int32_t EMReBuilder::searchHWMInSegmentFile( { chunkManagerWrapper = std::unique_ptr( new ChunkManagerWrapperDict(oid, dbRoot, partition, segment, - colDataType, colWidth)); + colDataType, colWidth, + compressionType)); } else { chunkManagerWrapper = std::unique_ptr( new ChunkManagerWrapperColumn(oid, dbRoot, partition, segment, - colDataType, colWidth)); + colDataType, colWidth, + compressionType)); } } catch (...) @@ -401,12 +404,13 @@ int32_t ChunkManagerWrapper::readBlock(uint32_t blockNumber) ChunkManagerWrapperColumn::ChunkManagerWrapperColumn( uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, - execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth) + execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, + uint32_t compressionType) : ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType, colWidth) { pFileOp = std::unique_ptr( - new WriteEngine::ColumnOpCompress1()); + new WriteEngine::ColumnOpCompress1(compressionType)); chunkManager.fileOp(pFileOp.get()); // Open compressed column segment file. We will read block by block // from the compressed chunks. 
@@ -463,12 +467,13 @@ bool ChunkManagerWrapperColumn::isEmptyValue(const uint8_t* value) const ChunkManagerWrapperDict::ChunkManagerWrapperDict( uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, - execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth) + execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth, + uint32_t compressionType) : ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType, colWidth) { pFileOp = std::unique_ptr( - new WriteEngine::DctnryCompress1()); + new WriteEngine::DctnryCompress1(compressionType)); chunkManager.fileOp(pFileOp.get()); // Open compressed dict segment file. pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment, diff --git a/tools/rebuildEM/rebuildEM.h b/tools/rebuildEM/rebuildEM.h index 03db4896d..481a2e102 100644 --- a/tools/rebuildEM/rebuildEM.h +++ b/tools/rebuildEM/rebuildEM.h @@ -112,7 +112,8 @@ class EMReBuilder int32_t searchHWMInSegmentFile( uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, - uint32_t width, uint64_t blocksCount, bool isDict, uint64_t& hwm); + uint32_t width, uint64_t blocksCount, bool isDict, + uint32_t compressionType, uint64_t& hwm); // Sets the dbroot to the given `number`. 
void setDBRoot(uint32_t number) { dbRoot = number; } @@ -184,7 +185,7 @@ class ChunkManagerWrapperColumn : public ChunkManagerWrapper ChunkManagerWrapperColumn( uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, - uint32_t colWidth); + uint32_t colWidth, uint32_t compressionType); ~ChunkManagerWrapperColumn() = default; ChunkManagerWrapperColumn(const ChunkManagerWrapperColumn& other) = delete; @@ -210,7 +211,7 @@ class ChunkManagerWrapperDict : public ChunkManagerWrapper ChunkManagerWrapperDict( uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment, execplan::CalpontSystemCatalog::ColDataType colDataType, - uint32_t colWidth); + uint32_t colWidth, uint32_t compressionType); ~ChunkManagerWrapperDict() = default; ChunkManagerWrapperDict(const ChunkManagerWrapperDict& other) = delete; diff --git a/utils/compress/CMakeLists.txt b/utils/compress/CMakeLists.txt index 4156531e4..b3dc2b068 100644 --- a/utils/compress/CMakeLists.txt +++ b/utils/compress/CMakeLists.txt @@ -10,7 +10,7 @@ add_definitions(-DNDEBUG) add_library(compress SHARED ${compress_LIB_SRCS}) -target_link_libraries(compress ${SNAPPY_LIBRARIES}) +target_link_libraries(compress ${SNAPPY_LIBRARIES} ${LZ4_LIBRARIES}) install(TARGETS compress DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine) diff --git a/utils/compress/idbcompress.cpp b/utils/compress/idbcompress.cpp index a434f19f5..79f812fbd 100644 --- a/utils/compress/idbcompress.cpp +++ b/utils/compress/idbcompress.cpp @@ -22,12 +22,14 @@ #include #include #include +#include using namespace std; #include "blocksize.h" #include "logger.h" #include "snappy.h" #include "hasher.h" +#include "lz4.h" #define IDBCOMP_DLLEXPORT #include "idbcompress.h" @@ -39,8 +41,7 @@ const uint64_t MAGIC_NUMBER = 0xfdc119a384d0778eULL; const uint64_t VERSION_NUM1 = 1; const uint64_t VERSION_NUM2 = 2; const uint64_t VERSION_NUM3 = 3; -const int COMPRESSED_CHUNK_INCREMENT_SIZE = 
8192; -const int PTR_SECTION_OFFSET = compress::IDBCompressInterface::HDR_BUF_LEN; +const int PTR_SECTION_OFFSET = compress::CompressInterface::HDR_BUF_LEN; // version 1.1 of the chunk data has a short header // QuickLZ compressed data never has the high bit set on the first byte @@ -83,7 +84,7 @@ struct CompressedDBFileHeader union CompressedDBFileHeaderBlock { CompressedDBFileHeader fHeader; - char fDummy[compress::IDBCompressInterface::HDR_BUF_LEN]; + char fDummy[compress::CompressInterface::HDR_BUF_LEN]; }; void initCompressedDBFileHeader( @@ -110,53 +111,57 @@ namespace compress { #ifndef SKIP_IDB_COMPRESSION -IDBCompressInterface::IDBCompressInterface(unsigned int numUserPaddingBytes) : +CompressInterface::CompressInterface(unsigned int numUserPaddingBytes) : fNumUserPaddingBytes(numUserPaddingBytes) { } -IDBCompressInterface::~IDBCompressInterface() -{ } - /* V1 is really only available for decompression, we kill any DDL using V1 by hand. * Maybe should have a new api, isDecompressionAvail() ? Any request to compress * using V1 will silently be changed to V2. 
*/ -bool IDBCompressInterface::isCompressionAvail(int compressionType) const +/*static*/ +bool CompressInterface::isCompressionAvail(int compressionType) { - if ( (compressionType == 0) || - (compressionType == 1) || - (compressionType == 2) ) - return true; + return ((compressionType == 0) || (compressionType == 1) || + (compressionType == 2) || (compressionType == 3)); +} - return false; +size_t CompressInterface::getMaxCompressedSizeGeneric(size_t inLen) +{ + return std::max(snappy::MaxCompressedLength(inLen), + LZ4_COMPRESSBOUND(inLen)) + + HEADER_SIZE; } //------------------------------------------------------------------------------ // Compress a block of data //------------------------------------------------------------------------------ -int IDBCompressInterface::compressBlock(const char* in, - const size_t inLen, - unsigned char* out, - unsigned int& outLen) const +int CompressInterface::compressBlock(const char* in, const size_t inLen, + unsigned char* out, size_t& outLen) const { size_t snaplen = 0; utils::Hasher128 hasher; // loose input checking. - if (outLen < snappy::MaxCompressedLength(inLen) + HEADER_SIZE) + if (outLen < maxCompressedSize(inLen)) { - cerr << "got outLen = " << outLen << " for inLen = " << inLen << ", needed " << - (snappy::MaxCompressedLength(inLen) + HEADER_SIZE) << endl; + cerr << "got outLen = " << outLen << " for inLen = " << inLen + << ", needed " << (maxCompressedSize(inLen)) << endl; return ERR_BADOUTSIZE; } - //apparently this never fails? 
- snappy::RawCompress(in, inLen, reinterpret_cast(&out[HEADER_SIZE]), &snaplen); + auto rc = compress(in, inLen, reinterpret_cast(&out[HEADER_SIZE]), + &outLen); + if (rc != ERR_OK) + { + return rc; + } + snaplen = outLen; uint8_t* signature = (uint8_t*) &out[SIG_OFFSET]; uint32_t* checksum = (uint32_t*) &out[CHECKSUM_OFFSET]; uint32_t* len = (uint32_t*) &out[LEN_OFFSET]; - *signature = CHUNK_MAGIC3; + *signature = getChunkMagicNumber(); *checksum = hasher((char*) &out[HEADER_SIZE], snaplen); *len = snaplen; @@ -171,51 +176,47 @@ int IDBCompressInterface::compressBlock(const char* in, //------------------------------------------------------------------------------ // Decompress a block of data //------------------------------------------------------------------------------ -int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out, - unsigned int& outLen) const +int CompressInterface::uncompressBlock(const char* in, const size_t inLen, + unsigned char* out, + size_t& outLen) const { - bool comprc = false; - size_t ol = 0; - uint32_t realChecksum; uint32_t storedChecksum; uint32_t storedLen; uint8_t storedMagic; utils::Hasher128 hasher; - + auto tmpOutLen = outLen; outLen = 0; if (inLen < 1) - { return ERR_BADINPUT; - } storedMagic = *((uint8_t*) &in[SIG_OFFSET]); - if (storedMagic == CHUNK_MAGIC3) + if (storedMagic == getChunkMagicNumber()) { if (inLen < HEADER_SIZE) - { return ERR_BADINPUT; - } storedChecksum = *((uint32_t*) &in[CHECKSUM_OFFSET]); storedLen = *((uint32_t*) (&in[LEN_OFFSET])); if (inLen < storedLen + HEADER_SIZE) - { return ERR_BADINPUT; - } realChecksum = hasher(&in[HEADER_SIZE], storedLen); if (storedChecksum != realChecksum) - { return ERR_CHECKSUM; + + auto rc = uncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast(out), &tmpOutLen); + if (rc != ERR_OK) + { + cerr << "uncompressBlock failed!" 
<< endl; + return ERR_DECOMPRESS; } - comprc = snappy::GetUncompressedLength(&in[HEADER_SIZE], storedLen, &ol) && - snappy::RawUncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast(out)); + outLen = tmpOutLen; } else { @@ -223,13 +224,6 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un return ERR_BADINPUT; } - if (!comprc) - { - cerr << "decomp failed!" << endl; - return ERR_DECOMPRESS; - } - - outLen = ol; //cerr << "ub: " << inLen << " : " << outLen << endl; return ERR_OK; @@ -238,7 +232,7 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un //------------------------------------------------------------------------------ // Verify the passed in buffer contains a valid compression file header. //------------------------------------------------------------------------------ -int IDBCompressInterface::verifyHdr(const void* hdrBuf) const +int CompressInterface::verifyHdr(const void* hdrBuf) { const CompressedDBFileHeader* hdr = reinterpret_cast(hdrBuf); @@ -255,9 +249,8 @@ int IDBCompressInterface::verifyHdr(const void* hdrBuf) const // Extract compression pointer information out of the pointer buffer that is // passed in. ptrBuf points to the pointer section of the compression hdr. //------------------------------------------------------------------------------ -int IDBCompressInterface::getPtrList(const char* ptrBuf, - const int ptrBufSize, - CompChunkPtrList& chunkPtrs ) const +int CompressInterface::getPtrList(const char* ptrBuf, const int ptrBufSize, + CompChunkPtrList& chunkPtrs) { int rc = 0; chunkPtrs.clear(); @@ -285,7 +278,7 @@ int IDBCompressInterface::getPtrList(const char* ptrBuf, // one for the file header, and one for the list of pointers. // Wrapper of above method for backward compatibility. 
//------------------------------------------------------------------------------ -int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs ) const +int CompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs ) { return getPtrList(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN, chunkPtrs); } @@ -293,8 +286,8 @@ int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunk //------------------------------------------------------------------------------ // Count the number of chunk pointers in the pointer header(s) //------------------------------------------------------------------------------ -unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf, - const int ptrBufSize) const +unsigned int CompressInterface::getPtrCount(const char* ptrBuf, + const int ptrBufSize) { unsigned int chunkCount = 0; @@ -318,7 +311,7 @@ unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf, // This should not be used for compressed dictionary files which could have // more compression chunk headers. //------------------------------------------------------------------------------ -unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const +unsigned int CompressInterface::getPtrCount(const char* hdrBuf) { return getPtrCount(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN); } @@ -326,9 +319,8 @@ unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const //------------------------------------------------------------------------------ // Store list of compression pointers into the specified header. 
//------------------------------------------------------------------------------ -void IDBCompressInterface::storePtrs(const std::vector& ptrs, - void* ptrBuf, - int ptrSectionSize) const +void CompressInterface::storePtrs(const std::vector& ptrs, + void* ptrBuf, int ptrSectionSize) { memset((ptrBuf), 0, ptrSectionSize); // reset the pointer section to 0 uint64_t* hdrPtrs = reinterpret_cast(ptrBuf); @@ -342,7 +334,7 @@ void IDBCompressInterface::storePtrs(const std::vector& ptrs, //------------------------------------------------------------------------------ // Wrapper of above method for backward compatibility //------------------------------------------------------------------------------ -void IDBCompressInterface::storePtrs(const std::vector& ptrs, void* ptrBuf) const +void CompressInterface::storePtrs(const std::vector& ptrs, void* ptrBuf) { storePtrs(ptrs, reinterpret_cast(ptrBuf) + HDR_BUF_LEN, HDR_BUF_LEN); } @@ -350,10 +342,10 @@ void IDBCompressInterface::storePtrs(const std::vector& ptrs, void* pt //------------------------------------------------------------------------------ // Initialize the header blocks to be written at the start of a dictionary file. //------------------------------------------------------------------------------ -void IDBCompressInterface::initHdr( +void CompressInterface::initHdr( void* hdrBuf, void* ptrBuf, uint32_t colWidth, execplan::CalpontSystemCatalog::ColDataType columnType, - int compressionType, int hdrSize) const + int compressionType, int hdrSize) { memset(hdrBuf, 0, HDR_BUF_LEN); memset(ptrBuf, 0, hdrSize - HDR_BUF_LEN); @@ -364,10 +356,10 @@ void IDBCompressInterface::initHdr( //------------------------------------------------------------------------------ // Initialize the header blocks to be written at the start of a column file. 
//------------------------------------------------------------------------------ -void IDBCompressInterface::initHdr( +void CompressInterface::initHdr( void* hdrBuf, uint32_t columnWidth, execplan::CalpontSystemCatalog::ColDataType columnType, - int compressionType) const + int compressionType) { memset(hdrBuf, 0, HDR_BUF_LEN * 2); initCompressedDBFileHeader(hdrBuf, columnWidth, columnType, @@ -377,7 +369,7 @@ void IDBCompressInterface::initHdr( //------------------------------------------------------------------------------ // Get the header's version number //------------------------------------------------------------------------------ -uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const +uint64_t CompressInterface::getVersionNumber(const void* hdrBuf) { return ( reinterpret_cast(hdrBuf)->fVersionNum); @@ -386,7 +378,7 @@ uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const //------------------------------------------------------------------------------ // Set the file's block count //------------------------------------------------------------------------------ -void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const +void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count) { reinterpret_cast(hdrBuf)->fBlockCount = count; } @@ -394,15 +386,24 @@ void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const //------------------------------------------------------------------------------ // Get the file's block count //------------------------------------------------------------------------------ -uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const +uint64_t CompressInterface::getBlockCount(const void* hdrBuf) { return (reinterpret_cast(hdrBuf)->fBlockCount); } +//------------------------------------------------------------------------------ +// Get the file's compression type 
+//------------------------------------------------------------------------------ +uint64_t CompressInterface::getCompressionType(const void* hdrBuf) +{ + return (reinterpret_cast(hdrBuf) + ->fCompressionType); +} + //------------------------------------------------------------------------------ // Set the overall header size //------------------------------------------------------------------------------ -void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const +void CompressInterface::setHdrSize(void* hdrBuf, uint64_t size) { reinterpret_cast(hdrBuf)->fHeaderSize = size; } @@ -410,7 +411,7 @@ void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const //------------------------------------------------------------------------------ // Get the overall header size //------------------------------------------------------------------------------ -uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const +uint64_t CompressInterface::getHdrSize(const void* hdrBuf) { return (reinterpret_cast(hdrBuf)->fHeaderSize); } @@ -419,7 +420,7 @@ uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const // Get column type //----------------------------------------------------------------------------- execplan::CalpontSystemCatalog::ColDataType -IDBCompressInterface::getColDataType(const void* hdrBuf) const +CompressInterface::getColDataType(const void* hdrBuf) { return ( reinterpret_cast(hdrBuf)->fColDataType); @@ -428,7 +429,7 @@ IDBCompressInterface::getColDataType(const void* hdrBuf) const //------------------------------------------------------------------------------ // Get column width //------------------------------------------------------------------------------ -uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const +uint64_t CompressInterface::getColumnWidth(const void* hdrBuf) { return ( reinterpret_cast(hdrBuf)->fColumnWidth); @@ -437,7 +438,7 @@ uint64_t IDBCompressInterface::getColumnWidth(const void* 
hdrBuf) const //------------------------------------------------------------------------------ // Get LBID by index //------------------------------------------------------------------------------ -uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index) const +uint64_t CompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index) { if (index < LBID_MAX_SIZE) return (reinterpret_cast(hdrBuf)->fLBIDS[index]); @@ -447,7 +448,7 @@ uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index //------------------------------------------------------------------------------ // Set LBID by index //------------------------------------------------------------------------------ -void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const +void CompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) { if (lbid && index < LBID_MAX_SIZE) { @@ -457,7 +458,10 @@ void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t } } -uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const +//------------------------------------------------------------------------------ +// Get LBID count +//------------------------------------------------------------------------------ +uint64_t CompressInterface::getLBIDCount(void* hdrBuf) { return reinterpret_cast(hdrBuf)->fLBIDCount; } @@ -466,9 +470,9 @@ uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const // Calculates the chunk and block offset within the chunk for the specified // block number. 
//------------------------------------------------------------------------------ -void IDBCompressInterface::locateBlock(unsigned int block, - unsigned int& chunkIndex, - unsigned int& blockOffsetWithinChunk) const +void CompressInterface::locateBlock(unsigned int block, + unsigned int& chunkIndex, + unsigned int& blockOffsetWithinChunk) const { const uint64_t BUFLEN = UNCOMPRESSED_INBUF_LEN; @@ -485,9 +489,8 @@ void IDBCompressInterface::locateBlock(unsigned int block, // also expand to allow for user requested padding. Lastly, initialize padding // bytes to 0. //------------------------------------------------------------------------------ -int IDBCompressInterface::padCompressedChunks(unsigned char* buf, - unsigned int& len, - unsigned int maxLen) const +int CompressInterface::padCompressedChunks(unsigned char* buf, size_t& len, + unsigned int maxLen) const { int nPaddingBytes = 0; int nRem = len % COMPRESSED_CHUNK_INCREMENT_SIZE; @@ -511,30 +514,203 @@ int IDBCompressInterface::padCompressedChunks(unsigned char* buf, return 0; } -/* static */ -uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize) +// Snappy +CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes) + : CompressInterface(numUserPaddingBytes) +{ +} + +int32_t CompressInterfaceSnappy::compress(const char* in, size_t inLen, + char* out, size_t* outLen) const +{ + snappy::RawCompress(in, inLen, out, outLen); + +#ifdef DEBUG_COMPRESSION + std::cout << "Snappy::compress: inLen " << inLen << ", outLen " << *outLen + << std::endl; +#endif + + return ERR_OK; +} + +int32_t CompressInterfaceSnappy::uncompress(const char* in, size_t inLen, + char* out, size_t* outLen) const +{ + size_t realOutLen = 0; + auto rc = snappy::GetUncompressedLength(in, inLen, &realOutLen); + + if (!rc || realOutLen > *outLen) + { + cerr << "snappy::GetUncompressedLength failed. 
InLen: " << inLen + << ", outLen: " << *outLen << ", realOutLen: " << realOutLen + << endl; + return ERR_DECOMPRESS; + } + + rc = snappy::RawUncompress(in, inLen, out); + + if (!rc) + { + cerr << "snappy::RawUnompress failed. InLen: " << inLen + << ", outLen: " << *outLen << endl; + return ERR_DECOMPRESS; + } + +#ifdef DEBUG_COMPRESSION + std::cout << "Snappy::uncompress: inLen " << inLen << ", outLen " + << *outLen << std::endl; +#endif + *outLen = realOutLen; + + return ERR_OK; +} + +size_t CompressInterfaceSnappy::maxCompressedSize(size_t uncompSize) const { return (snappy::MaxCompressedLength(uncompSize) + HEADER_SIZE); } -int IDBCompressInterface::compress(const char* in, size_t inLen, char* out, - size_t* outLen) const -{ - snappy::RawCompress(in, inLen, out, outLen); - return 0; -} - -int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const -{ - return !(snappy::RawUncompress(in, inLen, out)); -} - -/* static */ -bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, size_t* outLen) +bool CompressInterfaceSnappy::getUncompressedSize(char* in, size_t inLen, + size_t* outLen) const { return snappy::GetUncompressedLength(in, inLen, outLen); } +uint8_t CompressInterfaceSnappy::getChunkMagicNumber() const +{ + return CHUNK_MAGIC_SNAPPY; +} + +// LZ4 +CompressInterfaceLZ4::CompressInterfaceLZ4(uint32_t numUserPaddingBytes) + : CompressInterface(numUserPaddingBytes) +{ +} + +int32_t CompressInterfaceLZ4::compress(const char* in, size_t inLen, char* out, + size_t* outLen) const +{ + auto compressedLen = LZ4_compress_default(in, out, inLen, *outLen); + + if (!compressedLen) + { + cerr << "LZ_compress_default failed. 
InLen: " << inLen + << ", compressedLen: " << compressedLen << endl; + return ERR_COMPRESS; + } + +#ifdef DEBUG_COMPRESSION + std::cout << "LZ4::compress: inLen " << inLen << ", comressedLen " + << compressedLen << std::endl; +#endif + + *outLen = compressedLen; + return ERR_OK; +} + +int32_t CompressInterfaceLZ4::uncompress(const char* in, size_t inLen, + char* out, size_t* outLen) const +{ + auto decompressedLen = LZ4_decompress_safe(in, out, inLen, *outLen); + + if (decompressedLen < 0) + { + cerr << "LZ_decompress_safe failed with error code " << decompressedLen + << endl; + cerr << "InLen: " << inLen << ", outLen: " << *outLen << endl; + return ERR_DECOMPRESS; + } + + *outLen = decompressedLen; + +#ifdef DEBUG_COMPRESSION + std::cout << "LZ4::uncompress: inLen " << inLen << ", outLen " << *outLen + << std::endl; +#endif + + return ERR_OK; +} + +size_t CompressInterfaceLZ4::maxCompressedSize(size_t uncompSize) const +{ + return (LZ4_COMPRESSBOUND(uncompSize) + HEADER_SIZE); +} + +bool CompressInterfaceLZ4::getUncompressedSize(char* in, size_t inLen, + size_t* outLen) const +{ + // LZ4 does not have such function. 
+ idbassert(false); + return false; +} + +uint8_t CompressInterfaceLZ4::getChunkMagicNumber() const +{ + return CHUNK_MAGIC_LZ4; +} + +CompressInterface* getCompressInterfaceByType(uint32_t compressionType, + uint32_t numUserPaddingBytes) +{ + switch (compressionType) + { + case 1: + case 2: + return new CompressInterfaceSnappy(numUserPaddingBytes); + case 3: + return new CompressInterfaceLZ4(numUserPaddingBytes); + } + + return nullptr; +} + +CompressInterface* getCompressInterfaceByName(const std::string& compressionName, + uint32_t numUserPaddingBytes) +{ + if (compressionName == "SNAPPY") + return new CompressInterfaceSnappy(numUserPaddingBytes); + else if (compressionName == "LZ4") + return new CompressInterfaceLZ4(numUserPaddingBytes); + return nullptr; +} + +void initializeCompressorPool( + std::unordered_map>& + compressorPool, + uint32_t numUserPaddingBytes) +{ + compressorPool = { + make_pair(2, std::shared_ptr( + new CompressInterfaceSnappy(numUserPaddingBytes))), + make_pair(3, std::shared_ptr( + new CompressInterfaceLZ4(numUserPaddingBytes)))}; +} + +std::shared_ptr getCompressorByType( + std::unordered_map>& + compressorPool, + uint32_t compressionType) +{ + switch (compressionType) + { + case 1: + case 2: + if (!compressorPool.count(2)) + { + return nullptr; + } + return compressorPool[2]; + case 3: + if (!compressorPool.count(3)) + { + return nullptr; + } + return compressorPool[3]; + } + + return nullptr; +} + #endif } // namespace compress diff --git a/utils/compress/idbcompress.h b/utils/compress/idbcompress.h index 03d327a41..23c02f966 100644 --- a/utils/compress/idbcompress.h +++ b/utils/compress/idbcompress.h @@ -26,6 +26,7 @@ #endif #include #include +#include #include "calpontsystemcatalog.h" @@ -41,11 +42,12 @@ namespace compress typedef std::pair CompChunkPtr; typedef std::vector CompChunkPtrList; -class IDBCompressInterface +class CompressInterface { public: static const unsigned int HDR_BUF_LEN = 4096; static const unsigned int 
UNCOMPRESSED_INBUF_LEN = 512 * 1024 * 8; + static const uint32_t COMPRESSED_CHUNK_INCREMENT_SIZE = 8192; // error codes from uncompressBlock() static const int ERR_OK = 0; @@ -53,22 +55,29 @@ public: static const int ERR_DECOMPRESS = -2; static const int ERR_BADINPUT = -3; static const int ERR_BADOUTSIZE = -4; + static const int ERR_COMPRESS = -5; /** - * When IDBCompressInterface object is being used to compress a chunk, this + * When CompressInterface object is being used to compress a chunk, this * construct can be used to specify the padding added by padCompressedChunks */ - EXPORT explicit IDBCompressInterface(unsigned int numUserPaddingBytes = 0); + EXPORT explicit CompressInterface(unsigned int numUserPaddingBytes = 0); /** * dtor */ - EXPORT virtual ~IDBCompressInterface(); + EXPORT virtual ~CompressInterface() = default; /** * see if the algo is available in this lib */ - EXPORT bool isCompressionAvail(int compressionType = 0) const; + EXPORT static bool isCompressionAvail(int compressionType = 0); + + /** + * Returns the maximum compressed size from all available compression + * types. + */ + EXPORT static size_t getMaxCompressedSizeGeneric(size_t inLen); /** * Compresses specified "in" buffer of length "inLen" bytes. @@ -76,30 +85,31 @@ public: * "out" should be sized using maxCompressedSize() to allow for incompressible data. * Returns 0 if success. */ - EXPORT int compressBlock(const char* in, - const size_t inLen, - unsigned char* out, - unsigned int& outLen) const; + + EXPORT int compressBlock(const char* in, const size_t inLen, + unsigned char* out, size_t& outLen) const; /** * outLen must be initialized with the size of the out buffer before calling uncompressBlock. * On return, outLen will have the number of bytes used in out. 
*/ - EXPORT int uncompressBlock(const char* in, const size_t inLen, unsigned char* out, - unsigned int& outLen) const; + EXPORT int uncompressBlock(const char* in, const size_t inLen, + unsigned char* out, size_t& outLen) const; /** * This fcn wraps whatever compression algorithm we're using at the time, and * is not specific to blocks on disk. */ - EXPORT int compress(const char* in, size_t inLen, char* out, size_t* outLen) const; + EXPORT virtual int compress(const char* in, size_t inLen, char* out, + size_t* outLen) const = 0; /** * This fcn wraps whatever compression algorithm we're using at the time, and * is not specific to blocks on disk. The caller needs to make sure out is big * enough to contain the output by using getUncompressedSize(). */ - EXPORT int uncompress(const char* in, size_t inLen, char* out) const; + EXPORT virtual int uncompress(const char* in, size_t inLen, char* out, + size_t* outLen) const = 0; /** * Initialize header buffer at start of compressed db file. @@ -107,23 +117,24 @@ public: * @warning hdrBuf must be at least HDR_BUF_LEN bytes * @warning ptrBuf must be at least (hdrSize-HDR_BUF_LEN) bytes */ - EXPORT void initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht, - execplan::CalpontSystemCatalog::ColDataType columnType, - int compressionType, int hdrSize) const; - + EXPORT static void + initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht, + execplan::CalpontSystemCatalog::ColDataType columnType, + int compressionType, int hdrSize); /** * Initialize header buffer at start of compressed db file. * * @warning hdrBuf must be at least HDR_BUF_LEN*2 bytes */ - EXPORT void initHdr(void* hdrBuf, uint32_t columnWidth, - execplan::CalpontSystemCatalog::ColDataType columnType, - int compressionType) const; + EXPORT static void + initHdr(void* hdrBuf, uint32_t columnWidth, + execplan::CalpontSystemCatalog::ColDataType columnType, + int compressionType); /** * Verify the passed in buffer contains a compressed db file header. 
*/ - EXPORT int verifyHdr(const void* hdrBuf) const; + EXPORT static int verifyHdr(const void* hdrBuf); /** * Extracts list of compression pointers from the specified ptr buffer. @@ -131,9 +142,8 @@ public: * chunkPtrs is a vector of offset, size pairs for the compressed chunks. * Returns 0 if success. */ - EXPORT int getPtrList(const char* ptrBuf, - const int ptrBufSize, - CompChunkPtrList& chunkPtrs) const; + EXPORT static int getPtrList(const char* ptrBuf, const int ptrBufSize, + CompChunkPtrList& chunkPtrs); /** * Extracts list of compression pointers from the specified header. @@ -142,28 +152,28 @@ public: * Note: the pointer passed in is the beginning of the header, * not the pointer section as above. */ - EXPORT int getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs) const; + EXPORT static int getPtrList(const char* hdrBuf, + CompChunkPtrList& chunkPtrs); /** * Return the number of chunk pointers contained in the specified ptr buffer. * ptrBuf points to the pointer section taken from the headers. */ - EXPORT unsigned int getPtrCount(const char* ptrBuf, - const int ptrBufSize) const; + EXPORT static unsigned int getPtrCount(const char* ptrBuf, + const int ptrBufSize); /** * Return the number of chunk pointers contained in the specified header. * hdrBuf points to start of 2 buffer headers from compressed db file. * For non-dictionary columns. */ - EXPORT unsigned int getPtrCount(const char* hdrBuf) const; + EXPORT static unsigned int getPtrCount(const char* hdrBuf); /** * Store vector of pointers into the specified buffer header's pointer section. */ - EXPORT void storePtrs(const std::vector& ptrs, - void* hdrBuf, - int ptrSectionSize) const; + EXPORT static void storePtrs(const std::vector& ptrs, + void* hdrBuf, int ptrSectionSize); /** * Store vector of pointers into the specified buffer header. @@ -171,14 +181,14 @@ public: * Note: the pointer passed in is the beginning of the header, * not the pointer section as above. 
*/ - EXPORT void storePtrs(const std::vector& ptrs, void* hdrBuf) const; + EXPORT static void storePtrs(const std::vector& ptrs, + void* hdrBuf); /** * Calculates the chunk, and the block offset within the chunk, for the * specified block number. */ - EXPORT void locateBlock(unsigned int block, - unsigned int& chunkIndex, + EXPORT void locateBlock(unsigned int block, unsigned int& chunkIndex, unsigned int& blockOffsetWithinChunk) const; /** @@ -187,9 +197,8 @@ public: * maxLen is the maximum size for buf. nonzero return code means the * result output buffer length is > than maxLen. */ - EXPORT int padCompressedChunks(unsigned char* buf, - unsigned int& len, - unsigned int maxLen ) const; + EXPORT int padCompressedChunks(unsigned char* buf, size_t& len, + unsigned int maxLen) const; /* * Mutator methods for the block count in the file @@ -197,17 +206,22 @@ public: /** * getVersionNumber */ - EXPORT uint64_t getVersionNumber(const void* hdrBuf) const; + EXPORT static uint64_t getVersionNumber(const void* hdrBuf); /** * setBlockCount */ - EXPORT void setBlockCount(void* hdrBuf, uint64_t count) const; + EXPORT static void setBlockCount(void* hdrBuf, uint64_t count); /** * getBlockCount */ - EXPORT uint64_t getBlockCount(const void* hdrBuf) const; + EXPORT static uint64_t getBlockCount(const void* hdrBuf); + + /** + * getCompressionType + */ + EXPORT static uint64_t getCompressionType(const void* hdrBuf); /* * Mutator methods for the overall header size @@ -215,38 +229,38 @@ public: /** * setHdrSize */ - EXPORT void setHdrSize(void* hdrBuf, uint64_t size) const; + EXPORT static void setHdrSize(void* hdrBuf, uint64_t size); /** * getHdrSize */ - EXPORT uint64_t getHdrSize(const void* hdrBuf) const; + EXPORT static uint64_t getHdrSize(const void* hdrBuf); /** * getColumnType */ - EXPORT execplan::CalpontSystemCatalog::ColDataType - getColDataType(const void* hdrBuf) const; + EXPORT static execplan::CalpontSystemCatalog::ColDataType + getColDataType(const void* hdrBuf); 
/** * getColumnWidth */ - EXPORT uint64_t getColumnWidth(const void* hdrBuf) const; + EXPORT static uint64_t getColumnWidth(const void* hdrBuf); /** * getLBIDByIndex */ - EXPORT uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index) const; + EXPORT static uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index); /** * setLBIDByIndex */ - EXPORT void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const; + EXPORT static void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index); /** * getLBIDCount */ - EXPORT uint64_t getLBIDCount(void* hdrBuf) const; + EXPORT static uint64_t getLBIDCount(void* hdrBuf); /** * Mutator methods for the user padding bytes @@ -271,97 +285,213 @@ public: * Given an input, uncompressed block, what's the maximum possible output, * compressed size? */ - EXPORT static uint64_t maxCompressedSize(uint64_t uncompSize); + EXPORT virtual size_t maxCompressedSize(size_t uncompSize) const = 0; /** * Given a compressed block, returns the uncompressed size in outLen. * Returns false on error, true on success. */ - EXPORT static bool getUncompressedSize(char* in, size_t inLen, size_t* outLen); + EXPORT virtual bool getUncompressedSize(char* in, size_t inLen, + size_t* outLen) const = 0; -protected: + protected: + virtual uint8_t getChunkMagicNumber() const = 0; -private: + private: //defaults okay - //IDBCompressInterface(const IDBCompressInterface& rhs); - //IDBCompressInterface& operator=(const IDBCompressInterface& rhs); + //CompressInterface(const CompressInterface& rhs); + //CompressInterface& operator=(const CompressInterface& rhs); unsigned int fNumUserPaddingBytes; // Num bytes to pad compressed chunks }; +class CompressInterfaceSnappy : public CompressInterface +{ + public: + EXPORT CompressInterfaceSnappy(uint32_t numUserPaddingBytes = 0); + EXPORT ~CompressInterfaceSnappy() = default; + /** + * Compress the given block using snappy compression API. 
+ */ + EXPORT int32_t compress(const char* in, size_t inLen, char* out, + size_t* outLen) const override; + /** + * Uncompress the given block using snappy compression API. + */ + EXPORT int32_t uncompress(const char* in, size_t inLen, char* out, + size_t* outLen) const override; + /** + * Get max compressed size for the given `uncompSize` value using snappy + * compression API. + */ + EXPORT size_t maxCompressedSize(size_t uncompSize) const override; + + /** + * Get uncompressed size for the given block using snappy + * compression API. + */ + EXPORT + bool getUncompressedSize(char* in, size_t inLen, + size_t* outLen) const override; + + protected: + uint8_t getChunkMagicNumber() const override; + + private: + const uint8_t CHUNK_MAGIC_SNAPPY = 0xfd; +}; + +class CompressInterfaceLZ4 : public CompressInterface +{ + public: + EXPORT CompressInterfaceLZ4(uint32_t numUserPaddingBytes = 0); + EXPORT ~CompressInterfaceLZ4() = default; + /** + * Compress the given block using LZ4 compression API. + */ + EXPORT int32_t compress(const char* in, size_t inLen, char* out, + size_t* outLen) const override; + /** + * Uncompress the given block using LZ4 compression API. + */ + EXPORT int32_t uncompress(const char* in, size_t inLen, char* out, + size_t* outLen) const override; + /** + * Get max compressed size for the given `uncompSize` value using LZ4 + * compression API. + */ + EXPORT size_t maxCompressedSize(size_t uncompSize) const override; + + /** + * Get uncompressed size for the given block using LZ4 + * compression API. + */ + EXPORT + bool getUncompressedSize(char* in, size_t inLen, + size_t* outLen) const override; + + protected: + uint8_t getChunkMagicNumber() const override; + + private: + const uint8_t CHUNK_MAGIC_LZ4 = 0xfc; +}; + +using CompressorPool = + std::unordered_map>; + +/** + * Returns a pointer to the appropriate compression interface based on + * `compressionType`. `compressionType` must be greater than 0. 
+ * Note: caller is responsible for memory deallocation. + */ +EXPORT CompressInterface* +getCompressInterfaceByType(uint32_t compressionType, + uint32_t numUserPaddingBytes = 0); + +/** + * Returns a pointer to the appropriate compression interface based on + * `compressionName`. + * Note: caller is responsible for memory deallocation. + */ +EXPORT CompressInterface* getCompressInterfaceByName(const std::string& compressionName, + uint32_t numUserPaddingBytes = 0); + +/** + * Initializes a given `unordered_map` with all available compression + * interfaces. + */ +EXPORT void initializeCompressorPool(CompressorPool& compressorPool, + uint32_t numUserPaddingBytes = 0); + +/** + * Returns a `shared_ptr` to the appropriate compression interface. + */ +EXPORT std::shared_ptr +getCompressorByType(CompressorPool& compressorPool, uint32_t compressionType); + #ifdef SKIP_IDB_COMPRESSION -inline IDBCompressInterface::IDBCompressInterface(unsigned int /*numUserPaddingBytes*/) {} -inline IDBCompressInterface::~IDBCompressInterface() {} -inline bool IDBCompressInterface::isCompressionAvail(int c) const +inline CompressInterface::CompressInterface(unsigned int /*numUserPaddingBytes*/) {} +inline bool CompressInterface::isCompressionAvail(int c) { return (c == 0); } -inline int IDBCompressInterface::compressBlock(const char*, const size_t, unsigned char*, unsigned int&) const +inline int CompressInterface::compressBlock(const char*, const size_t, unsigned char*, size_t&) const { return -1; } -inline int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out, unsigned int& outLen) const +inline int CompressInterface::uncompressBlock(const char* in, + const size_t inLen, + unsigned char* out, + size_t& outLen) const { return -1; } -inline int IDBCompressInterface::compress(const char* in, size_t inLen, char* out, size_t* outLen) const +inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {} +inline int 
CompressInterface::verifyHdr(const void*) { return -1; } -inline int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const +inline void CompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) {} +inline void CompressInterface::initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {} +inline int CompressInterface::getPtrList(const char*, const int, CompChunkPtrList&) +{ + return -1; +} +inline unsigned int CompressInterface::getPtrCount(const char*, const int) { return 0; } -inline void IDBCompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) const {} -inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {} -inline int IDBCompressInterface::verifyHdr(const void*) const -{ - return -1; -} -inline int IDBCompressInterface::getPtrList(const char*, const int, CompChunkPtrList&) const -{ - return -1; -} -inline int IDBCompressInterface::getPtrList(const char*, CompChunkPtrList&) const -{ - return -1; -} -inline unsigned int IDBCompressInterface::getPtrCount(const char*, const int) const +inline unsigned int CompressInterface::getPtrCount(const char*) { return 0; } -inline unsigned int IDBCompressInterface::getPtrCount(const char*) const +inline void CompressInterface::storePtrs(const std::vector&, void*, int) {} +inline void CompressInterface::storePtrs(const std::vector&, void*) {} +inline void +CompressInterface::locateBlock(unsigned int block, unsigned int& chunkIndex, + unsigned int& blockOffsetWithinChunk) const { - return 0; } -inline void IDBCompressInterface::storePtrs(const std::vector&, void*, int) const {} -inline void IDBCompressInterface::storePtrs(const std::vector&, void*) const {} -inline void IDBCompressInterface::locateBlock(unsigned int block, - unsigned int& chunkIndex, unsigned int& blockOffsetWithinChunk) const {} -inline int 
IDBCompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const +inline int CompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const { return -1; } -inline uint64_t -IDBCompressInterface::getVersionNumber(const void* hdrBuf) const +inline uint64_t CompressInterface::getVersionNumber(const void* hdrBuf) { return 0; } -inline void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const {} -inline uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const +inline void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count) {} +inline uint64_t CompressInterface::getBlockCount(const void* hdrBuf) { return 0; } -inline void IDBCompressInterface::setHdrSize(void*, uint64_t) const {} -inline uint64_t IDBCompressInterface::getHdrSize(const void*) const +inline uint64_t CompressInterface::getCompressionType(const void* hdrBuf) { return 0; } inline execplan::CalpontSystemCatalog::ColDataType -IDBCompressInterface::getColDataType(const void* hdrBuf) const +CompressInterface::getColDataType(const void* hdrBuf) { return execplan::CalpontSystemCatalog::ColDataType::UNDEFINED; } +inline uint64_t CompressInterface::getColumnWidth(const void* hdrBuf) const +{ + return 0; +} +inline uint64_t getLBID0(const void* hdrBuf) { return 0; } +void setLBID0(void* hdrBuf, uint64_t lbid) {} +inline uint64_t getLBID1(const void* hdrBuf) { return 0; } +void setLBID1(void* hdrBuf, uint64_t lbid) {} +inline void CompressInterface::setHdrSize(void*, uint64_t) {} +inline uint64_t CompressInterface::getHdrSize(const void*) +{ + return 0; +} +CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes) + : CompressInterface(numUserPaddingBytes) +{ +} inline uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const { return 0; } inline uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize) { @@ -377,8 +507,13 @@ inline bool 
IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, si { return false; } +uint8_t getChunkMagicNumber() const { return 0; } +CompressInterface* getCompressInterfaceByType(uint32_t compressionType, + uint32_t numUserPaddingBytes) +{ + return nullptr; +} #endif - } #undef EXPORT diff --git a/utils/idbdatafile/PosixFileSystem.cpp b/utils/idbdatafile/PosixFileSystem.cpp index 4ebdeb4ae..b9bb9644e 100644 --- a/utils/idbdatafile/PosixFileSystem.cpp +++ b/utils/idbdatafile/PosixFileSystem.cpp @@ -176,25 +176,24 @@ off64_t PosixFileSystem::compressedSize(const char* path) const return -1; } - compress::IDBCompressInterface decompressor; + char hdr1[compress::CompressInterface::HDR_BUF_LEN]; + nBytes = readFillBuffer( pFile, hdr1, compress::CompressInterface::HDR_BUF_LEN); - char hdr1[compress::IDBCompressInterface::HDR_BUF_LEN]; - nBytes = readFillBuffer( pFile, hdr1, compress::IDBCompressInterface::HDR_BUF_LEN); - - if ( nBytes != compress::IDBCompressInterface::HDR_BUF_LEN ) + if ( nBytes != compress::CompressInterface::HDR_BUF_LEN ) { delete pFile; return -1; } // Verify we are a compressed file - if (decompressor.verifyHdr(hdr1) < 0) + if (compress::CompressInterface::verifyHdr(hdr1) < 0) { delete pFile; return -1; } - int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - compress::IDBCompressInterface::HDR_BUF_LEN; + int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) - + compress::CompressInterface::HDR_BUF_LEN; char* hdr2 = new char[ptrSecSize]; nBytes = readFillBuffer( pFile, hdr2, ptrSecSize); @@ -206,7 +205,8 @@ off64_t PosixFileSystem::compressedSize(const char* path) const } compress::CompChunkPtrList chunkPtrs; - int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs); + int rc = compress::CompressInterface::getPtrList(hdr2, ptrSecSize, + chunkPtrs); delete[] hdr2; if (rc != 0) diff --git a/utils/joiner/joinpartition.cpp b/utils/joiner/joinpartition.cpp index 0b102cd70..4b796979c 100644 --- a/utils/joiner/joinpartition.cpp +++ 
b/utils/joiner/joinpartition.cpp @@ -50,7 +50,10 @@ namespace joiner uint64_t uniqueNums = 0; -JoinPartition::JoinPartition() { } +JoinPartition::JoinPartition() +{ + compressor.reset(new compress::CompressInterfaceSnappy()); +} /* This is the ctor used by THJS */ JoinPartition::JoinPartition(const RowGroup& lRG, @@ -103,6 +106,22 @@ JoinPartition::JoinPartition(const RowGroup& lRG, for (int i = 0; i < (int) bucketCount; i++) buckets.push_back(boost::shared_ptr(new JoinPartition(*this, false))); + + string compressionType; + try + { + compressionType = + config->getConfig("HashJoin", "TempFileCompressionType"); + } catch (...) {} + + if (compressionType == "LZ4") + { + compressor.reset(new compress::CompressInterfaceLZ4()); + } + else + { + compressor.reset(new compress::CompressInterfaceSnappy()); + } } /* Ctor used by JoinPartition on expansion, creates JP's in filemode */ @@ -151,6 +170,8 @@ JoinPartition::JoinPartition(const JoinPartition& jp, bool splitMode) : smallRG.setData(&buffer); smallRG.resetRowGroup(0); smallRG.getRow(0, &smallRow); + + compressor = jp.compressor; } @@ -694,6 +715,7 @@ void JoinPartition::readByteStream(int which, ByteStream* bs) fs.seekg(offset); fs.read((char*) &len, sizeof(len)); + saveErrno = errno; if (!fs) @@ -735,12 +757,14 @@ void JoinPartition::readByteStream(int which, ByteStream* bs) else { size_t uncompressedSize; + fs.read((char*) &uncompressedSize, sizeof(uncompressedSize)); + boost::scoped_array buf(new char[len]); fs.read(buf.get(), len); saveErrno = errno; - if (!fs) + if (!fs || !uncompressedSize) { fs.close(); ostringstream os; @@ -749,9 +773,9 @@ void JoinPartition::readByteStream(int which, ByteStream* bs) } totalBytesRead += len; - compressor.getUncompressedSize(buf.get(), len, &uncompressedSize); bs->needAtLeast(uncompressedSize); - compressor.uncompress(buf.get(), len, (char*) bs->getInputPtr()); + compressor->uncompress(buf.get(), len, (char*) bs->getInputPtr(), + &uncompressedSize); 
bs->advanceInputPtr(uncompressedSize); } @@ -801,13 +825,15 @@ uint64_t JoinPartition::writeByteStream(int which, ByteStream& bs) } else { - uint64_t maxSize = compressor.maxCompressedSize(len); - size_t actualSize; + size_t maxSize = compressor->maxCompressedSize(len); + size_t actualSize = maxSize; boost::scoped_array compressed(new uint8_t[maxSize]); - compressor.compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize); - ret = actualSize + 4; + compressor->compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize); + ret = actualSize + 4 + 8; // sizeof (size_t) == 8. Why 4? fs.write((char*) &actualSize, sizeof(actualSize)); + // Save uncompressed len. + fs.write((char*) &len, sizeof(len)); fs.write((char*) compressed.get(), actualSize); saveErrno = errno; diff --git a/utils/joiner/joinpartition.h b/utils/joiner/joinpartition.h index 7e7ae5d6d..9c33d8e28 100644 --- a/utils/joiner/joinpartition.h +++ b/utils/joiner/joinpartition.h @@ -164,7 +164,7 @@ private: /* Compression support */ bool useCompression; - compress::IDBCompressInterface compressor; + std::shared_ptr compressor; /* TBD: do the reading/writing in one thread, compression/decompression in another */ /* Some stats for reporting */ diff --git a/utils/messageqcpp/compressed_iss.cpp b/utils/messageqcpp/compressed_iss.cpp index 26af982e3..585b3a3f5 100644 --- a/utils/messageqcpp/compressed_iss.cpp +++ b/utils/messageqcpp/compressed_iss.cpp @@ -64,6 +64,7 @@ CompressedInetStreamSocket::CompressedInetStreamSocket() { config::Config* config = config::Config::makeConfig(); string val; + string compressionType; try { @@ -75,6 +76,19 @@ CompressedInetStreamSocket::CompressedInetStreamSocket() useCompression = true; else useCompression = false; + + try + { + compressionType = + config->getConfig("NetworkCompression", "NetworkCompression"); + } + catch (...) 
{ } + + auto* compressInterface = compress::getCompressInterfaceByName(compressionType); + if (!compressInterface) + compressInterface = new compress::CompressInterfaceSnappy(); + + alg.reset(compressInterface); } Socket* CompressedInetStreamSocket::clone() const @@ -87,20 +101,25 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool* { SBS readBS, ret; size_t uncompressedSize; - bool err; readBS = InetStreamSocket::read(timeout, isTimeOut, stats); if (readBS->length() == 0 || fMagicBuffer == BYTESTREAM_MAGIC) return readBS; - err = alg.getUncompressedSize((char*) readBS->buf(), readBS->length(), &uncompressedSize); + // Read stored len, first 4 bytes. + uint32_t storedLen = *(uint32_t*) readBS->buf(); - if (!err) + if (!storedLen) return SBS(new ByteStream(0)); + uncompressedSize = storedLen; ret.reset(new ByteStream(uncompressedSize)); - alg.uncompress((char*) readBS->buf(), readBS->length(), (char*) ret->getInputPtr()); + + alg->uncompress((char*) readBS->buf() + HEADER_SIZE, + readBS->length() - HEADER_SIZE, (char*) ret->getInputPtr(), + &uncompressedSize); + ret->advanceInputPtr(uncompressedSize); return ret; @@ -108,15 +127,18 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool* void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats) { - size_t outLen = 0; - uint32_t len = msg.length(); + size_t len = msg.length(); if (useCompression && (len > 512)) { - ByteStream smsg(alg.maxCompressedSize(len)); + size_t outLen = alg->maxCompressedSize(len) + HEADER_SIZE; + ByteStream smsg(outLen); - alg.compress((char*) msg.buf(), len, (char*) smsg.getInputPtr(), &outLen); - smsg.advanceInputPtr(outLen); + alg->compress((char*) msg.buf(), len, + (char*) smsg.getInputPtr() + HEADER_SIZE, &outLen); + // Save original len. 
+ *(uint32_t*) smsg.getInputPtr() = len; + smsg.advanceInputPtr(outLen + HEADER_SIZE); if (outLen < len) do_write(smsg, COMPRESSED_BYTESTREAM_MAGIC, stats); diff --git a/utils/messageqcpp/compressed_iss.h b/utils/messageqcpp/compressed_iss.h index 2eabfb3fa..2514195da 100644 --- a/utils/messageqcpp/compressed_iss.h +++ b/utils/messageqcpp/compressed_iss.h @@ -54,8 +54,9 @@ public: virtual const IOSocket accept(const struct timespec* timeout); virtual void connect(const sockaddr* addr); private: - compress::IDBCompressInterface alg; + std::shared_ptr alg; bool useCompression; + static const uint32_t HEADER_SIZE = 4; }; } //namespace messageqcpp diff --git a/writeengine/bulk/we_bulkload.cpp b/writeengine/bulk/we_bulkload.cpp index ef612f380..fae55bff8 100644 --- a/writeengine/bulk/we_bulkload.cpp +++ b/writeengine/bulk/we_bulkload.cpp @@ -337,15 +337,12 @@ int BulkLoad::loadJobInfo( } } - // Validate that specified compression type is available - compress::IDBCompressInterface compressor; - for (unsigned kT = 0; kT < curJob.jobTableList.size(); kT++) { for (unsigned kC = 0; kC < curJob.jobTableList[kT].colList.size(); kC++) { - if ( !compressor.isCompressionAvail( - curJob.jobTableList[kT].colList[kC].compressionType) ) + if (!compress::CompressInterface::isCompressionAvail( + curJob.jobTableList[kT].colList[kC].compressionType)) { std::ostringstream oss; oss << "Specified compression type (" << diff --git a/writeengine/bulk/we_colbufcompressed.cpp b/writeengine/bulk/we_colbufcompressed.cpp index e5d004226..9131d9ea7 100644 --- a/writeengine/bulk/we_colbufcompressed.cpp +++ b/writeengine/bulk/we_colbufcompressed.cpp @@ -60,12 +60,11 @@ ColumnBufferCompressed::ColumnBufferCompressed( ColumnInfo* pColInfo, fToBeCompressedBuffer(0), fToBeCompressedCapacity(0), fNumBytes(0), - fCompressor(0), fPreLoadHWMChunk(true), fFlushedStartHwmChunk(false) { fUserPaddingBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK; - fCompressor = new compress::IDBCompressInterface( 
fUserPaddingBytes ); + compress::initializeCompressorPool(fCompressorPool, fUserPaddingBytes); } //------------------------------------------------------------------------------ @@ -79,7 +78,6 @@ ColumnBufferCompressed::~ColumnBufferCompressed() fToBeCompressedBuffer = 0; fToBeCompressedCapacity = 0; fNumBytes = 0; - delete fCompressor; } //------------------------------------------------------------------------------ @@ -91,9 +89,7 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char* fFile = f; fStartingHwm = startHwm; - IDBCompressInterface compressor; - - if (compressor.getPtrList(hdrs, fChunkPtrs) != 0) + if (compress::CompressInterface::getPtrList(hdrs, fChunkPtrs) != 0) { return ERR_COMP_PARSE_HDRS; } @@ -102,7 +98,15 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char* // rollback), that fall after the HWM, then drop those trailing ptrs. unsigned int chunkIndex = 0; unsigned int blockOffsetWithinChunk = 0; - fCompressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk); + + auto compressor = compress::getCompressorByType( + fCompressorPool, fColInfo->column.compressionType); + if (!compressor) + { + return ERR_COMP_WRONG_COMP_TYPE; + } + + compressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk); if ((chunkIndex + 1) < fChunkPtrs.size()) { @@ -127,11 +131,11 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf( if (!fToBeCompressedBuffer) { fToBeCompressedBuffer = - new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN]; + new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN]; } BlockOp::setEmptyBuf( fToBeCompressedBuffer, - IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, + CompressInterface::UNCOMPRESSED_INBUF_LEN, fColInfo->column.emptyVal, fColInfo->column.width ); @@ -147,10 +151,10 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf( fLog->logMsg( oss.str(), MSGLVL_INFO2 ); } - fToBeCompressedCapacity = 
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; + fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN; // Set file offset past end of last chunk - startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2; + startFileOffset = CompressInterface::HDR_BUF_LEN * 2; if (fChunkPtrs.size() > 0) startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first + @@ -223,7 +227,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize, // Expand the compression buffer size if working with an abbrev extent, and // the bytes we are about to add will overflow the abbreviated extent. - if ((fToBeCompressedCapacity < IDBCompressInterface::UNCOMPRESSED_INBUF_LEN) && + if ((fToBeCompressedCapacity < CompressInterface::UNCOMPRESSED_INBUF_LEN) && ((fNumBytes + writeSize + fillUpWEmptiesWriteSize) > fToBeCompressedCapacity) ) { std::ostringstream oss; @@ -233,7 +237,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize, "; part-" << fColInfo->curCol.dataFile.fPartition << "; seg-" << fColInfo->curCol.dataFile.fSegment; fLog->logMsg( oss.str(), MSGLVL_INFO2 ); - fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; + fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN; } if ((fNumBytes + writeSize + fillUpWEmptiesWriteSize) <= fToBeCompressedCapacity) @@ -316,12 +320,12 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize, // Start over again loading a new to-be-compressed buffer BlockOp::setEmptyBuf( fToBeCompressedBuffer, - IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, + CompressInterface::UNCOMPRESSED_INBUF_LEN, fColInfo->column.emptyVal, fColInfo->column.width ); fToBeCompressedCapacity = - IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; + CompressInterface::UNCOMPRESSED_INBUF_LEN; bufOffset = fToBeCompressedBuffer; fNumBytes = 0; @@ -377,21 +381,31 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize, 
//------------------------------------------------------------------------------ int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile ) { - const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(fToBeCompressedCapacity) + - fUserPaddingBytes; + auto compressor = compress::getCompressorByType( + fCompressorPool, fColInfo->column.compressionType); + if (!compressor) + { + return ERR_COMP_WRONG_COMP_TYPE; + } + + const size_t OUTPUT_BUFFER_SIZE = + compressor->maxCompressedSize(fToBeCompressedCapacity) + + fUserPaddingBytes + + // Padded len = len + COMPRESSED_SIZE_INCREMENT_CHUNK - (len % + // COMPRESSED_SIZE_INCREMENT_CHUNK) + usePadding + compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE; + unsigned char* compressedOutBuf = new unsigned char[ OUTPUT_BUFFER_SIZE ]; boost::scoped_array compressedOutBufPtr(compressedOutBuf); - unsigned int outputLen = OUTPUT_BUFFER_SIZE; + size_t outputLen = OUTPUT_BUFFER_SIZE; #ifdef PROFILE Stats::startParseEvent(WE_STATS_COMPRESS_COL_COMPRESS); #endif - int rc = fCompressor->compressBlock( - reinterpret_cast(fToBeCompressedBuffer), - fToBeCompressedCapacity, - compressedOutBuf, - outputLen ); + int rc = compressor->compressBlock( + reinterpret_cast(fToBeCompressedBuffer), + fToBeCompressedCapacity, compressedOutBuf, outputLen); if (rc != 0) { @@ -399,7 +413,7 @@ int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile ) } // Round up the compressed chunk size - rc = fCompressor->padCompressedChunks( compressedOutBuf, + rc = compressor->padCompressedChunks( compressedOutBuf, outputLen, OUTPUT_BUFFER_SIZE ); if (rc != 0) @@ -581,26 +595,24 @@ int ColumnBufferCompressed::finishFile(bool bTruncFile) int ColumnBufferCompressed::saveCompressionHeaders( ) { // Construct the header records - char hdrBuf[IDBCompressInterface::HDR_BUF_LEN * 2]; + char hdrBuf[CompressInterface::HDR_BUF_LEN * 2]; RETURN_ON_ERROR(fColInfo->colOp->readHeaders(fFile, hdrBuf)); - BRM::LBID_t lbid = 
fCompressor->getLBIDByIndex(hdrBuf, 0); - fCompressor->initHdr(hdrBuf, fColInfo->column.width, - fColInfo->column.dataType, - fColInfo->column.compressionType); - fCompressor->setBlockCount(hdrBuf, - (fColInfo->getFileSize() / BYTE_PER_BLOCK) ); + BRM::LBID_t lbid = compress::CompressInterface::getLBIDByIndex(hdrBuf, 0); + compress::CompressInterface::initHdr(hdrBuf, fColInfo->column.width, fColInfo->column.dataType, + fColInfo->column.compressionType); + compress::CompressInterface::setBlockCount(hdrBuf, (fColInfo->getFileSize() / BYTE_PER_BLOCK)); // If lbid written in the header is not 0 and not equal to `lastupdatedlbid` - we are running // for the next extent for column segment file. const auto lastUpdatedLbid = fColInfo->getLastUpdatedLBID(); if (lbid && lastUpdatedLbid != lbid) { // Write back lbid, after header initialization. - fCompressor->setLBIDByIndex(hdrBuf, lbid, 0); - fCompressor->setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1); + compress::CompressInterface::setLBIDByIndex(hdrBuf, lbid, 0); + compress::CompressInterface::setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1); } else - fCompressor->setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0); + compress::CompressInterface::setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0); std::vector ptrs; @@ -611,7 +623,7 @@ int ColumnBufferCompressed::saveCompressionHeaders( ) unsigned lastIdx = fChunkPtrs.size() - 1; ptrs.push_back( fChunkPtrs[lastIdx].first + fChunkPtrs[lastIdx].second ); - fCompressor->storePtrs( ptrs, hdrBuf ); + compress::CompressInterface::storePtrs(ptrs, hdrBuf); // Write out the header records //char resp; @@ -641,9 +653,9 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset) if (!fToBeCompressedBuffer) { fToBeCompressedBuffer = - new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN]; + new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN]; BlockOp::setEmptyBuf( fToBeCompressedBuffer, - IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, + 
CompressInterface::UNCOMPRESSED_INBUF_LEN, fColInfo->column.emptyVal, fColInfo->column.width ); bNewBuffer = true; @@ -656,12 +668,19 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset) unsigned int blockOffsetWithinChunk = 0; bool bSkipStartingBlks = false; + auto compressor = compress::getCompressorByType( + fCompressorPool, fColInfo->column.compressionType); + if (!compressor) + { + return ERR_COMP_WRONG_COMP_TYPE; + } + if (fPreLoadHWMChunk) { if (fChunkPtrs.size() > 0) { - fCompressor->locateBlock(fStartingHwm, - chunkIndex, blockOffsetWithinChunk); + compressor->locateBlock(fStartingHwm, chunkIndex, + blockOffsetWithinChunk); if (chunkIndex < fChunkPtrs.size()) startFileOffset = fChunkPtrs[chunkIndex].first; @@ -718,8 +737,8 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset) } // Uncompress the chunk into our 4MB buffer - unsigned int outLen = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; - int rc = fCompressor->uncompressBlock( + size_t outLen = CompressInterface::UNCOMPRESSED_INBUF_LEN; + int rc = compressor->uncompressBlock( compressedOutBuf, fChunkPtrs[chunkIndex].second, fToBeCompressedBuffer, @@ -758,7 +777,7 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset) if (!bNewBuffer) { BlockOp::setEmptyBuf( fToBeCompressedBuffer, - IDBCompressInterface::UNCOMPRESSED_INBUF_LEN, + CompressInterface::UNCOMPRESSED_INBUF_LEN, fColInfo->column.emptyVal, fColInfo->column.width ); } @@ -775,10 +794,10 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset) fLog->logMsg( oss.str(), MSGLVL_INFO2 ); } - fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; + fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN; // Set file offset to start after last current chunk - startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2; + startFileOffset = CompressInterface::HDR_BUF_LEN * 2; if (fChunkPtrs.size() > 0) 
startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first + @@ -796,5 +815,4 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset) return NO_ERROR; } - } diff --git a/writeengine/bulk/we_colbufcompressed.h b/writeengine/bulk/we_colbufcompressed.h index 5c4cccffc..057ae65aa 100644 --- a/writeengine/bulk/we_colbufcompressed.h +++ b/writeengine/bulk/we_colbufcompressed.h @@ -107,8 +107,7 @@ private: // should always be 4MB, unless // working with abbrev extent. size_t fNumBytes; // num Bytes in comp buffer - compress::IDBCompressInterface* - fCompressor; // data compression object + compress::CompressorPool fCompressorPool; // data compression object pool compress::CompChunkPtrList fChunkPtrs; // col file header information bool fPreLoadHWMChunk; // preload 1st HWM chunk only diff --git a/writeengine/bulk/we_columninfo.cpp b/writeengine/bulk/we_columninfo.cpp index 69bf37fef..e1e883a39 100644 --- a/writeengine/bulk/we_columninfo.cpp +++ b/writeengine/bulk/we_columninfo.cpp @@ -450,7 +450,7 @@ int ColumnInfo::createDelayedFileIfNeeded( const std::string& tableName ) if (column.dctnry.fCompressionType != 0) { DctnryCompress1* tempD1; - tempD1 = new DctnryCompress1; + tempD1 = new DctnryCompress1(column.dctnry.fCompressionType); tempD1->setMaxActiveChunkNum(1); tempD1->setBulkFlag(true); tempD = tempD1; @@ -668,7 +668,7 @@ int ColumnInfo::extendColumnNewExtent( uint16_t segmentNew = 0; BRM::LBID_t startLbid; - char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ]; + char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ]; // Extend the column by adding an extent to the next // DBRoot, partition, and segment file in the rotation @@ -1684,7 +1684,8 @@ int ColumnInfo::openDctnryStore( bool bMustExist ) if ( column.dctnry.fCompressionType != 0) { - DctnryCompress1* dctnryCompress1 = new DctnryCompress1; + DctnryCompress1* dctnryCompress1 = + new DctnryCompress1(column.dctnry.fCompressionType); dctnryCompress1->setMaxActiveChunkNum(1); 
dctnryCompress1->setBulkFlag(true); fStore = dctnryCompress1; diff --git a/writeengine/bulk/we_columninfocompressed.cpp b/writeengine/bulk/we_columninfocompressed.cpp index e30e74382..e412e62d8 100644 --- a/writeengine/bulk/we_columninfocompressed.cpp +++ b/writeengine/bulk/we_columninfocompressed.cpp @@ -108,7 +108,7 @@ int ColumnInfoCompressed::closeColumnFile(bool bCompletingExtent, bool bAbort) //------------------------------------------------------------------------------ int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm ) { - char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ]; + char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ]; RETURN_ON_ERROR( colOp->readHeaders(curCol.dataFile.pFile, hdr) ); // Initialize the output buffer manager for the column. @@ -129,10 +129,9 @@ int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm ) fColBufferMgr = mgr; - IDBCompressInterface compressor; - int abbrevFlag = - ( compressor.getBlockCount(hdr) == - uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width / BYTE_PER_BLOCK) ); + int abbrevFlag = (compress::CompressInterface::getBlockCount(hdr) == + uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width / + BYTE_PER_BLOCK)); setFileSize( hwm, abbrevFlag ); // See if dealing with abbreviated extent that will need expanding. 
@@ -324,9 +323,9 @@ int ColumnInfoCompressed::truncateDctnryStore( return rc; } - char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ]; + char controlHdr[ CompressInterface::HDR_BUF_LEN ]; rc = fTruncateDctnryFileOp.readFile( dFile, - (unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN); + (unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN); if (rc != NO_ERROR) { @@ -345,8 +344,7 @@ int ColumnInfoCompressed::truncateDctnryStore( return rc; } - IDBCompressInterface compressor; - int rc1 = compressor.verifyHdr( controlHdr ); + int rc1 = compress::CompressInterface::verifyHdr(controlHdr); if (rc1 != 0) { @@ -372,7 +370,8 @@ int ColumnInfoCompressed::truncateDctnryStore( // actually grow the file (something we don't want to do), because we have // not yet reserved a full extent (on disk) for this dictionary store file. const int PSEUDO_COL_WIDTH = 8; - uint64_t numBlocks = compressor.getBlockCount( controlHdr ); + uint64_t numBlocks = + compress::CompressInterface::getBlockCount(controlHdr); if ( numBlocks == uint64_t (INITIAL_EXTENT_ROWS_TO_DISK * PSEUDO_COL_WIDTH / BYTE_PER_BLOCK) ) @@ -390,8 +389,8 @@ int ColumnInfoCompressed::truncateDctnryStore( return NO_ERROR; } - uint64_t hdrSize = compressor.getHdrSize(controlHdr); - uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN; + uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr); + uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN; char* pointerHdr = new char[ptrHdrSize]; rc = fTruncateDctnryFileOp.readFile(dFile, @@ -416,7 +415,8 @@ int ColumnInfoCompressed::truncateDctnryStore( } CompChunkPtrList chunkPtrs; - rc1 = compressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs ); + rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize, + chunkPtrs); delete[] pointerHdr; if (rc1 != 0) diff --git a/writeengine/server/we_getfilesizes.cpp b/writeengine/server/we_getfilesizes.cpp index 3a597ff69..aa374755f 100644 --- 
a/writeengine/server/we_getfilesizes.cpp +++ b/writeengine/server/we_getfilesizes.cpp @@ -96,7 +96,7 @@ size_t readFillBuffer( return totalBytesRead; } -off64_t getCompressedDataSize(string& fileName) +static off64_t getCompressedDataSize(string& fileName) { off64_t dataSize = 0; IDBDataFile* pFile = 0; @@ -119,21 +119,21 @@ off64_t getCompressedDataSize(string& fileName) throw std::runtime_error(oss.str()); } - IDBCompressInterface decompressor; //-------------------------------------------------------------------------- // Read headers and extract compression pointers //-------------------------------------------------------------------------- - char hdr1[IDBCompressInterface::HDR_BUF_LEN]; - nBytes = readFillBuffer( pFile, hdr1, IDBCompressInterface::HDR_BUF_LEN); + char hdr1[CompressInterface::HDR_BUF_LEN]; + nBytes = readFillBuffer( pFile, hdr1, CompressInterface::HDR_BUF_LEN); - if ( nBytes != IDBCompressInterface::HDR_BUF_LEN ) + if ( nBytes != CompressInterface::HDR_BUF_LEN ) { std::ostringstream oss; oss << "Error reading first header from file " << fileName; throw std::runtime_error(oss.str()); } - int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - IDBCompressInterface::HDR_BUF_LEN; + int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) - + CompressInterface::HDR_BUF_LEN; char* hdr2 = new char[ptrSecSize]; nBytes = readFillBuffer( pFile, hdr2, ptrSecSize); @@ -145,7 +145,8 @@ off64_t getCompressedDataSize(string& fileName) } CompChunkPtrList chunkPtrs; - int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs); + int rc = + compress::CompressInterface::getPtrList(hdr2, ptrSecSize, chunkPtrs); delete[] hdr2; if (rc != 0) diff --git a/writeengine/shared/we_bulkrollbackfilecompressed.cpp b/writeengine/shared/we_bulkrollbackfilecompressed.cpp index c149bde71..f15b090e9 100644 --- a/writeengine/shared/we_bulkrollbackfilecompressed.cpp +++ b/writeengine/shared/we_bulkrollbackfilecompressed.cpp @@ -51,6 +51,7 @@ namespace WriteEngine 
BulkRollbackFileCompressed::BulkRollbackFileCompressed(BulkRollbackMgr* mgr) : BulkRollbackFile(mgr) { + compress::initializeCompressorPool(fCompressorPool); } //------------------------------------------------------------------------------ @@ -104,7 +105,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile( } // Read and parse the header pointers - char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];; + char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];; CompChunkPtrList chunkPtrs; std::string errMsg; int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg); @@ -127,7 +128,20 @@ void BulkRollbackFileCompressed::truncateSegmentFile( unsigned int blockOffset = fileSizeBlocks - 1; unsigned int chunkIndex = 0; unsigned int blkOffsetInChunk = 0; - fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk ); + + auto fCompressor = compress::getCompressorByType( + fCompressorPool, + compress::CompressInterface::getCompressionType(hdrs)); + if (!fCompressor) + { + std::ostringstream oss; + oss << "Error, wrong compression type for segment file" + << ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-" + << partNum << "; segment-" << segNum << ";"; + throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE); + } + + fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk); // Truncate the extra extents that are to be aborted if (chunkIndex < chunkPtrs.size()) @@ -145,7 +159,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile( logging::M0075, columnOID, msgText2.str() ); // Drop off any trailing pointers (that point beyond the last block) - fCompressor.setBlockCount( hdrs, fileSizeBlocks ); + compress::CompressInterface::setBlockCount(hdrs, fileSizeBlocks); std::vector ptrs; for (unsigned i = 0; i <= chunkIndex; i++) @@ -155,7 +169,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile( ptrs.push_back( chunkPtrs[chunkIndex].first + chunkPtrs[chunkIndex].second ); - fCompressor.storePtrs( ptrs, hdrs ); + 
compress::CompressInterface::storePtrs(ptrs, hdrs); rc = fDbFile.writeHeaders( pFile, hdrs ); @@ -252,7 +266,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent( } // Read and parse the header pointers - char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; + char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ]; CompChunkPtrList chunkPtrs; std::string errMsg; int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg); @@ -275,7 +289,20 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent( unsigned int blockOffset = startOffsetBlk - 1; unsigned int chunkIndex = 0; unsigned int blkOffsetInChunk = 0; - fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk ); + + auto fCompressor = compress::getCompressorByType( + fCompressorPool, + compress::CompressInterface::getCompressionType(hdrs)); + if (!fCompressor) + { + std::ostringstream oss; + oss << "Error, wrong compression type for segment file" + << ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-" + << partNum << "; segment-" << segNum << ";"; + throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE); + } + + fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk); if (chunkIndex < chunkPtrs.size()) { @@ -401,7 +428,8 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent( // Watch for the special case where we are restoring a db file as an // empty file (chunkindex=0 and restoredChunkLen=0); in this case we // just restore the first pointer (set to 8192). 
- fCompressor.setBlockCount( hdrs, (startOffsetBlk + nBlocks) ); + compress::CompressInterface::setBlockCount(hdrs, + (startOffsetBlk + nBlocks)); std::vector newPtrs; if ((chunkIndex > 0) || (restoredChunkLen > 0)) @@ -413,7 +441,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent( } newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen ); - fCompressor.storePtrs( newPtrs, hdrs ); + compress::CompressInterface::storePtrs(newPtrs, hdrs); rc = fDbFile.writeHeaders( pFile, hdrs ); @@ -482,7 +510,7 @@ int BulkRollbackFileCompressed::loadColumnHdrPtrs( } // Parse the header pointers - int rc1 = fCompressor.getPtrList( hdrs, chunkPtrs ); + int rc1 = compress::CompressInterface::getPtrList(hdrs, chunkPtrs); if (rc1 != 0) { @@ -548,7 +576,7 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent( throw WeException( oss.str(), ERR_FILE_OPEN ); } - char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ]; + char controlHdr[ CompressInterface::HDR_BUF_LEN ]; CompChunkPtrList chunkPtrs; uint64_t ptrHdrSize; std::string errMsg; @@ -572,7 +600,20 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent( unsigned int blockOffset = startOffsetBlk - 1; unsigned int chunkIndex = 0; unsigned int blkOffsetInChunk = 0; - fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk ); + + auto fCompressor = compress::getCompressorByType( + fCompressorPool, + compress::CompressInterface::getCompressionType(controlHdr)); + if (!fCompressor) + { + std::ostringstream oss; + oss << "Error, wrong compression type for segment file" + << ": OID-" << dStoreOID << "; DbRoot-" << dbRoot << "; partition-" + << partNum << "; segment-" << segNum << ";"; + throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE); + } + + fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk); if (chunkIndex < chunkPtrs.size()) { @@ -686,7 +727,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent( // Watch for the special case where we are restoring a db file as an // 
empty file (chunkindex=0 and restoredChunkLen=0); in this case we // just restore the first pointer (set to 8192). - fCompressor.setBlockCount( controlHdr, (startOffsetBlk + nBlocks) ); + compress::CompressInterface::setBlockCount(controlHdr, + (startOffsetBlk + nBlocks)); std::vector newPtrs; if ((chunkIndex > 0) || (restoredChunkLen > 0)) @@ -699,7 +741,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent( newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen ); char* pointerHdr = new char[ptrHdrSize]; - fCompressor.storePtrs( newPtrs, pointerHdr, ptrHdrSize ); + compress::CompressInterface::storePtrs(newPtrs, pointerHdr, + ptrHdrSize); rc = fDbFile.writeHeaders( pFile, controlHdr, pointerHdr, ptrHdrSize ); delete[] pointerHdr; @@ -759,7 +802,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs( std::string& errMsg) const { int rc = fDbFile.readFile( - pFile, (unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN); + pFile, (unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN); if (rc != NO_ERROR) { @@ -771,7 +814,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs( return rc; } - int rc1 = fCompressor.verifyHdr( controlHdr ); + int rc1 = compress::CompressInterface::verifyHdr(controlHdr); if (rc1 != 0) { @@ -786,8 +829,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs( return rc; } - uint64_t hdrSize = fCompressor.getHdrSize(controlHdr); - ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN; + uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr); + ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN; char* pointerHdr = new char[ptrHdrSize]; rc = fDbFile.readFile(pFile, (unsigned char*)pointerHdr, ptrHdrSize); @@ -804,7 +847,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs( } // Parse the header pointers - rc1 = fCompressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs ); + rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize, + chunkPtrs); delete[] pointerHdr; if (rc1 != 
0) @@ -1033,5 +1077,4 @@ size_t BulkRollbackFileCompressed::readFillBuffer( return totalBytesRead; } - } //end of namespace diff --git a/writeengine/shared/we_bulkrollbackfilecompressed.h b/writeengine/shared/we_bulkrollbackfilecompressed.h index 7b7357fc5..ab9e8439c 100644 --- a/writeengine/shared/we_bulkrollbackfilecompressed.h +++ b/writeengine/shared/we_bulkrollbackfilecompressed.h @@ -28,6 +28,7 @@ #include #include +#include #include "we_define.h" #include "we_type.h" @@ -148,7 +149,7 @@ private: uint64_t& ptrHdrSize, std::string& errMsg ) const; - compress::IDBCompressInterface fCompressor; + compress::CompressorPool fCompressorPool; }; } //end of namespace diff --git a/writeengine/shared/we_chunkmanager.cpp b/writeengine/shared/we_chunkmanager.cpp index 8ee736c73..ea100d5de 100644 --- a/writeengine/shared/we_chunkmanager.cpp +++ b/writeengine/shared/we_chunkmanager.cpp @@ -67,8 +67,6 @@ namespace WriteEngine extern int NUM_BLOCKS_PER_INITIAL_EXTENT; // defined in we_dctnry.cpp extern WErrorCodes ec; // defined in we_log.cpp -const int COMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::maxCompressedSize(UNCOMPRESSED_CHUNK_SIZE) + 64 + 3 + 8 * 1024; - //------------------------------------------------------------------------------ // Search for the specified chunk in fChunkList. //------------------------------------------------------------------------------ @@ -91,18 +89,24 @@ ChunkData* CompFileData::findChunk(int64_t id) const //------------------------------------------------------------------------------ // ChunkManager constructor //------------------------------------------------------------------------------ -ChunkManager::ChunkManager() : fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false), - fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()), - fFileOp(0), fSysLogger(NULL), fTransId(-1), - fLocalModuleId(Config::getLocalModuleID()), - fFs(fIsHdfs ? 
- IDBFileSystem::getFs(IDBDataFile::HDFS) : - IDBPolicy::useCloud() ? - IDBFileSystem::getFs(IDBDataFile::CLOUD) : - IDBFileSystem::getFs(IDBDataFile::BUFFERED)) +ChunkManager::ChunkManager() + : fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false), + fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()), + fFileOp(0), fSysLogger(NULL), fTransId(-1), + fLocalModuleId(Config::getLocalModuleID()), + fFs(fIsHdfs ? IDBFileSystem::getFs(IDBDataFile::HDFS) + : IDBPolicy::useCloud() + ? IDBFileSystem::getFs(IDBDataFile::CLOUD) + : IDBFileSystem::getFs(IDBDataFile::BUFFERED)) { fUserPaddings = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK; - fCompressor.numUserPaddingBytes(fUserPaddings); + compress::initializeCompressorPool(fCompressorPool, fUserPaddings); + + COMPRESSED_CHUNK_SIZE = + compress::CompressInterface::getMaxCompressedSizeGeneric( + UNCOMPRESSED_CHUNK_SIZE) + + 64 + 3 + 8 * 1024; + fMaxCompressedBufSize = COMPRESSED_CHUNK_SIZE + fUserPaddings; fBufCompressed = new char[fMaxCompressedBufSize]; fSysLogger = new logging::Logger(SUBSYSTEM_ID_WE); @@ -383,16 +387,22 @@ CompFileData* ChunkManager::getFileData(const FID& fid, } // make sure the header is valid - if (fCompressor.verifyHdr(fileData->fFileHeader.fControlData) != 0) + if (compress::CompressInterface::verifyHdr(fileData->fFileHeader.fControlData) != 0) { WE_COMP_DBG(cout << "Invalid header." << endl;) delete fileData; return NULL; } - int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); + int headerSize = compress::CompressInterface::getHdrSize( + fileData->fFileHeader.fControlData); int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; + // Save segment file compression type. 
+ uint32_t compressionType = compress::CompressInterface::getCompressionType( + fileData->fFileHeader.fControlData); + fileData->fCompressionType = compressionType; + if (ptrSecSize > COMPRESSED_FILE_HEADER_UNIT) { // >8K header, dictionary width > 128 @@ -462,11 +472,12 @@ IDBDataFile* ChunkManager::createDctnryFile(const FID& fid, // Dictionary store extent width == 0. See more details in function // `createDictStoreExtent`. - fCompressor.initHdr(fileData->fFileHeader.fControlData, - fileData->fFileHeader.fPtrSection, - /*colWidth=*/0, fileData->fColDataType, - fFileOp->compressionType(), hdrSize); - fCompressor.setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0); + compress::CompressInterface::initHdr( + fileData->fFileHeader.fControlData, fileData->fFileHeader.fPtrSection, + /*colWidth=*/0, fileData->fColDataType, fFileOp->compressionType(), hdrSize); + compress::CompressInterface::setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0); + // Save compression type. + fileData->fCompressionType = fFileOp->compressionType(); if (writeHeader(fileData, __LINE__) != NO_ERROR) { @@ -771,9 +782,16 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*& } // uncompress the read in buffer - unsigned int dataLen = sizeof(chunkData->fBufUnCompressed); + size_t dataLen = sizeof(chunkData->fBufUnCompressed); - if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize, + auto fCompressor = compress::getCompressorByType( + fCompressorPool, fileData->fCompressionType); + if (!fCompressor) + { + return ERR_COMP_WRONG_COMP_TYPE; + } + + if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize, (unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0) { if (fIsFix) @@ -784,7 +802,7 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*& { char* hdr = fileData->fFileHeader.fControlData; - if (fCompressor.getBlockCount(hdr) < 512) + if (compress::CompressInterface::getBlockCount(hdr) < 512) blocks 
= 256; } @@ -820,7 +838,8 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*& { if (id == 0 && ptrs[id] == 0) // if the 1st ptr is not set for new extent { - ptrs[0] = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); + ptrs[0] = compress::CompressInterface::getHdrSize( + fileData->fFileHeader.fControlData); } // load the uncompressed buffer with empty values. @@ -907,10 +926,17 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData) // compress the chunk before writing it to file fLenCompressed = fMaxCompressedBufSize; - if (fCompressor.compressBlock((char*)chunkData->fBufUnCompressed, - chunkData->fLenUnCompressed, - (unsigned char*)fBufCompressed, - fLenCompressed) != 0) + auto fCompressor = compress::getCompressorByType( + fCompressorPool, fileData->fCompressionType); + if (!fCompressor) + { + return ERR_COMP_WRONG_COMP_TYPE; + } + + if (fCompressor->compressBlock((char*) chunkData->fBufUnCompressed, + chunkData->fLenUnCompressed, + (unsigned char*) fBufCompressed, + fLenCompressed) != 0) { logMessage(ERR_COMP_COMPRESS, logging::LOG_TYPE_ERROR, __LINE__); return ERR_COMP_COMPRESS; @@ -941,7 +967,8 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData) // [chunkId+0] is the start offset of current chunk. // [chunkId+1] is the start offset of next chunk, the offset diff is current chunk size. // [chunkId+2] is 0 or not indicates if the next chunk exists. 
- int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); + int headerSize = compress::CompressInterface::getHdrSize( + fileData->fFileHeader.fControlData); int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; int64_t usablePtrIds = (ptrSecSize / sizeof(uint64_t)) - 2; @@ -968,7 +995,7 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData) else if (lastChunk) { // add padding space if the chunk is written first time - if (fCompressor.padCompressedChunks( + if (fCompressor->padCompressedChunks( (unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize) != 0) { WE_COMP_DBG(cout << "Last chunk:" << chunkId << ", padding failed." << endl;) @@ -1272,7 +1299,8 @@ int ChunkManager::closeFile(CompFileData* fileData) int ChunkManager::writeHeader(CompFileData* fileData, int ln) { int rc = NO_ERROR; - int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); + int headerSize = compress::CompressInterface::getHdrSize( + fileData->fFileHeader.fControlData); int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; if (!fIsHdfs && !fIsBulkLoad) @@ -1422,8 +1450,10 @@ int ChunkManager::updateColumnExtent(IDBDataFile* pFile, int addBlockCount, int6 int rc = NO_ERROR; char* hdr = pFileData->fFileHeader.fControlData; - fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount); - fCompressor.setLBIDByIndex(hdr, lbid, 1); + compress::CompressInterface::setBlockCount( + hdr, compress::CompressInterface::getBlockCount(hdr) + addBlockCount); + compress::CompressInterface::setLBIDByIndex(hdr, lbid, 1); + ChunkData* chunkData = (pFileData)->findChunk(0); if (chunkData != NULL) @@ -1475,7 +1505,7 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount, char* hdr = i->second->fFileHeader.fControlData; char* uncompressedBuf = chunkData->fBufUnCompressed; - int currentBlockCount = fCompressor.getBlockCount(hdr); + int currentBlockCount = 
compress::CompressInterface::getBlockCount(hdr); // Bug 3203, write out the compressed initial extent. if (currentBlockCount == 0) @@ -1511,13 +1541,15 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount, } if (rc == NO_ERROR) - fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount); + compress::CompressInterface::setBlockCount( + hdr, + compress::CompressInterface::getBlockCount(hdr) + addBlockCount); if (currentBlockCount) { // Append to the end. - uint64_t lbidCount = fCompressor.getLBIDCount(hdr); - fCompressor.setLBIDByIndex(hdr, lbid, lbidCount); + uint64_t lbidCount = compress::CompressInterface::getLBIDCount(hdr); + compress::CompressInterface::setLBIDByIndex(hdr, lbid, lbidCount); } return rc; } @@ -1684,7 +1716,8 @@ int ChunkManager::getBlockCount(IDBDataFile* pFile) map::iterator fpIt = fFilePtrMap.find(pFile); idbassert(fpIt != fFilePtrMap.end()); - return fCompressor.getBlockCount(fpIt->second->fFileHeader.fControlData); + return compress::CompressInterface::getBlockCount( + fpIt->second->fFileHeader.fControlData); } //------------------------------------------------------------------------------ @@ -1758,11 +1791,13 @@ int ChunkManager::reallocateChunks(CompFileData* fileData) origFilePtr->flush(); // back out the current pointers - int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); + int headerSize = compress::CompressInterface::getHdrSize( + fileData->fFileHeader.fControlData); int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; compress::CompChunkPtrList origPtrs; - if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0) + if (compress::CompressInterface::getPtrList( + fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0) { ostringstream oss; oss << "Chunk shifting failed, file:" << origFileName << " -- invalid header."; @@ -1876,7 +1911,14 @@ int ChunkManager::reallocateChunks(CompFileData* fileData) ChunkData* chunkData = 
chunksTouched[k]; fLenCompressed = fMaxCompressedBufSize; - if ((rc = fCompressor.compressBlock((char*)chunkData->fBufUnCompressed, + auto fCompressor = compress::getCompressorByType( + fCompressorPool, fileData->fCompressionType); + if (!fCompressor) + { + return ERR_COMP_WRONG_COMP_TYPE; + } + + if ((rc = fCompressor->compressBlock((char*)chunkData->fBufUnCompressed, chunkData->fLenUnCompressed, (unsigned char*)fBufCompressed, fLenCompressed)) != 0) @@ -1894,7 +1936,7 @@ int ChunkManager::reallocateChunks(CompFileData* fileData) << fLenCompressed;) // shifting chunk, add padding space - if ((rc = fCompressor.padCompressedChunks( + if ((rc = fCompressor->padCompressedChunks( (unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize)) != 0) { WE_COMP_DBG(cout << ", but padding failed." << endl;) @@ -2245,7 +2287,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData) } // make sure the header is valid - if ((rc = fCompressor.verifyHdr(fileData->fFileHeader.fControlData)) != 0) + if ((rc = compress::CompressInterface::verifyHdr( + fileData->fFileHeader.fControlData)) != 0) { ostringstream oss; oss << "Invalid header in new " << fileData->fFileName << ", roll back"; @@ -2254,7 +2297,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData) return rc; } - int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData); + int headerSize = compress::CompressInterface::getHdrSize( + fileData->fFileHeader.fControlData); int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; // read in the pointer section in header @@ -2270,7 +2314,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData) // get pointer list compress::CompChunkPtrList ptrs; - if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0) + if (compress::CompressInterface::getPtrList( + fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0) { ostringstream oss; oss << "Failed to parse pointer list from new " 
<< fileData->fFileName << "@" << __LINE__; @@ -2282,6 +2327,13 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData) ChunkData chunkData; int numOfChunks = ptrs.size(); // number of chunks in the file + auto fCompressor = compress::getCompressorByType( + fCompressorPool, fileData->fCompressionType); + if (!fCompressor) + { + return ERR_COMP_WRONG_COMP_TYPE; + } + for (int i = 0; i < numOfChunks && rc == NO_ERROR; i++) { unsigned int chunkSize = ptrs[i].second; @@ -2304,9 +2356,9 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData) } // uncompress the read in buffer - unsigned int dataLen = sizeof(chunkData.fBufUnCompressed); + size_t dataLen = sizeof(chunkData.fBufUnCompressed); - if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize, + if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize, (unsigned char*)chunkData.fBufUnCompressed, dataLen) != 0) { ostringstream oss; @@ -2624,13 +2676,15 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid, if (mit != fFileMap.end()) { - int headerSize = fCompressor.getHdrSize(mit->second->fFileHeader.fControlData); + int headerSize = compress::CompressInterface::getHdrSize( + mit->second->fFileHeader.fControlData); int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT; // get pointer list compress::CompChunkPtrList ptrs; - if (fCompressor.getPtrList(mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0) + if (compress::CompressInterface::getPtrList( + mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0) { ostringstream oss; oss << "Failed to parse pointer list from new " << mit->second->fFileName << "@" << __LINE__; @@ -2662,9 +2716,16 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid, // uncompress the read in buffer chunkData = new ChunkData(numOfChunks - 1); - unsigned int dataLen = sizeof(chunkData->fBufUnCompressed); + size_t dataLen = sizeof(chunkData->fBufUnCompressed); - if (fCompressor.uncompressBlock((char*)fBufCompressed, 
chunkSize, + auto fCompressor = compress::getCompressorByType( + fCompressorPool, mit->second->fCompressionType); + if (!fCompressor) + { + return ERR_COMP_WRONG_COMP_TYPE; + } + + if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize, (unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0) { mit->second->fChunkList.push_back(chunkData); @@ -2676,7 +2737,7 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid, { char* hdr = mit->second->fFileHeader.fControlData; - if (fCompressor.getBlockCount(hdr) < 512) + if (compress::CompressInterface::getBlockCount(hdr) < 512) blocks = 256; } @@ -2693,7 +2754,6 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid, return rc; } - } // vim:ts=4 sw=4: diff --git a/writeengine/shared/we_chunkmanager.h b/writeengine/shared/we_chunkmanager.h index b79a9b377..198c67cce 100644 --- a/writeengine/shared/we_chunkmanager.h +++ b/writeengine/shared/we_chunkmanager.h @@ -64,8 +64,8 @@ namespace WriteEngine // forward reference class FileOp; -const int UNCOMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; -const int COMPRESSED_FILE_HEADER_UNIT = compress::IDBCompressInterface::HDR_BUF_LEN; +const int UNCOMPRESSED_CHUNK_SIZE = compress::CompressInterface::UNCOMPRESSED_INBUF_LEN; +const int COMPRESSED_FILE_HEADER_UNIT = compress::CompressInterface::HDR_BUF_LEN; // assume UNCOMPRESSED_CHUNK_SIZE > 0xBFFF (49151), 8 * 1024 bytes padding @@ -136,7 +136,7 @@ class CompFileData public: CompFileData(const FileID& id, const FID& fid, const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth) : fFileID(id), fFid(fid), fColDataType(colDataType), fColWidth(colWidth), fDctnryCol(false), - fFilePtr(NULL), fIoBSize(0) {} + fFilePtr(NULL), fIoBSize(0), fCompressionType(1) {} ChunkData* findChunk(int64_t cid) const; @@ -152,6 +152,7 @@ protected: std::list fChunkList; boost::scoped_array fIoBuffer; size_t fIoBSize; + uint32_t fCompressionType; friend class ChunkManager; }; @@ -369,22 
+370,23 @@ protected: std::list > fActiveChunks; unsigned int fMaxActiveChunkNum; // max active chunks per file char* fBufCompressed; - unsigned int fLenCompressed; - unsigned int fMaxCompressedBufSize; - unsigned int fUserPaddings; + size_t fLenCompressed; + size_t fMaxCompressedBufSize; + size_t fUserPaddings; bool fIsBulkLoad; bool fDropFdCache; bool fIsInsert; bool fIsHdfs; FileOp* fFileOp; - compress::IDBCompressInterface fCompressor; + compress::CompressorPool fCompressorPool; logging::Logger* fSysLogger; TxnID fTransId; int fLocalModuleId; idbdatafile::IDBFileSystem& fFs; bool fIsFix; + size_t COMPRESSED_CHUNK_SIZE; -private: + private: }; } diff --git a/writeengine/shared/we_define.h b/writeengine/shared/we_define.h index 97ae3a1b2..bf987c31c 100644 --- a/writeengine/shared/we_define.h +++ b/writeengine/shared/we_define.h @@ -348,6 +348,7 @@ const int ERR_COMP_READ_FILE = ERR_COMPBASE + 16;// Failed to read from a const int ERR_COMP_WRITE_FILE = ERR_COMPBASE + 17;// Failed to write to a compresssed data file const int ERR_COMP_CLOSE_FILE = ERR_COMPBASE + 18;// Failed to close a compressed data file const int ERR_COMP_TRUNCATE_ZERO = ERR_COMPBASE + 19;// Invalid attempt to truncate file to 0 bytes +const int ERR_COMP_WRONG_COMP_TYPE = ERR_COMPBASE + 20;// Invalid compression type. 
//-------------------------------------------------------------------------- // Auto-increment error diff --git a/writeengine/shared/we_fileop.cpp b/writeengine/shared/we_fileop.cpp index 52785a013..f8046deed 100644 --- a/writeengine/shared/we_fileop.cpp +++ b/writeengine/shared/we_fileop.cpp @@ -652,14 +652,19 @@ int FileOp::extendFile( // @bug 5349: check that new extent's fbo is not past current EOF if (m_compressionType) { - char hdrsIn[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ]; + char hdrsIn[ compress::CompressInterface::HDR_BUF_LEN * 2 ]; RETURN_ON_ERROR( readHeaders(pFile, hdrsIn) ); - IDBCompressInterface compressor; - unsigned int ptrCount = compressor.getPtrCount(hdrsIn); + std::unique_ptr compressor( + compress::getCompressInterfaceByType( + compress::CompressInterface::getCompressionType(hdrsIn))); + + unsigned int ptrCount = + compress::CompressInterface::getPtrCount(hdrsIn); unsigned int chunkIndex = 0; unsigned int blockOffsetWithinChunk = 0; - compressor.locateBlock((hwm - 1), chunkIndex, blockOffsetWithinChunk); + compressor->locateBlock((hwm - 1), chunkIndex, + blockOffsetWithinChunk); //std::ostringstream oss1; //oss1 << "Extending compressed column file"<< @@ -816,9 +821,8 @@ int FileOp::extendFile( if ((m_compressionType) && (hdrs)) { - IDBCompressInterface compressor; - compressor.initHdr(hdrs, width, colDataType, m_compressionType); - compressor.setLBIDByIndex(hdrs, startLbid, 0); + compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType); + compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0); } } @@ -976,9 +980,8 @@ int FileOp::addExtentExactFile( if ((m_compressionType) && (hdrs)) { - IDBCompressInterface compressor; - compressor.initHdr(hdrs, width, colDataType, m_compressionType); - compressor.setLBIDByIndex(hdrs, startLbid, 0); + compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType); + compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0); } } @@ -1064,13 
+1067,11 @@ int FileOp::initColumnExtent( { if ((bNewFile) && (m_compressionType)) { - char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2]; - IDBCompressInterface compressor; - compressor.initHdr(hdrs, width, colDataType, m_compressionType); - compressor.setLBIDByIndex(hdrs, lbid, 0); - + char hdrs[CompressInterface::HDR_BUF_LEN * 2]; + compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType); + compress::CompressInterface::setLBIDByIndex(hdrs, lbid, 0); if (bAbbrevExtent) - compressor.setBlockCount(hdrs, nBlocks); + compress::CompressInterface::setBlockCount(hdrs, nBlocks); RETURN_ON_ERROR(writeHeaders(pFile, hdrs)); } @@ -1262,7 +1263,7 @@ int FileOp::initAbbrevCompColumnExtent( Stats::startParseEvent(WE_STATS_COMPRESS_COL_INIT_ABBREV_EXT); #endif - char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2]; + char hdrs[CompressInterface::HDR_BUF_LEN * 2]; rc = writeInitialCompColumnChunk( pFile, nBlocks, INITIAL_EXTENT_ROWS_TO_DISK, @@ -1308,24 +1309,30 @@ int FileOp::writeInitialCompColumnChunk( execplan::CalpontSystemCatalog::ColDataType colDataType, char* hdrs) { - const int INPUT_BUFFER_SIZE = nRows * width; + const size_t INPUT_BUFFER_SIZE = nRows * width; char* toBeCompressedInput = new char[INPUT_BUFFER_SIZE]; unsigned int userPaddingBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK; - const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(INPUT_BUFFER_SIZE) + - userPaddingBytes; + // Compress an initialized abbreviated extent + // Initially m_compressionType == 0, but this function is used under + // condition where m_compressionType > 0. 
+ std::unique_ptr compressor( + compress::getCompressInterfaceByType(m_compressionType, + userPaddingBytes)); + const size_t OUTPUT_BUFFER_SIZE = + compressor->maxCompressedSize(INPUT_BUFFER_SIZE) + userPaddingBytes + + compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE; + unsigned char* compressedOutput = new unsigned char[OUTPUT_BUFFER_SIZE]; - unsigned int outputLen = OUTPUT_BUFFER_SIZE; + size_t outputLen = OUTPUT_BUFFER_SIZE; boost::scoped_array toBeCompressedInputPtr( toBeCompressedInput ); boost::scoped_array compressedOutputPtr(compressedOutput); setEmptyBuf( (unsigned char*)toBeCompressedInput, INPUT_BUFFER_SIZE, emptyVal, width); - // Compress an initialized abbreviated extent - IDBCompressInterface compressor( userPaddingBytes ); - int rc = compressor.compressBlock(toBeCompressedInput, - INPUT_BUFFER_SIZE, compressedOutput, outputLen ); + int rc = compressor->compressBlock(toBeCompressedInput, INPUT_BUFFER_SIZE, + compressedOutput, outputLen); if (rc != 0) { @@ -1333,8 +1340,8 @@ int FileOp::writeInitialCompColumnChunk( } // Round up the compressed chunk size - rc = compressor.padCompressedChunks( compressedOutput, - outputLen, OUTPUT_BUFFER_SIZE ); + rc = compressor->padCompressedChunks(compressedOutput, outputLen, + OUTPUT_BUFFER_SIZE); if (rc != 0) { @@ -1347,23 +1354,22 @@ int FileOp::writeInitialCompColumnChunk( // "; blkAllocCnt: " << nBlocksAllocated << // "; compressedByteCnt: " << outputLen << std::endl; - compressor.initHdr(hdrs, width, colDataType, m_compressionType); - compressor.setBlockCount(hdrs, nBlocksAllocated); - compressor.setLBIDByIndex(hdrs, startLBID, 0); + compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType); + compress::CompressInterface::setBlockCount(hdrs, nBlocksAllocated); + compress::CompressInterface::setLBIDByIndex(hdrs, startLBID, 0); // Store compression pointers in the header std::vector ptrs; - ptrs.push_back( IDBCompressInterface::HDR_BUF_LEN * 2 ); - ptrs.push_back( outputLen + 
(IDBCompressInterface::HDR_BUF_LEN * 2) ); - compressor.storePtrs(ptrs, hdrs); + ptrs.push_back( CompressInterface::HDR_BUF_LEN * 2 ); + ptrs.push_back( outputLen + (CompressInterface::HDR_BUF_LEN * 2) ); + compress::CompressInterface::storePtrs(ptrs, hdrs); RETURN_ON_ERROR( writeHeaders(pFile, hdrs) ); // Write the compressed data - if ( pFile->write( compressedOutput, outputLen ) != outputLen ) - { + size_t writtenLen = pFile->write(compressedOutput, outputLen); + if (writtenLen != outputLen) return ERR_FILE_WRITE; - } return NO_ERROR; } @@ -1421,7 +1427,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, return ERR_FILE_OPEN; } - char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ]; + char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ]; rc = readHeaders( pFile, hdrs ); if (rc != NO_ERROR) @@ -1432,9 +1438,14 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, } int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK; - IDBCompressInterface compressor( userPadBytes ); + + std::unique_ptr compressor( + compress::getCompressInterfaceByType( + compress::CompressInterface::getCompressionType(hdrs), + userPadBytes)); + CompChunkPtrList chunkPtrs; - int rcComp = compressor.getPtrList( hdrs, chunkPtrs ); + int rcComp = compress::CompressInterface::getPtrList(hdrs, chunkPtrs); if (rcComp != 0) { @@ -1444,7 +1455,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, } // Nothing to do if the proposed HWM is < the current block count - uint64_t blkCount = compressor.getBlockCount(hdrs); + uint64_t blkCount = compress::CompressInterface::getBlockCount(hdrs); if (blkCount > (hwm + 1)) { @@ -1455,7 +1466,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, const unsigned int ROWS_PER_EXTENT = BRMWrapper::getInstance()->getInstance()->getExtentRows(); const unsigned int ROWS_PER_CHUNK = - IDBCompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth; + CompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth; const unsigned int CHUNKS_PER_EXTENT = 
ROWS_PER_EXTENT / ROWS_PER_CHUNK; // If this is an abbreviated extent, we first expand to a full extent @@ -1493,7 +1504,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, CompChunkPtr chunkOutPtr; rc = expandAbbrevColumnChunk( pFile, emptyVal, colWidth, - chunkPtrs[0], chunkOutPtr ); + chunkPtrs[0], chunkOutPtr, hdrs ); if (rc != NO_ERROR) { @@ -1515,7 +1526,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, // Update block count to reflect a full extent blkCount = (ROWS_PER_EXTENT * colWidth) / BYTE_PER_BLOCK; - compressor.setBlockCount( hdrs, blkCount ); + compress::CompressInterface::setBlockCount(hdrs, blkCount); } // Calculate the number of empty chunks we need to add to fill this extent @@ -1532,7 +1543,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, compressor.getBlockCount(hdrs) << std::endl; std::cout << "Pointer Header Size (in bytes): " << (compressor.getHdrSize(hdrs) - - IDBCompressInterface::HDR_BUF_LEN) << std::endl; + CompressInterface::HDR_BUF_LEN) << std::endl; std::cout << "Chunk Pointers (offset,length): " << std::endl; for (unsigned k = 0; k < chunkPtrs.size(); k++) @@ -1551,8 +1562,10 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, // Fill in or add necessary remaining empty chunks if (numChunksToFill > 0) { - const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; - const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes; + const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN; + const int OUT_BUF_LEN = + compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes + + compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE; // Allocate buffer, and store in scoped_array to insure it's deletion. 
// Create scope {...} to manage deletion of buffers @@ -1566,9 +1579,9 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, // Compress and then pad the compressed chunk setEmptyBuf( (unsigned char*)toBeCompressedBuf, IN_BUF_LEN, emptyVal, colWidth ); - unsigned int outputLen = OUT_BUF_LEN; - rcComp = compressor.compressBlock( toBeCompressedBuf, - IN_BUF_LEN, compressedBuf, outputLen ); + size_t outputLen = OUT_BUF_LEN; + rcComp = compressor->compressBlock(toBeCompressedBuf, IN_BUF_LEN, + compressedBuf, outputLen); if (rcComp != 0) { @@ -1579,8 +1592,8 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, toBeCompressedInputPtr.reset(); // release memory - rcComp = compressor.padCompressedChunks( compressedBuf, - outputLen, OUT_BUF_LEN ); + rcComp = compressor->padCompressedChunks(compressedBuf, outputLen, + OUT_BUF_LEN); if (rcComp != 0) { @@ -1639,7 +1652,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid, ptrs.push_back( chunkPtrs[chunkPtrs.size() - 1].first + chunkPtrs[chunkPtrs.size() - 1].second ); - compressor.storePtrs( ptrs, hdrs ); + compress::CompressInterface::storePtrs(ptrs, hdrs); rc = writeHeaders( pFile, hdrs ); @@ -1697,11 +1710,24 @@ int FileOp::expandAbbrevColumnChunk( const uint8_t* emptyVal, int colWidth, const CompChunkPtr& chunkInPtr, - CompChunkPtr& chunkOutPtr ) + CompChunkPtr& chunkOutPtr, + const char *hdrs ) { int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK; - const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN; - const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes; + auto realCompressionType = m_compressionType; + if (hdrs) + { + realCompressionType = + compress::CompressInterface::getCompressionType(hdrs); + } + std::unique_ptr compressor( + compress::getCompressInterfaceByType(realCompressionType, + userPadBytes)); + + const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN; + const int OUT_BUF_LEN = + 
compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes + + compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE; char* toBeCompressedBuf = new char[ IN_BUF_LEN ]; boost::scoped_array toBeCompressedPtr(toBeCompressedBuf); @@ -1717,13 +1743,10 @@ int FileOp::expandAbbrevColumnChunk( chunkInPtr.second) ); // Uncompress an "abbreviated" chunk into our 4MB buffer - unsigned int outputLen = IN_BUF_LEN; - IDBCompressInterface compressor( userPadBytes ); - int rc = compressor.uncompressBlock( - compressedInBuf, - chunkInPtr.second, - (unsigned char*)toBeCompressedBuf, - outputLen); + size_t outputLen = IN_BUF_LEN; + int rc = compressor->uncompressBlock(compressedInBuf, chunkInPtr.second, + (unsigned char*) toBeCompressedBuf, + outputLen); if (rc != 0) { @@ -1739,11 +1762,8 @@ int FileOp::expandAbbrevColumnChunk( // Compress the data we just read, as a "full" 4MB chunk outputLen = OUT_BUF_LEN; - rc = compressor.compressBlock( - reinterpret_cast(toBeCompressedBuf), - IN_BUF_LEN, - compressedOutBuf, - outputLen ); + rc = compressor->compressBlock(reinterpret_cast(toBeCompressedBuf), + IN_BUF_LEN, compressedOutBuf, outputLen); if (rc != 0) { @@ -1751,8 +1771,8 @@ int FileOp::expandAbbrevColumnChunk( } // Round up the compressed chunk size - rc = compressor.padCompressedChunks( compressedOutBuf, - outputLen, OUT_BUF_LEN ); + rc = compressor->padCompressedChunks(compressedOutBuf, outputLen, + OUT_BUF_LEN); if (rc != 0) { @@ -1782,7 +1802,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* hdr) const RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) ); // Write the headers - if (pFile->write( hdr, IDBCompressInterface::HDR_BUF_LEN * 2 ) != IDBCompressInterface::HDR_BUF_LEN * 2) + if (pFile->write( hdr, CompressInterface::HDR_BUF_LEN * 2 ) != CompressInterface::HDR_BUF_LEN * 2) { return ERR_FILE_WRITE; } @@ -1808,7 +1828,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr, RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) ); // Write the 
control header - if (pFile->write( controlHdr, IDBCompressInterface::HDR_BUF_LEN ) != IDBCompressInterface::HDR_BUF_LEN) + if (pFile->write( controlHdr, CompressInterface::HDR_BUF_LEN ) != CompressInterface::HDR_BUF_LEN) { return ERR_FILE_WRITE; } @@ -2651,9 +2671,8 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdrs ) const { RETURN_ON_ERROR( setFileOffset(pFile, 0) ); RETURN_ON_ERROR( readFile( pFile, reinterpret_cast(hdrs), - (IDBCompressInterface::HDR_BUF_LEN * 2) ) ); - IDBCompressInterface compressor; - int rc = compressor.verifyHdr( hdrs ); + (CompressInterface::HDR_BUF_LEN * 2) ) ); + int rc = compress::CompressInterface::verifyHdr(hdrs); if (rc != 0) { @@ -2671,11 +2690,10 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdr1, char* hdr2 ) const unsigned char* hdrPtr = reinterpret_cast(hdr1); RETURN_ON_ERROR( setFileOffset(pFile, 0) ); RETURN_ON_ERROR( readFile( pFile, hdrPtr, - IDBCompressInterface::HDR_BUF_LEN )); + CompressInterface::HDR_BUF_LEN )); - IDBCompressInterface compressor; - int ptrSecSize = compressor.getHdrSize(hdrPtr) - - IDBCompressInterface::HDR_BUF_LEN; + int ptrSecSize = compress::CompressInterface::getHdrSize(hdrPtr) - + CompressInterface::HDR_BUF_LEN; return readFile( pFile, reinterpret_cast(hdr2), ptrSecSize ); } diff --git a/writeengine/shared/we_fileop.h b/writeengine/shared/we_fileop.h index a136d4528..267bff6d4 100644 --- a/writeengine/shared/we_fileop.h +++ b/writeengine/shared/we_fileop.h @@ -529,11 +529,11 @@ private: FileOp(const FileOp& rhs); FileOp& operator=(const FileOp& rhs); - int expandAbbrevColumnChunk( IDBDataFile* pFile, - const uint8_t* emptyVal, - int colWidth, - const compress::CompChunkPtr& chunkInPtr, - compress::CompChunkPtr& chunkOutPt); + int expandAbbrevColumnChunk(IDBDataFile* pFile, const uint8_t* emptyVal, + int colWidth, + const compress::CompChunkPtr& chunkInPtr, + compress::CompChunkPtr& chunkOutPt, + const char* hdrs = nullptr); int initAbbrevCompColumnExtent( IDBDataFile* pFile, 
uint16_t dbRoot, int nBlocks, diff --git a/writeengine/shared/we_rbmetawriter.cpp b/writeengine/shared/we_rbmetawriter.cpp index ee9ff7c46..8cc2bd7a1 100644 --- a/writeengine/shared/we_rbmetawriter.cpp +++ b/writeengine/shared/we_rbmetawriter.cpp @@ -1007,9 +1007,9 @@ void RBMetaWriter::backupHWMChunk( } // Read Control header - char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ]; + char controlHdr[ CompressInterface::HDR_BUF_LEN ]; rc = fileOp.readFile( dbFile, (unsigned char*)controlHdr, - IDBCompressInterface::HDR_BUF_LEN ); + CompressInterface::HDR_BUF_LEN ); if (rc != NO_ERROR) { @@ -1025,8 +1025,7 @@ void RBMetaWriter::backupHWMChunk( throw WeException( oss.str(), rc ); } - IDBCompressInterface compressor; - int rc1 = compressor.verifyHdr( controlHdr ); + int rc1 = compress::CompressInterface::verifyHdr(controlHdr); if (rc1 != 0) { @@ -1045,9 +1044,23 @@ void RBMetaWriter::backupHWMChunk( throw WeException( oss.str(), rc ); } + auto compressionType = + compress::CompressInterface::getCompressionType(controlHdr); + std::unique_ptr compressor( + compress::getCompressInterfaceByType(compressionType)); + + if (!compressor) + { + WErrorCodes ec; + std::ostringstream oss; + oss << "Ivalid compression type " << compressionType; + fileOp.closeFile( dbFile ); + throw WeException(oss.str(), rc); + } + // Read Pointer header data - uint64_t hdrSize = compressor.getHdrSize(controlHdr); - uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN; + uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr); + uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN; char* pointerHdr = new char[ptrHdrSize]; rc = fileOp.readFile( dbFile, (unsigned char*)pointerHdr, ptrHdrSize ); @@ -1067,7 +1080,8 @@ void RBMetaWriter::backupHWMChunk( } CompChunkPtrList chunkPtrs; - rc = compressor.getPtrList(pointerHdr, ptrHdrSize, chunkPtrs ); + rc = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize, + chunkPtrs); delete[] pointerHdr; if (rc != 0) 
@@ -1087,7 +1101,7 @@ void RBMetaWriter::backupHWMChunk( unsigned int blockOffsetWithinChunk = 0; unsigned char* buffer = 0; uint64_t chunkSize = 0; - compressor.locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk); + compressor->locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk); if (chunkIndex < chunkPtrs.size()) { diff --git a/writeengine/wrapper/we_colopcompress.cpp b/writeengine/wrapper/we_colopcompress.cpp index ae5659c03..cdf186e43 100644 --- a/writeengine/wrapper/we_colopcompress.cpp +++ b/writeengine/wrapper/we_colopcompress.cpp @@ -121,9 +121,9 @@ int ColumnOpCompress0::saveBlock(IDBDataFile* pFile, const unsigned char* writeB * Constructor */ -ColumnOpCompress1::ColumnOpCompress1(Log* logger) +ColumnOpCompress1::ColumnOpCompress1(uint32_t compressionType, Log* logger) { - m_compressionType = 1; + m_compressionType = compressionType; m_chunkManager = new ChunkManager(); if (logger) @@ -164,11 +164,7 @@ bool ColumnOpCompress1::abbreviatedExtent(IDBDataFile* pFile, int colWidth) cons int ColumnOpCompress1::blocksInFile(IDBDataFile* pFile) const { - CompFileHeader compFileHeader; - readHeaders(pFile, compFileHeader.fControlData, compFileHeader.fPtrSection); - - compress::IDBCompressInterface compressor; - return compressor.getBlockCount(compFileHeader.fControlData); + return m_chunkManager->getBlockCount(pFile); } diff --git a/writeengine/wrapper/we_colopcompress.h b/writeengine/wrapper/we_colopcompress.h index 681d911f8..33da38646 100644 --- a/writeengine/wrapper/we_colopcompress.h +++ b/writeengine/wrapper/we_colopcompress.h @@ -97,7 +97,7 @@ public: /** * @brief Constructor */ - EXPORT ColumnOpCompress1(Log* logger = 0); + EXPORT ColumnOpCompress1(uint32_t compressionType, Log* logger = 0); /** * @brief Default Destructor diff --git a/writeengine/wrapper/we_dctnrycompress.cpp b/writeengine/wrapper/we_dctnrycompress.cpp index 22677b491..e352165ce 100644 --- a/writeengine/wrapper/we_dctnrycompress.cpp +++ 
b/writeengine/wrapper/we_dctnrycompress.cpp @@ -67,9 +67,9 @@ DctnryCompress0::~DctnryCompress0() /** * Constructor */ -DctnryCompress1::DctnryCompress1(Log* logger) +DctnryCompress1::DctnryCompress1(uint32_t compressionType, Log* logger) { - m_compressionType = 1; + m_compressionType = compressionType; m_chunkManager = new ChunkManager(); if (logger) diff --git a/writeengine/wrapper/we_dctnrycompress.h b/writeengine/wrapper/we_dctnrycompress.h index 968253d45..8ec3ad3ac 100644 --- a/writeengine/wrapper/we_dctnrycompress.h +++ b/writeengine/wrapper/we_dctnrycompress.h @@ -62,7 +62,7 @@ public: /** * @brief Constructor */ - EXPORT DctnryCompress1(Log* logger = 0); + EXPORT DctnryCompress1(uint32_t compressionType, Log* logger = 0); /** * @brief Default Destructor diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index b1cb6b2f8..fe4aeff49 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -76,19 +76,25 @@ StopWatch timer; WriteEngineWrapper::WriteEngineWrapper() : m_opType(NOOP) { m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0; - m_colOp[COMPRESSED_OP] = new ColumnOpCompress1; - m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0; - m_dctnry[COMPRESSED_OP] = new DctnryCompress1; + + m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*compressionType=*/1); + m_dctnry[COMPRESSED_OP_1] = new DctnryCompress1(/*compressionType=*/1); + + m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*compressionType=*/3); + m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3); } WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opType(rhs.m_opType) { m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0; - m_colOp[COMPRESSED_OP] = new ColumnOpCompress1; - m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0; - m_dctnry[COMPRESSED_OP] = new DctnryCompress1; + + m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*compressionType=*/1); + m_dctnry[COMPRESSED_OP_1] = new
DctnryCompress1(/*compressionType=*/1); + + m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*compressionType=*/3); + m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3); } /**@brief WriteEngineWrapper Constructor @@ -96,9 +102,13 @@ WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opTyp WriteEngineWrapper::~WriteEngineWrapper() { delete m_colOp[UN_COMPRESSED_OP]; - delete m_colOp[COMPRESSED_OP]; delete m_dctnry[UN_COMPRESSED_OP]; - delete m_dctnry[COMPRESSED_OP]; + + delete m_colOp[COMPRESSED_OP_1]; + delete m_dctnry[COMPRESSED_OP_1]; + + delete m_colOp[COMPRESSED_OP_2]; + delete m_dctnry[COMPRESSED_OP_2]; } /**@brief Perform upfront initialization diff --git a/writeengine/wrapper/writeengine.h b/writeengine/wrapper/writeengine.h index cdc7d472d..5423a7361 100644 --- a/writeengine/wrapper/writeengine.h +++ b/writeengine/wrapper/writeengine.h @@ -58,9 +58,10 @@ namespace WriteEngine { //... Total compression operation: un_compresssed, compressed -const int UN_COMPRESSED_OP = 0; -const int COMPRESSED_OP = 1; -const int TOTAL_COMPRESS_OP = 2; +const int UN_COMPRESSED_OP = 0; +const int COMPRESSED_OP_1 = 1; +const int COMPRESSED_OP_2 = 2; +const int TOTAL_COMPRESS_OP = 3; //...Forward class declarations class Log; @@ -446,8 +447,10 @@ public: */ void setIsInsert(bool bIsInsert) { - m_colOp[COMPRESSED_OP]->chunkManager()->setIsInsert(bIsInsert); - m_dctnry[COMPRESSED_OP]->chunkManager()->setIsInsert(true); + m_colOp[COMPRESSED_OP_1]->chunkManager()->setIsInsert(bIsInsert); + m_dctnry[COMPRESSED_OP_1]->chunkManager()->setIsInsert(true); + m_colOp[COMPRESSED_OP_2]->chunkManager()->setIsInsert(bIsInsert); + m_dctnry[COMPRESSED_OP_2]->chunkManager()->setIsInsert(true); } /** @@ -458,7 +461,7 @@ public: */ bool getIsInsert() { - return m_colOp[COMPRESSED_OP]->chunkManager()->getIsInsert(); + return m_colOp[COMPRESSED_OP_1]->chunkManager()->getIsInsert(); } std::tr1::unordered_map& getTxnMap() @@ -475,10 +478,23 @@ public: */ int 
flushChunks(int rc, const std::map& columOids) { - int rtn1 = m_colOp[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids); - int rtn2 = m_dctnry[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids); + std::vector compressedOpIds = {COMPRESSED_OP_1, + COMPRESSED_OP_2}; - return (rtn1 != NO_ERROR ? rtn1 : rtn2); + for (const auto compressedOpId : compressedOpIds) + { + auto rtn = m_colOp[compressedOpId]->chunkManager()->flushChunks( + rc, columOids); + if (rtn != NO_ERROR) + return rtn; + + rtn = m_dctnry[compressedOpId]->chunkManager()->flushChunks( + rc, columOids); + if (rtn != NO_ERROR) + return rtn; + } + + return NO_ERROR; } /** @@ -524,7 +540,7 @@ public: int startTransaction(const TxnID& txnid) { int rc = 0; - rc = m_colOp[COMPRESSED_OP]->chunkManager()->startTransaction(txnid); + rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->startTransaction(txnid); //if ( rc == 0) // rc = m_dctnry[COMPRESSED_OP]->chunkManager()->startTransaction(txnid); return rc; @@ -537,7 +553,8 @@ public: int confirmTransaction (const TxnID& txnid) { int rc = 0; - rc = m_colOp[COMPRESSED_OP]->chunkManager()->confirmTransaction (txnid); + rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->confirmTransaction( + txnid); return rc; } @@ -549,7 +566,8 @@ public: int endTransaction(const TxnID& txnid, bool success) { int rc = 0; - rc = m_colOp[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success); + rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->endTransaction(txnid, + success); //if ( rc == 0) // rc = m_dctnry[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success); return rc; @@ -785,7 +803,16 @@ private: int op(int compressionType) { - return (compressionType > 0 ? COMPRESSED_OP : UN_COMPRESSED_OP); + switch (compressionType) + { + case 1: + case 2: + return COMPRESSED_OP_1; + case 3: + return COMPRESSED_OP_2; + } + + return 0; }