mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-04-18 21:44:02 +03:00
MCOL-987 Add LZ4 compression.
* Adds CompressInterfaceLZ4, which uses the LZ4 API for compress/uncompress. * Adds CMake machinery to search for LZ4 on the running host. * All methods that work only on static data and do not modify any internal state become `static`, so they can be used without creating a specific object. This is possible because the header specification has not been modified: the header still has 2 sections, the first one with file metadata and the second one with pointers to the compressed chunks. * Methods `compress`, `uncompress`, `maxCompressedSize`, `getUncompressedSize` become pure virtual, so they can be overridden for other compression algorithms. * Adds method `getChunkMagicNumber`, so the chunk magic number can be verified for each compression algorithm. * Renames IDBCompressInterface to CompressInterface ("s/IDBCompressInterface/CompressInterface/g"), as required.
This commit is contained in:
parent
dd12bd3cd0
commit
cc1c3629c5
@ -36,9 +36,9 @@ local deb_build_deps = 'apt update && apt install --yes --no-install-recommends
|
||||
local platformMap(platform) =
|
||||
|
||||
local platform_map = {
|
||||
'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package',
|
||||
'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package',
|
||||
'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package',
|
||||
'opensuse/leap:15': 'zypper ' + rpm_build_deps + ' cmake libboost_system-devel libboost_filesystem-devel libboost_thread-devel libboost_regex-devel libboost_date_time-devel libboost_chrono-devel libboost_atomic-devel gcc-fortran liblz4-devel && cmake ' + cmakeflags + ' -DRPM=sles15 && make -j$(nproc) package',
|
||||
'centos:7': 'yum install -y epel-release && yum install -y cmake3 && ln -s /usr/bin/cmake3 /usr/bin/cmake && yum ' + rpm_build_deps + ' lz4-devel && cmake ' + cmakeflags + ' -DRPM=centos7 && make -j$(nproc) package',
|
||||
'centos:8': "yum install -y libgcc libarchive && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*PowerTools.repo && yum " + rpm_build_deps + ' lz4-devel cmake && cmake ' + cmakeflags + ' -DRPM=centos8 && make -j$(nproc) package',
|
||||
'debian:9': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=stretch' debian/autobake-deb.sh",
|
||||
'debian:10': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=buster' debian/autobake-deb.sh",
|
||||
'ubuntu:18.04': deb_build_deps + " && CMAKEFLAGS='" + cmakeflags + " -DDEB=bionic' debian/autobake-deb.sh",
|
||||
|
@ -163,6 +163,12 @@ if(NOT AWK_EXECUTABLE)
|
||||
return()
|
||||
endif()
|
||||
|
||||
FIND_PACKAGE(LZ4)
|
||||
if (NOT LZ4_FOUND)
|
||||
MESSAGE_ONCE(CS_NO_LZ4 "lz4 not found")
|
||||
return()
|
||||
endif()
|
||||
|
||||
IF (NOT INSTALL_LAYOUT)
|
||||
INCLUDE(check_compiler_flag)
|
||||
|
||||
|
25
cmake/FindLZ4.cmake
Normal file
25
cmake/FindLZ4.cmake
Normal file
@ -0,0 +1,25 @@
|
||||
# Find module for the LZ4 compression library.
#
# Cache variables produced by the searches below:
#   LZ4_ROOT_DIR    - prefix that contains include/lz4.h; used only as a hint
#                     for the library and header searches
#   LZ4_LIBRARIES   - location of the lz4 library
#   LZ4_INCLUDE_DIR - directory that contains lz4.h
#
# Result variable:
#   LZ4_FOUND       - set by find_package_handle_standard_args when both
#                     LZ4_LIBRARIES and LZ4_INCLUDE_DIR were located
find_path(LZ4_ROOT_DIR
    NAMES include/lz4.h
)

find_library(LZ4_LIBRARIES
    NAMES lz4
    HINTS ${LZ4_ROOT_DIR}/lib
)

find_path(LZ4_INCLUDE_DIR
    NAMES lz4.h
    HINTS ${LZ4_ROOT_DIR}/include
)

include(FindPackageHandleStandardArgs)
# The package name must match the name used by the caller's find_package(LZ4):
# it determines the <name>_FOUND variable, and a mismatching name makes newer
# CMake versions emit a package-name-mismatch warning.
find_package_handle_standard_args(LZ4 DEFAULT_MSG
    LZ4_LIBRARIES
    LZ4_INCLUDE_DIR
)

mark_as_advanced(
    LZ4_ROOT_DIR
    LZ4_LIBRARIES
    LZ4_INCLUDE_DIR
)
|
@ -146,9 +146,7 @@ pColStep::pColStep(
|
||||
if (fOid < 1000)
|
||||
throw runtime_error("pColStep: invalid column");
|
||||
|
||||
compress::IDBCompressInterface cmpif;
|
||||
|
||||
if (!cmpif.isCompressionAvail(fColType.compressionType))
|
||||
if (!compress::CompressInterface::isCompressionAvail(fColType.compressionType))
|
||||
{
|
||||
ostringstream oss;
|
||||
oss << "Unsupported compression type " << fColType.compressionType;
|
||||
|
@ -95,7 +95,11 @@ DROP PROCEDURE IF EXISTS `compression_ratio` //
|
||||
|
||||
CREATE PROCEDURE compression_ratio() SQL SECURITY INVOKER
|
||||
BEGIN
|
||||
SELECT CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files WHERE compressed_data_size IS NOT NULL), ':1') COMPRESSION_RATIO;
|
||||
|
||||
SELECT 'Snappy' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='Snappy') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='Snappy' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') compression_ratio
|
||||
UNION ALL
|
||||
SELECT 'LZ4' as compression_method, CONCAT((SELECT SUM(data_size) FROM information_schema.columnstore_extents ce left join information_schema.columnstore_columns cc on ce.object_id = cc.object_id where compression_type='LZ4') / (SELECT SUM(compressed_data_size) FROM information_schema.columnstore_files co left join information_schema.columnstore_columns cc on (co.object_id = cc.object_id) left join information_schema.columnstore_extents ce on (ce.object_id = co.object_id) where compression_type='LZ4' and compressed_data_size IS NOT NULL /* could be a situation when compressed_data_size != NULL but data_size == 0, in this case we will get wrong ratio */ and data_size > 0), ':1') as compression_ratio;
|
||||
|
||||
END //
|
||||
|
||||
create or replace procedure columnstore_upgrade() SQL SECURITY INVOKER
|
||||
|
@ -777,7 +777,6 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
|
||||
parser.setDefaultSchema(schema);
|
||||
parser.setDefaultCharset(default_table_charset);
|
||||
int rc = 0;
|
||||
IDBCompressInterface idbCompress;
|
||||
parser.Parse(ddlStatement.c_str());
|
||||
|
||||
if (get_fe_conn_info_ptr() == NULL)
|
||||
@ -981,7 +980,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
|
||||
|
||||
if (compressionType == 1) compressionType = 2;
|
||||
|
||||
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
|
||||
if ((compressionType > 0) &&
|
||||
!(compress::CompressInterface::isCompressionAvail(
|
||||
compressionType)))
|
||||
{
|
||||
rc = 1;
|
||||
ci->alterTableState = cal_connection_info::NOT_ALTER;
|
||||
@ -1368,7 +1369,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
|
||||
if ((compressionType > 0) &&
|
||||
!(compress::CompressInterface::isCompressionAvail(
|
||||
compressionType)))
|
||||
{
|
||||
rc = 1;
|
||||
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
|
||||
@ -1713,7 +1716,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
|
||||
if ((compressionType > 0) &&
|
||||
!(compress::CompressInterface::isCompressionAvail(
|
||||
compressionType)))
|
||||
{
|
||||
rc = 1;
|
||||
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
|
||||
@ -1842,7 +1847,9 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& tabl
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (( compressionType > 0 ) && !(idbCompress.isCompressionAvail( compressionType )))
|
||||
if ((compressionType > 0) &&
|
||||
!(compress::CompressInterface::isCompressionAvail(
|
||||
compressionType)))
|
||||
{
|
||||
rc = 1;
|
||||
thd->raise_error_printf(ER_INTERNAL_ERROR, (IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE)).c_str());
|
||||
@ -2364,9 +2371,8 @@ int ha_mcs_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* crea
|
||||
|
||||
if (compressiontype == 1) compressiontype = 2;
|
||||
|
||||
IDBCompressInterface idbCompress;
|
||||
|
||||
if ( ( compressiontype > 0 ) && !(idbCompress.isCompressionAvail( compressiontype )) )
|
||||
if ((compressiontype > 0) &&
|
||||
!(compress::CompressInterface::isCompressionAvail(compressiontype)))
|
||||
{
|
||||
string emsg = IDBErrorInfo::instance()->errorMsg(ERR_INVALID_COMPRESSION_TYPE);
|
||||
setError(thd, ER_INTERNAL_ERROR, emsg);
|
||||
|
@ -21,8 +21,10 @@
|
||||
#include "ha_mcs_sysvars.h"
|
||||
|
||||
const char* mcs_compression_type_names[] = {
|
||||
"SNAPPY",
|
||||
"SNAPPY",
|
||||
"SNAPPY", // 0
|
||||
"SNAPPY", // 1
|
||||
"SNAPPY", // 2
|
||||
"LZ4", // 3
|
||||
NullS
|
||||
};
|
||||
|
||||
@ -39,7 +41,8 @@ static MYSQL_THDVAR_ENUM(
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"Controls compression algorithm for create tables. Possible values are: "
|
||||
"NO_COMPRESSION segment files aren't compressed; "
|
||||
"SNAPPY segment files are Snappy compressed (default);",
|
||||
"SNAPPY segment files are Snappy compressed (default);"
|
||||
"LZ4 segment files are LZ4 compressed;",
|
||||
NULL, // check
|
||||
NULL, // update
|
||||
1, //default
|
||||
|
@ -30,7 +30,8 @@ extern char cs_commit_hash[];
|
||||
// compression_type
|
||||
enum mcs_compression_type_t {
|
||||
NO_COMPRESSION = 0,
|
||||
SNAPPY = 2
|
||||
SNAPPY = 2,
|
||||
LZ4 = 3
|
||||
};
|
||||
|
||||
// use_import_for_batchinsert mode
|
||||
|
@ -183,6 +183,10 @@ static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond)
|
||||
compression_type = "Snappy";
|
||||
break;
|
||||
|
||||
case 3:
|
||||
compression_type = "LZ4";
|
||||
break;
|
||||
|
||||
default:
|
||||
compression_type = "Unknown";
|
||||
break;
|
||||
|
@ -492,6 +492,7 @@
|
||||
<CPUniqueLimit>100</CPUniqueLimit>
|
||||
<AllowDiskBasedJoin>N</AllowDiskBasedJoin>
|
||||
<TempFileCompression>Y</TempFileCompression>
|
||||
<TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
|
||||
</HashJoin>
|
||||
<JobList>
|
||||
<FlushInterval>16K</FlushInterval>
|
||||
@ -539,6 +540,7 @@
|
||||
</UserPriority>
|
||||
<NetworkCompression>
|
||||
<Enabled>Y</Enabled>
|
||||
<NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
|
||||
</NetworkCompression>
|
||||
<QueryTele>
|
||||
<Host>127.0.0.1</Host>
|
||||
|
@ -308,7 +308,7 @@ void waitForRetry(long count)
|
||||
|
||||
|
||||
//Must hold the FD cache lock!
|
||||
int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterface& decompressor)
|
||||
static int updateptrs(char* ptr, FdCacheType_t::iterator fdit)
|
||||
{
|
||||
ssize_t i;
|
||||
uint32_t progress;
|
||||
@ -357,7 +357,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa
|
||||
fdit->second->cmpMTime = mtime;
|
||||
|
||||
int gplRc = 0;
|
||||
gplRc = decompressor.getPtrList(&ptr[4096], 4096, fdit->second->ptrList);
|
||||
gplRc = compress::CompressInterface::getPtrList(&ptr[4096], 4096,
|
||||
fdit->second->ptrList);
|
||||
|
||||
if (gplRc != 0)
|
||||
return -5; // go for a retry.
|
||||
@ -391,7 +392,8 @@ int updateptrs(char* ptr, FdCacheType_t::iterator fdit, const IDBCompressInterfa
|
||||
return -8;
|
||||
|
||||
CompChunkPtrList nextPtrList;
|
||||
gplRc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
|
||||
gplRc = compress::CompressInterface::getPtrList(
|
||||
&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
|
||||
|
||||
if (gplRc != 0)
|
||||
return -7; // go for a retry.
|
||||
@ -445,7 +447,6 @@ void* thr_popper(ioManager* arg)
|
||||
double rqst3;
|
||||
bool locked = false;
|
||||
SPFdEntry_t fe;
|
||||
IDBCompressInterface decompressor;
|
||||
vector<CacheInsert_t> cacheInsertOps;
|
||||
bool copyLocked = false;
|
||||
|
||||
@ -463,8 +464,10 @@ void* thr_popper(ioManager* arg)
|
||||
|
||||
FdCacheType_t::iterator fdit;
|
||||
IDBDataFile* fp = 0;
|
||||
uint32_t maxCompSz = IDBCompressInterface::maxCompressedSize(iom->blocksPerRead * BLOCK_SIZE);
|
||||
uint32_t readBufferSz = maxCompSz + pageSize;
|
||||
size_t maxCompSz =
|
||||
compress::CompressInterface::getMaxCompressedSizeGeneric(
|
||||
iom->blocksPerRead * BLOCK_SIZE);
|
||||
size_t readBufferSz = maxCompSz + pageSize;
|
||||
|
||||
realbuff.reset(new char[readBufferSz]);
|
||||
|
||||
@ -863,7 +866,7 @@ retryReadHeaders:
|
||||
cur_mtime = fp_mtime;
|
||||
|
||||
if (decompRetryCount > 0 || retryReadHeadersCount > 0 || cur_mtime > fdit->second->cmpMTime)
|
||||
updatePtrsRc = updateptrs(&alignedbuff[0], fdit, decompressor);
|
||||
updatePtrsRc = updateptrs(&alignedbuff[0], fdit);
|
||||
|
||||
fdMapMutex.unlock();
|
||||
|
||||
@ -1052,7 +1055,7 @@ retryReadHeaders:
|
||||
#ifdef _MSC_VER
|
||||
unsigned int blen = 4 * 1024 * 1024 + 4;
|
||||
#else
|
||||
uint32_t blen = 4 * 1024 * 1024 + 4;
|
||||
size_t blen = 4 * 1024 * 1024 + 4;
|
||||
#endif
|
||||
#ifdef IDB_COMP_POC_DEBUG
|
||||
{
|
||||
@ -1060,7 +1063,18 @@ retryReadHeaders:
|
||||
cout << "decompress(0x" << hex << (ptrdiff_t)&alignedbuff[0] << dec << ", " << fdit->second->ptrList[cmpOffFact.quot].second << ", 0x" << hex << (ptrdiff_t)uCmpBuf << dec << ", " << blen << ")" << endl;
|
||||
}
|
||||
#endif
|
||||
int dcrc = decompressor.uncompressBlock(&alignedbuff[0],
|
||||
|
||||
std::unique_ptr<compress::CompressInterface> decompressor(
|
||||
compress::getCompressInterfaceByType(
|
||||
static_cast<uint32_t>(fdit->second->compType)));
|
||||
if (!decompressor)
|
||||
{
|
||||
// Use default?
|
||||
decompressor.reset(
|
||||
new compress::CompressInterfaceSnappy());
|
||||
}
|
||||
|
||||
int dcrc = decompressor->uncompressBlock(&alignedbuff[0],
|
||||
fdit->second->ptrList[cmpOffFact.quot].second, uCmpBuf, blen);
|
||||
|
||||
if (dcrc != 0)
|
||||
|
@ -696,13 +696,25 @@ blockReadRetry:
|
||||
i = fp->pread( &cmpHdrBuf[0], 0, 4096 * 3);
|
||||
|
||||
CompChunkPtrList ptrList;
|
||||
IDBCompressInterface decompressor;
|
||||
std::unique_ptr<CompressInterface> decompressor(
|
||||
compress::getCompressInterfaceByType(
|
||||
compress::CompressInterface::getCompressionType(
|
||||
&cmpHdrBuf[0])));
|
||||
|
||||
if (!decompressor)
|
||||
{
|
||||
// Use default?
|
||||
decompressor.reset(
|
||||
new compress::CompressInterfaceSnappy());
|
||||
}
|
||||
|
||||
int dcrc = 0;
|
||||
|
||||
if (i == 4096 * 3)
|
||||
{
|
||||
uint64_t numHdrs = 0; // extra headers
|
||||
dcrc = decompressor.getPtrList(&cmpHdrBuf[4096], 4096, ptrList);
|
||||
dcrc = compress::CompressInterface::getPtrList(
|
||||
&cmpHdrBuf[4096], 4096, ptrList);
|
||||
|
||||
if (dcrc == 0 && ptrList.size() > 0)
|
||||
numHdrs = ptrList[0].first / 4096ULL - 2ULL;
|
||||
@ -723,7 +735,8 @@ blockReadRetry:
|
||||
i = fp->pread( &nextHdrBufPtr[0], 4096 * 2, numHdrs * 4096 );
|
||||
|
||||
CompChunkPtrList nextPtrList;
|
||||
dcrc = decompressor.getPtrList(&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
|
||||
dcrc = compress::CompressInterface::getPtrList(
|
||||
&nextHdrBufPtr[0], numHdrs * 4096, nextPtrList);
|
||||
|
||||
if (dcrc == 0)
|
||||
ptrList.insert(ptrList.end(), nextPtrList.begin(), nextPtrList.end());
|
||||
@ -777,11 +790,11 @@ blockReadRetry:
|
||||
cmpBuf = (char*) alignedBuffer;
|
||||
}
|
||||
|
||||
unsigned blen = 4 * 1024 * 1024;
|
||||
size_t blen = 4 * 1024 * 1024;
|
||||
|
||||
i = fp->pread( cmpBuf, cmpBufOff, cmpBufSz );
|
||||
|
||||
dcrc = decompressor.uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen);
|
||||
dcrc = decompressor->uncompressBlock(cmpBuf, cmpBufSz, uCmpBuf, blen);
|
||||
|
||||
if (dcrc == 0)
|
||||
{
|
||||
|
@ -42,3 +42,9 @@ if (WITH_REBUILD_EM_UT)
|
||||
target_link_libraries(rebuild_em_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
|
||||
install(TARGETS rebuild_em_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
|
||||
endif()
|
||||
|
||||
if (WITH_COMPRESSION_UT)
|
||||
add_executable(compression_tests compression-tests.cpp)
|
||||
target_link_libraries(compression_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
|
||||
install(TARGETS compression_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
|
||||
endif()
|
||||
|
126
tests/compression-tests.cpp
Normal file
126
tests/compression-tests.cpp
Normal file
@ -0,0 +1,126 @@
|
||||
/* Copyright (C) 2021 MariaDB Corporation
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "idbcompress.h"
|
||||
|
||||
class CompressionTest : public ::testing::Test
|
||||
{
|
||||
|
||||
protected:
|
||||
std::string genPermutations(string& data)
|
||||
{
|
||||
std::string generated;
|
||||
generate(data, 0, generated);
|
||||
return generated;
|
||||
}
|
||||
|
||||
private:
|
||||
void generate(string& data, uint32_t i, std::string& generated)
|
||||
{
|
||||
if (i == data.size())
|
||||
{
|
||||
generated.append(data);
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t k = i, e = data.size(); k < e; ++k)
|
||||
{
|
||||
std::swap(data[i], data[k]);
|
||||
generate(data, i + 1, generated);
|
||||
std::swap(data[i], data[k]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Round-trips a short text through CompressInterfaceLZ4 and checks that the
// uncompressed bytes equal the original input.
TEST_F(CompressionTest, LZ4CanCompress)
{
    const std::string originalData =
        "This program is free software; you can redistribute it and/or"
        "modify it under the terms of the GNU General Public License"
        "as published by the Free Software Foundation; version 2 of"
        "the License.";

    std::unique_ptr<compress::CompressInterface> compressor(
        new compress::CompressInterfaceLZ4());

    const size_t originalSize = originalData.size();
    // On input `compressedSize` is the capacity of the output buffer.
    size_t compressedSize = compressor->maxCompressedSize(originalSize);
    std::unique_ptr<char[]> compressedData(new char[compressedSize]);
    std::memset(compressedData.get(), 0, compressedSize);

    auto rc = compressor->compress(originalData.data(), originalSize,
                                   compressedData.get(), &compressedSize);
    ASSERT_EQ(rc, 0);

    // Separate length variable so the original size stays available.
    // NOTE(review): presumably uncompress() stores the number of bytes it
    // produced in the length argument - confirm against CompressInterfaceLZ4.
    size_t uncompressedSize = originalSize;
    std::unique_ptr<char[]> uncompressedData(new char[uncompressedSize]);
    rc = compressor->uncompress(compressedData.get(), compressedSize,
                                uncompressedData.get(), &uncompressedSize);
    ASSERT_EQ(rc, 0);

    // The output buffer has no terminating NUL, so the string must be built
    // with an explicit length; the pointer-only constructor would read past
    // the end of the allocation.
    const std::string result(uncompressedData.get(), uncompressedSize);
    EXPECT_EQ(originalData, result);
}
|
||||
|
||||
// Compresses the same generated inputs with LZ4 and with Snappy and prints
// the compression ratio each one achieves. Only the return codes of the
// compress calls are asserted; the ratios are informational output.
TEST_F(CompressionTest, LZvsSnappyUnique)
{
    using CompressorPtr = std::unique_ptr<compress::CompressInterface>;
    CompressorPtr lz4Compressor(new compress::CompressInterfaceLZ4());
    CompressorPtr snappyCompressor(new compress::CompressInterfaceSnappy());

    // Each seed is expanded to all of its permutations.
    // 9! * 9 == 3265920 (closer to current chunk size)
    std::vector<std::string> dataPool{"abcdefghi", "aaadefghi", "aaaaafghi",
                                      "aaaaaaahi", "aaaaaaaaj"};

    for (auto& seed : dataPool)
    {
        std::cout << "Permutations generated for: " << seed << std::endl;
        auto input = genPermutations(seed);
        auto inputSize = input.size();

        // Worst-case output sizes; compress() receives them as the buffer
        // capacity through the length argument.
        auto lz4Size = lz4Compressor->maxCompressedSize(inputSize);
        auto snappySize = snappyCompressor->maxCompressedSize(inputSize);

        std::unique_ptr<char[]> lz4Buffer(new char[lz4Size]);
        auto rc = lz4Compressor->compress(input.data(), inputSize,
                                          lz4Buffer.get(), &lz4Size);
        ASSERT_EQ(rc, 0);

        std::unique_ptr<char[]> snappyBuffer(new char[snappySize]);
        rc = snappyCompressor->compress(input.data(), inputSize,
                                        snappyBuffer.get(), &snappySize);
        ASSERT_EQ(rc, 0);

        const float lz4Ratio =
            static_cast<float>(inputSize) / static_cast<float>(lz4Size);
        std::cout << "LZ ratio: " << lz4Ratio << std::endl;

        const float snappyRatio =
            static_cast<float>(inputSize) / static_cast<float>(snappySize);
        std::cout << "Snappy ratio: " << snappyRatio << std::endl;
    }
}
|
@ -383,7 +383,7 @@ public:
|
||||
BlockOp blockOp;
|
||||
char fileName[20];
|
||||
int rc;
|
||||
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
|
||||
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
|
||||
|
||||
printf("\nRunning testCreateDeleteFile \n");
|
||||
idbdatafile::IDBPolicy::init(true, false, "", 0);
|
||||
@ -966,7 +966,7 @@ public:
|
||||
BlockOp blockOp;
|
||||
char fileName[20];
|
||||
int rc;
|
||||
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
|
||||
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
|
||||
int dbRoot = 1;
|
||||
|
||||
printf("\nRunning testExtensionWOPrealloc \n");
|
||||
@ -1085,7 +1085,7 @@ public:
|
||||
int dbRoot = 1;
|
||||
int colWidth = 65535;
|
||||
|
||||
DctnryCompress1 m_Dctnry;
|
||||
DctnryCompress1 m_Dctnry(/*compressionType=*/1);
|
||||
// This is the magic for the stub in FileOp::oid2FileName
|
||||
int oId = 42;
|
||||
|
||||
@ -1565,7 +1565,7 @@ public:
|
||||
BlockOp blockOp;
|
||||
char fileName[20];
|
||||
int rc;
|
||||
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
|
||||
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
|
||||
int dbRoot = 1;
|
||||
|
||||
idbdatafile::IDBPolicy::init(true, false, "", 0);
|
||||
|
@ -89,7 +89,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
|
||||
}
|
||||
|
||||
// Read and verify header.
|
||||
char fileHeader[compress::IDBCompressInterface::HDR_BUF_LEN * 2];
|
||||
char fileHeader[compress::CompressInterface::HDR_BUF_LEN * 2];
|
||||
rc = fileOp.readHeaders(dbFile.get(), fileHeader);
|
||||
if (rc != 0)
|
||||
{
|
||||
@ -116,8 +116,8 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
|
||||
}
|
||||
|
||||
// Read the `colDataType` and `colWidth` from the given header.
|
||||
compress::IDBCompressInterface compressor;
|
||||
const auto versionNumber = compressor.getVersionNumber(fileHeader);
|
||||
const auto versionNumber =
|
||||
compress::CompressInterface::getVersionNumber(fileHeader);
|
||||
// Verify header number.
|
||||
if (versionNumber < 3)
|
||||
{
|
||||
@ -129,10 +129,11 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto colDataType = compressor.getColDataType(fileHeader);
|
||||
auto colWidth = compressor.getColumnWidth(fileHeader);
|
||||
auto blockCount = compressor.getBlockCount(fileHeader);
|
||||
auto lbidCount = compressor.getLBIDCount(fileHeader);
|
||||
auto colDataType = compress::CompressInterface::getColDataType(fileHeader);
|
||||
auto colWidth = compress::CompressInterface::getColumnWidth(fileHeader);
|
||||
auto blockCount = compress::CompressInterface::getBlockCount(fileHeader);
|
||||
auto lbidCount = compress::CompressInterface::getLBIDCount(fileHeader);
|
||||
auto compressionType = compress::CompressInterface::getCompressionType(fileHeader);
|
||||
|
||||
if (colDataType == execplan::CalpontSystemCatalog::UNDEFINED)
|
||||
{
|
||||
@ -155,7 +156,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
|
||||
|
||||
uint64_t hwm = 0;
|
||||
rc = searchHWMInSegmentFile(oid, getDBRoot(), partition, segment, colDataType, colWidth,
|
||||
blockCount, isDict, hwm);
|
||||
blockCount, isDict, compressionType, hwm);
|
||||
if (rc != 0)
|
||||
{
|
||||
return rc;
|
||||
@ -172,13 +173,13 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
|
||||
{
|
||||
for (uint32_t lbidIndex = 0; lbidIndex < lbidCount - 1; ++lbidIndex)
|
||||
{
|
||||
auto lbid = compressor.getLBIDByIndex(fileHeader, lbidIndex);
|
||||
auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidIndex);
|
||||
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, /*hwm*/ 0, isDict);
|
||||
extentMap.push_back(fileId);
|
||||
}
|
||||
|
||||
// Last one has an actual HWM.
|
||||
auto lbid = compressor.getLBIDByIndex(fileHeader, lbidCount - 1);
|
||||
auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, lbidCount - 1);
|
||||
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
|
||||
extentMap.push_back(fileId);
|
||||
|
||||
@ -192,7 +193,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
|
||||
else
|
||||
{
|
||||
// One extent per segment file.
|
||||
auto lbid = compressor.getLBIDByIndex(fileHeader, 0);
|
||||
auto lbid = compress::CompressInterface::getLBIDByIndex(fileHeader, 0);
|
||||
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
|
||||
extentMap.push_back(fileId);
|
||||
|
||||
@ -293,7 +294,7 @@ int32_t EMReBuilder::rebuildExtentMap()
|
||||
int32_t EMReBuilder::searchHWMInSegmentFile(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
|
||||
uint64_t blockCount, bool isDict, uint64_t& hwm)
|
||||
uint64_t blockCount, bool isDict, uint32_t compressionType, uint64_t& hwm)
|
||||
{
|
||||
std::unique_ptr<ChunkManagerWrapper> chunkManagerWrapper;
|
||||
try
|
||||
@ -302,13 +303,15 @@ int32_t EMReBuilder::searchHWMInSegmentFile(
|
||||
{
|
||||
chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperDict>(
|
||||
new ChunkManagerWrapperDict(oid, dbRoot, partition, segment,
|
||||
colDataType, colWidth));
|
||||
colDataType, colWidth,
|
||||
compressionType));
|
||||
}
|
||||
else
|
||||
{
|
||||
chunkManagerWrapper = std::unique_ptr<ChunkManagerWrapperColumn>(
|
||||
new ChunkManagerWrapperColumn(oid, dbRoot, partition, segment,
|
||||
colDataType, colWidth));
|
||||
colDataType, colWidth,
|
||||
compressionType));
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
@ -401,12 +404,13 @@ int32_t ChunkManagerWrapper::readBlock(uint32_t blockNumber)
|
||||
|
||||
ChunkManagerWrapperColumn::ChunkManagerWrapperColumn(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth)
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
|
||||
uint32_t compressionType)
|
||||
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType,
|
||||
colWidth)
|
||||
{
|
||||
pFileOp = std::unique_ptr<WriteEngine::ColumnOpCompress1>(
|
||||
new WriteEngine::ColumnOpCompress1());
|
||||
new WriteEngine::ColumnOpCompress1(compressionType));
|
||||
chunkManager.fileOp(pFileOp.get());
|
||||
// Open compressed column segment file. We will read block by block
|
||||
// from the compressed chunks.
|
||||
@ -463,12 +467,13 @@ bool ChunkManagerWrapperColumn::isEmptyValue(const uint8_t* value) const
|
||||
|
||||
ChunkManagerWrapperDict::ChunkManagerWrapperDict(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth)
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType, uint32_t colWidth,
|
||||
uint32_t compressionType)
|
||||
: ChunkManagerWrapper(oid, dbRoot, partition, segment, colDataType,
|
||||
colWidth)
|
||||
{
|
||||
pFileOp = std::unique_ptr<WriteEngine::DctnryCompress1>(
|
||||
new WriteEngine::DctnryCompress1());
|
||||
new WriteEngine::DctnryCompress1(compressionType));
|
||||
chunkManager.fileOp(pFileOp.get());
|
||||
// Open compressed dict segment file.
|
||||
pFile = chunkManager.getSegmentFilePtr(oid, dbRoot, partition, segment,
|
||||
|
@ -112,7 +112,8 @@ class EMReBuilder
|
||||
int32_t searchHWMInSegmentFile(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
uint32_t width, uint64_t blocksCount, bool isDict, uint64_t& hwm);
|
||||
uint32_t width, uint64_t blocksCount, bool isDict,
|
||||
uint32_t compressionType, uint64_t& hwm);
|
||||
|
||||
// Sets the dbroot to the given `number`.
|
||||
void setDBRoot(uint32_t number) { dbRoot = number; }
|
||||
@ -184,7 +185,7 @@ class ChunkManagerWrapperColumn : public ChunkManagerWrapper
|
||||
ChunkManagerWrapperColumn(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
uint32_t colWidth);
|
||||
uint32_t colWidth, uint32_t compressionType);
|
||||
|
||||
~ChunkManagerWrapperColumn() = default;
|
||||
ChunkManagerWrapperColumn(const ChunkManagerWrapperColumn& other) = delete;
|
||||
@ -210,7 +211,7 @@ class ChunkManagerWrapperDict : public ChunkManagerWrapper
|
||||
ChunkManagerWrapperDict(
|
||||
uint32_t oid, uint32_t dbRoot, uint32_t partition, uint32_t segment,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
uint32_t colWidth);
|
||||
uint32_t colWidth, uint32_t compressionType);
|
||||
|
||||
~ChunkManagerWrapperDict() = default;
|
||||
ChunkManagerWrapperDict(const ChunkManagerWrapperDict& other) = delete;
|
||||
|
@ -10,7 +10,7 @@ add_definitions(-DNDEBUG)
|
||||
|
||||
add_library(compress SHARED ${compress_LIB_SRCS})
|
||||
|
||||
target_link_libraries(compress ${SNAPPY_LIBRARIES})
|
||||
target_link_libraries(compress ${SNAPPY_LIBRARIES} ${LZ4_LIBRARIES})
|
||||
|
||||
install(TARGETS compress DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine)
|
||||
|
||||
|
@ -22,12 +22,14 @@
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
using namespace std;
|
||||
|
||||
#include "blocksize.h"
|
||||
#include "logger.h"
|
||||
#include "snappy.h"
|
||||
#include "hasher.h"
|
||||
#include "lz4.h"
|
||||
|
||||
#define IDBCOMP_DLLEXPORT
|
||||
#include "idbcompress.h"
|
||||
@ -39,8 +41,7 @@ const uint64_t MAGIC_NUMBER = 0xfdc119a384d0778eULL;
|
||||
const uint64_t VERSION_NUM1 = 1;
|
||||
const uint64_t VERSION_NUM2 = 2;
|
||||
const uint64_t VERSION_NUM3 = 3;
|
||||
const int COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
|
||||
const int PTR_SECTION_OFFSET = compress::IDBCompressInterface::HDR_BUF_LEN;
|
||||
const int PTR_SECTION_OFFSET = compress::CompressInterface::HDR_BUF_LEN;
|
||||
|
||||
// version 1.1 of the chunk data has a short header
|
||||
// QuickLZ compressed data never has the high bit set on the first byte
|
||||
@ -83,7 +84,7 @@ struct CompressedDBFileHeader
|
||||
union CompressedDBFileHeaderBlock
|
||||
{
|
||||
CompressedDBFileHeader fHeader;
|
||||
char fDummy[compress::IDBCompressInterface::HDR_BUF_LEN];
|
||||
char fDummy[compress::CompressInterface::HDR_BUF_LEN];
|
||||
};
|
||||
|
||||
void initCompressedDBFileHeader(
|
||||
@ -110,53 +111,57 @@ namespace compress
|
||||
{
|
||||
#ifndef SKIP_IDB_COMPRESSION
|
||||
|
||||
IDBCompressInterface::IDBCompressInterface(unsigned int numUserPaddingBytes) :
|
||||
// Remembers how many user-requested padding bytes were passed in; the value
// is stored in fNumUserPaddingBytes.
CompressInterface::CompressInterface(unsigned int numUserPaddingBytes)
    : fNumUserPaddingBytes(numUserPaddingBytes)
{
}
|
||||
|
||||
IDBCompressInterface::~IDBCompressInterface()
|
||||
{ }
|
||||
|
||||
/* V1 is really only available for decompression, we kill any DDL using V1 by hand.
|
||||
* Maybe should have a new api, isDecompressionAvail() ? Any request to compress
|
||||
* using V1 will silently be changed to V2.
|
||||
*/
|
||||
bool IDBCompressInterface::isCompressionAvail(int compressionType) const
|
||||
/*static*/
|
||||
bool CompressInterface::isCompressionAvail(int compressionType)
|
||||
{
|
||||
if ( (compressionType == 0) ||
|
||||
(compressionType == 1) ||
|
||||
(compressionType == 2) )
|
||||
return true;
|
||||
return ((compressionType == 0) || (compressionType == 1) ||
|
||||
(compressionType == 2) || (compressionType == 3));
|
||||
}
|
||||
|
||||
return false;
|
||||
size_t CompressInterface::getMaxCompressedSizeGeneric(size_t inLen)
|
||||
{
|
||||
return std::max(snappy::MaxCompressedLength(inLen),
|
||||
LZ4_COMPRESSBOUND(inLen)) +
|
||||
HEADER_SIZE;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Compress a block of data
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::compressBlock(const char* in,
|
||||
const size_t inLen,
|
||||
unsigned char* out,
|
||||
unsigned int& outLen) const
|
||||
int CompressInterface::compressBlock(const char* in, const size_t inLen,
|
||||
unsigned char* out, size_t& outLen) const
|
||||
{
|
||||
size_t snaplen = 0;
|
||||
utils::Hasher128 hasher;
|
||||
|
||||
// loose input checking.
|
||||
if (outLen < snappy::MaxCompressedLength(inLen) + HEADER_SIZE)
|
||||
if (outLen < maxCompressedSize(inLen))
|
||||
{
|
||||
cerr << "got outLen = " << outLen << " for inLen = " << inLen << ", needed " <<
|
||||
(snappy::MaxCompressedLength(inLen) + HEADER_SIZE) << endl;
|
||||
cerr << "got outLen = " << outLen << " for inLen = " << inLen
|
||||
<< ", needed " << (maxCompressedSize(inLen)) << endl;
|
||||
return ERR_BADOUTSIZE;
|
||||
}
|
||||
|
||||
//apparently this never fails?
|
||||
snappy::RawCompress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]), &snaplen);
|
||||
auto rc = compress(in, inLen, reinterpret_cast<char*>(&out[HEADER_SIZE]),
|
||||
&outLen);
|
||||
if (rc != ERR_OK)
|
||||
{
|
||||
return rc;
|
||||
}
|
||||
|
||||
snaplen = outLen;
|
||||
uint8_t* signature = (uint8_t*) &out[SIG_OFFSET];
|
||||
uint32_t* checksum = (uint32_t*) &out[CHECKSUM_OFFSET];
|
||||
uint32_t* len = (uint32_t*) &out[LEN_OFFSET];
|
||||
*signature = CHUNK_MAGIC3;
|
||||
*signature = getChunkMagicNumber();
|
||||
*checksum = hasher((char*) &out[HEADER_SIZE], snaplen);
|
||||
*len = snaplen;
|
||||
|
||||
@ -171,51 +176,47 @@ int IDBCompressInterface::compressBlock(const char* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Decompress a block of data
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
|
||||
unsigned int& outLen) const
|
||||
int CompressInterface::uncompressBlock(const char* in, const size_t inLen,
|
||||
unsigned char* out,
|
||||
size_t& outLen) const
|
||||
{
|
||||
bool comprc = false;
|
||||
size_t ol = 0;
|
||||
|
||||
uint32_t realChecksum;
|
||||
uint32_t storedChecksum;
|
||||
uint32_t storedLen;
|
||||
uint8_t storedMagic;
|
||||
utils::Hasher128 hasher;
|
||||
|
||||
auto tmpOutLen = outLen;
|
||||
outLen = 0;
|
||||
|
||||
if (inLen < 1)
|
||||
{
|
||||
return ERR_BADINPUT;
|
||||
}
|
||||
|
||||
storedMagic = *((uint8_t*) &in[SIG_OFFSET]);
|
||||
|
||||
if (storedMagic == CHUNK_MAGIC3)
|
||||
if (storedMagic == getChunkMagicNumber())
|
||||
{
|
||||
if (inLen < HEADER_SIZE)
|
||||
{
|
||||
return ERR_BADINPUT;
|
||||
}
|
||||
|
||||
storedChecksum = *((uint32_t*) &in[CHECKSUM_OFFSET]);
|
||||
storedLen = *((uint32_t*) (&in[LEN_OFFSET]));
|
||||
|
||||
if (inLen < storedLen + HEADER_SIZE)
|
||||
{
|
||||
return ERR_BADINPUT;
|
||||
}
|
||||
|
||||
realChecksum = hasher(&in[HEADER_SIZE], storedLen);
|
||||
|
||||
if (storedChecksum != realChecksum)
|
||||
{
|
||||
return ERR_CHECKSUM;
|
||||
|
||||
auto rc = uncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out), &tmpOutLen);
|
||||
if (rc != ERR_OK)
|
||||
{
|
||||
cerr << "uncompressBlock failed!" << endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
comprc = snappy::GetUncompressedLength(&in[HEADER_SIZE], storedLen, &ol) &&
|
||||
snappy::RawUncompress(&in[HEADER_SIZE], storedLen, reinterpret_cast<char*>(out));
|
||||
outLen = tmpOutLen;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -223,13 +224,6 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
|
||||
return ERR_BADINPUT;
|
||||
}
|
||||
|
||||
if (!comprc)
|
||||
{
|
||||
cerr << "decomp failed!" << endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
outLen = ol;
|
||||
//cerr << "ub: " << inLen << " : " << outLen << endl;
|
||||
|
||||
return ERR_OK;
|
||||
@ -238,7 +232,7 @@ int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, un
|
||||
//------------------------------------------------------------------------------
|
||||
// Verify the passed in buffer contains a valid compression file header.
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
|
||||
int CompressInterface::verifyHdr(const void* hdrBuf)
|
||||
{
|
||||
const CompressedDBFileHeader* hdr = reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf);
|
||||
|
||||
@ -255,9 +249,8 @@ int IDBCompressInterface::verifyHdr(const void* hdrBuf) const
|
||||
// Extract compression pointer information out of the pointer buffer that is
|
||||
// passed in. ptrBuf points to the pointer section of the compression hdr.
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::getPtrList(const char* ptrBuf,
|
||||
const int ptrBufSize,
|
||||
CompChunkPtrList& chunkPtrs ) const
|
||||
int CompressInterface::getPtrList(const char* ptrBuf, const int ptrBufSize,
|
||||
CompChunkPtrList& chunkPtrs)
|
||||
{
|
||||
int rc = 0;
|
||||
chunkPtrs.clear();
|
||||
@ -285,7 +278,7 @@ int IDBCompressInterface::getPtrList(const char* ptrBuf,
|
||||
// one for the file header, and one for the list of pointers.
|
||||
// Wrapper of above method for backward compatibility.
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs ) const
|
||||
int CompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs )
|
||||
{
|
||||
return getPtrList(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN, chunkPtrs);
|
||||
}
|
||||
@ -293,8 +286,8 @@ int IDBCompressInterface::getPtrList(const char* hdrBuf, CompChunkPtrList& chunk
|
||||
//------------------------------------------------------------------------------
|
||||
// Count the number of chunk pointers in the pointer header(s)
|
||||
//------------------------------------------------------------------------------
|
||||
unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
|
||||
const int ptrBufSize) const
|
||||
unsigned int CompressInterface::getPtrCount(const char* ptrBuf,
|
||||
const int ptrBufSize)
|
||||
{
|
||||
unsigned int chunkCount = 0;
|
||||
|
||||
@ -318,7 +311,7 @@ unsigned int IDBCompressInterface::getPtrCount(const char* ptrBuf,
|
||||
// This should not be used for compressed dictionary files which could have
|
||||
// more compression chunk headers.
|
||||
//------------------------------------------------------------------------------
|
||||
unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
|
||||
unsigned int CompressInterface::getPtrCount(const char* hdrBuf)
|
||||
{
|
||||
return getPtrCount(hdrBuf + HDR_BUF_LEN, HDR_BUF_LEN);
|
||||
}
|
||||
@ -326,9 +319,8 @@ unsigned int IDBCompressInterface::getPtrCount(const char* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Store list of compression pointers into the specified header.
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* ptrBuf,
|
||||
int ptrSectionSize) const
|
||||
void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* ptrBuf, int ptrSectionSize)
|
||||
{
|
||||
memset((ptrBuf), 0, ptrSectionSize); // reset the pointer section to 0
|
||||
uint64_t* hdrPtrs = reinterpret_cast<uint64_t*>(ptrBuf);
|
||||
@ -342,7 +334,7 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
//------------------------------------------------------------------------------
|
||||
// Wrapper of above method for backward compatibility
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf) const
|
||||
void CompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* ptrBuf)
|
||||
{
|
||||
storePtrs(ptrs, reinterpret_cast<char*>(ptrBuf) + HDR_BUF_LEN, HDR_BUF_LEN);
|
||||
}
|
||||
@ -350,10 +342,10 @@ void IDBCompressInterface::storePtrs(const std::vector<uint64_t>& ptrs, void* pt
|
||||
//------------------------------------------------------------------------------
|
||||
// Initialize the header blocks to be written at the start of a dictionary file.
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::initHdr(
|
||||
void CompressInterface::initHdr(
|
||||
void* hdrBuf, void* ptrBuf, uint32_t colWidth,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType, int hdrSize) const
|
||||
int compressionType, int hdrSize)
|
||||
{
|
||||
memset(hdrBuf, 0, HDR_BUF_LEN);
|
||||
memset(ptrBuf, 0, hdrSize - HDR_BUF_LEN);
|
||||
@ -364,10 +356,10 @@ void IDBCompressInterface::initHdr(
|
||||
//------------------------------------------------------------------------------
|
||||
// Initialize the header blocks to be written at the start of a column file.
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::initHdr(
|
||||
void CompressInterface::initHdr(
|
||||
void* hdrBuf, uint32_t columnWidth,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType) const
|
||||
int compressionType)
|
||||
{
|
||||
memset(hdrBuf, 0, HDR_BUF_LEN * 2);
|
||||
initCompressedDBFileHeader(hdrBuf, columnWidth, columnType,
|
||||
@ -377,7 +369,7 @@ void IDBCompressInterface::initHdr(
|
||||
//------------------------------------------------------------------------------
|
||||
// Get the header's version number
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
|
||||
uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
|
||||
{
|
||||
return (
|
||||
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fVersionNum);
|
||||
@ -386,7 +378,7 @@ uint64_t IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Set the file's block count
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
|
||||
void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count)
|
||||
{
|
||||
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fBlockCount = count;
|
||||
}
|
||||
@ -394,15 +386,24 @@ void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get the file's block count
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
|
||||
uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
|
||||
{
|
||||
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fBlockCount);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Get the file's compression type
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
|
||||
{
|
||||
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)
|
||||
->fCompressionType);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Set the overall header size
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
|
||||
void CompressInterface::setHdrSize(void* hdrBuf, uint64_t size)
|
||||
{
|
||||
reinterpret_cast<CompressedDBFileHeader*>(hdrBuf)->fHeaderSize = size;
|
||||
}
|
||||
@ -410,7 +411,7 @@ void IDBCompressInterface::setHdrSize(void* hdrBuf, uint64_t size) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get the overall header size
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
|
||||
uint64_t CompressInterface::getHdrSize(const void* hdrBuf)
|
||||
{
|
||||
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fHeaderSize);
|
||||
}
|
||||
@ -419,7 +420,7 @@ uint64_t IDBCompressInterface::getHdrSize(const void* hdrBuf) const
|
||||
// Get column type
|
||||
//-----------------------------------------------------------------------------
|
||||
execplan::CalpontSystemCatalog::ColDataType
|
||||
IDBCompressInterface::getColDataType(const void* hdrBuf) const
|
||||
CompressInterface::getColDataType(const void* hdrBuf)
|
||||
{
|
||||
return (
|
||||
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColDataType);
|
||||
@ -428,7 +429,7 @@ IDBCompressInterface::getColDataType(const void* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get column width
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
|
||||
uint64_t CompressInterface::getColumnWidth(const void* hdrBuf)
|
||||
{
|
||||
return (
|
||||
reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fColumnWidth);
|
||||
@ -437,7 +438,7 @@ uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get LBID by index
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index) const
|
||||
uint64_t CompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index)
|
||||
{
|
||||
if (index < LBID_MAX_SIZE)
|
||||
return (reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDS[index]);
|
||||
@ -447,7 +448,7 @@ uint64_t IDBCompressInterface::getLBIDByIndex(const void* hdrBuf, uint64_t index
|
||||
//------------------------------------------------------------------------------
|
||||
// Set LBID by index
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const
|
||||
void CompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index)
|
||||
{
|
||||
if (lbid && index < LBID_MAX_SIZE)
|
||||
{
|
||||
@ -457,7 +458,10 @@ void IDBCompressInterface::setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
|
||||
//------------------------------------------------------------------------------
|
||||
// Get LBID count
|
||||
//------------------------------------------------------------------------------
|
||||
uint64_t CompressInterface::getLBIDCount(void* hdrBuf)
|
||||
{
|
||||
return reinterpret_cast<const CompressedDBFileHeader*>(hdrBuf)->fLBIDCount;
|
||||
}
|
||||
@ -466,9 +470,9 @@ uint64_t IDBCompressInterface::getLBIDCount(void* hdrBuf) const
|
||||
// Calculates the chunk and block offset within the chunk for the specified
|
||||
// block number.
|
||||
//------------------------------------------------------------------------------
|
||||
void IDBCompressInterface::locateBlock(unsigned int block,
|
||||
unsigned int& chunkIndex,
|
||||
unsigned int& blockOffsetWithinChunk) const
|
||||
void CompressInterface::locateBlock(unsigned int block,
|
||||
unsigned int& chunkIndex,
|
||||
unsigned int& blockOffsetWithinChunk) const
|
||||
{
|
||||
const uint64_t BUFLEN = UNCOMPRESSED_INBUF_LEN;
|
||||
|
||||
@ -485,9 +489,8 @@ void IDBCompressInterface::locateBlock(unsigned int block,
|
||||
// also expand to allow for user requested padding. Lastly, initialize padding
|
||||
// bytes to 0.
|
||||
//------------------------------------------------------------------------------
|
||||
int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
|
||||
unsigned int& len,
|
||||
unsigned int maxLen) const
|
||||
int CompressInterface::padCompressedChunks(unsigned char* buf, size_t& len,
|
||||
unsigned int maxLen) const
|
||||
{
|
||||
int nPaddingBytes = 0;
|
||||
int nRem = len % COMPRESSED_CHUNK_INCREMENT_SIZE;
|
||||
@ -511,30 +514,203 @@ int IDBCompressInterface::padCompressedChunks(unsigned char* buf,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* static */
|
||||
uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
|
||||
// Snappy
|
||||
// Snappy-backed compressor; numUserPaddingBytes is forwarded unchanged to the
// CompressInterface base constructor.
CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
    : CompressInterface(numUserPaddingBytes)
{}
|
||||
|
||||
int32_t CompressInterfaceSnappy::compress(const char* in, size_t inLen,
|
||||
char* out, size_t* outLen) const
|
||||
{
|
||||
snappy::RawCompress(in, inLen, out, outLen);
|
||||
|
||||
#ifdef DEBUG_COMPRESSION
|
||||
std::cout << "Snappy::compress: inLen " << inLen << ", outLen " << *outLen
|
||||
<< std::endl;
|
||||
#endif
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
int32_t CompressInterfaceSnappy::uncompress(const char* in, size_t inLen,
|
||||
char* out, size_t* outLen) const
|
||||
{
|
||||
size_t realOutLen = 0;
|
||||
auto rc = snappy::GetUncompressedLength(in, inLen, &realOutLen);
|
||||
|
||||
if (!rc || realOutLen > *outLen)
|
||||
{
|
||||
cerr << "snappy::GetUncompressedLength failed. InLen: " << inLen
|
||||
<< ", outLen: " << *outLen << ", realOutLen: " << realOutLen
|
||||
<< endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
rc = snappy::RawUncompress(in, inLen, out);
|
||||
|
||||
if (!rc)
|
||||
{
|
||||
cerr << "snappy::RawUnompress failed. InLen: " << inLen
|
||||
<< ", outLen: " << *outLen << endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_COMPRESSION
|
||||
std::cout << "Snappy::uncompress: inLen " << inLen << ", outLen "
|
||||
<< *outLen << std::endl;
|
||||
#endif
|
||||
*outLen = realOutLen;
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
size_t CompressInterfaceSnappy::maxCompressedSize(size_t uncompSize) const
|
||||
{
|
||||
return (snappy::MaxCompressedLength(uncompSize) + HEADER_SIZE);
|
||||
}
|
||||
|
||||
int IDBCompressInterface::compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const
|
||||
{
|
||||
snappy::RawCompress(in, inLen, out, outLen);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
|
||||
{
|
||||
return !(snappy::RawUncompress(in, inLen, out));
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, size_t* outLen)
|
||||
bool CompressInterfaceSnappy::getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const
|
||||
{
|
||||
return snappy::GetUncompressedLength(in, inLen, outLen);
|
||||
}
|
||||
|
||||
uint8_t CompressInterfaceSnappy::getChunkMagicNumber() const
|
||||
{
|
||||
return CHUNK_MAGIC_SNAPPY;
|
||||
}
|
||||
|
||||
// LZ4
|
||||
// LZ4-backed compressor; numUserPaddingBytes is forwarded unchanged to the
// CompressInterface base constructor.
CompressInterfaceLZ4::CompressInterfaceLZ4(uint32_t numUserPaddingBytes)
    : CompressInterface(numUserPaddingBytes)
{}
|
||||
|
||||
int32_t CompressInterfaceLZ4::compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const
|
||||
{
|
||||
auto compressedLen = LZ4_compress_default(in, out, inLen, *outLen);
|
||||
|
||||
if (!compressedLen)
|
||||
{
|
||||
cerr << "LZ_compress_default failed. InLen: " << inLen
|
||||
<< ", compressedLen: " << compressedLen << endl;
|
||||
return ERR_COMPRESS;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_COMPRESSION
|
||||
std::cout << "LZ4::compress: inLen " << inLen << ", comressedLen "
|
||||
<< compressedLen << std::endl;
|
||||
#endif
|
||||
|
||||
*outLen = compressedLen;
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
int32_t CompressInterfaceLZ4::uncompress(const char* in, size_t inLen,
|
||||
char* out, size_t* outLen) const
|
||||
{
|
||||
auto decompressedLen = LZ4_decompress_safe(in, out, inLen, *outLen);
|
||||
|
||||
if (decompressedLen < 0)
|
||||
{
|
||||
cerr << "LZ_decompress_safe failed with error code " << decompressedLen
|
||||
<< endl;
|
||||
cerr << "InLen: " << inLen << ", outLen: " << *outLen << endl;
|
||||
return ERR_DECOMPRESS;
|
||||
}
|
||||
|
||||
*outLen = decompressedLen;
|
||||
|
||||
#ifdef DEBUG_COMPRESSION
|
||||
std::cout << "LZ4::uncompress: inLen " << inLen << ", outLen " << *outLen
|
||||
<< std::endl;
|
||||
#endif
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
size_t CompressInterfaceLZ4::maxCompressedSize(size_t uncompSize) const
|
||||
{
|
||||
return (LZ4_COMPRESSBOUND(uncompSize) + HEADER_SIZE);
|
||||
}
|
||||
|
||||
bool CompressInterfaceLZ4::getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const
|
||||
{
|
||||
// LZ4 does not have such function.
|
||||
idbassert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t CompressInterfaceLZ4::getChunkMagicNumber() const
|
||||
{
|
||||
return CHUNK_MAGIC_LZ4;
|
||||
}
|
||||
|
||||
// Creates a new compressor for the given numeric compression type:
// 1 and 2 give Snappy, 3 gives LZ4, anything else yields nullptr.
// The object is allocated with `new` and no reference is kept here, so the
// caller is responsible for deleting it.
CompressInterface* getCompressInterfaceByType(uint32_t compressionType,
                                              uint32_t numUserPaddingBytes)
{
    if (compressionType == 1 || compressionType == 2)
    {
        return new CompressInterfaceSnappy(numUserPaddingBytes);
    }

    if (compressionType == 3)
    {
        return new CompressInterfaceLZ4(numUserPaddingBytes);
    }

    return nullptr;
}
|
||||
|
||||
// Creates a new compressor for the given algorithm name. Only the exact
// strings "SNAPPY" and "LZ4" are recognised; any other name yields nullptr.
// The object is allocated with `new` and no reference is kept here, so the
// caller is responsible for deleting it.
CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
                                              uint32_t numUserPaddingBytes)
{
    CompressInterface* compressor = nullptr;

    if (compressionName == "SNAPPY")
    {
        compressor = new CompressInterfaceSnappy(numUserPaddingBytes);
    }
    else if (compressionName == "LZ4")
    {
        compressor = new CompressInterfaceLZ4(numUserPaddingBytes);
    }

    return compressor;
}
|
||||
|
||||
void initializeCompressorPool(
|
||||
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
|
||||
compressorPool,
|
||||
uint32_t numUserPaddingBytes)
|
||||
{
|
||||
compressorPool = {
|
||||
make_pair(2, std::shared_ptr<CompressInterface>(
|
||||
new CompressInterfaceSnappy(numUserPaddingBytes))),
|
||||
make_pair(3, std::shared_ptr<CompressInterface>(
|
||||
new CompressInterfaceLZ4(numUserPaddingBytes)))};
|
||||
}
|
||||
|
||||
std::shared_ptr<CompressInterface> getCompressorByType(
|
||||
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>&
|
||||
compressorPool,
|
||||
uint32_t compressionType)
|
||||
{
|
||||
switch (compressionType)
|
||||
{
|
||||
case 1:
|
||||
case 2:
|
||||
if (!compressorPool.count(2))
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
return compressorPool[2];
|
||||
case 3:
|
||||
if (!compressorPool.count(3))
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
return compressorPool[3];
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace compress
|
||||
|
@ -26,6 +26,7 @@
|
||||
#endif
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "calpontsystemcatalog.h"
|
||||
|
||||
@ -41,11 +42,12 @@ namespace compress
|
||||
typedef std::pair<uint64_t, uint64_t> CompChunkPtr;
|
||||
typedef std::vector<CompChunkPtr> CompChunkPtrList;
|
||||
|
||||
class IDBCompressInterface
|
||||
class CompressInterface
|
||||
{
|
||||
public:
|
||||
static const unsigned int HDR_BUF_LEN = 4096;
|
||||
static const unsigned int UNCOMPRESSED_INBUF_LEN = 512 * 1024 * 8;
|
||||
static const uint32_t COMPRESSED_CHUNK_INCREMENT_SIZE = 8192;
|
||||
|
||||
// error codes from uncompressBlock()
|
||||
static const int ERR_OK = 0;
|
||||
@ -53,22 +55,29 @@ public:
|
||||
static const int ERR_DECOMPRESS = -2;
|
||||
static const int ERR_BADINPUT = -3;
|
||||
static const int ERR_BADOUTSIZE = -4;
|
||||
static const int ERR_COMPRESS = -5;
|
||||
|
||||
/**
|
||||
* When IDBCompressInterface object is being used to compress a chunk, this
|
||||
* When CompressInterface object is being used to compress a chunk, this
|
||||
* construct can be used to specify the padding added by padCompressedChunks
|
||||
*/
|
||||
EXPORT explicit IDBCompressInterface(unsigned int numUserPaddingBytes = 0);
|
||||
EXPORT explicit CompressInterface(unsigned int numUserPaddingBytes = 0);
|
||||
|
||||
/**
|
||||
* dtor
|
||||
*/
|
||||
EXPORT virtual ~IDBCompressInterface();
|
||||
EXPORT virtual ~CompressInterface() = default;
|
||||
|
||||
/**
|
||||
* see if the algo is available in this lib
|
||||
*/
|
||||
EXPORT bool isCompressionAvail(int compressionType = 0) const;
|
||||
EXPORT static bool isCompressionAvail(int compressionType = 0);
|
||||
|
||||
/**
|
||||
* Returns the maximum compressed size from all available compression
|
||||
* types.
|
||||
*/
|
||||
EXPORT static size_t getMaxCompressedSizeGeneric(size_t inLen);
|
||||
|
||||
/**
|
||||
* Compresses specified "in" buffer of length "inLen" bytes.
|
||||
@ -76,30 +85,31 @@ public:
|
||||
* "out" should be sized using maxCompressedSize() to allow for incompressible data.
|
||||
* Returns 0 if success.
|
||||
*/
|
||||
EXPORT int compressBlock(const char* in,
|
||||
const size_t inLen,
|
||||
unsigned char* out,
|
||||
unsigned int& outLen) const;
|
||||
|
||||
EXPORT int compressBlock(const char* in, const size_t inLen,
|
||||
unsigned char* out, size_t& outLen) const;
|
||||
|
||||
/**
|
||||
* outLen must be initialized with the size of the out buffer before calling uncompressBlock.
|
||||
* On return, outLen will have the number of bytes used in out.
|
||||
*/
|
||||
EXPORT int uncompressBlock(const char* in, const size_t inLen, unsigned char* out,
|
||||
unsigned int& outLen) const;
|
||||
EXPORT int uncompressBlock(const char* in, const size_t inLen,
|
||||
unsigned char* out, size_t& outLen) const;
|
||||
|
||||
/**
|
||||
* This fcn wraps whatever compression algorithm we're using at the time, and
|
||||
* is not specific to blocks on disk.
|
||||
*/
|
||||
EXPORT int compress(const char* in, size_t inLen, char* out, size_t* outLen) const;
|
||||
EXPORT virtual int compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const = 0;
|
||||
|
||||
/**
|
||||
* This fcn wraps whatever compression algorithm we're using at the time, and
|
||||
* is not specific to blocks on disk. The caller needs to make sure out is big
|
||||
* enough to contain the output by using getUncompressedSize().
|
||||
*/
|
||||
EXPORT int uncompress(const char* in, size_t inLen, char* out) const;
|
||||
EXPORT virtual int uncompress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const = 0;
|
||||
|
||||
/**
|
||||
* Initialize header buffer at start of compressed db file.
|
||||
@ -107,23 +117,24 @@ public:
|
||||
* @warning hdrBuf must be at least HDR_BUF_LEN bytes
|
||||
* @warning ptrBuf must be at least (hdrSize-HDR_BUF_LEN) bytes
|
||||
*/
|
||||
EXPORT void initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType, int hdrSize) const;
|
||||
|
||||
EXPORT static void
|
||||
initHdr(void* hdrBuf, void* ptrBuf, uint32_t columnWidht,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType, int hdrSize);
|
||||
/**
|
||||
* Initialize header buffer at start of compressed db file.
|
||||
*
|
||||
* @warning hdrBuf must be at least HDR_BUF_LEN*2 bytes
|
||||
*/
|
||||
EXPORT void initHdr(void* hdrBuf, uint32_t columnWidth,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType) const;
|
||||
EXPORT static void
|
||||
initHdr(void* hdrBuf, uint32_t columnWidth,
|
||||
execplan::CalpontSystemCatalog::ColDataType columnType,
|
||||
int compressionType);
|
||||
|
||||
/**
|
||||
* Verify the passed in buffer contains a compressed db file header.
|
||||
*/
|
||||
EXPORT int verifyHdr(const void* hdrBuf) const;
|
||||
EXPORT static int verifyHdr(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* Extracts list of compression pointers from the specified ptr buffer.
|
||||
@ -131,9 +142,8 @@ public:
|
||||
* chunkPtrs is a vector of offset, size pairs for the compressed chunks.
|
||||
* Returns 0 if success.
|
||||
*/
|
||||
EXPORT int getPtrList(const char* ptrBuf,
|
||||
const int ptrBufSize,
|
||||
CompChunkPtrList& chunkPtrs) const;
|
||||
EXPORT static int getPtrList(const char* ptrBuf, const int ptrBufSize,
|
||||
CompChunkPtrList& chunkPtrs);
|
||||
|
||||
/**
|
||||
* Extracts list of compression pointers from the specified header.
|
||||
@ -142,28 +152,28 @@ public:
|
||||
* Note: the pointer passed in is the beginning of the header,
|
||||
* not the pointer section as above.
|
||||
*/
|
||||
EXPORT int getPtrList(const char* hdrBuf, CompChunkPtrList& chunkPtrs) const;
|
||||
EXPORT static int getPtrList(const char* hdrBuf,
|
||||
CompChunkPtrList& chunkPtrs);
|
||||
|
||||
/**
|
||||
* Return the number of chunk pointers contained in the specified ptr buffer.
|
||||
* ptrBuf points to the pointer section taken from the headers.
|
||||
*/
|
||||
EXPORT unsigned int getPtrCount(const char* ptrBuf,
|
||||
const int ptrBufSize) const;
|
||||
EXPORT static unsigned int getPtrCount(const char* ptrBuf,
|
||||
const int ptrBufSize);
|
||||
|
||||
/**
|
||||
* Return the number of chunk pointers contained in the specified header.
|
||||
* hdrBuf points to start of 2 buffer headers from compressed db file.
|
||||
* For non-dictionary columns.
|
||||
*/
|
||||
EXPORT unsigned int getPtrCount(const char* hdrBuf) const;
|
||||
EXPORT static unsigned int getPtrCount(const char* hdrBuf);
|
||||
|
||||
/**
|
||||
* Store vector of pointers into the specified buffer header's pointer section.
|
||||
*/
|
||||
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* hdrBuf,
|
||||
int ptrSectionSize) const;
|
||||
EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* hdrBuf, int ptrSectionSize);
|
||||
|
||||
/**
|
||||
* Store vector of pointers into the specified buffer header.
|
||||
@ -171,14 +181,14 @@ public:
|
||||
* Note: the pointer passed in is the beginning of the header,
|
||||
* not the pointer section as above.
|
||||
*/
|
||||
EXPORT void storePtrs(const std::vector<uint64_t>& ptrs, void* hdrBuf) const;
|
||||
EXPORT static void storePtrs(const std::vector<uint64_t>& ptrs,
|
||||
void* hdrBuf);
|
||||
|
||||
/**
|
||||
* Calculates the chunk, and the block offset within the chunk, for the
|
||||
* specified block number.
|
||||
*/
|
||||
EXPORT void locateBlock(unsigned int block,
|
||||
unsigned int& chunkIndex,
|
||||
EXPORT void locateBlock(unsigned int block, unsigned int& chunkIndex,
|
||||
unsigned int& blockOffsetWithinChunk) const;
|
||||
|
||||
/**
|
||||
@ -187,9 +197,8 @@ public:
|
||||
* maxLen is the maximum size for buf. nonzero return code means the
|
||||
* result output buffer length is greater than maxLen.
|
||||
*/
|
||||
EXPORT int padCompressedChunks(unsigned char* buf,
|
||||
unsigned int& len,
|
||||
unsigned int maxLen ) const;
|
||||
EXPORT int padCompressedChunks(unsigned char* buf, size_t& len,
|
||||
unsigned int maxLen) const;
|
||||
|
||||
/*
|
||||
* Mutator methods for the block count in the file
|
||||
@ -197,17 +206,22 @@ public:
|
||||
/**
|
||||
* getVersionNumber
|
||||
*/
|
||||
EXPORT uint64_t getVersionNumber(const void* hdrBuf) const;
|
||||
EXPORT static uint64_t getVersionNumber(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* setBlockCount
|
||||
*/
|
||||
EXPORT void setBlockCount(void* hdrBuf, uint64_t count) const;
|
||||
EXPORT static void setBlockCount(void* hdrBuf, uint64_t count);
|
||||
|
||||
/**
|
||||
* getBlockCount
|
||||
*/
|
||||
EXPORT uint64_t getBlockCount(const void* hdrBuf) const;
|
||||
EXPORT static uint64_t getBlockCount(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* getCompressionType
|
||||
*/
|
||||
EXPORT static uint64_t getCompressionType(const void* hdrBuf);
|
||||
|
||||
/*
|
||||
* Mutator methods for the overall header size
|
||||
@ -215,38 +229,38 @@ public:
|
||||
/**
|
||||
* setHdrSize
|
||||
*/
|
||||
EXPORT void setHdrSize(void* hdrBuf, uint64_t size) const;
|
||||
EXPORT static void setHdrSize(void* hdrBuf, uint64_t size);
|
||||
|
||||
/**
|
||||
* getHdrSize
|
||||
*/
|
||||
EXPORT uint64_t getHdrSize(const void* hdrBuf) const;
|
||||
EXPORT static uint64_t getHdrSize(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* getColDataType
|
||||
*/
|
||||
EXPORT execplan::CalpontSystemCatalog::ColDataType
|
||||
getColDataType(const void* hdrBuf) const;
|
||||
EXPORT static execplan::CalpontSystemCatalog::ColDataType
|
||||
getColDataType(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* getColumnWidth
|
||||
*/
|
||||
EXPORT uint64_t getColumnWidth(const void* hdrBuf) const;
|
||||
EXPORT static uint64_t getColumnWidth(const void* hdrBuf);
|
||||
|
||||
/**
|
||||
* getLBIDByIndex
|
||||
*/
|
||||
EXPORT uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index) const;
|
||||
EXPORT static uint64_t getLBIDByIndex(const void* hdrBuf, uint64_t index);
|
||||
|
||||
/**
|
||||
* setLBIDByIndex
|
||||
*/
|
||||
EXPORT void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index) const;
|
||||
EXPORT static void setLBIDByIndex(void* hdrBuf, uint64_t lbid, uint64_t index);
|
||||
|
||||
/**
|
||||
* getLBIDCount
|
||||
*/
|
||||
EXPORT uint64_t getLBIDCount(void* hdrBuf) const;
|
||||
EXPORT static uint64_t getLBIDCount(void* hdrBuf);
|
||||
|
||||
/**
|
||||
* Mutator methods for the user padding bytes
|
||||
@ -271,97 +285,213 @@ public:
|
||||
* Given an input, uncompressed block, what's the maximum possible output,
|
||||
* compressed size?
|
||||
*/
|
||||
EXPORT static uint64_t maxCompressedSize(uint64_t uncompSize);
|
||||
EXPORT virtual size_t maxCompressedSize(size_t uncompSize) const = 0;
|
||||
|
||||
/**
|
||||
* Given a compressed block, returns the uncompressed size in outLen.
|
||||
* Returns false on error, true on success.
|
||||
*/
|
||||
EXPORT static bool getUncompressedSize(char* in, size_t inLen, size_t* outLen);
|
||||
EXPORT virtual bool getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const = 0;
|
||||
|
||||
protected:
|
||||
protected:
|
||||
virtual uint8_t getChunkMagicNumber() const = 0;
|
||||
|
||||
private:
|
||||
private:
|
||||
//defaults okay
|
||||
//IDBCompressInterface(const IDBCompressInterface& rhs);
|
||||
//IDBCompressInterface& operator=(const IDBCompressInterface& rhs);
|
||||
//CompressInterface(const CompressInterface& rhs);
|
||||
//CompressInterface& operator=(const CompressInterface& rhs);
|
||||
|
||||
unsigned int fNumUserPaddingBytes; // Num bytes to pad compressed chunks
|
||||
};
|
||||
|
||||
class CompressInterfaceSnappy : public CompressInterface
|
||||
{
|
||||
public:
|
||||
EXPORT CompressInterfaceSnappy(uint32_t numUserPaddingBytes = 0);
|
||||
EXPORT ~CompressInterfaceSnappy() = default;
|
||||
/**
|
||||
* Compress the given block using snappy compression API.
|
||||
*/
|
||||
EXPORT int32_t compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const override;
|
||||
/**
|
||||
* Uncompress the given block using snappy compression API.
|
||||
*/
|
||||
EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const override;
|
||||
/**
|
||||
* Get max compressed size for the given `uncompSize` value using snappy
|
||||
* compression API.
|
||||
*/
|
||||
EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
|
||||
|
||||
/**
|
||||
* Get uncompressed size for the given block using snappy
|
||||
* compression API.
|
||||
*/
|
||||
EXPORT
|
||||
bool getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const override;
|
||||
|
||||
protected:
|
||||
uint8_t getChunkMagicNumber() const override;
|
||||
|
||||
private:
|
||||
const uint8_t CHUNK_MAGIC_SNAPPY = 0xfd;
|
||||
};
|
||||
|
||||
class CompressInterfaceLZ4 : public CompressInterface
|
||||
{
|
||||
public:
|
||||
EXPORT CompressInterfaceLZ4(uint32_t numUserPaddingBytes = 0);
|
||||
EXPORT ~CompressInterfaceLZ4() = default;
|
||||
/**
|
||||
* Compress the given block using LZ4 compression API.
|
||||
*/
|
||||
EXPORT int32_t compress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const override;
|
||||
/**
|
||||
* Uncompress the given block using LZ4 compression API.
|
||||
*/
|
||||
EXPORT int32_t uncompress(const char* in, size_t inLen, char* out,
|
||||
size_t* outLen) const override;
|
||||
/**
|
||||
* Get max compressed size for the given `uncompSize` value using LZ4
|
||||
* compression API.
|
||||
*/
|
||||
EXPORT size_t maxCompressedSize(size_t uncompSize) const override;
|
||||
|
||||
/**
|
||||
* Get uncompressed size for the given block using LZ4
|
||||
* compression API.
|
||||
*/
|
||||
EXPORT
|
||||
bool getUncompressedSize(char* in, size_t inLen,
|
||||
size_t* outLen) const override;
|
||||
|
||||
protected:
|
||||
uint8_t getChunkMagicNumber() const override;
|
||||
|
||||
private:
|
||||
const uint8_t CHUNK_MAGIC_LZ4 = 0xfc;
|
||||
};
|
||||
|
||||
/**
* Pool of shared compression interfaces keyed by a `uint32_t` id
* (presumably the compression type, as passed to `getCompressorByType` --
* confirm against `initializeCompressorPool`).
*/
using CompressorPool =
|
||||
std::unordered_map<uint32_t, std::shared_ptr<CompressInterface>>;
|
||||
|
||||
/**
|
||||
* Returns a pointer to the appropriate compression interface based on
|
||||
* `compressionType`. `compressionType` must be greater than 0.
|
||||
* Note: caller is responsible for memory deallocation.
|
||||
*/
|
||||
EXPORT CompressInterface*
|
||||
getCompressInterfaceByType(uint32_t compressionType,
|
||||
uint32_t numUserPaddingBytes = 0);
|
||||
|
||||
/**
|
||||
* Returns a pointer to the appropriate compression interface based on
|
||||
* `compressionName`.
|
||||
* Note: caller is responsible for memory deallocation.
|
||||
*/
|
||||
EXPORT CompressInterface* getCompressInterfaceByName(const std::string& compressionName,
|
||||
uint32_t numUserPaddingBytes = 0);
|
||||
|
||||
/**
|
||||
* Initializes a given `unordered_map` with all available compression
|
||||
* interfaces.
|
||||
*/
|
||||
EXPORT void initializeCompressorPool(CompressorPool& compressorPool,
|
||||
uint32_t numUserPaddingBytes = 0);
|
||||
|
||||
/**
|
||||
* Returns a `shared_ptr` to the compression interface for `compressionType`.
* Callers check the result for null before using it.
|
||||
*/
|
||||
EXPORT std::shared_ptr<CompressInterface>
|
||||
getCompressorByType(CompressorPool& compressorPool, uint32_t compressionType);
|
||||
|
||||
#ifdef SKIP_IDB_COMPRESSION
|
||||
inline IDBCompressInterface::IDBCompressInterface(unsigned int /*numUserPaddingBytes*/) {}
|
||||
inline IDBCompressInterface::~IDBCompressInterface() {}
|
||||
inline bool IDBCompressInterface::isCompressionAvail(int c) const
|
||||
inline CompressInterface::CompressInterface(unsigned int /*numUserPaddingBytes*/) {}
|
||||
inline bool CompressInterface::isCompressionAvail(int c)
|
||||
{
|
||||
return (c == 0);
|
||||
}
|
||||
inline int IDBCompressInterface::compressBlock(const char*, const size_t, unsigned char*, unsigned int&) const
|
||||
inline int CompressInterface::compressBlock(const char*, const size_t, unsigned char*, size_t&) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::uncompressBlock(const char* in, const size_t inLen, unsigned char* out, unsigned int& outLen) const
|
||||
inline int CompressInterface::uncompressBlock(const char* in,
|
||||
const size_t inLen,
|
||||
unsigned char* out,
|
||||
size_t& outLen) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::compress(const char* in, size_t inLen, char* out, size_t* outLen) const
|
||||
inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}
|
||||
inline int CompressInterface::verifyHdr(const void*)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::uncompress(const char* in, size_t inLen, char* out) const
|
||||
inline void CompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) {}
|
||||
// SKIP_IDB_COMPRESSION stub: does nothing. No `const` qualifier here, because
// initHdr is called without an object (CompressInterface::initHdr(...)) and the
// six-argument stub is likewise unqualified.
// NOTE(review): confirm against the in-class declaration.
inline void CompressInterface::initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) {}
|
||||
inline int CompressInterface::getPtrList(const char*, const int, CompChunkPtrList&)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline unsigned int CompressInterface::getPtrCount(const char*, const int)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline void IDBCompressInterface::initHdr(void*, void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int, int) const {}
|
||||
inline void initHdr(void*, uint32_t, execplan::CalpontSystemCatalog::ColDataType, int) const {}
|
||||
inline int IDBCompressInterface::verifyHdr(const void*) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::getPtrList(const char*, const int, CompChunkPtrList&) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline int IDBCompressInterface::getPtrList(const char*, CompChunkPtrList&) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline unsigned int IDBCompressInterface::getPtrCount(const char*, const int) const
|
||||
inline unsigned int CompressInterface::getPtrCount(const char*)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline unsigned int IDBCompressInterface::getPtrCount(const char*) const
|
||||
inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) {}
|
||||
inline void CompressInterface::storePtrs(const std::vector<uint64_t>&, void*) {}
|
||||
inline void
|
||||
CompressInterface::locateBlock(unsigned int block, unsigned int& chunkIndex,
|
||||
unsigned int& blockOffsetWithinChunk) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*, int) const {}
|
||||
inline void IDBCompressInterface::storePtrs(const std::vector<uint64_t>&, void*) const {}
|
||||
inline void IDBCompressInterface::locateBlock(unsigned int block,
|
||||
unsigned int& chunkIndex, unsigned int& blockOffsetWithinChunk) const {}
|
||||
inline int IDBCompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
|
||||
inline int CompressInterface::padCompressedChunks(unsigned char* buf, unsigned int& len, unsigned int maxLen) const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
inline uint64_t
|
||||
IDBCompressInterface::getVersionNumber(const void* hdrBuf) const
|
||||
inline uint64_t CompressInterface::getVersionNumber(const void* hdrBuf)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline void IDBCompressInterface::setBlockCount(void* hdrBuf, uint64_t count) const {}
|
||||
inline uint64_t IDBCompressInterface::getBlockCount(const void* hdrBuf) const
|
||||
inline void CompressInterface::setBlockCount(void* hdrBuf, uint64_t count) {}
|
||||
inline uint64_t CompressInterface::getBlockCount(const void* hdrBuf)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline void IDBCompressInterface::setHdrSize(void*, uint64_t) const {}
|
||||
inline uint64_t IDBCompressInterface::getHdrSize(const void*) const
|
||||
inline uint64_t CompressInterface::getCompressionType(const void* hdrBuf)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
inline execplan::CalpontSystemCatalog::ColDataType
|
||||
IDBCompressInterface::getColDataType(const void* hdrBuf) const
|
||||
CompressInterface::getColDataType(const void* hdrBuf)
|
||||
{
|
||||
return execplan::CalpontSystemCatalog::ColDataType::UNDEFINED;
|
||||
}
|
||||
// SKIP_IDB_COMPRESSION stub: always returns 0.
// getColumnWidth is declared `static` in the class, so the definition must
// not carry a `const` qualifier.
inline uint64_t CompressInterface::getColumnWidth(const void* /*hdrBuf*/)
{
    return 0;
}
|
||||
// SKIP_IDB_COMPRESSION stubs for the LBID accessors: getters return 0 and
// setters do nothing. All four are `inline`, like the other stubs in this
// section, so including the header from several translation units does not
// produce multiple-definition link errors.
inline uint64_t getLBID0(const void* /*hdrBuf*/) { return 0; }
inline void setLBID0(void* /*hdrBuf*/, uint64_t /*lbid*/) {}
inline uint64_t getLBID1(const void* /*hdrBuf*/) { return 0; }
inline void setLBID1(void* /*hdrBuf*/, uint64_t /*lbid*/) {}
|
||||
inline void CompressInterface::setHdrSize(void*, uint64_t) {}
|
||||
inline uint64_t CompressInterface::getHdrSize(const void*)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
// SKIP_IDB_COMPRESSION stub: only forwards the padding size to the base class.
// Marked `inline`, like the other stubs in this section, so the definition
// can appear in every translation unit that includes it.
inline CompressInterfaceSnappy::CompressInterfaceSnappy(uint32_t numUserPaddingBytes)
    : CompressInterface(numUserPaddingBytes)
{
}
|
||||
inline uint64_t IDBCompressInterface::getColumnWidth(const void* hdrBuf) const { return 0; }
|
||||
inline uint64_t IDBCompressInterface::maxCompressedSize(uint64_t uncompSize)
|
||||
{
|
||||
@ -377,8 +507,13 @@ inline bool IDBCompressInterface::getUncompressedSize(char* in, size_t inLen, si
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// NOTE(review): this stub is `const`-qualified but has no class qualifier, so
// as written it is a free function and should not compile when
// SKIP_IDB_COMPRESSION is defined; presumably it was meant to be a member
// stub -- confirm which class it belongs to.
uint8_t getChunkMagicNumber() const { return 0; }
|
||||
// SKIP_IDB_COMPRESSION stub: no compression interface is available, so this
// always returns nullptr. Marked `inline`, like the other stubs in this
// section, so the definition can appear in every translation unit that
// includes it.
inline CompressInterface* getCompressInterfaceByType(uint32_t /*compressionType*/,
                                                     uint32_t /*numUserPaddingBytes*/)
{
    return nullptr;
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#undef EXPORT
|
||||
|
@ -176,25 +176,24 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
|
||||
return -1;
|
||||
}
|
||||
|
||||
compress::IDBCompressInterface decompressor;
|
||||
char hdr1[compress::CompressInterface::HDR_BUF_LEN];
|
||||
nBytes = readFillBuffer( pFile, hdr1, compress::CompressInterface::HDR_BUF_LEN);
|
||||
|
||||
char hdr1[compress::IDBCompressInterface::HDR_BUF_LEN];
|
||||
nBytes = readFillBuffer( pFile, hdr1, compress::IDBCompressInterface::HDR_BUF_LEN);
|
||||
|
||||
if ( nBytes != compress::IDBCompressInterface::HDR_BUF_LEN )
|
||||
if ( nBytes != compress::CompressInterface::HDR_BUF_LEN )
|
||||
{
|
||||
delete pFile;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Verify we are a compressed file
|
||||
if (decompressor.verifyHdr(hdr1) < 0)
|
||||
if (compress::CompressInterface::verifyHdr(hdr1) < 0)
|
||||
{
|
||||
delete pFile;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - compress::IDBCompressInterface::HDR_BUF_LEN;
|
||||
int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
|
||||
compress::CompressInterface::HDR_BUF_LEN;
|
||||
char* hdr2 = new char[ptrSecSize];
|
||||
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
|
||||
|
||||
@ -206,7 +205,8 @@ off64_t PosixFileSystem::compressedSize(const char* path) const
|
||||
}
|
||||
|
||||
compress::CompChunkPtrList chunkPtrs;
|
||||
int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs);
|
||||
int rc = compress::CompressInterface::getPtrList(hdr2, ptrSecSize,
|
||||
chunkPtrs);
|
||||
delete[] hdr2;
|
||||
|
||||
if (rc != 0)
|
||||
|
@ -50,7 +50,10 @@ namespace joiner
|
||||
|
||||
uint64_t uniqueNums = 0;
|
||||
|
||||
JoinPartition::JoinPartition() { }
|
||||
/* Default ctor: the partition starts out with a Snappy compressor. */
JoinPartition::JoinPartition()
    // make_shared avoids a raw `new` and allocates the object together with
    // its control block.
    : compressor(std::make_shared<compress::CompressInterfaceSnappy>())
{
}
|
||||
|
||||
/* This is the ctor used by THJS */
|
||||
JoinPartition::JoinPartition(const RowGroup& lRG,
|
||||
@ -103,6 +106,22 @@ JoinPartition::JoinPartition(const RowGroup& lRG,
|
||||
|
||||
for (int i = 0; i < (int) bucketCount; i++)
|
||||
buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false)));
|
||||
|
||||
string compressionType;
|
||||
try
|
||||
{
|
||||
compressionType =
|
||||
config->getConfig("HashJoin", "TempFileCompressionType");
|
||||
} catch (...) {}
|
||||
|
||||
if (compressionType == "LZ4")
|
||||
{
|
||||
compressor.reset(new compress::CompressInterfaceLZ4());
|
||||
}
|
||||
else
|
||||
{
|
||||
compressor.reset(new compress::CompressInterfaceSnappy());
|
||||
}
|
||||
}
|
||||
|
||||
/* Ctor used by JoinPartition on expansion, creates JP's in filemode */
|
||||
@ -151,6 +170,8 @@ JoinPartition::JoinPartition(const JoinPartition& jp, bool splitMode) :
|
||||
smallRG.setData(&buffer);
|
||||
smallRG.resetRowGroup(0);
|
||||
smallRG.getRow(0, &smallRow);
|
||||
|
||||
compressor = jp.compressor;
|
||||
}
|
||||
|
||||
|
||||
@ -694,6 +715,7 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
|
||||
|
||||
fs.seekg(offset);
|
||||
fs.read((char*) &len, sizeof(len));
|
||||
|
||||
saveErrno = errno;
|
||||
|
||||
if (!fs)
|
||||
@ -735,12 +757,14 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
|
||||
else
|
||||
{
|
||||
size_t uncompressedSize;
|
||||
fs.read((char*) &uncompressedSize, sizeof(uncompressedSize));
|
||||
|
||||
boost::scoped_array<char> buf(new char[len]);
|
||||
|
||||
fs.read(buf.get(), len);
|
||||
saveErrno = errno;
|
||||
|
||||
if (!fs)
|
||||
if (!fs || !uncompressedSize)
|
||||
{
|
||||
fs.close();
|
||||
ostringstream os;
|
||||
@ -749,9 +773,9 @@ void JoinPartition::readByteStream(int which, ByteStream* bs)
|
||||
}
|
||||
|
||||
totalBytesRead += len;
|
||||
compressor.getUncompressedSize(buf.get(), len, &uncompressedSize);
|
||||
bs->needAtLeast(uncompressedSize);
|
||||
compressor.uncompress(buf.get(), len, (char*) bs->getInputPtr());
|
||||
compressor->uncompress(buf.get(), len, (char*) bs->getInputPtr(),
|
||||
&uncompressedSize);
|
||||
bs->advanceInputPtr(uncompressedSize);
|
||||
}
|
||||
|
||||
@ -801,13 +825,15 @@ uint64_t JoinPartition::writeByteStream(int which, ByteStream& bs)
|
||||
}
|
||||
else
|
||||
{
|
||||
uint64_t maxSize = compressor.maxCompressedSize(len);
|
||||
size_t actualSize;
|
||||
size_t maxSize = compressor->maxCompressedSize(len);
|
||||
size_t actualSize = maxSize;
|
||||
boost::scoped_array<uint8_t> compressed(new uint8_t[maxSize]);
|
||||
|
||||
compressor.compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
|
||||
ret = actualSize + 4;
|
||||
compressor->compress((char*) bs.buf(), len, (char*) compressed.get(), &actualSize);
|
||||
ret = actualSize + 4 + 8; // sizeof (size_t) == 8. Why 4?
|
||||
fs.write((char*) &actualSize, sizeof(actualSize));
|
||||
// Save uncompressed len.
|
||||
fs.write((char*) &len, sizeof(len));
|
||||
fs.write((char*) compressed.get(), actualSize);
|
||||
saveErrno = errno;
|
||||
|
||||
|
@ -164,7 +164,7 @@ private:
|
||||
|
||||
/* Compression support */
|
||||
bool useCompression;
|
||||
compress::IDBCompressInterface compressor;
|
||||
std::shared_ptr<compress::CompressInterface> compressor;
|
||||
/* TBD: do the reading/writing in one thread, compression/decompression in another */
|
||||
|
||||
/* Some stats for reporting */
|
||||
|
@ -64,6 +64,7 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
|
||||
{
|
||||
config::Config* config = config::Config::makeConfig();
|
||||
string val;
|
||||
string compressionType;
|
||||
|
||||
try
|
||||
{
|
||||
@ -75,6 +76,19 @@ CompressedInetStreamSocket::CompressedInetStreamSocket()
|
||||
useCompression = true;
|
||||
else
|
||||
useCompression = false;
|
||||
|
||||
try
|
||||
{
|
||||
compressionType =
|
||||
config->getConfig("NetworkCompression", "NetworkCompression");
|
||||
}
|
||||
catch (...) { }
|
||||
|
||||
auto* compressInterface = compress::getCompressInterfaceByName(compressionType);
|
||||
if (!compressInterface)
|
||||
compressInterface = new compress::CompressInterfaceSnappy();
|
||||
|
||||
alg.reset(compressInterface);
|
||||
}
|
||||
|
||||
Socket* CompressedInetStreamSocket::clone() const
|
||||
@ -87,20 +101,25 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
|
||||
{
|
||||
SBS readBS, ret;
|
||||
size_t uncompressedSize;
|
||||
bool err;
|
||||
|
||||
readBS = InetStreamSocket::read(timeout, isTimeOut, stats);
|
||||
|
||||
if (readBS->length() == 0 || fMagicBuffer == BYTESTREAM_MAGIC)
|
||||
return readBS;
|
||||
|
||||
err = alg.getUncompressedSize((char*) readBS->buf(), readBS->length(), &uncompressedSize);
|
||||
// Read stored len, first 4 bytes.
|
||||
uint32_t storedLen = *(uint32_t*) readBS->buf();
|
||||
|
||||
if (!err)
|
||||
if (!storedLen)
|
||||
return SBS(new ByteStream(0));
|
||||
|
||||
uncompressedSize = storedLen;
|
||||
ret.reset(new ByteStream(uncompressedSize));
|
||||
alg.uncompress((char*) readBS->buf(), readBS->length(), (char*) ret->getInputPtr());
|
||||
|
||||
alg->uncompress((char*) readBS->buf() + HEADER_SIZE,
|
||||
readBS->length() - HEADER_SIZE, (char*) ret->getInputPtr(),
|
||||
&uncompressedSize);
|
||||
|
||||
ret->advanceInputPtr(uncompressedSize);
|
||||
|
||||
return ret;
|
||||
@ -108,15 +127,18 @@ const SBS CompressedInetStreamSocket::read(const struct timespec* timeout, bool*
|
||||
|
||||
void CompressedInetStreamSocket::write(const ByteStream& msg, Stats* stats)
|
||||
{
|
||||
size_t outLen = 0;
|
||||
uint32_t len = msg.length();
|
||||
size_t len = msg.length();
|
||||
|
||||
if (useCompression && (len > 512))
|
||||
{
|
||||
ByteStream smsg(alg.maxCompressedSize(len));
|
||||
size_t outLen = alg->maxCompressedSize(len) + HEADER_SIZE;
|
||||
ByteStream smsg(outLen);
|
||||
|
||||
alg.compress((char*) msg.buf(), len, (char*) smsg.getInputPtr(), &outLen);
|
||||
smsg.advanceInputPtr(outLen);
|
||||
alg->compress((char*) msg.buf(), len,
|
||||
(char*) smsg.getInputPtr() + HEADER_SIZE, &outLen);
|
||||
// Save original len.
|
||||
*(uint32_t*) smsg.getInputPtr() = len;
|
||||
smsg.advanceInputPtr(outLen + HEADER_SIZE);
|
||||
|
||||
if (outLen < len)
|
||||
do_write(smsg, COMPRESSED_BYTESTREAM_MAGIC, stats);
|
||||
|
@ -54,8 +54,9 @@ public:
|
||||
virtual const IOSocket accept(const struct timespec* timeout);
|
||||
virtual void connect(const sockaddr* addr);
|
||||
private:
|
||||
compress::IDBCompressInterface alg;
|
||||
std::shared_ptr<compress::CompressInterface> alg;
|
||||
bool useCompression;
|
||||
static const uint32_t HEADER_SIZE = 4;
|
||||
};
|
||||
|
||||
} //namespace messageqcpp
|
||||
|
@ -337,15 +337,12 @@ int BulkLoad::loadJobInfo(
|
||||
}
|
||||
}
|
||||
|
||||
// Validate that specified compression type is available
|
||||
compress::IDBCompressInterface compressor;
|
||||
|
||||
for (unsigned kT = 0; kT < curJob.jobTableList.size(); kT++)
|
||||
{
|
||||
for (unsigned kC = 0; kC < curJob.jobTableList[kT].colList.size(); kC++)
|
||||
{
|
||||
if ( !compressor.isCompressionAvail(
|
||||
curJob.jobTableList[kT].colList[kC].compressionType) )
|
||||
if (!compress::CompressInterface::isCompressionAvail(
|
||||
curJob.jobTableList[kT].colList[kC].compressionType))
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Specified compression type (" <<
|
||||
|
@ -60,12 +60,11 @@ ColumnBufferCompressed::ColumnBufferCompressed( ColumnInfo* pColInfo,
|
||||
fToBeCompressedBuffer(0),
|
||||
fToBeCompressedCapacity(0),
|
||||
fNumBytes(0),
|
||||
fCompressor(0),
|
||||
fPreLoadHWMChunk(true),
|
||||
fFlushedStartHwmChunk(false)
|
||||
{
|
||||
fUserPaddingBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
|
||||
fCompressor = new compress::IDBCompressInterface( fUserPaddingBytes );
|
||||
compress::initializeCompressorPool(fCompressorPool, fUserPaddingBytes);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -79,7 +78,6 @@ ColumnBufferCompressed::~ColumnBufferCompressed()
|
||||
fToBeCompressedBuffer = 0;
|
||||
fToBeCompressedCapacity = 0;
|
||||
fNumBytes = 0;
|
||||
delete fCompressor;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -91,9 +89,7 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char*
|
||||
fFile = f;
|
||||
fStartingHwm = startHwm;
|
||||
|
||||
IDBCompressInterface compressor;
|
||||
|
||||
if (compressor.getPtrList(hdrs, fChunkPtrs) != 0)
|
||||
if (compress::CompressInterface::getPtrList(hdrs, fChunkPtrs) != 0)
|
||||
{
|
||||
return ERR_COMP_PARSE_HDRS;
|
||||
}
|
||||
@ -102,7 +98,15 @@ int ColumnBufferCompressed::setDbFile(IDBDataFile* f, HWM startHwm, const char*
|
||||
// rollback), that fall after the HWM, then drop those trailing ptrs.
|
||||
unsigned int chunkIndex = 0;
|
||||
unsigned int blockOffsetWithinChunk = 0;
|
||||
fCompressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk);
|
||||
|
||||
auto compressor = compress::getCompressorByType(
|
||||
fCompressorPool, fColInfo->column.compressionType);
|
||||
if (!compressor)
|
||||
{
|
||||
return ERR_COMP_WRONG_COMP_TYPE;
|
||||
}
|
||||
|
||||
compressor->locateBlock(fStartingHwm, chunkIndex, blockOffsetWithinChunk);
|
||||
|
||||
if ((chunkIndex + 1) < fChunkPtrs.size())
|
||||
{
|
||||
@ -127,11 +131,11 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf(
|
||||
if (!fToBeCompressedBuffer)
|
||||
{
|
||||
fToBeCompressedBuffer =
|
||||
new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN];
|
||||
new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN];
|
||||
}
|
||||
|
||||
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
|
||||
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
|
||||
CompressInterface::UNCOMPRESSED_INBUF_LEN,
|
||||
fColInfo->column.emptyVal,
|
||||
fColInfo->column.width );
|
||||
|
||||
@ -147,10 +151,10 @@ int ColumnBufferCompressed::resetToBeCompressedColBuf(
|
||||
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
|
||||
}
|
||||
|
||||
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
|
||||
// Set file offset past end of last chunk
|
||||
startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2;
|
||||
startFileOffset = CompressInterface::HDR_BUF_LEN * 2;
|
||||
|
||||
if (fChunkPtrs.size() > 0)
|
||||
startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first +
|
||||
@ -223,7 +227,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
|
||||
|
||||
// Expand the compression buffer size if working with an abbrev extent, and
|
||||
// the bytes we are about to add will overflow the abbreviated extent.
|
||||
if ((fToBeCompressedCapacity < IDBCompressInterface::UNCOMPRESSED_INBUF_LEN) &&
|
||||
if ((fToBeCompressedCapacity < CompressInterface::UNCOMPRESSED_INBUF_LEN) &&
|
||||
((fNumBytes + writeSize + fillUpWEmptiesWriteSize) > fToBeCompressedCapacity) )
|
||||
{
|
||||
std::ostringstream oss;
|
||||
@ -233,7 +237,7 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
|
||||
"; part-" << fColInfo->curCol.dataFile.fPartition <<
|
||||
"; seg-" << fColInfo->curCol.dataFile.fSegment;
|
||||
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
|
||||
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
}
|
||||
|
||||
if ((fNumBytes + writeSize + fillUpWEmptiesWriteSize) <= fToBeCompressedCapacity)
|
||||
@ -316,12 +320,12 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
|
||||
|
||||
// Start over again loading a new to-be-compressed buffer
|
||||
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
|
||||
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
|
||||
CompressInterface::UNCOMPRESSED_INBUF_LEN,
|
||||
fColInfo->column.emptyVal,
|
||||
fColInfo->column.width );
|
||||
|
||||
fToBeCompressedCapacity =
|
||||
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
CompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
bufOffset = fToBeCompressedBuffer;
|
||||
|
||||
fNumBytes = 0;
|
||||
@ -377,21 +381,31 @@ int ColumnBufferCompressed::writeToFile(int startOffset, int writeSize,
|
||||
//------------------------------------------------------------------------------
|
||||
int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile )
|
||||
{
|
||||
const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(fToBeCompressedCapacity) +
|
||||
fUserPaddingBytes;
|
||||
auto compressor = compress::getCompressorByType(
|
||||
fCompressorPool, fColInfo->column.compressionType);
|
||||
if (!compressor)
|
||||
{
|
||||
return ERR_COMP_WRONG_COMP_TYPE;
|
||||
}
|
||||
|
||||
const size_t OUTPUT_BUFFER_SIZE =
|
||||
compressor->maxCompressedSize(fToBeCompressedCapacity) +
|
||||
fUserPaddingBytes +
|
||||
// Padded len = len + COMPRESSED_CHUNK_INCREMENT_SIZE - (len %
|
||||
// COMPRESSED_CHUNK_INCREMENT_SIZE) + user padding bytes
|
||||
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
|
||||
|
||||
unsigned char* compressedOutBuf = new unsigned char[ OUTPUT_BUFFER_SIZE ];
|
||||
boost::scoped_array<unsigned char> compressedOutBufPtr(compressedOutBuf);
|
||||
unsigned int outputLen = OUTPUT_BUFFER_SIZE;
|
||||
size_t outputLen = OUTPUT_BUFFER_SIZE;
|
||||
|
||||
#ifdef PROFILE
|
||||
Stats::startParseEvent(WE_STATS_COMPRESS_COL_COMPRESS);
|
||||
#endif
|
||||
|
||||
int rc = fCompressor->compressBlock(
|
||||
reinterpret_cast<char*>(fToBeCompressedBuffer),
|
||||
fToBeCompressedCapacity,
|
||||
compressedOutBuf,
|
||||
outputLen );
|
||||
int rc = compressor->compressBlock(
|
||||
reinterpret_cast<char*>(fToBeCompressedBuffer),
|
||||
fToBeCompressedCapacity, compressedOutBuf, outputLen);
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
@ -399,7 +413,7 @@ int ColumnBufferCompressed::compressAndFlush( bool bFinishingFile )
|
||||
}
|
||||
|
||||
// Round up the compressed chunk size
|
||||
rc = fCompressor->padCompressedChunks( compressedOutBuf,
|
||||
rc = compressor->padCompressedChunks( compressedOutBuf,
|
||||
outputLen, OUTPUT_BUFFER_SIZE );
|
||||
|
||||
if (rc != 0)
|
||||
@ -581,26 +595,24 @@ int ColumnBufferCompressed::finishFile(bool bTruncFile)
|
||||
int ColumnBufferCompressed::saveCompressionHeaders( )
|
||||
{
|
||||
// Construct the header records
|
||||
char hdrBuf[IDBCompressInterface::HDR_BUF_LEN * 2];
|
||||
char hdrBuf[CompressInterface::HDR_BUF_LEN * 2];
|
||||
RETURN_ON_ERROR(fColInfo->colOp->readHeaders(fFile, hdrBuf));
|
||||
|
||||
BRM::LBID_t lbid = fCompressor->getLBIDByIndex(hdrBuf, 0);
|
||||
fCompressor->initHdr(hdrBuf, fColInfo->column.width,
|
||||
fColInfo->column.dataType,
|
||||
fColInfo->column.compressionType);
|
||||
fCompressor->setBlockCount(hdrBuf,
|
||||
(fColInfo->getFileSize() / BYTE_PER_BLOCK) );
|
||||
BRM::LBID_t lbid = compress::CompressInterface::getLBIDByIndex(hdrBuf, 0);
|
||||
compress::CompressInterface::initHdr(hdrBuf, fColInfo->column.width, fColInfo->column.dataType,
|
||||
fColInfo->column.compressionType);
|
||||
compress::CompressInterface::setBlockCount(hdrBuf, (fColInfo->getFileSize() / BYTE_PER_BLOCK));
|
||||
// If lbid written in the header is not 0 and not equal to `lastupdatedlbid` - we are running
|
||||
// for the next extent for column segment file.
|
||||
const auto lastUpdatedLbid = fColInfo->getLastUpdatedLBID();
|
||||
if (lbid && lastUpdatedLbid != lbid)
|
||||
{
|
||||
// Write back lbid, after header initialization.
|
||||
fCompressor->setLBIDByIndex(hdrBuf, lbid, 0);
|
||||
fCompressor->setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1);
|
||||
compress::CompressInterface::setLBIDByIndex(hdrBuf, lbid, 0);
|
||||
compress::CompressInterface::setLBIDByIndex(hdrBuf, lastUpdatedLbid, 1);
|
||||
}
|
||||
else
|
||||
fCompressor->setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0);
|
||||
compress::CompressInterface::setLBIDByIndex(hdrBuf, fColInfo->getLastUpdatedLBID(), 0);
|
||||
|
||||
std::vector<uint64_t> ptrs;
|
||||
|
||||
@ -611,7 +623,7 @@ int ColumnBufferCompressed::saveCompressionHeaders( )
|
||||
|
||||
unsigned lastIdx = fChunkPtrs.size() - 1;
|
||||
ptrs.push_back( fChunkPtrs[lastIdx].first + fChunkPtrs[lastIdx].second );
|
||||
fCompressor->storePtrs( ptrs, hdrBuf );
|
||||
compress::CompressInterface::storePtrs(ptrs, hdrBuf);
|
||||
|
||||
// Write out the header records
|
||||
//char resp;
|
||||
@ -641,9 +653,9 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
|
||||
if (!fToBeCompressedBuffer)
|
||||
{
|
||||
fToBeCompressedBuffer =
|
||||
new unsigned char[IDBCompressInterface::UNCOMPRESSED_INBUF_LEN];
|
||||
new unsigned char[CompressInterface::UNCOMPRESSED_INBUF_LEN];
|
||||
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
|
||||
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
|
||||
CompressInterface::UNCOMPRESSED_INBUF_LEN,
|
||||
fColInfo->column.emptyVal,
|
||||
fColInfo->column.width );
|
||||
bNewBuffer = true;
|
||||
@ -656,12 +668,19 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
|
||||
unsigned int blockOffsetWithinChunk = 0;
|
||||
bool bSkipStartingBlks = false;
|
||||
|
||||
auto compressor = compress::getCompressorByType(
|
||||
fCompressorPool, fColInfo->column.compressionType);
|
||||
if (!compressor)
|
||||
{
|
||||
return ERR_COMP_WRONG_COMP_TYPE;
|
||||
}
|
||||
|
||||
if (fPreLoadHWMChunk)
|
||||
{
|
||||
if (fChunkPtrs.size() > 0)
|
||||
{
|
||||
fCompressor->locateBlock(fStartingHwm,
|
||||
chunkIndex, blockOffsetWithinChunk);
|
||||
compressor->locateBlock(fStartingHwm, chunkIndex,
|
||||
blockOffsetWithinChunk);
|
||||
|
||||
if (chunkIndex < fChunkPtrs.size())
|
||||
startFileOffset = fChunkPtrs[chunkIndex].first;
|
||||
@ -718,8 +737,8 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
|
||||
}
|
||||
|
||||
// Uncompress the chunk into our 4MB buffer
|
||||
unsigned int outLen = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
int rc = fCompressor->uncompressBlock(
|
||||
size_t outLen = CompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
int rc = compressor->uncompressBlock(
|
||||
compressedOutBuf,
|
||||
fChunkPtrs[chunkIndex].second,
|
||||
fToBeCompressedBuffer,
|
||||
@ -758,7 +777,7 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
|
||||
if (!bNewBuffer)
|
||||
{
|
||||
BlockOp::setEmptyBuf( fToBeCompressedBuffer,
|
||||
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN,
|
||||
CompressInterface::UNCOMPRESSED_INBUF_LEN,
|
||||
fColInfo->column.emptyVal,
|
||||
fColInfo->column.width );
|
||||
}
|
||||
@ -775,10 +794,10 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
|
||||
fLog->logMsg( oss.str(), MSGLVL_INFO2 );
|
||||
}
|
||||
|
||||
fToBeCompressedCapacity = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
fToBeCompressedCapacity = CompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
|
||||
// Set file offset to start after last current chunk
|
||||
startFileOffset = IDBCompressInterface::HDR_BUF_LEN * 2;
|
||||
startFileOffset = CompressInterface::HDR_BUF_LEN * 2;
|
||||
|
||||
if (fChunkPtrs.size() > 0)
|
||||
startFileOffset = fChunkPtrs[ fChunkPtrs.size() - 1 ].first +
|
||||
@ -796,5 +815,4 @@ int ColumnBufferCompressed::initToBeCompressedBuffer(long long& startFileOffset)
|
||||
|
||||
return NO_ERROR;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -107,8 +107,7 @@ private:
|
||||
// should always be 4MB, unless
|
||||
// working with abbrev extent.
|
||||
size_t fNumBytes; // num Bytes in comp buffer
|
||||
compress::IDBCompressInterface*
|
||||
fCompressor; // data compression object
|
||||
compress::CompressorPool fCompressorPool; // data compression object pool
|
||||
compress::CompChunkPtrList
|
||||
fChunkPtrs; // col file header information
|
||||
bool fPreLoadHWMChunk; // preload 1st HWM chunk only
|
||||
|
@ -450,7 +450,7 @@ int ColumnInfo::createDelayedFileIfNeeded( const std::string& tableName )
|
||||
if (column.dctnry.fCompressionType != 0)
|
||||
{
|
||||
DctnryCompress1* tempD1;
|
||||
tempD1 = new DctnryCompress1;
|
||||
tempD1 = new DctnryCompress1(column.dctnry.fCompressionType);
|
||||
tempD1->setMaxActiveChunkNum(1);
|
||||
tempD1->setBulkFlag(true);
|
||||
tempD = tempD1;
|
||||
@ -668,7 +668,7 @@ int ColumnInfo::extendColumnNewExtent(
|
||||
uint16_t segmentNew = 0;
|
||||
BRM::LBID_t startLbid;
|
||||
|
||||
char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
|
||||
char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
|
||||
|
||||
// Extend the column by adding an extent to the next
|
||||
// DBRoot, partition, and segment file in the rotation
|
||||
@ -1684,7 +1684,8 @@ int ColumnInfo::openDctnryStore( bool bMustExist )
|
||||
|
||||
if ( column.dctnry.fCompressionType != 0)
|
||||
{
|
||||
DctnryCompress1* dctnryCompress1 = new DctnryCompress1;
|
||||
DctnryCompress1* dctnryCompress1 =
|
||||
new DctnryCompress1(column.dctnry.fCompressionType);
|
||||
dctnryCompress1->setMaxActiveChunkNum(1);
|
||||
dctnryCompress1->setBulkFlag(true);
|
||||
fStore = dctnryCompress1;
|
||||
|
@ -108,7 +108,7 @@ int ColumnInfoCompressed::closeColumnFile(bool bCompletingExtent, bool bAbort)
|
||||
//------------------------------------------------------------------------------
|
||||
int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm )
|
||||
{
|
||||
char hdr[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
|
||||
char hdr[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
|
||||
RETURN_ON_ERROR( colOp->readHeaders(curCol.dataFile.pFile, hdr) );
|
||||
|
||||
// Initialize the output buffer manager for the column.
|
||||
@ -129,10 +129,9 @@ int ColumnInfoCompressed::setupInitialColumnFile( HWM oldHwm, HWM hwm )
|
||||
|
||||
fColBufferMgr = mgr;
|
||||
|
||||
IDBCompressInterface compressor;
|
||||
int abbrevFlag =
|
||||
( compressor.getBlockCount(hdr) ==
|
||||
uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width / BYTE_PER_BLOCK) );
|
||||
int abbrevFlag = (compress::CompressInterface::getBlockCount(hdr) ==
|
||||
uint64_t(INITIAL_EXTENT_ROWS_TO_DISK * column.width /
|
||||
BYTE_PER_BLOCK));
|
||||
setFileSize( hwm, abbrevFlag );
|
||||
|
||||
// See if dealing with abbreviated extent that will need expanding.
|
||||
@ -324,9 +323,9 @@ int ColumnInfoCompressed::truncateDctnryStore(
|
||||
return rc;
|
||||
}
|
||||
|
||||
char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ];
|
||||
char controlHdr[ CompressInterface::HDR_BUF_LEN ];
|
||||
rc = fTruncateDctnryFileOp.readFile( dFile,
|
||||
(unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN);
|
||||
(unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN);
|
||||
|
||||
if (rc != NO_ERROR)
|
||||
{
|
||||
@ -345,8 +344,7 @@ int ColumnInfoCompressed::truncateDctnryStore(
|
||||
return rc;
|
||||
}
|
||||
|
||||
IDBCompressInterface compressor;
|
||||
int rc1 = compressor.verifyHdr( controlHdr );
|
||||
int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
|
||||
|
||||
if (rc1 != 0)
|
||||
{
|
||||
@ -372,7 +370,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
|
||||
// actually grow the file (something we don't want to do), because we have
|
||||
// not yet reserved a full extent (on disk) for this dictionary store file.
|
||||
const int PSEUDO_COL_WIDTH = 8;
|
||||
uint64_t numBlocks = compressor.getBlockCount( controlHdr );
|
||||
uint64_t numBlocks =
|
||||
compress::CompressInterface::getBlockCount(controlHdr);
|
||||
|
||||
if ( numBlocks == uint64_t
|
||||
(INITIAL_EXTENT_ROWS_TO_DISK * PSEUDO_COL_WIDTH / BYTE_PER_BLOCK) )
|
||||
@ -390,8 +389,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
|
||||
return NO_ERROR;
|
||||
}
|
||||
|
||||
uint64_t hdrSize = compressor.getHdrSize(controlHdr);
|
||||
uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN;
|
||||
uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
|
||||
uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
|
||||
char* pointerHdr = new char[ptrHdrSize];
|
||||
|
||||
rc = fTruncateDctnryFileOp.readFile(dFile,
|
||||
@ -416,7 +415,8 @@ int ColumnInfoCompressed::truncateDctnryStore(
|
||||
}
|
||||
|
||||
CompChunkPtrList chunkPtrs;
|
||||
rc1 = compressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs );
|
||||
rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
|
||||
chunkPtrs);
|
||||
delete[] pointerHdr;
|
||||
|
||||
if (rc1 != 0)
|
||||
|
@ -96,7 +96,7 @@ size_t readFillBuffer(
|
||||
return totalBytesRead;
|
||||
}
|
||||
|
||||
off64_t getCompressedDataSize(string& fileName)
|
||||
static off64_t getCompressedDataSize(string& fileName)
|
||||
{
|
||||
off64_t dataSize = 0;
|
||||
IDBDataFile* pFile = 0;
|
||||
@ -119,21 +119,21 @@ off64_t getCompressedDataSize(string& fileName)
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
|
||||
IDBCompressInterface decompressor;
|
||||
//--------------------------------------------------------------------------
|
||||
// Read headers and extract compression pointers
|
||||
//--------------------------------------------------------------------------
|
||||
char hdr1[IDBCompressInterface::HDR_BUF_LEN];
|
||||
nBytes = readFillBuffer( pFile, hdr1, IDBCompressInterface::HDR_BUF_LEN);
|
||||
char hdr1[CompressInterface::HDR_BUF_LEN];
|
||||
nBytes = readFillBuffer( pFile, hdr1, CompressInterface::HDR_BUF_LEN);
|
||||
|
||||
if ( nBytes != IDBCompressInterface::HDR_BUF_LEN )
|
||||
if ( nBytes != CompressInterface::HDR_BUF_LEN )
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Error reading first header from file " << fileName;
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
|
||||
int64_t ptrSecSize = decompressor.getHdrSize(hdr1) - IDBCompressInterface::HDR_BUF_LEN;
|
||||
int64_t ptrSecSize = compress::CompressInterface::getHdrSize(hdr1) -
|
||||
CompressInterface::HDR_BUF_LEN;
|
||||
char* hdr2 = new char[ptrSecSize];
|
||||
nBytes = readFillBuffer( pFile, hdr2, ptrSecSize);
|
||||
|
||||
@ -145,7 +145,8 @@ off64_t getCompressedDataSize(string& fileName)
|
||||
}
|
||||
|
||||
CompChunkPtrList chunkPtrs;
|
||||
int rc = decompressor.getPtrList(hdr2, ptrSecSize, chunkPtrs);
|
||||
int rc =
|
||||
compress::CompressInterface::getPtrList(hdr2, ptrSecSize, chunkPtrs);
|
||||
delete[] hdr2;
|
||||
|
||||
if (rc != 0)
|
||||
|
@ -51,6 +51,7 @@ namespace WriteEngine
|
||||
// BulkRollbackFileCompressed constructor.
// Passes mgr through to the BulkRollbackFile initializer, then calls
// compress::initializeCompressorPool() on fCompressorPool so that later code
// can look up a compressor from the pool (see the
// compress::getCompressorByType(fCompressorPool, ...) calls in this file).
BulkRollbackFileCompressed::BulkRollbackFileCompressed(BulkRollbackMgr* mgr) :
|
||||
BulkRollbackFile(mgr)
|
||||
{
|
||||
compress::initializeCompressorPool(fCompressorPool);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -104,7 +105,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
|
||||
}
|
||||
|
||||
// Read and parse the header pointers
|
||||
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];;
|
||||
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];;
|
||||
CompChunkPtrList chunkPtrs;
|
||||
std::string errMsg;
|
||||
int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg);
|
||||
@ -127,7 +128,20 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
|
||||
unsigned int blockOffset = fileSizeBlocks - 1;
|
||||
unsigned int chunkIndex = 0;
|
||||
unsigned int blkOffsetInChunk = 0;
|
||||
fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
|
||||
|
||||
auto fCompressor = compress::getCompressorByType(
|
||||
fCompressorPool,
|
||||
compress::CompressInterface::getCompressionType(hdrs));
|
||||
if (!fCompressor)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Error, wrong compression type for segment file"
|
||||
<< ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-"
|
||||
<< partNum << "; segment-" << segNum << ";";
|
||||
throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
|
||||
}
|
||||
|
||||
fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
|
||||
|
||||
// Truncate the extra extents that are to be aborted
|
||||
if (chunkIndex < chunkPtrs.size())
|
||||
@ -145,7 +159,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
|
||||
logging::M0075, columnOID, msgText2.str() );
|
||||
|
||||
// Drop off any trailing pointers (that point beyond the last block)
|
||||
fCompressor.setBlockCount( hdrs, fileSizeBlocks );
|
||||
compress::CompressInterface::setBlockCount(hdrs, fileSizeBlocks);
|
||||
std::vector<uint64_t> ptrs;
|
||||
|
||||
for (unsigned i = 0; i <= chunkIndex; i++)
|
||||
@ -155,7 +169,7 @@ void BulkRollbackFileCompressed::truncateSegmentFile(
|
||||
|
||||
ptrs.push_back( chunkPtrs[chunkIndex].first +
|
||||
chunkPtrs[chunkIndex].second );
|
||||
fCompressor.storePtrs( ptrs, hdrs );
|
||||
compress::CompressInterface::storePtrs(ptrs, hdrs);
|
||||
|
||||
rc = fDbFile.writeHeaders( pFile, hdrs );
|
||||
|
||||
@ -252,7 +266,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
|
||||
}
|
||||
|
||||
// Read and parse the header pointers
|
||||
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
|
||||
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
|
||||
CompChunkPtrList chunkPtrs;
|
||||
std::string errMsg;
|
||||
int rc = loadColumnHdrPtrs(pFile, hdrs, chunkPtrs, errMsg);
|
||||
@ -275,7 +289,20 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
|
||||
unsigned int blockOffset = startOffsetBlk - 1;
|
||||
unsigned int chunkIndex = 0;
|
||||
unsigned int blkOffsetInChunk = 0;
|
||||
fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
|
||||
|
||||
auto fCompressor = compress::getCompressorByType(
|
||||
fCompressorPool,
|
||||
compress::CompressInterface::getCompressionType(hdrs));
|
||||
if (!fCompressor)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Error, wrong compression type for segment file"
|
||||
<< ": OID-" << columnOID << "; DbRoot-" << dbRoot << "; partition-"
|
||||
<< partNum << "; segment-" << segNum << ";";
|
||||
throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
|
||||
}
|
||||
|
||||
fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
|
||||
|
||||
if (chunkIndex < chunkPtrs.size())
|
||||
{
|
||||
@ -401,7 +428,8 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
|
||||
// Watch for the special case where we are restoring a db file as an
|
||||
// empty file (chunkindex=0 and restoredChunkLen=0); in this case we
|
||||
// just restore the first pointer (set to 8192).
|
||||
fCompressor.setBlockCount( hdrs, (startOffsetBlk + nBlocks) );
|
||||
compress::CompressInterface::setBlockCount(hdrs,
|
||||
(startOffsetBlk + nBlocks));
|
||||
std::vector<uint64_t> newPtrs;
|
||||
|
||||
if ((chunkIndex > 0) || (restoredChunkLen > 0))
|
||||
@ -413,7 +441,7 @@ void BulkRollbackFileCompressed::reInitTruncColumnExtent(
|
||||
}
|
||||
|
||||
newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen );
|
||||
fCompressor.storePtrs( newPtrs, hdrs );
|
||||
compress::CompressInterface::storePtrs(newPtrs, hdrs);
|
||||
|
||||
rc = fDbFile.writeHeaders( pFile, hdrs );
|
||||
|
||||
@ -482,7 +510,7 @@ int BulkRollbackFileCompressed::loadColumnHdrPtrs(
|
||||
}
|
||||
|
||||
// Parse the header pointers
|
||||
int rc1 = fCompressor.getPtrList( hdrs, chunkPtrs );
|
||||
int rc1 = compress::CompressInterface::getPtrList(hdrs, chunkPtrs);
|
||||
|
||||
if (rc1 != 0)
|
||||
{
|
||||
@ -548,7 +576,7 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
|
||||
throw WeException( oss.str(), ERR_FILE_OPEN );
|
||||
}
|
||||
|
||||
char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ];
|
||||
char controlHdr[ CompressInterface::HDR_BUF_LEN ];
|
||||
CompChunkPtrList chunkPtrs;
|
||||
uint64_t ptrHdrSize;
|
||||
std::string errMsg;
|
||||
@ -572,7 +600,20 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
|
||||
unsigned int blockOffset = startOffsetBlk - 1;
|
||||
unsigned int chunkIndex = 0;
|
||||
unsigned int blkOffsetInChunk = 0;
|
||||
fCompressor.locateBlock( blockOffset, chunkIndex, blkOffsetInChunk );
|
||||
|
||||
auto fCompressor = compress::getCompressorByType(
|
||||
fCompressorPool,
|
||||
compress::CompressInterface::getCompressionType(controlHdr));
|
||||
if (!fCompressor)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Error, wrong compression type for segment file"
|
||||
<< ": OID-" << dStoreOID << "; DbRoot-" << dbRoot << "; partition-"
|
||||
<< partNum << "; segment-" << segNum << ";";
|
||||
throw WeException(oss.str(), ERR_COMP_WRONG_COMP_TYPE);
|
||||
}
|
||||
|
||||
fCompressor->locateBlock(blockOffset, chunkIndex, blkOffsetInChunk);
|
||||
|
||||
if (chunkIndex < chunkPtrs.size())
|
||||
{
|
||||
@ -686,7 +727,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
|
||||
// Watch for the special case where we are restoring a db file as an
|
||||
// empty file (chunkindex=0 and restoredChunkLen=0); in this case we
|
||||
// just restore the first pointer (set to 8192).
|
||||
fCompressor.setBlockCount( controlHdr, (startOffsetBlk + nBlocks) );
|
||||
compress::CompressInterface::setBlockCount(controlHdr,
|
||||
(startOffsetBlk + nBlocks));
|
||||
std::vector<uint64_t> newPtrs;
|
||||
|
||||
if ((chunkIndex > 0) || (restoredChunkLen > 0))
|
||||
@ -699,7 +741,8 @@ void BulkRollbackFileCompressed::reInitTruncDctnryExtent(
|
||||
|
||||
newPtrs.push_back( chunkPtrs[chunkIndex].first + restoredChunkLen );
|
||||
char* pointerHdr = new char[ptrHdrSize];
|
||||
fCompressor.storePtrs( newPtrs, pointerHdr, ptrHdrSize );
|
||||
compress::CompressInterface::storePtrs(newPtrs, pointerHdr,
|
||||
ptrHdrSize);
|
||||
|
||||
rc = fDbFile.writeHeaders( pFile, controlHdr, pointerHdr, ptrHdrSize );
|
||||
delete[] pointerHdr;
|
||||
@ -759,7 +802,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
|
||||
std::string& errMsg) const
|
||||
{
|
||||
int rc = fDbFile.readFile(
|
||||
pFile, (unsigned char*)controlHdr, IDBCompressInterface::HDR_BUF_LEN);
|
||||
pFile, (unsigned char*)controlHdr, CompressInterface::HDR_BUF_LEN);
|
||||
|
||||
if (rc != NO_ERROR)
|
||||
{
|
||||
@ -771,7 +814,7 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
|
||||
return rc;
|
||||
}
|
||||
|
||||
int rc1 = fCompressor.verifyHdr( controlHdr );
|
||||
int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
|
||||
|
||||
if (rc1 != 0)
|
||||
{
|
||||
@ -786,8 +829,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
|
||||
return rc;
|
||||
}
|
||||
|
||||
uint64_t hdrSize = fCompressor.getHdrSize(controlHdr);
|
||||
ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN;
|
||||
uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
|
||||
ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
|
||||
char* pointerHdr = new char[ptrHdrSize];
|
||||
|
||||
rc = fDbFile.readFile(pFile, (unsigned char*)pointerHdr, ptrHdrSize);
|
||||
@ -804,7 +847,8 @@ int BulkRollbackFileCompressed::loadDctnryHdrPtrs(
|
||||
}
|
||||
|
||||
// Parse the header pointers
|
||||
rc1 = fCompressor.getPtrList( pointerHdr, ptrHdrSize, chunkPtrs );
|
||||
rc1 = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
|
||||
chunkPtrs);
|
||||
delete[] pointerHdr;
|
||||
|
||||
if (rc1 != 0)
|
||||
@ -1033,5 +1077,4 @@ size_t BulkRollbackFileCompressed::readFillBuffer(
|
||||
|
||||
return totalBytesRead;
|
||||
}
|
||||
|
||||
} //end of namespace
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "we_define.h"
|
||||
#include "we_type.h"
|
||||
@ -148,7 +149,7 @@ private:
|
||||
uint64_t& ptrHdrSize,
|
||||
std::string& errMsg ) const;
|
||||
|
||||
compress::IDBCompressInterface fCompressor;
|
||||
compress::CompressorPool fCompressorPool;
|
||||
};
|
||||
|
||||
} //end of namespace
|
||||
|
@ -67,8 +67,6 @@ namespace WriteEngine
|
||||
extern int NUM_BLOCKS_PER_INITIAL_EXTENT; // defined in we_dctnry.cpp
|
||||
extern WErrorCodes ec; // defined in we_log.cpp
|
||||
|
||||
const int COMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::maxCompressedSize(UNCOMPRESSED_CHUNK_SIZE) + 64 + 3 + 8 * 1024;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Search for the specified chunk in fChunkList.
|
||||
//------------------------------------------------------------------------------
|
||||
@ -91,18 +89,24 @@ ChunkData* CompFileData::findChunk(int64_t id) const
|
||||
//------------------------------------------------------------------------------
|
||||
// ChunkManager constructor
|
||||
//------------------------------------------------------------------------------
|
||||
ChunkManager::ChunkManager() : fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false),
|
||||
fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()),
|
||||
fFileOp(0), fSysLogger(NULL), fTransId(-1),
|
||||
fLocalModuleId(Config::getLocalModuleID()),
|
||||
fFs(fIsHdfs ?
|
||||
IDBFileSystem::getFs(IDBDataFile::HDFS) :
|
||||
IDBPolicy::useCloud() ?
|
||||
IDBFileSystem::getFs(IDBDataFile::CLOUD) :
|
||||
IDBFileSystem::getFs(IDBDataFile::BUFFERED))
|
||||
ChunkManager::ChunkManager()
|
||||
: fMaxActiveChunkNum(100), fLenCompressed(0), fIsBulkLoad(false),
|
||||
fDropFdCache(false), fIsInsert(false), fIsHdfs(IDBPolicy::useHdfs()),
|
||||
fFileOp(0), fSysLogger(NULL), fTransId(-1),
|
||||
fLocalModuleId(Config::getLocalModuleID()),
|
||||
fFs(fIsHdfs ? IDBFileSystem::getFs(IDBDataFile::HDFS)
|
||||
: IDBPolicy::useCloud()
|
||||
? IDBFileSystem::getFs(IDBDataFile::CLOUD)
|
||||
: IDBFileSystem::getFs(IDBDataFile::BUFFERED))
|
||||
{
|
||||
fUserPaddings = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
|
||||
fCompressor.numUserPaddingBytes(fUserPaddings);
|
||||
compress::initializeCompressorPool(fCompressorPool, fUserPaddings);
|
||||
|
||||
COMPRESSED_CHUNK_SIZE =
|
||||
compress::CompressInterface::getMaxCompressedSizeGeneric(
|
||||
UNCOMPRESSED_CHUNK_SIZE) +
|
||||
64 + 3 + 8 * 1024;
|
||||
|
||||
fMaxCompressedBufSize = COMPRESSED_CHUNK_SIZE + fUserPaddings;
|
||||
fBufCompressed = new char[fMaxCompressedBufSize];
|
||||
fSysLogger = new logging::Logger(SUBSYSTEM_ID_WE);
|
||||
@ -383,16 +387,22 @@ CompFileData* ChunkManager::getFileData(const FID& fid,
|
||||
}
|
||||
|
||||
// make sure the header is valid
|
||||
if (fCompressor.verifyHdr(fileData->fFileHeader.fControlData) != 0)
|
||||
if (compress::CompressInterface::verifyHdr(fileData->fFileHeader.fControlData) != 0)
|
||||
{
|
||||
WE_COMP_DBG(cout << "Invalid header." << endl;)
|
||||
delete fileData;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
|
||||
int headerSize = compress::CompressInterface::getHdrSize(
|
||||
fileData->fFileHeader.fControlData);
|
||||
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
|
||||
|
||||
// Save segment file compression type.
|
||||
uint32_t compressionType = compress::CompressInterface::getCompressionType(
|
||||
fileData->fFileHeader.fControlData);
|
||||
fileData->fCompressionType = compressionType;
|
||||
|
||||
if (ptrSecSize > COMPRESSED_FILE_HEADER_UNIT)
|
||||
{
|
||||
// >8K header, dictionary width > 128
|
||||
@ -462,11 +472,12 @@ IDBDataFile* ChunkManager::createDctnryFile(const FID& fid,
|
||||
|
||||
// Dictionary store extent width == 0. See more details in function
|
||||
// `createDictStoreExtent`.
|
||||
fCompressor.initHdr(fileData->fFileHeader.fControlData,
|
||||
fileData->fFileHeader.fPtrSection,
|
||||
/*colWidth=*/0, fileData->fColDataType,
|
||||
fFileOp->compressionType(), hdrSize);
|
||||
fCompressor.setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0);
|
||||
compress::CompressInterface::initHdr(
|
||||
fileData->fFileHeader.fControlData, fileData->fFileHeader.fPtrSection,
|
||||
/*colWidth=*/0, fileData->fColDataType, fFileOp->compressionType(), hdrSize);
|
||||
compress::CompressInterface::setLBIDByIndex(fileData->fFileHeader.fControlData, lbid, 0);
|
||||
// Save compression type.
|
||||
fileData->fCompressionType = fFileOp->compressionType();
|
||||
|
||||
if (writeHeader(fileData, __LINE__) != NO_ERROR)
|
||||
{
|
||||
@ -771,9 +782,16 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
|
||||
}
|
||||
|
||||
// uncompress the read in buffer
|
||||
unsigned int dataLen = sizeof(chunkData->fBufUnCompressed);
|
||||
size_t dataLen = sizeof(chunkData->fBufUnCompressed);
|
||||
|
||||
if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize,
|
||||
auto fCompressor = compress::getCompressorByType(
|
||||
fCompressorPool, fileData->fCompressionType);
|
||||
if (!fCompressor)
|
||||
{
|
||||
return ERR_COMP_WRONG_COMP_TYPE;
|
||||
}
|
||||
|
||||
if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
|
||||
(unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0)
|
||||
{
|
||||
if (fIsFix)
|
||||
@ -784,7 +802,7 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
|
||||
{
|
||||
char* hdr = fileData->fFileHeader.fControlData;
|
||||
|
||||
if (fCompressor.getBlockCount(hdr) < 512)
|
||||
if (compress::CompressInterface::getBlockCount(hdr) < 512)
|
||||
blocks = 256;
|
||||
}
|
||||
|
||||
@ -820,7 +838,8 @@ int ChunkManager::fetchChunkFromFile(IDBDataFile* pFile, int64_t id, ChunkData*&
|
||||
{
|
||||
if (id == 0 && ptrs[id] == 0) // if the 1st ptr is not set for new extent
|
||||
{
|
||||
ptrs[0] = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
|
||||
ptrs[0] = compress::CompressInterface::getHdrSize(
|
||||
fileData->fFileHeader.fControlData);
|
||||
}
|
||||
|
||||
// load the uncompressed buffer with empty values.
|
||||
@ -907,10 +926,17 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
|
||||
// compress the chunk before writing it to file
|
||||
fLenCompressed = fMaxCompressedBufSize;
|
||||
|
||||
if (fCompressor.compressBlock((char*)chunkData->fBufUnCompressed,
|
||||
chunkData->fLenUnCompressed,
|
||||
(unsigned char*)fBufCompressed,
|
||||
fLenCompressed) != 0)
|
||||
auto fCompressor = compress::getCompressorByType(
|
||||
fCompressorPool, fileData->fCompressionType);
|
||||
if (!fCompressor)
|
||||
{
|
||||
return ERR_COMP_WRONG_COMP_TYPE;
|
||||
}
|
||||
|
||||
if (fCompressor->compressBlock((char*) chunkData->fBufUnCompressed,
|
||||
chunkData->fLenUnCompressed,
|
||||
(unsigned char*) fBufCompressed,
|
||||
fLenCompressed) != 0)
|
||||
{
|
||||
logMessage(ERR_COMP_COMPRESS, logging::LOG_TYPE_ERROR, __LINE__);
|
||||
return ERR_COMP_COMPRESS;
|
||||
@ -941,7 +967,8 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
|
||||
// [chunkId+0] is the start offset of current chunk.
|
||||
// [chunkId+1] is the start offset of next chunk, the offset diff is current chunk size.
|
||||
// [chunkId+2] is 0 or not indicates if the next chunk exists.
|
||||
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
|
||||
int headerSize = compress::CompressInterface::getHdrSize(
|
||||
fileData->fFileHeader.fControlData);
|
||||
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
|
||||
int64_t usablePtrIds = (ptrSecSize / sizeof(uint64_t)) - 2;
|
||||
|
||||
@ -968,7 +995,7 @@ int ChunkManager::writeChunkToFile(CompFileData* fileData, ChunkData* chunkData)
|
||||
else if (lastChunk)
|
||||
{
|
||||
// add padding space if the chunk is written first time
|
||||
if (fCompressor.padCompressedChunks(
|
||||
if (fCompressor->padCompressedChunks(
|
||||
(unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize) != 0)
|
||||
{
|
||||
WE_COMP_DBG(cout << "Last chunk:" << chunkId << ", padding failed." << endl;)
|
||||
@ -1272,7 +1299,8 @@ int ChunkManager::closeFile(CompFileData* fileData)
|
||||
int ChunkManager::writeHeader(CompFileData* fileData, int ln)
|
||||
{
|
||||
int rc = NO_ERROR;
|
||||
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
|
||||
int headerSize = compress::CompressInterface::getHdrSize(
|
||||
fileData->fFileHeader.fControlData);
|
||||
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
|
||||
|
||||
if (!fIsHdfs && !fIsBulkLoad)
|
||||
@ -1422,8 +1450,10 @@ int ChunkManager::updateColumnExtent(IDBDataFile* pFile, int addBlockCount, int6
|
||||
|
||||
int rc = NO_ERROR;
|
||||
char* hdr = pFileData->fFileHeader.fControlData;
|
||||
fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount);
|
||||
fCompressor.setLBIDByIndex(hdr, lbid, 1);
|
||||
compress::CompressInterface::setBlockCount(
|
||||
hdr, compress::CompressInterface::getBlockCount(hdr) + addBlockCount);
|
||||
compress::CompressInterface::setLBIDByIndex(hdr, lbid, 1);
|
||||
|
||||
ChunkData* chunkData = (pFileData)->findChunk(0);
|
||||
|
||||
if (chunkData != NULL)
|
||||
@ -1475,7 +1505,7 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount,
|
||||
|
||||
char* hdr = i->second->fFileHeader.fControlData;
|
||||
char* uncompressedBuf = chunkData->fBufUnCompressed;
|
||||
int currentBlockCount = fCompressor.getBlockCount(hdr);
|
||||
int currentBlockCount = compress::CompressInterface::getBlockCount(hdr);
|
||||
|
||||
// Bug 3203, write out the compressed initial extent.
|
||||
if (currentBlockCount == 0)
|
||||
@ -1511,13 +1541,15 @@ int ChunkManager::updateDctnryExtent(IDBDataFile* pFile, int addBlockCount,
|
||||
}
|
||||
|
||||
if (rc == NO_ERROR)
|
||||
fCompressor.setBlockCount(hdr, fCompressor.getBlockCount(hdr) + addBlockCount);
|
||||
compress::CompressInterface::setBlockCount(
|
||||
hdr,
|
||||
compress::CompressInterface::getBlockCount(hdr) + addBlockCount);
|
||||
|
||||
if (currentBlockCount)
|
||||
{
|
||||
// Append to the end.
|
||||
uint64_t lbidCount = fCompressor.getLBIDCount(hdr);
|
||||
fCompressor.setLBIDByIndex(hdr, lbid, lbidCount);
|
||||
uint64_t lbidCount = compress::CompressInterface::getLBIDCount(hdr);
|
||||
compress::CompressInterface::setLBIDByIndex(hdr, lbid, lbidCount);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
@ -1684,7 +1716,8 @@ int ChunkManager::getBlockCount(IDBDataFile* pFile)
|
||||
map<IDBDataFile*, CompFileData*>::iterator fpIt = fFilePtrMap.find(pFile);
|
||||
idbassert(fpIt != fFilePtrMap.end());
|
||||
|
||||
return fCompressor.getBlockCount(fpIt->second->fFileHeader.fControlData);
|
||||
return compress::CompressInterface::getBlockCount(
|
||||
fpIt->second->fFileHeader.fControlData);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -1758,11 +1791,13 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
|
||||
origFilePtr->flush();
|
||||
|
||||
// back out the current pointers
|
||||
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
|
||||
int headerSize = compress::CompressInterface::getHdrSize(
|
||||
fileData->fFileHeader.fControlData);
|
||||
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
|
||||
compress::CompChunkPtrList origPtrs;
|
||||
|
||||
if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0)
|
||||
if (compress::CompressInterface::getPtrList(
|
||||
fileData->fFileHeader.fPtrSection, ptrSecSize, origPtrs) != 0)
|
||||
{
|
||||
ostringstream oss;
|
||||
oss << "Chunk shifting failed, file:" << origFileName << " -- invalid header.";
|
||||
@ -1876,7 +1911,14 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
|
||||
ChunkData* chunkData = chunksTouched[k];
|
||||
fLenCompressed = fMaxCompressedBufSize;
|
||||
|
||||
if ((rc = fCompressor.compressBlock((char*)chunkData->fBufUnCompressed,
|
||||
auto fCompressor = compress::getCompressorByType(
|
||||
fCompressorPool, fileData->fCompressionType);
|
||||
if (!fCompressor)
|
||||
{
|
||||
return ERR_COMP_WRONG_COMP_TYPE;
|
||||
}
|
||||
|
||||
if ((rc = fCompressor->compressBlock((char*)chunkData->fBufUnCompressed,
|
||||
chunkData->fLenUnCompressed,
|
||||
(unsigned char*)fBufCompressed,
|
||||
fLenCompressed)) != 0)
|
||||
@ -1894,7 +1936,7 @@ int ChunkManager::reallocateChunks(CompFileData* fileData)
|
||||
<< fLenCompressed;)
|
||||
|
||||
// shifting chunk, add padding space
|
||||
if ((rc = fCompressor.padCompressedChunks(
|
||||
if ((rc = fCompressor->padCompressedChunks(
|
||||
(unsigned char*)fBufCompressed, fLenCompressed, fMaxCompressedBufSize)) != 0)
|
||||
{
|
||||
WE_COMP_DBG(cout << ", but padding failed." << endl;)
|
||||
@ -2245,7 +2287,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
|
||||
}
|
||||
|
||||
// make sure the header is valid
|
||||
if ((rc = fCompressor.verifyHdr(fileData->fFileHeader.fControlData)) != 0)
|
||||
if ((rc = compress::CompressInterface::verifyHdr(
|
||||
fileData->fFileHeader.fControlData)) != 0)
|
||||
{
|
||||
ostringstream oss;
|
||||
oss << "Invalid header in new " << fileData->fFileName << ", roll back";
|
||||
@ -2254,7 +2297,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
|
||||
return rc;
|
||||
}
|
||||
|
||||
int headerSize = fCompressor.getHdrSize(fileData->fFileHeader.fControlData);
|
||||
int headerSize = compress::CompressInterface::getHdrSize(
|
||||
fileData->fFileHeader.fControlData);
|
||||
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
|
||||
|
||||
// read in the pointer section in header
|
||||
@ -2270,7 +2314,8 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
|
||||
// get pointer list
|
||||
compress::CompChunkPtrList ptrs;
|
||||
|
||||
if (fCompressor.getPtrList(fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
|
||||
if (compress::CompressInterface::getPtrList(
|
||||
fileData->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
|
||||
{
|
||||
ostringstream oss;
|
||||
oss << "Failed to parse pointer list from new " << fileData->fFileName << "@" << __LINE__;
|
||||
@ -2282,6 +2327,13 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
|
||||
ChunkData chunkData;
|
||||
int numOfChunks = ptrs.size(); // number of chunks in the file
|
||||
|
||||
auto fCompressor = compress::getCompressorByType(
|
||||
fCompressorPool, fileData->fCompressionType);
|
||||
if (!fCompressor)
|
||||
{
|
||||
return ERR_COMP_WRONG_COMP_TYPE;
|
||||
}
|
||||
|
||||
for (int i = 0; i < numOfChunks && rc == NO_ERROR; i++)
|
||||
{
|
||||
unsigned int chunkSize = ptrs[i].second;
|
||||
@ -2304,9 +2356,9 @@ int ChunkManager::verifyChunksAfterRealloc(CompFileData* fileData)
|
||||
}
|
||||
|
||||
// uncompress the read in buffer
|
||||
unsigned int dataLen = sizeof(chunkData.fBufUnCompressed);
|
||||
size_t dataLen = sizeof(chunkData.fBufUnCompressed);
|
||||
|
||||
if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize,
|
||||
if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
|
||||
(unsigned char*)chunkData.fBufUnCompressed, dataLen) != 0)
|
||||
{
|
||||
ostringstream oss;
|
||||
@ -2624,13 +2676,15 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
|
||||
if (mit != fFileMap.end())
|
||||
{
|
||||
|
||||
int headerSize = fCompressor.getHdrSize(mit->second->fFileHeader.fControlData);
|
||||
int headerSize = compress::CompressInterface::getHdrSize(
|
||||
mit->second->fFileHeader.fControlData);
|
||||
int ptrSecSize = headerSize - COMPRESSED_FILE_HEADER_UNIT;
|
||||
|
||||
// get pointer list
|
||||
compress::CompChunkPtrList ptrs;
|
||||
|
||||
if (fCompressor.getPtrList(mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
|
||||
if (compress::CompressInterface::getPtrList(
|
||||
mit->second->fFileHeader.fPtrSection, ptrSecSize, ptrs) != 0)
|
||||
{
|
||||
ostringstream oss;
|
||||
oss << "Failed to parse pointer list from new " << mit->second->fFileName << "@" << __LINE__;
|
||||
@ -2662,9 +2716,16 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
|
||||
|
||||
// uncompress the read in buffer
|
||||
chunkData = new ChunkData(numOfChunks - 1);
|
||||
unsigned int dataLen = sizeof(chunkData->fBufUnCompressed);
|
||||
size_t dataLen = sizeof(chunkData->fBufUnCompressed);
|
||||
|
||||
if (fCompressor.uncompressBlock((char*)fBufCompressed, chunkSize,
|
||||
auto fCompressor = compress::getCompressorByType(
|
||||
fCompressorPool, mit->second->fCompressionType);
|
||||
if (!fCompressor)
|
||||
{
|
||||
return ERR_COMP_WRONG_COMP_TYPE;
|
||||
}
|
||||
|
||||
if (fCompressor->uncompressBlock((char*)fBufCompressed, chunkSize,
|
||||
(unsigned char*)chunkData->fBufUnCompressed, dataLen) != 0)
|
||||
{
|
||||
mit->second->fChunkList.push_back(chunkData);
|
||||
@ -2676,7 +2737,7 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
|
||||
{
|
||||
char* hdr = mit->second->fFileHeader.fControlData;
|
||||
|
||||
if (fCompressor.getBlockCount(hdr) < 512)
|
||||
if (compress::CompressInterface::getBlockCount(hdr) < 512)
|
||||
blocks = 256;
|
||||
}
|
||||
|
||||
@ -2693,7 +2754,6 @@ int ChunkManager::checkFixLastDictChunk(const FID& fid,
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim:ts=4 sw=4:
|
||||
|
@ -64,8 +64,8 @@ namespace WriteEngine
|
||||
// forward reference
|
||||
class FileOp;
|
||||
|
||||
const int UNCOMPRESSED_CHUNK_SIZE = compress::IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
const int COMPRESSED_FILE_HEADER_UNIT = compress::IDBCompressInterface::HDR_BUF_LEN;
|
||||
const int UNCOMPRESSED_CHUNK_SIZE = compress::CompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
const int COMPRESSED_FILE_HEADER_UNIT = compress::CompressInterface::HDR_BUF_LEN;
|
||||
|
||||
// assume UNCOMPRESSED_CHUNK_SIZE > 0xBFFF (49151), 8 * 1024 bytes padding
|
||||
|
||||
@ -136,7 +136,7 @@ class CompFileData
|
||||
public:
|
||||
CompFileData(const FileID& id, const FID& fid, const execplan::CalpontSystemCatalog::ColDataType colDataType, int colWidth) :
|
||||
fFileID(id), fFid(fid), fColDataType(colDataType), fColWidth(colWidth), fDctnryCol(false),
|
||||
fFilePtr(NULL), fIoBSize(0) {}
|
||||
fFilePtr(NULL), fIoBSize(0), fCompressionType(1) {}
|
||||
|
||||
ChunkData* findChunk(int64_t cid) const;
|
||||
|
||||
@ -152,6 +152,7 @@ protected:
|
||||
std::list<ChunkData*> fChunkList;
|
||||
boost::scoped_array<char> fIoBuffer;
|
||||
size_t fIoBSize;
|
||||
uint32_t fCompressionType;
|
||||
|
||||
friend class ChunkManager;
|
||||
};
|
||||
@ -369,22 +370,23 @@ protected:
|
||||
std::list<std::pair<FileID, ChunkData*> > fActiveChunks;
|
||||
unsigned int fMaxActiveChunkNum; // max active chunks per file
|
||||
char* fBufCompressed;
|
||||
unsigned int fLenCompressed;
|
||||
unsigned int fMaxCompressedBufSize;
|
||||
unsigned int fUserPaddings;
|
||||
size_t fLenCompressed;
|
||||
size_t fMaxCompressedBufSize;
|
||||
size_t fUserPaddings;
|
||||
bool fIsBulkLoad;
|
||||
bool fDropFdCache;
|
||||
bool fIsInsert;
|
||||
bool fIsHdfs;
|
||||
FileOp* fFileOp;
|
||||
compress::IDBCompressInterface fCompressor;
|
||||
compress::CompressorPool fCompressorPool;
|
||||
logging::Logger* fSysLogger;
|
||||
TxnID fTransId;
|
||||
int fLocalModuleId;
|
||||
idbdatafile::IDBFileSystem& fFs;
|
||||
bool fIsFix;
|
||||
size_t COMPRESSED_CHUNK_SIZE;
|
||||
|
||||
private:
|
||||
private:
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -348,6 +348,7 @@ const int ERR_COMP_READ_FILE = ERR_COMPBASE + 16;// Failed to read from a
|
||||
const int ERR_COMP_WRITE_FILE = ERR_COMPBASE + 17;// Failed to write to a compresssed data file
|
||||
const int ERR_COMP_CLOSE_FILE = ERR_COMPBASE + 18;// Failed to close a compressed data file
|
||||
const int ERR_COMP_TRUNCATE_ZERO = ERR_COMPBASE + 19;// Invalid attempt to truncate file to 0 bytes
|
||||
const int ERR_COMP_WRONG_COMP_TYPE = ERR_COMPBASE + 20;// Invalid compression type.
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Auto-increment error
|
||||
|
@ -652,14 +652,19 @@ int FileOp::extendFile(
|
||||
// @bug 5349: check that new extent's fbo is not past current EOF
|
||||
if (m_compressionType)
|
||||
{
|
||||
char hdrsIn[ compress::IDBCompressInterface::HDR_BUF_LEN * 2 ];
|
||||
char hdrsIn[ compress::CompressInterface::HDR_BUF_LEN * 2 ];
|
||||
RETURN_ON_ERROR( readHeaders(pFile, hdrsIn) );
|
||||
|
||||
IDBCompressInterface compressor;
|
||||
unsigned int ptrCount = compressor.getPtrCount(hdrsIn);
|
||||
std::unique_ptr<compress::CompressInterface> compressor(
|
||||
compress::getCompressInterfaceByType(
|
||||
compress::CompressInterface::getCompressionType(hdrsIn)));
|
||||
|
||||
unsigned int ptrCount =
|
||||
compress::CompressInterface::getPtrCount(hdrsIn);
|
||||
unsigned int chunkIndex = 0;
|
||||
unsigned int blockOffsetWithinChunk = 0;
|
||||
compressor.locateBlock((hwm - 1), chunkIndex, blockOffsetWithinChunk);
|
||||
compressor->locateBlock((hwm - 1), chunkIndex,
|
||||
blockOffsetWithinChunk);
|
||||
|
||||
//std::ostringstream oss1;
|
||||
//oss1 << "Extending compressed column file"<<
|
||||
@ -816,9 +821,8 @@ int FileOp::extendFile(
|
||||
|
||||
if ((m_compressionType) && (hdrs))
|
||||
{
|
||||
IDBCompressInterface compressor;
|
||||
compressor.initHdr(hdrs, width, colDataType, m_compressionType);
|
||||
compressor.setLBIDByIndex(hdrs, startLbid, 0);
|
||||
compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
|
||||
compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -976,9 +980,8 @@ int FileOp::addExtentExactFile(
|
||||
|
||||
if ((m_compressionType) && (hdrs))
|
||||
{
|
||||
IDBCompressInterface compressor;
|
||||
compressor.initHdr(hdrs, width, colDataType, m_compressionType);
|
||||
compressor.setLBIDByIndex(hdrs, startLbid, 0);
|
||||
compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
|
||||
compress::CompressInterface::setLBIDByIndex(hdrs, startLbid, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1064,13 +1067,11 @@ int FileOp::initColumnExtent(
|
||||
{
|
||||
if ((bNewFile) && (m_compressionType))
|
||||
{
|
||||
char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2];
|
||||
IDBCompressInterface compressor;
|
||||
compressor.initHdr(hdrs, width, colDataType, m_compressionType);
|
||||
compressor.setLBIDByIndex(hdrs, lbid, 0);
|
||||
|
||||
char hdrs[CompressInterface::HDR_BUF_LEN * 2];
|
||||
compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
|
||||
compress::CompressInterface::setLBIDByIndex(hdrs, lbid, 0);
|
||||
if (bAbbrevExtent)
|
||||
compressor.setBlockCount(hdrs, nBlocks);
|
||||
compress::CompressInterface::setBlockCount(hdrs, nBlocks);
|
||||
|
||||
RETURN_ON_ERROR(writeHeaders(pFile, hdrs));
|
||||
}
|
||||
@ -1262,7 +1263,7 @@ int FileOp::initAbbrevCompColumnExtent(
|
||||
Stats::startParseEvent(WE_STATS_COMPRESS_COL_INIT_ABBREV_EXT);
|
||||
#endif
|
||||
|
||||
char hdrs[IDBCompressInterface::HDR_BUF_LEN * 2];
|
||||
char hdrs[CompressInterface::HDR_BUF_LEN * 2];
|
||||
rc = writeInitialCompColumnChunk( pFile,
|
||||
nBlocks,
|
||||
INITIAL_EXTENT_ROWS_TO_DISK,
|
||||
@ -1308,24 +1309,30 @@ int FileOp::writeInitialCompColumnChunk(
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType,
|
||||
char* hdrs)
|
||||
{
|
||||
const int INPUT_BUFFER_SIZE = nRows * width;
|
||||
const size_t INPUT_BUFFER_SIZE = nRows * width;
|
||||
char* toBeCompressedInput = new char[INPUT_BUFFER_SIZE];
|
||||
unsigned int userPaddingBytes = Config::getNumCompressedPadBlks() *
|
||||
BYTE_PER_BLOCK;
|
||||
const int OUTPUT_BUFFER_SIZE = IDBCompressInterface::maxCompressedSize(INPUT_BUFFER_SIZE) +
|
||||
userPaddingBytes;
|
||||
// Compress an initialized abbreviated extent
|
||||
// Initially m_compressionType == 0, but this function is used under
|
||||
// condition where m_compressionType > 0.
|
||||
std::unique_ptr<CompressInterface> compressor(
|
||||
compress::getCompressInterfaceByType(m_compressionType,
|
||||
userPaddingBytes));
|
||||
const size_t OUTPUT_BUFFER_SIZE =
|
||||
compressor->maxCompressedSize(INPUT_BUFFER_SIZE) + userPaddingBytes +
|
||||
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
|
||||
|
||||
unsigned char* compressedOutput = new unsigned char[OUTPUT_BUFFER_SIZE];
|
||||
unsigned int outputLen = OUTPUT_BUFFER_SIZE;
|
||||
size_t outputLen = OUTPUT_BUFFER_SIZE;
|
||||
boost::scoped_array<char> toBeCompressedInputPtr( toBeCompressedInput );
|
||||
boost::scoped_array<unsigned char> compressedOutputPtr(compressedOutput);
|
||||
|
||||
setEmptyBuf( (unsigned char*)toBeCompressedInput,
|
||||
INPUT_BUFFER_SIZE, emptyVal, width);
|
||||
|
||||
// Compress an initialized abbreviated extent
|
||||
IDBCompressInterface compressor( userPaddingBytes );
|
||||
int rc = compressor.compressBlock(toBeCompressedInput,
|
||||
INPUT_BUFFER_SIZE, compressedOutput, outputLen );
|
||||
int rc = compressor->compressBlock(toBeCompressedInput, INPUT_BUFFER_SIZE,
|
||||
compressedOutput, outputLen);
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
@ -1333,8 +1340,8 @@ int FileOp::writeInitialCompColumnChunk(
|
||||
}
|
||||
|
||||
// Round up the compressed chunk size
|
||||
rc = compressor.padCompressedChunks( compressedOutput,
|
||||
outputLen, OUTPUT_BUFFER_SIZE );
|
||||
rc = compressor->padCompressedChunks(compressedOutput, outputLen,
|
||||
OUTPUT_BUFFER_SIZE);
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
@ -1347,23 +1354,22 @@ int FileOp::writeInitialCompColumnChunk(
|
||||
// "; blkAllocCnt: " << nBlocksAllocated <<
|
||||
// "; compressedByteCnt: " << outputLen << std::endl;
|
||||
|
||||
compressor.initHdr(hdrs, width, colDataType, m_compressionType);
|
||||
compressor.setBlockCount(hdrs, nBlocksAllocated);
|
||||
compressor.setLBIDByIndex(hdrs, startLBID, 0);
|
||||
compress::CompressInterface::initHdr(hdrs, width, colDataType, m_compressionType);
|
||||
compress::CompressInterface::setBlockCount(hdrs, nBlocksAllocated);
|
||||
compress::CompressInterface::setLBIDByIndex(hdrs, startLBID, 0);
|
||||
|
||||
// Store compression pointers in the header
|
||||
std::vector<uint64_t> ptrs;
|
||||
ptrs.push_back( IDBCompressInterface::HDR_BUF_LEN * 2 );
|
||||
ptrs.push_back( outputLen + (IDBCompressInterface::HDR_BUF_LEN * 2) );
|
||||
compressor.storePtrs(ptrs, hdrs);
|
||||
ptrs.push_back( CompressInterface::HDR_BUF_LEN * 2 );
|
||||
ptrs.push_back( outputLen + (CompressInterface::HDR_BUF_LEN * 2) );
|
||||
compress::CompressInterface::storePtrs(ptrs, hdrs);
|
||||
|
||||
RETURN_ON_ERROR( writeHeaders(pFile, hdrs) );
|
||||
|
||||
// Write the compressed data
|
||||
if ( pFile->write( compressedOutput, outputLen ) != outputLen )
|
||||
{
|
||||
size_t writtenLen = pFile->write(compressedOutput, outputLen);
|
||||
if (writtenLen != outputLen)
|
||||
return ERR_FILE_WRITE;
|
||||
}
|
||||
|
||||
return NO_ERROR;
|
||||
}
|
||||
@ -1421,7 +1427,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
return ERR_FILE_OPEN;
|
||||
}
|
||||
|
||||
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
|
||||
char hdrs[ CompressInterface::HDR_BUF_LEN * 2 ];
|
||||
rc = readHeaders( pFile, hdrs );
|
||||
|
||||
if (rc != NO_ERROR)
|
||||
@ -1432,9 +1438,14 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
}
|
||||
|
||||
int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
|
||||
IDBCompressInterface compressor( userPadBytes );
|
||||
|
||||
std::unique_ptr<CompressInterface> compressor(
|
||||
compress::getCompressInterfaceByType(
|
||||
compress::CompressInterface::getCompressionType(hdrs),
|
||||
userPadBytes));
|
||||
|
||||
CompChunkPtrList chunkPtrs;
|
||||
int rcComp = compressor.getPtrList( hdrs, chunkPtrs );
|
||||
int rcComp = compress::CompressInterface::getPtrList(hdrs, chunkPtrs);
|
||||
|
||||
if (rcComp != 0)
|
||||
{
|
||||
@ -1444,7 +1455,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
}
|
||||
|
||||
// Nothing to do if the proposed HWM is < the current block count
|
||||
uint64_t blkCount = compressor.getBlockCount(hdrs);
|
||||
uint64_t blkCount = compress::CompressInterface::getBlockCount(hdrs);
|
||||
|
||||
if (blkCount > (hwm + 1))
|
||||
{
|
||||
@ -1455,7 +1466,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
const unsigned int ROWS_PER_EXTENT =
|
||||
BRMWrapper::getInstance()->getInstance()->getExtentRows();
|
||||
const unsigned int ROWS_PER_CHUNK =
|
||||
IDBCompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth;
|
||||
CompressInterface::UNCOMPRESSED_INBUF_LEN / colWidth;
|
||||
const unsigned int CHUNKS_PER_EXTENT = ROWS_PER_EXTENT / ROWS_PER_CHUNK;
|
||||
|
||||
// If this is an abbreviated extent, we first expand to a full extent
|
||||
@ -1493,7 +1504,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
|
||||
CompChunkPtr chunkOutPtr;
|
||||
rc = expandAbbrevColumnChunk( pFile, emptyVal, colWidth,
|
||||
chunkPtrs[0], chunkOutPtr );
|
||||
chunkPtrs[0], chunkOutPtr, hdrs );
|
||||
|
||||
if (rc != NO_ERROR)
|
||||
{
|
||||
@ -1515,7 +1526,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
|
||||
// Update block count to reflect a full extent
|
||||
blkCount = (ROWS_PER_EXTENT * colWidth) / BYTE_PER_BLOCK;
|
||||
compressor.setBlockCount( hdrs, blkCount );
|
||||
compress::CompressInterface::setBlockCount(hdrs, blkCount);
|
||||
}
|
||||
|
||||
// Calculate the number of empty chunks we need to add to fill this extent
|
||||
@ -1532,7 +1543,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
compressor.getBlockCount(hdrs) << std::endl;
|
||||
std::cout << "Pointer Header Size (in bytes): " <<
|
||||
(compressor.getHdrSize(hdrs) -
|
||||
IDBCompressInterface::HDR_BUF_LEN) << std::endl;
|
||||
CompressInterface::HDR_BUF_LEN) << std::endl;
|
||||
std::cout << "Chunk Pointers (offset,length): " << std::endl;
|
||||
|
||||
for (unsigned k = 0; k < chunkPtrs.size(); k++)
|
||||
@ -1551,8 +1562,10 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
// Fill in or add necessary remaining empty chunks
|
||||
if (numChunksToFill > 0)
|
||||
{
|
||||
const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes;
|
||||
const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
const int OUT_BUF_LEN =
|
||||
compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes +
|
||||
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
|
||||
|
||||
// Allocate buffer, and store in scoped_array to ensure its deletion.
|
||||
// Create scope {...} to manage deletion of buffers
|
||||
@ -1566,9 +1579,9 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
// Compress and then pad the compressed chunk
|
||||
setEmptyBuf( (unsigned char*)toBeCompressedBuf,
|
||||
IN_BUF_LEN, emptyVal, colWidth );
|
||||
unsigned int outputLen = OUT_BUF_LEN;
|
||||
rcComp = compressor.compressBlock( toBeCompressedBuf,
|
||||
IN_BUF_LEN, compressedBuf, outputLen );
|
||||
size_t outputLen = OUT_BUF_LEN;
|
||||
rcComp = compressor->compressBlock(toBeCompressedBuf, IN_BUF_LEN,
|
||||
compressedBuf, outputLen);
|
||||
|
||||
if (rcComp != 0)
|
||||
{
|
||||
@ -1579,8 +1592,8 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
|
||||
toBeCompressedInputPtr.reset(); // release memory
|
||||
|
||||
rcComp = compressor.padCompressedChunks( compressedBuf,
|
||||
outputLen, OUT_BUF_LEN );
|
||||
rcComp = compressor->padCompressedChunks(compressedBuf, outputLen,
|
||||
OUT_BUF_LEN);
|
||||
|
||||
if (rcComp != 0)
|
||||
{
|
||||
@ -1639,7 +1652,7 @@ int FileOp::fillCompColumnExtentEmptyChunks(OID oid,
|
||||
|
||||
ptrs.push_back( chunkPtrs[chunkPtrs.size() - 1].first +
|
||||
chunkPtrs[chunkPtrs.size() - 1].second );
|
||||
compressor.storePtrs( ptrs, hdrs );
|
||||
compress::CompressInterface::storePtrs(ptrs, hdrs);
|
||||
|
||||
rc = writeHeaders( pFile, hdrs );
|
||||
|
||||
@ -1697,11 +1710,24 @@ int FileOp::expandAbbrevColumnChunk(
|
||||
const uint8_t* emptyVal,
|
||||
int colWidth,
|
||||
const CompChunkPtr& chunkInPtr,
|
||||
CompChunkPtr& chunkOutPtr )
|
||||
CompChunkPtr& chunkOutPtr,
|
||||
const char *hdrs )
|
||||
{
|
||||
int userPadBytes = Config::getNumCompressedPadBlks() * BYTE_PER_BLOCK;
|
||||
const int IN_BUF_LEN = IDBCompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
const int OUT_BUF_LEN = IDBCompressInterface::maxCompressedSize(IN_BUF_LEN) + userPadBytes;
|
||||
auto realCompressionType = m_compressionType;
|
||||
if (hdrs)
|
||||
{
|
||||
realCompressionType =
|
||||
compress::CompressInterface::getCompressionType(hdrs);
|
||||
}
|
||||
std::unique_ptr<CompressInterface> compressor(
|
||||
compress::getCompressInterfaceByType(realCompressionType,
|
||||
userPadBytes));
|
||||
|
||||
const int IN_BUF_LEN = CompressInterface::UNCOMPRESSED_INBUF_LEN;
|
||||
const int OUT_BUF_LEN =
|
||||
compressor->maxCompressedSize(IN_BUF_LEN) + userPadBytes +
|
||||
compress::CompressInterface::COMPRESSED_CHUNK_INCREMENT_SIZE;
|
||||
|
||||
char* toBeCompressedBuf = new char[ IN_BUF_LEN ];
|
||||
boost::scoped_array<char> toBeCompressedPtr(toBeCompressedBuf);
|
||||
@ -1717,13 +1743,10 @@ int FileOp::expandAbbrevColumnChunk(
|
||||
chunkInPtr.second) );
|
||||
|
||||
// Uncompress an "abbreviated" chunk into our 4MB buffer
|
||||
unsigned int outputLen = IN_BUF_LEN;
|
||||
IDBCompressInterface compressor( userPadBytes );
|
||||
int rc = compressor.uncompressBlock(
|
||||
compressedInBuf,
|
||||
chunkInPtr.second,
|
||||
(unsigned char*)toBeCompressedBuf,
|
||||
outputLen);
|
||||
size_t outputLen = IN_BUF_LEN;
|
||||
int rc = compressor->uncompressBlock(compressedInBuf, chunkInPtr.second,
|
||||
(unsigned char*) toBeCompressedBuf,
|
||||
outputLen);
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
@ -1739,11 +1762,8 @@ int FileOp::expandAbbrevColumnChunk(
|
||||
|
||||
// Compress the data we just read, as a "full" 4MB chunk
|
||||
outputLen = OUT_BUF_LEN;
|
||||
rc = compressor.compressBlock(
|
||||
reinterpret_cast<char*>(toBeCompressedBuf),
|
||||
IN_BUF_LEN,
|
||||
compressedOutBuf,
|
||||
outputLen );
|
||||
rc = compressor->compressBlock(reinterpret_cast<char*>(toBeCompressedBuf),
|
||||
IN_BUF_LEN, compressedOutBuf, outputLen);
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
@ -1751,8 +1771,8 @@ int FileOp::expandAbbrevColumnChunk(
|
||||
}
|
||||
|
||||
// Round up the compressed chunk size
|
||||
rc = compressor.padCompressedChunks( compressedOutBuf,
|
||||
outputLen, OUT_BUF_LEN );
|
||||
rc = compressor->padCompressedChunks(compressedOutBuf, outputLen,
|
||||
OUT_BUF_LEN);
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
@ -1782,7 +1802,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* hdr) const
|
||||
RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) );
|
||||
|
||||
// Write the headers
|
||||
if (pFile->write( hdr, IDBCompressInterface::HDR_BUF_LEN * 2 ) != IDBCompressInterface::HDR_BUF_LEN * 2)
|
||||
if (pFile->write( hdr, CompressInterface::HDR_BUF_LEN * 2 ) != CompressInterface::HDR_BUF_LEN * 2)
|
||||
{
|
||||
return ERR_FILE_WRITE;
|
||||
}
|
||||
@ -1808,7 +1828,7 @@ int FileOp::writeHeaders(IDBDataFile* pFile, const char* controlHdr,
|
||||
RETURN_ON_ERROR( setFileOffset(pFile, 0, SEEK_SET) );
|
||||
|
||||
// Write the control header
|
||||
if (pFile->write( controlHdr, IDBCompressInterface::HDR_BUF_LEN ) != IDBCompressInterface::HDR_BUF_LEN)
|
||||
if (pFile->write( controlHdr, CompressInterface::HDR_BUF_LEN ) != CompressInterface::HDR_BUF_LEN)
|
||||
{
|
||||
return ERR_FILE_WRITE;
|
||||
}
|
||||
@ -2651,9 +2671,8 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdrs ) const
|
||||
{
|
||||
RETURN_ON_ERROR( setFileOffset(pFile, 0) );
|
||||
RETURN_ON_ERROR( readFile( pFile, reinterpret_cast<unsigned char*>(hdrs),
|
||||
(IDBCompressInterface::HDR_BUF_LEN * 2) ) );
|
||||
IDBCompressInterface compressor;
|
||||
int rc = compressor.verifyHdr( hdrs );
|
||||
(CompressInterface::HDR_BUF_LEN * 2) ) );
|
||||
int rc = compress::CompressInterface::verifyHdr(hdrs);
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
@ -2671,11 +2690,10 @@ int FileOp::readHeaders( IDBDataFile* pFile, char* hdr1, char* hdr2 ) const
|
||||
unsigned char* hdrPtr = reinterpret_cast<unsigned char*>(hdr1);
|
||||
RETURN_ON_ERROR( setFileOffset(pFile, 0) );
|
||||
RETURN_ON_ERROR( readFile( pFile, hdrPtr,
|
||||
IDBCompressInterface::HDR_BUF_LEN ));
|
||||
CompressInterface::HDR_BUF_LEN ));
|
||||
|
||||
IDBCompressInterface compressor;
|
||||
int ptrSecSize = compressor.getHdrSize(hdrPtr) -
|
||||
IDBCompressInterface::HDR_BUF_LEN;
|
||||
int ptrSecSize = compress::CompressInterface::getHdrSize(hdrPtr) -
|
||||
CompressInterface::HDR_BUF_LEN;
|
||||
return readFile( pFile, reinterpret_cast<unsigned char*>(hdr2),
|
||||
ptrSecSize );
|
||||
}
|
||||
|
@ -529,11 +529,11 @@ private:
|
||||
FileOp(const FileOp& rhs);
|
||||
FileOp& operator=(const FileOp& rhs);
|
||||
|
||||
int expandAbbrevColumnChunk( IDBDataFile* pFile,
|
||||
const uint8_t* emptyVal,
|
||||
int colWidth,
|
||||
const compress::CompChunkPtr& chunkInPtr,
|
||||
compress::CompChunkPtr& chunkOutPt);
|
||||
int expandAbbrevColumnChunk(IDBDataFile* pFile, const uint8_t* emptyVal,
|
||||
int colWidth,
|
||||
const compress::CompChunkPtr& chunkInPtr,
|
||||
compress::CompChunkPtr& chunkOutPt,
|
||||
const char* hdrs = nullptr);
|
||||
|
||||
int initAbbrevCompColumnExtent(
|
||||
IDBDataFile* pFile, uint16_t dbRoot, int nBlocks,
|
||||
|
@ -1007,9 +1007,9 @@ void RBMetaWriter::backupHWMChunk(
|
||||
}
|
||||
|
||||
// Read Control header
|
||||
char controlHdr[ IDBCompressInterface::HDR_BUF_LEN ];
|
||||
char controlHdr[ CompressInterface::HDR_BUF_LEN ];
|
||||
rc = fileOp.readFile( dbFile, (unsigned char*)controlHdr,
|
||||
IDBCompressInterface::HDR_BUF_LEN );
|
||||
CompressInterface::HDR_BUF_LEN );
|
||||
|
||||
if (rc != NO_ERROR)
|
||||
{
|
||||
@ -1025,8 +1025,7 @@ void RBMetaWriter::backupHWMChunk(
|
||||
throw WeException( oss.str(), rc );
|
||||
}
|
||||
|
||||
IDBCompressInterface compressor;
|
||||
int rc1 = compressor.verifyHdr( controlHdr );
|
||||
int rc1 = compress::CompressInterface::verifyHdr(controlHdr);
|
||||
|
||||
if (rc1 != 0)
|
||||
{
|
||||
@ -1045,9 +1044,23 @@ void RBMetaWriter::backupHWMChunk(
|
||||
throw WeException( oss.str(), rc );
|
||||
}
|
||||
|
||||
auto compressionType =
|
||||
compress::CompressInterface::getCompressionType(controlHdr);
|
||||
std::unique_ptr<compress::CompressInterface> compressor(
|
||||
compress::getCompressInterfaceByType(compressionType));
|
||||
|
||||
if (!compressor)
|
||||
{
|
||||
WErrorCodes ec;
|
||||
std::ostringstream oss;
|
||||
oss << "Ivalid compression type " << compressionType;
|
||||
fileOp.closeFile( dbFile );
|
||||
throw WeException(oss.str(), rc);
|
||||
}
|
||||
|
||||
// Read Pointer header data
|
||||
uint64_t hdrSize = compressor.getHdrSize(controlHdr);
|
||||
uint64_t ptrHdrSize = hdrSize - IDBCompressInterface::HDR_BUF_LEN;
|
||||
uint64_t hdrSize = compress::CompressInterface::getHdrSize(controlHdr);
|
||||
uint64_t ptrHdrSize = hdrSize - CompressInterface::HDR_BUF_LEN;
|
||||
char* pointerHdr = new char[ptrHdrSize];
|
||||
rc = fileOp.readFile( dbFile, (unsigned char*)pointerHdr, ptrHdrSize );
|
||||
|
||||
@ -1067,7 +1080,8 @@ void RBMetaWriter::backupHWMChunk(
|
||||
}
|
||||
|
||||
CompChunkPtrList chunkPtrs;
|
||||
rc = compressor.getPtrList(pointerHdr, ptrHdrSize, chunkPtrs );
|
||||
rc = compress::CompressInterface::getPtrList(pointerHdr, ptrHdrSize,
|
||||
chunkPtrs);
|
||||
delete[] pointerHdr;
|
||||
|
||||
if (rc != 0)
|
||||
@ -1087,7 +1101,7 @@ void RBMetaWriter::backupHWMChunk(
|
||||
unsigned int blockOffsetWithinChunk = 0;
|
||||
unsigned char* buffer = 0;
|
||||
uint64_t chunkSize = 0;
|
||||
compressor.locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk);
|
||||
compressor->locateBlock(startingHWM, chunkIndex, blockOffsetWithinChunk);
|
||||
|
||||
if (chunkIndex < chunkPtrs.size())
|
||||
{
|
||||
|
@ -121,9 +121,9 @@ int ColumnOpCompress0::saveBlock(IDBDataFile* pFile, const unsigned char* writeB
|
||||
* Constructor
|
||||
*/
|
||||
|
||||
ColumnOpCompress1::ColumnOpCompress1(Log* logger)
|
||||
ColumnOpCompress1::ColumnOpCompress1(uint32_t compressionType, Log* logger)
|
||||
{
|
||||
m_compressionType = 1;
|
||||
m_compressionType = compressionType;
|
||||
m_chunkManager = new ChunkManager();
|
||||
|
||||
if (logger)
|
||||
@ -164,11 +164,7 @@ bool ColumnOpCompress1::abbreviatedExtent(IDBDataFile* pFile, int colWidth) cons
|
||||
|
||||
int ColumnOpCompress1::blocksInFile(IDBDataFile* pFile) const
|
||||
{
|
||||
CompFileHeader compFileHeader;
|
||||
readHeaders(pFile, compFileHeader.fControlData, compFileHeader.fPtrSection);
|
||||
|
||||
compress::IDBCompressInterface compressor;
|
||||
return compressor.getBlockCount(compFileHeader.fControlData);
|
||||
return m_chunkManager->getBlockCount(pFile);
|
||||
}
|
||||
|
||||
|
||||
|
@ -97,7 +97,7 @@ public:
|
||||
/**
|
||||
* @brief Constructor
|
||||
*/
|
||||
EXPORT ColumnOpCompress1(Log* logger = 0);
|
||||
EXPORT ColumnOpCompress1(uint32_t compressionType, Log* logger = 0);
|
||||
|
||||
/**
|
||||
* @brief Default Destructor
|
||||
|
@ -67,9 +67,9 @@ DctnryCompress0::~DctnryCompress0()
|
||||
/**
|
||||
* Constructor
|
||||
*/
|
||||
DctnryCompress1::DctnryCompress1(Log* logger)
|
||||
DctnryCompress1::DctnryCompress1(uint32_t compressionType, Log* logger)
|
||||
{
|
||||
m_compressionType = 1;
|
||||
m_compressionType = compressionType;
|
||||
m_chunkManager = new ChunkManager();
|
||||
|
||||
if (logger)
|
||||
|
@ -62,7 +62,7 @@ public:
|
||||
/**
|
||||
* @brief Constructor
|
||||
*/
|
||||
EXPORT DctnryCompress1(Log* logger = 0);
|
||||
EXPORT DctnryCompress1(uint32_t compressionType, Log* logger = 0);
|
||||
|
||||
/**
|
||||
* @brief Default Destructor
|
||||
|
@ -76,19 +76,25 @@ StopWatch timer;
|
||||
WriteEngineWrapper::WriteEngineWrapper() : m_opType(NOOP)
|
||||
{
|
||||
m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0;
|
||||
m_colOp[COMPRESSED_OP] = new ColumnOpCompress1;
|
||||
|
||||
m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0;
|
||||
m_dctnry[COMPRESSED_OP] = new DctnryCompress1;
|
||||
|
||||
m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*comressionType=*/1);
|
||||
m_dctnry[COMPRESSED_OP_1] = new DctnryCompress1(/*compressionType=*/1);
|
||||
|
||||
m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*comressionType=*/3);
|
||||
m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3);
|
||||
}
|
||||
|
||||
WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opType(rhs.m_opType)
|
||||
{
|
||||
m_colOp[UN_COMPRESSED_OP] = new ColumnOpCompress0;
|
||||
m_colOp[COMPRESSED_OP] = new ColumnOpCompress1;
|
||||
|
||||
m_dctnry[UN_COMPRESSED_OP] = new DctnryCompress0;
|
||||
m_dctnry[COMPRESSED_OP] = new DctnryCompress1;
|
||||
|
||||
m_colOp[COMPRESSED_OP_1] = new ColumnOpCompress1(/*compressionType=*/1);
|
||||
m_dctnry[COMPRESSED_OP_1] = new DctnryCompress1(/*compressionType=*/1);
|
||||
|
||||
m_colOp[COMPRESSED_OP_2] = new ColumnOpCompress1(/*compressionType=*/3);
|
||||
m_dctnry[COMPRESSED_OP_2] = new DctnryCompress1(/*compressionType=*/3);
|
||||
}
|
||||
|
||||
/**@brief WriteEngineWrapper Constructor
|
||||
@ -96,9 +102,13 @@ WriteEngineWrapper::WriteEngineWrapper(const WriteEngineWrapper& rhs) : m_opTyp
|
||||
// Destructor: deletes the column (m_colOp) and dictionary (m_dctnry) handler
// stored in each compression slot, mirroring the allocations made by the
// constructors.
WriteEngineWrapper::~WriteEngineWrapper()
|
||||
{
|
||||
delete m_colOp[UN_COMPRESSED_OP];
|
||||
delete m_colOp[COMPRESSED_OP];
|
||||
delete m_dctnry[UN_COMPRESSED_OP];
|
||||
delete m_dctnry[COMPRESSED_OP];
|
||||
|
||||
delete m_colOp[COMPRESSED_OP_1];
|
||||
delete m_dctnry[COMPRESSED_OP_1];
|
||||
|
||||
delete m_colOp[COMPRESSED_OP_2];
|
||||
delete m_dctnry[COMPRESSED_OP_2];
|
||||
}
|
||||
|
||||
/**@brief Perform upfront initialization
|
||||
|
@ -58,9 +58,10 @@ namespace WriteEngine
|
||||
{
|
||||
|
||||
//... Compression operation slots (used as indices into m_colOp / m_dctnry):
//    uncompressed and compressed
|
||||
const int UN_COMPRESSED_OP = 0;
|
||||
const int COMPRESSED_OP = 1;
|
||||
const int TOTAL_COMPRESS_OP = 2;
|
||||
// Three-slot layout: one uncompressed slot plus two compressed slots.
// TOTAL_COMPRESS_OP is the number of slots (presumably the size of the
// m_colOp / m_dctnry arrays; confirm at the member declarations).
const int UN_COMPRESSED_OP = 0;
|
||||
const int COMPRESSED_OP_1 = 1;
|
||||
const int COMPRESSED_OP_2 = 2;
|
||||
const int TOTAL_COMPRESS_OP = 3;
|
||||
|
||||
//...Forward class declarations
|
||||
class Log;
|
||||
@ -446,8 +447,10 @@ public:
|
||||
*/
|
||||
void setIsInsert(bool bIsInsert)
|
||||
{
|
||||
// Forwards bIsInsert to the chunk manager of each compressed column handler.
// The dictionary chunk managers are always set to true, regardless of
// bIsInsert. NOTE(review): the hard-coded true appears on every dictionary
// line here, so it looks deliberate; confirm the reason with the owner.
m_colOp[COMPRESSED_OP]->chunkManager()->setIsInsert(bIsInsert);
|
||||
m_dctnry[COMPRESSED_OP]->chunkManager()->setIsInsert(true);
|
||||
m_colOp[COMPRESSED_OP_1]->chunkManager()->setIsInsert(bIsInsert);
|
||||
m_dctnry[COMPRESSED_OP_1]->chunkManager()->setIsInsert(true);
|
||||
m_colOp[COMPRESSED_OP_2]->chunkManager()->setIsInsert(bIsInsert);
|
||||
m_dctnry[COMPRESSED_OP_2]->chunkManager()->setIsInsert(true);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -458,7 +461,7 @@ public:
|
||||
*/
|
||||
bool getIsInsert()
|
||||
{
|
||||
// Reads the flag back from a single compressed column chunk manager.
// setIsInsert() writes the same bIsInsert value to every compressed column
// slot, so one slot is presumably representative of all of them.
return m_colOp[COMPRESSED_OP]->chunkManager()->getIsInsert();
|
||||
return m_colOp[COMPRESSED_OP_1]->chunkManager()->getIsInsert();
|
||||
}
|
||||
|
||||
std::tr1::unordered_map<TxnID, SP_TxnLBIDRec_t>& getTxnMap()
|
||||
@ -475,10 +478,23 @@ public:
|
||||
*/
|
||||
int flushChunks(int rc, const std::map<FID, FID>& columOids)
|
||||
{
|
||||
// Calls flushChunks(rc, columOids) on the chunk managers of the compressed
// column and dictionary handlers. Returns the first result that is not
// NO_ERROR, or NO_ERROR when every call succeeds.
int rtn1 = m_colOp[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids);
|
||||
int rtn2 = m_dctnry[COMPRESSED_OP]->chunkManager()->flushChunks(rc, columOids);
|
||||
std::vector<int32_t> compressedOpIds = {COMPRESSED_OP_1,
|
||||
COMPRESSED_OP_2};
|
||||
|
||||
return (rtn1 != NO_ERROR ? rtn1 : rtn2);
|
||||
// NOTE(review): the loop form returns as soon as one flushChunks call fails,
// so the remaining chunk managers are never flushed. The rtn1/rtn2 form
// always ran both calls before picking the return value. Confirm that
// skipping the later flushes on error is intended.
for (const auto compressedOpId : compressedOpIds)
|
||||
{
|
||||
auto rtn = m_colOp[compressedOpId]->chunkManager()->flushChunks(
|
||||
rc, columOids);
|
||||
if (rtn != NO_ERROR)
|
||||
return rtn;
|
||||
|
||||
rtn = m_dctnry[compressedOpId]->chunkManager()->flushChunks(
|
||||
rc, columOids);
|
||||
if (rtn != NO_ERROR)
|
||||
return rtn;
|
||||
}
|
||||
|
||||
return NO_ERROR;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -524,7 +540,7 @@ public:
|
||||
int startTransaction(const TxnID& txnid)
|
||||
{
|
||||
int rc = 0;
|
||||
rc = m_colOp[COMPRESSED_OP]->chunkManager()->startTransaction(txnid);
|
||||
rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->startTransaction(txnid);
|
||||
//if ( rc == 0)
|
||||
// rc = m_dctnry[COMPRESSED_OP]->chunkManager()->startTransaction(txnid);
|
||||
return rc;
|
||||
@ -537,7 +553,8 @@ public:
|
||||
int confirmTransaction (const TxnID& txnid)
|
||||
{
|
||||
// Confirms txnid on one compressed column chunk manager and returns that
// call's result unchanged.
// NOTE(review): COMPRESSED_OP_2 and the dictionary chunk managers are not
// called here; confirm that a single slot is sufficient.
int rc = 0;
|
||||
rc = m_colOp[COMPRESSED_OP]->chunkManager()->confirmTransaction (txnid);
|
||||
rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->confirmTransaction(
|
||||
txnid);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -549,7 +566,8 @@ public:
|
||||
int endTransaction(const TxnID& txnid, bool success)
|
||||
{
|
||||
int rc = 0;
|
||||
rc = m_colOp[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success);
|
||||
rc = m_colOp[COMPRESSED_OP_1]->chunkManager()->endTransaction(txnid,
|
||||
success);
|
||||
//if ( rc == 0)
|
||||
// rc = m_dctnry[COMPRESSED_OP]->chunkManager()->endTransaction(txnid, success);
|
||||
return rc;
|
||||
@ -785,7 +803,16 @@ private:
|
||||
|
||||
int op(int compressionType)
|
||||
{
|
||||
// Maps a compression type to a compression slot constant (presumably used
// to index m_colOp / m_dctnry; callers are outside this view).
// Switch form: 1 and 2 -> COMPRESSED_OP_1, 3 -> COMPRESSED_OP_2,
// anything else -> 0.
return (compressionType > 0 ? COMPRESSED_OP : UN_COMPRESSED_OP);
|
||||
switch (compressionType)
|
||||
{
|
||||
case 1:
|
||||
case 2:
|
||||
return COMPRESSED_OP_1;
|
||||
case 3:
|
||||
return COMPRESSED_OP_2;
|
||||
}
|
||||
|
||||
// 0 has the same value as UN_COMPRESSED_OP.
// NOTE(review): the ternary form sent every compressionType > 0 to the
// compressed slot; the switch sends unlisted positive values (e.g. 4) to
// slot 0. Confirm this fallback is intended, and consider returning the
// named constant instead of a bare 0.
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user