You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
feat(extent-elimination)!: re-enable extent-elimination for dictionary columns scanning
This is the "productization" of old code that would enable extent elimination for dictionary columns. This particular patch enables it, fixes the performance degradation (the main problem with the old code), and also fixes incorrect behavior of cpimport.
This commit is contained in:
@ -33,7 +33,7 @@ optparse.define short=S long=skip-columnstore-submodules desc="Skip columnstore
|
|||||||
optparse.define short=u long=skip-unit-tests desc="Skip UnitTests" variable=SKIP_UNIT_TESTS default=false value=true
|
optparse.define short=u long=skip-unit-tests desc="Skip UnitTests" variable=SKIP_UNIT_TESTS default=false value=true
|
||||||
optparse.define short=B long=run-microbench="Compile and run microbenchmarks " variable=RUN_BENCHMARKS default=false value=true
|
optparse.define short=B long=run-microbench="Compile and run microbenchmarks " variable=RUN_BENCHMARKS default=false value=true
|
||||||
optparse.define short=b long=branch desc="Choose git branch. For menu use -b \"\"" variable=BRANCH default=$CURRENT_BRANCH
|
optparse.define short=b long=branch desc="Choose git branch. For menu use -b \"\"" variable=BRANCH default=$CURRENT_BRANCH
|
||||||
optparse.define short=D long=without-core-dumps desc="Do not produce core dumps" variable=WITHOUT_COREDUMPS default=false value=true
|
optparse.define short=W long=without-core-dumps desc="Do not produce core dumps" variable=WITHOUT_COREDUMPS default=false value=true
|
||||||
optparse.define short=v long=verbose desc="Verbose makefile commands" variable=MAKEFILE_VERBOSE default=false value=true
|
optparse.define short=v long=verbose desc="Verbose makefile commands" variable=MAKEFILE_VERBOSE default=false value=true
|
||||||
optparse.define short=A long=asan desc="Build with ASAN" variable=ASAN default=false value=true
|
optparse.define short=A long=asan desc="Build with ASAN" variable=ASAN default=false value=true
|
||||||
optparse.define short=T long=tsan desc="Build with TSAN" variable=TSAN default=false value=true
|
optparse.define short=T long=tsan desc="Build with TSAN" variable=TSAN default=false value=true
|
||||||
@ -46,6 +46,7 @@ optparse.define short=n long=no-clean-install desc="Do not perform a clean insta
|
|||||||
optparse.define short=j long=parallel desc="Number of paralles for build" variable=CPUS default=$(getconf _NPROCESSORS_ONLN)
|
optparse.define short=j long=parallel desc="Number of paralles for build" variable=CPUS default=$(getconf _NPROCESSORS_ONLN)
|
||||||
optparse.define short=F long=show-build-flags desc="Print CMake flags, while build" variable=PRINT_CMAKE_FLAGS default=false
|
optparse.define short=F long=show-build-flags desc="Print CMake flags, while build" variable=PRINT_CMAKE_FLAGS default=false
|
||||||
optparse.define short=c long=cloud desc="Enable cloud storage" variable=CLOUD_STORAGE_ENABLED default=false value=true
|
optparse.define short=c long=cloud desc="Enable cloud storage" variable=CLOUD_STORAGE_ENABLED default=false value=true
|
||||||
|
optparse.define short=f long=do-not-freeze-revision desc="Disable revision freezing, or do not set 'update none' for columnstore submodule in MDB repository" variable=DO_NOT_FREEZE_REVISION default=false value=true
|
||||||
|
|
||||||
source $( optparse.build )
|
source $( optparse.build )
|
||||||
|
|
||||||
@ -547,7 +548,9 @@ generate_svgs()
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
disable_git_restore_frozen_revision
|
if [[ $DO_NOT_FREEZE_REVISION = false ]] ; then
|
||||||
|
disable_git_restore_frozen_revision
|
||||||
|
fi
|
||||||
|
|
||||||
select_branch
|
select_branch
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ using namespace messageqcpp;
|
|||||||
using namespace rowgroup;
|
using namespace rowgroup;
|
||||||
using namespace joiner;
|
using namespace joiner;
|
||||||
|
|
||||||
//#define XXX_BATCHPRIMPROC_TOKENS_RANGES_XXX
|
#define XXX_BATCHPRIMPROC_TOKENS_RANGES_XXX
|
||||||
|
|
||||||
namespace joblist
|
namespace joblist
|
||||||
{
|
{
|
||||||
|
@ -126,6 +126,7 @@ messageqcpp::ByteStream DictStepJL::reencodedFilterString() const
|
|||||||
{
|
{
|
||||||
messageqcpp::ByteStream bs;
|
messageqcpp::ByteStream bs;
|
||||||
|
|
||||||
|
datatypes::Charset cset(charsetNumber);
|
||||||
if (hasEqFilter)
|
if (hasEqFilter)
|
||||||
{
|
{
|
||||||
idbassert(filterCount == eqFilter.size());
|
idbassert(filterCount == eqFilter.size());
|
||||||
@ -133,7 +134,7 @@ messageqcpp::ByteStream DictStepJL::reencodedFilterString() const
|
|||||||
for (uint32_t i = 0; i < filterCount; i++)
|
for (uint32_t i = 0; i < filterCount; i++)
|
||||||
{
|
{
|
||||||
uint8_t roundFlag = 0;
|
uint8_t roundFlag = 0;
|
||||||
int64_t encodedPrefix = encodeStringPrefix((unsigned char*)eqFilter[i].c_str(), eqFilter[i].size(), charsetNumber);
|
int64_t encodedPrefix = encodeStringPrefix((unsigned char*)eqFilter[i].c_str(), eqFilter[i].size(), cset);
|
||||||
bs << eqOp;
|
bs << eqOp;
|
||||||
bs << roundFlag;
|
bs << roundFlag;
|
||||||
bs << encodedPrefix;
|
bs << encodedPrefix;
|
||||||
@ -173,7 +174,7 @@ messageqcpp::ByteStream DictStepJL::reencodedFilterString() const
|
|||||||
bs << roundFlag;
|
bs << roundFlag;
|
||||||
filterStringCopy >> size;
|
filterStringCopy >> size;
|
||||||
ptr = filterStringCopy.buf();
|
ptr = filterStringCopy.buf();
|
||||||
encodedPrefix = encodeStringPrefix(ptr, size, charsetNumber);
|
encodedPrefix = encodeStringPrefix(ptr, size, cset);
|
||||||
bs << encodedPrefix;
|
bs << encodedPrefix;
|
||||||
filterStringCopy.advance(size);
|
filterStringCopy.advance(size);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,64 @@
|
|||||||
|
DROP DATABASE IF EXISTS MCOL4580;
|
||||||
|
CREATE DATABASE MCOL4580;
|
||||||
|
USE MCOL4580;
|
||||||
|
CREATE TABLE t(d TEXT) ENGINE=COLUMNSTORE;
|
||||||
|
INSERT INTO t(d) VALUES ('b'),('b'),('b');
|
||||||
|
SELECT CALSETTRACE(1);
|
||||||
|
CALSETTRACE(1)
|
||||||
|
0
|
||||||
|
SELECT COUNT(*) FROM t WHERE d = 'a';
|
||||||
|
COUNT(*)
|
||||||
|
0
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
touched eliminated
|
||||||
|
BlocksTouched-0; PartitionBlocksEliminated-1;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d < 'b';
|
||||||
|
COUNT(*)
|
||||||
|
0
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
touched eliminated
|
||||||
|
BlocksTouched-3; PartitionBlocksEliminated-0;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d > 'b';
|
||||||
|
COUNT(*)
|
||||||
|
0
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
touched eliminated
|
||||||
|
BlocksTouched-3; PartitionBlocksEliminated-0;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d <= 'a';
|
||||||
|
COUNT(*)
|
||||||
|
0
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
touched eliminated
|
||||||
|
BlocksTouched-0; PartitionBlocksEliminated-1;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d >= 'c';
|
||||||
|
COUNT(*)
|
||||||
|
0
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
touched eliminated
|
||||||
|
BlocksTouched-0; PartitionBlocksEliminated-1;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d != 'b';
|
||||||
|
COUNT(*)
|
||||||
|
0
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
touched eliminated
|
||||||
|
BlocksTouched-0; PartitionBlocksEliminated-1;
|
||||||
|
INSERT INTO t SELECT * FROM t;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d = 'b';
|
||||||
|
COUNT(*)
|
||||||
|
6
|
||||||
|
DROP TABLE t;
|
||||||
|
CREATE TABLE t (c TEXT CHARACTER SET utf8 COLLATE utf8_czech_ci) engine=columnstore;
|
||||||
|
INSERT INTO t(c) VALUES ('ch'), ('ch');
|
||||||
|
SELECT COUNT(*) FROM t WHERE c < 'cz';
|
||||||
|
COUNT(*)
|
||||||
|
0
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
touched eliminated
|
||||||
|
BlocksTouched-0; PartitionBlocksEliminated-1;
|
||||||
|
SELECT COUNT(*) FROM t WHERE c > 'cz';
|
||||||
|
COUNT(*)
|
||||||
|
2
|
||||||
|
SELECT COUNT(*) FROM t WHERE c = 'CH';
|
||||||
|
COUNT(*)
|
||||||
|
2
|
||||||
|
DROP DATABASE MCOL4580;
|
@ -0,0 +1,47 @@
|
|||||||
|
--disable_warnings # we disable warnings throughout the test: as we use calsettrace(1), it produces many unnecessary warnings.
|
||||||
|
DROP DATABASE IF EXISTS MCOL4580;
|
||||||
|
CREATE DATABASE MCOL4580;
|
||||||
|
USE MCOL4580;
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Binary collation test.
|
||||||
|
|
||||||
|
CREATE TABLE t(d TEXT) ENGINE=COLUMNSTORE;
|
||||||
|
INSERT INTO t(d) VALUES ('b'),('b'),('b');
|
||||||
|
SELECT CALSETTRACE(1);
|
||||||
|
SELECT COUNT(*) FROM t WHERE d = 'a';
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
# As strict comparisons are relaxed for extent elimination (strict less '<' is treated as less or equal '<='), the next two queries will not eliminate the extent even though no rows match.
|
||||||
|
SELECT COUNT(*) FROM t WHERE d < 'b';
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d > 'b';
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d <= 'a';
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d >= 'c';
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
# note that extent elimination will eliminate extents with only single value
|
||||||
|
# in the case of not-equal predicate.
|
||||||
|
SELECT COUNT(*) FROM t WHERE d != 'b';
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
INSERT INTO t SELECT * FROM t;
|
||||||
|
SELECT COUNT(*) FROM t WHERE d = 'b';
|
||||||
|
DROP TABLE t;
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Actual collation test.
|
||||||
|
|
||||||
|
# Reference chart: https://collation-charts.org/mysql60/mysql604.utf8_czech_ci.html
|
||||||
|
# We will use the fact that "cz" should go before "ch".
|
||||||
|
|
||||||
|
CREATE TABLE t (c TEXT CHARACTER SET utf8 COLLATE utf8_czech_ci) engine=columnstore;
|
||||||
|
INSERT INTO t(c) VALUES ('ch'), ('ch');
|
||||||
|
SELECT COUNT(*) FROM t WHERE c < 'cz';
|
||||||
|
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
|
||||||
|
|
||||||
|
# and check that we did not break anything (both counts must be 2, as two rows were inserted):
|
||||||
|
SELECT COUNT(*) FROM t WHERE c > 'cz';
|
||||||
|
SELECT COUNT(*) FROM t WHERE c = 'CH';
|
||||||
|
|
||||||
|
DROP DATABASE MCOL4580;
|
||||||
|
--enable_warnings
|
@ -434,6 +434,7 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector<uint8_t>* out,
|
|||||||
header.PhysicalIO = 0;
|
header.PhysicalIO = 0;
|
||||||
|
|
||||||
header.NBYTES = sizeof(DictOutput);
|
header.NBYTES = sizeof(DictOutput);
|
||||||
|
datatypes::Charset cset(charsetNumber);
|
||||||
|
|
||||||
for (nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType, (in->InputFlags ? true : false), skipNulls);
|
for (nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType, (in->InputFlags ? true : false), skipNulls);
|
||||||
sigptr.len != -1;
|
sigptr.len != -1;
|
||||||
@ -442,7 +443,7 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector<uint8_t>* out,
|
|||||||
#if defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
|
#if defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
|
||||||
if (minMax)
|
if (minMax)
|
||||||
{
|
{
|
||||||
uint64_t v = encodeStringPrefix_check_null(sigptr.data, sigptr.len, charsetNumber);
|
uint64_t v = encodeStringPrefix_check_null(sigptr.data, sigptr.len, cset);
|
||||||
minMax[1] = minMax[1] < v ? v : minMax[1];
|
minMax[1] = minMax[1] < v ? v : minMax[1];
|
||||||
minMax[0] = minMax[0] > v ? v : minMax[0];
|
minMax[0] = minMax[0] > v ? v : minMax[0];
|
||||||
}
|
}
|
||||||
|
@ -49,7 +49,7 @@
|
|||||||
class PrimTest;
|
class PrimTest;
|
||||||
|
|
||||||
// XXX: turn off dictionary range setting during scan.
|
// XXX: turn off dictionary range setting during scan.
|
||||||
//#define XXX_PRIMITIVES_TOKEN_RANGES_XXX
|
#define XXX_PRIMITIVES_TOKEN_RANGES_XXX
|
||||||
|
|
||||||
namespace primitives
|
namespace primitives
|
||||||
{
|
{
|
||||||
|
@ -24,10 +24,9 @@
|
|||||||
#include "string_prefixes.h"
|
#include "string_prefixes.h"
|
||||||
|
|
||||||
// XXX: string (or, actually, a BLOB) with all NUL chars will be encoded into zero. Which corresponds to
|
// XXX: string (or, actually, a BLOB) with all NUL chars will be encoded into zero. Which corresponds to
|
||||||
// encoding of empty string, or NULL.
|
// encoding of empty string.
|
||||||
int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber)
|
int64_t encodeStringPrefix(const uint8_t* str, size_t len, datatypes::Charset& cset)
|
||||||
{
|
{
|
||||||
datatypes::Charset cset(charsetNumber);
|
|
||||||
uint8_t fixedLenPrefix[8];
|
uint8_t fixedLenPrefix[8];
|
||||||
memset(fixedLenPrefix, 0, sizeof(fixedLenPrefix));
|
memset(fixedLenPrefix, 0, sizeof(fixedLenPrefix));
|
||||||
cset.strnxfrm(fixedLenPrefix, sizeof(fixedLenPrefix), 8, str, len, 0);
|
cset.strnxfrm(fixedLenPrefix, sizeof(fixedLenPrefix), 8, str, len, 0);
|
||||||
@ -41,11 +40,11 @@ int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber)
|
|||||||
return acc;
|
return acc;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, int charsetNumber)
|
int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, datatypes::Charset& cset)
|
||||||
{
|
{
|
||||||
if (len < 1)
|
if (len < 1 && str == nullptr)
|
||||||
{
|
{
|
||||||
return joblist::UBIGINTNULL;
|
return joblist::UBIGINTNULL;
|
||||||
}
|
}
|
||||||
return encodeStringPrefix(str, len, charsetNumber);
|
return encodeStringPrefix(str, len, cset);
|
||||||
}
|
}
|
||||||
|
@ -23,10 +23,14 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "collation.h"
|
||||||
|
#include "joblisttypes.h"
|
||||||
|
|
||||||
|
|
||||||
// Encode string prefix into an int64_t, packing as many chars from string as possible
|
// Encode string prefix into an int64_t, packing as many chars from string as possible
|
||||||
// into the result and respecting the collation provided by charsetNumber.
|
// into the result and respecting the collation provided by cset.
|
||||||
//
|
//
|
||||||
// For one example, for CI Czech collation, encodeStringPrefix("cz") < encodeStringPrefix("CH").
|
// For one example, for CI Czech collation, encodeStringPrefix("cz") < encodeStringPrefix("CH").
|
||||||
int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber);
|
int64_t encodeStringPrefix(const uint8_t* str, size_t len, datatypes::Charset& cset);
|
||||||
|
|
||||||
int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, int charsetNumber);
|
int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, datatypes::Charset& cset);
|
||||||
|
@ -189,6 +189,7 @@ int ColExtInf::updateEntryLbid(BRM::LBID_t startLbid)
|
|||||||
void ColExtInf::getCPInfoForBRM(JobColumn column, BRMReporter& brmReporter)
|
void ColExtInf::getCPInfoForBRM(JobColumn column, BRMReporter& brmReporter)
|
||||||
{
|
{
|
||||||
bool bIsChar = ((column.weType == WriteEngine::WR_CHAR) && (column.colType != COL_TYPE_DICT));
|
bool bIsChar = ((column.weType == WriteEngine::WR_CHAR) && (column.colType != COL_TYPE_DICT));
|
||||||
|
bool bIsText = (column.weType == WriteEngine::WR_TEXT);
|
||||||
|
|
||||||
boost::mutex::scoped_lock lock(fMapMutex);
|
boost::mutex::scoped_lock lock(fMapMutex);
|
||||||
|
|
||||||
@ -206,69 +207,81 @@ void ColExtInf::getCPInfoForBRM(JobColumn column, BRMReporter& brmReporter)
|
|||||||
int128_t bigMinVal = iter->second.fbigMinVal;
|
int128_t bigMinVal = iter->second.fbigMinVal;
|
||||||
int128_t bigMaxVal = iter->second.fbigMaxVal;
|
int128_t bigMaxVal = iter->second.fbigMaxVal;
|
||||||
|
|
||||||
if (bIsChar)
|
bool bIsValid = true;
|
||||||
|
|
||||||
|
if (bIsChar || bIsText)
|
||||||
{
|
{
|
||||||
// If we have added 1 or more rows, then we should have a valid
|
// If we have added 1 or more rows, then we should have a valid
|
||||||
// range in our RowExtMap object, in which case...
|
// range in our RowExtMap object, in which case...
|
||||||
// We swap/restore byte order before sending min/max string to BRM;
|
// We swap/restore byte order before sending min/max string to BRM;
|
||||||
// else we leave fMinVal & fMaxVal set to LLONG_MIN and send as-is,
|
// else we leave fMinVal & fMaxVal set to LLONG_MIN and send as-is,
|
||||||
// to let BRM know we added no rows.
|
// to let BRM know we added no rows.
|
||||||
|
|
||||||
if ((iter->second.fMinVal != iter->second.fMaxVal) || (iter->second.fMinVal != LLONG_MIN))
|
if ((iter->second.fMinVal != iter->second.fMaxVal) || (iter->second.fMinVal != LLONG_MIN))
|
||||||
{
|
{
|
||||||
minVal = static_cast<int64_t>(uint64ToStr(static_cast<uint64_t>(iter->second.fMinVal)));
|
minVal = static_cast<int64_t>(uint64ToStr(static_cast<uint64_t>(iter->second.fMinVal)));
|
||||||
maxVal = static_cast<int64_t>(uint64ToStr(static_cast<uint64_t>(iter->second.fMaxVal)));
|
maxVal = static_cast<int64_t>(uint64ToStr(static_cast<uint64_t>(iter->second.fMaxVal)));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// This is dropping range to invalid.
|
||||||
|
minVal = static_cast<int64_t>(~(0UL));
|
||||||
|
maxVal = static_cast<int64_t>(0);
|
||||||
|
bIsValid = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Log for now; may control with debug flag later
|
if (bIsValid) {
|
||||||
// if (fLog->isDebug( DEBUG_1 ))
|
// Log for now; may control with debug flag later
|
||||||
// TODO MCOL-641 Add support here.
|
// if (fLog->isDebug( DEBUG_1 ))
|
||||||
{
|
// TODO MCOL-641 Add support here.
|
||||||
std::ostringstream oss;
|
{
|
||||||
oss << "Saving CP update for OID-" << fColOid << "; lbid-" << iter->second.fLbid << "; type-"
|
std::ostringstream oss;
|
||||||
<< bIsChar << "; isNew-" << iter->second.fNewExtent;
|
oss << "Saving CP update for OID-" << fColOid << "; lbid-" << iter->second.fLbid << "; type-"
|
||||||
|
<< bIsChar << "; isNew-" << iter->second.fNewExtent;
|
||||||
|
|
||||||
if (bIsChar)
|
if (bIsChar)
|
||||||
{
|
{
|
||||||
char minValStr[sizeof(int64_t) + 1];
|
char minValStr[sizeof(int64_t) + 1];
|
||||||
char maxValStr[sizeof(int64_t) + 1];
|
char maxValStr[sizeof(int64_t) + 1];
|
||||||
memcpy(minValStr, &minVal, sizeof(int64_t));
|
memcpy(minValStr, &minVal, sizeof(int64_t));
|
||||||
memcpy(maxValStr, &maxVal, sizeof(int64_t));
|
memcpy(maxValStr, &maxVal, sizeof(int64_t));
|
||||||
minValStr[sizeof(int64_t)] = '\0';
|
minValStr[sizeof(int64_t)] = '\0';
|
||||||
maxValStr[sizeof(int64_t)] = '\0';
|
maxValStr[sizeof(int64_t)] = '\0';
|
||||||
oss << "; minVal: " << minVal << "; (" << minValStr << ")"
|
oss << "; minVal: " << minVal << "; (" << minValStr << ")"
|
||||||
<< "; maxVal: " << maxVal << "; (" << maxValStr << ")";
|
<< "; maxVal: " << maxVal << "; (" << maxValStr << ")";
|
||||||
|
}
|
||||||
|
else if (isUnsigned(column.dataType))
|
||||||
|
{
|
||||||
|
oss << "; min: " << static_cast<uint64_t>(minVal) << "; max: " << static_cast<uint64_t>(maxVal);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
oss << "; min: " << minVal << "; max: " << maxVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
fLog->logMsg(oss.str(), MSGLVL_INFO2);
|
||||||
}
|
}
|
||||||
else if (isUnsigned(column.dataType))
|
|
||||||
|
BRM::CPInfoMerge cpInfoMerge;
|
||||||
|
cpInfoMerge.startLbid = iter->second.fLbid;
|
||||||
|
if (column.width <= 8)
|
||||||
{
|
{
|
||||||
oss << "; min: " << static_cast<uint64_t>(minVal) << "; max: " << static_cast<uint64_t>(maxVal);
|
cpInfoMerge.max = maxVal;
|
||||||
|
cpInfoMerge.min = minVal;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
oss << "; min: " << minVal << "; max: " << maxVal;
|
cpInfoMerge.bigMax = bigMaxVal;
|
||||||
|
cpInfoMerge.bigMin = bigMinVal;
|
||||||
}
|
}
|
||||||
|
cpInfoMerge.seqNum = -1; // Not used by mergeExtentsMaxMin. XXX: this marks extent invalid, BTW.
|
||||||
fLog->logMsg(oss.str(), MSGLVL_INFO2);
|
cpInfoMerge.type = column.dataType;
|
||||||
|
cpInfoMerge.newExtent = iter->second.fNewExtent;
|
||||||
|
cpInfoMerge.colWidth = column.width;
|
||||||
|
brmReporter.addToCPInfo(cpInfoMerge);
|
||||||
}
|
}
|
||||||
|
|
||||||
BRM::CPInfoMerge cpInfoMerge;
|
|
||||||
cpInfoMerge.startLbid = iter->second.fLbid;
|
|
||||||
if (column.width <= 8)
|
|
||||||
{
|
|
||||||
cpInfoMerge.max = maxVal;
|
|
||||||
cpInfoMerge.min = minVal;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cpInfoMerge.bigMax = bigMaxVal;
|
|
||||||
cpInfoMerge.bigMin = bigMinVal;
|
|
||||||
}
|
|
||||||
cpInfoMerge.seqNum = -1; // Not used by mergeExtentsMaxMin
|
|
||||||
cpInfoMerge.type = column.dataType;
|
|
||||||
cpInfoMerge.newExtent = iter->second.fNewExtent;
|
|
||||||
cpInfoMerge.colWidth = column.width;
|
|
||||||
brmReporter.addToCPInfo(cpInfoMerge);
|
|
||||||
|
|
||||||
++iter;
|
++iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
// XXX: a definition to switch off computations for token columns.
|
// XXX: a definition to switch off computations for token columns.
|
||||||
//#define XXX_WRITEENGINE_TOKENS_RANGES_XXX
|
#define XXX_WRITEENGINE_TOKENS_RANGES_XXX
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
@ -465,12 +465,12 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
|
|||||||
}
|
}
|
||||||
case WR_CHAR:
|
case WR_CHAR:
|
||||||
{
|
{
|
||||||
fetchNewOldValues<int64_t, int64_t>(value, oldValue, valArrayVoid, oldValArrayVoid, i, totalNewRow);
|
fetchNewOldValues<uint64_t, uint64_t>(uvalue, oldUValue, valArrayVoid, oldValArrayVoid, i, totalNewRow);
|
||||||
// for characters (strings, actually), we fetched them in LSB order, on x86, at the very least.
|
// for characters (strings, actually), we fetched them in LSB order, on x86, at the very least.
|
||||||
// this means most significant byte of the string, which is first, is now in LSB of uvalue/oldValue.
|
// this means most significant byte of the string, which is first, is now in LSB of uvalue/oldValue.
|
||||||
// we must perform a conversion.
|
// we must perform a conversion.
|
||||||
value = uint64ToStr(uvalue);
|
uvalue = uint64ToStr(uvalue);
|
||||||
oldValue = uint64ToStr(oldValue);
|
oldUValue = uint64ToStr(oldUValue);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: idbassert_s(0, "unknown WR type tag"); return;
|
default: idbassert_s(0, "unknown WR type tag"); return;
|
||||||
@ -1732,6 +1732,9 @@ int WriteEngineWrapper::insertColumnRecs(
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||||
|
datatypes::Charset cset(dctnryStructList[i].fCharsetNumber);
|
||||||
|
#endif
|
||||||
for (uint32_t rows = 0; rows < (totalRow - rowsLeft); rows++)
|
for (uint32_t rows = 0; rows < (totalRow - rowsLeft); rows++)
|
||||||
{
|
{
|
||||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||||
@ -1754,8 +1757,7 @@ int WriteEngineWrapper::insertColumnRecs(
|
|||||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||||
dctTuple.sigSize = dctStr_iter->length();
|
dctTuple.sigSize = dctStr_iter->length();
|
||||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||||
strPrefix = encodeStringPrefix(dctTuple.sigValue, dctTuple.sigSize,
|
strPrefix = encodeStringPrefix(dctTuple.sigValue, dctTuple.sigSize, cset);
|
||||||
dctnryStructList[i].fCharsetNumber);
|
|
||||||
#endif
|
#endif
|
||||||
dctTuple.isNull = false;
|
dctTuple.isNull = false;
|
||||||
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
|
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
|
||||||
@ -1822,8 +1824,7 @@ int WriteEngineWrapper::insertColumnRecs(
|
|||||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||||
dctTuple.sigSize = dctStr_iter->length();
|
dctTuple.sigSize = dctStr_iter->length();
|
||||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||||
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
|
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize, cset);
|
||||||
dctnryStructList[i].fCharsetNumber);
|
|
||||||
#endif
|
#endif
|
||||||
dctTuple.isNull = false;
|
dctTuple.isNull = false;
|
||||||
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
|
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
|
||||||
|
Reference in New Issue
Block a user