1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00

MCOL-4580 extent elimination for dictionary-based text/varchar types

The idea is relatively simple - encode prefixes of collated strings as
integers and use them to compute extents' ranges. Then we can eliminate
extents with strings.

The patch contains all of the required code but misses one important
step: we do not keep the collation index, we keep the charset index. Because of
this, some of the tests in the bugfix suite fail, and thus the main
functionality is turned off.

The reason this patch is submitted as a PR at all is that it contains
changes that make CHAR/VARCHAR columns unsigned. This change is needed for the
vectorization work.
This commit is contained in:
Serguey Zefirov 2022-02-04 11:55:09 +00:00
parent a66a8dfabf
commit 53b9a2a0f9
54 changed files with 698 additions and 227 deletions

View File

@ -482,7 +482,11 @@ inline bool isUnsigned(const datatypes::SystemCatalog::ColDataType type)
case datatypes::SystemCatalog::USMALLINT:
case datatypes::SystemCatalog::UMEDINT:
case datatypes::SystemCatalog::UINT:
case datatypes::SystemCatalog::UBIGINT: return true;
case datatypes::SystemCatalog::UBIGINT:
case datatypes::SystemCatalog::CHAR:
case datatypes::SystemCatalog::VARCHAR:
case datatypes::SystemCatalog::TEXT:
case datatypes::SystemCatalog::VARBINARY: return true;
default: return false;
}
@ -2520,4 +2524,3 @@ class TypeHandlerTimestamp : public TypeHandlerTemporal
};
} // end of namespace datatypes

View File

@ -336,6 +336,7 @@ class SignedInteger : public Parser::DD2OM<Sign, UnsignedInteger>
{
public:
using DD2OM::DD2OM;
bool isNull() const
{
return UnsignedInteger::isNull();

View File

@ -424,6 +424,8 @@ void AggregateColumn::evaluate(Row& row, bool& isNull)
else
fResult.intVal = atoll((char*)&fResult.origIntVal);
fResult.uintVal = fResult.intVal;
break;
case CalpontSystemCatalog::BIGINT:

View File

@ -945,17 +945,18 @@ inline bool isNull(int64_t val, const execplan::CalpontSystemCatalog::ColType& c
break;
}
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::CHAR:
{
int colWidth = ct.colWidth;
if (colWidth <= 8)
{
if ((colWidth == 1) && ((int8_t)joblist::CHAR1NULL == val))
if ((colWidth == 1) && ((uint8_t)joblist::CHAR1NULL == (uint8_t)val))
ret = true;
else if ((colWidth == 2) && ((int16_t)joblist::CHAR2NULL == val))
else if ((colWidth == 2) && ((uint16_t)joblist::CHAR2NULL == (uint16_t)val))
ret = true;
else if ((colWidth < 5) && ((int32_t)joblist::CHAR4NULL == val))
else if ((colWidth < 5) && ((uint32_t)joblist::CHAR4NULL == (uint32_t)val))
ret = true;
else if ((int64_t)joblist::CHAR8NULL == val)
ret = true;
@ -964,7 +965,6 @@ inline bool isNull(int64_t val, const execplan::CalpontSystemCatalog::ColType& c
{
throw std::logic_error("Not a int column.");
}
break;
}
@ -1075,27 +1075,6 @@ inline bool isNull(int64_t val, const execplan::CalpontSystemCatalog::ColType& c
break;
}
case execplan::CalpontSystemCatalog::VARCHAR:
{
int colWidth = ct.colWidth;
if (colWidth <= 8)
{
if ((colWidth < 3) && ((int16_t)joblist::CHAR2NULL == val))
ret = true;
else if ((colWidth < 5) && ((int32_t)joblist::CHAR4NULL == val))
ret = true;
else if ((int64_t)joblist::CHAR8NULL == val)
ret = true;
}
else
{
throw std::logic_error("Not a int column.");
}
break;
}
case execplan::CalpontSystemCatalog::UTINYINT:
{
if (joblist::UTINYINTNULL == (uint8_t)val)
@ -1278,4 +1257,3 @@ const std::string colDataTypeToString(CalpontSystemCatalog::ColDataType cdt);
bool ctListSort(const CalpontSystemCatalog::ColType& a, const CalpontSystemCatalog::ColType& b);
} // namespace execplan

View File

@ -48,18 +48,8 @@ ConstantColumn::ConstantColumn(const string& sql, TYPE type)
{
fResult.strVal = sql;
if (type == LITERAL && sql.length() < 9)
{
memcpy(tmp, sql.c_str(), sql.length());
memset(tmp + sql.length(), 0, 8);
fResult.uintVal = uint64ToStr(*((uint64_t*)tmp));
fResult.intVal = (int64_t)fResult.uintVal;
}
else
{
fResult.intVal = atoll(sql.c_str());
fResult.uintVal = strtoull(sql.c_str(), NULL, 0);
}
fResult.intVal = atoll(sql.c_str());
fResult.uintVal = strtoull(sql.c_str(), NULL, 0);
fResult.floatVal = atof(sql.c_str());
fResult.doubleVal = atof(sql.c_str());

View File

@ -264,20 +264,6 @@ void PredicateOperator::setOpType(Type& l, Type& r)
fOperationType.colWidth = 8;
}
}
// If both sides are unsigned, use UBIGINT as result type, otherwise
// "promote" to BIGINT.
else if (isUnsigned(l.colDataType) && isUnsigned(r.colDataType))
{
fOperationType.colDataType = execplan::CalpontSystemCatalog::UBIGINT;
fOperationType.colWidth = 8;
}
else if ((isSignedInteger(l.colDataType) && isUnsigned(r.colDataType)) ||
(isUnsigned(l.colDataType) && isSignedInteger(r.colDataType)) ||
(isSignedInteger(l.colDataType) && isSignedInteger(r.colDataType)))
{
fOperationType.colDataType = execplan::CalpontSystemCatalog::BIGINT;
fOperationType.colWidth = 8;
}
else if ((l.colDataType == execplan::CalpontSystemCatalog::CHAR ||
l.colDataType == execplan::CalpontSystemCatalog::VARCHAR ||
l.colDataType == execplan::CalpontSystemCatalog::TEXT) &&
@ -321,6 +307,20 @@ void PredicateOperator::setOpType(Type& l, Type& r)
fOperationType.colWidth = 255;
}
}
// If both sides are unsigned, use UBIGINT as result type, otherwise
// "promote" to BIGINT.
else if (isUnsigned(l.colDataType) && isInteger(l.colDataType) && isUnsigned(r.colDataType) && isInteger(r.colDataType))
{
fOperationType.colDataType = execplan::CalpontSystemCatalog::UBIGINT;
fOperationType.colWidth = 8;
}
else if ((isSignedInteger(l.colDataType) && isUnsigned(r.colDataType) && isInteger(r.colDataType)) ||
(isUnsigned(l.colDataType) && isInteger(l.colDataType) && isSignedInteger(r.colDataType)) ||
(isSignedInteger(l.colDataType) && isSignedInteger(r.colDataType)))
{
fOperationType.colDataType = execplan::CalpontSystemCatalog::BIGINT;
fOperationType.colWidth = 8;
}
else if (l.colDataType == execplan::CalpontSystemCatalog::LONGDOUBLE ||
r.colDataType == execplan::CalpontSystemCatalog::LONGDOUBLE)
{
@ -410,7 +410,9 @@ bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, ReturnedCol
if (isNull)
return false;
return numericCompare(val1, rop->getIntVal(row, isNull)) && !isNull;
int64_t val2 = rop->getIntVal(row, isNull);
return numericCompare(val1, val2) && !isNull;
}
case execplan::CalpontSystemCatalog::UBIGINT:

View File

@ -565,6 +565,16 @@ void SimpleColumn::evaluate(Row& row, bool& isNull)
else
fResult.intVal = atoll((char*)&fResult.origIntVal);
// MCOL-4580 - related, probably can be marked with XXX.
// This does not fail in any tests, but it is considered wrong.
// The reasoning behind that is that we changed the signedness of characters to unsigned
// and it might be the case with short strings that they were copied as is using
// uint64ToStr encoding into int64_t values. So, potentially, unsuspecting code
// may use getUintVal instead of getIntVal to process a short char column, getting
// an uninitialized value and giving floating (unstable) behavior.
// None of our tests failed, though.
fResult.uintVal = fResult.intVal;
break;
}

View File

@ -36,6 +36,7 @@
#include "columnwidth.h"
#include "mcs_decimal.h"
#include "mcs_int64.h"
#include "numericliteral.h"
namespace messageqcpp
{
@ -664,25 +665,19 @@ inline int64_t TreeNode::getIntVal()
switch (fResultType.colDataType)
{
case CalpontSystemCatalog::CHAR:
if (fResultType.colWidth <= 8)
return fResult.intVal;
return atoll(fResult.strVal.c_str());
case CalpontSystemCatalog::VARCHAR:
if (fResultType.colWidth <= 7)
return fResult.intVal;
return atoll(fResult.strVal.c_str());
// FIXME: ???
case CalpontSystemCatalog::VARBINARY:
case CalpontSystemCatalog::BLOB:
case CalpontSystemCatalog::TEXT:
if (fResultType.colWidth <= 7)
return fResult.intVal;
return atoll(fResult.strVal.c_str());
{
datatypes::DataCondition cnverr;
literal::Converter<literal::SignedInteger> cnv(fResult.strVal, cnverr);
if (datatypes::DataCondition::Code(cnverr) != 0)
{
cerr << "error in int conversion from '" << fResult.strVal << "'";
}
return cnv.toSInt<int64_t>(cnverr);
}
case CalpontSystemCatalog::BIGINT:
case CalpontSystemCatalog::TINYINT:
@ -721,6 +716,20 @@ inline uint64_t TreeNode::getUintVal()
{
switch (fResultType.colDataType)
{
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::VARBINARY:
case CalpontSystemCatalog::BLOB:
case CalpontSystemCatalog::TEXT:
{
datatypes::DataCondition cnverr;
literal::Converter<literal::UnsignedInteger> cnv(fResult.strVal, cnverr);
if (datatypes::DataCondition::Code(cnverr) != 0)
{
cerr << "error in unsigned int conversion from '" << fResult.strVal << "'";
}
return cnv.toXIntPositive<uint64_t>(cnverr);
}
case CalpontSystemCatalog::BIGINT:
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:

View File

@ -50,6 +50,8 @@ using namespace messageqcpp;
using namespace rowgroup;
using namespace joiner;
//#define XXX_BATCHPRIMPROC_TOKENS_RANGES_XXX
namespace joblist
{
BatchPrimitiveProcessorJL::BatchPrimitiveProcessorJL(const ResourceManager* rm)
@ -152,6 +154,21 @@ void BatchPrimitiveProcessorJL::addFilterStep(const pDictionaryStep& step)
cc->setBatchPrimitiveProcessor(this);
cc->setQueryUuid(step.queryUuid());
cc->setStepUuid(uuid);
#if defined(XXX_BATCHPRIMPROC_TOKENS_RANGES_XXX)
if (filterSteps.size() > 0)
{
size_t stepsIndex = filterSteps.size() - 1;
SCommand prevCC = filterSteps[stepsIndex];
ColumnCommandJL* pcc = dynamic_cast<ColumnCommandJL*>(prevCC.get());
DictStepJL* ccc = dynamic_cast<DictStepJL*>(cc.get());
if (pcc && ccc)
{
filterSteps[stepsIndex].reset(
new ColumnCommandJL(*pcc, *ccc)); // column command will use same filters.
}
}
#endif
filterSteps.push_back(cc);
filterCount++;
needStrValues = true;
@ -443,6 +460,7 @@ void BatchPrimitiveProcessorJL::getElementTypes(ByteStream& in, vector<ElementTy
if (*validCPData)
{
in >> *lbid;
in >> tmp64;
*min = (int64_t)tmp64;
in >> tmp64;
@ -712,8 +730,9 @@ bool BatchPrimitiveProcessorJL::countThisMsg(messageqcpp::ByteStream& in) const
}
if (data[offset] != 0)
offset += (data[offset + CP_FLAG_AND_LBID] * 2) + CP_FLAG_AND_LBID +
1; // skip the CP data with wide min/max values (16/32 bytes each)
offset += (data[offset + CP_FLAG_AND_LBID + 1] * 2) + CP_FLAG_AND_LBID + 1 +
1; // skip the CP data with wide min/max values (16/32 bytes each). we also skip
// cpFromDictScan flag.
else
offset += CP_FLAG_AND_LBID; // skip only the "valid CP data" & LBID bytes
}
@ -750,9 +769,10 @@ void BatchPrimitiveProcessorJL::deserializeAggregateResult(ByteStream* in, vecto
}
void BatchPrimitiveProcessorJL::getRowGroupData(ByteStream& in, vector<RGData>* out, bool* validCPData,
uint64_t* lbid, int128_t* min, int128_t* max,
uint32_t* cachedIO, uint32_t* physIO, uint32_t* touchedBlocks,
bool* countThis, uint32_t threadID, bool* hasWideColumn,
uint64_t* lbid, bool* fromDictScan, int128_t* min,
int128_t* max, uint32_t* cachedIO, uint32_t* physIO,
uint32_t* touchedBlocks, bool* countThis, uint32_t threadID,
bool* hasWideColumn,
const execplan::CalpontSystemCatalog::ColType& colType) const
{
uint64_t tmp64;
@ -789,6 +809,8 @@ void BatchPrimitiveProcessorJL::getRowGroupData(ByteStream& in, vector<RGData>*
{
in >> *lbid;
in >> tmp8;
*fromDictScan = tmp8 != 0;
in >> tmp8;
*hasWideColumn = (tmp8 > utils::MAXLEGACYWIDTH);
if (UNLIKELY(*hasWideColumn))
{

View File

@ -167,9 +167,9 @@ class BatchPrimitiveProcessorJL
uint32_t* touchedBlocks) const;
void deserializeAggregateResults(messageqcpp::ByteStream* in, std::vector<rowgroup::RGData>* out) const;
void getRowGroupData(messageqcpp::ByteStream& in, std::vector<rowgroup::RGData>* out, bool* validCPData,
uint64_t* lbid, int128_t* min, int128_t* max, uint32_t* cachedIO, uint32_t* physIO,
uint32_t* touchedBlocks, bool* countThis, uint32_t threadID, bool* hasBinaryColumn,
const execplan::CalpontSystemCatalog::ColType& colType) const;
uint64_t* lbid, bool* fromDictScan, int128_t* min, int128_t* max, uint32_t* cachedIO,
uint32_t* physIO, uint32_t* touchedBlocks, bool* countThis, uint32_t threadID,
bool* hasBinaryColumn, const execplan::CalpontSystemCatalog::ColType& colType) const;
void deserializeAggregateResult(messageqcpp::ByteStream* in, std::vector<rowgroup::RGData>* out) const;
bool countThisMsg(messageqcpp::ByteStream& in) const;
@ -365,4 +365,3 @@ class BatchPrimitiveProcessorJL
};
} // namespace joblist

View File

@ -130,6 +130,59 @@ ColumnCommandJL::ColumnCommandJL(const pColStep& step)
fFilesPerColumnPartition = cf->uFromText(fpc);
}
// Builds a column command from an existing column command (prevCmd) and the dictionary
// step it is paired with (dictWithFilters).
//
// The dictionary step's filters are re-encoded via DictStepJL::reencodedFilterString() and,
// when the condition below holds, become this command's filters (fContainsRanges is then set).
// Otherwise the filters of prevCmd are kept. Every other attribute is copied from prevCmd.
//
// Preconditions (checked with idbassert):
//  - at most one of the two commands carries filters;
//  - prevCmd is a dictionary ("tokens") column command.
ColumnCommandJL::ColumnCommandJL(const ColumnCommandJL& prevCmd, const DictStepJL& dictWithFilters)
{
  BRM::DBRM dbrm;

  /* grab necessary vars from scan */
  traceFlags = prevCmd.traceFlags;

  // we should call this constructor only when paired with dictionary
  // and in that case previous command should not have any filters and
  // should be "dict" (tokens) column command.
  idbassert(dictWithFilters.getFilterCount() == 0 || prevCmd.filterCount == 0);
  idbassert(prevCmd.fIsDict);

  // need to reencode filters.
  filterString = dictWithFilters.reencodedFilterString();

  // we have a limitation here.
  // consider this: textcol IS NULL AND textcol IN ('a', 'b')
  // XXX: should check.
  //
  // NOTE(review): the original condition was written as
  //   (BOP = dictWithFilters.getBop() || prevCmd.filterString.length() < 1)
  // i.e. with an assignment inside the condition. The value assigned to BOP was always
  // overwritten in both branches below, so only its truthiness mattered. The predicate is
  // spelled out here with exactly that effective meaning. A comparison (for example against
  // prevCmd.BOP) may have been intended instead - TODO confirm.
  const bool haveReencodedFilters = filterString.length() > 0;
  const bool dictBopIsNonZero = dictWithFilters.getBop() != 0;
  const bool prevHasNoFilterString = prevCmd.filterString.length() < 1;

  if (haveReencodedFilters && (dictBopIsNonZero || prevHasNoFilterString))
  {
    filterCount = dictWithFilters.getFilterCount();
    BOP = dictWithFilters.getBop();
    fContainsRanges = true;
  }
  else
  {
    filterCount = prevCmd.filterCount;
    filterString = prevCmd.filterString;
    BOP = prevCmd.BOP;
  }

  isScan = prevCmd.isScan;
  colType = prevCmd.colType;
  extents = prevCmd.extents;
  OID = prevCmd.OID;
  colName = prevCmd.colName;
  rpbShift = prevCmd.rpbShift;
  fIsDict = prevCmd.fIsDict;
  fLastLbid = prevCmd.fLastLbid;
  lbid = prevCmd.lbid;
  dbroot = prevCmd.dbroot;
  numDBRoots = prevCmd.numDBRoots;

  /* I think modmask isn't necessary for scans */
  divShift = prevCmd.divShift;
  modMask = (1 << divShift) - 1;

  // @Bug 2889. Drop partition enhancement. Read FilesPerColumnPartition and ExtentsPerSegmentFile for use
  // in RID calculation.
  fFilesPerColumnPartition = prevCmd.fFilesPerColumnPartition;
  // MCOL-4685 remove the option to set more than 2 extents per file (ExtentsPreSegmentFile).
  fExtentsPerSegmentFile = prevCmd.fExtentsPerSegmentFile;
}
ColumnCommandJL::~ColumnCommandJL()
{
}
@ -141,9 +194,22 @@ void ColumnCommandJL::createCommand(ByteStream& bs) const
colType.serialize(bs);
bs << (uint8_t)isScan;
bs << traceFlags;
bs << filterString;
bs << BOP;
bs << filterCount;
if (isDict() && fContainsRanges)
{
// XXX: we should discern here between IS (NOT) NULL and other filters.
ByteStream empty;
auto zeroFC = filterCount;
bs << empty;
bs << BOP;
zeroFC = 0;
bs << zeroFC;
}
else
{
bs << filterString;
bs << BOP;
bs << filterCount;
}
serializeInlineVector(bs, fLastLbid);
CommandJL::createCommand(bs);
@ -250,7 +316,7 @@ string ColumnCommandJL::toString()
{
ostringstream ret;
ret << "ColumnCommandJL: " << filterCount << " filters colwidth=" << colType.colWidth << " oid=" << OID
ret << "ColumnCommandJL: " << filterCount << " filters, BOP=" << ((int)BOP) << ", colwidth=" << colType.colWidth << " oid=" << OID
<< " name=" << colName;
if (isScan)
@ -286,4 +352,9 @@ void ColumnCommandJL::reloadExtents()
sort(extents.begin(), extents.end(), BRM::ExtentSorter());
}
// Returns the fIsDict flag of this column command.
// NOTE(review): presumably true when the command reads a dictionary (token) column - confirm
// against the constructors that set fIsDict.
bool ColumnCommandJL::getIsDict()
{
return fIsDict;
}
}; // namespace joblist

View File

@ -33,6 +33,7 @@
#include "primitivestep.h"
#include "command-jl.h"
#include "dictstep-jl.h"
namespace joblist
{
@ -41,6 +42,7 @@ class ColumnCommandJL : public CommandJL
public:
ColumnCommandJL(const pColScanStep&, std::vector<BRM::LBID_t> lastLBID);
ColumnCommandJL(const pColStep&);
ColumnCommandJL(const ColumnCommandJL&, const DictStepJL&);
virtual ~ColumnCommandJL();
virtual void createCommand(messageqcpp::ByteStream& bs) const;
@ -111,6 +113,7 @@ class ColumnCommandJL : public CommandJL
std::vector<BRM::LBID_t> fLastLbid;
bool fIsDict;
bool fContainsRanges = false;
// @Bug 2889. Added two members below for drop partition enhancement.
// RJD: make sure that we keep enough significant digits around for partition math
@ -125,7 +128,7 @@ class ColumnCommandJL : public CommandJL
public:
// MCOL-4685: remove the option to set more than 2 extents per file (ExtentsPreSegmentFile)
static const unsigned DEFAULT_EXTENTS_PER_SEGMENT_FILE = 2;
bool getIsDict() override;
};
} // namespace joblist

View File

@ -96,6 +96,11 @@ class CommandJL
virtual CommandType getCommandType() = 0;
// Default implementation: always returns false.
// Virtual so that derived commands can report their own flag instead.
virtual bool getIsDict()
{
return false;
}
protected:
BatchPrimitiveProcessorJL* bpp;
uint32_t OID;

View File

@ -29,6 +29,7 @@
//
#include "bpp-jl.h"
#include "string_prefixes.h"
using namespace std;
using namespace messageqcpp;
@ -50,7 +51,6 @@ DictStepJL::DictStepJL(const pDictionaryStep& dict)
if (hasEqFilter)
{
// cout << "saw eqfilter\n";
eqOp = dict.tmpCOP;
eqFilter = dict.eqFilter;
}
@ -120,4 +120,63 @@ void DictStepJL::setWidth(uint16_t w)
colWidth = w;
}
// Produces a new filter stream in which every string filter of this step is replaced by
// the triple (comparison op, round flag = 0, int64 prefix encoding of the string), where
// the encoding is computed by encodeStringPrefix() with this step's charsetNumber.
//
// Two sources are handled:
//  - hasEqFilter: each entry of eqFilter is emitted with the shared eqOp;
//  - otherwise: filterString is parsed as a sequence of (op, uint16 size, bytes) records.
messageqcpp::ByteStream DictStepJL::reencodedFilterString() const
{
  messageqcpp::ByteStream reencoded;

  if (hasEqFilter)
  {
    idbassert(filterCount == eqFilter.size());

    for (uint32_t idx = 0; idx < filterCount; idx++)
    {
      const auto& needle = eqFilter[idx];
      uint8_t roundFlag = 0;
      int64_t encodedPrefix =
          encodeStringPrefix(reinterpret_cast<const uint8_t*>(needle.c_str()), needle.size(), charsetNumber);

      reencoded << eqOp;
      reencoded << roundFlag;
      reencoded << encodedPrefix;
    }

    return reencoded;
  }

  // As we are dealing with prefixes, we have to use "... or equal" conditions instead of
  // strict ones.
  // Consider this: ... WHERE col > 'customer#001' AND col < 'customer#100'.
  // "Working with prefixes of 8 bytes" means these conditions reduce to
  // ... WHERE col > 'customer' AND col < 'customer', and their AND relation is impossible
  // to satisfy. We do not pass this string to primproc, and that means we can re-encode
  // operation codes here.
  const auto relaxStrictComparison = [](uint8_t op) -> uint8_t
  {
    switch (op)
    {
      case COMPARE_LT:
      case COMPARE_NGE: return COMPARE_LE;

      case COMPARE_GT:
      case COMPARE_NLE: return COMPARE_GE;

      default: return op;
    }
  };

  // XXX Not sure about the real copy semantics of messageqcpp::ByteStream, so work on a copy.
  // Please refer to pdictionary.cpp in this directory (addFilter function) for the layout
  // of the string filters being parsed here.
  messageqcpp::ByteStream pending(filterString);

  for (uint32_t idx = 0; idx < filterCount; idx++)
  {
    uint8_t cop;
    uint8_t roundFlag = 0;
    uint16_t size;

    pending >> cop;
    cop = relaxStrictComparison(cop);
    pending >> size;

    // The string bytes start at the current read position; encode, then skip over them.
    const uint8_t* prefixBytes = pending.buf();
    int64_t encodedPrefix = encodeStringPrefix(prefixBytes, size, charsetNumber);
    pending.advance(size);

    reencoded << cop;
    reencoded << roundFlag;
    reencoded << encodedPrefix;
  }

  return reencoded;
}
}; // namespace joblist

View File

@ -61,6 +61,21 @@ class DictStepJL : public CommandJL
void createCommand(messageqcpp::ByteStream&) const;
void runCommand(messageqcpp::ByteStream&) const;
// Returns a copy (by value) of this step's filterString byte stream.
messageqcpp::ByteStream getFilterString() const
{
return filterString;
}
// Returns the filterCount member of this step.
uint32_t getFilterCount() const
{
return filterCount;
}
messageqcpp::ByteStream reencodedFilterString() const;
// Returns the BOP member of this step.
// NOTE(review): presumably the boolean operator that combines this step's filters - confirm.
uint8_t getBop() const
{
return BOP;
}
private:
DictStepJL(const DictStepJL&);

View File

@ -373,7 +373,15 @@ void GroupConcatAgUM::applyMapping(const boost::shared_array<int>& mapping, cons
}
else
{
fRow.setIntField(row.getIntField(mapping[i]), i);
if (fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::CHAR ||
fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::VARCHAR)
{
fRow.setIntField(row.getUintField(mapping[i]), i);
}
else
{
fRow.setIntField(row.getIntField(mapping[i]), i);
}
}
}
}

View File

@ -1140,9 +1140,10 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo)
for (unsigned i = 0; i < numOfStepsAddToBps; i++)
{
bps->setBPP((it + i)->get());
auto pp = (it + i)->get();
bps->setBPP(pp);
bps->setStepCount();
bps->setLastTupleId((it + i)->get()->tupleId());
bps->setLastTupleId(pp->tupleId());
}
it += itInc;

View File

@ -338,8 +338,8 @@ int LBIDList::getMinMaxFromEntries(T& min, T& max, int32_t& seq, int64_t lbid,
}
template <typename T>
void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, const CalpontSystemCatalog::ColType& type,
bool validData)
void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, bool dictScan,
const CalpontSystemCatalog::ColType& type, bool validData)
{
MinMaxPartition* mmp = NULL;
#ifdef DEBUG
@ -372,18 +372,20 @@ void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, const CalpontSystemCatal
if (mmp->isValid == BRM::CP_INVALID)
{
if (datatypes::isCharType(type.colDataType))
if (!dictScan && datatypes::isCharType(type.colDataType))
{
datatypes::Charset cs(const_cast<CalpontSystemCatalog::ColType&>(type).getCharset());
if (datatypes::TCharShort::strnncollsp(cs, min, mmp->min, type.colWidth) < 0 ||
mmp->min == numeric_limits<int64_t>::max())
// WIP
static_cast<uint64_t>(mmp->min) == numeric_limits<uint64_t>::max())
mmp->min = min;
if (datatypes::TCharShort::strnncollsp(cs, max, mmp->max, type.colWidth) > 0 ||
mmp->max == numeric_limits<int64_t>::min())
// WIP
static_cast<uint64_t>(mmp->max) == numeric_limits<uint64_t>::min())
mmp->max = max;
}
else if (datatypes::isUnsigned(type.colDataType))
else if (dictScan || datatypes::isUnsigned(type.colDataType))
{
if (static_cast<uint64_t>(min) < static_cast<uint64_t>(mmp->min))
mmp->min = min;
@ -526,7 +528,7 @@ bool LBIDList::CasualPartitionDataType(const CalpontSystemCatalog::ColDataType t
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::BLOB:
case CalpontSystemCatalog::TEXT: return size < 8;
case CalpontSystemCatalog::TEXT: return size <= 8;
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:
@ -695,15 +697,19 @@ bool LBIDList::checkRangeOverlap(T min, T max, T tmin, T tmax,
bool LBIDList::CasualPartitionPredicate(const BRM::EMCasualPartition_t& cpRange,
const messageqcpp::ByteStream* bs, const uint16_t NOPS,
const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP)
const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP,
bool isDict)
{
int length = bs->length(), pos = 0;
const char* MsgDataPtr = (const char*)bs->buf();
bool scan = true;
int64_t value = 0;
int128_t bigValue = 0;
bool bIsUnsigned = datatypes::isUnsigned(ct.colDataType);
bool bIsChar = datatypes::isCharType(ct.colDataType);
// MCOL-4580 - related.
// We could definitely compute the isDict flag ourselves here, as we have the column type and width.
// But we may also use the isDict flags already computed in the steps, available via the getIsDict() method.
bool bIsUnsigned = isDict || datatypes::isUnsigned(ct.colDataType);
bool bIsChar = !isDict && datatypes::isCharType(ct.colDataType);
for (int i = 0; i < NOPS; i++)
{
@ -800,9 +806,12 @@ bool LBIDList::CasualPartitionPredicate(const BRM::EMCasualPartition_t& cpRange,
{
continue;
}
else if (execplan::isNull(value, ct)) // This will work even if the data column is unsigned.
else
{
continue;
if (execplan::isNull(value, ct)) // This will work even if the data column is unsigned.
{
continue;
}
}
if (bIsChar)
@ -898,11 +907,11 @@ template bool LBIDList::GetMinMax<int64_t>(int64_t* min, int64_t* max, int64_t*
const tr1::unordered_map<int64_t, BRM::EMEntry>& entries,
execplan::CalpontSystemCatalog::ColDataType colDataType);
template void LBIDList::UpdateMinMax<int128_t>(int128_t min, int128_t max, int64_t lbid,
template void LBIDList::UpdateMinMax<int128_t>(int128_t min, int128_t max, int64_t lbid, bool dictScan,
const execplan::CalpontSystemCatalog::ColType& type,
bool validData = true);
template void LBIDList::UpdateMinMax<int64_t>(int64_t min, int64_t max, int64_t lbid,
template void LBIDList::UpdateMinMax<int64_t>(int64_t min, int64_t max, int64_t lbid, bool dictScan,
const execplan::CalpontSystemCatalog::ColType& type,
bool validData = true);

View File

@ -98,8 +98,8 @@ class LBIDList
execplan::CalpontSystemCatalog::ColDataType type);
template <typename T>
void UpdateMinMax(T min, T max, int64_t lbid, const execplan::CalpontSystemCatalog::ColType& type,
bool validData = true);
void UpdateMinMax(T min, T max, int64_t lbid, bool dictScan,
const execplan::CalpontSystemCatalog::ColType& type, bool validData = true);
void UpdateAllPartitionInfo(const execplan::CalpontSystemCatalog::ColType& colType);
@ -107,7 +107,8 @@ class LBIDList
bool CasualPartitionPredicate(const BRM::EMCasualPartition_t& cpRange,
const messageqcpp::ByteStream* MsgDataPtr, const uint16_t NOPS,
const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP);
const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP,
bool isDict);
template <typename T>
bool checkSingleValue(T min, T max, T value, const execplan::CalpontSystemCatalog::ColType& type);

View File

@ -1054,9 +1054,10 @@ class BatchPrimitive : public JobStep, public PrimitiveMsg, public DECEventListe
struct _CPInfo
{
_CPInfo(int64_t MIN, int64_t MAX, uint64_t l, bool val) : min(MIN), max(MAX), LBID(l), valid(val){};
_CPInfo(int64_t MIN, int64_t MAX, uint64_t l, bool dictScan, bool val)
: min(MIN), max(MAX), LBID(l), valid(val), dictScan(dictScan) {};
_CPInfo(int128_t BIGMIN, int128_t BIGMAX, uint64_t l, bool val)
: bigMin(BIGMIN), bigMax(BIGMAX), LBID(l), valid(val){};
: bigMin(BIGMIN), bigMax(BIGMAX), LBID(l), valid(val), dictScan(false) {};
union
{
int128_t bigMin;
@ -1069,6 +1070,7 @@ struct _CPInfo
};
uint64_t LBID;
bool valid;
bool dictScan;
};
/** @brief class TupleBPS
@ -1834,4 +1836,3 @@ class PseudoColStep : public pColStep
};
} // namespace joblist

View File

@ -75,6 +75,7 @@ using namespace rowgroup;
#include "querytele.h"
using namespace querytele;
#include "columnwidth.h"
#include "pseudocolumn.h"
//#define DEBUG 1
@ -865,6 +866,7 @@ void TupleBPS::storeCasualPartitionInfo(const bool estimateRowCounts)
vector<ColumnCommandJL*> cpColVec;
vector<SP_LBIDList> lbidListVec;
ColumnCommandJL* colCmd = 0;
bool defaultScanFlag = true;
// @bug 2123. We call this earlier in the process for the hash join estimation process now. Return if
// we've already done the work.
@ -876,7 +878,9 @@ void TupleBPS::storeCasualPartitionInfo(const bool estimateRowCounts)
fCPEvaluated = true;
if (colCmdVec.size() == 0)
return;
{
defaultScanFlag = false; // no reason to scan if there are no commands.
}
for (uint32_t i = 0; i < colCmdVec.size(); i++)
{
@ -902,30 +906,28 @@ void TupleBPS::storeCasualPartitionInfo(const bool estimateRowCounts)
}
if (cpColVec.size() == 0)
return;
{
defaultScanFlag = true; // no reason to scan if there are no predicates to evaluate.
}
const bool ignoreCP = ((fTraceFlags & CalpontSelectExecutionPlan::IGNORE_CP) != 0);
for (uint32_t idx = 0; idx < numExtents; idx++)
{
scanFlags[idx] = true;
scanFlags[idx] = defaultScanFlag;
for (uint32_t i = 0; i < cpColVec.size(); i++)
for (uint32_t i = 0; scanFlags[idx] && i < cpColVec.size(); i++)
{
colCmd = cpColVec[i];
const EMEntry& extent = colCmd->getExtents()[idx];
/* If any column filter eliminates an extent, it doesn't get scanned */
scanFlags[idx] =
scanFlags[idx] && (ignoreCP || extent.partition.cprange.isValid != BRM::CP_VALID ||
lbidListVec[i]->CasualPartitionPredicate(
extent.partition.cprange, &(colCmd->getFilterString()),
colCmd->getFilterCount(), colCmd->getColType(), colCmd->getBOP()));
if (!scanFlags[idx])
{
break;
}
scanFlags[idx] = scanFlags[idx] && (extent.colWid <= utils::MAXCOLUMNWIDTH) && // XXX: change to named constant.
(ignoreCP || extent.partition.cprange.isValid != BRM::CP_VALID ||
colCmd->getColType().colWidth != extent.colWid ||
lbidListVec[i]->CasualPartitionPredicate(
extent.partition.cprange, &(colCmd->getFilterString()), colCmd->getFilterCount(),
colCmd->getColType(), colCmd->getBOP(), colCmd->getIsDict()));
}
}
@ -2008,9 +2010,10 @@ void TupleBPS::processByteStreamVector(vector<boost::shared_ptr<messageqcpp::Byt
}
bool unused;
bool fromDictScan;
fromPrimProc.clear();
fBPP->getRowGroupData(*bs, &fromPrimProc, &validCPData, &lbid, &min, &max, &cachedIO, &physIO,
&touchedBlocks, &unused, threadID, &hasBinaryColumn, fColType);
fBPP->getRowGroupData(*bs, &fromPrimProc, &validCPData, &lbid, &fromDictScan, &min, &max, &cachedIO,
&physIO, &touchedBlocks, &unused, threadID, &hasBinaryColumn, fColType);
// Another layer of messiness. Need to refactor this fcn.
while (!fromPrimProc.empty() && !cancelled())
@ -2180,7 +2183,7 @@ void TupleBPS::processByteStreamVector(vector<boost::shared_ptr<messageqcpp::Byt
{
if (fColType.colWidth <= 8)
{
cpv.push_back(_CPInfo((int64_t)min, (int64_t)max, lbid, validCPData));
cpv.push_back(_CPInfo((int64_t)min, (int64_t)max, lbid, fromDictScan, validCPData));
}
else if (fColType.colWidth == 16)
{
@ -2237,7 +2240,9 @@ void TupleBPS::receiveMultiPrimitiveMessages()
}
if (msgsSent == msgsRecvd && finishedSending)
{
break;
}
bool flowControlOn;
fDec->read_some(uniqueID, fNumThreads, bsv, &flowControlOn);
@ -2361,11 +2366,13 @@ void TupleBPS::receiveMultiPrimitiveMessages()
{
if (fColType.colWidth > 8)
{
lbidList->UpdateMinMax(cpv[i].bigMin, cpv[i].bigMax, cpv[i].LBID, fColType, cpv[i].valid);
lbidList->UpdateMinMax(cpv[i].bigMin, cpv[i].bigMax, cpv[i].LBID, cpv[i].dictScan, fColType,
cpv[i].valid);
}
else
{
lbidList->UpdateMinMax(cpv[i].min, cpv[i].max, cpv[i].LBID, fColType, cpv[i].valid);
lbidList->UpdateMinMax(cpv[i].min, cpv[i].max, cpv[i].LBID, cpv[i].dictScan, fColType,
cpv[i].valid);
}
}
}

View File

@ -87,7 +87,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_general_ci)
c1 HEX(c1)
Ъ─ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
c1
Ъ─
Level Code Message
@ -100,7 +100,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_general_ci)
c1 HEX(c1)
ЪЪЪ─ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
c1
ЪЪЪ─
Level Code Message
@ -127,7 +127,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_general_nopad_ci)
c1 HEX(c1)
Ъ─ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
c1
Ъ─
Level Code Message
@ -140,7 +140,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_general_nopad_ci)
c1 HEX(c1)
ЪЪЪ─ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
c1
ЪЪЪ─
Level Code Message
@ -167,7 +167,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_bin)
c1 HEX(c1)
Ъ─ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
c1
Ъ─
Level Code Message
@ -180,7 +180,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_bin)
c1 HEX(c1)
ЪЪЪ─ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
c1
ЪЪЪ─
Level Code Message
@ -207,7 +207,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_nopad_bin)
c1 HEX(c1)
Ъ─ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
c1
Ъ─
Level Code Message
@ -220,7 +220,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_nopad_bin)
c1 HEX(c1)
ЪЪЪ─ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
c1
ЪЪЪ─
Level Code Message

View File

@ -99,7 +99,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_swedish_ci)
c1 HEX(c1)
é E9
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
mcs_ctype_extent_latin1 t1 c1 E9 E9
c1
é
Level Code Message
@ -112,7 +112,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_swedish_ci)
c1 HEX(c1)
ÿ€ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
c1
ÿ€
Level Code Message
@ -125,7 +125,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_swedish_ci)
c1 HEX(c1)
ÿÿÿ€ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
c1
ÿÿÿ€
Level Code Message
@ -152,7 +152,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_swedish_nopad_ci
c1 HEX(c1)
é E9
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
mcs_ctype_extent_latin1 t1 c1 E9 E9
c1
é
Level Code Message
@ -165,7 +165,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_swedish_nopad_ci
c1 HEX(c1)
ÿ€ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
c1
ÿ€
Level Code Message
@ -178,7 +178,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_swedish_nopad_ci
c1 HEX(c1)
ÿÿÿ€ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
c1
ÿÿÿ€
Level Code Message
@ -205,7 +205,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_bin)
c1 HEX(c1)
é E9
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
mcs_ctype_extent_latin1 t1 c1 E9 E9
c1
é
Level Code Message
@ -218,7 +218,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_bin)
c1 HEX(c1)
ÿ€ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
c1
ÿ€
Level Code Message
@ -231,7 +231,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_bin)
c1 HEX(c1)
ÿÿÿ€ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
c1
ÿÿÿ€
Level Code Message
@ -258,7 +258,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_nopad_bin)
c1 HEX(c1)
é E9
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
mcs_ctype_extent_latin1 t1 c1 E9 E9
c1
é
Level Code Message
@ -271,7 +271,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_nopad_bin)
c1 HEX(c1)
ÿ€ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
c1
ÿ€
Level Code Message
@ -284,7 +284,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_nopad_bin)
c1 HEX(c1)
ÿÿÿ€ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
c1
ÿÿÿ€
Level Code Message

View File

@ -6,6 +6,6 @@ insert into rounding_table values (26805, 1252, -9647);
insert into rounding_table values (26806, 573, -2804.5);
SELECT CASE a WHEN 26805 THEN ROUND(c/b, 2) WHEN 26806 THEN b END MCOL4940 FROM ( SELECT a, SUM(b) b, SUM(c) c FROM rounding_table GROUP BY a ) abc ;
MCOL4940
573
-7.71
573
DROP DATABASE mcol_4940;

View File

@ -8,7 +8,8 @@ USE mcol_4940;
create table rounding_table ( a int, b double, c double) engine=columnstore;
insert into rounding_table values (26805, 1252, -9647);
insert into rounding_table values (26806, 573, -2804.5);
--sorted_result
SELECT CASE a WHEN 26805 THEN ROUND(c/b, 2) WHEN 26806 THEN b END MCOL4940 FROM ( SELECT a, SUM(b) b, SUM(c) c FROM rounding_table GROUP BY a ) abc ;
DROP DATABASE mcol_4940;

View File

@ -42,6 +42,8 @@ using namespace boost;
#include "simd_sse.h"
#include "utils/common/columnwidth.h"
#include "exceptclasses.h"
using namespace logging;
using namespace dbbc;
using namespace primitives;
@ -1690,7 +1692,9 @@ void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in, C
dataType == execplan::CalpontSystemCatalog::TEXT) &&
!isDictTokenScan(in))
{
filterColumnData<T, KIND_TEXT>(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
using UT = typename std::conditional<std::is_unsigned<T>::value, T,
typename datatypes::make_unsigned<T>::type>::type;
filterColumnData<UT, KIND_TEXT>(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
return;
}

View File

@ -31,6 +31,7 @@ using namespace std;
#include "messageobj.h"
#include "exceptclasses.h"
#include "dataconvert.h"
#include "string_prefixes.h"
#include <sstream>
using namespace logging;
@ -391,8 +392,13 @@ void PrimitiveProcessor::nextSig(int NVALS, const PrimToken* tokens, p_DataValue
}
void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector<uint8_t>* out, bool skipNulls,
#if defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
uint32_t charsetNumber, boost::shared_ptr<DictEqualityFilter> eqFilter,
uint8_t eqOp, uint64_t minMax[2])
#else
uint32_t charsetNumber, boost::shared_ptr<DictEqualityFilter> eqFilter,
uint8_t eqOp)
#endif
{
PrimToken* outToken;
const DictFilterElement* filter = 0;
@ -437,6 +443,14 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector<uint8_t>* out,
sigptr.len != -1;
nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType, (in->InputFlags ? true : false), skipNulls))
{
#if defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
if (minMax)
{
uint64_t v = encodeStringPrefix_check_null(sigptr.data, sigptr.len, charsetNumber);
minMax[1] = minMax[1] < v ? v : minMax[1];
minMax[0] = minMax[0] > v ? v : minMax[0];
}
#endif
// do aggregate processing
if (in->OutputType & OT_AGGREGATE)
{

View File

@ -54,6 +54,9 @@
class PrimTest;
// XXX: switch for dictionary range (min/max) setting during scan; comment out the definition below to turn it off.
#define XXX_PRIMITIVES_TOKEN_RANGES_XXX
namespace primitives
{
enum ColumnFilterMode
@ -423,7 +426,13 @@ class PrimitiveProcessor
// void p_ColAggregate(const NewColAggRequestHeader *in, NewColAggResultHeader *out);
void p_Dictionary(const DictInput* in, std::vector<uint8_t>* out, bool skipNulls, uint32_t charsetNumber,
boost::shared_ptr<DictEqualityFilter> eqFilter, uint8_t eqOp);
#if !defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
boost::shared_ptr<DictEqualityFilter> eqFilter, uint8_t eqOp
#else
boost::shared_ptr<DictEqualityFilter> eqFilter, uint8_t eqOp,
uint64_t minMax[2] // as name suggests, [0] is min, [1] is max.
#endif
);
inline void setLogicalBlockMode(bool b)
{

View File

@ -117,6 +117,7 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor()
, validCPData(false)
, minVal(MAX64)
, maxVal(MIN64)
, cpDataFromDictScan(false)
, lbidForCP(0)
, hasWideColumnOut(false)
, busyLoaderCount(0)
@ -138,6 +139,7 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor()
, processorThreads(0)
, ptMask(0)
, firstInstance(false)
, valuesLBID(0)
{
pp.setLogicalBlockMode(true);
pp.setBlockPtr((int*)blockData);
@ -167,6 +169,7 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor(ByteStream& b, double prefetch,
, validCPData(false)
, minVal(MAX64)
, maxVal(MIN64)
, cpDataFromDictScan(false)
, lbidForCP(0)
, hasWideColumnOut(false)
, busyLoaderCount(0)
@ -186,10 +189,10 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor(ByteStream& b, double prefetch,
, sockIndex(0)
, endOfJoinerRan(false)
, processorThreads(_processorThreads)
,
// processorThreads(32),
// ptMask(processorThreads - 1),
firstInstance(true)
, firstInstance(true)
, valuesLBID(0)
{
// promote processorThreads to next power of 2. also need to change the name to bucketCount or similar
processorThreads = nextPowOf2(processorThreads);
@ -2010,6 +2013,7 @@ void BatchPrimitiveProcessor::writeProjectionPreamble()
{
*serialized << (uint8_t)1;
*serialized << lbidForCP;
*serialized << ((uint8_t)cpDataFromDictScan);
if (UNLIKELY(hasWideColumnOut))
{
// PSA width
@ -2108,6 +2112,7 @@ void BatchPrimitiveProcessor::makeResponse()
{
*serialized << (uint8_t)1;
*serialized << lbidForCP;
*serialized << ((uint8_t)cpDataFromDictScan);
if (UNLIKELY(hasWideColumnOut))
{
@ -2208,6 +2213,7 @@ int BatchPrimitiveProcessor::operator()()
}
validCPData = false;
cpDataFromDictScan = false;
#ifdef PRIMPROC_STOPWATCH
stopwatch->start("BPP() execute");
execute(stopwatch);

View File

@ -256,6 +256,7 @@ class BatchPrimitiveProcessor
int128_t max128Val;
int64_t maxVal;
};
bool cpDataFromDictScan;
uint64_t lbidForCP;
bool hasWideColumnOut;
@ -431,6 +432,7 @@ class BatchPrimitiveProcessor
uint processorThreads;
uint ptMask;
bool firstInstance;
uint64_t valuesLBID;
friend class Command;
friend class ColumnCommand;

View File

@ -106,8 +106,8 @@ void ColumnCommand::execute()
{
values = bpp->values;
wide128Values = bpp->wide128Values;
bpp->valuesLBID = lbid;
}
_execute();
}
@ -225,9 +225,13 @@ void ColumnCommand::issuePrimitive()
loadData();
if (!suppressFilter)
{
bpp->getPrimitiveProcessor().setParsedColumnFilter(parsedColumnFilter);
}
else
{
bpp->getPrimitiveProcessor().setParsedColumnFilter(emptyFilter);
}
switch (colType.colWidth)
{
@ -282,6 +286,7 @@ void ColumnCommand::updateCPDataNarrow()
if (_isScan)
{
bpp->validCPData = (outMsg->ValidMinMax && !wasVersioned);
bpp->cpDataFromDictScan = false;
bpp->lbidForCP = lbid;
bpp->maxVal = static_cast<int64_t>(outMsg->Max);
bpp->minVal = static_cast<int64_t>(outMsg->Min);
@ -295,6 +300,7 @@ void ColumnCommand::updateCPDataWide()
if (_isScan)
{
bpp->validCPData = (outMsg->ValidMinMax && !wasVersioned);
bpp->cpDataFromDictScan = false;
bpp->lbidForCP = lbid;
if (colType.isWideDecimalType())
{

View File

@ -46,6 +46,8 @@ extern uint32_t dictBufferSize;
// Default constructor: builds a DICT_STEP command with no string values, no filters and an empty buffer.
DictStep::DictStep() : Command(DICT_STEP), strValues(NULL), filterCount(0), bufferSize(0)
{
// Seed the range inverted (min slot = MAX_UBIGINT, max slot = MIN_UBIGINT): the first value
// folded in replaces both ends, and _execute() only publishes the range once
// fMinMax[0] <= fMinMax[1].
fMinMax[0] = MAX_UBIGINT;
fMinMax[1] = MIN_UBIGINT;
}
DictStep::~DictStep()
@ -65,6 +67,8 @@ DictStep& DictStep::operator=(const DictStep& d)
eqOp = d.eqOp;
filterCount = d.filterCount;
charsetNumber = d.charsetNumber;
fMinMax[0] = d.fMinMax[0];
fMinMax[1] = d.fMinMax[1];
return *this;
}
@ -147,8 +151,11 @@ void DictStep::issuePrimitive(bool isFilter)
bpp->physIO += blocksRead;
bpp->touchedBlocks++;
}
#if !defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
bpp->pp.p_Dictionary(primMsg, &result, isFilter, charsetNumber, eqFilter, eqOp);
#else
bpp->pp.p_Dictionary(primMsg, &result, isFilter, charsetNumber, eqFilter, eqOp, fMinMax);
#endif
}
void DictStep::copyResultToTmpSpace(OrderedToken* ot)
@ -390,6 +397,14 @@ void DictStep::_execute()
copyResultToFinalPosition(newRidList.get());
copyRidsForFilterCmd();
}
if (fMinMax[0] <= fMinMax[1] && bpp->valuesLBID != 0)
{
bpp->validCPData = true;
bpp->cpDataFromDictScan = true;
bpp->lbidForCP = bpp->valuesLBID;
bpp->maxVal = fMinMax[1];
bpp->minVal = fMinMax[0];
}
// cout << "DS: /_execute()\n";
}

View File

@ -158,6 +158,7 @@ class DictStep : public Command
bool hasEqFilter;
boost::shared_ptr<primitives::DictEqualityFilter> eqFilter;
uint8_t eqOp; // COMPARE_EQ or COMPARE_NE
uint64_t fMinMax[2];
friend class RTSCommand;
};

View File

@ -11,7 +11,8 @@ set(common_LIB_SRCS
nullvaluemanip.cpp
threadnaming.cpp
utils_utf8.cpp
statistics.cpp)
statistics.cpp
string_prefixes.cpp)
add_library(common SHARED ${common_LIB_SRCS})

View File

@ -177,6 +177,12 @@ class Charset
bool res = !mCharset->wildcmp(subject.str(), subject.end(), pattern.str(), pattern.end(), '\\', '_', '%');
return neg ? !res : res;
}
/** @brief Forward a strnxfrm request to the collation handler of the wrapped charset.
 *
 * All arguments are passed through unchanged, preceded by the charset itself.
 * @return whatever the collation handler's strnxfrm returns
 *         (presumably the number of bytes written to dst - TODO confirm against the server API).
 */
size_t strnxfrm(uchar* dst, size_t dstlen, uint nweights, const uchar* src, size_t srclen, uint flags)
{
  // The handler table must be present before we can dispatch through it.
  idbassert(mCharset->coll);
  auto* handler = mCharset->coll;
  const size_t produced = handler->strnxfrm(mCharset, dst, dstlen, nweights, src, srclen, flags);
  return produced;
}
};
class CollationAwareHasher : public Charset

View File

@ -198,6 +198,7 @@ int64_t getSignedNullValue(CalpontSystemCatalog::ColDataType t, uint32_t colWidt
os << "getSignedNullValue(): got bad column type (" << t << "). Width=" << colWidth << endl;
throw logic_error(os.str());
}
}
} // namespace utils

View File

@ -0,0 +1,51 @@
/*
Copyright (C) 2021, 2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/* handling of the conversion of string prefixes to int64_t for quick range checking */
#include "collation.h"
#include "joblisttypes.h"
#include "string_prefixes.h"
// XXX: string (or, actually, a BLOB) with all NUL chars will be encoded into zero. Which corresponds to
// encoding of empty string, or NULL.
int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber)
{
datatypes::Charset cset(charsetNumber);
uint8_t fixedLenPrefix[8];
memset(fixedLenPrefix, 0, sizeof(fixedLenPrefix));
cset.strnxfrm(fixedLenPrefix, sizeof(fixedLenPrefix), 8, str, len, 0);
int64_t acc = 0;
size_t i;
for (i = 0; i < 8; i++)
{
uint8_t byte = fixedLenPrefix[i];
acc = (acc << 8) + byte;
}
return acc;
}
/** @brief Variant of encodeStringPrefix() that maps zero-length input to joblist::UBIGINTNULL.
 *
 * @param str            bytes of the string to encode
 * @param len            number of bytes available at str; zero yields joblist::UBIGINTNULL
 * @param charsetNumber  passed through to encodeStringPrefix()
 * @return joblist::UBIGINTNULL (converted to int64_t) for empty input, the encoded prefix otherwise
 */
int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, int charsetNumber)
{
  return len == 0 ? static_cast<int64_t>(joblist::UBIGINTNULL) : encodeStringPrefix(str, len, charsetNumber);
}

View File

@ -0,0 +1,32 @@
/*
Copyright (C) 2021, 2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/* handling of the conversion of string prefixes to int64_t for quick range checking */
#pragma once
#include <stdlib.h>
#include <stdint.h>
// Encode string prefix into an int64_t, packing as many chars from string as possible
// into the result and respecting the collation provided by charsetNumber.
//
// For one example, for CI Czech collation, encodeStringPrefix("cz") < encodeStringPrefix("CH").
int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber);
int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, int charsetNumber);

View File

@ -45,7 +45,7 @@ CalpontSystemCatalog::ColType Func_monthname::operationType(FunctionParm& fp,
string Func_monthname::getStrVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
CalpontSystemCatalog::ColType& op_ct)
{
int32_t month = getIntVal(row, parm, isNull, op_ct);
int32_t month = getIntValInternal(row, parm, isNull, op_ct);
if (month == -1)
return "";
@ -74,8 +74,8 @@ int64_t Func_monthname::getTimestampIntVal(rowgroup::Row& row, FunctionParm& par
return val;
}
int64_t Func_monthname::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
CalpontSystemCatalog::ColType& op_ct)
int64_t Func_monthname::getIntValInternal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
CalpontSystemCatalog::ColType& op_ct)
{
int64_t val = 0;
dataconvert::DateTime aDateTime;
@ -165,12 +165,20 @@ int64_t Func_monthname::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool&
break;
default: isNull = true; return -1;
default:
isNull = true;
return -1;
}
return -1;
}
// Integer accessor of Func_monthname: a thin wrapper that forwards every argument to
// getIntValInternal() and returns its result unchanged.
int64_t Func_monthname::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
execplan::CalpontSystemCatalog::ColType& op_ct)
{
return getIntValInternal(row, parm, isNull, op_ct);
}
double Func_monthname::getDoubleVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
execplan::CalpontSystemCatalog::ColType& op_ct)
{

View File

@ -41,6 +41,8 @@ using namespace logging;
#include "funchelpers.h"
#include "exceptclasses.h"
namespace
{
using namespace funcexp;
@ -136,18 +138,27 @@ int64_t Func_round::getIntVal(Row& row, FunctionParm& parm, bool& isNull,
uint64_t Func_round::getUintVal(Row& row, FunctionParm& parm, bool& isNull,
CalpontSystemCatalog::ColType& op_ct)
{
uint64_t x;
if (UNLIKELY(op_ct.colDataType == execplan::CalpontSystemCatalog::DATE))
IDB_Decimal x = getDecimalVal(row, parm, isNull, op_ct);
if (!op_ct.isWideDecimalType())
{
IDB_Decimal d = getDecimalVal(row, parm, isNull, op_ct);
x = static_cast<uint64_t>(d.value);
if (x.scale > 0)
{
while (x.scale-- > 0)
x.value /= 10;
}
else
{
while (x.scale++ < 0)
x.value *= 10;
}
return x.value;
}
else
{
x = parm[0]->data()->getUintVal(row, isNull);
return static_cast<uint64_t>(x.getIntegralPart());
}
return x;
}
double Func_round::getDoubleVal(Row& row, FunctionParm& parm, bool& isNull,
@ -434,10 +445,11 @@ IDB_Decimal Func_round::getDecimalVal(Row& row, FunctionParm& parm, bool& isNull
{
uint64_t x = parm[0]->data()->getUintVal(row, isNull);
if (x > (uint64_t)helpers::maxNumber_c[18])
{
x = helpers::maxNumber_c[18];
}
// why it is here at all???
// if (x > (uint64_t)helpers::maxNumber_c[18])
//{
// x = helpers::maxNumber_c[18];
//}
decimal.value = x;
decimal.scale = 0;

View File

@ -651,6 +651,8 @@ class Func_monthname : public Func_Str
int64_t getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& op_ct);
int64_t getIntValInternal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& op_ct);
double getDoubleVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
execplan::CalpontSystemCatalog::ColType& op_ct);

View File

@ -305,4 +305,3 @@ class ProtocolError : public std::logic_error
} while (0)
} // namespace logging

View File

@ -2820,16 +2820,24 @@ LBID_t ExtentMap::_createColumnExtent_DBroot(uint32_t size, int OID, uint32_t co
e->HWM = 0;
e->status = EXTENTUNAVAILABLE; // mark extent as in process
// Partition, segment, and blockOffset 0 represents new table or column.
// When DDL creates a table, we can mark the first extent as VALID, since
// the table has no data. Marking as VALID enables cpimport to update
// the CP min/max for the first import.
// If DDL is adding a column to an existing table, setting to VALID won't
// hurt, because DDL resets to INVALID after the extent is created.
if ((e->partitionNum == 0) && (e->segmentNum == 0) && (e->blockOffset == 0))
e->partition.cprange.isValid = CP_VALID;
else
e->partition.cprange.isValid = CP_INVALID;
#if 0 // XXX: sergueyz: I'll leave these under conditional flag for a while because it appears a huge change.
// Partition, segment, and blockOffset 0 represents new table or column.
// When DDL creates a table, we can mark the first extent as VALID, since
// the table has no data. Marking as VALID enables cpimport to update
// the CP min/max for the first import.
// If DDL is adding a column to an existing table, setting to VALID won't
// hurt, because DDL resets to INVALID after the extent is created.
// XXX: the comment above is out of date. bulk set of extents ranges
// works differently right now.
if ((e->partitionNum == 0) &&
(e->segmentNum == 0) &&
(e->blockOffset == 0))
e->partition.cprange.isValid = CP_VALID;
else
e->partition.cprange.isValid = CP_INVALID;
#else
e->partition.cprange.isValid = CP_INVALID;
#endif
partitionNum = e->partitionNum;
segmentNum = e->segmentNum;
@ -3029,16 +3037,22 @@ LBID_t ExtentMap::_createColumnExtentExactFile(uint32_t size, int OID, uint32_t
e->HWM = 0;
}
// Partition, segment, and blockOffset 0 represents new table or column.
// When DDL creates a table, we can mark the first extent as VALID, since
// the table has no data. Marking as VALID enables cpimport to update
// the CP min/max for the first import.
// If DDL is adding a column to an existing table, setting to VALID won't
// hurt, because DDL resets to INVALID after the extent is created.
if ((e->partitionNum == 0) && (e->segmentNum == 0) && (e->blockOffset == 0))
e->partition.cprange.isValid = CP_VALID;
else
e->partition.cprange.isValid = CP_INVALID;
#if 0 // XXX: sergueyz: I'll leave these under conditional flag for a while because it appears a huge change.
// Partition, segment, and blockOffset 0 represents new table or column.
// When DDL creates a table, we can mark the first extent as VALID, since
// the table has no data. Marking as VALID enables cpimport to update
// the CP min/max for the first import.
// If DDL is adding a column to an existing table, setting to VALID won't
// hurt, because DDL resets to INVALID after the extent is created.
if ((e->partitionNum == 0) &&
(e->segmentNum == 0) &&
(e->blockOffset == 0))
e->partition.cprange.isValid = CP_VALID;
else
e->partition.cprange.isValid = CP_INVALID;
#else
e->partition.cprange.isValid = CP_INVALID;
#endif
startBlockOffset = e->blockOffset;

View File

@ -739,7 +739,7 @@ int BulkLoad::preProcess(Job& job, int tableNo, TableInfo* tableInfo)
// Setup import to start loading into starting HWM DB file
RETURN_ON_ERROR(info->setupInitialColumnExtent(dbRoot, partition, segment,
job.jobTableList[tableNo].tblName, lbid, oldHwm, hwm,
bSkippedToNewExtent, false));
bSkippedToNewExtent, bSkippedToNewExtent || oldHwm < 1));
}
tableInfo->addColumn(info);

View File

@ -1717,7 +1717,7 @@ int BulkLoadBuffer::parseCol(ColumnInfo& columnInfo)
lastInputRowInExtent += columnInfo.rowsPerExtent();
if (isUnsigned(columnInfo.column.dataType) || isCharType(columnInfo.column.dataType))
if (isUnsigned(columnInfo.column.dataType))
{
if (columnInfo.column.width <= 8)
{

View File

@ -54,7 +54,7 @@ class BLBufferStats
};
BLBufferStats(ColDataType colDataType) : satCount(0)
{
if (isUnsigned(colDataType) || isCharType(colDataType))
if (isUnsigned(colDataType))
{
minBufferVal = static_cast<int64_t>(MAX_UBIGINT);
maxBufferVal = static_cast<int64_t>(MIN_UBIGINT);

View File

@ -104,7 +104,7 @@ void ColExtInf::addOrUpdateEntryTemplate(RID lastInputRow, T minVal, T maxVal, C
}
else // Update the range
{
if (isUnsigned(colDataType) || isCharType(colDataType))
if (isUnsigned(colDataType))
{
if (width <= 8)
{

View File

@ -241,6 +241,7 @@ uint8_t WE_DDLCommandProc::writeSystable(ByteStream& bs, std::string& err)
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
dctnryStruct.columnOid = column.oid;
}
else
@ -656,9 +657,10 @@ uint8_t WE_DDLCommandProc::writeCreateSyscolumn(ByteStream& bs, std::string& err
dctnryStruct.fCompressionType = 2;
}
if (colStruct.tokenFlag)
if (colStruct.tokenFlag) // TODO: XXX: this is copied aplenty. NEED TO REFACTOR.
{
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
dctnryStruct.columnOid = column.oid;
}
else
@ -1046,6 +1048,7 @@ uint8_t WE_DDLCommandProc::writeSyscolumn(ByteStream& bs, std::string& err)
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
dctnryStruct.columnOid = column.oid;
}
else
@ -2442,6 +2445,7 @@ uint8_t WE_DDLCommandProc::updateSyscolumnTablename(ByteStream& bs, std::string&
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
dctnryStruct.columnOid = colStruct.dataOid;
}
else
@ -2846,6 +2850,7 @@ uint8_t WE_DDLCommandProc::updateSystableTablename(ByteStream& bs, std::string&
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
dctnryStruct.columnOid = colStruct.dataOid;
}
else
@ -3087,6 +3092,7 @@ uint8_t WE_DDLCommandProc::updateSystablesTablename(ByteStream& bs, std::string&
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
dctnryStruct.columnOid = colStruct.dataOid;
}
else
@ -3273,6 +3279,7 @@ uint8_t WE_DDLCommandProc::updateSystablesTablename(ByteStream& bs, std::string&
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
dctnryStruct.columnOid = colStruct.dataOid;
}
else
@ -4258,6 +4265,7 @@ uint8_t WE_DDLCommandProc::updateSyscolumnSetDefault(messageqcpp::ByteStream& bs
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
dctnryStruct.columnOid = colStruct.dataOid;
}
else
@ -4545,6 +4553,7 @@ uint8_t WE_DDLCommandProc::updateSyscolumnRenameColumn(messageqcpp::ByteStream&
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = column1.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column1.colType.charsetNumber;
dctnryStruct.columnOid = colStruct.dataOid;
}
else
@ -4756,6 +4765,7 @@ uint8_t WE_DDLCommandProc::updateSyscolumnRenameColumn(messageqcpp::ByteStream&
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = column5.colType.ddn.dictOID;
dctnryStruct.fCharsetNumber = column5.colType.charsetNumber;
dctnryStruct.columnOid = colStruct.dataOid;
}
else

View File

@ -181,7 +181,9 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
colStruct.colDataType = colType.colDataType;
if (colStruct.tokenFlag)
dctnryStruct.fCharsetNumber = colType.charsetNumber;
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = colType.ddn.dictOID;
dctnryStruct.columnOid = colStruct.dataOid;
@ -1037,6 +1039,8 @@ uint8_t WE_DMLCommandProc::processBatchInsert(messageqcpp::ByteStream& bs, std::
colStruct.colDataType = colType.colDataType;
dctnryStruct.fCharsetNumber = colType.charsetNumber;
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = colType.ddn.dictOID;
@ -1619,6 +1623,8 @@ uint8_t WE_DMLCommandProc::processBatchInsertBinary(messageqcpp::ByteStream& bs,
colStruct.colDataType = colType.colDataType;
dctnryStruct.fCharsetNumber = colType.charsetNumber;
if (colStruct.tokenFlag)
{
dctnryStruct.dctnryOid = colType.ddn.dictOID;
@ -2830,6 +2836,7 @@ uint8_t WE_DMLCommandProc::processUpdate(messageqcpp::ByteStream& bs, std::strin
dctnryStruct.dctnryOid = colType.ddn.dictOID;
dctnryStruct.columnOid = colStruct.dataOid;
dctnryStruct.fCompressionType = colType.compressionType;
dctnryStruct.fCharsetNumber = colType.charsetNumber;
dctnryStruct.colWidth = colType.colWidth;
if (NO_ERROR != (error = fWEWrapper.openDctnry(txnId, dctnryStruct, false))) // @bug 5572 HDFS tmp file
@ -4445,6 +4452,8 @@ uint8_t WE_DMLCommandProc::processFixRows(messageqcpp::ByteStream& bs, std::stri
dctnryStruct.fCompressionType = colStruct.fCompressionType;
dctnryStruct.dctnryOid = 0;
dctnryStruct.fCharsetNumber = colType.charsetNumber;
if (colType.colWidth > 8) // token
{
colStruct.colWidth = 8;

View File

@ -149,11 +149,10 @@ int ServiceWriteEngine::setupResources()
return -3;
}
if (rlim.rlim_cur != 65536)
if (rlim.rlim_cur < 65536)
{
return -4;
}
#endif
return 0;
}

View File

@ -24,6 +24,7 @@
#pragma once
#include <iostream>
#include <memory>
#include <vector>
#include <boost/thread.hpp>
#include <boost/thread/tss.hpp>
@ -49,17 +50,19 @@ namespace WriteEngine
// forward reference
class DbFileOp;
/** @brief Extended CPInfo - with type handler for all type-related information */
/** @brief Extended CPInfo - with all type-related information and associated range data */
struct ExtCPInfo
{
execplan::CalpontSystemCatalog::ColDataType fColType;
int fColWidth;
BRM::CPInfo fCPInfo;
std::shared_ptr<std::vector<int64_t>> fStringsPrefixes;
ExtCPInfo(execplan::CalpontSystemCatalog::ColDataType colType, int colWidth)
: fColType(colType), fColWidth(colWidth)
{
fCPInfo.isBinaryColumn = (unsigned int)colWidth > datatypes::MAXLEGACYWIDTH;
}
void toInvalid()
{
auto mm = datatypes::MinMaxInfo::invalidRange(fColType);
@ -68,7 +71,22 @@ struct ExtCPInfo
fCPInfo.bigMax = mm.int128Max;
fCPInfo.bigMin = mm.int128Min;
}
/** @brief Append one encoded string prefix to this range info.
 *
 * The backing vector is created lazily on the first call, so hasStringsPrefixes()
 * stays false until at least one prefix has been added.
 * @param strPrefix  encoded prefix value to record
 */
void addStringPrefix(int64_t strPrefix)
{
  if (!fStringsPrefixes)
  {
    // make_shared avoids a raw `new` and allocates object and control block together.
    fStringsPrefixes = std::make_shared<std::vector<int64_t>>();
  }
  fStringsPrefixes->push_back(strPrefix);
}
/** @brief Tell whether the prefix vector has been created (i.e. fStringsPrefixes is non-null). */
bool hasStringsPrefixes() const
{
  return static_cast<bool>(fStringsPrefixes);
}
/** @brief Raw access to the recorded prefixes.
 *
 * @return pointer to the vector's storage, or nullptr when no prefix vector exists
 */
int64_t* stringsPrefixes() const
{
  if (!hasStringsPrefixes())
  {
    return nullptr;
  }
  return fStringsPrefixes->data();
}
bool isInvalid()
{
datatypes::MinMaxInfo mm;

View File

@ -344,6 +344,7 @@ struct DctnryStruct /** @brief Dctnry Interface Struct*/
uint16_t fColSegment; /** @brief Segment for column file */
uint16_t fColDbRoot; /** @brief DBRoot for column file */
int fCompressionType; /** @brief Compression type for column file */
int fCharsetNumber; /** @brief Charset number to account for collation when computing string prefixes */
DctnryStruct()
: dctnryOid(0)
, columnOid(0)
@ -353,6 +354,7 @@ struct DctnryStruct /** @brief Dctnry Interface Struct*/
, fColSegment(0)
, fColDbRoot(0)
, fCompressionType(idbdatafile::IDBPolicy::useHdfs() ? 2 : 0)
, fCharsetNumber(8)
{
}
};

View File

@ -214,6 +214,7 @@ int WEBrmUpdater::updateHighWaterMarkInBRM()
int WEBrmUpdater::updateCPAndHWMInBRM()
{
int rc = 0;
size_t i;
// BUG 4232. some imports may not contain CP but HWM
if ((fCPInfo.size() > 0) || (fHWMInfo.size() > 0))
@ -227,6 +228,13 @@ int WEBrmUpdater::updateCPAndHWMInBRM()
const std::vector<CPInfoMerge> & mergeCPDataArgs,
VER_t transID = 0) DBRM_THROW;
*/
for (i = 0; i < fCPInfo.size(); i++)
{
if (fCPInfo[i].newExtent)
{
fCPInfo[i].seqNum = 0; // to be in sync with DBRM.
}
}
rc = fpBrm->bulkSetHWMAndCP(fHWMInfo, fCPInfoData, fCPInfo, 0);
// rc = fpBrm->mergeExtentsMaxMin(fCPInfo);

View File

@ -21,6 +21,10 @@
/** @writeengine.cpp
* A wrapper class for the write engine to write information to files
*/
// XXX: a definition that switches range computations for token columns on; it is commented out, so they are off.
//#define XXX_WRITEENGINE_TOKENS_RANGES_XXX
#include <cmath>
#include <cstdlib>
#include <unistd.h>
@ -59,6 +63,7 @@ using namespace execplan;
#include "MonitorProcMem.h"
using namespace idbdatafile;
#include "dataconvert.h"
#include "string_prefixes.h"
#ifdef _MSC_VER
#define isnan _isnan
@ -362,6 +367,9 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
case WR_UINT:
case WR_ULONGLONG:
case WR_CHAR:
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
case WR_TOKEN:
#endif
{
isUnsigned = true;
break;
@ -385,6 +393,13 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
maxMin->fromToChars();
}
}
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
if (colType == WR_TOKEN)
{
oldValArrayVoid = nullptr; // no old values for tokens, sadly.
valArrayVoid = (void*)maxMin->stringsPrefixes();
}
#endif
size_t i;
for (i = 0; i < totalOldRow; i++)
{
@ -435,6 +450,9 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
fetchNewOldValues<int64_t, int64_t>(value, oldValue, valArrayVoid, oldValArrayVoid, i, totalNewRow);
break;
}
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
case WR_TOKEN:
#endif
case WR_ULONGLONG:
{
fetchNewOldValues<uint64_t, uint64_t>(uvalue, oldUValue, valArrayVoid, oldValArrayVoid, i,
@ -449,12 +467,11 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
}
case WR_CHAR:
{
fetchNewOldValues<uint64_t, uint64_t>(uvalue, oldUValue, valArrayVoid, oldValArrayVoid, i,
totalNewRow);
fetchNewOldValues<int64_t, int64_t>(value, oldValue, valArrayVoid, oldValArrayVoid, i, totalNewRow);
// For characters (strings, actually), we fetched them in LSB order, on x86 at the very least.
// This means the most significant byte of the string, which comes first, is now in the LSB of uvalue/oldValue.
// We must perform a conversion.
uvalue = uint64ToStr(uvalue);
value = uint64ToStr(uvalue);
oldValue = uint64ToStr(oldValue);
break;
}
@ -576,6 +593,7 @@ void WriteEngineWrapper::convertValue(const execplan::CalpontSystemCatalog::ColT
curStr = curStr.substr(0, MAX_COLUMN_BOUNDARY);
memcpy(value, curStr.c_str(), curStr.length());
break;
case WriteEngine::WR_FLOAT:
@ -1179,10 +1197,17 @@ static void log_this(const char *message,
#endif
/** @brief Determine whether we may update a column's ranges (by type) and return nullptr if we can't */
static ExtCPInfo* getCPInfoToUpdateForUpdatableType(const ColStruct& colStruct, ExtCPInfo* currentCPInfo)
static ExtCPInfo* getCPInfoToUpdateForUpdatableType(const ColStruct& colStruct, ExtCPInfo* currentCPInfo,
OpType optype)
{
if (colStruct.tokenFlag)
{
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
if (currentCPInfo && currentCPInfo->hasStringsPrefixes() && optype == INSERT)
{
return currentCPInfo;
}
#endif
return nullptr;
}
switch (colStruct.colType)
@ -1689,10 +1714,16 @@ int WriteEngineWrapper::insertColumnRecs(
for (uint32_t rows = 0; rows < (totalRow - rowsLeft); rows++)
{
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
int64_t strPrefix;
#endif
if (dctStr_iter->length() == 0)
{
Token nullToken;
col_iter->data = nullToken;
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
strPrefix = (int64_t)joblist::UBIGINTNULL; // the string prefixes are signed long ints.
#endif
}
else
{
@ -1702,6 +1733,10 @@ int WriteEngineWrapper::insertColumnRecs(
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigSize = dctStr_iter->length();
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
dctnryStructList[i].fCharsetNumber);
#endif
dctTuple.isNull = false;
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
@ -1717,6 +1752,9 @@ int WriteEngineWrapper::insertColumnRecs(
col_iter->data = dctTuple.token;
}
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
maxMins[i].fSplitMaxMinInfo[0].addStringPrefix(strPrefix);
#endif
dctStr_iter++;
col_iter++;
}
@ -1744,10 +1782,16 @@ int WriteEngineWrapper::insertColumnRecs(
for (uint32_t rows = 0; rows < rowsLeft; rows++)
{
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
int64_t strPrefix;
#endif
if (dctStr_iter->length() == 0)
{
Token nullToken;
col_iter->data = nullToken;
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
strPrefix = joblist::UBIGINTNULL; // string prefixes are signed long ints.
#endif
}
else
{
@ -1757,6 +1801,10 @@ int WriteEngineWrapper::insertColumnRecs(
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigSize = dctStr_iter->length();
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
dctnryStructList[i].fCharsetNumber);
#endif
dctTuple.isNull = false;
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
@ -1772,6 +1820,9 @@ int WriteEngineWrapper::insertColumnRecs(
col_iter->data = dctTuple.token;
}
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
maxMins[i].fSplitMaxMinInfo[1].addStringPrefix(strPrefix);
#endif
dctStr_iter++;
col_iter++;
}
@ -1938,7 +1989,7 @@ int WriteEngineWrapper::insertColumnRecs(
if (isFirstBatchPm && (totalRow == rowsLeft))
{
// in this particular case we already marked extents as invalid up there.
// in this particular case we already marked extents as invalid above.
}
else
{
@ -1950,7 +2001,7 @@ int WriteEngineWrapper::insertColumnRecs(
if (firstHalfCount)
{
ExtCPInfo* cpInfoP =
getCPInfoToUpdateForUpdatableType(colStructList[i], &maxMins[i].fSplitMaxMinInfo[0]);
getCPInfoToUpdateForUpdatableType(colStructList[i], &maxMins[i].fSplitMaxMinInfo[0], m_opType);
RID thisRid = rowsLeft ? lastRid : lastRidNew;
successFlag = colOp->calculateRowId(thisRid, BYTE_PER_BLOCK / width, width, curFbo, curBio);
@ -1966,7 +2017,7 @@ int WriteEngineWrapper::insertColumnRecs(
if (rowsLeft)
{
ExtCPInfo* cpInfoP =
getCPInfoToUpdateForUpdatableType(colStructList[i], &maxMins[i].fSplitMaxMinInfo[1]);
getCPInfoToUpdateForUpdatableType(colStructList[i], &maxMins[i].fSplitMaxMinInfo[1], m_opType);
if (cpInfoP)
{
RETURN_ON_ERROR(GetLBIDRange(newExtentsStartingLbids[i], colStructList[i], *cpInfoP));
@ -4446,11 +4497,6 @@ int WriteEngineWrapper::updateColumnRec(const TxnID& txnid, const vector<CSCType
ColumnOp* colOp = NULL;
ExtCPInfoList infosToUpdate;
if (m_opType != DELETE)
{
m_opType = UPDATE;
}
for (unsigned extent = 0; extent < numExtents; extent++)
{
colStructList = colExtentsStruct[extent];
@ -4524,15 +4570,19 @@ int WriteEngineWrapper::updateColumnRec(const TxnID& txnid, const vector<CSCType
}
std::vector<ExtCPInfo*> currentExtentRangesPtrs(colStructList.size(), NULL); // pointers for each extent.
if (m_opType != DELETE)
m_opType = UPDATE;
for (unsigned j = 0; j < colStructList.size(); j++)
{
colOp = m_colOp[op(colStructList[j].fCompressionType)];
ExtCPInfo* cpInfoP = &(currentExtentRanges[j]);
cpInfoP = getCPInfoToUpdateForUpdatableType(colStructList[j], cpInfoP);
cpInfoP = getCPInfoToUpdateForUpdatableType(colStructList[j], cpInfoP, m_opType);
currentExtentRangesPtrs[j] = cpInfoP;
if (colStructList[j].tokenFlag)
continue;
// XXX: highly dubious.
// if (!colStructList[j].tokenFlag)
// continue;
width = colOp->getCorrectRowWidth(colStructList[j].colDataType, colStructList[j].colWidth);
successFlag = colOp->calculateRowId(aRid, BYTE_PER_BLOCK / width, width, curFbo, curBio);
@ -4550,9 +4600,6 @@ int WriteEngineWrapper::updateColumnRec(const TxnID& txnid, const vector<CSCType
// timer.start("markExtentsInvalid");
//#endif
if (m_opType != DELETE)
m_opType = UPDATE;
rc = writeColumnRecUpdate(txnid, cscColTypeList, colStructList, colValueList, colOldValueList,
ridLists[extent], tableOid, true, ridLists[extent].size(),
&currentExtentRangesPtrs);
@ -4578,6 +4625,7 @@ int WriteEngineWrapper::updateColumnRec(const TxnID& txnid, const vector<CSCType
{
cpInfo.fCPInfo.seqNum = SEQNUM_MARK_INVALID_SET_RANGE;
}
// ZZZZ
rc = BRMWrapper::getInstance()->setExtentsMaxMin(infosToDrop);
setInvalidCPInfosSpecialMarks(infosToUpdate);
rc = BRMWrapper::getInstance()->setExtentsMaxMin(infosToUpdate);
@ -4611,12 +4659,9 @@ int WriteEngineWrapper::updateColumnRecs(const TxnID& txnid, const CSCTypesList&
colOp = m_colOp[op(colExtentsStruct[j].fCompressionType)];
ExtCPInfo* cpInfoP = &(infosToUpdate[j]);
cpInfoP = getCPInfoToUpdateForUpdatableType(colExtentsStruct[j], cpInfoP);
cpInfoP = getCPInfoToUpdateForUpdatableType(colExtentsStruct[j], cpInfoP, m_opType);
pointersToInfos.push_back(cpInfoP);
if (colExtentsStruct[j].tokenFlag)
continue;
width = colOp->getCorrectRowWidth(colExtentsStruct[j].colDataType, colExtentsStruct[j].colWidth);
successFlag = colOp->calculateRowId(aRid, BYTE_PER_BLOCK / width, width, curFbo, curBio);
@ -4964,7 +5009,7 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid, const CSCTypesList& c
allocateValArray(valArray, totalRow1, colStructList[i].colType, colStructList[i].colWidth);
ExtCPInfo* cpInfo = getCPInfoToUpdateForUpdatableType(
colStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[0] : NULL);
colStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[0] : NULL, m_opType);
if (m_opType != INSERT && cpInfo != NULL) // we allocate space for old values only when we need them.
{
@ -5109,7 +5154,7 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid, const CSCTypesList& c
}
ExtCPInfo* cpInfo = getCPInfoToUpdateForUpdatableType(
newColStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[1] : NULL);
newColStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[1] : NULL, m_opType);
allocateValArray(valArray, totalRow2, newColStructList[i].colType, newColStructList[i].colWidth);
if (m_opType != INSERT && cpInfo != NULL) // we allocate space for old values only when we need them.
@ -5190,7 +5235,7 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid, const CSCTypesList& c
ColumnOp* colOp = m_colOp[op(colStructList[i].fCompressionType)];
ExtCPInfo* cpInfo = getCPInfoToUpdateForUpdatableType(
colStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[0] : NULL);
colStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[0] : NULL, m_opType);
// set params
colOp->initColumn(curCol);