mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-04-18 21:44:02 +03:00
MCOL-4580 extent elimination for dictionary-based text/varchar types
The idea is relatively simple: encode prefixes of collated strings as integers and use them to compute extents' ranges. Then we can eliminate extents when filtering on strings. The patch contains all of the code but misses one important step: we do not keep the collation index, we keep the charset index. Because of this, some of the tests in the bugfix suite fail, and thus the main functionality is turned off. The reason this patch is submitted as a PR at all is that it contains changes that make CHAR/VARCHAR columns unsigned. This change is needed for the vectorization work.
This commit is contained in:
parent
a66a8dfabf
commit
53b9a2a0f9
@ -482,7 +482,11 @@ inline bool isUnsigned(const datatypes::SystemCatalog::ColDataType type)
|
||||
case datatypes::SystemCatalog::USMALLINT:
|
||||
case datatypes::SystemCatalog::UMEDINT:
|
||||
case datatypes::SystemCatalog::UINT:
|
||||
case datatypes::SystemCatalog::UBIGINT: return true;
|
||||
case datatypes::SystemCatalog::UBIGINT:
|
||||
case datatypes::SystemCatalog::CHAR:
|
||||
case datatypes::SystemCatalog::VARCHAR:
|
||||
case datatypes::SystemCatalog::TEXT:
|
||||
case datatypes::SystemCatalog::VARBINARY: return true;
|
||||
|
||||
default: return false;
|
||||
}
|
||||
@ -2520,4 +2524,3 @@ class TypeHandlerTimestamp : public TypeHandlerTemporal
|
||||
};
|
||||
|
||||
} // end of namespace datatypes
|
||||
|
||||
|
@ -336,6 +336,7 @@ class SignedInteger : public Parser::DD2OM<Sign, UnsignedInteger>
|
||||
{
|
||||
public:
|
||||
using DD2OM::DD2OM;
|
||||
|
||||
bool isNull() const
|
||||
{
|
||||
return UnsignedInteger::isNull();
|
||||
|
@ -424,6 +424,8 @@ void AggregateColumn::evaluate(Row& row, bool& isNull)
|
||||
else
|
||||
fResult.intVal = atoll((char*)&fResult.origIntVal);
|
||||
|
||||
fResult.uintVal = fResult.intVal;
|
||||
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::BIGINT:
|
||||
|
@ -945,17 +945,18 @@ inline bool isNull(int64_t val, const execplan::CalpontSystemCatalog::ColType& c
|
||||
break;
|
||||
}
|
||||
|
||||
case execplan::CalpontSystemCatalog::VARCHAR:
|
||||
case execplan::CalpontSystemCatalog::CHAR:
|
||||
{
|
||||
int colWidth = ct.colWidth;
|
||||
|
||||
if (colWidth <= 8)
|
||||
{
|
||||
if ((colWidth == 1) && ((int8_t)joblist::CHAR1NULL == val))
|
||||
if ((colWidth == 1) && ((uint8_t)joblist::CHAR1NULL == (uint8_t)val))
|
||||
ret = true;
|
||||
else if ((colWidth == 2) && ((int16_t)joblist::CHAR2NULL == val))
|
||||
else if ((colWidth == 2) && ((uint16_t)joblist::CHAR2NULL == (uint16_t)val))
|
||||
ret = true;
|
||||
else if ((colWidth < 5) && ((int32_t)joblist::CHAR4NULL == val))
|
||||
else if ((colWidth < 5) && ((uint32_t)joblist::CHAR4NULL == (uint32_t)val))
|
||||
ret = true;
|
||||
else if ((int64_t)joblist::CHAR8NULL == val)
|
||||
ret = true;
|
||||
@ -964,7 +965,6 @@ inline bool isNull(int64_t val, const execplan::CalpontSystemCatalog::ColType& c
|
||||
{
|
||||
throw std::logic_error("Not a int column.");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1075,27 +1075,6 @@ inline bool isNull(int64_t val, const execplan::CalpontSystemCatalog::ColType& c
|
||||
break;
|
||||
}
|
||||
|
||||
case execplan::CalpontSystemCatalog::VARCHAR:
|
||||
{
|
||||
int colWidth = ct.colWidth;
|
||||
|
||||
if (colWidth <= 8)
|
||||
{
|
||||
if ((colWidth < 3) && ((int16_t)joblist::CHAR2NULL == val))
|
||||
ret = true;
|
||||
else if ((colWidth < 5) && ((int32_t)joblist::CHAR4NULL == val))
|
||||
ret = true;
|
||||
else if ((int64_t)joblist::CHAR8NULL == val)
|
||||
ret = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::logic_error("Not a int column.");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case execplan::CalpontSystemCatalog::UTINYINT:
|
||||
{
|
||||
if (joblist::UTINYINTNULL == (uint8_t)val)
|
||||
@ -1278,4 +1257,3 @@ const std::string colDataTypeToString(CalpontSystemCatalog::ColDataType cdt);
|
||||
bool ctListSort(const CalpontSystemCatalog::ColType& a, const CalpontSystemCatalog::ColType& b);
|
||||
|
||||
} // namespace execplan
|
||||
|
||||
|
@ -48,18 +48,8 @@ ConstantColumn::ConstantColumn(const string& sql, TYPE type)
|
||||
{
|
||||
fResult.strVal = sql;
|
||||
|
||||
if (type == LITERAL && sql.length() < 9)
|
||||
{
|
||||
memcpy(tmp, sql.c_str(), sql.length());
|
||||
memset(tmp + sql.length(), 0, 8);
|
||||
fResult.uintVal = uint64ToStr(*((uint64_t*)tmp));
|
||||
fResult.intVal = (int64_t)fResult.uintVal;
|
||||
}
|
||||
else
|
||||
{
|
||||
fResult.intVal = atoll(sql.c_str());
|
||||
fResult.uintVal = strtoull(sql.c_str(), NULL, 0);
|
||||
}
|
||||
fResult.intVal = atoll(sql.c_str());
|
||||
fResult.uintVal = strtoull(sql.c_str(), NULL, 0);
|
||||
|
||||
fResult.floatVal = atof(sql.c_str());
|
||||
fResult.doubleVal = atof(sql.c_str());
|
||||
|
@ -264,20 +264,6 @@ void PredicateOperator::setOpType(Type& l, Type& r)
|
||||
fOperationType.colWidth = 8;
|
||||
}
|
||||
}
|
||||
// If both sides are unsigned, use UBIGINT as result type, otherwise
|
||||
// "promote" to BIGINT.
|
||||
else if (isUnsigned(l.colDataType) && isUnsigned(r.colDataType))
|
||||
{
|
||||
fOperationType.colDataType = execplan::CalpontSystemCatalog::UBIGINT;
|
||||
fOperationType.colWidth = 8;
|
||||
}
|
||||
else if ((isSignedInteger(l.colDataType) && isUnsigned(r.colDataType)) ||
|
||||
(isUnsigned(l.colDataType) && isSignedInteger(r.colDataType)) ||
|
||||
(isSignedInteger(l.colDataType) && isSignedInteger(r.colDataType)))
|
||||
{
|
||||
fOperationType.colDataType = execplan::CalpontSystemCatalog::BIGINT;
|
||||
fOperationType.colWidth = 8;
|
||||
}
|
||||
else if ((l.colDataType == execplan::CalpontSystemCatalog::CHAR ||
|
||||
l.colDataType == execplan::CalpontSystemCatalog::VARCHAR ||
|
||||
l.colDataType == execplan::CalpontSystemCatalog::TEXT) &&
|
||||
@ -321,6 +307,20 @@ void PredicateOperator::setOpType(Type& l, Type& r)
|
||||
fOperationType.colWidth = 255;
|
||||
}
|
||||
}
|
||||
// If both sides are unsigned, use UBIGINT as result type, otherwise
|
||||
// "promote" to BIGINT.
|
||||
else if (isUnsigned(l.colDataType) && isInteger(l.colDataType) && isUnsigned(r.colDataType) && isInteger(r.colDataType))
|
||||
{
|
||||
fOperationType.colDataType = execplan::CalpontSystemCatalog::UBIGINT;
|
||||
fOperationType.colWidth = 8;
|
||||
}
|
||||
else if ((isSignedInteger(l.colDataType) && isUnsigned(r.colDataType) && isInteger(r.colDataType)) ||
|
||||
(isUnsigned(l.colDataType) && isInteger(l.colDataType) && isSignedInteger(r.colDataType)) ||
|
||||
(isSignedInteger(l.colDataType) && isSignedInteger(r.colDataType)))
|
||||
{
|
||||
fOperationType.colDataType = execplan::CalpontSystemCatalog::BIGINT;
|
||||
fOperationType.colWidth = 8;
|
||||
}
|
||||
else if (l.colDataType == execplan::CalpontSystemCatalog::LONGDOUBLE ||
|
||||
r.colDataType == execplan::CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
@ -410,7 +410,9 @@ bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, ReturnedCol
|
||||
if (isNull)
|
||||
return false;
|
||||
|
||||
return numericCompare(val1, rop->getIntVal(row, isNull)) && !isNull;
|
||||
int64_t val2 = rop->getIntVal(row, isNull);
|
||||
|
||||
return numericCompare(val1, val2) && !isNull;
|
||||
}
|
||||
|
||||
case execplan::CalpontSystemCatalog::UBIGINT:
|
||||
|
@ -565,6 +565,16 @@ void SimpleColumn::evaluate(Row& row, bool& isNull)
|
||||
else
|
||||
fResult.intVal = atoll((char*)&fResult.origIntVal);
|
||||
|
||||
// MCOL-4580 - related, probably can be marked with XXX.
|
||||
// This does not fail in any tests, but it is considered wrong.
|
||||
// The reasoning behind that is that we changed the signedness of characters to unsigned
|
||||
// and it might be a case with short strings that they were copied as is using
|
||||
// uint64ToStr encoding into int64_t values. So, potentially, unsuspecting code
|
||||
// may use getUintVal instead of getIntVal to process short char column, getting
|
||||
// an uninitialized value and exhibiting unstable behavior.
|
||||
// None of our tests failed, though.
|
||||
fResult.uintVal = fResult.intVal;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include "columnwidth.h"
|
||||
#include "mcs_decimal.h"
|
||||
#include "mcs_int64.h"
|
||||
#include "numericliteral.h"
|
||||
|
||||
namespace messageqcpp
|
||||
{
|
||||
@ -664,25 +665,19 @@ inline int64_t TreeNode::getIntVal()
|
||||
switch (fResultType.colDataType)
|
||||
{
|
||||
case CalpontSystemCatalog::CHAR:
|
||||
if (fResultType.colWidth <= 8)
|
||||
return fResult.intVal;
|
||||
|
||||
return atoll(fResult.strVal.c_str());
|
||||
|
||||
case CalpontSystemCatalog::VARCHAR:
|
||||
if (fResultType.colWidth <= 7)
|
||||
return fResult.intVal;
|
||||
|
||||
return atoll(fResult.strVal.c_str());
|
||||
|
||||
// FIXME: ???
|
||||
case CalpontSystemCatalog::VARBINARY:
|
||||
case CalpontSystemCatalog::BLOB:
|
||||
case CalpontSystemCatalog::TEXT:
|
||||
if (fResultType.colWidth <= 7)
|
||||
return fResult.intVal;
|
||||
|
||||
return atoll(fResult.strVal.c_str());
|
||||
{
|
||||
datatypes::DataCondition cnverr;
|
||||
literal::Converter<literal::SignedInteger> cnv(fResult.strVal, cnverr);
|
||||
if (datatypes::DataCondition::Code(cnverr) != 0)
|
||||
{
|
||||
cerr << "error in int conversion from '" << fResult.strVal << "'";
|
||||
}
|
||||
return cnv.toSInt<int64_t>(cnverr);
|
||||
}
|
||||
|
||||
case CalpontSystemCatalog::BIGINT:
|
||||
case CalpontSystemCatalog::TINYINT:
|
||||
@ -721,6 +716,20 @@ inline uint64_t TreeNode::getUintVal()
|
||||
{
|
||||
switch (fResultType.colDataType)
|
||||
{
|
||||
case CalpontSystemCatalog::CHAR:
|
||||
case CalpontSystemCatalog::VARCHAR:
|
||||
case CalpontSystemCatalog::VARBINARY:
|
||||
case CalpontSystemCatalog::BLOB:
|
||||
case CalpontSystemCatalog::TEXT:
|
||||
{
|
||||
datatypes::DataCondition cnverr;
|
||||
literal::Converter<literal::UnsignedInteger> cnv(fResult.strVal, cnverr);
|
||||
if (datatypes::DataCondition::Code(cnverr) != 0)
|
||||
{
|
||||
cerr << "error in unsigned int conversion from '" << fResult.strVal << "'";
|
||||
}
|
||||
return cnv.toXIntPositive<uint64_t>(cnverr);
|
||||
}
|
||||
case CalpontSystemCatalog::BIGINT:
|
||||
case CalpontSystemCatalog::TINYINT:
|
||||
case CalpontSystemCatalog::SMALLINT:
|
||||
|
@ -50,6 +50,8 @@ using namespace messageqcpp;
|
||||
using namespace rowgroup;
|
||||
using namespace joiner;
|
||||
|
||||
//#define XXX_BATCHPRIMPROC_TOKENS_RANGES_XXX
|
||||
|
||||
namespace joblist
|
||||
{
|
||||
BatchPrimitiveProcessorJL::BatchPrimitiveProcessorJL(const ResourceManager* rm)
|
||||
@ -152,6 +154,21 @@ void BatchPrimitiveProcessorJL::addFilterStep(const pDictionaryStep& step)
|
||||
cc->setBatchPrimitiveProcessor(this);
|
||||
cc->setQueryUuid(step.queryUuid());
|
||||
cc->setStepUuid(uuid);
|
||||
|
||||
#if defined(XXX_BATCHPRIMPROC_TOKENS_RANGES_XXX)
|
||||
if (filterSteps.size() > 0)
|
||||
{
|
||||
size_t stepsIndex = filterSteps.size() - 1;
|
||||
SCommand prevCC = filterSteps[stepsIndex];
|
||||
ColumnCommandJL* pcc = dynamic_cast<ColumnCommandJL*>(prevCC.get());
|
||||
DictStepJL* ccc = dynamic_cast<DictStepJL*>(cc.get());
|
||||
if (pcc && ccc)
|
||||
{
|
||||
filterSteps[stepsIndex].reset(
|
||||
new ColumnCommandJL(*pcc, *ccc)); // column command will use same filters.
|
||||
}
|
||||
}
|
||||
#endif
|
||||
filterSteps.push_back(cc);
|
||||
filterCount++;
|
||||
needStrValues = true;
|
||||
@ -443,6 +460,7 @@ void BatchPrimitiveProcessorJL::getElementTypes(ByteStream& in, vector<ElementTy
|
||||
if (*validCPData)
|
||||
{
|
||||
in >> *lbid;
|
||||
|
||||
in >> tmp64;
|
||||
*min = (int64_t)tmp64;
|
||||
in >> tmp64;
|
||||
@ -712,8 +730,9 @@ bool BatchPrimitiveProcessorJL::countThisMsg(messageqcpp::ByteStream& in) const
|
||||
}
|
||||
|
||||
if (data[offset] != 0)
|
||||
offset += (data[offset + CP_FLAG_AND_LBID] * 2) + CP_FLAG_AND_LBID +
|
||||
1; // skip the CP data with wide min/max values (16/32 bytes each)
|
||||
offset += (data[offset + CP_FLAG_AND_LBID + 1] * 2) + CP_FLAG_AND_LBID + 1 +
|
||||
1; // skip the CP data with wide min/max values (16/32 bytes each). we also skip
|
||||
// cpFromDictScan flag.
|
||||
else
|
||||
offset += CP_FLAG_AND_LBID; // skip only the "valid CP data" & LBID bytes
|
||||
}
|
||||
@ -750,9 +769,10 @@ void BatchPrimitiveProcessorJL::deserializeAggregateResult(ByteStream* in, vecto
|
||||
}
|
||||
|
||||
void BatchPrimitiveProcessorJL::getRowGroupData(ByteStream& in, vector<RGData>* out, bool* validCPData,
|
||||
uint64_t* lbid, int128_t* min, int128_t* max,
|
||||
uint32_t* cachedIO, uint32_t* physIO, uint32_t* touchedBlocks,
|
||||
bool* countThis, uint32_t threadID, bool* hasWideColumn,
|
||||
uint64_t* lbid, bool* fromDictScan, int128_t* min,
|
||||
int128_t* max, uint32_t* cachedIO, uint32_t* physIO,
|
||||
uint32_t* touchedBlocks, bool* countThis, uint32_t threadID,
|
||||
bool* hasWideColumn,
|
||||
const execplan::CalpontSystemCatalog::ColType& colType) const
|
||||
{
|
||||
uint64_t tmp64;
|
||||
@ -789,6 +809,8 @@ void BatchPrimitiveProcessorJL::getRowGroupData(ByteStream& in, vector<RGData>*
|
||||
{
|
||||
in >> *lbid;
|
||||
in >> tmp8;
|
||||
*fromDictScan = tmp8 != 0;
|
||||
in >> tmp8;
|
||||
*hasWideColumn = (tmp8 > utils::MAXLEGACYWIDTH);
|
||||
if (UNLIKELY(*hasWideColumn))
|
||||
{
|
||||
|
@ -167,9 +167,9 @@ class BatchPrimitiveProcessorJL
|
||||
uint32_t* touchedBlocks) const;
|
||||
void deserializeAggregateResults(messageqcpp::ByteStream* in, std::vector<rowgroup::RGData>* out) const;
|
||||
void getRowGroupData(messageqcpp::ByteStream& in, std::vector<rowgroup::RGData>* out, bool* validCPData,
|
||||
uint64_t* lbid, int128_t* min, int128_t* max, uint32_t* cachedIO, uint32_t* physIO,
|
||||
uint32_t* touchedBlocks, bool* countThis, uint32_t threadID, bool* hasBinaryColumn,
|
||||
const execplan::CalpontSystemCatalog::ColType& colType) const;
|
||||
uint64_t* lbid, bool* fromDictScan, int128_t* min, int128_t* max, uint32_t* cachedIO,
|
||||
uint32_t* physIO, uint32_t* touchedBlocks, bool* countThis, uint32_t threadID,
|
||||
bool* hasBinaryColumn, const execplan::CalpontSystemCatalog::ColType& colType) const;
|
||||
void deserializeAggregateResult(messageqcpp::ByteStream* in, std::vector<rowgroup::RGData>* out) const;
|
||||
bool countThisMsg(messageqcpp::ByteStream& in) const;
|
||||
|
||||
@ -365,4 +365,3 @@ class BatchPrimitiveProcessorJL
|
||||
};
|
||||
|
||||
} // namespace joblist
|
||||
|
||||
|
@ -130,6 +130,59 @@ ColumnCommandJL::ColumnCommandJL(const pColStep& step)
|
||||
fFilesPerColumnPartition = cf->uFromText(fpc);
|
||||
}
|
||||
|
||||
/**
 * @brief Builds a column command from an existing one, optionally taking over
 *        the filters of the dictionary step it is paired with.
 *
 * All scan-related state (extents, OID, LBIDs, dbroot info, shifts, ...) is
 * copied from @p prevCmd. The filters are chosen as follows:
 *  - if the dictionary step yields a non-empty re-encoded filter string and
 *    either both commands use the same BOP or @p prevCmd has no filters, the
 *    re-encoded filters, the dictionary step's BOP and filter count are used
 *    and the command is flagged as containing ranges;
 *  - otherwise the filters, BOP and filter count of @p prevCmd are kept.
 *
 * @param prevCmd          command to copy; asserted to be a dictionary
 *                         (tokens) column command.
 * @param dictWithFilters  dictionary step whose filters are re-encoded via
 *                         DictStepJL::reencodedFilterString().
 */
ColumnCommandJL::ColumnCommandJL(const ColumnCommandJL& prevCmd, const DictStepJL& dictWithFilters)
{
  // NOTE(review): dbrm is not used below. It is kept because constructing it
  // may have side effects that are not visible here - confirm before removing.
  BRM::DBRM dbrm;

  // This constructor should only be called when the command is paired with a
  // dictionary step. In that case the previous command must be a "dict"
  // (tokens) column command and at most one of the two may carry filters.
  idbassert(dictWithFilters.getFilterCount() == 0 || prevCmd.filterCount == 0);
  idbassert(prevCmd.fIsDict);

  // The dictionary filters operate on strings; re-encode them as integer
  // prefixes.
  filterString = dictWithFilters.reencodedFilterString();

  // We have a limitation here.
  // Consider this: textcol IS NULL AND textcol IN ('a', 'b')
  // XXX: should check.
  //
  // The original condition read "BOP = getBop() || ...", which assigned a
  // boolean to BOP instead of comparing the two commands' operators. Compare
  // against prevCmd.BOP explicitly; this->BOP is not initialized at this point.
  const bool sameBop = (prevCmd.BOP == dictWithFilters.getBop());
  const bool prevHasNoFilters = (prevCmd.filterString.length() < 1);

  if (filterString.length() > 0 && (sameBop || prevHasNoFilters))
  {
    filterCount = dictWithFilters.getFilterCount();
    BOP = dictWithFilters.getBop();
    fContainsRanges = true;
  }
  else
  {
    filterCount = prevCmd.filterCount;
    filterString = prevCmd.filterString;
    BOP = prevCmd.BOP;
  }

  /* grab necessary vars from the previous command */
  isScan = prevCmd.isScan;
  colType = prevCmd.colType;
  extents = prevCmd.extents;
  OID = prevCmd.OID;
  colName = prevCmd.colName;
  rpbShift = prevCmd.rpbShift;
  fIsDict = prevCmd.fIsDict;
  fLastLbid = prevCmd.fLastLbid;
  lbid = prevCmd.lbid;
  traceFlags = prevCmd.traceFlags;
  dbroot = prevCmd.dbroot;
  numDBRoots = prevCmd.numDBRoots;

  /* I think modmask isn't necessary for scans */
  divShift = prevCmd.divShift;
  modMask = (1 << divShift) - 1;

  // @Bug 2889. Drop partition enhancement. Read FilesPerColumnPartition and ExtentsPerSegmentFile for use
  // in RID calculation.
  fFilesPerColumnPartition = prevCmd.fFilesPerColumnPartition;
  // MCOL-4685 remove the option to set more than 2 extents per file (ExtentsPreSegmentFile).
  fExtentsPerSegmentFile = prevCmd.fExtentsPerSegmentFile;
}
|
||||
|
||||
// The destructor performs no explicit cleanup of its own.
ColumnCommandJL::~ColumnCommandJL() = default;
|
||||
@ -141,9 +194,22 @@ void ColumnCommandJL::createCommand(ByteStream& bs) const
|
||||
colType.serialize(bs);
|
||||
bs << (uint8_t)isScan;
|
||||
bs << traceFlags;
|
||||
bs << filterString;
|
||||
bs << BOP;
|
||||
bs << filterCount;
|
||||
if (isDict() && fContainsRanges)
|
||||
{
|
||||
// XXX: we should discern here between IS (NOT) NULL and other filters.
|
||||
ByteStream empty;
|
||||
auto zeroFC = filterCount;
|
||||
bs << empty;
|
||||
bs << BOP;
|
||||
zeroFC = 0;
|
||||
bs << zeroFC;
|
||||
}
|
||||
else
|
||||
{
|
||||
bs << filterString;
|
||||
bs << BOP;
|
||||
bs << filterCount;
|
||||
}
|
||||
serializeInlineVector(bs, fLastLbid);
|
||||
|
||||
CommandJL::createCommand(bs);
|
||||
@ -250,7 +316,7 @@ string ColumnCommandJL::toString()
|
||||
{
|
||||
ostringstream ret;
|
||||
|
||||
ret << "ColumnCommandJL: " << filterCount << " filters colwidth=" << colType.colWidth << " oid=" << OID
|
||||
ret << "ColumnCommandJL: " << filterCount << " filters, BOP=" << ((int)BOP) << ", colwidth=" << colType.colWidth << " oid=" << OID
|
||||
<< " name=" << colName;
|
||||
|
||||
if (isScan)
|
||||
@ -286,4 +352,9 @@ void ColumnCommandJL::reloadExtents()
|
||||
sort(extents.begin(), extents.end(), BRM::ExtentSorter());
|
||||
}
|
||||
|
||||
bool ColumnCommandJL::getIsDict()
|
||||
{
|
||||
return fIsDict;
|
||||
}
|
||||
|
||||
}; // namespace joblist
|
||||
|
@ -33,6 +33,7 @@
|
||||
|
||||
#include "primitivestep.h"
|
||||
#include "command-jl.h"
|
||||
#include "dictstep-jl.h"
|
||||
|
||||
namespace joblist
|
||||
{
|
||||
@ -41,6 +42,7 @@ class ColumnCommandJL : public CommandJL
|
||||
public:
|
||||
ColumnCommandJL(const pColScanStep&, std::vector<BRM::LBID_t> lastLBID);
|
||||
ColumnCommandJL(const pColStep&);
|
||||
ColumnCommandJL(const ColumnCommandJL&, const DictStepJL&);
|
||||
virtual ~ColumnCommandJL();
|
||||
|
||||
virtual void createCommand(messageqcpp::ByteStream& bs) const;
|
||||
@ -111,6 +113,7 @@ class ColumnCommandJL : public CommandJL
|
||||
std::vector<BRM::LBID_t> fLastLbid;
|
||||
|
||||
bool fIsDict;
|
||||
bool fContainsRanges = false;
|
||||
|
||||
// @Bug 2889. Added two members below for drop partition enhancement.
|
||||
// RJD: make sure that we keep enough significant digits around for partition math
|
||||
@ -125,7 +128,7 @@ class ColumnCommandJL : public CommandJL
|
||||
public:
|
||||
// MCOL-4685: remove the option to set more than 2 extents per file (ExtentsPreSegmentFile)
|
||||
static const unsigned DEFAULT_EXTENTS_PER_SEGMENT_FILE = 2;
|
||||
bool getIsDict() override;
|
||||
};
|
||||
|
||||
} // namespace joblist
|
||||
|
||||
|
@ -96,6 +96,11 @@ class CommandJL
|
||||
|
||||
virtual CommandType getCommandType() = 0;
|
||||
|
||||
virtual bool getIsDict()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
BatchPrimitiveProcessorJL* bpp;
|
||||
uint32_t OID;
|
||||
|
@ -29,6 +29,7 @@
|
||||
//
|
||||
|
||||
#include "bpp-jl.h"
|
||||
#include "string_prefixes.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace messageqcpp;
|
||||
@ -50,7 +51,6 @@ DictStepJL::DictStepJL(const pDictionaryStep& dict)
|
||||
|
||||
if (hasEqFilter)
|
||||
{
|
||||
// cout << "saw eqfilter\n";
|
||||
eqOp = dict.tmpCOP;
|
||||
eqFilter = dict.eqFilter;
|
||||
}
|
||||
@ -120,4 +120,63 @@ void DictStepJL::setWidth(uint16_t w)
|
||||
colWidth = w;
|
||||
}
|
||||
|
||||
/**
 * @brief Re-encodes this step's string filters as integer prefix filters.
 *
 * Every filter is written to the returned stream as three consecutive values:
 * the comparison operator, a rounding flag (always 0) and the value produced
 * by encodeStringPrefix() for the filter string and this step's charsetNumber.
 *
 * When an equality filter set is present (hasEqFilter), each entry of eqFilter
 * is emitted with the operator eqOp. Otherwise the entries are read from a
 * copy of filterString, and strict comparisons are relaxed to their
 * "... or equal" counterparts; all other operators are passed through
 * unchanged.
 *
 * @return the stream holding the re-encoded filters.
 */
messageqcpp::ByteStream DictStepJL::reencodedFilterString() const
{
  messageqcpp::ByteStream encoded;

  if (!hasEqFilter)
  {
    // Extraction consumes the stream and this method is const, so operate on
    // a copy. Please refer to the addFilter function in pdictionary.cpp in
    // this directory for the proper encoding of string filters.
    messageqcpp::ByteStream pending(filterString);

    for (uint32_t n = 0; n < filterCount; ++n)
    {
      uint8_t op;
      uint8_t roundFlag = 0;
      pending >> op;

      // As we are dealing with prefixes, we have to use "... or equal"
      // conditions instead of strict ones.
      // Consider this: ... WHERE col > 'customer#001' AND col < 'customer#100'.
      // Working with 8-byte prefixes reduces these conditions to
      // ... WHERE col > 'customer' AND col < 'customer', which can never be
      // satisfied together. This string is not passed to primproc, so the
      // operation codes can be re-encoded here.
      if (op == COMPARE_LT || op == COMPARE_NGE)
      {
        op = COMPARE_LE;
      }
      else if (op == COMPARE_GT || op == COMPARE_NLE)
      {
        op = COMPARE_GE;
      }

      encoded << op;
      encoded << roundFlag;

      // Each string is stored as a 16-bit length followed by its bytes.
      uint16_t len;
      pending >> len;
      const uint8_t* text = pending.buf();
      int64_t prefix = encodeStringPrefix(text, len, charsetNumber);
      encoded << prefix;
      pending.advance(len);
    }

    return encoded;
  }

  idbassert(filterCount == eqFilter.size());

  for (uint32_t n = 0; n < filterCount; ++n)
  {
    const auto& literal = eqFilter[n];
    uint8_t roundFlag = 0;
    int64_t prefix = encodeStringPrefix(reinterpret_cast<const unsigned char*>(literal.c_str()),
                                        literal.size(), charsetNumber);
    encoded << eqOp;
    encoded << roundFlag;
    encoded << prefix;
  }

  return encoded;
}
|
||||
|
||||
}; // namespace joblist
|
||||
|
@ -61,6 +61,21 @@ class DictStepJL : public CommandJL
|
||||
void createCommand(messageqcpp::ByteStream&) const;
|
||||
void runCommand(messageqcpp::ByteStream&) const;
|
||||
|
||||
/** @brief Returns a copy of this step's filterString stream. */
messageqcpp::ByteStream getFilterString() const
{
  return filterString;
}
|
||||
uint32_t getFilterCount() const
|
||||
{
|
||||
return filterCount;
|
||||
}
|
||||
messageqcpp::ByteStream reencodedFilterString() const;
|
||||
|
||||
uint8_t getBop() const
|
||||
{
|
||||
return BOP;
|
||||
}
|
||||
|
||||
private:
|
||||
DictStepJL(const DictStepJL&);
|
||||
|
||||
|
@ -373,7 +373,15 @@ void GroupConcatAgUM::applyMapping(const boost::shared_array<int>& mapping, cons
|
||||
}
|
||||
else
|
||||
{
|
||||
fRow.setIntField(row.getIntField(mapping[i]), i);
|
||||
if (fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::CHAR ||
|
||||
fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::VARCHAR)
|
||||
{
|
||||
fRow.setIntField(row.getUintField(mapping[i]), i);
|
||||
}
|
||||
else
|
||||
{
|
||||
fRow.setIntField(row.getIntField(mapping[i]), i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1140,9 +1140,10 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo)
|
||||
|
||||
for (unsigned i = 0; i < numOfStepsAddToBps; i++)
|
||||
{
|
||||
bps->setBPP((it + i)->get());
|
||||
auto pp = (it + i)->get();
|
||||
bps->setBPP(pp);
|
||||
bps->setStepCount();
|
||||
bps->setLastTupleId((it + i)->get()->tupleId());
|
||||
bps->setLastTupleId(pp->tupleId());
|
||||
}
|
||||
|
||||
it += itInc;
|
||||
|
@ -338,8 +338,8 @@ int LBIDList::getMinMaxFromEntries(T& min, T& max, int32_t& seq, int64_t lbid,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, const CalpontSystemCatalog::ColType& type,
|
||||
bool validData)
|
||||
void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, bool dictScan,
|
||||
const CalpontSystemCatalog::ColType& type, bool validData)
|
||||
{
|
||||
MinMaxPartition* mmp = NULL;
|
||||
#ifdef DEBUG
|
||||
@ -372,18 +372,20 @@ void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, const CalpontSystemCatal
|
||||
|
||||
if (mmp->isValid == BRM::CP_INVALID)
|
||||
{
|
||||
if (datatypes::isCharType(type.colDataType))
|
||||
if (!dictScan && datatypes::isCharType(type.colDataType))
|
||||
{
|
||||
datatypes::Charset cs(const_cast<CalpontSystemCatalog::ColType&>(type).getCharset());
|
||||
if (datatypes::TCharShort::strnncollsp(cs, min, mmp->min, type.colWidth) < 0 ||
|
||||
mmp->min == numeric_limits<int64_t>::max())
|
||||
// WIP
|
||||
static_cast<uint64_t>(mmp->min) == numeric_limits<uint64_t>::max())
|
||||
mmp->min = min;
|
||||
|
||||
if (datatypes::TCharShort::strnncollsp(cs, max, mmp->max, type.colWidth) > 0 ||
|
||||
mmp->max == numeric_limits<int64_t>::min())
|
||||
// WIP
|
||||
static_cast<uint64_t>(mmp->max) == numeric_limits<uint64_t>::min())
|
||||
mmp->max = max;
|
||||
}
|
||||
else if (datatypes::isUnsigned(type.colDataType))
|
||||
else if (dictScan || datatypes::isUnsigned(type.colDataType))
|
||||
{
|
||||
if (static_cast<uint64_t>(min) < static_cast<uint64_t>(mmp->min))
|
||||
mmp->min = min;
|
||||
@ -526,7 +528,7 @@ bool LBIDList::CasualPartitionDataType(const CalpontSystemCatalog::ColDataType t
|
||||
|
||||
case CalpontSystemCatalog::VARCHAR:
|
||||
case CalpontSystemCatalog::BLOB:
|
||||
case CalpontSystemCatalog::TEXT: return size < 8;
|
||||
case CalpontSystemCatalog::TEXT: return size <= 8;
|
||||
|
||||
case CalpontSystemCatalog::TINYINT:
|
||||
case CalpontSystemCatalog::SMALLINT:
|
||||
@ -695,15 +697,19 @@ bool LBIDList::checkRangeOverlap(T min, T max, T tmin, T tmax,
|
||||
|
||||
bool LBIDList::CasualPartitionPredicate(const BRM::EMCasualPartition_t& cpRange,
|
||||
const messageqcpp::ByteStream* bs, const uint16_t NOPS,
|
||||
const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP)
|
||||
const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP,
|
||||
bool isDict)
|
||||
{
|
||||
int length = bs->length(), pos = 0;
|
||||
const char* MsgDataPtr = (const char*)bs->buf();
|
||||
bool scan = true;
|
||||
int64_t value = 0;
|
||||
int128_t bigValue = 0;
|
||||
bool bIsUnsigned = datatypes::isUnsigned(ct.colDataType);
|
||||
bool bIsChar = datatypes::isCharType(ct.colDataType);
|
||||
// MCOL-4580 - related.
|
||||
// We could definitely compute the isDict flag ourselves here, as we have the column type and width.
|
||||
// But we may also use the isDict flags already computed in the steps, available via the getIsDict() method.
|
||||
bool bIsUnsigned = isDict || datatypes::isUnsigned(ct.colDataType);
|
||||
bool bIsChar = !isDict && datatypes::isCharType(ct.colDataType);
|
||||
|
||||
for (int i = 0; i < NOPS; i++)
|
||||
{
|
||||
@ -800,9 +806,12 @@ bool LBIDList::CasualPartitionPredicate(const BRM::EMCasualPartition_t& cpRange,
|
||||
{
|
||||
continue;
|
||||
}
|
||||
else if (execplan::isNull(value, ct)) // This will work even if the data column is unsigned.
|
||||
else
|
||||
{
|
||||
continue;
|
||||
if (execplan::isNull(value, ct)) // This will work even if the data column is unsigned.
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (bIsChar)
|
||||
@ -898,11 +907,11 @@ template bool LBIDList::GetMinMax<int64_t>(int64_t* min, int64_t* max, int64_t*
|
||||
const tr1::unordered_map<int64_t, BRM::EMEntry>& entries,
|
||||
execplan::CalpontSystemCatalog::ColDataType colDataType);
|
||||
|
||||
template void LBIDList::UpdateMinMax<int128_t>(int128_t min, int128_t max, int64_t lbid,
|
||||
template void LBIDList::UpdateMinMax<int128_t>(int128_t min, int128_t max, int64_t lbid, bool dictScan,
|
||||
const execplan::CalpontSystemCatalog::ColType& type,
|
||||
bool validData = true);
|
||||
|
||||
template void LBIDList::UpdateMinMax<int64_t>(int64_t min, int64_t max, int64_t lbid,
|
||||
template void LBIDList::UpdateMinMax<int64_t>(int64_t min, int64_t max, int64_t lbid, bool dictScan,
|
||||
const execplan::CalpontSystemCatalog::ColType& type,
|
||||
bool validData = true);
|
||||
|
||||
|
@ -98,8 +98,8 @@ class LBIDList
|
||||
execplan::CalpontSystemCatalog::ColDataType type);
|
||||
|
||||
template <typename T>
|
||||
void UpdateMinMax(T min, T max, int64_t lbid, const execplan::CalpontSystemCatalog::ColType& type,
|
||||
bool validData = true);
|
||||
void UpdateMinMax(T min, T max, int64_t lbid, bool dictScan,
|
||||
const execplan::CalpontSystemCatalog::ColType& type, bool validData = true);
|
||||
|
||||
void UpdateAllPartitionInfo(const execplan::CalpontSystemCatalog::ColType& colType);
|
||||
|
||||
@ -107,7 +107,8 @@ class LBIDList
|
||||
|
||||
bool CasualPartitionPredicate(const BRM::EMCasualPartition_t& cpRange,
|
||||
const messageqcpp::ByteStream* MsgDataPtr, const uint16_t NOPS,
|
||||
const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP);
|
||||
const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP,
|
||||
bool isDict);
|
||||
|
||||
template <typename T>
|
||||
bool checkSingleValue(T min, T max, T value, const execplan::CalpontSystemCatalog::ColType& type);
|
||||
|
@ -1054,9 +1054,10 @@ class BatchPrimitive : public JobStep, public PrimitiveMsg, public DECEventListe
|
||||
|
||||
struct _CPInfo
|
||||
{
|
||||
_CPInfo(int64_t MIN, int64_t MAX, uint64_t l, bool val) : min(MIN), max(MAX), LBID(l), valid(val){};
|
||||
_CPInfo(int64_t MIN, int64_t MAX, uint64_t l, bool dictScan, bool val)
|
||||
: min(MIN), max(MAX), LBID(l), valid(val), dictScan(dictScan) {};
|
||||
_CPInfo(int128_t BIGMIN, int128_t BIGMAX, uint64_t l, bool val)
|
||||
: bigMin(BIGMIN), bigMax(BIGMAX), LBID(l), valid(val){};
|
||||
: bigMin(BIGMIN), bigMax(BIGMAX), LBID(l), valid(val), dictScan(false) {};
|
||||
union
|
||||
{
|
||||
int128_t bigMin;
|
||||
@ -1069,6 +1070,7 @@ struct _CPInfo
|
||||
};
|
||||
uint64_t LBID;
|
||||
bool valid;
|
||||
bool dictScan;
|
||||
};
|
||||
|
||||
/** @brief class TupleBPS
|
||||
@ -1834,4 +1836,3 @@ class PseudoColStep : public pColStep
|
||||
};
|
||||
|
||||
} // namespace joblist
|
||||
|
||||
|
@ -75,6 +75,7 @@ using namespace rowgroup;
|
||||
#include "querytele.h"
|
||||
using namespace querytele;
|
||||
|
||||
#include "columnwidth.h"
|
||||
#include "pseudocolumn.h"
|
||||
//#define DEBUG 1
|
||||
|
||||
@ -865,6 +866,7 @@ void TupleBPS::storeCasualPartitionInfo(const bool estimateRowCounts)
|
||||
vector<ColumnCommandJL*> cpColVec;
|
||||
vector<SP_LBIDList> lbidListVec;
|
||||
ColumnCommandJL* colCmd = 0;
|
||||
bool defaultScanFlag = true;
|
||||
|
||||
// @bug 2123. We call this earlier in the process for the hash join estimation process now. Return if
|
||||
// we've already done the work.
|
||||
@ -876,7 +878,9 @@ void TupleBPS::storeCasualPartitionInfo(const bool estimateRowCounts)
|
||||
fCPEvaluated = true;
|
||||
|
||||
if (colCmdVec.size() == 0)
|
||||
return;
|
||||
{
|
||||
defaultScanFlag = false; // no reason to scan if there are no commands.
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < colCmdVec.size(); i++)
|
||||
{
|
||||
@ -902,30 +906,28 @@ void TupleBPS::storeCasualPartitionInfo(const bool estimateRowCounts)
|
||||
}
|
||||
|
||||
if (cpColVec.size() == 0)
|
||||
return;
|
||||
{
|
||||
defaultScanFlag = true; // no reason to scan if there are no predicates to evaluate.
|
||||
}
|
||||
|
||||
const bool ignoreCP = ((fTraceFlags & CalpontSelectExecutionPlan::IGNORE_CP) != 0);
|
||||
|
||||
for (uint32_t idx = 0; idx < numExtents; idx++)
|
||||
{
|
||||
scanFlags[idx] = true;
|
||||
scanFlags[idx] = defaultScanFlag;
|
||||
|
||||
for (uint32_t i = 0; i < cpColVec.size(); i++)
|
||||
for (uint32_t i = 0; scanFlags[idx] && i < cpColVec.size(); i++)
|
||||
{
|
||||
colCmd = cpColVec[i];
|
||||
const EMEntry& extent = colCmd->getExtents()[idx];
|
||||
|
||||
/* If any column filter eliminates an extent, it doesn't get scanned */
|
||||
scanFlags[idx] =
|
||||
scanFlags[idx] && (ignoreCP || extent.partition.cprange.isValid != BRM::CP_VALID ||
|
||||
lbidListVec[i]->CasualPartitionPredicate(
|
||||
extent.partition.cprange, &(colCmd->getFilterString()),
|
||||
colCmd->getFilterCount(), colCmd->getColType(), colCmd->getBOP()));
|
||||
|
||||
if (!scanFlags[idx])
|
||||
{
|
||||
break;
|
||||
}
|
||||
scanFlags[idx] = scanFlags[idx] && (extent.colWid <= utils::MAXCOLUMNWIDTH) && // XXX: change to named constant.
|
||||
(ignoreCP || extent.partition.cprange.isValid != BRM::CP_VALID ||
|
||||
colCmd->getColType().colWidth != extent.colWid ||
|
||||
lbidListVec[i]->CasualPartitionPredicate(
|
||||
extent.partition.cprange, &(colCmd->getFilterString()), colCmd->getFilterCount(),
|
||||
colCmd->getColType(), colCmd->getBOP(), colCmd->getIsDict()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2008,9 +2010,10 @@ void TupleBPS::processByteStreamVector(vector<boost::shared_ptr<messageqcpp::Byt
|
||||
}
|
||||
|
||||
bool unused;
|
||||
bool fromDictScan;
|
||||
fromPrimProc.clear();
|
||||
fBPP->getRowGroupData(*bs, &fromPrimProc, &validCPData, &lbid, &min, &max, &cachedIO, &physIO,
|
||||
&touchedBlocks, &unused, threadID, &hasBinaryColumn, fColType);
|
||||
fBPP->getRowGroupData(*bs, &fromPrimProc, &validCPData, &lbid, &fromDictScan, &min, &max, &cachedIO,
|
||||
&physIO, &touchedBlocks, &unused, threadID, &hasBinaryColumn, fColType);
|
||||
|
||||
// Another layer of messiness. Need to refactor this fcn.
|
||||
while (!fromPrimProc.empty() && !cancelled())
|
||||
@ -2180,7 +2183,7 @@ void TupleBPS::processByteStreamVector(vector<boost::shared_ptr<messageqcpp::Byt
|
||||
{
|
||||
if (fColType.colWidth <= 8)
|
||||
{
|
||||
cpv.push_back(_CPInfo((int64_t)min, (int64_t)max, lbid, validCPData));
|
||||
cpv.push_back(_CPInfo((int64_t)min, (int64_t)max, lbid, fromDictScan, validCPData));
|
||||
}
|
||||
else if (fColType.colWidth == 16)
|
||||
{
|
||||
@ -2237,7 +2240,9 @@ void TupleBPS::receiveMultiPrimitiveMessages()
|
||||
}
|
||||
|
||||
if (msgsSent == msgsRecvd && finishedSending)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
bool flowControlOn;
|
||||
fDec->read_some(uniqueID, fNumThreads, bsv, &flowControlOn);
|
||||
@ -2361,11 +2366,13 @@ void TupleBPS::receiveMultiPrimitiveMessages()
|
||||
{
|
||||
if (fColType.colWidth > 8)
|
||||
{
|
||||
lbidList->UpdateMinMax(cpv[i].bigMin, cpv[i].bigMax, cpv[i].LBID, fColType, cpv[i].valid);
|
||||
lbidList->UpdateMinMax(cpv[i].bigMin, cpv[i].bigMax, cpv[i].LBID, cpv[i].dictScan, fColType,
|
||||
cpv[i].valid);
|
||||
}
|
||||
else
|
||||
{
|
||||
lbidList->UpdateMinMax(cpv[i].min, cpv[i].max, cpv[i].LBID, fColType, cpv[i].valid);
|
||||
lbidList->UpdateMinMax(cpv[i].min, cpv[i].max, cpv[i].LBID, cpv[i].dictScan, fColType,
|
||||
cpv[i].valid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -87,7 +87,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_general_ci)
|
||||
c1 HEX(c1)
|
||||
Ъ─ FF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
|
||||
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
|
||||
c1
|
||||
Ъ─
|
||||
Level Code Message
|
||||
@ -100,7 +100,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_general_ci)
|
||||
c1 HEX(c1)
|
||||
ЪЪЪ─ FFFFFF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
|
||||
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
|
||||
c1
|
||||
ЪЪЪ─
|
||||
Level Code Message
|
||||
@ -127,7 +127,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_general_nopad_ci)
|
||||
c1 HEX(c1)
|
||||
Ъ─ FF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
|
||||
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
|
||||
c1
|
||||
Ъ─
|
||||
Level Code Message
|
||||
@ -140,7 +140,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_general_nopad_ci)
|
||||
c1 HEX(c1)
|
||||
ЪЪЪ─ FFFFFF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
|
||||
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
|
||||
c1
|
||||
ЪЪЪ─
|
||||
Level Code Message
|
||||
@ -167,7 +167,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_bin)
|
||||
c1 HEX(c1)
|
||||
Ъ─ FF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
|
||||
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
|
||||
c1
|
||||
Ъ─
|
||||
Level Code Message
|
||||
@ -180,7 +180,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_bin)
|
||||
c1 HEX(c1)
|
||||
ЪЪЪ─ FFFFFF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
|
||||
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
|
||||
c1
|
||||
ЪЪЪ─
|
||||
Level Code Message
|
||||
@ -207,7 +207,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_nopad_bin)
|
||||
c1 HEX(c1)
|
||||
Ъ─ FF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
|
||||
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
|
||||
c1
|
||||
Ъ─
|
||||
Level Code Message
|
||||
@ -220,7 +220,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_nopad_bin)
|
||||
c1 HEX(c1)
|
||||
ЪЪЪ─ FFFFFF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
|
||||
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
|
||||
c1
|
||||
ЪЪЪ─
|
||||
Level Code Message
|
||||
|
@ -99,7 +99,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_swedish_ci)
|
||||
c1 HEX(c1)
|
||||
é E9
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
|
||||
mcs_ctype_extent_latin1 t1 c1 E9 E9
|
||||
c1
|
||||
é
|
||||
Level Code Message
|
||||
@ -112,7 +112,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_swedish_ci)
|
||||
c1 HEX(c1)
|
||||
ÿ€ FF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
|
||||
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
|
||||
c1
|
||||
ÿ€
|
||||
Level Code Message
|
||||
@ -125,7 +125,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_swedish_ci)
|
||||
c1 HEX(c1)
|
||||
ÿÿÿ€ FFFFFF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
|
||||
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
|
||||
c1
|
||||
ÿÿÿ€
|
||||
Level Code Message
|
||||
@ -152,7 +152,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_swedish_nopad_ci
|
||||
c1 HEX(c1)
|
||||
é E9
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
|
||||
mcs_ctype_extent_latin1 t1 c1 E9 E9
|
||||
c1
|
||||
é
|
||||
Level Code Message
|
||||
@ -165,7 +165,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_swedish_nopad_ci
|
||||
c1 HEX(c1)
|
||||
ÿ€ FF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
|
||||
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
|
||||
c1
|
||||
ÿ€
|
||||
Level Code Message
|
||||
@ -178,7 +178,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_swedish_nopad_ci
|
||||
c1 HEX(c1)
|
||||
ÿÿÿ€ FFFFFF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
|
||||
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
|
||||
c1
|
||||
ÿÿÿ€
|
||||
Level Code Message
|
||||
@ -205,7 +205,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_bin)
|
||||
c1 HEX(c1)
|
||||
é E9
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
|
||||
mcs_ctype_extent_latin1 t1 c1 E9 E9
|
||||
c1
|
||||
é
|
||||
Level Code Message
|
||||
@ -218,7 +218,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_bin)
|
||||
c1 HEX(c1)
|
||||
ÿ€ FF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
|
||||
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
|
||||
c1
|
||||
ÿ€
|
||||
Level Code Message
|
||||
@ -231,7 +231,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_bin)
|
||||
c1 HEX(c1)
|
||||
ÿÿÿ€ FFFFFF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
|
||||
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
|
||||
c1
|
||||
ÿÿÿ€
|
||||
Level Code Message
|
||||
@ -258,7 +258,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_nopad_bin)
|
||||
c1 HEX(c1)
|
||||
é E9
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
|
||||
mcs_ctype_extent_latin1 t1 c1 E9 E9
|
||||
c1
|
||||
é
|
||||
Level Code Message
|
||||
@ -271,7 +271,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_nopad_bin)
|
||||
c1 HEX(c1)
|
||||
ÿ€ FF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
|
||||
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
|
||||
c1
|
||||
ÿ€
|
||||
Level Code Message
|
||||
@ -284,7 +284,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_nopad_bin)
|
||||
c1 HEX(c1)
|
||||
ÿÿÿ€ FFFFFF80
|
||||
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
|
||||
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
|
||||
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
|
||||
c1
|
||||
ÿÿÿ€
|
||||
Level Code Message
|
||||
|
@ -6,6 +6,6 @@ insert into rounding_table values (26805, 1252, -9647);
|
||||
insert into rounding_table values (26806, 573, -2804.5);
|
||||
SELECT CASE a WHEN 26805 THEN ROUND(c/b, 2) WHEN 26806 THEN b END MCOL4940 FROM ( SELECT a, SUM(b) b, SUM(c) c FROM rounding_table GROUP BY a ) abc ;
|
||||
MCOL4940
|
||||
573
|
||||
-7.71
|
||||
573
|
||||
DROP DATABASE mcol_4940;
|
||||
|
@ -8,7 +8,8 @@ USE mcol_4940;
|
||||
create table rounding_table ( a int, b double, c double) engine=columnstore;
|
||||
insert into rounding_table values (26805, 1252, -9647);
|
||||
insert into rounding_table values (26806, 573, -2804.5);
|
||||
|
||||
|
||||
--sorted_result
|
||||
SELECT CASE a WHEN 26805 THEN ROUND(c/b, 2) WHEN 26806 THEN b END MCOL4940 FROM ( SELECT a, SUM(b) b, SUM(c) c FROM rounding_table GROUP BY a ) abc ;
|
||||
|
||||
DROP DATABASE mcol_4940;
|
||||
|
@ -42,6 +42,8 @@ using namespace boost;
|
||||
#include "simd_sse.h"
|
||||
#include "utils/common/columnwidth.h"
|
||||
|
||||
#include "exceptclasses.h"
|
||||
|
||||
using namespace logging;
|
||||
using namespace dbbc;
|
||||
using namespace primitives;
|
||||
@ -1690,7 +1692,9 @@ void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in, C
|
||||
dataType == execplan::CalpontSystemCatalog::TEXT) &&
|
||||
!isDictTokenScan(in))
|
||||
{
|
||||
filterColumnData<T, KIND_TEXT>(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
|
||||
using UT = typename std::conditional<std::is_unsigned<T>::value, T,
|
||||
typename datatypes::make_unsigned<T>::type>::type;
|
||||
filterColumnData<UT, KIND_TEXT>(in, out, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -31,6 +31,7 @@ using namespace std;
|
||||
#include "messageobj.h"
|
||||
#include "exceptclasses.h"
|
||||
#include "dataconvert.h"
|
||||
#include "string_prefixes.h"
|
||||
#include <sstream>
|
||||
|
||||
using namespace logging;
|
||||
@ -391,8 +392,13 @@ void PrimitiveProcessor::nextSig(int NVALS, const PrimToken* tokens, p_DataValue
|
||||
}
|
||||
|
||||
void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector<uint8_t>* out, bool skipNulls,
|
||||
#if defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
|
||||
uint32_t charsetNumber, boost::shared_ptr<DictEqualityFilter> eqFilter,
|
||||
uint8_t eqOp, uint64_t minMax[2])
|
||||
#else
|
||||
uint32_t charsetNumber, boost::shared_ptr<DictEqualityFilter> eqFilter,
|
||||
uint8_t eqOp)
|
||||
#endif
|
||||
{
|
||||
PrimToken* outToken;
|
||||
const DictFilterElement* filter = 0;
|
||||
@ -437,6 +443,14 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector<uint8_t>* out,
|
||||
sigptr.len != -1;
|
||||
nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType, (in->InputFlags ? true : false), skipNulls))
|
||||
{
|
||||
#if defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
|
||||
if (minMax)
|
||||
{
|
||||
uint64_t v = encodeStringPrefix_check_null(sigptr.data, sigptr.len, charsetNumber);
|
||||
minMax[1] = minMax[1] < v ? v : minMax[1];
|
||||
minMax[0] = minMax[0] > v ? v : minMax[0];
|
||||
}
|
||||
#endif
|
||||
// do aggregate processing
|
||||
if (in->OutputType & OT_AGGREGATE)
|
||||
{
|
||||
|
@ -54,6 +54,9 @@
|
||||
|
||||
class PrimTest;
|
||||
|
||||
// XXX: comment out the define below to turn off dictionary range setting during scan.
|
||||
#define XXX_PRIMITIVES_TOKEN_RANGES_XXX
|
||||
|
||||
namespace primitives
|
||||
{
|
||||
enum ColumnFilterMode
|
||||
@ -423,7 +426,13 @@ class PrimitiveProcessor
|
||||
// void p_ColAggregate(const NewColAggRequestHeader *in, NewColAggResultHeader *out);
|
||||
|
||||
void p_Dictionary(const DictInput* in, std::vector<uint8_t>* out, bool skipNulls, uint32_t charsetNumber,
|
||||
boost::shared_ptr<DictEqualityFilter> eqFilter, uint8_t eqOp);
|
||||
#if !defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
|
||||
boost::shared_ptr<DictEqualityFilter> eqFilter, uint8_t eqOp
|
||||
#else
|
||||
boost::shared_ptr<DictEqualityFilter> eqFilter, uint8_t eqOp,
|
||||
uint64_t minMax[2] // as name suggests, [0] is min, [1] is max.
|
||||
#endif
|
||||
);
|
||||
|
||||
inline void setLogicalBlockMode(bool b)
|
||||
{
|
||||
|
@ -117,6 +117,7 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor()
|
||||
, validCPData(false)
|
||||
, minVal(MAX64)
|
||||
, maxVal(MIN64)
|
||||
, cpDataFromDictScan(false)
|
||||
, lbidForCP(0)
|
||||
, hasWideColumnOut(false)
|
||||
, busyLoaderCount(0)
|
||||
@ -138,6 +139,7 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor()
|
||||
, processorThreads(0)
|
||||
, ptMask(0)
|
||||
, firstInstance(false)
|
||||
, valuesLBID(0)
|
||||
{
|
||||
pp.setLogicalBlockMode(true);
|
||||
pp.setBlockPtr((int*)blockData);
|
||||
@ -167,6 +169,7 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor(ByteStream& b, double prefetch,
|
||||
, validCPData(false)
|
||||
, minVal(MAX64)
|
||||
, maxVal(MIN64)
|
||||
, cpDataFromDictScan(false)
|
||||
, lbidForCP(0)
|
||||
, hasWideColumnOut(false)
|
||||
, busyLoaderCount(0)
|
||||
@ -186,10 +189,10 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor(ByteStream& b, double prefetch,
|
||||
, sockIndex(0)
|
||||
, endOfJoinerRan(false)
|
||||
, processorThreads(_processorThreads)
|
||||
,
|
||||
// processorThreads(32),
|
||||
// ptMask(processorThreads - 1),
|
||||
firstInstance(true)
|
||||
, firstInstance(true)
|
||||
, valuesLBID(0)
|
||||
{
|
||||
// promote processorThreads to next power of 2. also need to change the name to bucketCount or similar
|
||||
processorThreads = nextPowOf2(processorThreads);
|
||||
@ -2010,6 +2013,7 @@ void BatchPrimitiveProcessor::writeProjectionPreamble()
|
||||
{
|
||||
*serialized << (uint8_t)1;
|
||||
*serialized << lbidForCP;
|
||||
*serialized << ((uint8_t)cpDataFromDictScan);
|
||||
if (UNLIKELY(hasWideColumnOut))
|
||||
{
|
||||
// PSA width
|
||||
@ -2108,6 +2112,7 @@ void BatchPrimitiveProcessor::makeResponse()
|
||||
{
|
||||
*serialized << (uint8_t)1;
|
||||
*serialized << lbidForCP;
|
||||
*serialized << ((uint8_t)cpDataFromDictScan);
|
||||
|
||||
if (UNLIKELY(hasWideColumnOut))
|
||||
{
|
||||
@ -2208,6 +2213,7 @@ int BatchPrimitiveProcessor::operator()()
|
||||
}
|
||||
|
||||
validCPData = false;
|
||||
cpDataFromDictScan = false;
|
||||
#ifdef PRIMPROC_STOPWATCH
|
||||
stopwatch->start("BPP() execute");
|
||||
execute(stopwatch);
|
||||
|
@ -256,6 +256,7 @@ class BatchPrimitiveProcessor
|
||||
int128_t max128Val;
|
||||
int64_t maxVal;
|
||||
};
|
||||
bool cpDataFromDictScan;
|
||||
|
||||
uint64_t lbidForCP;
|
||||
bool hasWideColumnOut;
|
||||
@ -431,6 +432,7 @@ class BatchPrimitiveProcessor
|
||||
uint processorThreads;
|
||||
uint ptMask;
|
||||
bool firstInstance;
|
||||
uint64_t valuesLBID;
|
||||
|
||||
friend class Command;
|
||||
friend class ColumnCommand;
|
||||
|
@ -106,8 +106,8 @@ void ColumnCommand::execute()
|
||||
{
|
||||
values = bpp->values;
|
||||
wide128Values = bpp->wide128Values;
|
||||
bpp->valuesLBID = lbid;
|
||||
}
|
||||
|
||||
_execute();
|
||||
}
|
||||
|
||||
@ -225,9 +225,13 @@ void ColumnCommand::issuePrimitive()
|
||||
loadData();
|
||||
|
||||
if (!suppressFilter)
|
||||
{
|
||||
bpp->getPrimitiveProcessor().setParsedColumnFilter(parsedColumnFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
bpp->getPrimitiveProcessor().setParsedColumnFilter(emptyFilter);
|
||||
}
|
||||
|
||||
switch (colType.colWidth)
|
||||
{
|
||||
@ -282,6 +286,7 @@ void ColumnCommand::updateCPDataNarrow()
|
||||
if (_isScan)
|
||||
{
|
||||
bpp->validCPData = (outMsg->ValidMinMax && !wasVersioned);
|
||||
bpp->cpDataFromDictScan = false;
|
||||
bpp->lbidForCP = lbid;
|
||||
bpp->maxVal = static_cast<int64_t>(outMsg->Max);
|
||||
bpp->minVal = static_cast<int64_t>(outMsg->Min);
|
||||
@ -295,6 +300,7 @@ void ColumnCommand::updateCPDataWide()
|
||||
if (_isScan)
|
||||
{
|
||||
bpp->validCPData = (outMsg->ValidMinMax && !wasVersioned);
|
||||
bpp->cpDataFromDictScan = false;
|
||||
bpp->lbidForCP = lbid;
|
||||
if (colType.isWideDecimalType())
|
||||
{
|
||||
|
@ -46,6 +46,8 @@ extern uint32_t dictBufferSize;
|
||||
|
||||
DictStep::DictStep() : Command(DICT_STEP), strValues(NULL), filterCount(0), bufferSize(0)
|
||||
{
|
||||
fMinMax[0] = MAX_UBIGINT;
|
||||
fMinMax[1] = MIN_UBIGINT;
|
||||
}
|
||||
|
||||
DictStep::~DictStep()
|
||||
@ -65,6 +67,8 @@ DictStep& DictStep::operator=(const DictStep& d)
|
||||
eqOp = d.eqOp;
|
||||
filterCount = d.filterCount;
|
||||
charsetNumber = d.charsetNumber;
|
||||
fMinMax[0] = d.fMinMax[0];
|
||||
fMinMax[1] = d.fMinMax[1];
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -147,8 +151,11 @@ void DictStep::issuePrimitive(bool isFilter)
|
||||
bpp->physIO += blocksRead;
|
||||
bpp->touchedBlocks++;
|
||||
}
|
||||
|
||||
#if !defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
|
||||
bpp->pp.p_Dictionary(primMsg, &result, isFilter, charsetNumber, eqFilter, eqOp);
|
||||
#else
|
||||
bpp->pp.p_Dictionary(primMsg, &result, isFilter, charsetNumber, eqFilter, eqOp, fMinMax);
|
||||
#endif
|
||||
}
|
||||
|
||||
void DictStep::copyResultToTmpSpace(OrderedToken* ot)
|
||||
@ -390,6 +397,14 @@ void DictStep::_execute()
|
||||
copyResultToFinalPosition(newRidList.get());
|
||||
copyRidsForFilterCmd();
|
||||
}
|
||||
if (fMinMax[0] <= fMinMax[1] && bpp->valuesLBID != 0)
|
||||
{
|
||||
bpp->validCPData = true;
|
||||
bpp->cpDataFromDictScan = true;
|
||||
bpp->lbidForCP = bpp->valuesLBID;
|
||||
bpp->maxVal = fMinMax[1];
|
||||
bpp->minVal = fMinMax[0];
|
||||
}
|
||||
|
||||
// cout << "DS: /_execute()\n";
|
||||
}
|
||||
|
@ -158,6 +158,7 @@ class DictStep : public Command
|
||||
bool hasEqFilter;
|
||||
boost::shared_ptr<primitives::DictEqualityFilter> eqFilter;
|
||||
uint8_t eqOp; // COMPARE_EQ or COMPARE_NE
|
||||
uint64_t fMinMax[2];
|
||||
|
||||
friend class RTSCommand;
|
||||
};
|
||||
|
@ -11,7 +11,8 @@ set(common_LIB_SRCS
|
||||
nullvaluemanip.cpp
|
||||
threadnaming.cpp
|
||||
utils_utf8.cpp
|
||||
statistics.cpp)
|
||||
statistics.cpp
|
||||
string_prefixes.cpp)
|
||||
|
||||
add_library(common SHARED ${common_LIB_SRCS})
|
||||
|
||||
|
@ -177,6 +177,12 @@ class Charset
|
||||
bool res = !mCharset->wildcmp(subject.str(), subject.end(), pattern.str(), pattern.end(), '\\', '_', '%');
|
||||
return neg ? !res : res;
|
||||
}
|
||||
size_t strnxfrm(uchar* dst, size_t dstlen, uint nweights, const uchar* src, size_t srclen, uint flags)
|
||||
{
|
||||
idbassert(mCharset->coll);
|
||||
|
||||
return mCharset->coll->strnxfrm(mCharset, dst, dstlen, nweights, src, srclen, flags);
|
||||
}
|
||||
};
|
||||
|
||||
class CollationAwareHasher : public Charset
|
||||
|
@ -198,6 +198,7 @@ int64_t getSignedNullValue(CalpontSystemCatalog::ColDataType t, uint32_t colWidt
|
||||
os << "getSignedNullValue(): got bad column type (" << t << "). Width=" << colWidth << endl;
|
||||
throw logic_error(os.str());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} // namespace utils
|
||||
|
51
utils/common/string_prefixes.cpp
Normal file
51
utils/common/string_prefixes.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
Copyright (C) 2021, 2022 MariaDB Corporation
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
/* handling of the conversion of string prefixes to int64_t for quick range checking */
|
||||
|
||||
#include "collation.h"
|
||||
#include "joblisttypes.h"
|
||||
|
||||
#include "string_prefixes.h"
|
||||
|
||||
// XXX: string (or, actually, a BLOB) with all NUL chars will be encoded into zero. Which corresponds to
|
||||
// encoding of empty string, or NULL.
|
||||
int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber)
|
||||
{
|
||||
datatypes::Charset cset(charsetNumber);
|
||||
uint8_t fixedLenPrefix[8];
|
||||
memset(fixedLenPrefix, 0, sizeof(fixedLenPrefix));
|
||||
cset.strnxfrm(fixedLenPrefix, sizeof(fixedLenPrefix), 8, str, len, 0);
|
||||
int64_t acc = 0;
|
||||
size_t i;
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
uint8_t byte = fixedLenPrefix[i];
|
||||
acc = (acc << 8) + byte;
|
||||
}
|
||||
return acc;
|
||||
}
|
||||
|
||||
int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, int charsetNumber)
|
||||
{
|
||||
if (len < 1)
|
||||
{
|
||||
return joblist::UBIGINTNULL;
|
||||
}
|
||||
return encodeStringPrefix(str, len, charsetNumber);
|
||||
}
|
32
utils/common/string_prefixes.h
Normal file
32
utils/common/string_prefixes.h
Normal file
@ -0,0 +1,32 @@
|
||||
/*
|
||||
Copyright (C) 2021, 2022 MariaDB Corporation
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
/* handling of the conversion of string prefixes to int64_t for quick range checking */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Encode string prefix into an int64_t, packing as many chars from string as possible
|
||||
// into the result and respecting the collation provided by charsetNumber.
|
||||
//
|
||||
// For one example, for CI Czech collation, encodeStringPrefix("cz") < encodeStringPrefix("CH").
|
||||
int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber);
|
||||
|
||||
// Same as encodeStringPrefix(), except that zero-length input (len < 1) is not encoded:
// joblist::UBIGINTNULL is returned for it instead.
int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, int charsetNumber);
|
@ -45,7 +45,7 @@ CalpontSystemCatalog::ColType Func_monthname::operationType(FunctionParm& fp,
|
||||
string Func_monthname::getStrVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
|
||||
CalpontSystemCatalog::ColType& op_ct)
|
||||
{
|
||||
int32_t month = getIntVal(row, parm, isNull, op_ct);
|
||||
int32_t month = getIntValInternal(row, parm, isNull, op_ct);
|
||||
|
||||
if (month == -1)
|
||||
return "";
|
||||
@ -74,8 +74,8 @@ int64_t Func_monthname::getTimestampIntVal(rowgroup::Row& row, FunctionParm& par
|
||||
return val;
|
||||
}
|
||||
|
||||
int64_t Func_monthname::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
|
||||
CalpontSystemCatalog::ColType& op_ct)
|
||||
int64_t Func_monthname::getIntValInternal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
|
||||
CalpontSystemCatalog::ColType& op_ct)
|
||||
{
|
||||
int64_t val = 0;
|
||||
dataconvert::DateTime aDateTime;
|
||||
@ -165,12 +165,20 @@ int64_t Func_monthname::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool&
|
||||
|
||||
break;
|
||||
|
||||
default: isNull = true; return -1;
|
||||
default:
|
||||
isNull = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int64_t Func_monthname::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct)
|
||||
{
|
||||
return getIntValInternal(row, parm, isNull, op_ct);
|
||||
}
|
||||
|
||||
double Func_monthname::getDoubleVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct)
|
||||
{
|
||||
|
@ -41,6 +41,8 @@ using namespace logging;
|
||||
|
||||
#include "funchelpers.h"
|
||||
|
||||
#include "exceptclasses.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
using namespace funcexp;
|
||||
@ -136,18 +138,27 @@ int64_t Func_round::getIntVal(Row& row, FunctionParm& parm, bool& isNull,
|
||||
uint64_t Func_round::getUintVal(Row& row, FunctionParm& parm, bool& isNull,
|
||||
CalpontSystemCatalog::ColType& op_ct)
|
||||
{
|
||||
uint64_t x;
|
||||
if (UNLIKELY(op_ct.colDataType == execplan::CalpontSystemCatalog::DATE))
|
||||
IDB_Decimal x = getDecimalVal(row, parm, isNull, op_ct);
|
||||
|
||||
if (!op_ct.isWideDecimalType())
|
||||
{
|
||||
IDB_Decimal d = getDecimalVal(row, parm, isNull, op_ct);
|
||||
x = static_cast<uint64_t>(d.value);
|
||||
if (x.scale > 0)
|
||||
{
|
||||
while (x.scale-- > 0)
|
||||
x.value /= 10;
|
||||
}
|
||||
else
|
||||
{
|
||||
while (x.scale++ < 0)
|
||||
x.value *= 10;
|
||||
}
|
||||
|
||||
return x.value;
|
||||
}
|
||||
else
|
||||
{
|
||||
x = parm[0]->data()->getUintVal(row, isNull);
|
||||
return static_cast<uint64_t>(x.getIntegralPart());
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
double Func_round::getDoubleVal(Row& row, FunctionParm& parm, bool& isNull,
|
||||
@ -434,10 +445,11 @@ IDB_Decimal Func_round::getDecimalVal(Row& row, FunctionParm& parm, bool& isNull
|
||||
{
|
||||
uint64_t x = parm[0]->data()->getUintVal(row, isNull);
|
||||
|
||||
if (x > (uint64_t)helpers::maxNumber_c[18])
|
||||
{
|
||||
x = helpers::maxNumber_c[18];
|
||||
}
|
||||
// XXX: why is this clamp here at all?
|
||||
// if (x > (uint64_t)helpers::maxNumber_c[18])
|
||||
//{
|
||||
// x = helpers::maxNumber_c[18];
|
||||
//}
|
||||
|
||||
decimal.value = x;
|
||||
decimal.scale = 0;
|
||||
|
@ -651,6 +651,8 @@ class Func_monthname : public Func_Str
|
||||
|
||||
int64_t getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct);
|
||||
int64_t getIntValInternal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct);
|
||||
|
||||
double getDoubleVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct);
|
||||
|
@ -305,4 +305,3 @@ class ProtocolError : public std::logic_error
|
||||
} while (0)
|
||||
|
||||
} // namespace logging
|
||||
|
||||
|
@ -2820,16 +2820,24 @@ LBID_t ExtentMap::_createColumnExtent_DBroot(uint32_t size, int OID, uint32_t co
|
||||
e->HWM = 0;
|
||||
e->status = EXTENTUNAVAILABLE; // mark extent as in process
|
||||
|
||||
// Partition, segment, and blockOffset 0 represents new table or column.
|
||||
// When DDL creates a table, we can mark the first extent as VALID, since
|
||||
// the table has no data. Marking as VALID enables cpimport to update
|
||||
// the CP min/max for the first import.
|
||||
// If DDL is adding a column to an existing table, setting to VALID won't
|
||||
// hurt, because DDL resets to INVALID after the extent is created.
|
||||
if ((e->partitionNum == 0) && (e->segmentNum == 0) && (e->blockOffset == 0))
|
||||
e->partition.cprange.isValid = CP_VALID;
|
||||
else
|
||||
e->partition.cprange.isValid = CP_INVALID;
|
||||
#if 0 // XXX: sergueyz: I'll leave these under conditional flag for a while because it appears a huge change.
|
||||
// Partition, segment, and blockOffset 0 represents new table or column.
|
||||
// When DDL creates a table, we can mark the first extent as VALID, since
|
||||
// the table has no data. Marking as VALID enables cpimport to update
|
||||
// the CP min/max for the first import.
|
||||
// If DDL is adding a column to an existing table, setting to VALID won't
|
||||
// hurt, because DDL resets to INVALID after the extent is created.
|
||||
// XXX: the comment above is out of date. bulk set of extents ranges
|
||||
// works differently right now.
|
||||
if ((e->partitionNum == 0) &&
|
||||
(e->segmentNum == 0) &&
|
||||
(e->blockOffset == 0))
|
||||
e->partition.cprange.isValid = CP_VALID;
|
||||
else
|
||||
e->partition.cprange.isValid = CP_INVALID;
|
||||
#else
|
||||
e->partition.cprange.isValid = CP_INVALID;
|
||||
#endif
|
||||
|
||||
partitionNum = e->partitionNum;
|
||||
segmentNum = e->segmentNum;
|
||||
@ -3029,16 +3037,22 @@ LBID_t ExtentMap::_createColumnExtentExactFile(uint32_t size, int OID, uint32_t
|
||||
e->HWM = 0;
|
||||
}
|
||||
|
||||
// Partition, segment, and blockOffset 0 represents new table or column.
|
||||
// When DDL creates a table, we can mark the first extent as VALID, since
|
||||
// the table has no data. Marking as VALID enables cpimport to update
|
||||
// the CP min/max for the first import.
|
||||
// If DDL is adding a column to an existing table, setting to VALID won't
|
||||
// hurt, because DDL resets to INVALID after the extent is created.
|
||||
if ((e->partitionNum == 0) && (e->segmentNum == 0) && (e->blockOffset == 0))
|
||||
e->partition.cprange.isValid = CP_VALID;
|
||||
else
|
||||
e->partition.cprange.isValid = CP_INVALID;
|
||||
#if 0 // XXX: sergueyz: I'll leave these under conditional flag for a while because it appears a huge change.
|
||||
// Partition, segment, and blockOffset 0 represents new table or column.
|
||||
// When DDL creates a table, we can mark the first extent as VALID, since
|
||||
// the table has no data. Marking as VALID enables cpimport to update
|
||||
// the CP min/max for the first import.
|
||||
// If DDL is adding a column to an existing table, setting to VALID won't
|
||||
// hurt, because DDL resets to INVALID after the extent is created.
|
||||
if ((e->partitionNum == 0) &&
|
||||
(e->segmentNum == 0) &&
|
||||
(e->blockOffset == 0))
|
||||
e->partition.cprange.isValid = CP_VALID;
|
||||
else
|
||||
e->partition.cprange.isValid = CP_INVALID;
|
||||
#else
|
||||
e->partition.cprange.isValid = CP_INVALID;
|
||||
#endif
|
||||
|
||||
startBlockOffset = e->blockOffset;
|
||||
|
||||
|
@ -739,7 +739,7 @@ int BulkLoad::preProcess(Job& job, int tableNo, TableInfo* tableInfo)
|
||||
// Setup import to start loading into starting HWM DB file
|
||||
RETURN_ON_ERROR(info->setupInitialColumnExtent(dbRoot, partition, segment,
|
||||
job.jobTableList[tableNo].tblName, lbid, oldHwm, hwm,
|
||||
bSkippedToNewExtent, false));
|
||||
bSkippedToNewExtent, bSkippedToNewExtent || oldHwm < 1));
|
||||
}
|
||||
|
||||
tableInfo->addColumn(info);
|
||||
|
@ -1717,7 +1717,7 @@ int BulkLoadBuffer::parseCol(ColumnInfo& columnInfo)
|
||||
|
||||
lastInputRowInExtent += columnInfo.rowsPerExtent();
|
||||
|
||||
if (isUnsigned(columnInfo.column.dataType) || isCharType(columnInfo.column.dataType))
|
||||
if (isUnsigned(columnInfo.column.dataType))
|
||||
{
|
||||
if (columnInfo.column.width <= 8)
|
||||
{
|
||||
|
@ -54,7 +54,7 @@ class BLBufferStats
|
||||
};
|
||||
BLBufferStats(ColDataType colDataType) : satCount(0)
|
||||
{
|
||||
if (isUnsigned(colDataType) || isCharType(colDataType))
|
||||
if (isUnsigned(colDataType))
|
||||
{
|
||||
minBufferVal = static_cast<int64_t>(MAX_UBIGINT);
|
||||
maxBufferVal = static_cast<int64_t>(MIN_UBIGINT);
|
||||
|
@ -104,7 +104,7 @@ void ColExtInf::addOrUpdateEntryTemplate(RID lastInputRow, T minVal, T maxVal, C
|
||||
}
|
||||
else // Update the range
|
||||
{
|
||||
if (isUnsigned(colDataType) || isCharType(colDataType))
|
||||
if (isUnsigned(colDataType))
|
||||
{
|
||||
if (width <= 8)
|
||||
{
|
||||
|
@ -241,6 +241,7 @@ uint8_t WE_DDLCommandProc::writeSystable(ByteStream& bs, std::string& err)
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = column.oid;
|
||||
}
|
||||
else
|
||||
@ -656,9 +657,10 @@ uint8_t WE_DDLCommandProc::writeCreateSyscolumn(ByteStream& bs, std::string& err
|
||||
dctnryStruct.fCompressionType = 2;
|
||||
}
|
||||
|
||||
if (colStruct.tokenFlag)
|
||||
if (colStruct.tokenFlag) // TODO: XXX: this is copied aplenty. NEED TO REFACTOR.
|
||||
{
|
||||
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = column.oid;
|
||||
}
|
||||
else
|
||||
@ -1046,6 +1048,7 @@ uint8_t WE_DDLCommandProc::writeSyscolumn(ByteStream& bs, std::string& err)
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = column.oid;
|
||||
}
|
||||
else
|
||||
@ -2442,6 +2445,7 @@ uint8_t WE_DDLCommandProc::updateSyscolumnTablename(ByteStream& bs, std::string&
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = colStruct.dataOid;
|
||||
}
|
||||
else
|
||||
@ -2846,6 +2850,7 @@ uint8_t WE_DDLCommandProc::updateSystableTablename(ByteStream& bs, std::string&
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = colStruct.dataOid;
|
||||
}
|
||||
else
|
||||
@ -3087,6 +3092,7 @@ uint8_t WE_DDLCommandProc::updateSystablesTablename(ByteStream& bs, std::string&
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = colStruct.dataOid;
|
||||
}
|
||||
else
|
||||
@ -3273,6 +3279,7 @@ uint8_t WE_DDLCommandProc::updateSystablesTablename(ByteStream& bs, std::string&
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = colStruct.dataOid;
|
||||
}
|
||||
else
|
||||
@ -4258,6 +4265,7 @@ uint8_t WE_DDLCommandProc::updateSyscolumnSetDefault(messageqcpp::ByteStream& bs
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = column.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = colStruct.dataOid;
|
||||
}
|
||||
else
|
||||
@ -4545,6 +4553,7 @@ uint8_t WE_DDLCommandProc::updateSyscolumnRenameColumn(messageqcpp::ByteStream&
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = column1.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column1.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = colStruct.dataOid;
|
||||
}
|
||||
else
|
||||
@ -4756,6 +4765,7 @@ uint8_t WE_DDLCommandProc::updateSyscolumnRenameColumn(messageqcpp::ByteStream&
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = column5.colType.ddn.dictOID;
|
||||
dctnryStruct.fCharsetNumber = column5.colType.charsetNumber;
|
||||
dctnryStruct.columnOid = colStruct.dataOid;
|
||||
}
|
||||
else
|
||||
|
@ -181,7 +181,9 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
|
||||
|
||||
colStruct.colDataType = colType.colDataType;
|
||||
|
||||
if (colStruct.tokenFlag)
|
||||
dctnryStruct.fCharsetNumber = colType.charsetNumber;
|
||||
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = colType.ddn.dictOID;
|
||||
dctnryStruct.columnOid = colStruct.dataOid;
|
||||
@ -1037,6 +1039,8 @@ uint8_t WE_DMLCommandProc::processBatchInsert(messageqcpp::ByteStream& bs, std::
|
||||
|
||||
colStruct.colDataType = colType.colDataType;
|
||||
|
||||
dctnryStruct.fCharsetNumber = colType.charsetNumber;
|
||||
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = colType.ddn.dictOID;
|
||||
@ -1619,6 +1623,8 @@ uint8_t WE_DMLCommandProc::processBatchInsertBinary(messageqcpp::ByteStream& bs,
|
||||
|
||||
colStruct.colDataType = colType.colDataType;
|
||||
|
||||
dctnryStruct.fCharsetNumber = colType.charsetNumber;
|
||||
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
dctnryStruct.dctnryOid = colType.ddn.dictOID;
|
||||
@ -2830,6 +2836,7 @@ uint8_t WE_DMLCommandProc::processUpdate(messageqcpp::ByteStream& bs, std::strin
|
||||
dctnryStruct.dctnryOid = colType.ddn.dictOID;
|
||||
dctnryStruct.columnOid = colStruct.dataOid;
|
||||
dctnryStruct.fCompressionType = colType.compressionType;
|
||||
dctnryStruct.fCharsetNumber = colType.charsetNumber;
|
||||
dctnryStruct.colWidth = colType.colWidth;
|
||||
|
||||
if (NO_ERROR != (error = fWEWrapper.openDctnry(txnId, dctnryStruct, false))) // @bug 5572 HDFS tmp file
|
||||
@ -4445,6 +4452,8 @@ uint8_t WE_DMLCommandProc::processFixRows(messageqcpp::ByteStream& bs, std::stri
|
||||
dctnryStruct.fCompressionType = colStruct.fCompressionType;
|
||||
dctnryStruct.dctnryOid = 0;
|
||||
|
||||
dctnryStruct.fCharsetNumber = colType.charsetNumber;
|
||||
|
||||
if (colType.colWidth > 8) // token
|
||||
{
|
||||
colStruct.colWidth = 8;
|
||||
|
@ -149,11 +149,10 @@ int ServiceWriteEngine::setupResources()
|
||||
return -3;
|
||||
}
|
||||
|
||||
if (rlim.rlim_cur != 65536)
|
||||
if (rlim.rlim_cur < 65536)
|
||||
{
|
||||
return -4;
|
||||
}
|
||||
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
@ -24,6 +24,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <boost/thread.hpp>
|
||||
#include <boost/thread/tss.hpp>
|
||||
@ -49,17 +50,19 @@ namespace WriteEngine
|
||||
// forward reference
|
||||
class DbFileOp;
|
||||
|
||||
/** @brief Extended CPInfo - with type handler for all type-related information */
|
||||
/** @brief Extended CPInfo - with all type-related information and associated range data */
|
||||
struct ExtCPInfo
|
||||
{
|
||||
execplan::CalpontSystemCatalog::ColDataType fColType;
|
||||
int fColWidth;
|
||||
BRM::CPInfo fCPInfo;
|
||||
std::shared_ptr<std::vector<int64_t>> fStringsPrefixes;
|
||||
/** @brief Construct range info for a column of the given data type and width.
 *
 * Stores the type and width and flags the embedded BRM::CPInfo as a binary
 * column when the width exceeds datatypes::MAXLEGACYWIDTH.
 *
 * @param colType  column data type, kept in fColType.
 * @param colWidth column width, kept in fColWidth.
 */
ExtCPInfo(execplan::CalpontSystemCatalog::ColDataType colType, int colWidth)
|
||||
: fColType(colType), fColWidth(colWidth)
|
||||
{
|
||||
// The width is compared as unsigned, so a negative colWidth would also be treated as "wide".
fCPInfo.isBinaryColumn = (unsigned int)colWidth > datatypes::MAXLEGACYWIDTH;
|
||||
}
|
||||
|
||||
void toInvalid()
|
||||
{
|
||||
auto mm = datatypes::MinMaxInfo::invalidRange(fColType);
|
||||
@ -68,7 +71,22 @@ struct ExtCPInfo
|
||||
fCPInfo.bigMax = mm.int128Max;
|
||||
fCPInfo.bigMin = mm.int128Min;
|
||||
}
|
||||
|
||||
/** @brief Append one encoded string prefix to this range's prefix list.
 *
 * The backing vector is allocated lazily on the first call, so an object
 * that never receives a prefix keeps fStringsPrefixes empty (null).
 *
 * @param strPrefix prefix value to record.
 */
void addStringPrefix(int64_t strPrefix)
|
||||
{
|
||||
if (!fStringsPrefixes)
|
||||
{
|
||||
// First prefix for this object: create the shared vector.
fStringsPrefixes.reset(new std::vector<int64_t>());
|
||||
}
|
||||
fStringsPrefixes->push_back(strPrefix);
|
||||
}
|
||||
/** @brief Tell whether a prefix vector has been allocated for this range.
 *
 * @return true when fStringsPrefixes points to a vector, false when it is null.
 *         This checks allocation only, not whether the vector is non-empty.
 */
bool hasStringsPrefixes() const
|
||||
{
|
||||
return fStringsPrefixes.get() != nullptr;
|
||||
}
|
||||
/** @brief Give raw access to the collected string prefixes.
 *
 * @return pointer to the vector's contiguous storage, or nullptr when no
 *         prefix vector has been allocated. The pointer is non-owning; per
 *         std::vector rules it may be invalidated by a later push_back.
 */
int64_t* stringsPrefixes() const
|
||||
{
|
||||
return hasStringsPrefixes() ? fStringsPrefixes->data() : nullptr;
|
||||
}
|
||||
bool isInvalid()
|
||||
{
|
||||
datatypes::MinMaxInfo mm;
|
||||
|
@ -344,6 +344,7 @@ struct DctnryStruct /** @brief Dctnry Interface Struct*/
|
||||
uint16_t fColSegment; /** @brief Segment for column file */
|
||||
uint16_t fColDbRoot; /** @brief DBRoot for column file */
|
||||
int fCompressionType; /** @brief Compression type for column file */
|
||||
int fCharsetNumber; /** @brief Charset number to account for collation when computing string prefixes */
|
||||
DctnryStruct()
|
||||
: dctnryOid(0)
|
||||
, columnOid(0)
|
||||
@ -353,6 +354,7 @@ struct DctnryStruct /** @brief Dctnry Interface Struct*/
|
||||
, fColSegment(0)
|
||||
, fColDbRoot(0)
|
||||
, fCompressionType(idbdatafile::IDBPolicy::useHdfs() ? 2 : 0)
|
||||
, fCharsetNumber(8)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
@ -214,6 +214,7 @@ int WEBrmUpdater::updateHighWaterMarkInBRM()
|
||||
int WEBrmUpdater::updateCPAndHWMInBRM()
|
||||
{
|
||||
int rc = 0;
|
||||
size_t i;
|
||||
|
||||
// BUG 4232. some imports may not contain CP but HWM
|
||||
if ((fCPInfo.size() > 0) || (fHWMInfo.size() > 0))
|
||||
@ -227,6 +228,13 @@ int WEBrmUpdater::updateCPAndHWMInBRM()
|
||||
const std::vector<CPInfoMerge> & mergeCPDataArgs,
|
||||
VER_t transID = 0) DBRM_THROW;
|
||||
*/
|
||||
for (i = 0; i < fCPInfo.size(); i++)
|
||||
{
|
||||
if (fCPInfo[i].newExtent)
|
||||
{
|
||||
fCPInfo[i].seqNum = 0; // to be in sync with DBRM.
|
||||
}
|
||||
}
|
||||
rc = fpBrm->bulkSetHWMAndCP(fHWMInfo, fCPInfoData, fCPInfo, 0);
|
||||
|
||||
// rc = fpBrm->mergeExtentsMaxMin(fCPInfo);
|
||||
|
@ -21,6 +21,10 @@
|
||||
/** @writeengine.cpp
|
||||
* A wrapper class for the write engine to write information to files
|
||||
*/
|
||||
|
||||
// XXX: range computations for token columns are switched off; uncomment the definition below to enable them.
|
||||
//#define XXX_WRITEENGINE_TOKENS_RANGES_XXX
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
@ -59,6 +63,7 @@ using namespace execplan;
|
||||
#include "MonitorProcMem.h"
|
||||
using namespace idbdatafile;
|
||||
#include "dataconvert.h"
|
||||
#include "string_prefixes.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define isnan _isnan
|
||||
@ -362,6 +367,9 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
|
||||
case WR_UINT:
|
||||
case WR_ULONGLONG:
|
||||
case WR_CHAR:
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
case WR_TOKEN:
|
||||
#endif
|
||||
{
|
||||
isUnsigned = true;
|
||||
break;
|
||||
@ -385,6 +393,13 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
|
||||
maxMin->fromToChars();
|
||||
}
|
||||
}
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
if (colType == WR_TOKEN)
|
||||
{
|
||||
oldValArrayVoid = nullptr; // no old values for tokens, sadly.
|
||||
valArrayVoid = (void*)maxMin->stringsPrefixes();
|
||||
}
|
||||
#endif
|
||||
size_t i;
|
||||
for (i = 0; i < totalOldRow; i++)
|
||||
{
|
||||
@ -435,6 +450,9 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
|
||||
fetchNewOldValues<int64_t, int64_t>(value, oldValue, valArrayVoid, oldValArrayVoid, i, totalNewRow);
|
||||
break;
|
||||
}
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
case WR_TOKEN:
|
||||
#endif
|
||||
case WR_ULONGLONG:
|
||||
{
|
||||
fetchNewOldValues<uint64_t, uint64_t>(uvalue, oldUValue, valArrayVoid, oldValArrayVoid, i,
|
||||
@ -449,12 +467,11 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
|
||||
}
|
||||
case WR_CHAR:
|
||||
{
|
||||
fetchNewOldValues<uint64_t, uint64_t>(uvalue, oldUValue, valArrayVoid, oldValArrayVoid, i,
|
||||
totalNewRow);
|
||||
fetchNewOldValues<int64_t, int64_t>(value, oldValue, valArrayVoid, oldValArrayVoid, i, totalNewRow);
|
||||
// for characters (strings, actually), we fetched them in LSB order, on x86, at the very least.
|
||||
// this means most significant byte of the string, which is first, is now in LSB of uvalue/oldValue.
|
||||
// we must perform a conversion.
|
||||
uvalue = uint64ToStr(uvalue);
|
||||
value = uint64ToStr(uvalue);
|
||||
oldValue = uint64ToStr(oldValue);
|
||||
break;
|
||||
}
|
||||
@ -576,6 +593,7 @@ void WriteEngineWrapper::convertValue(const execplan::CalpontSystemCatalog::ColT
|
||||
curStr = curStr.substr(0, MAX_COLUMN_BOUNDARY);
|
||||
|
||||
memcpy(value, curStr.c_str(), curStr.length());
|
||||
|
||||
break;
|
||||
|
||||
case WriteEngine::WR_FLOAT:
|
||||
@ -1179,10 +1197,17 @@ static void log_this(const char *message,
|
||||
#endif
|
||||
|
||||
/** @brief Determine whether we may update a column's ranges (by type) and return nullptr if we can't */
|
||||
static ExtCPInfo* getCPInfoToUpdateForUpdatableType(const ColStruct& colStruct, ExtCPInfo* currentCPInfo)
|
||||
static ExtCPInfo* getCPInfoToUpdateForUpdatableType(const ColStruct& colStruct, ExtCPInfo* currentCPInfo,
|
||||
OpType optype)
|
||||
{
|
||||
if (colStruct.tokenFlag)
|
||||
{
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
if (currentCPInfo && currentCPInfo->hasStringsPrefixes() && optype == INSERT)
|
||||
{
|
||||
return currentCPInfo;
|
||||
}
|
||||
#endif
|
||||
return nullptr;
|
||||
}
|
||||
switch (colStruct.colType)
|
||||
@ -1689,10 +1714,16 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
|
||||
for (uint32_t rows = 0; rows < (totalRow - rowsLeft); rows++)
|
||||
{
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
int64_t strPrefix;
|
||||
#endif
|
||||
if (dctStr_iter->length() == 0)
|
||||
{
|
||||
Token nullToken;
|
||||
col_iter->data = nullToken;
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
strPrefix = (int64_t)joblist::UBIGINTNULL; // the string prefixes are signed long ints.
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1702,6 +1733,10 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
|
||||
dctnryStructList[i].fCharsetNumber);
|
||||
#endif
|
||||
dctTuple.isNull = false;
|
||||
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
|
||||
|
||||
@ -1717,6 +1752,9 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
col_iter->data = dctTuple.token;
|
||||
}
|
||||
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
maxMins[i].fSplitMaxMinInfo[0].addStringPrefix(strPrefix);
|
||||
#endif
|
||||
dctStr_iter++;
|
||||
col_iter++;
|
||||
}
|
||||
@ -1744,10 +1782,16 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
|
||||
for (uint32_t rows = 0; rows < rowsLeft; rows++)
|
||||
{
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
int64_t strPrefix;
|
||||
#endif
|
||||
if (dctStr_iter->length() == 0)
|
||||
{
|
||||
Token nullToken;
|
||||
col_iter->data = nullToken;
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
strPrefix = joblist::UBIGINTNULL; // string prefixes are signed long ints.
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1757,6 +1801,10 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
|
||||
dctnryStructList[i].fCharsetNumber);
|
||||
#endif
|
||||
dctTuple.isNull = false;
|
||||
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
|
||||
|
||||
@ -1772,6 +1820,9 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
col_iter->data = dctTuple.token;
|
||||
}
|
||||
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
maxMins[i].fSplitMaxMinInfo[1].addStringPrefix(strPrefix);
|
||||
#endif
|
||||
dctStr_iter++;
|
||||
col_iter++;
|
||||
}
|
||||
@ -1938,7 +1989,7 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
|
||||
if (isFirstBatchPm && (totalRow == rowsLeft))
|
||||
{
|
||||
// in this particular case we already marked extents as invalid up there.
|
||||
// in this particular case we already marked extents as invalid above.
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1950,7 +2001,7 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
if (firstHalfCount)
|
||||
{
|
||||
ExtCPInfo* cpInfoP =
|
||||
getCPInfoToUpdateForUpdatableType(colStructList[i], &maxMins[i].fSplitMaxMinInfo[0]);
|
||||
getCPInfoToUpdateForUpdatableType(colStructList[i], &maxMins[i].fSplitMaxMinInfo[0], m_opType);
|
||||
RID thisRid = rowsLeft ? lastRid : lastRidNew;
|
||||
successFlag = colOp->calculateRowId(thisRid, BYTE_PER_BLOCK / width, width, curFbo, curBio);
|
||||
|
||||
@ -1966,7 +2017,7 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
if (rowsLeft)
|
||||
{
|
||||
ExtCPInfo* cpInfoP =
|
||||
getCPInfoToUpdateForUpdatableType(colStructList[i], &maxMins[i].fSplitMaxMinInfo[1]);
|
||||
getCPInfoToUpdateForUpdatableType(colStructList[i], &maxMins[i].fSplitMaxMinInfo[1], m_opType);
|
||||
if (cpInfoP)
|
||||
{
|
||||
RETURN_ON_ERROR(GetLBIDRange(newExtentsStartingLbids[i], colStructList[i], *cpInfoP));
|
||||
@ -4446,11 +4497,6 @@ int WriteEngineWrapper::updateColumnRec(const TxnID& txnid, const vector<CSCType
|
||||
ColumnOp* colOp = NULL;
|
||||
ExtCPInfoList infosToUpdate;
|
||||
|
||||
if (m_opType != DELETE)
|
||||
{
|
||||
m_opType = UPDATE;
|
||||
}
|
||||
|
||||
for (unsigned extent = 0; extent < numExtents; extent++)
|
||||
{
|
||||
colStructList = colExtentsStruct[extent];
|
||||
@ -4524,15 +4570,19 @@ int WriteEngineWrapper::updateColumnRec(const TxnID& txnid, const vector<CSCType
|
||||
}
|
||||
std::vector<ExtCPInfo*> currentExtentRangesPtrs(colStructList.size(), NULL); // pointers for each extent.
|
||||
|
||||
if (m_opType != DELETE)
|
||||
m_opType = UPDATE;
|
||||
|
||||
for (unsigned j = 0; j < colStructList.size(); j++)
|
||||
{
|
||||
colOp = m_colOp[op(colStructList[j].fCompressionType)];
|
||||
ExtCPInfo* cpInfoP = &(currentExtentRanges[j]);
|
||||
cpInfoP = getCPInfoToUpdateForUpdatableType(colStructList[j], cpInfoP);
|
||||
cpInfoP = getCPInfoToUpdateForUpdatableType(colStructList[j], cpInfoP, m_opType);
|
||||
currentExtentRangesPtrs[j] = cpInfoP;
|
||||
|
||||
if (colStructList[j].tokenFlag)
|
||||
continue;
|
||||
// XXX: highly dubious.
|
||||
// if (!colStructList[j].tokenFlag)
|
||||
// continue;
|
||||
|
||||
width = colOp->getCorrectRowWidth(colStructList[j].colDataType, colStructList[j].colWidth);
|
||||
successFlag = colOp->calculateRowId(aRid, BYTE_PER_BLOCK / width, width, curFbo, curBio);
|
||||
@ -4550,9 +4600,6 @@ int WriteEngineWrapper::updateColumnRec(const TxnID& txnid, const vector<CSCType
|
||||
// timer.start("markExtentsInvalid");
|
||||
//#endif
|
||||
|
||||
if (m_opType != DELETE)
|
||||
m_opType = UPDATE;
|
||||
|
||||
rc = writeColumnRecUpdate(txnid, cscColTypeList, colStructList, colValueList, colOldValueList,
|
||||
ridLists[extent], tableOid, true, ridLists[extent].size(),
|
||||
&currentExtentRangesPtrs);
|
||||
@ -4578,6 +4625,7 @@ int WriteEngineWrapper::updateColumnRec(const TxnID& txnid, const vector<CSCType
|
||||
{
|
||||
cpInfo.fCPInfo.seqNum = SEQNUM_MARK_INVALID_SET_RANGE;
|
||||
}
|
||||
// ZZZZ
|
||||
rc = BRMWrapper::getInstance()->setExtentsMaxMin(infosToDrop);
|
||||
setInvalidCPInfosSpecialMarks(infosToUpdate);
|
||||
rc = BRMWrapper::getInstance()->setExtentsMaxMin(infosToUpdate);
|
||||
@ -4611,12 +4659,9 @@ int WriteEngineWrapper::updateColumnRecs(const TxnID& txnid, const CSCTypesList&
|
||||
colOp = m_colOp[op(colExtentsStruct[j].fCompressionType)];
|
||||
|
||||
ExtCPInfo* cpInfoP = &(infosToUpdate[j]);
|
||||
cpInfoP = getCPInfoToUpdateForUpdatableType(colExtentsStruct[j], cpInfoP);
|
||||
cpInfoP = getCPInfoToUpdateForUpdatableType(colExtentsStruct[j], cpInfoP, m_opType);
|
||||
pointersToInfos.push_back(cpInfoP);
|
||||
|
||||
if (colExtentsStruct[j].tokenFlag)
|
||||
continue;
|
||||
|
||||
width = colOp->getCorrectRowWidth(colExtentsStruct[j].colDataType, colExtentsStruct[j].colWidth);
|
||||
successFlag = colOp->calculateRowId(aRid, BYTE_PER_BLOCK / width, width, curFbo, curBio);
|
||||
|
||||
@ -4964,7 +5009,7 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid, const CSCTypesList& c
|
||||
allocateValArray(valArray, totalRow1, colStructList[i].colType, colStructList[i].colWidth);
|
||||
|
||||
ExtCPInfo* cpInfo = getCPInfoToUpdateForUpdatableType(
|
||||
colStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[0] : NULL);
|
||||
colStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[0] : NULL, m_opType);
|
||||
|
||||
if (m_opType != INSERT && cpInfo != NULL) // we allocate space for old values only when we need them.
|
||||
{
|
||||
@ -5109,7 +5154,7 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid, const CSCTypesList& c
|
||||
}
|
||||
|
||||
ExtCPInfo* cpInfo = getCPInfoToUpdateForUpdatableType(
|
||||
newColStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[1] : NULL);
|
||||
newColStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[1] : NULL, m_opType);
|
||||
allocateValArray(valArray, totalRow2, newColStructList[i].colType, newColStructList[i].colWidth);
|
||||
|
||||
if (m_opType != INSERT && cpInfo != NULL) // we allocate space for old values only when we need them.
|
||||
@ -5190,7 +5235,7 @@ int WriteEngineWrapper::writeColumnRec(const TxnID& txnid, const CSCTypesList& c
|
||||
ColumnOp* colOp = m_colOp[op(colStructList[i].fCompressionType)];
|
||||
|
||||
ExtCPInfo* cpInfo = getCPInfoToUpdateForUpdatableType(
|
||||
colStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[0] : NULL);
|
||||
colStructList[i], maxMins ? ((*maxMins)[i]).fSplitMaxMinInfoPtrs[0] : NULL, m_opType);
|
||||
|
||||
// set params
|
||||
colOp->initColumn(curCol);
|
||||
|
Loading…
x
Reference in New Issue
Block a user