1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

MCOL-671 Fix TEXT/BLOB single row SELECT WHERE

pDictionaryScan won't work for BLOB/TEXT since it requires searching the
data file and rebuilding the token from matches. The tokens can't be
rebuilt correctly due to the bits in the token used for block counts. This
patch forces the use of pDictionaryStep instead for WHERE conditions.

In addition this patch adds support for TEXT/BLOB in various parts of
the job step processing. This fixes things like error 202 during an
UPDATE with a join condition on TEXT/BLOB columns.
This commit is contained in:
Andrew Hutchings
2017-04-21 11:21:59 +01:00
parent dce4b11437
commit 6128293ad3
18 changed files with 82 additions and 19 deletions

View File

@ -300,9 +300,11 @@ void PredicateOperator::setOpType(Type& l, Type& r)
fOperationType.colWidth = 8;
}
else if ((l.colDataType == execplan::CalpontSystemCatalog::CHAR ||
l.colDataType == execplan::CalpontSystemCatalog::VARCHAR) &&
l.colDataType == execplan::CalpontSystemCatalog::VARCHAR ||
l.colDataType == execplan::CalpontSystemCatalog::TEXT) &&
(r.colDataType == execplan::CalpontSystemCatalog::CHAR ||
r.colDataType == execplan::CalpontSystemCatalog::VARCHAR))
r.colDataType == execplan::CalpontSystemCatalog::VARCHAR ||
r.colDataType == execplan::CalpontSystemCatalog::TEXT))
{
if ( ( (l.colDataType == execplan::CalpontSystemCatalog::CHAR && l.colWidth <= 8) ||
(l.colDataType == execplan::CalpontSystemCatalog::VARCHAR && l.colWidth < 8) ) &&

View File

@ -337,6 +337,7 @@ inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, Retu
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::TEXT:
{
if (fOp == OP_ISNULL)
{
@ -365,7 +366,8 @@ inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, Retu
}
//FIXME: ???
case execplan::CalpontSystemCatalog::VARBINARY:
case execplan::CalpontSystemCatalog::VARBINARY:
case execplan::CalpontSystemCatalog::BLOB:
return false;
break;

View File

@ -354,7 +354,8 @@ void GroupConcatAgUM::applyMapping(const boost::shared_array<int>& mapping, cons
{
if (fRow.getColumnWidth(i) > 8 &&
(fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::CHAR ||
fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::VARCHAR))
fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::VARCHAR ||
fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT))
{
fRow.setStringField(row.getStringPointer(mapping[i]), row.getStringLength(mapping[i]), i);
}
@ -453,6 +454,7 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row)
}
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::TEXT:
{
oss << row.getStringField(*i).c_str();
//oss << row.getStringField(*i);
@ -567,6 +569,7 @@ int64_t GroupConcator::lengthEstimate(const rowgroup::Row& row)
}
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::TEXT:
{
int64_t colWidth = row.getStringLength(*i);
fieldLen += colWidth; // getStringLength() does the same thing as below

View File

@ -133,9 +133,9 @@ uint32_t fudgeWidth(const CalpontSystemCatalog::ColType& ict, CalpontSystemCatal
{
CalpontSystemCatalog::OID dictOid = isDictCol(ict);
CalpontSystemCatalog::ColType ct = ict;
if (ct.colDataType != CalpontSystemCatalog::VARBINARY)
if (ct.colDataType != CalpontSystemCatalog::VARBINARY && ct.colDataType != CalpontSystemCatalog::BLOB)
{
if (ct.colDataType == CalpontSystemCatalog::VARCHAR)
if (ct.colDataType == CalpontSystemCatalog::VARCHAR || ct.colDataType == CalpontSystemCatalog::TEXT)
ct.colWidth++;
//Round colWidth up
@ -319,7 +319,9 @@ CalpontSystemCatalog::OID isDictCol(const CalpontSystemCatalog::ColType& colType
if (colType.colWidth > 8) return colType.ddn.dictOID;
if (colType.colDataType == CalpontSystemCatalog::VARCHAR &&
colType.colWidth > 7) return colType.ddn.dictOID;
if (colType.colDataType == CalpontSystemCatalog::VARBINARY)
if (colType.colDataType == CalpontSystemCatalog::VARBINARY ||
colType.colDataType == CalpontSystemCatalog::BLOB ||
colType.colDataType == CalpontSystemCatalog::TEXT)
return colType.ddn.dictOID;
return 0;
@ -748,13 +750,17 @@ bool compatibleColumnTypes(const CalpontSystemCatalog::ColDataType& dt1, uint32_
break;
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::TEXT:
// @bug 1495 compound/string join
if (dt2 != CalpontSystemCatalog::VARCHAR &&
dt2 != CalpontSystemCatalog::CHAR)
dt2 != CalpontSystemCatalog::CHAR &&
dt2 != CalpontSystemCatalog::TEXT)
return false;
break;
case CalpontSystemCatalog::VARBINARY:
if (dt2 != CalpontSystemCatalog::VARBINARY) return false;
case CalpontSystemCatalog::BLOB:
if (dt2 != CalpontSystemCatalog::VARBINARY &&
dt2 != CalpontSystemCatalog::BLOB) return false;
break;
case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT:

View File

@ -1521,7 +1521,9 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo)
pcs->schema(sc->schemaName());
pcs->cardinality(sc->cardinality());
if (filterWithDictionary(dictOid, jobInfo.stringScanThreshold))
if (ct.colDataType == execplan::CalpontSystemCatalog::TEXT ||
ct.colDataType == execplan::CalpontSystemCatalog::BLOB ||
filterWithDictionary(dictOid, jobInfo.stringScanThreshold))
{
pDictionaryStep* pds = new pDictionaryStep(dictOid, tbl_oid, ct, jobInfo);
jobInfo.keyInfo->dictOidToColOid[dictOid] = sc->oid();
@ -1668,6 +1670,8 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo)
else if ( CalpontSystemCatalog::CHAR != ct.colDataType &&
CalpontSystemCatalog::VARCHAR != ct.colDataType &&
CalpontSystemCatalog::VARBINARY != ct.colDataType &&
CalpontSystemCatalog::TEXT != ct.colDataType &&
CalpontSystemCatalog::BLOB != ct.colDataType &&
ConstantColumn::NULLDATA != cc->type() &&
(cop & COMPARE_LIKE) ) // both like and not like
{
@ -2520,7 +2524,9 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo)
pcs->schema(sc->schemaName());
pcs->cardinality(sc->cardinality());
if (filterWithDictionary(dictOid, jobInfo.stringScanThreshold))
if (ct.colDataType == execplan::CalpontSystemCatalog::TEXT ||
ct.colDataType == execplan::CalpontSystemCatalog::BLOB ||
filterWithDictionary(dictOid, jobInfo.stringScanThreshold))
{
pDictionaryStep* pds = new pDictionaryStep(dictOid, tbOID, ct, jobInfo);
jobInfo.keyInfo->dictOidToColOid[dictOid] = sc->oid();

View File

@ -133,6 +133,8 @@ void getColumnValue(ConstantColumn** cc, uint64_t i, const Row& row)
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::BLOB:
oss << (char*) (row.getStringField(i).c_str());
*cc = new ConstantColumn(oss.str());
break;

View File

@ -1375,7 +1375,7 @@ bool addFunctionJoin(vector<uint32_t>& joinedTables, JobStepVector& joinSteps,
TupleInfo ti1 = getTupleInfo(key1, jobInfo);
TupleInfo ti2 = getTupleInfo(key2, jobInfo);
if (ti1.dtype==CalpontSystemCatalog::CHAR || ti1.dtype==CalpontSystemCatalog::VARCHAR)
if (ti1.dtype==CalpontSystemCatalog::CHAR || ti1.dtype==CalpontSystemCatalog::VARCHAR || ti1.dtype==CalpontSystemCatalog::TEXT)
m1->second.fTypeless = m2->second.fTypeless = true; // ti2 is compatible
else
m1->second.fTypeless = m2->second.fTypeless = false;

View File

@ -450,6 +450,8 @@ bool LBIDList::CasualPartitionDataType(const CalpontSystemCatalog::ColDataType t
case CalpontSystemCatalog::CHAR:
return size <9;
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::BLOB:
case CalpontSystemCatalog::TEXT:
return size <8;
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:

View File

@ -222,7 +222,9 @@ PassThruStep::PassThruStep(
colWidth = colType.colWidth;
realWidth = colType.colWidth;
isDictColumn = ((colType.colDataType == CalpontSystemCatalog::VARCHAR && colType.colWidth > 7)
|| (colType.colDataType == CalpontSystemCatalog::CHAR && colType.colWidth > 8));
|| (colType.colDataType == CalpontSystemCatalog::CHAR && colType.colWidth > 8)
|| (colType.colDataType == CalpontSystemCatalog::TEXT)
|| (colType.colDataType == CalpontSystemCatalog::BLOB));
fColType = colType;
fPseudoType = 0;

View File

@ -111,6 +111,7 @@ uint64_t RowEstimator::adjustValue(const execplan::CalpontSystemCatalog::ColType
// TODO: Use dictionary column HWM for dictionary columns.
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::TEXT:
// Last byte is the first character in the string.
return (0xFF & value);
default:

View File

@ -217,7 +217,9 @@ SJSTEP& SubQueryTransformer::makeSubQueryStep(execplan::CalpontSelectExecutionPl
// skip char/varchar/varbinary column because the colWidth in row is fudged.
if (colDataTypeInRg != CalpontSystemCatalog::VARCHAR &&
colDataTypeInRg != CalpontSystemCatalog::CHAR &&
colDataTypeInRg != CalpontSystemCatalog::VARBINARY)
colDataTypeInRg != CalpontSystemCatalog::VARBINARY &&
colDataTypeInRg != CalpontSystemCatalog::TEXT &&
colDataTypeInRg != CalpontSystemCatalog::BLOB)
{
ct.colWidth = row.getColumnWidth(i);
ct.colDataType = row.getColTypes()[i];

View File

@ -1120,6 +1120,8 @@ void TupleAggregateStep::prep1PhaseAggregate(
{
if (typeProj[colProj] == CalpontSystemCatalog::CHAR ||
typeProj[colProj] == CalpontSystemCatalog::VARCHAR ||
typeProj[colProj] == CalpontSystemCatalog::BLOB ||
typeProj[colProj] == CalpontSystemCatalog::TEXT ||
typeProj[colProj] == CalpontSystemCatalog::DATE ||
typeProj[colProj] == CalpontSystemCatalog::DATETIME)
{
@ -1186,6 +1188,8 @@ void TupleAggregateStep::prep1PhaseAggregate(
{
if (typeProj[colProj] == CalpontSystemCatalog::CHAR ||
typeProj[colProj] == CalpontSystemCatalog::VARCHAR ||
typeProj[colProj] == CalpontSystemCatalog::TEXT ||
typeProj[colProj] == CalpontSystemCatalog::BLOB ||
typeProj[colProj] == CalpontSystemCatalog::DATE ||
typeProj[colProj] == CalpontSystemCatalog::DATETIME)
{
@ -1548,6 +1552,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
{
if (typeProj[colProj] == CalpontSystemCatalog::CHAR ||
typeProj[colProj] == CalpontSystemCatalog::VARCHAR ||
typeProj[colProj] == CalpontSystemCatalog::BLOB ||
typeProj[colProj] == CalpontSystemCatalog::TEXT ||
typeProj[colProj] == CalpontSystemCatalog::DATE ||
typeProj[colProj] == CalpontSystemCatalog::DATETIME)
{
@ -1608,6 +1614,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
{
if (typeProj[colProj] == CalpontSystemCatalog::CHAR ||
typeProj[colProj] == CalpontSystemCatalog::VARCHAR ||
typeProj[colProj] == CalpontSystemCatalog::BLOB ||
typeProj[colProj] == CalpontSystemCatalog::TEXT ||
typeProj[colProj] == CalpontSystemCatalog::DATE ||
typeProj[colProj] == CalpontSystemCatalog::DATETIME)
{
@ -1745,6 +1753,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
{
if (typeAgg[colAgg] == CalpontSystemCatalog::CHAR ||
typeAgg[colAgg] == CalpontSystemCatalog::VARCHAR ||
typeAgg[colAgg] == CalpontSystemCatalog::BLOB ||
typeAgg[colAgg] == CalpontSystemCatalog::TEXT ||
typeAgg[colAgg] == CalpontSystemCatalog::DATE ||
typeAgg[colAgg] == CalpontSystemCatalog::DATETIME)
{
@ -2528,6 +2538,8 @@ void TupleAggregateStep::prep2PhasesAggregate(
{
if (typeProj[colProj] == CalpontSystemCatalog::CHAR ||
typeProj[colProj] == CalpontSystemCatalog::VARCHAR ||
typeProj[colProj] == CalpontSystemCatalog::BLOB ||
typeProj[colProj] == CalpontSystemCatalog::TEXT ||
typeProj[colProj] == CalpontSystemCatalog::DATE ||
typeProj[colProj] == CalpontSystemCatalog::DATETIME)
{
@ -2599,6 +2611,8 @@ void TupleAggregateStep::prep2PhasesAggregate(
{
if (typeProj[colProj] == CalpontSystemCatalog::CHAR ||
typeProj[colProj] == CalpontSystemCatalog::VARCHAR ||
typeProj[colProj] == CalpontSystemCatalog::BLOB ||
typeProj[colProj] == CalpontSystemCatalog::TEXT ||
typeProj[colProj] == CalpontSystemCatalog::DATE ||
typeProj[colProj] == CalpontSystemCatalog::DATETIME)
{
@ -3181,6 +3195,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
{
if (typeProj[colProj] == CalpontSystemCatalog::CHAR ||
typeProj[colProj] == CalpontSystemCatalog::VARCHAR ||
typeProj[colProj] == CalpontSystemCatalog::BLOB ||
typeProj[colProj] == CalpontSystemCatalog::TEXT ||
typeProj[colProj] == CalpontSystemCatalog::DATE ||
typeProj[colProj] == CalpontSystemCatalog::DATETIME)
{
@ -3241,6 +3257,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
{
if (typeProj[colProj] == CalpontSystemCatalog::CHAR ||
typeProj[colProj] == CalpontSystemCatalog::VARCHAR ||
typeProj[colProj] == CalpontSystemCatalog::BLOB ||
typeProj[colProj] == CalpontSystemCatalog::TEXT ||
typeProj[colProj] == CalpontSystemCatalog::DATE ||
typeProj[colProj] == CalpontSystemCatalog::DATETIME)
{
@ -3395,6 +3413,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
{
if (typeAggUm[colUm] == CalpontSystemCatalog::CHAR ||
typeAggUm[colUm] == CalpontSystemCatalog::VARCHAR ||
typeAggUm[colUm] == CalpontSystemCatalog::BLOB ||
typeAggUm[colUm] == CalpontSystemCatalog::TEXT ||
typeAggUm[colUm] == CalpontSystemCatalog::DATE ||
typeAggUm[colUm] == CalpontSystemCatalog::DATETIME)
{

View File

@ -192,7 +192,8 @@ void TupleConstantStep::constructContanstRow(const JobInfo& jobInfo)
if (cc->type() == ConstantColumn::NULLDATA)
{
if (types[*i] == CalpontSystemCatalog::CHAR ||
types[*i] == CalpontSystemCatalog::VARCHAR)
types[*i] == CalpontSystemCatalog::VARCHAR ||
types[*i] == CalpontSystemCatalog::TEXT)
{
fRowConst.setStringField("", *i);
}
@ -246,6 +247,7 @@ void TupleConstantStep::constructContanstRow(const JobInfo& jobInfo)
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::TEXT:
{
fRowConst.setStringField(c.strVal, *i);
break;

View File

@ -401,6 +401,7 @@ void TupleUnion::normalize(const Row &in, Row *out)
out->setUintField(in.getUintField(i), i);
break;
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR: {
ostringstream os;
if (in.getScale(i)) {
@ -482,6 +483,7 @@ dec1: uint64_t val = in.getIntField(i);
out->setUintField(in.getUintField(i), i);
break;
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR: {
ostringstream os;
if (in.getScale(i)) {
@ -541,9 +543,11 @@ dec2: uint64_t val = in.getIntField(i);
}
break;
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR:
switch (out->getColTypes()[i]) {
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR:
out->setStringField(in.getStringField(i), i);
break;
@ -568,6 +572,7 @@ dec2: uint64_t val = in.getIntField(i);
break;
}
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR: {
string d = DataConvert::dateToString(in.getUintField(i));
out->setStringField(d, i);
@ -593,6 +598,7 @@ dec2: uint64_t val = in.getIntField(i);
break;
}
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR: {
string d = DataConvert::datetimeToString(in.getUintField(i));
out->setStringField(d, i);
@ -639,6 +645,7 @@ dec2: uint64_t val = in.getIntField(i);
out->setDoubleField(val, i);
break;
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR: {
ostringstream os;
os.precision(15); // to match mysql's output
@ -706,6 +713,7 @@ dec3: /* have to pick a scale to use for the double. using 5... */
break;
}
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR:
default: {
char buf[50];
@ -722,6 +730,7 @@ dec3: /* have to pick a scale to use for the double. using 5... */
}
break;
}
case CalpontSystemCatalog::BLOB:
case CalpontSystemCatalog::VARBINARY: {
// out->setVarBinaryField(in.getVarBinaryStringField(i), i); // not efficient
out->setVarBinaryField(in.getVarBinaryField(i), in.getVarBinaryLength(i), i);
@ -856,6 +865,7 @@ void TupleUnion::writeNull(Row *out, uint32_t col)
case CalpontSystemCatalog::DATETIME:
out->setUintField<8>(joblist::DATETIMENULL, col); break;
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::VARCHAR: {
uint32_t len = out->getColumnWidth(col);
switch (len) {
@ -872,6 +882,7 @@ void TupleUnion::writeNull(Row *out, uint32_t col)
}
break;
}
case CalpontSystemCatalog::BLOB:
case CalpontSystemCatalog::VARBINARY:
// could use below if zero length and NULL are treated the same
// out->setVarBinaryField("", col); break;

View File

@ -540,7 +540,8 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo)
{
CalpontSystemCatalog::ColType rt = wc->resultType();
if ((types[ridx] == CalpontSystemCatalog::CHAR ||
types[ridx] == CalpontSystemCatalog::VARCHAR) &&
types[ridx] == CalpontSystemCatalog::VARCHAR ||
types[ridx] == CalpontSystemCatalog::TEXT) &&
rg.getColumnWidth(ridx) >= jobInfo.stringTableThreshold)
{
wfsUpdateStringTable++;

View File

@ -524,7 +524,7 @@ inline bool colCompare(int64_t val1, int64_t val2, uint8_t COP, uint8_t rf, int
}
else if ( (type == CalpontSystemCatalog::CHAR || type == CalpontSystemCatalog::VARCHAR ||
type == CalpontSystemCatalog::BLOB || type == CalpontSystemCatalog::TEXT) && !isNull )
type == CalpontSystemCatalog::TEXT) && !isNull )
{
if (!regex.used && !rf)
return colCompare_(order_swap(val1), order_swap(val2), COP);

View File

@ -95,7 +95,6 @@ Command* FilterCommand::makeFilterCommand(ByteStream& bs, vector<SCommand>& cmds
// due to endian issue
if (cmd0->getColType().colDataType == execplan::CalpontSystemCatalog::CHAR ||
cmd0->getColType().colDataType == execplan::CalpontSystemCatalog::VARCHAR ||
cmd0->getColType().colDataType == execplan::CalpontSystemCatalog::BLOB ||
cmd0->getColType().colDataType == execplan::CalpontSystemCatalog::TEXT)
{
StrFilterCmd* sc = new StrFilterCmd();

View File

@ -1072,6 +1072,7 @@ boost::any
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::TEXT:
{
//check data length
if ( data.length() > (unsigned int)colType.colWidth )
@ -1122,7 +1123,6 @@ boost::any
break;
case CalpontSystemCatalog::BLOB:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::CLOB:
value = data;
break;
@ -1263,6 +1263,7 @@ boost::any
}
break;
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::TEXT:
{
std::string charnull;
if (colType.colWidth == 1 )
@ -1291,6 +1292,7 @@ boost::any
}
break;
case CalpontSystemCatalog::VARBINARY:
case CalpontSystemCatalog::BLOB:
{
WriteEngine::Token nullToken;
value = nullToken;