1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-02 06:13:16 +03:00

feat(joblist): MCOL-5756 MCOL-5222 ORDER BY on UNIONs in outer select

This commit is contained in:
Aleksei Antipovskii
2025-08-26 02:44:56 +02:00
committed by Leonid Fedorov
parent ebde9b5648
commit 3a316181e7
10 changed files with 334 additions and 93 deletions

View File

@@ -5049,7 +5049,7 @@ void associateTupleJobSteps(JobStepVector& querySteps, JobStepVector& projectSte
adjustLastStep(querySteps, deliverySteps, jobInfo); // to match the select clause
}
SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo)
SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo, uint32_t keyCount)
{
vector<RowGroup> inputRGs;
vector<bool> distinct;
@@ -5124,7 +5124,7 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo&
dl->OID(CNX_VTABLE_ID);
JobStepAssociation jsa;
jsa.outAdd(spdl);
TupleUnion* unionStep = new TupleUnion(CNX_VTABLE_ID, jobInfo);
TupleUnion* unionStep = new TupleUnion(CNX_VTABLE_ID, jobInfo, keyCount);
unionStep->inputAssociation(jsaToUnion);
unionStep->outputAssociation(jsa);
@@ -5167,6 +5167,11 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo&
for (size_t i = 0; i < jobInfo.deliveredCols.size(); i++)
{
auto* cc = dynamic_cast<ConstantColumn*>(jobInfo.deliveredCols[i].get());
if (cc)
{
jobInfo.deliveredCols[i].reset(new SimpleColumn(*jobInfo.deliveredCols[i]));
}
CalpontSystemCatalog::ColType ct = jobInfo.deliveredCols[i]->resultType();
// XXX remove after connector change
ct.colDataType = types[i];

View File

@@ -129,7 +129,7 @@ void associateTupleJobSteps(JobStepVector& querySteps, JobStepVector& projectSte
void orExpresssion(const execplan::Operator* op, JobInfo& jobInfo);
// union the queries and return the tuple union step
SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo);
SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo, uint32_t keyCount);
// Used for join graph analysis.
// WHITE - node is not processed.

View File

@@ -1985,29 +1985,200 @@ void makeJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVec
makeVtableModeSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps);
}
void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps,
JobStepVector&, DeliveredTableMap& deliverySteps)
void fixUnionExpressionCol(ParseTree* tree, void* obj)
{
CalpontSelectExecutionPlan::SelectList& selectVec = csep->unionVec();
if (tree->left() || tree->right())
{
return;
}
auto* ac = dynamic_cast<ArithmeticColumn*>(tree->data());
if (ac && ac->expression())
{
ac->expression()->walk(fixUnionExpressionCol, obj);
ac->setSimpleColumnList();
return;
}
auto* fc = dynamic_cast<FunctionColumn*>(tree->data());
if (fc && !fc->functionParms().empty())
{
for (auto& parm : fc->functionParms())
{
parm->walk(fixUnionExpressionCol, obj);
}
fc->setSimpleColumnList();
return;
}
auto* csep = static_cast<CalpontSelectExecutionPlan*>(obj);
auto* rc = dynamic_cast<ReturnedColumn*>(tree->data());
if (rc)
{
if (dynamic_cast<ConstantColumn*>(rc) || rc->orderPos() == -1ull)
{
return;
}
auto* newrc = csep->returnedCols()[rc->orderPos()]->clone();
csep->returnedCols()[rc->orderPos()]->incRefCount();
tree->data(newrc);
}
}
void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps,
JobStepVector& /*projectSteps*/, DeliveredTableMap& deliverySteps)
{
CalpontSelectExecutionPlan::SelectList& unionVec = csep->unionVec();
uint8_t distinctUnionNum = csep->distinctUnionNum();
RetColsVector unionRetCols = csep->returnedCols();
uint32_t unionRetColsCount = csep->returnedCols().size();
JobStepVector unionFeeders;
for (CalpontSelectExecutionPlan::SelectList::iterator cit = selectVec.begin(); cit != selectVec.end();
cit++)
std::decay_t<decltype(csep->orderByCols())> expOrderByCols;
for (auto& obc : csep->orderByCols())
{
// @bug4848, enhance and unify limit handling.
SJSTEP sub = doUnionSub(cit->get(), jobInfo);
if (obc->orderPos() != -1ull)
{
continue;
}
if (dynamic_cast<SimpleColumn*>(obc.get()) == nullptr &&
dynamic_cast<ConstantColumn*>(obc.get()) == nullptr)
{
// Arithmetic & function columns need special processing
expOrderByCols.push_back(obc);
}
}
for (auto& unionSub : unionVec)
{
auto* unionCSEP = dynamic_cast<CalpontSelectExecutionPlan*>(unionSub.get());
for (auto& obc : expOrderByCols)
{
// Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table in
// the UNION, and add the expression to the returned columns.
auto* col = obc->clone();
auto* ac = dynamic_cast<ArithmeticColumn*>(col);
auto* fc = dynamic_cast<FunctionColumn*>(col);
if (ac)
{
ac->expression()->walk(fixUnionExpressionCol, unionCSEP);
ac->setSimpleColumnList();
}
else if (fc)
{
for (auto& parm : fc->functionParms())
{
parm->walk(fixUnionExpressionCol, unionCSEP);
}
fc->setSimpleColumnList();
}
unionCSEP->returnedCols().emplace_back(col);
}
SJSTEP sub = doUnionSub(unionSub.get(), jobInfo);
querySteps.push_back(sub);
unionFeeders.push_back(sub);
}
jobInfo.deliveredCols = unionRetCols;
SJSTEP unionStep(unionQueries(unionFeeders, distinctUnionNum, jobInfo));
for (auto& obc : expOrderByCols)
{
// Add a SimpleColumn to the outer query for the every ORDER BY expression
auto* sc = new SimpleColumn(*obc.get());
csep->returnedCols().emplace_back(sc);
sc->colPosition(csep->returnedCols().size() - 1);
sc->orderPos(csep->returnedCols().size() - 1);
obc->orderPos(csep->returnedCols().size() - 1);
}
jobInfo.deliveredCols = csep->returnedCols();
SJSTEP unionStep(unionQueries(unionFeeders, distinctUnionNum, jobInfo, unionRetColsCount));
querySteps.push_back(unionStep);
uint16_t stepNo = jobInfo.subId * 10000;
numberSteps(querySteps, stepNo, jobInfo.traceFlags);
deliverySteps[execplan::CNX_VTABLE_ID] = unionStep;
if (!csep->orderByCols().empty() || csep->limitStart() != 0 || csep->limitNum() != -1ull)
{
jobInfo.limitStart = csep->limitStart();
jobInfo.limitCount = csep->limitNum();
jobInfo.orderByThreads = csep->orderByThreads();
for (auto& obc : csep->orderByCols())
{
auto* osc = dynamic_cast<SimpleColumn*>(obc.get());
if (osc)
{
auto* sc = dynamic_cast<SimpleColumn*>(jobInfo.deliveredCols[obc->orderPos()].get());
idbassert(sc);
sc->schemaName("");
sc->tableAlias(querySteps[0]->alias());
sc->colPosition(obc->orderPos());
sc->oid(tableOid(sc, jobInfo.csc) + 1 + obc->orderPos());
jobInfo.orderByColVec.emplace_back(getTupleKey(jobInfo, sc), obc->asc());
}
else
{
auto* tus = dynamic_cast<TupleUnion*>(unionStep.get());
auto& keys = tus->getOutputRowGroup().getKeys();
idbassert(obc->orderPos() < keys.size());
jobInfo.orderByColVec.emplace_back(keys[obc->orderPos()], obc->asc());
}
}
for (auto& rc : csep->returnedCols())
{
// Replace ConstantColumns with SimpleColumns and fix OIDs
auto* sc = dynamic_cast<SimpleColumn*>(rc.get());
if (sc)
{
sc->schemaName("");
sc->tableAlias(querySteps[0]->alias());
sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition());
}
else
{
sc = new SimpleColumn(*rc.get());
rc.reset(sc);
sc->schemaName("");
sc->tableAlias(querySteps[0]->alias());
sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition());
}
}
doProject(csep->returnedCols(), jobInfo);
checkReturnedColumns(csep, jobInfo);
auto* tas = new TupleAnnexStep(jobInfo);
jobInfo.annexStep.reset(tas);
tas->setLimit(jobInfo.limitStart, jobInfo.limitCount);
if (!jobInfo.orderByColVec.empty())
{
tas->addOrderBy(new LimitedOrderBy());
if (jobInfo.orderByThreads > 1)
{
tas->setParallelOp();
}
tas->setMaxThreads(jobInfo.orderByThreads);
}
auto* tds = dynamic_cast<TupleDeliveryStep*>(unionStep.get());
RowGroup rg = tds->getDeliveredRowGroup();
AnyDataListSPtr spdlIn(new AnyDataList());
RowGroupDL* dlIn;
if (!jobInfo.orderByColVec.empty())
dlIn = new RowGroupDL(jobInfo.orderByThreads, jobInfo.fifoSize);
else
dlIn = new RowGroupDL(1, jobInfo.fifoSize);
dlIn->OID(CNX_VTABLE_ID);
spdlIn->rowGroupDL(dlIn);
JobStepAssociation jsaIn;
jsaIn.outAdd(spdlIn);
dynamic_cast<JobStep*>(tds)->outputAssociation(jsaIn);
jobInfo.annexStep->inputAssociation(jsaIn);
AnyDataListSPtr spdlOut(new AnyDataList());
RowGroupDL* dlOut = new RowGroupDL(1, jobInfo.fifoSize);
dlOut->OID(CNX_VTABLE_ID);
spdlOut->rowGroupDL(dlOut);
JobStepAssociation jsaOut;
jsaOut.outAdd(spdlOut);
jobInfo.annexStep->outputAssociation(jsaOut);
querySteps.push_back(jobInfo.annexStep);
dynamic_cast<TupleAnnexStep*>(jobInfo.annexStep.get())->initialize(rg, jobInfo);
deliverySteps[CNX_VTABLE_ID] = jobInfo.annexStep;
}
}
} // namespace joblist

View File

@@ -1305,7 +1305,7 @@ inline uint64_t TupleUnion::Hasher::operator()(const RowPosition& p) const
else
ts->rowMemory[p.group].getRow(p.row, &row);
return row.hash();
return row.hash(ts->fLastCol);
}
inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition& d2) const
@@ -1322,10 +1322,10 @@ inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition&
else
ts->rowMemory[d2.group].getRow(d2.row, &r2);
return r1.equals(r2);
return r1.equals(r2, ts->fLastCol);
}
TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo)
TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, uint32_t keyCount)
: JobStep(jobInfo)
, fTableOID(tableOID)
, output(NULL)
@@ -1340,6 +1340,7 @@ TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInf
, joinRan(false)
, sessionMemLimit(jobInfo.umMemLimit)
, fTimeZone(jobInfo.timeZone)
, fLastCol(keyCount - 1)
{
uniquer.reset(new Uniquer_t(10, Hasher(this), Eq(this), allocator));
fExtendedInfo = "TUN: ";

View File

@@ -43,7 +43,7 @@ using normalizeFunctionsT =
class TupleUnion : public JobStep, public TupleDeliveryStep
{
public:
TupleUnion(execplan::CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo);
TupleUnion(execplan::CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, uint32_t keyCount);
~TupleUnion() override;
void run() override;
@@ -200,6 +200,7 @@ class TupleUnion : public JobStep, public TupleDeliveryStep
boost::shared_ptr<int64_t> sessionMemLimit;
long fTimeZone;
uint32_t fLastCol;
};
} // namespace joblist

View File

@@ -505,7 +505,6 @@ bool sortItemIsInGroupRec(Item* sort_item, Item* group_item)
Item* ifp_sort_arg = ifp_sort->arguments()[i];
if (ifp_sort_arg->type() == Item::FUNC_ITEM || ifp_sort_arg->type() == Item::FIELD_ITEM)
{
Item* ifp_sort_arg = ifp_sort->arguments()[i];
found = sortItemIsInGroupRec(ifp_sort_arg, group_item);
}
else if (ifp_sort_arg->type() == Item::REF_ITEM)
@@ -2736,7 +2735,8 @@ ReturnedColumn* buildReturnedColumnNull(gp_walk_info& gwi)
return rc;
}
ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& nonSupport, bool /*isRefItem*/)
ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& nonSupport, bool /*isRefItem*/,
bool isUnion)
{
ReturnedColumn* rc = NULL;
@@ -2758,7 +2758,7 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non
{
Item_field* ifp = (Item_field*)item;
return wrapIntoAggregate(buildSimpleColumn(ifp, gwi), gwi, ifp);
return wrapIntoAggregate(buildSimpleColumn(ifp, gwi, isUnion), gwi, ifp);
}
case Item::NULL_ITEM: return buildReturnedColumnNull(gwi);
case Item::CONST_ITEM:
@@ -2797,10 +2797,10 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non
}
if (func_name == "+" || func_name == "-" || func_name == "*" || func_name == "/")
return buildArithmeticColumn(ifp, gwi, nonSupport);
return buildArithmeticColumn(ifp, gwi, nonSupport, isUnion);
else
{
return buildFunctionColumn(ifp, gwi, nonSupport);
return buildFunctionColumn(ifp, gwi, nonSupport, false, isUnion);
}
}
@@ -2817,11 +2817,13 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non
{
case Item::SUM_FUNC_ITEM: return buildAggregateColumn(*(ref->ref), gwi);
case Item::FIELD_ITEM: return buildReturnedColumn(*(ref->ref), gwi, nonSupport);
case Item::FIELD_ITEM: return buildReturnedColumn(*(ref->ref), gwi, nonSupport, false, isUnion);
case Item::REF_ITEM: return buildReturnedColumn(*(((Item_ref*)(*(ref->ref)))->ref), gwi, nonSupport);
case Item::REF_ITEM:
return buildReturnedColumn(*(((Item_ref*)(*(ref->ref)))->ref), gwi, nonSupport, false, isUnion);
case Item::FUNC_ITEM: return buildFunctionColumn((Item_func*)(*(ref->ref)), gwi, nonSupport);
case Item::FUNC_ITEM:
return buildFunctionColumn((Item_func*)(*(ref->ref)), gwi, nonSupport, false, isUnion);
case Item::WINDOW_FUNC_ITEM: return buildWindowFunctionColumn(*(ref->ref), gwi, nonSupport);
@@ -2833,7 +2835,7 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non
default:
if (ref->ref_type() == Item_ref::DIRECT_REF)
{
return buildReturnedColumn(ref->real_item(), gwi, nonSupport);
return buildReturnedColumn(ref->real_item(), gwi, nonSupport, false, isUnion);
}
gwi.fatalParseError = true;
gwi.parseErrorText = "Unknown REF item";
@@ -2922,11 +2924,12 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non
return rc;
}
ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport, bool isRefItem)
ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport, bool isRefItem,
bool isUnion)
{
bool disableWrapping = gwi.disableWrapping;
gwi.disableWrapping = gwi.disableWrapping || itemDisablesWrapping(item, gwi);
ReturnedColumn* rc = buildReturnedColumnBody(item, gwi, nonSupport, isRefItem);
ReturnedColumn* rc = buildReturnedColumnBody(item, gwi, nonSupport, isRefItem, isUnion);
gwi.disableWrapping = disableWrapping;
return rc;
}
@@ -2960,7 +2963,7 @@ ReturnedColumn* buildBooleanConstantColumn(Item* item, gp_walk_info& gwi, bool&
return cc;
}
ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bool& nonSupport)
ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool isUnion)
{
if (get_fe_conn_info_ptr() == NULL)
{
@@ -2989,7 +2992,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
if (gwi.clauseType == SELECT || /*gwi.clauseType == HAVING || */ gwi.clauseType == GROUP_BY ||
gwi.clauseType == FROM) // select list
{
lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport));
lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion));
if (!lhs->data() && (sfitempp[0]->type() == Item::FUNC_ITEM))
{
@@ -3004,12 +3007,12 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
gwi.fatalParseError = false;
// ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi);
ReturnedColumn* rc = buildReturnedColumn(sfitempp[0], gwi, nonSupport);
ReturnedColumn* rc = buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion);
if (rc)
lhs = new ParseTree(rc);
}
rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport));
rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport, false, isUnion));
if (!rhs->data() && (sfitempp[1]->type() == Item::FUNC_ITEM))
{
@@ -3024,7 +3027,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
gwi.fatalParseError = false;
// ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi);
ReturnedColumn* rc = buildReturnedColumn(sfitempp[1], gwi, nonSupport);
ReturnedColumn* rc = buildReturnedColumn(sfitempp[1], gwi, nonSupport, false, isUnion);
if (rc)
rhs = new ParseTree(rc);
}
@@ -3035,7 +3038,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
{
if (gwi.ptWorkStack.empty())
{
rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport));
rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport, false, isUnion));
}
else
{
@@ -3047,7 +3050,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
{
if (gwi.rcWorkStack.empty())
{
rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport));
rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport, false, isUnion));
}
else
{
@@ -3060,7 +3063,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
{
if (gwi.ptWorkStack.empty())
{
lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport));
lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion));
}
else
{
@@ -3072,7 +3075,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
{
if (gwi.rcWorkStack.empty())
{
lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport));
lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion));
}
else
{
@@ -3105,13 +3108,13 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
if (gwi.clauseType == SELECT || gwi.clauseType == HAVING || gwi.clauseType == GROUP_BY) // select clause
{
rhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport));
rhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion));
}
else
{
if (gwi.rcWorkStack.empty())
{
rhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport));
rhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion));
}
else
{
@@ -3233,17 +3236,17 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
}
return ac;
}
ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport)
ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool isUnion)
{
bool disableWrapping = gwi.disableWrapping;
gwi.disableWrapping = gwi.disableWrapping || itemDisablesWrapping(item, gwi);
ReturnedColumn* rc = buildArithmeticColumnBody(item, gwi, nonSupport);
ReturnedColumn* rc = buildArithmeticColumnBody(item, gwi, nonSupport, isUnion);
gwi.disableWrapping = disableWrapping;
return rc;
}
ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport,
bool selectBetweenIn)
bool selectBetweenIn, bool isUnion)
{
if (get_fe_conn_info_ptr() == NULL)
{
@@ -3306,12 +3309,12 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool&
// Arithmetic exp
if (funcName == "+" || funcName == "-" || funcName == "*" || funcName == "/")
{
return buildArithmeticColumn(ifp, gwi, nonSupport);
return buildArithmeticColumn(ifp, gwi, nonSupport, isUnion);
}
else if (funcName == "case")
{
fc = buildCaseFunction(ifp, gwi, nonSupport);
fc = buildCaseFunction(ifp, gwi, nonSupport, isUnion);
}
else if ((funcName == "charset" || funcName == "collation") && ifp->argument_count() == 1 &&
@@ -3486,7 +3489,7 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool&
rc = buildBooleanConstantColumn(ifp->arguments()[i], gwi, nonSupport);
else
{
rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport);
rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport, false, isUnion);
}
// MCOL-1510 It must be a temp table field, so find the corresponding column.
@@ -3853,16 +3856,17 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool&
return fc;
}
ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport, bool selectBetweenIn)
ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport, bool selectBetweenIn,
bool isUnion)
{
bool disableWrapping = gwi.disableWrapping;
gwi.disableWrapping = gwi.disableWrapping || itemDisablesWrapping(ifp, gwi);
ReturnedColumn* rc = buildFunctionColumnBody(ifp, gwi, nonSupport, selectBetweenIn);
ReturnedColumn* rc = buildFunctionColumnBody(ifp, gwi, nonSupport, selectBetweenIn, isUnion);
gwi.disableWrapping = disableWrapping;
return rc;
}
FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport)
FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool isUnion)
{
if (get_fe_conn_info_ptr() == NULL)
{
@@ -3945,7 +3949,7 @@ FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonS
// rwWorkStack or ptWorkStack.
// For example, simple predicates, such as 1=1 or 1=0, land in the
// ptWorkStack but other stuff might land in the rwWorkStack
ReturnedColumn* parm = buildReturnedColumn(item->arguments()[i], gwi, nonSupport);
ReturnedColumn* parm = buildReturnedColumn(item->arguments()[i], gwi, nonSupport, false, isUnion);
if (parm)
{
@@ -4085,7 +4089,7 @@ ConstantColumn* buildDecimalColumn(const Item* idp, const std::string& valStr, g
return cc;
}
SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi)
SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi, bool isUnion)
{
if (!gwi.csc)
{
@@ -4100,6 +4104,14 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi)
strcmp(ifp->cached_table->db.str, "information_schema") == 0)
isInformationSchema = true;
if (isUnion && !isInformationSchema)
{
auto* rc = gwi.returnedCols[ifp->field->field_index]->clone();
rc->orderPos(ifp->field->field_index);
gwi.returnedCols[ifp->field->field_index]->incRefCount();
return dynamic_cast<SimpleColumn*>(rc);
}
// support FRPM subquery. columns from the derived table has no definition
if ((!ifp->field || !ifp->db_name.str || strlen(ifp->db_name.str) == 0) && !isInformationSchema)
return buildSimpleColFromDerivedTable(gwi, ifp);
@@ -4557,7 +4569,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi)
case Item::FIELD_ITEM:
{
Item_field* ifp = static_cast<Item_field*>(sfitemp);
SimpleColumn* sc = buildSimpleColumn(ifp, gwi);
SimpleColumn* sc = buildSimpleColumn(ifp, gwi, false);
if (!sc)
{
@@ -5674,7 +5686,7 @@ int processGroupBy(SELECT_LEX& select_lex, gp_walk_info& gwi, const bool withRol
{
Item_field* ifp = (Item_field*)groupItem;
// this GB col could be an alias of F&E on the SELECT clause, not necessarily a field.
ReturnedColumn* rc = buildSimpleColumn(ifp, gwi);
ReturnedColumn* rc = buildSimpleColumn(ifp, gwi, false);
SimpleColumn* sc = dynamic_cast<SimpleColumn*>(rc);
if (sc)
@@ -6310,7 +6322,8 @@ void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi)
auto* histogram = dynamic_cast<Histogram_json_hb*>(ifp->field->read_stats->histogram);
if (histogram)
{
SchemaAndTableName tableName = {ifp->field->table->s->db.str, ifp->field->table->s->table_name.str};
SchemaAndTableName tableName = {ifp->field->table->s->db.str,
ifp->field->table->s->table_name.str};
gwi.tableStatisticsMap[tableName][ifp->field->field_name.str] = *histogram;
}
}
@@ -6420,7 +6433,7 @@ int processSelect(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, vector
collectAllCols(gwi, ifp);
break;
}
SimpleColumn* sc = buildSimpleColumn(ifp, gwi);
SimpleColumn* sc = buildSimpleColumn(ifp, gwi, false);
if (sc)
{
@@ -7015,32 +7028,90 @@ int processOrderBy(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep,
gwi.returnedCols.push_back(minSc);
}
// ORDER BY translation part
// process UNION ORDER BY part
if (!isUnion && !gwi.hasWindowFunc && gwi.subSelectType == CalpontSelectExecutionPlan::MAIN_SELECT)
{
{
if (unionSel)
{
order_list = select_lex.master_unit()->global_parameters()->order_list;
ordercol = static_cast<ORDER*>(order_list.first);
for (; ordercol; ordercol = ordercol->next)
{
Item* ord_item = *(ordercol->item);
ReturnedColumn* rc = NULL;
if (ord_item->name.length)
{
// for union order by 1 case. For unknown reason, it doesn't show in_field_list
if (ord_item->type() == Item::CONST_ITEM && ord_item->cmp_type() == INT_RESULT)
{
}
else if (ord_item->type() == Item::SUBSELECT_ITEM)
if (ordercol->in_field_list && ordercol->item &&
(ordercol->counter_used || (*ordercol->item)->type() == Item::FIELD_ITEM))
{
auto* ifp = dynamic_cast<Item_field*>(*ordercol->item);
auto pos = ordercol->counter_used ? ordercol->counter - 1 : ifp->field->field_index;
rc = gwi.returnedCols[pos]->clone();
rc->orderPos(pos);
// can not be optimized off if used in order by with counter.
// set with self derived table alias if it's derived table
gwi.returnedCols[pos]->incRefCount();
}
else
{
Item* ord_item = *(ordercol->item);
// ignore not_used column on order by.
if ((ord_item->type() == Item::CONST_ITEM && ord_item->cmp_type() == INT_RESULT) &&
ord_item->full_name() && !strcmp(ord_item->full_name(), "Not_used"))
{
continue;
}
else if (ord_item->type() == Item::CONST_ITEM && ord_item->cmp_type() == INT_RESULT)
{
// DRRTUY This section looks useless b/c there is no
// way to put constant INT into an ORDER BY list
rc = gwi.returnedCols[((Item_int*)ord_item)->val_int() - 1]->clone();
}
else if (ord_item->type() == Item::SUBSELECT_ITEM)
{
gwi.fatalParseError = true;
}
else if ((ord_item->type() == Item::FUNC_ITEM) &&
(((Item_func*)ord_item)->functype() == Item_func::COLLATE_FUNC))
{
push_warning(gwi.thd, Sql_condition::WARN_LEVEL_NOTE, WARN_OPTION_IGNORED,
"COLLATE is ignored in ColumnStore");
continue;
}
else
{
rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError, false, true);
rc = wrapIntoAggregate(rc, gwi, ord_item);
}
// @bug5501 try item_ptr if item can not be fixed. For some
// weird dml statement state, item can not be fixed but the
// infomation is available in item_ptr.
if (!rc || gwi.fatalParseError)
{
Item* item_ptr = ordercol->item_ptr;
while (item_ptr->type() == Item::REF_ITEM)
item_ptr = *(((Item_ref*)item_ptr)->ref);
rc = buildReturnedColumn(item_ptr, gwi, gwi.fatalParseError, false, true);
}
if (!rc)
{
string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_ORDER_BY);
gwi.parseErrorText = emsg;
setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, emsg, gwi);
return ER_CHECK_NOT_IMPLEMENTED;
}
}
if (ordercol->direction == ORDER::ORDER_ASC)
rc->asc(true);
else
rc->asc(false);
gwi.orderByCols.push_back(SRCP(rc));
}
}
@@ -7137,7 +7208,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
for (uint32_t i = 0; i < funcFieldVec.size(); i++)
{
SimpleColumn* sc = buildSimpleColumn(funcFieldVec[i], gwi);
SimpleColumn* sc = buildSimpleColumn(funcFieldVec[i], gwi, false);
if (!sc || gwi.fatalParseError)
{
@@ -7196,10 +7267,10 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
bool unionSel = (!isUnion && select_lex.master_unit()->is_unit_op()) ? true : false;
gwi.clauseType = GROUP_BY;
// Group by list. not valid for union main query
if (!unionSel)
{
gwi.clauseType = GROUP_BY;
if ((rc = processGroupBy(select_lex, gwi, withRollup)))
{
return rc;
@@ -7234,6 +7305,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
minSc = gwi.returnedCols[0];
else if (!gwi.additionalRetCols.empty())
minSc = gwi.additionalRetCols[0];
minSc->orderPos(0);
}
// @bug3523, count(*) on subquery always pick column[0].
@@ -7250,6 +7322,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
sc1->viewName(sc->viewName());
sc1->partitions(sc->partitions());
sc1->colPosition(0);
sc1->orderPos(0);
sc1->timeZone(gwi.timeZone);
minSc.reset(sc1);
}

View File

@@ -119,7 +119,7 @@ void gp_walk(const Item* item, void* arg)
if (ifp)
{
// XXX: this looks awfuly wrong.
execplan::SimpleColumn* scp = buildSimpleColumn(ifp, *gwip);
execplan::SimpleColumn* scp = buildSimpleColumn(ifp, *gwip, false);
if (!scp)
break;

View File

@@ -95,16 +95,20 @@ enum ClauseType
ORDER_BY
};
struct SchemaAndTableName {
struct SchemaAndTableName
{
std::string schema;
std::string table;
bool operator==(const SchemaAndTableName& other) const {
bool operator==(const SchemaAndTableName& other) const
{
return schema == other.schema && table == other.table;
}
};
struct SchemaAndTableNameHash {
std::size_t operator()(const SchemaAndTableName& k) const {
struct SchemaAndTableNameHash
{
std::size_t operator()(const SchemaAndTableName& k) const
{
return std::hash<std::string>()(k.schema + k.table);
}
};
@@ -117,7 +121,8 @@ typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
using ColumnName = std::string;
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>;
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
using TableStatisticsMap =
std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
// This structure is used to store MDB AST -> CSEP translation context.
// There is a column statistics for some columns in a query.
@@ -434,13 +439,15 @@ const std::string bestTableName(const Item_field* ifp);
// execution plan util functions prototypes
execplan::ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport,
bool isRefItem = false);
bool isRefItem = false, bool isUnion = false);
execplan::ReturnedColumn* buildFunctionColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport,
bool selectBetweenIn = false);
execplan::ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport);
bool selectBetweenIn = false, bool isUnion = false);
execplan::ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport,
bool isUnion);
execplan::ConstantColumn* buildDecimalColumn(const Item* item, const std::string& str, gp_walk_info& gwi);
execplan::SimpleColumn* buildSimpleColumn(Item_field* item, gp_walk_info& gwi);
execplan::FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport);
execplan::SimpleColumn* buildSimpleColumn(Item_field* item, gp_walk_info& gwi, bool isUnion);
execplan::FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport,
bool isUnion);
execplan::ParseTree* buildParseTree(Item* item, gp_walk_info& gwi, bool& nonSupport);
execplan::ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi);
execplan::ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& nonSupport);

View File

@@ -633,15 +633,6 @@ select_handler* create_columnstore_select_handler_(THD* thd, SELECT_LEX* sel_lex
return nullptr;
}
// MCOL-5432 Disable partial pushdown of the UNION operation if the query
// involves an order by or a limit clause.
if (sel_lex && sel_unit &&
(sel_unit->global_parameters()->limit_params.explicit_limit == true ||
sel_unit->global_parameters()->order_list.elements != 0))
{
return nullptr;
}
std::vector<SELECT_LEX*> select_lex_vec;
if (sel_unit && !sel_lex)
@@ -912,14 +903,6 @@ select_handler* create_columnstore_unit_handler(THD* thd, SELECT_LEX_UNIT* sel_u
return nullptr;
}
// MCOL-5432 Disable UNION pushdown if the query involves an order by
// or a limit clause.
if (sel_unit->global_parameters()->limit_params.explicit_limit == true ||
sel_unit->global_parameters()->order_list.elements != 0)
{
return nullptr;
}
return create_columnstore_select_handler_(thd, 0, sel_unit);
}

View File

@@ -423,7 +423,7 @@ execplan::ReturnedColumn* buildPseudoColumn(Item* item, gp_walk_info& gwi, bool&
if (!field->field || !field->db_name.str || strlen(field->db_name.str) == 0)
return nullOnError(gwi, funcName);
SimpleColumn* sc = buildSimpleColumn(field, gwi);
SimpleColumn* sc = buildSimpleColumn(field, gwi, false);
if (!sc)
return nullOnError(gwi, funcName);