diff --git a/dbcon/joblist/jlf_tuplejoblist.cpp b/dbcon/joblist/jlf_tuplejoblist.cpp index 96add786b..c256351ec 100644 --- a/dbcon/joblist/jlf_tuplejoblist.cpp +++ b/dbcon/joblist/jlf_tuplejoblist.cpp @@ -5049,7 +5049,7 @@ void associateTupleJobSteps(JobStepVector& querySteps, JobStepVector& projectSte adjustLastStep(querySteps, deliverySteps, jobInfo); // to match the select clause } -SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo) +SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo, uint32_t keyCount) { vector inputRGs; vector distinct; @@ -5124,7 +5124,7 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& dl->OID(CNX_VTABLE_ID); JobStepAssociation jsa; jsa.outAdd(spdl); - TupleUnion* unionStep = new TupleUnion(CNX_VTABLE_ID, jobInfo); + TupleUnion* unionStep = new TupleUnion(CNX_VTABLE_ID, jobInfo, keyCount); unionStep->inputAssociation(jsaToUnion); unionStep->outputAssociation(jsa); @@ -5167,6 +5167,11 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& for (size_t i = 0; i < jobInfo.deliveredCols.size(); i++) { + auto* cc = dynamic_cast(jobInfo.deliveredCols[i].get()); + if (cc) + { + jobInfo.deliveredCols[i].reset(new SimpleColumn(*jobInfo.deliveredCols[i])); + } CalpontSystemCatalog::ColType ct = jobInfo.deliveredCols[i]->resultType(); // XXX remove after connector change ct.colDataType = types[i]; diff --git a/dbcon/joblist/jlf_tuplejoblist.h b/dbcon/joblist/jlf_tuplejoblist.h index 8514f6443..b49c9840c 100644 --- a/dbcon/joblist/jlf_tuplejoblist.h +++ b/dbcon/joblist/jlf_tuplejoblist.h @@ -129,7 +129,7 @@ void associateTupleJobSteps(JobStepVector& querySteps, JobStepVector& projectSte void orExpresssion(const execplan::Operator* op, JobInfo& jobInfo); // union the queries and return the tuple union step -SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo); +SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& jobInfo, uint32_t keyCount); // Used for join graph analysis. // WHITE - node is not processed. diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index d68119f8b..c4e85005c 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -1985,29 +1985,200 @@ void makeJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVec makeVtableModeSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps); } -void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps, - JobStepVector&, DeliveredTableMap& deliverySteps) +void fixUnionExpressionCol(ParseTree* tree, void* obj) { - CalpontSelectExecutionPlan::SelectList& selectVec = csep->unionVec(); + if (tree->left() || tree->right()) + { + return; + } + auto* ac = dynamic_cast(tree->data()); + if (ac && ac->expression()) + { + ac->expression()->walk(fixUnionExpressionCol, obj); + ac->setSimpleColumnList(); + return; + } + auto* fc = dynamic_cast(tree->data()); + if (fc && !fc->functionParms().empty()) + { + for (auto& parm : fc->functionParms()) + { + parm->walk(fixUnionExpressionCol, obj); + } + fc->setSimpleColumnList(); + return; + } + auto* csep = static_cast(obj); + auto* rc = dynamic_cast(tree->data()); + if (rc) + { + if (dynamic_cast(rc) || rc->orderPos() == -1ull) + { + return; + } + auto* newrc = csep->returnedCols()[rc->orderPos()]->clone(); + csep->returnedCols()[rc->orderPos()]->incRefCount(); + tree->data(newrc); + } +} + +void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps, + JobStepVector& /*projectSteps*/, DeliveredTableMap& deliverySteps) +{ + CalpontSelectExecutionPlan::SelectList& unionVec = csep->unionVec(); uint8_t distinctUnionNum = csep->distinctUnionNum(); - RetColsVector unionRetCols = csep->returnedCols(); + uint32_t unionRetColsCount = csep->returnedCols().size(); JobStepVector unionFeeders; - for (CalpontSelectExecutionPlan::SelectList::iterator cit = selectVec.begin(); cit != selectVec.end(); - cit++) + std::decay_torderByCols())> expOrderByCols; + for (auto& obc : csep->orderByCols()) { - // @bug4848, enhance and unify limit handling. - SJSTEP sub = doUnionSub(cit->get(), jobInfo); + if (obc->orderPos() != -1ull) + { + continue; + } + if (dynamic_cast(obc.get()) == nullptr && + dynamic_cast(obc.get()) == nullptr) + { + // Arithmetic & function columns need special processing + expOrderByCols.push_back(obc); + } + } + + for (auto& unionSub : unionVec) + { + auto* unionCSEP = dynamic_cast(unionSub.get()); + for (auto& obc : expOrderByCols) + { + // Replace any leaf of expressions in the ORDER BY list with the corresponding column for each table in + // the UNION, and add the expression to the returned columns. + auto* col = obc->clone(); + auto* ac = dynamic_cast(col); + auto* fc = dynamic_cast(col); + if (ac) + { + ac->expression()->walk(fixUnionExpressionCol, unionCSEP); + ac->setSimpleColumnList(); + } + else if (fc) + { + for (auto& parm : fc->functionParms()) + { + parm->walk(fixUnionExpressionCol, unionCSEP); + } + fc->setSimpleColumnList(); + } + unionCSEP->returnedCols().emplace_back(col); + } + SJSTEP sub = doUnionSub(unionSub.get(), jobInfo); querySteps.push_back(sub); unionFeeders.push_back(sub); } - jobInfo.deliveredCols = unionRetCols; - SJSTEP unionStep(unionQueries(unionFeeders, distinctUnionNum, jobInfo)); + for (auto& obc : expOrderByCols) + { + // Add a SimpleColumn to the outer query for the every ORDER BY expression + auto* sc = new SimpleColumn(*obc.get()); + csep->returnedCols().emplace_back(sc); + sc->colPosition(csep->returnedCols().size() - 1); + sc->orderPos(csep->returnedCols().size() - 1); + obc->orderPos(csep->returnedCols().size() - 1); + } + + jobInfo.deliveredCols = csep->returnedCols(); + SJSTEP unionStep(unionQueries(unionFeeders, distinctUnionNum, jobInfo, unionRetColsCount)); querySteps.push_back(unionStep); uint16_t stepNo = jobInfo.subId * 10000; numberSteps(querySteps, stepNo, jobInfo.traceFlags); deliverySteps[execplan::CNX_VTABLE_ID] = unionStep; + if (!csep->orderByCols().empty() || csep->limitStart() != 0 || csep->limitNum() != -1ull) + { + jobInfo.limitStart = csep->limitStart(); + jobInfo.limitCount = csep->limitNum(); + jobInfo.orderByThreads = csep->orderByThreads(); + for (auto& obc : csep->orderByCols()) + { + auto* osc = dynamic_cast(obc.get()); + if (osc) + { + auto* sc = dynamic_cast(jobInfo.deliveredCols[obc->orderPos()].get()); + idbassert(sc); + sc->schemaName(""); + sc->tableAlias(querySteps[0]->alias()); + sc->colPosition(obc->orderPos()); + sc->oid(tableOid(sc, jobInfo.csc) + 1 + obc->orderPos()); + jobInfo.orderByColVec.emplace_back(getTupleKey(jobInfo, sc), obc->asc()); + } + else + { + auto* tus = dynamic_cast(unionStep.get()); + auto& keys = tus->getOutputRowGroup().getKeys(); + idbassert(obc->orderPos() < keys.size()); + jobInfo.orderByColVec.emplace_back(keys[obc->orderPos()], obc->asc()); + } + } + + for (auto& rc : csep->returnedCols()) + { + // Replace ConstantColumns with SimpleColumns and fix OIDs + auto* sc = dynamic_cast(rc.get()); + if (sc) + { + sc->schemaName(""); + sc->tableAlias(querySteps[0]->alias()); + sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition()); + } + else + { + sc = new SimpleColumn(*rc.get()); + rc.reset(sc); + sc->schemaName(""); + sc->tableAlias(querySteps[0]->alias()); + sc->oid(tableOid(sc, jobInfo.csc) + 1 + rc->colPosition()); + } + } + doProject(csep->returnedCols(), jobInfo); + checkReturnedColumns(csep, jobInfo); + auto* tas = new TupleAnnexStep(jobInfo); + jobInfo.annexStep.reset(tas); + tas->setLimit(jobInfo.limitStart, jobInfo.limitCount); + if (!jobInfo.orderByColVec.empty()) + { + tas->addOrderBy(new LimitedOrderBy()); + if (jobInfo.orderByThreads > 1) + { + tas->setParallelOp(); + } + tas->setMaxThreads(jobInfo.orderByThreads); + } + auto* tds = dynamic_cast(unionStep.get()); + RowGroup rg = tds->getDeliveredRowGroup(); + + AnyDataListSPtr spdlIn(new AnyDataList()); + RowGroupDL* dlIn; + if (!jobInfo.orderByColVec.empty()) + dlIn = new RowGroupDL(jobInfo.orderByThreads, jobInfo.fifoSize); + else + dlIn = new RowGroupDL(1, jobInfo.fifoSize); + dlIn->OID(CNX_VTABLE_ID); + spdlIn->rowGroupDL(dlIn); + JobStepAssociation jsaIn; + jsaIn.outAdd(spdlIn); + dynamic_cast(tds)->outputAssociation(jsaIn); + jobInfo.annexStep->inputAssociation(jsaIn); + + AnyDataListSPtr spdlOut(new AnyDataList()); + RowGroupDL* dlOut = new RowGroupDL(1, jobInfo.fifoSize); + dlOut->OID(CNX_VTABLE_ID); + spdlOut->rowGroupDL(dlOut); + JobStepAssociation jsaOut; + jsaOut.outAdd(spdlOut); + jobInfo.annexStep->outputAssociation(jsaOut); + + querySteps.push_back(jobInfo.annexStep); + dynamic_cast(jobInfo.annexStep.get())->initialize(rg, jobInfo); + deliverySteps[CNX_VTABLE_ID] = jobInfo.annexStep; + } } } // namespace joblist diff --git a/dbcon/joblist/tupleunion.cpp b/dbcon/joblist/tupleunion.cpp index cde15c308..fd9212702 100644 --- a/dbcon/joblist/tupleunion.cpp +++ b/dbcon/joblist/tupleunion.cpp @@ -1305,7 +1305,7 @@ inline uint64_t TupleUnion::Hasher::operator()(const RowPosition& p) const else ts->rowMemory[p.group].getRow(p.row, &row); - return row.hash(); + return row.hash(ts->fLastCol); } inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition& d2) const @@ -1322,10 +1322,10 @@ inline bool TupleUnion::Eq::operator()(const RowPosition& d1, const RowPosition& else ts->rowMemory[d2.group].getRow(d2.row, &r2); - return r1.equals(r2); + return r1.equals(r2, ts->fLastCol); } -TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo) +TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, uint32_t keyCount) : JobStep(jobInfo) , fTableOID(tableOID) , output(NULL) @@ -1340,6 +1340,7 @@ TupleUnion::TupleUnion(CalpontSystemCatalog::OID tableOID, const JobInfo& jobInf , joinRan(false) , sessionMemLimit(jobInfo.umMemLimit) , fTimeZone(jobInfo.timeZone) + , fLastCol(keyCount - 1) { uniquer.reset(new Uniquer_t(10, Hasher(this), Eq(this), allocator)); fExtendedInfo = "TUN: "; diff --git a/dbcon/joblist/tupleunion.h b/dbcon/joblist/tupleunion.h index 551642b99..248ae2fd5 100644 --- a/dbcon/joblist/tupleunion.h +++ b/dbcon/joblist/tupleunion.h @@ -43,7 +43,7 @@ using normalizeFunctionsT = class TupleUnion : public JobStep, public TupleDeliveryStep { public: - TupleUnion(execplan::CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo); + TupleUnion(execplan::CalpontSystemCatalog::OID tableOID, const JobInfo& jobInfo, uint32_t keyCount); ~TupleUnion() override; void run() override; @@ -200,6 +200,7 @@ class TupleUnion : public JobStep, public TupleDeliveryStep boost::shared_ptr sessionMemLimit; long fTimeZone; + uint32_t fLastCol; }; } // namespace joblist diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 683021a6b..a6715e26a 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -505,7 +505,6 @@ bool sortItemIsInGroupRec(Item* sort_item, Item* group_item) Item* ifp_sort_arg = ifp_sort->arguments()[i]; if (ifp_sort_arg->type() == Item::FUNC_ITEM || ifp_sort_arg->type() == Item::FIELD_ITEM) { - Item* ifp_sort_arg = ifp_sort->arguments()[i]; found = sortItemIsInGroupRec(ifp_sort_arg, group_item); } else if (ifp_sort_arg->type() == Item::REF_ITEM) @@ -2736,7 +2735,8 @@ ReturnedColumn* buildReturnedColumnNull(gp_walk_info& gwi) return rc; } -ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& nonSupport, bool /*isRefItem*/) +ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& nonSupport, bool /*isRefItem*/, + bool isUnion) { ReturnedColumn* rc = NULL; @@ -2758,7 +2758,7 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non { Item_field* ifp = (Item_field*)item; - return wrapIntoAggregate(buildSimpleColumn(ifp, gwi), gwi, ifp); + return wrapIntoAggregate(buildSimpleColumn(ifp, gwi, isUnion), gwi, ifp); } case Item::NULL_ITEM: return buildReturnedColumnNull(gwi); case Item::CONST_ITEM: @@ -2797,10 +2797,10 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non } if (func_name == "+" || func_name == "-" || func_name == "*" || func_name == "/") - return buildArithmeticColumn(ifp, gwi, nonSupport); + return buildArithmeticColumn(ifp, gwi, nonSupport, isUnion); else { - return buildFunctionColumn(ifp, gwi, nonSupport); + return buildFunctionColumn(ifp, gwi, nonSupport, false, isUnion); } } @@ -2817,11 +2817,13 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non { case Item::SUM_FUNC_ITEM: return buildAggregateColumn(*(ref->ref), gwi); - case Item::FIELD_ITEM: return buildReturnedColumn(*(ref->ref), gwi, nonSupport); + case Item::FIELD_ITEM: return buildReturnedColumn(*(ref->ref), gwi, nonSupport, false, isUnion); - case Item::REF_ITEM: return buildReturnedColumn(*(((Item_ref*)(*(ref->ref)))->ref), gwi, nonSupport); + case Item::REF_ITEM: + return buildReturnedColumn(*(((Item_ref*)(*(ref->ref)))->ref), gwi, nonSupport, false, isUnion); - case Item::FUNC_ITEM: return buildFunctionColumn((Item_func*)(*(ref->ref)), gwi, nonSupport); + case Item::FUNC_ITEM: + return buildFunctionColumn((Item_func*)(*(ref->ref)), gwi, nonSupport, false, isUnion); case Item::WINDOW_FUNC_ITEM: return buildWindowFunctionColumn(*(ref->ref), gwi, nonSupport); @@ -2833,7 +2835,7 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non default: if (ref->ref_type() == Item_ref::DIRECT_REF) { - return buildReturnedColumn(ref->real_item(), gwi, nonSupport); + return buildReturnedColumn(ref->real_item(), gwi, nonSupport, false, isUnion); } gwi.fatalParseError = true; gwi.parseErrorText = "Unknown REF item"; @@ -2922,11 +2924,12 @@ ReturnedColumn* buildReturnedColumnBody(Item* item, gp_walk_info& gwi, bool& non return rc; } -ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport, bool isRefItem) +ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport, bool isRefItem, + bool isUnion) { bool disableWrapping = gwi.disableWrapping; gwi.disableWrapping = gwi.disableWrapping || itemDisablesWrapping(item, gwi); - ReturnedColumn* rc = buildReturnedColumnBody(item, gwi, nonSupport, isRefItem); + ReturnedColumn* rc = buildReturnedColumnBody(item, gwi, nonSupport, isRefItem, isUnion); gwi.disableWrapping = disableWrapping; return rc; } @@ -2960,7 +2963,7 @@ ReturnedColumn* buildBooleanConstantColumn(Item* item, gp_walk_info& gwi, bool& return cc; } -ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bool& nonSupport) +ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool isUnion) { if (get_fe_conn_info_ptr() == NULL) { @@ -2989,7 +2992,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo if (gwi.clauseType == SELECT || /*gwi.clauseType == HAVING || */ gwi.clauseType == GROUP_BY || gwi.clauseType == FROM) // select list { - lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport)); + lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion)); if (!lhs->data() && (sfitempp[0]->type() == Item::FUNC_ITEM)) { @@ -3004,12 +3007,12 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo gwi.fatalParseError = false; // ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi); - ReturnedColumn* rc = buildReturnedColumn(sfitempp[0], gwi, nonSupport); + ReturnedColumn* rc = buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion); if (rc) lhs = new ParseTree(rc); } - rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport)); + rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport, false, isUnion)); if (!rhs->data() && (sfitempp[1]->type() == Item::FUNC_ITEM)) { @@ -3024,7 +3027,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo gwi.fatalParseError = false; // ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi); - ReturnedColumn* rc = buildReturnedColumn(sfitempp[1], gwi, nonSupport); + ReturnedColumn* rc = buildReturnedColumn(sfitempp[1], gwi, nonSupport, false, isUnion); if (rc) rhs = new ParseTree(rc); } @@ -3035,7 +3038,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo { if (gwi.ptWorkStack.empty()) { - rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport)); + rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport, false, isUnion)); } else { @@ -3047,7 +3050,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo { if (gwi.rcWorkStack.empty()) { - rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport)); + rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport, false, isUnion)); } else { @@ -3060,7 +3063,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo { if (gwi.ptWorkStack.empty()) { - lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport)); + lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion)); } else { @@ -3072,7 +3075,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo { if (gwi.rcWorkStack.empty()) { - lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport)); + lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion)); } else { @@ -3105,13 +3108,13 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo if (gwi.clauseType == SELECT || gwi.clauseType == HAVING || gwi.clauseType == GROUP_BY) // select clause { - rhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport)); + rhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion)); } else { if (gwi.rcWorkStack.empty()) { - rhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport)); + rhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, false, isUnion)); } else { @@ -3233,17 +3236,17 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo } return ac; } -ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport) +ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool isUnion) { bool disableWrapping = gwi.disableWrapping; gwi.disableWrapping = gwi.disableWrapping || itemDisablesWrapping(item, gwi); - ReturnedColumn* rc = buildArithmeticColumnBody(item, gwi, nonSupport); + ReturnedColumn* rc = buildArithmeticColumnBody(item, gwi, nonSupport, isUnion); gwi.disableWrapping = disableWrapping; return rc; } ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport, - bool selectBetweenIn) + bool selectBetweenIn, bool isUnion) { if (get_fe_conn_info_ptr() == NULL) { @@ -3306,12 +3309,12 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& // Arithmetic exp if (funcName == "+" || funcName == "-" || funcName == "*" || funcName == "/") { - return buildArithmeticColumn(ifp, gwi, nonSupport); + return buildArithmeticColumn(ifp, gwi, nonSupport, isUnion); } else if (funcName == "case") { - fc = buildCaseFunction(ifp, gwi, nonSupport); + fc = buildCaseFunction(ifp, gwi, nonSupport, isUnion); } else if ((funcName == "charset" || funcName == "collation") && ifp->argument_count() == 1 && @@ -3486,7 +3489,7 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& rc = buildBooleanConstantColumn(ifp->arguments()[i], gwi, nonSupport); else { - rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport); + rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport, false, isUnion); } // MCOL-1510 It must be a temp table field, so find the corresponding column. @@ -3853,16 +3856,17 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& return fc; } -ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport, bool selectBetweenIn) +ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport, bool selectBetweenIn, + bool isUnion) { bool disableWrapping = gwi.disableWrapping; gwi.disableWrapping = gwi.disableWrapping || itemDisablesWrapping(ifp, gwi); - ReturnedColumn* rc = buildFunctionColumnBody(ifp, gwi, nonSupport, selectBetweenIn); + ReturnedColumn* rc = buildFunctionColumnBody(ifp, gwi, nonSupport, selectBetweenIn, isUnion); gwi.disableWrapping = disableWrapping; return rc; } -FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport) +FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool isUnion) { if (get_fe_conn_info_ptr() == NULL) { @@ -3945,7 +3949,7 @@ FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonS // rwWorkStack or ptWorkStack. // For example, simple predicates, such as 1=1 or 1=0, land in the // ptWorkStack but other stuff might land in the rwWorkStack - ReturnedColumn* parm = buildReturnedColumn(item->arguments()[i], gwi, nonSupport); + ReturnedColumn* parm = buildReturnedColumn(item->arguments()[i], gwi, nonSupport, false, isUnion); if (parm) { @@ -4085,7 +4089,7 @@ ConstantColumn* buildDecimalColumn(const Item* idp, const std::string& valStr, g return cc; } -SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) +SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi, bool isUnion) { if (!gwi.csc) { @@ -4100,6 +4104,14 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) strcmp(ifp->cached_table->db.str, "information_schema") == 0) isInformationSchema = true; + if (isUnion && !isInformationSchema) + { + auto* rc = gwi.returnedCols[ifp->field->field_index]->clone(); + rc->orderPos(ifp->field->field_index); + gwi.returnedCols[ifp->field->field_index]->incRefCount(); + return dynamic_cast(rc); + } + // support FRPM subquery. columns from the derived table has no definition if ((!ifp->field || !ifp->db_name.str || strlen(ifp->db_name.str) == 0) && !isInformationSchema) return buildSimpleColFromDerivedTable(gwi, ifp); @@ -4557,7 +4569,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi) case Item::FIELD_ITEM: { Item_field* ifp = static_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi, false); if (!sc) { @@ -5674,7 +5686,7 @@ int processGroupBy(SELECT_LEX& select_lex, gp_walk_info& gwi, const bool withRol { Item_field* ifp = (Item_field*)groupItem; // this GB col could be an alias of F&E on the SELECT clause, not necessarily a field. - ReturnedColumn* rc = buildSimpleColumn(ifp, gwi); + ReturnedColumn* rc = buildSimpleColumn(ifp, gwi, false); SimpleColumn* sc = dynamic_cast(rc); if (sc) @@ -6310,7 +6322,8 @@ void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) auto* histogram = dynamic_cast(ifp->field->read_stats->histogram); if (histogram) { - SchemaAndTableName tableName = {ifp->field->table->s->db.str, ifp->field->table->s->table_name.str}; + SchemaAndTableName tableName = {ifp->field->table->s->db.str, + ifp->field->table->s->table_name.str}; gwi.tableStatisticsMap[tableName][ifp->field->field_name.str] = *histogram; } } @@ -6420,7 +6433,7 @@ int processSelect(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, vector collectAllCols(gwi, ifp); break; } - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi, false); if (sc) { @@ -7015,32 +7028,90 @@ int processOrderBy(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, gwi.returnedCols.push_back(minSc); } - // ORDER BY translation part + // process UNION ORDER BY part if (!isUnion && !gwi.hasWindowFunc && gwi.subSelectType == CalpontSelectExecutionPlan::MAIN_SELECT) { + if (unionSel) { - if (unionSel) - order_list = select_lex.master_unit()->global_parameters()->order_list; - + order_list = select_lex.master_unit()->global_parameters()->order_list; ordercol = static_cast(order_list.first); for (; ordercol; ordercol = ordercol->next) { - Item* ord_item = *(ordercol->item); + ReturnedColumn* rc = NULL; - if (ord_item->name.length) + if (ordercol->in_field_list && ordercol->item && + (ordercol->counter_used || (*ordercol->item)->type() == Item::FIELD_ITEM)) { - // for union order by 1 case. For unknown reason, it doesn't show in_field_list - if (ord_item->type() == Item::CONST_ITEM && ord_item->cmp_type() == INT_RESULT) + auto* ifp = dynamic_cast(*ordercol->item); + auto pos = ordercol->counter_used ? ordercol->counter - 1 : ifp->field->field_index; + rc = gwi.returnedCols[pos]->clone(); + rc->orderPos(pos); + // can not be optimized off if used in order by with counter. + // set with self derived table alias if it's derived table + gwi.returnedCols[pos]->incRefCount(); + } + else + { + Item* ord_item = *(ordercol->item); + + // ignore not_used column on order by. + if ((ord_item->type() == Item::CONST_ITEM && ord_item->cmp_type() == INT_RESULT) && + ord_item->full_name() && !strcmp(ord_item->full_name(), "Not_used")) { + continue; + } + else if (ord_item->type() == Item::CONST_ITEM && ord_item->cmp_type() == INT_RESULT) + { + // DRRTUY This section looks useless b/c there is no + // way to put constant INT into an ORDER BY list + rc = gwi.returnedCols[((Item_int*)ord_item)->val_int() - 1]->clone(); } else if (ord_item->type() == Item::SUBSELECT_ITEM) { + gwi.fatalParseError = true; + } + else if ((ord_item->type() == Item::FUNC_ITEM) && + (((Item_func*)ord_item)->functype() == Item_func::COLLATE_FUNC)) + { + push_warning(gwi.thd, Sql_condition::WARN_LEVEL_NOTE, WARN_OPTION_IGNORED, + "COLLATE is ignored in ColumnStore"); + continue; } else { + rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError, false, true); + + rc = wrapIntoAggregate(rc, gwi, ord_item); + } + // @bug5501 try item_ptr if item can not be fixed. For some + // weird dml statement state, item can not be fixed but the + // infomation is available in item_ptr. + if (!rc || gwi.fatalParseError) + { + Item* item_ptr = ordercol->item_ptr; + + while (item_ptr->type() == Item::REF_ITEM) + item_ptr = *(((Item_ref*)item_ptr)->ref); + + rc = buildReturnedColumn(item_ptr, gwi, gwi.fatalParseError, false, true); + } + + if (!rc) + { + string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_ORDER_BY); + gwi.parseErrorText = emsg; + setError(gwi.thd, ER_CHECK_NOT_IMPLEMENTED, emsg, gwi); + return ER_CHECK_NOT_IMPLEMENTED; } } + + if (ordercol->direction == ORDER::ORDER_ASC) + rc->asc(true); + else + rc->asc(false); + + gwi.orderByCols.push_back(SRCP(rc)); } } @@ -7137,7 +7208,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (uint32_t i = 0; i < funcFieldVec.size(); i++) { - SimpleColumn* sc = buildSimpleColumn(funcFieldVec[i], gwi); + SimpleColumn* sc = buildSimpleColumn(funcFieldVec[i], gwi, false); if (!sc || gwi.fatalParseError) { @@ -7196,10 +7267,10 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i bool unionSel = (!isUnion && select_lex.master_unit()->is_unit_op()) ? true : false; + gwi.clauseType = GROUP_BY; // Group by list. not valid for union main query if (!unionSel) { - gwi.clauseType = GROUP_BY; if ((rc = processGroupBy(select_lex, gwi, withRollup))) { return rc; @@ -7234,6 +7305,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i minSc = gwi.returnedCols[0]; else if (!gwi.additionalRetCols.empty()) minSc = gwi.additionalRetCols[0]; + minSc->orderPos(0); } // @bug3523, count(*) on subquery always pick column[0]. @@ -7250,6 +7322,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i sc1->viewName(sc->viewName()); sc1->partitions(sc->partitions()); sc1->colPosition(0); + sc1->orderPos(0); sc1->timeZone(gwi.timeZone); minSc.reset(sc1); } diff --git a/dbcon/mysql/ha_mcs_execplan_walks.cpp b/dbcon/mysql/ha_mcs_execplan_walks.cpp index 802f382c6..204bd290e 100644 --- a/dbcon/mysql/ha_mcs_execplan_walks.cpp +++ b/dbcon/mysql/ha_mcs_execplan_walks.cpp @@ -119,7 +119,7 @@ void gp_walk(const Item* item, void* arg) if (ifp) { // XXX: this looks awfuly wrong. - execplan::SimpleColumn* scp = buildSimpleColumn(ifp, *gwip); + execplan::SimpleColumn* scp = buildSimpleColumn(ifp, *gwip, false); if (!scp) break; diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index e520a79e9..8e2a53fad 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -95,16 +95,20 @@ enum ClauseType ORDER_BY }; -struct SchemaAndTableName { +struct SchemaAndTableName +{ std::string schema; std::string table; - bool operator==(const SchemaAndTableName& other) const { + bool operator==(const SchemaAndTableName& other) const + { return schema == other.schema && table == other.table; } }; -struct SchemaAndTableNameHash { - std::size_t operator()(const SchemaAndTableName& k) const { +struct SchemaAndTableNameHash +{ + std::size_t operator()(const SchemaAndTableName& k) const + { return std::hash()(k.schema + k.table); } }; @@ -117,7 +121,8 @@ typedef std::tr1::unordered_map> TableOnExprList typedef std::tr1::unordered_map TableOuterJoinMap; using ColumnName = std::string; using ColumnStatisticsMap = std::unordered_map; -using TableStatisticsMap = std::unordered_map; +using TableStatisticsMap = + std::unordered_map; // This structure is used to store MDB AST -> CSEP translation context. // There is a column statistics for some columns in a query. @@ -434,13 +439,15 @@ const std::string bestTableName(const Item_field* ifp); // execution plan util functions prototypes execplan::ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport, - bool isRefItem = false); + bool isRefItem = false, bool isUnion = false); execplan::ReturnedColumn* buildFunctionColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport, - bool selectBetweenIn = false); -execplan::ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport); + bool selectBetweenIn = false, bool isUnion = false); +execplan::ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport, + bool isUnion); execplan::ConstantColumn* buildDecimalColumn(const Item* item, const std::string& str, gp_walk_info& gwi); -execplan::SimpleColumn* buildSimpleColumn(Item_field* item, gp_walk_info& gwi); -execplan::FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport); +execplan::SimpleColumn* buildSimpleColumn(Item_field* item, gp_walk_info& gwi, bool isUnion); +execplan::FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport, + bool isUnion); execplan::ParseTree* buildParseTree(Item* item, gp_walk_info& gwi, bool& nonSupport); execplan::ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi); execplan::ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& nonSupport); diff --git a/dbcon/mysql/ha_mcs_pushdown.cpp b/dbcon/mysql/ha_mcs_pushdown.cpp index 55681af49..134188dce 100644 --- a/dbcon/mysql/ha_mcs_pushdown.cpp +++ b/dbcon/mysql/ha_mcs_pushdown.cpp @@ -633,15 +633,6 @@ select_handler* create_columnstore_select_handler_(THD* thd, SELECT_LEX* sel_lex return nullptr; } - // MCOL-5432 Disable partial pushdown of the UNION operation if the query - // involves an order by or a limit clause. - if (sel_lex && sel_unit && - (sel_unit->global_parameters()->limit_params.explicit_limit == true || - sel_unit->global_parameters()->order_list.elements != 0)) - { - return nullptr; - } - std::vector select_lex_vec; if (sel_unit && !sel_lex) @@ -912,14 +903,6 @@ select_handler* create_columnstore_unit_handler(THD* thd, SELECT_LEX_UNIT* sel_u return nullptr; } - // MCOL-5432 Disable UNION pushdown if the query involves an order by - // or a limit clause. - if (sel_unit->global_parameters()->limit_params.explicit_limit == true || - sel_unit->global_parameters()->order_list.elements != 0) - { - return nullptr; - } - return create_columnstore_select_handler_(thd, 0, sel_unit); } diff --git a/dbcon/mysql/ha_pseudocolumn.cpp b/dbcon/mysql/ha_pseudocolumn.cpp index e9053b5ac..8ac34be45 100644 --- a/dbcon/mysql/ha_pseudocolumn.cpp +++ b/dbcon/mysql/ha_pseudocolumn.cpp @@ -423,7 +423,7 @@ execplan::ReturnedColumn* buildPseudoColumn(Item* item, gp_walk_info& gwi, bool& if (!field->field || !field->db_name.str || strlen(field->db_name.str) == 0) return nullOnError(gwi, funcName); - SimpleColumn* sc = buildSimpleColumn(field, gwi); + SimpleColumn* sc = buildSimpleColumn(field, gwi, false); if (!sc) return nullOnError(gwi, funcName);