mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-04-18 21:44:02 +03:00
Rename packages to MariaDB-columnstore-engine, MariaDB-columnstore-libs and MariaDB-columnstore-platform. Also add the "columnstore-" prefix to the components so that MariaDB's packaging system understands them, and add a line to include them in MariaDB's packaging. In addition: * Fix S3 building for dist source build * Fix Debian 10 dependency issue * Fix git handling for dist builds * Add support for MariaDB's RPM building * Use MariaDB's PCRE and readline * Remove a few dead files * Fix Boost noncopyable includes
2068 lines
71 KiB
C++
2068 lines
71 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
Copyright (C) 2019 MariaDB Corporation
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
// $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $
|
|
|
|
#include <iostream>
|
|
#include <stack>
|
|
#include <iterator>
|
|
#include <algorithm>
|
|
//#define NDEBUG
|
|
#include <cassert>
|
|
#include <vector>
|
|
#include <set>
|
|
#include <map>
|
|
#include <limits>
|
|
using namespace std;
|
|
|
|
#include <boost/scoped_ptr.hpp>
|
|
#include <boost/shared_ptr.hpp>
|
|
#include <boost/uuid/uuid_io.hpp>
|
|
using namespace boost;
|
|
|
|
#include "joblistfactory.h"
|
|
|
|
#include "calpontexecutionplan.h"
|
|
#include "calpontselectexecutionplan.h"
|
|
#include "calpontsystemcatalog.h"
|
|
#include "dbrm.h"
|
|
#include "filter.h"
|
|
#include "simplefilter.h"
|
|
#include "constantfilter.h"
|
|
#include "existsfilter.h"
|
|
#include "selectfilter.h"
|
|
#include "returnedcolumn.h"
|
|
#include "aggregatecolumn.h"
|
|
#include "windowfunctioncolumn.h"
|
|
#include "arithmeticcolumn.h"
|
|
#include "constantcolumn.h"
|
|
#include "functioncolumn.h"
|
|
#include "groupconcatcolumn.h"
|
|
#include "pseudocolumn.h"
|
|
#include "simplecolumn.h"
|
|
#include "rowcolumn.h"
|
|
#include "treenodeimpl.h"
|
|
#include "udafcolumn.h"
|
|
using namespace execplan;
|
|
|
|
#include "configcpp.h"
|
|
using namespace config;
|
|
|
|
#include "messagelog.h"
|
|
using namespace logging;
|
|
|
|
#include "elementtype.h"
|
|
#include "joblist.h"
|
|
#include "jobstep.h"
|
|
#include "primitivestep.h"
|
|
#include "jl_logger.h"
|
|
#include "jlf_execplantojoblist.h"
|
|
#include "rowaggregation.h"
|
|
#include "tuplehashjoin.h"
|
|
#include "tupleunion.h"
|
|
#include "expressionstep.h"
|
|
#include "tupleconstantstep.h"
|
|
#include "tuplehavingstep.h"
|
|
#include "windowfunctionstep.h"
|
|
|
|
#include "jlf_common.h"
|
|
#include "jlf_graphics.h"
|
|
#include "jlf_subquery.h"
|
|
#include "jlf_tuplejoblist.h"
|
|
|
|
#include "rowgroup.h"
|
|
using namespace rowgroup;
|
|
|
|
#include "mcsv1_udaf.h"
|
|
|
|
namespace
|
|
{
|
|
using namespace joblist;
|
|
|
|
|
|
// Project one SimpleColumn: emit a pColStep (or PseudoColStep) into jsv and,
// for dictionary-encoded columns whose real string value is needed, chain a
// pDictionaryStep to it via a shared datalist.  The column's TupleInfo is
// appended to jobInfo.pjColList in all cases.
//
// @param sc       column to project; throws logic_error if NULL
// @param jsv      receives the created job step(s)
// @param jobInfo  query-wide state; tuple/dictionary key maps are updated here
void projectSimpleColumn(const SimpleColumn* sc, JobStepVector& jsv, JobInfo& jobInfo)
{
    if (sc == NULL)
        throw logic_error("projectSimpleColumn: sc is null");

    CalpontSystemCatalog::OID oid = sc->oid();
    CalpontSystemCatalog::OID tbl_oid = tableOid(sc, jobInfo.csc);
    string alias(extractTableAlias(sc));
    string view(sc->viewName());
    CalpontSystemCatalog::OID dictOid = 0;
    CalpontSystemCatalog::ColType ct;
    pColStep* pcs = NULL;
    pDictionaryStep* pds = NULL;
    bool tokenOnly = false;   // true when only the dictionary token (not the string) is needed
    TupleInfo ti;

    if (!sc->schemaName().empty())
    {
        // Physical table column -- build the actual scan step(s).
        SJSTEP sjstep;

        // always tuples after release 3.0
        // if (!jobInfo.tryTuples)
        //     jobInfo.tables.insert(make_table(sc->schemaName(), sc->tableName()));

        // if (jobInfo.trace)
        //     cout << "doProject Emit pCol for SimpleColumn " << oid << endl;

        const PseudoColumn* pc = dynamic_cast<const PseudoColumn*>(sc);
        ct = sc->colType();

        //XXX use this before connector sets colType in sc correctly.
        // type of pseudo column is set by connector
        if (sc->isColumnStore() && !pc)
            ct = jobInfo.csc->colType(sc->oid());

        //X
        if (pc == NULL)
            pcs = new pColStep(oid, tbl_oid, ct, jobInfo);
        else
            pcs = new PseudoColStep(oid, tbl_oid, pc->pseudoType(), ct, jobInfo);

        pcs->alias(alias);
        pcs->view(view);
        pcs->name(sc->columnName());
        pcs->cardinality(sc->cardinality());
        //pcs->setOrderRids(true);

        sjstep.reset(pcs);
        jsv.push_back(sjstep);

        dictOid = isDictCol(ct);   // non-zero iff the column is dictionary (string) encoded
        ti = setTupleInfo(ct, oid, jobInfo, tbl_oid, sc, alias);
        pcs->tupleId(ti.key);

        // Under aggregation, an earlier pass may already have decided this
        // dictionary column is token-only (e.g. only counted) -- honor that.
        if (dictOid > 0 && jobInfo.hasAggregation)
        {
            map<uint32_t, bool>::iterator it =
                jobInfo.tokenOnly.find(getTupleKey(jobInfo, sc));

            if (it != jobInfo.tokenOnly.end())
                tokenOnly = it->second;
        }

        if (dictOid > 0 && !tokenOnly)
        {
            //This is a double-step step
            // if (jobInfo.trace)
            //     cout << "doProject Emit pGetSignature for SimpleColumn " << dictOid << endl;

            pds = new pDictionaryStep(dictOid, tbl_oid, ct, jobInfo);
            jobInfo.keyInfo->dictOidToColOid[dictOid] = oid;
            pds->alias(alias);
            pds->view(view);
            pds->name(sc->columnName());
            pds->cardinality(sc->cardinality());
            //pds->setOrderRids(true);

            //Associate these two linked steps: pcs output feeds pds input.
            JobStepAssociation outJs;
            AnyDataListSPtr spdl1(new AnyDataList());
            RowGroupDL* dl1 = new RowGroupDL(1, jobInfo.fifoSize);
            spdl1->rowGroupDL(dl1);
            dl1->OID(oid);

            // not a tokenOnly column
            setTupleInfo(ct, dictOid, jobInfo, tbl_oid, sc, alias);
            jobInfo.tokenOnly[getTupleKey(jobInfo, sc)] = false;
            outJs.outAdd(spdl1);

            pcs->outputAssociation(outJs);
            pds->inputAssociation(outJs);

            sjstep.reset(pds);
            jsv.push_back(sjstep);

            oid = dictOid; // dictionary column
            ti = setTupleInfo(ct, oid, jobInfo, tbl_oid, sc, alias);
            pds->tupleId(ti.key);
            jobInfo.keyInfo->dictKeyMap[pcs->tupleId()] = ti.key;
        }
    }
    else // must be vtable mode
    {
        // Column from a sub-query vtable: derive its OID from the vtable's
        // table OID and column position; the col type was recorded when the
        // sub-query was processed.
        oid = (tbl_oid + 1) + sc->colPosition();
        ct = jobInfo.vtableColTypes[UniqId(oid, alias, "", "")];
        ti = setTupleInfo(ct, oid, jobInfo, tbl_oid, sc, alias);
    }

    if (dictOid > 0 && tokenOnly)
    {
        // scale is not used by string columns
        // borrow it to indicate token is used in projection, not the real string.
        ti.scale = 8;
    }

    jobInfo.pjColList.push_back(ti);
}
|
|
|
|
// Build the projection job steps for the returned columns.
// Simple columns are delegated to projectSimpleColumn(); window-function
// columns only get a placeholder TupleInfo (their steps are created
// elsewhere); arithmetic/function/constant columns get an ExpressionStep
// and an expression tuple registered in jobInfo.
//
// @param retCols  columns to project
// @param jobInfo  query-wide state; pjColList / expressionVec / returnedExpressions updated
// @return         the job steps created for the projection
const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo)
{
    JobStepVector jsv;
    SJSTEP sjstep;

    for (unsigned i = 0; i < retCols.size(); i++)
    {
        const SimpleColumn* sc = dynamic_cast<const SimpleColumn*>(retCols[i].get());
        const WindowFunctionColumn* wc = NULL;

        if (sc != NULL)
        {
            projectSimpleColumn(sc, jsv, jobInfo);
        }
        else if ((wc = dynamic_cast<const WindowFunctionColumn*>(retCols[i].get())) != NULL)
        {
            //put place hold column in projection list
            uint64_t eid = wc->expressionId();
            CalpontSystemCatalog::ColType ct = wc->resultType();
            TupleInfo ti(setExpTupleInfo(ct, eid, retCols[i].get()->alias(), jobInfo));
            jobInfo.pjColList.push_back(ti);
        }
        else
        {
            // Expression (arithmetic / function / constant) column.
            const ArithmeticColumn* ac = NULL;
            const FunctionColumn* fc = NULL;
            const ConstantColumn* cc = NULL;
            uint64_t eid = -1;
            CalpontSystemCatalog::ColType ct;
            ExpressionStep* es = new ExpressionStep(jobInfo);
            es->expression(retCols[i], jobInfo);
            sjstep.reset(es);

            if ((ac = dynamic_cast<const ArithmeticColumn*>(retCols[i].get())) != NULL)
            {
                eid = ac->expressionId();
                ct = ac->resultType();
            }
            else if ((fc = dynamic_cast<const FunctionColumn*>(retCols[i].get())) != NULL)
            {
                eid = fc->expressionId();
                ct = fc->resultType();
            }
            else if ((cc = dynamic_cast<const ConstantColumn*>(retCols[i].get())) != NULL)
            {
                eid = cc->expressionId();
                ct = cc->resultType();
            }
            else
            {
                std::ostringstream errmsg;
                errmsg << "doProject: unhandled returned column: " << typeid(*retCols[i]).name();
                cerr << boldStart << errmsg.str() << boldStop << endl;
                throw logic_error(errmsg.str());
            }

            // set expression tuple Info
            TupleInfo ti(setExpTupleInfo(ct, eid, retCols[i].get()->alias(), jobInfo));
            uint32_t key = ti.key;

            // Expressions over window functions are evaluated later
            // (tracked in expressionVec); others become returned
            // expressions unless already registered.
            if (retCols[i]->windowfunctionColumnList().size() > 0)
                jobInfo.expressionVec.push_back(key);
            else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), key)
                        == jobInfo.expressionVec.end())
            {
                jobInfo.returnedExpressions.push_back(sjstep);
            }

            //put place hold column in projection list
            jobInfo.pjColList.push_back(ti);
        }
    }

    return jsv;
}
|
|
|
|
// Process the HAVING clause.
// If the filter references aggregate columns, a TupleHavingStep is kept in
// jobInfo.havingStep, the aggregates are registered, and simple columns used
// in both HAVING and GROUP BY (but not SELECT) are added to the row group.
// With no aggregates present, the clause is equivalent to a WHERE filter and
// is converted into ordinary filter job steps instead.
void checkHavingClause(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
{
    TupleHavingStep* ths = new TupleHavingStep(jobInfo);
    ths->expressionFilter(csep->having(), jobInfo);
    jobInfo.havingStep.reset(ths);

    // simple columns in select clause
    set<UniqId> scInSelect;

    for (RetColsVector::iterator i = jobInfo.nonConstCols.begin();
            i != jobInfo.nonConstCols.end();
            i++)
    {
        SimpleColumn* sc = dynamic_cast<SimpleColumn*>(i->get());

        if (sc != NULL)
        {
            // sub-query column: compute its vtable column OID
            if (sc->schemaName().empty())
                sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());

            scInSelect.insert(UniqId(sc));
        }
    }

    // simple columns in group by clause
    set<UniqId> scInGroupBy;

    for (RetColsVector::iterator i = csep->groupByCols().begin();
            i != csep->groupByCols().end();
            i++)
    {
        SimpleColumn* sc = dynamic_cast<SimpleColumn*>(i->get());

        if (sc != NULL)
        {
            if (sc->schemaName().empty() && sc->oid() == 0)
            {
                if (sc->colPosition() == -1)
                {
                    // from select subquery: replace with the corresponding
                    // returned column
                    SRCP ss = csep->returnedCols()[sc->orderPos()];
                    (*i) = ss;
                }
                else
                {
                    sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());
                }
            }

            scInGroupBy.insert(UniqId(sc));
        }
    }

    bool aggInHaving = false;
    const vector<ReturnedColumn*>& columns = ths->columns();

    for (vector<ReturnedColumn*>::const_iterator i = columns.begin(); i != columns.end(); i++)
    {
        // evaluate aggregate columns in having
        AggregateColumn* agc = dynamic_cast<AggregateColumn*>(*i);

        if (agc)
        {
            addAggregateColumn(agc, -1, jobInfo.nonConstCols, jobInfo);
            aggInHaving = true;
        }
        else
        {
            // simple columns used in having and in group by clause must be in rowgroup
            SimpleColumn* sc = dynamic_cast<SimpleColumn*>(*i);

            if (sc != NULL)
            {
                if (sc->schemaName().empty())
                    sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());

                UniqId scId(sc);

                if (scInGroupBy.find(scId) != scInGroupBy.end() &&
                        scInSelect.find(scId) == scInSelect.end())
                {
                    jobInfo.nonConstCols.push_back(SRCP(sc->clone()));
                }
            }
        }
    }

    if (aggInHaving == false)
    {
        // treated the same as where clause if no aggregate column in having.
        jobInfo.havingStep.reset();

        // parse the having expression
        ParseTree* filters = csep->having();

        if (filters != 0)
        {
            JLF_ExecPlanToJobList::walkTree(filters, jobInfo);
        }

        // walkTree leaves the resulting step vector on jobInfo.stack
        if (!jobInfo.stack.empty())
        {
            idbassert(jobInfo.stack.size() == 1);
            jobInfo.havingStepVec = jobInfo.stack.top();
            jobInfo.stack.pop();
        }
    }
}
|
|
|
|
void preProcessFunctionOnAggregation(const vector<SimpleColumn*>& scs,
|
|
const vector<AggregateColumn*>& aggs,
|
|
const vector<WindowFunctionColumn*>& wcs,
|
|
JobInfo& jobInfo)
|
|
{
|
|
// append the simple columns if not already projected
|
|
set<UniqId> scProjected;
|
|
|
|
for (RetColsVector::iterator i = jobInfo.projectionCols.begin();
|
|
i != jobInfo.projectionCols.end();
|
|
i++)
|
|
{
|
|
SimpleColumn* sc = dynamic_cast<SimpleColumn*>(i->get());
|
|
|
|
if (sc != NULL)
|
|
{
|
|
if (sc->schemaName().empty())
|
|
sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());
|
|
|
|
scProjected.insert(UniqId(sc));
|
|
}
|
|
}
|
|
|
|
for (vector<SimpleColumn*>::const_iterator i = scs.begin(); i != scs.end(); i++)
|
|
{
|
|
if (scProjected.find(UniqId(*i)) == scProjected.end())
|
|
{
|
|
jobInfo.projectionCols.push_back(SRCP((*i)->clone()));
|
|
scProjected.insert(UniqId(*i));
|
|
}
|
|
}
|
|
|
|
// append the aggregate columns in arithmetic/function column to the projection list
|
|
for (vector<AggregateColumn*>::const_iterator i = aggs.begin(); i != aggs.end(); i++)
|
|
{
|
|
addAggregateColumn(*i, -1, jobInfo.projectionCols, jobInfo);
|
|
if (wcs.size() > 0)
|
|
{
|
|
jobInfo.nonConstDelCols.push_back(SRCP((*i)->clone()));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Split delivered columns into constant / non-constant sets, register the
// aggregate columns, dispatch HAVING handling, and pre-process
// arithmetic/function expressions that wrap aggregates so their operands
// are projected.
void checkReturnedColumns(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
{
    for (uint64_t i = 0; i < jobInfo.deliveredCols.size(); i++)
    {
        if (NULL == dynamic_cast<const ConstantColumn*>(jobInfo.deliveredCols[i].get()))
            jobInfo.nonConstCols.push_back(jobInfo.deliveredCols[i]);
    }

    // save the original delivered non constant columns
    jobInfo.nonConstDelCols = jobInfo.nonConstCols;

    if (jobInfo.nonConstCols.size() != jobInfo.deliveredCols.size())
    {
        jobInfo.constantCol = CONST_COL_EXIST;

        // bug 2531, all constant column.
        if (jobInfo.nonConstCols.size() == 0)
        {
            // pick any column from the column map to drive the query;
            // with an empty map the query consists of constants only.
            if (csep->columnMap().size() > 0)
                jobInfo.nonConstCols.push_back((*(csep->columnMap().begin())).second);
            else
                jobInfo.constantCol = CONST_COL_ONLY;
        }
    }

    // NOTE: addAggregateColumn() may append to nonConstCols; size() is
    // intentionally re-evaluated on every iteration.
    for (uint64_t i = 0; i < jobInfo.nonConstCols.size(); i++)
    {
        AggregateColumn* agc = dynamic_cast<AggregateColumn*>(jobInfo.nonConstCols[i].get());

        if (agc)
            addAggregateColumn(agc, i, jobInfo.nonConstCols, jobInfo);
    }

    if (csep->having() != NULL)
        checkHavingClause(csep, jobInfo);

    jobInfo.projectionCols = jobInfo.nonConstCols;

    for (uint64_t i = 0; i < jobInfo.nonConstCols.size(); i++)
    {
        const ArithmeticColumn* ac =
            dynamic_cast<const ArithmeticColumn*>(jobInfo.nonConstCols[i].get());
        const FunctionColumn* fc =
            dynamic_cast<const FunctionColumn*>(jobInfo.nonConstCols[i].get());

        // Expression over aggregates: remember its output position and make
        // sure all of its operands are projected.
        if (ac != NULL && ac->aggColumnList().size() > 0)
        {
            jobInfo.nonConstCols[i]->outputIndex(i);
            preProcessFunctionOnAggregation(ac->simpleColumnList(), ac->aggColumnList(), ac->windowfunctionColumnList(), jobInfo);
        }
        else if (fc != NULL && fc->aggColumnList().size() > 0)
        {
            jobInfo.nonConstCols[i]->outputIndex(i);
            preProcessFunctionOnAggregation(fc->simpleColumnList(), fc->aggColumnList(), fc->windowfunctionColumnList(), jobInfo);
        }
    }
}
|
|
|
|
/*
|
|
This function is to get a unique non-constant column list for grouping.
|
|
After sub-query is supported, GROUP BY column can be a column from SELECT or FROM sub-queries,
|
|
which has empty schema name, and 0 oid (if SELECT). In order to distinguish these columns,
|
|
data member fSequence is used to indicate the column position in FROM sub-query's select list,
|
|
the table OID for a sub-query vtable is assumed to be CNX_VTABLE_ID, and the column OIDs for that vtable
are calculated based on this table OID and column position.
|
|
The data member fOrderPos is used to indicate the column position in the outer select clause,
|
|
this value is set to -1 if the column is not selected (implicit group by). For select sub-query,
|
|
the fSequence is not set, so orderPos is used to locate the column.
|
|
*/
|
|
// Normalize the GROUP BY list (see the comment block above for background):
// pull in ORDER BY columns not present in SELECT/GROUP BY, resolve
// sub-query columns to vtable OIDs or their SELECT-list expressions, and
// de-duplicate the resulting group-by columns.
void checkGroupByCols(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
{
    // order by columns may be not in the select and [group by] clause
    const CalpontSelectExecutionPlan::OrderByColumnList& orderByCols = csep->orderByCols();

    for (uint64_t i = 0; i < orderByCols.size(); i++)
    {
        // orderPos == -1 means the column is not in the select list
        if (orderByCols[i]->orderPos() == (uint64_t)(-1))
        {
            // @bug 4531, skip window functions, should be already added.
            if (dynamic_cast<WindowFunctionColumn*>(orderByCols[i].get()) != NULL ||
                    orderByCols[i]->windowfunctionColumnList().size() > 0)
                continue;

            jobInfo.deliveredCols.push_back(orderByCols[i]);

            // @bug 3025
            // Append the non-aggregate orderby column to group by, if there is group by clause.
            // Duplicates will be removed by next if block.
            if (csep->groupByCols().size() > 0)
            {
                // Not an aggregate column and not an expression of aggregation.
                if (dynamic_cast<AggregateColumn*>(orderByCols[i].get()) == NULL &&
                        orderByCols[i]->aggColumnList().empty())
                    csep->groupByCols().push_back(orderByCols[i]);
            }
        }
    }

    if (csep->groupByCols().size() > 0)
    {
        // De-duplicate the group-by columns.
        set<UniqId> colInGroupBy;
        RetColsVector uniqGbCols;

        for (RetColsVector::iterator i = csep->groupByCols().begin();
                i != csep->groupByCols().end();
                i++)
        {
            // skip constant columns
            if (dynamic_cast<ConstantColumn*>(i->get()) != NULL)
                continue;

            ReturnedColumn* rc = i->get();
            SimpleColumn* sc = dynamic_cast<SimpleColumn*>(rc);

            bool selectSubquery = false;

            if (sc && sc->schemaName().empty() && sc->oid() == 0)
            {
                if (sc->colPosition() == -1)
                {
                    // from select subquery
                    // sc->orderPos() should NOT be -1 because it is a SELECT sub-query.
                    SRCP ss = csep->returnedCols()[sc->orderPos()];
                    (*i) = ss;
                    selectSubquery = true;

                    // At this point whatever sc pointed to is invalid
                    // update the rc and sc
                    rc = ss.get();
                    sc = dynamic_cast<SimpleColumn*>(rc);
                }
                else
                {
                    sc->oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition());
                }
            }

            UniqId col;

            if (sc)
                col = UniqId(sc);
            else
                col = UniqId(rc->expressionId(), rc->alias(), "", "");

            // select-subquery replacements are always kept
            if (colInGroupBy.find(col) == colInGroupBy.end() || selectSubquery)
            {
                colInGroupBy.insert(col);
                uniqGbCols.push_back(*i);
            }
        }

        if (csep->groupByCols().size() != uniqGbCols.size())
            (csep)->groupByCols(uniqGbCols);
    }
}
|
|
|
|
// Determine whether the query needs an aggregation step and record the
// result in jobInfo.hasAggregation (and hasDistinct).  GROUP BY and most
// DISTINCT queries aggregate; otherwise aggregation is needed only if an
// aggregate function appears among the projected columns.
void checkAggregation(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
{
    checkGroupByCols(csep, jobInfo);
    checkReturnedColumns(csep, jobInfo);
    RetColsVector& retCols = jobInfo.projectionCols;

    jobInfo.hasDistinct = csep->distinct();

    // DISTINCT with window functions must be done in tupleannexstep
    const bool distinctViaAggregation =
        (csep->distinct() == true && jobInfo.windowDels.size() == 0);

    if (distinctViaAggregation || csep->groupByCols().size() > 0)
    {
        // groupby without aggregate functions is supported.
        jobInfo.hasAggregation = true;
    }
    else
    {
        // Scan the projected columns for an aggregate function.
        for (uint64_t idx = 0; idx < retCols.size() && !jobInfo.hasAggregation; idx++)
        {
            if (dynamic_cast<AggregateColumn*>(retCols[idx].get()) != NULL)
                jobInfo.hasAggregation = true;
        }
    }
}
|
|
|
|
// Set the result type of an aggregate column based on its operator and its
// (single) argument, and propagate the type to any clones created for
// function-on-aggregation expressions.
//
// @param ac       the aggregate column to update
// @param srcp     the aggregate's argument column
// @param op       AggregateColumn operator code
// @param jobInfo  provides cloneAggregateColMap for propagation
void updateAggregateColType(AggregateColumn* ac, const SRCP& srcp, int op, JobInfo& jobInfo)
{
    CalpontSystemCatalog::ColType ct;
    const SimpleColumn* sc = dynamic_cast<const SimpleColumn*>(srcp.get());
    const ArithmeticColumn* ar = NULL;
    const FunctionColumn* fc = NULL;

    // Start from the argument's result type when the argument is a simple
    // column, arithmetic, or function expression.
    // NOTE(review): for any other argument kind ct stays default-constructed
    // into the stddev/variance branch below -- presumably fine since that
    // branch overwrites the relevant fields; confirm.
    if (sc != NULL)
        ct = sc->resultType();
    else if ((ar = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
        ct = ar->resultType();
    else if ((fc = dynamic_cast<const FunctionColumn*>(srcp.get())) != NULL)
        ct = fc->resultType();

    // Statistics functions always produce a DOUBLE result.
    if (op == AggregateColumn::STDDEV_POP || op == AggregateColumn::STDDEV_SAMP ||
            op == AggregateColumn::VAR_POP || op == AggregateColumn::VAR_SAMP)
    {
        ct.colWidth = sizeof(double);
        ct.colDataType = CalpontSystemCatalog::DOUBLE;
        ct.scale = 0;
        ct.precision = -1;
    }
    else if (op == AggregateColumn::UDAF)
    {
        // User-defined aggregate: its context dictates the result type.
        UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(ac);

        if (udafc)
        {
            mcsv1sdk::mcsv1Context& udafContext = udafc->getContext();
            ct.colDataType = udafContext.getResultType();
            ct.colWidth = udafContext.getColWidth();
            ct.scale = udafContext.getScale();
            ct.precision = udafContext.getPrecision();
        }
        else
        {
            ct = ac->resultType();
        }
    }
    else
    {
        // All other operators keep the aggregate's declared result type.
        ct = ac->resultType();
    }

    ac->resultType(ct);

    // update the original if this aggregate column is cloned from function on aggregation
    pair<multimap<ReturnedColumn*, ReturnedColumn*>::iterator,
        multimap<ReturnedColumn*, ReturnedColumn*>::iterator> range =
            jobInfo.cloneAggregateColMap.equal_range(ac);

    for (multimap<ReturnedColumn*, ReturnedColumn*>::iterator i = range.first; i != range.second; ++i)
        (i->second)->resultType(ct);
}
|
|
|
|
|
|
const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
|
|
{
|
|
vector<uint32_t> projectKeys; // projected column keys -- unique
|
|
RetColsVector pcv; // projected column vector -- may have duplicates
|
|
|
|
// add the groupby cols in the front part of the project column vector (pcv)
|
|
const CalpontSelectExecutionPlan::GroupByColumnList& groupByCols = csep->groupByCols();
|
|
uint64_t lastGroupByPos = 0;
|
|
|
|
for (uint64_t i = 0; i < groupByCols.size(); i++)
|
|
{
|
|
pcv.push_back(groupByCols[i]);
|
|
lastGroupByPos++;
|
|
|
|
const SimpleColumn* sc = dynamic_cast<const SimpleColumn*>(groupByCols[i].get());
|
|
const ArithmeticColumn* ac = NULL;
|
|
const FunctionColumn* fc = NULL;
|
|
|
|
if (sc != NULL)
|
|
{
|
|
CalpontSystemCatalog::OID gbOid = sc->oid();
|
|
CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
|
|
CalpontSystemCatalog::OID dictOid = 0;
|
|
CalpontSystemCatalog::ColType ct;
|
|
string alias(extractTableAlias(sc));
|
|
string view(sc->viewName());
|
|
|
|
if (!sc->schemaName().empty())
|
|
{
|
|
ct = sc->colType();
|
|
|
|
//XXX use this before connector sets colType in sc correctly.
|
|
if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
|
|
ct = jobInfo.csc->colType(sc->oid());
|
|
|
|
//X
|
|
dictOid = isDictCol(ct);
|
|
}
|
|
else
|
|
{
|
|
gbOid = (tblOid + 1) + sc->colPosition();
|
|
ct = jobInfo.vtableColTypes[UniqId(gbOid, alias, "", "")];
|
|
}
|
|
|
|
// As of bug3695, make sure varbinary is not used in group by.
|
|
if (ct.colDataType == CalpontSystemCatalog::VARBINARY)
|
|
throw runtime_error ("VARBINARY in group by is not supported.");
|
|
|
|
TupleInfo ti(setTupleInfo(ct, gbOid, jobInfo, tblOid, sc, alias));
|
|
uint32_t tupleKey = ti.key;
|
|
|
|
if (find(projectKeys.begin(), projectKeys.end(), tupleKey) == projectKeys.end())
|
|
projectKeys.push_back(tupleKey);
|
|
|
|
// for dictionary columns, replace the token oid with string oid
|
|
if (dictOid > 0)
|
|
{
|
|
jobInfo.tokenOnly[tupleKey] = false;
|
|
ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias);
|
|
jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key;
|
|
tupleKey = ti.key;
|
|
}
|
|
|
|
jobInfo.groupByColVec.push_back(tupleKey);
|
|
}
|
|
else if ((ac = dynamic_cast<const ArithmeticColumn*>(groupByCols[i].get())) != NULL)
|
|
{
|
|
uint64_t eid = ac->expressionId();
|
|
CalpontSystemCatalog::ColType ct = ac->resultType();
|
|
TupleInfo ti(setExpTupleInfo(ct, eid, ac->alias(), jobInfo));
|
|
uint32_t tupleKey = ti.key;
|
|
jobInfo.groupByColVec.push_back(tupleKey);
|
|
|
|
if (find(projectKeys.begin(), projectKeys.end(), tupleKey) == projectKeys.end())
|
|
projectKeys.push_back(tupleKey);
|
|
}
|
|
else if ((fc = dynamic_cast<const FunctionColumn*>(groupByCols[i].get())) != NULL)
|
|
{
|
|
uint64_t eid = fc->expressionId();
|
|
CalpontSystemCatalog::ColType ct = fc->resultType();
|
|
TupleInfo ti(setExpTupleInfo(ct, eid, fc->alias(), jobInfo));
|
|
uint32_t tupleKey = ti.key;
|
|
jobInfo.groupByColVec.push_back(tupleKey);
|
|
|
|
if (find(projectKeys.begin(), projectKeys.end(), tupleKey) == projectKeys.end())
|
|
projectKeys.push_back(tupleKey);
|
|
}
|
|
else
|
|
{
|
|
std::ostringstream errmsg;
|
|
errmsg << "doAggProject: unsupported group by column: "
|
|
<< typeid(*groupByCols[i]).name();
|
|
cerr << boldStart << errmsg.str() << boldStop << endl;
|
|
throw logic_error(errmsg.str());
|
|
}
|
|
}
|
|
|
|
// process the returned columns
|
|
RetColsVector& retCols = jobInfo.projectionCols;
|
|
SRCP srcp;
|
|
|
|
for (uint64_t i = 0; i < retCols.size(); i++)
|
|
{
|
|
GroupConcatColumn* gcc = dynamic_cast<GroupConcatColumn*>(retCols[i].get());
|
|
|
|
if (gcc != NULL)
|
|
{
|
|
srcp = gcc->aggParms()[0];
|
|
const RowColumn* rcp = dynamic_cast<const RowColumn*>(srcp.get());
|
|
|
|
const vector<SRCP>& cols = rcp->columnVec();
|
|
|
|
for (vector<SRCP>::const_iterator j = cols.begin(); j != cols.end(); j++)
|
|
{
|
|
if (dynamic_cast<const ConstantColumn*>(j->get()) == NULL)
|
|
retCols.push_back(*j);
|
|
}
|
|
|
|
vector<SRCP>& orderCols = gcc->orderCols();
|
|
|
|
for (vector<SRCP>::iterator k = orderCols.begin(); k != orderCols.end(); k++)
|
|
{
|
|
if (dynamic_cast<const ConstantColumn*>(k->get()) == NULL)
|
|
retCols.push_back(*k);
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
#if 0
|
|
// MCOL-1201 Add support for multi-parameter UDAnF
|
|
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(retCols[i].get());
|
|
|
|
if (udafc != NULL)
|
|
{
|
|
srcp = udafc->aggParms()[0];
|
|
const RowColumn* rcp = dynamic_cast<const RowColumn*>(srcp.get());
|
|
|
|
const vector<SRCP>& cols = rcp->columnVec();
|
|
|
|
for (vector<SRCP>::const_iterator j = cols.begin(); j != cols.end(); j++)
|
|
{
|
|
srcp = *j;
|
|
|
|
if (dynamic_cast<const ConstantColumn*>(srcp.get()) == NULL)
|
|
retCols.push_back(srcp);
|
|
|
|
// Do we need this?
|
|
const ArithmeticColumn* ac = dynamic_cast<const ArithmeticColumn*>(srcp.get());
|
|
const FunctionColumn* fc = dynamic_cast<const FunctionColumn*>(srcp.get());
|
|
|
|
if (ac != NULL || fc != NULL)
|
|
{
|
|
// bug 3728, make a dummy expression step for each expression.
|
|
scoped_ptr<ExpressionStep> es(new ExpressionStep(jobInfo));
|
|
es->expression(srcp, jobInfo);
|
|
}
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
#endif
|
|
srcp = retCols[i];
|
|
const AggregateColumn* ag = dynamic_cast<const AggregateColumn*>(retCols[i].get());
|
|
|
|
// bug 3728 Make a dummy expression for srcp if it is an
|
|
// expression. This is needed to fill in some stuff.
|
|
// Note that es.expression does nothing if the item is not an expression.
|
|
if (ag == NULL)
|
|
{
|
|
// Not an aggregate. Make a dummy expression for the item
|
|
ExpressionStep es;
|
|
es.expression(srcp, jobInfo);
|
|
}
|
|
else
|
|
{
|
|
// MCOL-1201 multi-argument aggregate. make a dummy expression
|
|
// step for each argument that is an expression.
|
|
for (uint32_t i = 0; i < ag->aggParms().size(); ++i)
|
|
{
|
|
srcp = ag->aggParms()[i];
|
|
ExpressionStep es;
|
|
es.expression(srcp, jobInfo);
|
|
}
|
|
}
|
|
}
|
|
|
|
map<uint32_t, CalpontSystemCatalog::OID> dictMap; // bug 1853, the tupleKey - dictoid map
|
|
|
|
for (uint64_t i = 0; i < retCols.size(); i++)
|
|
{
|
|
srcp = retCols[i];
|
|
const SimpleColumn* sc = dynamic_cast<const SimpleColumn*>(srcp.get());
|
|
AggregateColumn* aggc = dynamic_cast<AggregateColumn*>(srcp.get());
|
|
bool doDistinct = (csep->distinct() && csep->groupByCols().empty());
|
|
// Use this instead of the above line to mimic MariaDB's sql_mode = 'ONLY_FULL_GROUP_BY'
|
|
// bool doDistinct = (csep->distinct() &&
|
|
// csep->groupByCols().empty() &&
|
|
// !jobInfo.hasAggregation);
|
|
uint32_t tupleKey = -1;
|
|
string alias;
|
|
string view;
|
|
|
|
// returned column could be groupby column, a simplecoulumn not an aggregatecolumn
|
|
int op = 0;
|
|
CalpontSystemCatalog::OID dictOid = 0;
|
|
CalpontSystemCatalog::ColType ct, aggCt;
|
|
|
|
if (aggc)
|
|
{
|
|
GroupConcatColumn* gcc = dynamic_cast<GroupConcatColumn*>(retCols[i].get());
|
|
|
|
if (gcc != NULL)
|
|
{
|
|
jobInfo.groupConcatCols.push_back(retCols[i]);
|
|
|
|
uint64_t eid = gcc->expressionId();
|
|
ct = gcc->resultType();
|
|
TupleInfo ti(setExpTupleInfo(ct, eid, gcc->alias(), jobInfo));
|
|
tupleKey = ti.key;
|
|
jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp()));
|
|
// not a tokenOnly column. Mark all the columns involved
|
|
srcp = gcc->aggParms()[0];
|
|
const RowColumn* rowCol = dynamic_cast<const RowColumn*>(srcp.get());
|
|
|
|
if (rowCol)
|
|
{
|
|
const std::vector<SRCP>& cols = rowCol->columnVec();
|
|
|
|
for (vector<SRCP>::const_iterator j = cols.begin(); j != cols.end(); j++)
|
|
{
|
|
sc = dynamic_cast<const SimpleColumn*>(j->get());
|
|
|
|
if (sc)
|
|
{
|
|
CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
|
|
alias = extractTableAlias(sc);
|
|
ct = sc->colType();
|
|
TupleInfo ti(setTupleInfo(ct, sc->oid(), jobInfo, tblOid, sc, alias));
|
|
jobInfo.tokenOnly[ti.key] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
// Aggregate column not group concat
|
|
AggParms& aggParms = aggc->aggParms();
|
|
|
|
for (uint32_t parm = 0; parm < aggParms.size(); ++parm)
|
|
{
|
|
// Only do the optimization of converting to count(*) if
|
|
// there is only one parameter.
|
|
if (aggParms.size() == 1 && aggc->constCol().get() != NULL)
|
|
{
|
|
// replace the aggregate on constant with a count(*)
|
|
SRCP clone;
|
|
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(aggc);
|
|
|
|
if (udafc)
|
|
{
|
|
clone.reset(new UDAFColumn(*udafc, aggc->sessionID()));
|
|
}
|
|
else
|
|
{
|
|
clone.reset(new AggregateColumn(*aggc, aggc->sessionID()));
|
|
}
|
|
|
|
jobInfo.constAggregate.insert(make_pair(i, clone));
|
|
aggc->aggOp(AggregateColumn::COUNT_ASTERISK);
|
|
aggc->distinct(false);
|
|
}
|
|
|
|
srcp = aggParms[parm];
|
|
sc = dynamic_cast<const SimpleColumn*>(srcp.get());
|
|
|
|
if (parm == 0)
|
|
{
|
|
op = aggc->aggOp();
|
|
}
|
|
else
|
|
{
|
|
op = AggregateColumn::MULTI_PARM;
|
|
}
|
|
|
|
doDistinct = aggc->distinct();
|
|
|
|
if (aggParms.size() == 1)
|
|
{
|
|
// Set the col type based on the single parm.
|
|
// Changing col type based on a parm if multiple parms
|
|
// doesn't really make sense.
|
|
if (op != AggregateColumn::SUM && op != AggregateColumn::DISTINCT_SUM &&
|
|
op != AggregateColumn::AVG && op != AggregateColumn::DISTINCT_AVG)
|
|
{
|
|
updateAggregateColType(aggc, srcp, op, jobInfo);
|
|
}
|
|
}
|
|
|
|
aggCt = aggc->resultType();
|
|
|
|
// As of bug3695, make sure varbinary is not used in aggregation.
|
|
// TODO: allow for UDAF
|
|
if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY)
|
|
throw runtime_error ("VARBINARY in aggregate function is not supported.");
|
|
|
|
// Project the parm columns or expressions
|
|
if (sc != NULL)
|
|
{
|
|
CalpontSystemCatalog::OID retOid = sc->oid();
|
|
CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
|
|
alias = extractTableAlias(sc);
|
|
view = sc->viewName();
|
|
|
|
if (!sc->schemaName().empty())
|
|
{
|
|
ct = sc->colType();
|
|
|
|
//XXX use this before connector sets colType in sc correctly.
|
|
if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
|
|
ct = jobInfo.csc->colType(sc->oid());
|
|
|
|
//X
|
|
dictOid = isDictCol(ct);
|
|
}
|
|
else
|
|
{
|
|
retOid = (tblOid + 1) + sc->colPosition();
|
|
ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")];
|
|
}
|
|
|
|
TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias));
|
|
tupleKey = ti.key;
|
|
|
|
// this is a string column
|
|
if (dictOid > 0)
|
|
{
|
|
map<uint32_t, bool>::iterator findit = jobInfo.tokenOnly.find(tupleKey);
|
|
|
|
// if the column has never seen, and the op is count: possible need count only.
|
|
if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op)
|
|
{
|
|
if (findit == jobInfo.tokenOnly.end())
|
|
jobInfo.tokenOnly[tupleKey] = true;
|
|
}
|
|
// if aggregate other than count, token is not enough.
|
|
else if (op != 0 || doDistinct)
|
|
{
|
|
jobInfo.tokenOnly[tupleKey] = false;
|
|
}
|
|
|
|
findit = jobInfo.tokenOnly.find(tupleKey);
|
|
|
|
if (!(findit != jobInfo.tokenOnly.end() && findit->second == true))
|
|
{
|
|
dictMap[tupleKey] = dictOid;
|
|
jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid;
|
|
ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias);
|
|
jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const ArithmeticColumn* ac = NULL;
|
|
const FunctionColumn* fc = NULL;
|
|
const WindowFunctionColumn* wc = NULL;
|
|
bool hasAggCols = false;
|
|
bool hasWndCols = false;
|
|
|
|
if ((ac = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
|
|
{
|
|
if (ac->aggColumnList().size() > 0)
|
|
hasAggCols = true;
|
|
if (ac->windowfunctionColumnList().size() > 0)
|
|
hasWndCols = true;
|
|
}
|
|
else if ((fc = dynamic_cast<const FunctionColumn*>(srcp.get())) != NULL)
|
|
{
|
|
if (fc->aggColumnList().size() > 0)
|
|
hasAggCols = true;
|
|
if (fc->windowfunctionColumnList().size() > 0)
|
|
hasWndCols = true;
|
|
}
|
|
else if (dynamic_cast<const AggregateColumn*>(srcp.get()) != NULL)
|
|
{
|
|
std::ostringstream errmsg;
|
|
errmsg << "Invalid aggregate function nesting.";
|
|
cerr << boldStart << errmsg.str() << boldStop << endl;
|
|
throw logic_error(errmsg.str());
|
|
}
|
|
else if (dynamic_cast<const ConstantColumn*>(srcp.get()) != NULL)
|
|
{
|
|
}
|
|
else if ((wc = dynamic_cast<const WindowFunctionColumn*>(srcp.get())) == NULL)
|
|
{
|
|
std::ostringstream errmsg;
|
|
errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name();
|
|
cerr << boldStart << errmsg.str() << boldStop << endl;
|
|
throw logic_error(errmsg.str());
|
|
}
|
|
|
|
uint64_t eid = srcp.get()->expressionId();
|
|
ct = srcp.get()->resultType();
|
|
TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo));
|
|
tupleKey = ti.key;
|
|
|
|
if (hasAggCols && !hasWndCols)
|
|
jobInfo.expressionVec.push_back(tupleKey);
|
|
}
|
|
|
|
// add to project list
|
|
vector<uint32_t>::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey);
|
|
|
|
if (keyIt == projectKeys.end())
|
|
{
|
|
RetColsVector::iterator it = pcv.end();
|
|
|
|
if (doDistinct)
|
|
it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp);
|
|
else
|
|
it = pcv.insert(pcv.end(), srcp);
|
|
|
|
projectKeys.insert(projectKeys.begin() + std::distance(pcv.begin(), it), tupleKey);
|
|
}
|
|
else if (doDistinct) // @bug4250, move forward distinct column if necessary.
|
|
{
|
|
uint32_t pos = std::distance(projectKeys.begin(), keyIt);
|
|
|
|
if (pos >= lastGroupByPos)
|
|
{
|
|
pcv[pos] = pcv[lastGroupByPos];
|
|
pcv[lastGroupByPos] = srcp;
|
|
projectKeys[pos] = projectKeys[lastGroupByPos];
|
|
projectKeys[lastGroupByPos] = tupleKey;
|
|
lastGroupByPos++;
|
|
}
|
|
}
|
|
|
|
if (doDistinct && dictOid > 0)
|
|
tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey];
|
|
|
|
// remember the columns to be returned
|
|
jobInfo.returnedColVec.push_back(make_pair(tupleKey, op));
|
|
|
|
// bug 1499 distinct processing, save unique distinct columns
|
|
if (doDistinct &&
|
|
(jobInfo.distinctColVec.end() ==
|
|
find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey)))
|
|
{
|
|
jobInfo.distinctColVec.push_back(tupleKey);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Not an Aggregate
|
|
// simple column selected
|
|
if (sc != NULL)
|
|
{
|
|
// one column only need project once
|
|
CalpontSystemCatalog::OID retOid = sc->oid();
|
|
CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
|
|
alias = extractTableAlias(sc);
|
|
view = sc->viewName();
|
|
|
|
if (!sc->schemaName().empty())
|
|
{
|
|
ct = sc->colType();
|
|
|
|
//XXX use this before connector sets colType in sc correctly.
|
|
if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
|
|
ct = jobInfo.csc->colType(sc->oid());
|
|
|
|
//X
|
|
dictOid = isDictCol(ct);
|
|
}
|
|
else
|
|
{
|
|
retOid = (tblOid + 1) + sc->colPosition();
|
|
ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")];
|
|
}
|
|
|
|
TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias));
|
|
tupleKey = ti.key;
|
|
|
|
// this is a string column
|
|
if (dictOid > 0)
|
|
{
|
|
map<uint32_t, bool>::iterator findit = jobInfo.tokenOnly.find(tupleKey);
|
|
|
|
// if the column has never seen, and the op is count: possible need count only.
|
|
if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op)
|
|
{
|
|
if (findit == jobInfo.tokenOnly.end())
|
|
jobInfo.tokenOnly[tupleKey] = true;
|
|
}
|
|
// if aggregate other than count, token is not enough.
|
|
else if (op != 0 || doDistinct)
|
|
{
|
|
jobInfo.tokenOnly[tupleKey] = false;
|
|
}
|
|
|
|
findit = jobInfo.tokenOnly.find(tupleKey);
|
|
|
|
if (!(findit != jobInfo.tokenOnly.end() && findit->second == true))
|
|
{
|
|
dictMap[tupleKey] = dictOid;
|
|
jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid;
|
|
ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias);
|
|
jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const ArithmeticColumn* ac = NULL;
|
|
const FunctionColumn* fc = NULL;
|
|
const WindowFunctionColumn* wc = NULL;
|
|
bool hasAggCols = false;
|
|
bool hasWndCols = false;
|
|
|
|
if ((ac = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
|
|
{
|
|
if (ac->aggColumnList().size() > 0)
|
|
hasAggCols = true;
|
|
if (ac->windowfunctionColumnList().size() > 0)
|
|
hasWndCols = true;
|
|
}
|
|
else if ((fc = dynamic_cast<const FunctionColumn*>(srcp.get())) != NULL)
|
|
{
|
|
if (fc->aggColumnList().size() > 0)
|
|
hasAggCols = true;
|
|
if (fc->windowfunctionColumnList().size() > 0)
|
|
hasWndCols = true;
|
|
}
|
|
else if (dynamic_cast<const AggregateColumn*>(srcp.get()) != NULL)
|
|
{
|
|
std::ostringstream errmsg;
|
|
errmsg << "Invalid aggregate function nesting.";
|
|
cerr << boldStart << errmsg.str() << boldStop << endl;
|
|
throw logic_error(errmsg.str());
|
|
}
|
|
else if (dynamic_cast<const ConstantColumn*>(srcp.get()) != NULL)
|
|
{
|
|
}
|
|
else if ((wc = dynamic_cast<const WindowFunctionColumn*>(srcp.get())) == NULL)
|
|
{
|
|
std::ostringstream errmsg;
|
|
errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name();
|
|
cerr << boldStart << errmsg.str() << boldStop << endl;
|
|
throw logic_error(errmsg.str());
|
|
}
|
|
|
|
uint64_t eid = srcp.get()->expressionId();
|
|
ct = srcp.get()->resultType();
|
|
TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo));
|
|
tupleKey = ti.key;
|
|
|
|
if (hasAggCols && !hasWndCols)
|
|
jobInfo.expressionVec.push_back(tupleKey);
|
|
}
|
|
|
|
// add to project list
|
|
vector<uint32_t>::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey);
|
|
|
|
if (keyIt == projectKeys.end())
|
|
{
|
|
RetColsVector::iterator it = pcv.end();
|
|
|
|
if (doDistinct)
|
|
it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp);
|
|
else
|
|
it = pcv.insert(pcv.end(), srcp);
|
|
|
|
projectKeys.insert(projectKeys.begin() + std::distance(pcv.begin(), it), tupleKey);
|
|
}
|
|
else if (doDistinct) // @bug4250, move forward distinct column if necessary.
|
|
{
|
|
uint32_t pos = std::distance(projectKeys.begin(), keyIt);
|
|
|
|
if (pos >= lastGroupByPos)
|
|
{
|
|
pcv[pos] = pcv[lastGroupByPos];
|
|
pcv[lastGroupByPos] = srcp;
|
|
projectKeys[pos] = projectKeys[lastGroupByPos];
|
|
projectKeys[lastGroupByPos] = tupleKey;
|
|
lastGroupByPos++;
|
|
}
|
|
}
|
|
|
|
if (doDistinct && dictOid > 0)
|
|
tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey];
|
|
|
|
// remember the columns to be returned
|
|
jobInfo.returnedColVec.push_back(make_pair(tupleKey, op));
|
|
|
|
// bug 1499 distinct processing, save unique distinct columns
|
|
if (doDistinct &&
|
|
(jobInfo.distinctColVec.end() ==
|
|
find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey)))
|
|
{
|
|
jobInfo.distinctColVec.push_back(tupleKey);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// for dictionary columns not count only, replace the token oid with string oid
|
|
for (vector<pair<uint32_t, int> >::iterator it = jobInfo.returnedColVec.begin();
|
|
it != jobInfo.returnedColVec.end(); it++)
|
|
{
|
|
// if the column is a dictionary column and not count only
|
|
bool tokenOnly = false;
|
|
map<uint32_t, bool>::iterator i = jobInfo.tokenOnly.find(it->first);
|
|
|
|
if (i != jobInfo.tokenOnly.end())
|
|
tokenOnly = i->second;
|
|
|
|
if (dictMap.find(it->first) != dictMap.end() && !tokenOnly)
|
|
{
|
|
uint32_t tupleKey = jobInfo.keyInfo->dictKeyMap[it->first];
|
|
int op = it->second;
|
|
*it = make_pair(tupleKey, op);
|
|
}
|
|
}
|
|
|
|
return doProject(pcv, jobInfo);
|
|
}
|
|
|
|
|
|
template <typename T>
|
|
class Uniqer : public unary_function<typename T::value_type, void>
|
|
{
|
|
private:
|
|
typedef typename T::mapped_type Mt_;
|
|
class Pred : public unary_function<const Mt_, bool>
|
|
{
|
|
public:
|
|
Pred(const Mt_& retCol) : fRetCol(retCol) { }
|
|
bool operator()(const Mt_ rc) const
|
|
{
|
|
return fRetCol->sameColumn(rc.get());
|
|
}
|
|
private:
|
|
const Mt_& fRetCol;
|
|
};
|
|
public:
|
|
void operator()(typename T::value_type mapItem)
|
|
{
|
|
Pred pred(mapItem.second);
|
|
RetColsVector::iterator iter;
|
|
iter = find_if(fRetColsVec.begin(), fRetColsVec.end(), pred);
|
|
|
|
if (iter == fRetColsVec.end())
|
|
{
|
|
//Add this ReturnedColumn
|
|
fRetColsVec.push_back(mapItem.second);
|
|
}
|
|
}
|
|
RetColsVector fRetColsVec;
|
|
};
|
|
|
|
// Assign sequential step ids to every step in 'steps', starting at 'stepNo',
// and propagate the session trace flags to each step.
// Returns the next unused step number, so successive step vectors can be
// numbered without gaps.
uint16_t numberSteps(JobStepVector& steps, uint16_t stepNo, uint32_t flags)
{
    for (JobStepVector::iterator it = steps.begin(); it != steps.end(); ++it)
    {
        JobStep* step = it->get();
        step->stepId(stepNo);
        step->setTraceFlags(flags);
        ++stepNo;
    }

    return stepNo;
}
|
|
|
|
// Convert the filter step at *it from a pColStep into a pColScanStep, so the
// first access to a table is a full column scan instead of block lookups.
//
// Side effect: when *it is a pDictionaryScan immediately followed by a
// pColStep, the caller's iterator 'it' is advanced to that pColStep, and the
// pColStep is the step converted. If neither case matches, nothing changes.
void changePcolStepToPcolScan(JobStepVector::iterator& it, JobStepVector::iterator& end)
{
    // make sure no pseudo column is a scan column
    idbassert(dynamic_cast<PseudoColStep*>(it->get()) == NULL);

    pColStep* colStep = dynamic_cast<pColStep*>(it->get());
    pColScanStep* scanStep = 0;

    //Might be a pDictionaryScan step
    if (colStep)
    {
        // Direct case: build the scan step from the column step in place.
        scanStep = new pColScanStep(*colStep);
    }
    else
    {
        //If we have a pDictionaryScan-pColStep duo, then change the pColStep
        // (guard with std::distance so we never dereference past 'end')
        if (typeid(*(it->get())) == typeid(pDictionaryScan) &&
                std::distance(it, end) > 1 &&
                typeid(*((it + 1)->get())) == typeid(pColStep))
        {
            ++it;  // advance caller's iterator to the pColStep
            colStep = dynamic_cast<pColStep*>(it->get());
            scanStep = new pColScanStep(*colStep);
        }
    }

    if (scanStep)
    {
        // Replace the step in place; the shared_ptr takes ownership of scanStep.
        it->reset(scanStep);
    }
}
|
|
|
|
// optimize filter order
|
|
// perform none string filters first because string filter joins the tokens.
|
|
void optimizeFilterOrder(JobStepVector& qsv)
|
|
{
|
|
// move all none string filters
|
|
uint64_t pdsPos = 0;
|
|
|
|
// int64_t orbranch = 0;
|
|
for (; pdsPos < qsv.size(); ++pdsPos)
|
|
{
|
|
// skip the or branches
|
|
// OrDelimiterLhs* lhs = dynamic_cast<OrDelimiterLhs*>(qsv[pdsPos].get());
|
|
// if (lhs != NULL)
|
|
// {
|
|
// orbranch++;
|
|
// continue;
|
|
// }
|
|
//
|
|
// if (orbranch > 0)
|
|
// {
|
|
// UnionStep* us = dynamic_cast<UnionStep*>(qsv[pdsPos].get());
|
|
// if (us)
|
|
// orbranch--;
|
|
// }
|
|
// else
|
|
{
|
|
pDictionaryScan* pds = dynamic_cast<pDictionaryScan*>(qsv[pdsPos].get());
|
|
|
|
if (pds)
|
|
break;
|
|
}
|
|
}
|
|
|
|
// no pDictionaryScan step
|
|
if (pdsPos >= qsv.size())
|
|
return;
|
|
|
|
// get the filter steps that are not in or branches
|
|
vector<uint64_t> pcolIdVec;
|
|
JobStepVector pcolStepVec;
|
|
|
|
// orbranch = 0;
|
|
for (uint64_t i = pdsPos; i < qsv.size(); ++i)
|
|
{
|
|
// OrDelimiterLhs* lhs = dynamic_cast<OrDelimiterLhs*>(qsv[pdsPos].get());
|
|
// if (lhs != NULL)
|
|
// {
|
|
// orbranch++;
|
|
// continue;
|
|
// }
|
|
|
|
// if (orbranch > 0)
|
|
// {
|
|
// UnionStep* us = dynamic_cast<UnionStep*>(qsv[pdsPos].get());
|
|
// if (us)
|
|
// orbranch--;
|
|
// }
|
|
// else
|
|
{
|
|
pColStep* pcol = dynamic_cast<pColStep*>(qsv[i].get());
|
|
|
|
if (pcol != NULL && pcol->filterCount() > 0)
|
|
pcolIdVec.push_back(i);
|
|
}
|
|
}
|
|
|
|
for (vector<uint64_t>::reverse_iterator r = pcolIdVec.rbegin(); r < pcolIdVec.rend(); ++r)
|
|
{
|
|
pcolStepVec.push_back(qsv[*r]);
|
|
qsv.erase(qsv.begin() + (*r));
|
|
}
|
|
|
|
qsv.insert(qsv.begin() + pdsPos, pcolStepVec.rbegin(), pcolStepVec.rend());
|
|
}
|
|
|
|
// Report an exception caught while building a joblist: echo it to stderr,
// write it to the system log at 'logLevel', then attach a dummy delivery map
// to the joblist so downstream code (the qb == 2 path in main.cpp) still
// finds a delivery entry.
void exceptionHandler(JobList* joblist, const JobInfo& jobInfo, const string& logMsg,
                      logging::LOG_TYPE logLevel = LOG_TYPE_ERROR)
{
    cerr << "### JobListFactory ses:" << jobInfo.sessionId << " caught: " << logMsg << endl;

    Message::Args msgArgs;
    msgArgs.add(logMsg);
    LoggingID lid(5, jobInfo.sessionId, jobInfo.txnId, 0);
    jobInfo.logger->logMessage(logLevel, LogMakeJobList, msgArgs, lid);

    // dummy delivery map, workaround for (qb == 2) in main.cpp
    DeliveredTableMap dtm;
    dtm[0] = SJSTEP();
    joblist->addDelivery(dtm);
}
|
|
|
|
|
|
// Translate a CalpontSelectExecutionPlan into jobstep vectors:
//  - walk the filter parse tree into query (filter) steps,
//  - build the projection steps (aggregate or plain),
//  - convert the first access to each table into a column scan,
//  - populate jobInfo's column/table maps used later for association.
// Outputs are returned through querySteps / projectSteps; deliverySteps is
// not populated here (association happens later in makeVtableModeSteps).
void parseExecutionPlan(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
                        JobStepVector& querySteps, JobStepVector& projectSteps, DeliveredTableMap& deliverySteps)
{
    ParseTree* filters = csep->filters();
    jobInfo.deliveredCols = csep->returnedCols();

    if (filters != 0)
    {
        // Walk the where-clause tree; resulting steps accumulate on jobInfo.stack.
        JLF_ExecPlanToJobList::walkTree(filters, jobInfo);
    }

    if (jobInfo.trace)
        cout << endl << "Stack: " << endl;

    if (!jobInfo.stack.empty())
    {
        // The walk must reduce to exactly one step vector.
        idbassert(jobInfo.stack.size() == 1);
        querySteps = jobInfo.stack.top();
        jobInfo.stack.pop();

        // do some filter order optimization
        optimizeFilterOrder(querySteps);
    }

    if (jobInfo.selectAndFromSubs.size() > 0)
    {
        // Sub-query steps (select/from clause) run before the main filters.
        querySteps.insert(querySteps.begin(),
                          jobInfo.selectAndFromSubs.begin(), jobInfo.selectAndFromSubs.end());
    }

    // bug4531, window function support
    WindowFunctionStep::checkWindowFunction(csep, jobInfo);

    // bug3391, move forward the aggregation check for no aggregate having clause.
    checkAggregation(csep, jobInfo);

    // include filters in having clause, if any.
    if (jobInfo.havingStepVec.size() > 0)
        querySteps.insert(querySteps.begin(),
                          jobInfo.havingStepVec.begin(), jobInfo.havingStepVec.end());

    //Need to change the leading pColStep to a pColScanStep
    //Keep a list of the (table OIDs,alias) that we've already processed for @bug 598 self-join
    set<uint32_t> seenTableIds;

    //Stack of seenTables to make sure the left-hand side and right-hand have the same content
    // (only used by the commented-out OR-branch handling below)
    stack<set<uint32_t> > seenTableStack;

    if (!querySteps.empty())
    {
        JobStepVector::iterator iter = querySteps.begin();
        JobStepVector::iterator end = querySteps.end();

        for (; iter != end; ++iter)
        {
            idbassert(iter->get());

            // As of bug3695, make sure varbinary is not used in filters.
            if (typeid(*(iter->get())) == typeid(pColStep))
            {
                // only pcolsteps, no pcolscan yet.
                pColStep* pcol = dynamic_cast<pColStep*>(iter->get());

                if (pcol->colType().colDataType == CalpontSystemCatalog::VARBINARY)
                {
                    if (pcol->filterCount() != 1)
                        throw runtime_error ("VARBINARY in filter or function is not supported.");

                    // error out if the filter is not "is null" or "is not null"
                    // should block "= null" and "!= null" ???
                    messageqcpp::ByteStream filter = pcol->filterString();
                    uint8_t op = 0;
                    filter >> op;
                    bool nullOp = (op == COMPARE_EQ || op == COMPARE_NE || op == COMPARE_NIL);
                    filter >> op; // skip roundFlag
                    uint64_t value = 0;
                    filter >> value;
                    // 0xff..fe is the serialized null-token marker in the filter value
                    nullOp = nullOp && (value == 0xfffffffffffffffeULL);

                    if (nullOp == false)
                        throw runtime_error ("VARBINARY in filter or function is not supported.");
                }
            }

            // // save the current seentable for right-hand side
            // if (typeid(*(iter->get())) == typeid(OrDelimiterLhs))
            // {
            //     seenTableStack.push(seenTableIds);
            //     continue;
            // }
            //
            // // restore the seentable
            // else if (typeid(*(iter->get())) == typeid(OrDelimiterRhs))
            // {
            //     seenTableIds = seenTableStack.top();
            //     seenTableStack.pop();
            //     continue;
            // }

            if (typeid(*(iter->get())) == typeid(pColStep))
            {
                pColStep* colStep = dynamic_cast<pColStep*>(iter->get());
                string alias(colStep->alias());
                string view(colStep->view());
                //If this is the first time we've seen this table or alias
                uint32_t tableId = 0;
                tableId = getTableKey(jobInfo, colStep->tupleId());

                // First step touching a table becomes the scan step; note that
                // changePcolStepToPcolScan may advance 'iter' (pds-pcol duo).
                if (seenTableIds.find(tableId) == seenTableIds.end())
                    changePcolStepToPcolScan(iter, end);

                //Mark this OID as seen
                seenTableIds.insert(tableId);
            }
        }
    }

    //build the project steps
    if (jobInfo.deliveredCols.empty())
    {
        throw logic_error("No delivery column.");
    }

    // if any aggregate columns
    if (jobInfo.hasAggregation == true)
    {
        projectSteps = doAggProject(csep, jobInfo);
    }
    else
    {
        projectSteps = doProject(jobInfo.nonConstCols, jobInfo);
    }

    // bug3736, have jobInfo include the column map info.
    const CalpontSelectExecutionPlan::ColumnMap& retCols = csep->columnMap();
    CalpontSelectExecutionPlan::ColumnMap::const_iterator i = retCols.begin();

    for (; i != retCols.end(); i++)
    {
        SimpleColumn* sc = dynamic_cast<SimpleColumn*>(i->second.get());

        if (sc && !sc->schemaName().empty())
        {
            CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
            CalpontSystemCatalog::ColType ct = sc->colType();

            //XXX use this before connector sets colType in sc correctly.
            if (sc->isColumnStore() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
                ct = jobInfo.csc->colType(sc->oid());

            //X

            string alias(extractTableAlias(sc));
            TupleInfo ti(setTupleInfo(ct, sc->oid(), jobInfo, tblOid, sc, alias));
            uint32_t colKey = ti.key;
            uint32_t tblKey = getTableKey(jobInfo, colKey);
            jobInfo.columnMap[tblKey].push_back(colKey);

            // Remember one representative column per table.
            if (jobInfo.tableColMap.find(tblKey) == jobInfo.tableColMap.end())
                jobInfo.tableColMap[tblKey] = i->second;
        }
    }

    // special case, select without a table, like: select 1;
    if (jobInfo.constantCol == CONST_COL_ONLY)
        return;

    //If there are no filters (select * from table;) then add one simple scan
    //TODO: more work here...
    // @bug 497 fix. populate a map of tableoid for querysteps. tablescan
    // cols whose table does not belong to the map
    typedef set<uint32_t> tableIDMap_t;
    tableIDMap_t tableIDMap;
    JobStepVector::iterator qsiter = querySteps.begin();
    JobStepVector::iterator qsend = querySteps.end();
    uint32_t tableId = 0;

    // Record every table already covered by a filter step.
    while (qsiter != qsend)
    {
        JobStep* js = qsiter->get();

        if (js->tupleId() != (uint64_t) - 1)
            tableId = getTableKey(jobInfo, js->tupleId());

        tableIDMap.insert(tableId);
        ++qsiter;
    }

    JobStepVector::iterator jsiter = projectSteps.begin();
    JobStepVector::iterator jsend = projectSteps.end();

    // For each projected table with no filter step, append one column scan
    // to querySteps so the table is actually read.
    while (jsiter != jsend)
    {
        JobStep* js = jsiter->get();

        if (js->tupleId() != (uint64_t) - 1)
            tableId = getTableKey(jobInfo, js->tupleId());
        else
            tableId = getTableKey(jobInfo, js);

        if (typeid(*(jsiter->get())) == typeid(pColStep) &&
                tableIDMap.find(tableId) == tableIDMap.end())
        {
            SJSTEP step0 = *jsiter;
            pColStep* colStep = dynamic_cast<pColStep*>(step0.get());
            pColScanStep* scanStep = new pColScanStep(*colStep);
            //clear out any output association so we get a nice, new one during association
            scanStep->outputAssociation(JobStepAssociation());
            step0.reset(scanStep);
            querySteps.push_back(step0);
            js = step0.get();
            tableId = getTableKey(jobInfo, js->tupleId());
            tableIDMap.insert(tableId);
        }

        ++jsiter;
    }
}
|
|
|
|
|
|
// v-table mode
|
|
void makeVtableModeSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
|
|
JobStepVector& querySteps, JobStepVector& projectSteps, DeliveredTableMap& deliverySteps)
|
|
{
|
|
// special case for outer query order by limit -- return all
|
|
if (jobInfo.subId == 0 && csep->hasOrderBy() && !csep->specHandlerProcessed())
|
|
{
|
|
jobInfo.limitCount = (uint64_t) - 1;
|
|
}
|
|
// support order by and limit in sub-query/union or
|
|
// GROUP BY handler processed outer query order
|
|
else if (csep->orderByCols().size() > 0)
|
|
{
|
|
addOrderByAndLimit(csep, jobInfo);
|
|
}
|
|
// limit without order by in any query
|
|
else
|
|
{
|
|
jobInfo.limitStart = csep->limitStart();
|
|
jobInfo.limitCount = csep->limitNum();
|
|
}
|
|
|
|
// Bug 2123. Added overrideLargeSideEstimate parm below. True if the query was written
|
|
// with a hint telling us to skip the estimatation process for determining the large side
|
|
// table and instead use the table order in the from clause.
|
|
associateTupleJobSteps(querySteps, projectSteps, deliverySteps,
|
|
jobInfo, csep->overrideLargeSideEstimate());
|
|
uint16_t stepNo = jobInfo.subId * 10000;
|
|
numberSteps(querySteps, stepNo, jobInfo.traceFlags);
|
|
// SJSTEP ds = deliverySteps.begin()->second;
|
|
idbassert(deliverySteps.begin()->second.get());
|
|
// ds->stepId(stepNo);
|
|
// ds->setTraceFlags(jobInfo.traceFlags);
|
|
}
|
|
|
|
}
|
|
|
|
namespace joblist
|
|
{
|
|
|
|
// Build the jobstep lists for a non-union SELECT plan (tuple/v-table mode):
// register every table in the from clause, preprocess select/having
// sub-queries, then parse the plan into steps and associate them.
void makeJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
                  JobStepVector& querySteps, JobStepVector& projectSteps, DeliveredTableMap& deliverySteps)
{
    // v-table mode, switch to tuple methods and return the tuple joblist.
    //@Bug 1958 Build table list only for tryTuples.
    const CalpontSelectExecutionPlan::SelectList& fromSubquery = csep->derivedTableList();
    int i = 0;  // index into fromSubquery; advanced once per schema-less (derived) table

    for (CalpontSelectExecutionPlan::TableList::const_iterator it = csep->tableList().begin();
            it != csep->tableList().end();
            it++)
    {
        CalpontSystemCatalog::OID oid;

        // An empty schema marks a derived table (from-clause sub-query).
        if (it->schema.empty())
            oid = doFromSubquery(fromSubquery[i++].get(), it->alias, it->view, jobInfo);
        else if (it->fisColumnStore)
            oid = jobInfo.csc->tableRID(*it).objnum;
        else
            oid = 0;  // foreign-engine table: no ColumnStore object id

        uint32_t tableUid = makeTableKey(jobInfo, oid, it->table, it->alias, it->schema, it->view);
        jobInfo.tableList.push_back(tableUid);
    }

    // add select subqueries
    preprocessSelectSubquery(csep, jobInfo);

    // semi-join may appear in having clause
    if (csep->having() != NULL)
        preprocessHavingClause(csep, jobInfo);

    // parse plan and make jobstep list
    parseExecutionPlan(csep, jobInfo, querySteps, projectSteps, deliverySteps);
    makeVtableModeSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps);
}
|
|
|
|
// Build jobsteps for a UNION query: create one sub-joblist per select in the
// union, feed them into a single union step, and register that step as the
// delivery for the virtual table. The unused projectSteps parameter keeps
// the signature parallel with makeJobSteps.
void makeUnionJobSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
                       JobStepVector& querySteps, JobStepVector&, DeliveredTableMap& deliverySteps)
{
    CalpontSelectExecutionPlan::SelectList& subSelects = csep->unionVec();
    uint8_t distinctCount = csep->distinctUnionNum();
    RetColsVector returnedCols = csep->returnedCols();

    // Sub-joblists that feed the union step.
    JobStepVector feeders;

    for (CalpontSelectExecutionPlan::SelectList::iterator sel = subSelects.begin();
            sel != subSelects.end();
            ++sel)
    {
        // @bug4848, enhance and unify limit handling.
        SJSTEP sub = doUnionSub(sel->get(), jobInfo);
        querySteps.push_back(sub);
        feeders.push_back(sub);
    }

    jobInfo.deliveredCols = returnedCols;

    SJSTEP unionStep(unionQueries(feeders, distinctCount, jobInfo));
    querySteps.push_back(unionStep);

    uint16_t stepNo = jobInfo.subId * 10000;
    numberSteps(querySteps, stepNo, jobInfo.traceFlags);

    deliverySteps[execplan::CNX_VTABLE_ID] = unionStep;
}
|
|
|
|
|
|
}
|
|
|
|
namespace
|
|
{
|
|
|
|
// Core joblist construction. Builds a TupleJobList from the execution plan:
// seeds a JobInfo with session/plan settings, creates the filter/project/
// delivery steps, optionally emits trace output (including a GraphViz .dot
// file of the step graph), and returns the finished joblist.
// On any exception the partially-built joblist is deleted, errCode/emsg are
// filled in, and an SJLP holding a null pointer is returned (the caller
// substitutes a stub joblist carrying the error).
SJLP makeJobList_(
    CalpontExecutionPlan* cplan,
    ResourceManager* rm,
    bool isExeMgr,
    unsigned& errCode, string& emsg)
{
    // NOTE(review): csep is not null-checked before use; callers are assumed
    // to always pass a CalpontSelectExecutionPlan -- confirm.
    CalpontSelectExecutionPlan* csep = dynamic_cast<CalpontSelectExecutionPlan*>(cplan);
    boost::shared_ptr<CalpontSystemCatalog> csc = CalpontSystemCatalog::makeCalpontSystemCatalog(csep->sessionID());

    static config::Config* sysConfig = config::Config::makeConfig();
    int pmsConfigured = atoi(sysConfig->getConfig("PrimitiveServers", "Count").c_str());

    // We have to go ahead and create JobList now so we can store the joblist's
    // projectTableOID pointer in JobInfo for use during jobstep creation.
    SErrorInfo errorInfo(new ErrorInfo());
    boost::shared_ptr<TupleKeyInfo> keyInfo(new TupleKeyInfo);
    boost::shared_ptr<int> subCount(new int);
    *subCount = 0;
    JobList* jl = new TupleJobList(isExeMgr);
    jl->setPMsConfigured(pmsConfigured);
    jl->priority(csep->priority());
    jl->errorInfo(errorInfo);
    rm->setTraceFlags(csep->traceFlags());

    //Stuff a util struct with some stuff we always need
    JobInfo jobInfo(rm);
    jobInfo.sessionId = csep->sessionID();
    jobInfo.txnId = csep->txnID();
    jobInfo.verId = csep->verID();
    jobInfo.statementId = csep->statementID();
    jobInfo.queryType = csep->queryType();
    jobInfo.csc = csc;
    //TODO: clean up the vestiges of the bool trace
    jobInfo.trace = csep->traceOn();
    jobInfo.traceFlags = csep->traceFlags();
    jobInfo.isExeMgr = isExeMgr;
    // jobInfo.tryTuples = tryTuples; // always tuples after release 3.0
    jobInfo.stringScanThreshold = csep->stringScanThreshold();
    jobInfo.errorInfo = errorInfo;
    jobInfo.keyInfo = keyInfo;
    jobInfo.subCount = subCount;
    jobInfo.projectingTableOID = jl->projectingTableOIDPtr();
    jobInfo.jobListPtr = jl;
    jobInfo.stringTableThreshold = csep->stringTableThreshold();
    jobInfo.localQuery = csep->localQuery();
    jobInfo.uuid = csep->uuid();
    jobInfo.timeZone = csep->timeZone();

    /* disk-based join vars */
    jobInfo.smallSideLimit = csep->djsSmallSideLimit();
    jobInfo.largeSideLimit = csep->djsLargeSideLimit();
    jobInfo.partitionSize = csep->djsPartitionSize();
    jobInfo.umMemLimit.reset(new int64_t);
    *(jobInfo.umMemLimit) = csep->umMemLimit();
    jobInfo.isDML = csep->isDML();

    jobInfo.smallSideUsage.reset(new int64_t);
    *jobInfo.smallSideUsage = 0;

    // set fifoSize to 1 for CalpontSystemCatalog query
    // (the high session-id bit marks internal system-catalog queries)
    if (csep->sessionID() & 0x80000000)
        jobInfo.fifoSize = 1;
    else if (csep->traceOn())
        cout << (*csep) << endl;

    try
    {
        JobStepVector querySteps;
        JobStepVector projectSteps;
        DeliveredTableMap deliverySteps;

        // Union plans take a separate construction path.
        if (csep->unionVec().size() == 0)
            makeJobSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps);
        else
            makeUnionJobSteps(csep, jobInfo, querySteps, projectSteps, deliverySteps);

        // Number filter steps first, then continue numbering into the
        // project steps so ids are unique across both vectors.
        uint16_t stepNo = numberSteps(querySteps, 0, jobInfo.traceFlags);
        stepNo = numberSteps(projectSteps, stepNo, jobInfo.traceFlags);

        struct timeval stTime;

        if (jobInfo.trace)
        {
            // Dump job parameters, all steps, and a .dot graph of the plan.
            ostringstream oss;
            oss << endl;
            oss << endl << "job parms: " << endl;
            oss << "maxBuckets = " << jobInfo.maxBuckets << ", maxElems = " << jobInfo.maxElems <<
                ", flushInterval = " << jobInfo.flushInterval <<
                ", fifoSize = " << jobInfo.fifoSize <<
                ", ScanLimit/Threshold = " << jobInfo.scanLbidReqLimit << "/" <<
                jobInfo.scanLbidReqThreshold << endl;
            oss << "UUID: " << jobInfo.uuid << endl;
            oss << endl << "job filter steps: " << endl;
            ostream_iterator<JobStepVector::value_type> oIter(oss, "\n");
            copy(querySteps.begin(), querySteps.end(), oIter);
            oss << endl << "job project steps: " << endl;
            copy(projectSteps.begin(), projectSteps.end(), oIter);
            oss << endl << "job delivery steps: " << endl;
            DeliveredTableMap::iterator dsi = deliverySteps.begin();

            while (dsi != deliverySteps.end())
            {
                // NOTE(review): this streams the JobStep pointer value, not
                // the step itself -- possibly intended to be *...; confirm.
                oss << dynamic_cast<const JobStep*>(dsi->second.get()) << endl;
                ++dsi;
            }

            oss << endl;
            gettimeofday(&stTime, 0);

            struct tm tmbuf;
#ifdef _MSC_VER
            errno_t p = 0;
            time_t t = stTime.tv_sec;
            p = localtime_s(&tmbuf, &t);

            if (p != 0)
                memset(&tmbuf, 0, sizeof(tmbuf));

#else
            localtime_r(&stTime.tv_sec, &tmbuf);
#endif
            // Timestamped filename for the GraphViz dump: jobstep.<ts>.dot
            ostringstream tms;
            tms << setfill('0')
                << setw(4) << (tmbuf.tm_year + 1900)
                << setw(2) << (tmbuf.tm_mon + 1)
                << setw(2) << (tmbuf.tm_mday)
                << setw(2) << (tmbuf.tm_hour)
                << setw(2) << (tmbuf.tm_min)
                << setw(2) << (tmbuf.tm_sec)
                << setw(6) << (stTime.tv_usec);
            string tmstr(tms.str());
            string jsrname("jobstep." + tmstr + ".dot");
            ofstream dotFile(jsrname.c_str());
            jlf_graphics::writeDotCmds(dotFile, querySteps, projectSteps);

            char timestamp[80];
#ifdef _MSC_VER
            t = stTime.tv_sec;
            p = ctime_s(timestamp, 80, &t);

            if (p != 0)
                strcpy(timestamp, "UNKNOWN");

#else
            ctime_r((const time_t*)&stTime.tv_sec, timestamp);
#endif
            oss << "runtime updates: start at " << timestamp;
            cout << oss.str();
            Message::Args args;
            args.add(oss.str());
            jobInfo.logger->logMessage(LOG_TYPE_DEBUG, LogSQLTrace, args,
                                       LoggingID(5, jobInfo.sessionId, jobInfo.txnId, 0));
            cout << flush;
        }
        else
        {
            gettimeofday(&stTime, 0);
        }

        // Finish initializing the JobList object
        jl->addQuery(querySteps);
        jl->addProject(projectSteps);
        jl->addDelivery(deliverySteps);

        dynamic_cast<TupleJobList*>(jl)->setDeliveryFlag(true);
    }
    catch (IDBExcept& iex)
    {
        // Known ColumnStore error: keep its code and message.
        jobInfo.errorInfo->errCode = iex.errorCode();
        errCode = iex.errorCode();
        exceptionHandler(jl, jobInfo, iex.what(), LOG_TYPE_DEBUG);
        emsg = iex.what();
        goto bailout;
    }
    catch (const std::exception& ex)
    {
        jobInfo.errorInfo->errCode = makeJobListErr;
        errCode = makeJobListErr;
        exceptionHandler(jl, jobInfo, ex.what());
        emsg = ex.what();
        goto bailout;
    }
    catch (...)
    {
        jobInfo.errorInfo->errCode = makeJobListErr;
        errCode = makeJobListErr;
        exceptionHandler(jl, jobInfo, "an exception");
        emsg = "An unknown internal joblist error";
        goto bailout;
    }

    goto done;

bailout:
    // Construction failed: discard the partial joblist; the caller builds a
    // stub from errCode/emsg.
    delete jl;
    jl = 0;

    if (emsg.empty())
        emsg = "An unknown internal joblist error";

done:
    SJLP jlp(jl);
    return jlp;
}
|
|
|
|
}
|
|
|
|
namespace joblist
|
|
{
|
|
|
|
/* static */
|
|
SJLP JobListFactory::makeJobList(
|
|
CalpontExecutionPlan* cplan,
|
|
ResourceManager* rm,
|
|
bool tryTuple,
|
|
bool isExeMgr)
|
|
{
|
|
SJLP ret;
|
|
string emsg;
|
|
unsigned errCode = 0;
|
|
|
|
ret = makeJobList_(cplan, rm, isExeMgr, errCode, emsg);
|
|
|
|
if (!ret)
|
|
{
|
|
ret.reset(new TupleJobList(isExeMgr));
|
|
SErrorInfo errorInfo(new ErrorInfo);
|
|
errorInfo->errCode = errCode;
|
|
errorInfo->errMsg = emsg;
|
|
ret->errorInfo(errorInfo);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
}
|
|
// vim:ts=4 sw=4:
|
|
|