You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
Merge pull request #1828 from tntnatbry/MCOL-4543-4589
MCOL-4543/MCOL-4589 Subquery optimization
This commit is contained in:
@ -57,7 +57,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
|
||||
for (uint i = 0; i < derivedTbList.size(); i++)
|
||||
{
|
||||
CalpontSelectExecutionPlan* plan = dynamic_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
|
||||
CalpontSelectExecutionPlan* plan = reinterpret_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
|
||||
CalpontSelectExecutionPlan::ReturnedColumnList cols = plan->returnedCols();
|
||||
vector<CalpontSelectExecutionPlan::ReturnedColumnList> unionColVec;
|
||||
|
||||
@ -73,7 +73,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
for (uint j = 0; j < plan->unionVec().size(); j++)
|
||||
{
|
||||
unionColVec.push_back(
|
||||
dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols());
|
||||
reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols());
|
||||
}
|
||||
}
|
||||
|
||||
@ -82,7 +82,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
|
||||
for (uint j = 0; j < plan->unionVec().size(); j++)
|
||||
{
|
||||
if (dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->tableList().empty())
|
||||
if (reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->tableList().empty())
|
||||
{
|
||||
horizontalOptimization = false;
|
||||
break;
|
||||
@ -93,7 +93,29 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
{
|
||||
int64_t val = 1;
|
||||
|
||||
for (uint i = 0; i < cols.size(); i++)
|
||||
// TODO MCOL-4543 Only project those columns from the subquery
|
||||
// which are referenced in the outer select. So for example,
|
||||
// if a table t contains 10 columns c1 ... c10 :
|
||||
// "select count(c2) from (select * from t) q;"
|
||||
// with p being the subquery execution plan, p->columnMap()
|
||||
// and p->returnedCols() should both be of size 1, instead
|
||||
// of 10, with entries for c2 in each.
|
||||
//
|
||||
// We are currently performing a dumb optimization:
|
||||
// Instead of just referencing c2, we are referencing (c1,c2)
|
||||
// for the above query. This is due to complexity associated
|
||||
// with modifying ReturnedColumn::colPosition()
|
||||
// (from a value of 1 to a value of 0) of the outer query
|
||||
// which references c2. So essentially, if c2 is replaced by c10
|
||||
// in the above query, we fall back to projecting all 10 columns
|
||||
// of the subquery in ExeMgr.
|
||||
// This will be addressed in the future.
|
||||
CalpontSelectExecutionPlan::ReturnedColumnList nonConstCols;
|
||||
vector<CalpontSelectExecutionPlan::ReturnedColumnList> nonConstUnionColVec(unionColVec.size());
|
||||
|
||||
int64_t lastNonConstIndex = -1;
|
||||
|
||||
for (int64_t i = cols.size() - 1; i >= 0; i--)
|
||||
{
|
||||
//if (cols[i]->derivedTable().empty())
|
||||
if (cols[i]->refCount() == 0)
|
||||
@ -101,22 +123,84 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
if (cols[i]->derivedRefCol())
|
||||
cols[i]->derivedRefCol()->decRefCount();
|
||||
|
||||
cols[i].reset(new ConstantColumn(val));
|
||||
(dynamic_cast<ConstantColumn*>(cols[i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
if (lastNonConstIndex == -1)
|
||||
{
|
||||
SimpleColumn* sc = dynamic_cast<SimpleColumn*>(cols[i].get());
|
||||
|
||||
if (sc && (plan->columnMap().count(sc->columnName()) == 1))
|
||||
{
|
||||
plan->columnMap().erase(sc->columnName());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cols[i].reset(new ConstantColumn(val));
|
||||
(reinterpret_cast<ConstantColumn*>(cols[i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
}
|
||||
|
||||
for (uint j = 0; j < unionColVec.size(); j++)
|
||||
{
|
||||
unionColVec[j][i].reset(new ConstantColumn(val));
|
||||
(dynamic_cast<ConstantColumn*>(unionColVec[j][i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
if (lastNonConstIndex == -1)
|
||||
{
|
||||
CalpontSelectExecutionPlan* unionSubPlan =
|
||||
reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
|
||||
|
||||
SimpleColumn* sc = dynamic_cast<SimpleColumn*>(unionSubPlan->returnedCols()[i].get());
|
||||
|
||||
if (sc && (unionSubPlan->columnMap().count(sc->columnName()) == 1))
|
||||
{
|
||||
unionSubPlan->columnMap().erase(sc->columnName());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unionColVec[j][i].reset(new ConstantColumn(val));
|
||||
(reinterpret_cast<ConstantColumn*>(unionColVec[j][i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (lastNonConstIndex == -1)
|
||||
{
|
||||
lastNonConstIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (lastNonConstIndex == -1)
|
||||
{
|
||||
// None of the subquery columns are referenced, just use the first one
|
||||
if (!cols.empty())
|
||||
{
|
||||
cols[0].reset(new ConstantColumn(val));
|
||||
(reinterpret_cast<ConstantColumn*>(cols[0].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
nonConstCols.push_back(cols[0]);
|
||||
|
||||
for (uint j = 0; j < unionColVec.size(); j++)
|
||||
{
|
||||
unionColVec[j][0].reset(new ConstantColumn(val));
|
||||
(reinterpret_cast<ConstantColumn*>(unionColVec[j][0].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
nonConstUnionColVec[j].push_back(unionColVec[j][0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nonConstCols.assign(cols.begin(), cols.begin() + lastNonConstIndex + 1);
|
||||
|
||||
for (uint j = 0; j < unionColVec.size(); j++)
|
||||
{
|
||||
nonConstUnionColVec[j].assign(unionColVec[j].begin(), unionColVec[j].begin() + lastNonConstIndex + 1);
|
||||
}
|
||||
}
|
||||
|
||||
// set back
|
||||
plan->returnedCols(cols);
|
||||
plan->returnedCols(nonConstCols);
|
||||
|
||||
for (uint j = 0; j < unionColVec.size(); j++)
|
||||
dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols(unionColVec[j]);
|
||||
{
|
||||
CalpontSelectExecutionPlan* unionSubPlan =
|
||||
reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
|
||||
unionSubPlan->returnedCols(nonConstUnionColVec[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,7 +235,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
|
||||
for (uint i = 0; i < derivedTbList.size(); i++)
|
||||
{
|
||||
CalpontSelectExecutionPlan* plan = dynamic_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
|
||||
CalpontSelectExecutionPlan* plan = reinterpret_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
|
||||
CalpontSelectExecutionPlan::ReturnedColumnList derivedColList = plan->returnedCols();
|
||||
mapIt = derivedTbFilterMap.find(plan->derivedTbAlias());
|
||||
|
||||
@ -181,7 +265,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
for (uint j = 0; j < plan->unionVec().size(); j++)
|
||||
{
|
||||
CalpontSelectExecutionPlan* unionPlan =
|
||||
dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
|
||||
reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
|
||||
CalpontSelectExecutionPlan::ReturnedColumnList unionColList = unionPlan->returnedCols();
|
||||
ParseTree* mainFilterForUnion = new ParseTree();
|
||||
mainFilterForUnion->copyTree(*(mapIt->second));
|
||||
|
@ -2920,15 +2920,46 @@ SimpleColumn* getSmallestColumn(boost::shared_ptr<CalpontSystemCatalog> csc,
|
||||
|
||||
if (tan.alias == csep->derivedTbAlias())
|
||||
{
|
||||
assert (!csep->returnedCols().empty());
|
||||
ReturnedColumn* rc = dynamic_cast<ReturnedColumn*>(csep->returnedCols()[0].get());
|
||||
const CalpontSelectExecutionPlan::ReturnedColumnList& cols = csep->returnedCols();
|
||||
|
||||
CalpontSelectExecutionPlan::ReturnedColumnList::const_iterator iter;
|
||||
|
||||
ReturnedColumn* rc;
|
||||
|
||||
for (iter = cols.begin(); iter != cols.end(); iter++)
|
||||
{
|
||||
if ((*iter)->refCount() != 0)
|
||||
{
|
||||
rc = dynamic_cast<ReturnedColumn*>(iter->get());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (iter == cols.end())
|
||||
{
|
||||
assert (!cols.empty());
|
||||
|
||||
// We take cols[0] here due to the optimization happening in
|
||||
// derivedTableOptimization. All cols with refCount 0 from
|
||||
// the end of the cols list are optimized out, until the
|
||||
// first column with non-zero refCount is encountered. So
|
||||
// here, if instead of cols[0], we take cols[1] (based on
|
||||
// some logic) and increment its refCount, then cols[0] is
|
||||
// not optimized out in derivedTableOptimization and is
|
||||
// added as a ConstantColumn to the derived table's returned
|
||||
// column list. This later causes an ineffective row group
|
||||
// with rows of the form (1, cols[1]_value1) to be created in ExeMgr.
|
||||
rc = dynamic_cast<ReturnedColumn*>(cols[0].get());
|
||||
|
||||
// @bug5634 derived table optimization.
|
||||
rc->incRefCount();
|
||||
}
|
||||
|
||||
SimpleColumn* sc = new SimpleColumn();
|
||||
sc->columnName(rc->alias());
|
||||
sc->sequence(0);
|
||||
sc->tableAlias(tan.alias);
|
||||
sc->timeZone(gwi.thd->variables.time_zone->get_name()->ptr());
|
||||
// @bug5634 derived table optimization.
|
||||
rc->incRefCount();
|
||||
sc->derivedTable(csep->derivedTbAlias());
|
||||
sc->derivedRefCol(rc);
|
||||
return sc;
|
||||
|
Reference in New Issue
Block a user