1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

Merge pull request #1828 from tntnatbry/MCOL-4543-4589

MCOL-4543/MCOL-4589 Subquery optimization
This commit is contained in:
Roman Nozdrin
2021-04-14 13:50:46 +03:00
committed by GitHub
5 changed files with 446 additions and 16 deletions

View File

@ -57,7 +57,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
for (uint i = 0; i < derivedTbList.size(); i++)
{
CalpontSelectExecutionPlan* plan = dynamic_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
CalpontSelectExecutionPlan* plan = reinterpret_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
CalpontSelectExecutionPlan::ReturnedColumnList cols = plan->returnedCols();
vector<CalpontSelectExecutionPlan::ReturnedColumnList> unionColVec;
@ -73,7 +73,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
for (uint j = 0; j < plan->unionVec().size(); j++)
{
unionColVec.push_back(
dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols());
reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols());
}
}
@ -82,7 +82,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
for (uint j = 0; j < plan->unionVec().size(); j++)
{
if (dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->tableList().empty())
if (reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->tableList().empty())
{
horizontalOptimization = false;
break;
@ -93,7 +93,29 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
{
int64_t val = 1;
for (uint i = 0; i < cols.size(); i++)
// TODO MCOL-4543 Only project those columns from the subquery
// which are referenced in the outer select. So for example,
// if a table t contains 10 columns c1 ... c10 :
// "select count(c2) from (select * from t) q;"
// with p being the subquery execution plan, p->columnMap()
// and p->returnedCols() should both be of size 1, instead
// of 10, with entries for c2 in each.
//
// We are currently performing a dumb optimization:
// Instead of just referencing c2, we are referencing (c1,c2)
// for the above query. This is due to complexity associated
// with modifying ReturnedColumn::colPosition()
// (from a value of 1 to a value of 0) of the outer query
// which references c2. So essentially, if c2 is replaced by c10
// in the above query, we fall back to projecting all 10 columns
// of the subquery in ExeMgr.
// This will be addressed in the future.
CalpontSelectExecutionPlan::ReturnedColumnList nonConstCols;
vector<CalpontSelectExecutionPlan::ReturnedColumnList> nonConstUnionColVec(unionColVec.size());
int64_t lastNonConstIndex = -1;
for (int64_t i = cols.size() - 1; i >= 0; i--)
{
//if (cols[i]->derivedTable().empty())
if (cols[i]->refCount() == 0)
@ -101,22 +123,84 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
if (cols[i]->derivedRefCol())
cols[i]->derivedRefCol()->decRefCount();
cols[i].reset(new ConstantColumn(val));
(dynamic_cast<ConstantColumn*>(cols[i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
if (lastNonConstIndex == -1)
{
SimpleColumn* sc = dynamic_cast<SimpleColumn*>(cols[i].get());
if (sc && (plan->columnMap().count(sc->columnName()) == 1))
{
plan->columnMap().erase(sc->columnName());
}
}
else
{
cols[i].reset(new ConstantColumn(val));
(reinterpret_cast<ConstantColumn*>(cols[i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
}
for (uint j = 0; j < unionColVec.size(); j++)
{
unionColVec[j][i].reset(new ConstantColumn(val));
(dynamic_cast<ConstantColumn*>(unionColVec[j][i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
if (lastNonConstIndex == -1)
{
CalpontSelectExecutionPlan* unionSubPlan =
reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
SimpleColumn* sc = dynamic_cast<SimpleColumn*>(unionSubPlan->returnedCols()[i].get());
if (sc && (unionSubPlan->columnMap().count(sc->columnName()) == 1))
{
unionSubPlan->columnMap().erase(sc->columnName());
}
}
else
{
unionColVec[j][i].reset(new ConstantColumn(val));
(reinterpret_cast<ConstantColumn*>(unionColVec[j][i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
}
}
}
else if (lastNonConstIndex == -1)
{
lastNonConstIndex = i;
}
}
if (lastNonConstIndex == -1)
{
// None of the subquery columns are referenced, just use the first one
if (!cols.empty())
{
cols[0].reset(new ConstantColumn(val));
(reinterpret_cast<ConstantColumn*>(cols[0].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
nonConstCols.push_back(cols[0]);
for (uint j = 0; j < unionColVec.size(); j++)
{
unionColVec[j][0].reset(new ConstantColumn(val));
(reinterpret_cast<ConstantColumn*>(unionColVec[j][0].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
nonConstUnionColVec[j].push_back(unionColVec[j][0]);
}
}
}
else
{
nonConstCols.assign(cols.begin(), cols.begin() + lastNonConstIndex + 1);
for (uint j = 0; j < unionColVec.size(); j++)
{
nonConstUnionColVec[j].assign(unionColVec[j].begin(), unionColVec[j].begin() + lastNonConstIndex + 1);
}
}
// set back
plan->returnedCols(cols);
plan->returnedCols(nonConstCols);
for (uint j = 0; j < unionColVec.size(); j++)
dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols(unionColVec[j]);
{
CalpontSelectExecutionPlan* unionSubPlan =
reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
unionSubPlan->returnedCols(nonConstUnionColVec[j]);
}
}
}
@ -151,7 +235,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
for (uint i = 0; i < derivedTbList.size(); i++)
{
CalpontSelectExecutionPlan* plan = dynamic_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
CalpontSelectExecutionPlan* plan = reinterpret_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
CalpontSelectExecutionPlan::ReturnedColumnList derivedColList = plan->returnedCols();
mapIt = derivedTbFilterMap.find(plan->derivedTbAlias());
@ -181,7 +265,7 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
for (uint j = 0; j < plan->unionVec().size(); j++)
{
CalpontSelectExecutionPlan* unionPlan =
dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
CalpontSelectExecutionPlan::ReturnedColumnList unionColList = unionPlan->returnedCols();
ParseTree* mainFilterForUnion = new ParseTree();
mainFilterForUnion->copyTree(*(mapIt->second));

View File

@ -2920,15 +2920,46 @@ SimpleColumn* getSmallestColumn(boost::shared_ptr<CalpontSystemCatalog> csc,
if (tan.alias == csep->derivedTbAlias())
{
assert (!csep->returnedCols().empty());
ReturnedColumn* rc = dynamic_cast<ReturnedColumn*>(csep->returnedCols()[0].get());
const CalpontSelectExecutionPlan::ReturnedColumnList& cols = csep->returnedCols();
CalpontSelectExecutionPlan::ReturnedColumnList::const_iterator iter;
ReturnedColumn* rc;
for (iter = cols.begin(); iter != cols.end(); iter++)
{
if ((*iter)->refCount() != 0)
{
rc = dynamic_cast<ReturnedColumn*>(iter->get());
break;
}
}
if (iter == cols.end())
{
assert (!cols.empty());
// We take cols[0] here due to the optimization happening in
// derivedTableOptimization. All cols with refCount 0 from
// the end of the cols list are optimized out, until the
// first column with non-zero refCount is encountered. So
// here, if instead of cols[0], we take cols[1] (based on
// some logic) and increment its refCount, then cols[0] is
// not optimized out in derivedTableOptimization and is
// added as a ConstantColumn to the derived table's returned
// column list. This later causes an ineffective row group
// with rows of the form (1, cols[1]_value1) to be created in ExeMgr.
rc = dynamic_cast<ReturnedColumn*>(cols[0].get());
// @bug5634 derived table optimization.
rc->incRefCount();
}
SimpleColumn* sc = new SimpleColumn();
sc->columnName(rc->alias());
sc->sequence(0);
sc->tableAlias(tan.alias);
sc->timeZone(gwi.thd->variables.time_zone->get_name()->ptr());
// @bug5634 derived table optimization.
rc->incRefCount();
sc->derivedTable(csep->derivedTbAlias());
sc->derivedRefCol(rc);
return sc;