You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-4543 Subquery optimization.
For a query of the form "SELECT COUNT(c2) FROM (SELECT * FROM t1) q;", where t1 contains 10 columns c1, c2, ... , c10, we currently create an intermediate RowGroup in ExeMgr with a row of the form (1, c2_value1, 1, 1, 1, 1, 1, 1, 1, 1), i.e. for all the columns of the subquery which are not referenced in the outer query, we substitute a constant value, which is wasteful. With this optimization, we trim the RowGroup to a row of the form (1, c2_value1). This can yield non-trivial query execution time improvements if the subquery contains a large number of columns (such as a "select *" on a very wide table) and the outer query references only a subset of these columns with lower index values from the subquery (as an example, c1 or c2 above). The current limitation of this optimization is that we do not remove those non-referenced subquery columns (c1 in the query above) which are to the left of a referenced column.
This commit is contained in:
@ -93,7 +93,28 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
{
|
||||
int64_t val = 1;
|
||||
|
||||
for (uint i = 0; i < cols.size(); i++)
|
||||
// TODO MCOL-4543 Only project those columns from the subquery
|
||||
// which are referenced in the outer select. So for example,
|
||||
// if a table t contains 10 columns c1 ... c10 :
|
||||
// "select count(c2) from (select * from t) q;"
|
||||
// with p being the subquery execution plan, p->columnMap()
|
||||
// and p->returnedCols() should both be of size 1, instead
|
||||
// of 10, with entries for c2 in each.
|
||||
//
|
||||
// We are currently performing a dumb optimization:
|
||||
// Instead of just referencing c2, we are referencing (c1,c2)
|
||||
// for the above query. This is due to complexity associated
|
||||
// with modifying ReturnedColumn::colPosition()
|
||||
// (from a value of 1 to a value of 0) of the outer query
|
||||
// which references c2. So essentially, if c2 is replaced by c10
|
||||
// in the above query, we fall back to projecting all 10 columns
|
||||
// of the subquery in ExeMgr.
|
||||
// This will be addressed in the future.
|
||||
CalpontSelectExecutionPlan::ReturnedColumnList nonConstCols;
|
||||
|
||||
int64_t lastNonConstIndex = -1;
|
||||
|
||||
for (int64_t i = cols.size() - 1; i >= 0; i--)
|
||||
{
|
||||
//if (cols[i]->derivedTable().empty())
|
||||
if (cols[i]->refCount() == 0)
|
||||
@ -101,8 +122,20 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
if (cols[i]->derivedRefCol())
|
||||
cols[i]->derivedRefCol()->decRefCount();
|
||||
|
||||
cols[i].reset(new ConstantColumn(val));
|
||||
(dynamic_cast<ConstantColumn*>(cols[i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
if ((lastNonConstIndex == -1) && unionColVec.empty())
|
||||
{
|
||||
SimpleColumn* sc = dynamic_cast<SimpleColumn*>(cols[i].get());
|
||||
|
||||
if (sc && (plan->columnMap().count(sc->columnName()) == 1))
|
||||
{
|
||||
plan->columnMap().erase(sc->columnName());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cols[i].reset(new ConstantColumn(val));
|
||||
(dynamic_cast<ConstantColumn*>(cols[i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
}
|
||||
|
||||
for (uint j = 0; j < unionColVec.size(); j++)
|
||||
{
|
||||
@ -110,10 +143,36 @@ void derivedTableOptimization(THD* thd, SCSEP& csep)
|
||||
(dynamic_cast<ConstantColumn*>(unionColVec[j][i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
}
|
||||
}
|
||||
else if (lastNonConstIndex == -1)
|
||||
{
|
||||
lastNonConstIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (lastNonConstIndex == -1)
|
||||
{
|
||||
// None of the subquery columns are referenced, just use the first one
|
||||
if (!cols.empty())
|
||||
{
|
||||
cols[0].reset(new ConstantColumn(val));
|
||||
(dynamic_cast<ConstantColumn*>(cols[0].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
|
||||
nonConstCols.push_back(cols[0]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nonConstCols.assign(cols.begin(), cols.begin() + lastNonConstIndex + 1);
|
||||
}
|
||||
|
||||
// set back
|
||||
plan->returnedCols(cols);
|
||||
if (unionColVec.empty())
|
||||
{
|
||||
plan->returnedCols(nonConstCols);
|
||||
}
|
||||
else
|
||||
{
|
||||
plan->returnedCols(cols);
|
||||
}
|
||||
|
||||
for (uint j = 0; j < unionColVec.size(); j++)
|
||||
dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols(unionColVec[j]);
|
||||
|
Reference in New Issue
Block a user