1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-4234: improve GROUP BY and ORDER BY interaction (#3194)

This patch fixes the problem in MCOL-4234 and also generally improves
behavior of GROUP BY.

It does so by introducing a "dummy" aggregate and by wrapping columns
into it. This allows for columns that are not in GROUP BY to be used
more freely, for example, in SELECT * FROM tbl GROUP BY col - all
columns that are not "col" will be wrapped into an aggregate and query
will proceed to execution.

The dummy aggregate itself does nothing more than remember last value
passed into it.

There also an additional error message that tries to explain what types
of expressions can be wrapped into an aggregate.
This commit is contained in:
Sergey Zefirov
2024-06-17 20:00:54 +03:00
committed by GitHub
parent b1045d27b6
commit 1122b64cb1
16 changed files with 312 additions and 25 deletions

View File

@ -474,6 +474,22 @@ bool sortItemIsInGrouping(Item* sort_item, ORDER* groupcol)
if (sort_item->type() == Item::SUM_FUNC_ITEM)
{
found = true;
return found;
}
{
// as we now can warp ORDER BY or SELECT expression into
// an aggregate, we can pass FIELD_ITEM as "found" as well.
Item* item = sort_item;
while (item->type() == Item::REF_ITEM)
{
const Item_ref* ref_item = static_cast<const Item_ref*>(item);
item = (Item*)*ref_item->ref;
}
if (item->type() == Item::FIELD_ITEM || item->type() == Item::CONST_ITEM || item->type() == Item::NULL_ITEM)
{
return true;
}
}
// A function that contains an aggregate function
@ -3190,8 +3206,8 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB(const Item* item)
if (item->field_type() == MYSQL_TYPE_BLOB)
{
// We default to BLOB, but then try to correct type,
// because many textual types in server have type_handler_blob
// (and variants) as their type.
// because many textual types in server have type_handler_blob
// (and variants) as their type.
ct.colDataType = CalpontSystemCatalog::BLOB;
const Item_result_field* irf = dynamic_cast<const Item_result_field*>(item);
if (irf && irf->result_field && !irf->result_field->binary())
@ -4911,6 +4927,66 @@ static void processAggregateColumnConstArg(gp_walk_info& gwi, SRCP& parm, Aggreg
}
}
void analyzeForImplicitGroupBy(Item* item, gp_walk_info& gwi)
{
if (gwi.implicitExplicitGroupBy)
{
return;
}
while (item->type() == Item::REF_ITEM)
{
Item_ref* ref = static_cast<Item_ref*>(item);
item = *ref->ref;
}
if (item->type() == Item::SUM_FUNC_ITEM)
{
// definitely an aggregate and thus needs an implicit group by.
gwi.implicitExplicitGroupBy = true;
return;
}
if (item->type() == Item::FUNC_ITEM)
{
Item_func* ifp = static_cast<Item_func*>(item);
for(uint32_t i = 0;i<ifp->argument_count() && !gwi.implicitExplicitGroupBy;i++)
{
analyzeForImplicitGroupBy(ifp->arguments()[i], gwi);
}
}
}
ReturnedColumn* wrapIntoAggregate(ReturnedColumn* rc, gp_walk_info& gwi, SELECT_LEX& select_lex, Item* baseItem)
{
if (!gwi.implicitExplicitGroupBy)
{
return rc;
}
if (dynamic_cast<AggregateColumn*>(rc) != nullptr || dynamic_cast<ConstantColumn*>(rc) != nullptr)
{
return rc;
}
ORDER* groupcol = static_cast<ORDER*>(select_lex.group_list.first);
while (groupcol)
{
if (baseItem->eq(*groupcol->item, false))
{
return rc;
}
groupcol = groupcol->next;
}
AggregateColumn* ac = new AggregateColumn(gwi.sessionid);
ac->timeZone(gwi.timeZone);
ac->alias(rc->alias());
ac->aggOp(AggregateColumn::SELECT_SOME);
ac->asc(rc->asc());
ac->aggParms().push_back(SRCP(rc));
return ac;
}
ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
{
// MCOL-1201 For UDAnF multiple parameters
@ -5018,8 +5094,9 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
if (ord_col->type() == Item::CONST_ITEM && ord_col->cmp_type() == INT_RESULT)
{
Item_int* id = (Item_int*)ord_col;
int64_t index = id->val_int();
if (id->val_int() > (int)selCols.size())
if (index > (int)selCols.size() || index < 1)
{
gwi.fatalParseError = true;
@ -5029,8 +5106,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
return NULL;
}
rc = selCols[id->val_int() - 1]->clone();
rc->orderPos(id->val_int() - 1);
rc = selCols[index - 1]->clone();
rc->orderPos(index - 1);
}
else
{
@ -7394,6 +7471,32 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
}
#endif
// analyze SELECT and ORDER BY parts - do they have implicit GROUP BY induced by aggregates?
{
if (select_lex.group_list.first)
{
// we have an explicit GROUP BY.
gwi.implicitExplicitGroupBy = true;
}
else
{
// do we have an implicit GROUP BY?
List_iterator_fast<Item> it(select_lex.item_list);
Item* item;
while ((item = it++))
{
analyzeForImplicitGroupBy(item, gwi);
}
SQL_I_List<ORDER> order_list = select_lex.order_list;
ORDER* ordercol = static_cast<ORDER*>(order_list.first);
for (; ordercol; ordercol = ordercol->next)
{
analyzeForImplicitGroupBy(*(ordercol->item), gwi);
}
}
}
// populate returnedcolumnlist and columnmap
List_iterator_fast<Item> it(select_lex.item_list);
Item* item;
@ -7420,6 +7523,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
// @bug 5916. Need to keep checking until getting concret item in case
// of nested view.
Item* baseItem = item;
while (item->type() == Item::REF_ITEM)
{
Item_ref* ref = (Item_ref*)item;
@ -7444,8 +7548,6 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
if (sc)
{
boost::shared_ptr<SimpleColumn> spsc(sc);
string fullname;
String str;
ifp->print(&str, QT_ORDINARY);
@ -7461,10 +7563,14 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
sc->alias(itemAlias);
}
gwi.returnedCols.push_back(spsc);
// We need to look into GROUP BY columns to decide if we need to wrap a column.
ReturnedColumn* rc = wrapIntoAggregate(sc, gwi, select_lex, baseItem);
SRCP sprc(rc);
gwi.returnedCols.push_back(sprc);
gwi.columnMap.insert(
CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), spsc));
CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), sprc));
TABLE_LIST* tmp = 0;
if (ifp->cached_table)
@ -8311,6 +8417,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
{
if ((*(ordercol->item))->type() == Item::WINDOW_FUNC_ITEM)
gwi.hasWindowFunc = true;
// XXX: TODO: implement a proper analysis of what we support.
// MCOL-2166 Looking for this sorting item in GROUP_BY items list.
// Shouldn't look into this if query doesn't have GROUP BY or
// aggregations
@ -8322,10 +8429,10 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
getColNameFromItem(osr, *ordercol->item);
Message::Args args;
args.add(ostream.str());
string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args);
string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_SUPPORTED_GROUPBY_ORDERBY_EXPRESSION, args);
gwi.parseErrorText = emsg;
setError(gwi.thd, ER_INTERNAL_ERROR, emsg, gwi);
return ERR_NOT_GROUPBY_EXPRESSION;
return ERR_NOT_SUPPORTED_GROUPBY_ORDERBY_EXPRESSION;
}
}
@ -8375,6 +8482,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
else
{
rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError);
rc = wrapIntoAggregate(rc, gwi, select_lex, ord_item);
}
// @bug5501 try item_ptr if item can not be fixed. For some
// weird dml statement state, item can not be fixed but the
@ -8406,6 +8515,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
gwi.orderByCols.push_back(SRCP(rc));
}
}
// make sure columnmap, returnedcols and count(*) arg_list are not empty
TableMap::iterator tb_iter = gwi.tableMap.begin();