1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

feat(PrimProc): MCOL-5852 disk-based GROUP_CONCAT & JSON_ARRAYAGG

* move GROUP_CONCAT/JSON_ARRAYAGG storage to the RowGroup from
  the RowAggregation*
* internal data structures (de)serialization
* get rid of the specialized classes for processing JSON_ARRAYAGG
* move the memory accounting to disk-based aggregation classes
* allow aggregation generations to be used for queries with
  GROUP_CONCAT/JSON_ARRAYAGG
* remove the thread id from the error message as it interferes with MTR (mysql-test-run)
This commit is contained in:
Aleksei Antipovskii
2025-02-19 12:32:51 +01:00
committed by Alexey Antipovsky
parent 87d47fd7ae
commit 4bea7e59a0
25 changed files with 1339 additions and 2056 deletions

View File

@ -71,7 +71,6 @@ using namespace cal_impl_if;
#include "functioncolumn.h"
#include "groupconcatcolumn.h"
#include "intervalcolumn.h"
#include "jsonarrayaggcolumn.h"
#include "logicoperator.h"
#include "outerjoinonfilter.h"
#include "predicateoperator.h"
@ -96,7 +95,6 @@ const uint64_t SUB_BIT = 0x02;
const uint64_t AF_BIT = 0x04;
const uint64_t CORRELATED = 0x08;
// In certain cases, gp_walk is called recursively. When done so,
// we need to bookmark the rcWorkStack for those cases where a constant
// expression such as 1=1 is used in an if statement or function call.
@ -167,7 +165,7 @@ bool itemDisablesWrapping(Item* item, gp_walk_info& gwi);
void pushReturnedCol(gp_walk_info& gwi, Item* from, SRCP rc)
{
uint32_t i;
for ( i = 0; i < gwi.processed.size(); i++)
for (i = 0; i < gwi.processed.size(); i++)
{
Item* ith = gwi.processed[i].first;
@ -352,7 +350,8 @@ cal_impl_if::gp_walk_info::~gp_walk_info()
delete ptWorkStack.top();
ptWorkStack.pop();
}
for (uint32_t i=0;i<viewList.size();i++) {
for (uint32_t i = 0; i < viewList.size(); i++)
{
delete viewList[i];
}
viewList.clear();
@ -395,7 +394,8 @@ void clearDeleteStacks(gp_walk_info& gwi)
delete gwi.ptWorkStack.top();
gwi.ptWorkStack.pop();
}
for (uint32_t i=0;i<gwi.viewList.size();i++) {
for (uint32_t i = 0; i < gwi.viewList.size(); i++)
{
delete gwi.viewList[i];
}
gwi.viewList.clear();
@ -585,7 +585,8 @@ bool sortItemIsInGrouping(Item* sort_item, ORDER* groupcol)
const Item_ref* ref_item = static_cast<const Item_ref*>(item);
item = (Item*)*ref_item->ref;
}
if (item->type() == Item::FIELD_ITEM || item->type() == Item::CONST_ITEM || item->type() == Item::NULL_ITEM)
if (item->type() == Item::FIELD_ITEM || item->type() == Item::CONST_ITEM ||
item->type() == Item::NULL_ITEM)
{
return true;
}
@ -1610,7 +1611,6 @@ uint32_t buildJoin(gp_walk_info& gwi, List<TABLE_LIST>& join_list,
ParseTree* pt = new ParseTree(onFilter);
outerJoinStack.push(pt);
}
}
else // inner join
{
@ -1706,7 +1706,7 @@ bool buildRowColumnFilter(gp_walk_info* gwip, RowColumn* rhs, RowColumn* lhs, It
// two entries have been popped from the stack already: lhs and rhs
stack<ReturnedColumn*> tmpStack;
vector<RowColumn*> valVec;
vector<SRCP> heldOutVals; // these vals are not rhs/lhs and need to be freed
vector<SRCP> heldOutVals; // these vals are not rhs/lhs and need to be freed
tmpStack.push(rhs);
tmpStack.push(lhs);
assert(gwip->rcWorkStack.size() >= ifp->argument_count() - 2);
@ -2116,7 +2116,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip)
sop.reset(new PredicateOperator(eqop));
SRCP scsp = gwip->scsp;
idbassert(scsp.get() != nullptr);
//sop->setOpType(gwip->scsp->resultType(), rhs->resultType());
// sop->setOpType(gwip->scsp->resultType(), rhs->resultType());
sop->setOpType(scsp->resultType(), rhs->resultType());
ConstantFilter* cf = 0;
@ -3425,8 +3425,9 @@ ReturnedColumn* wrapIntoAggregate(ReturnedColumn* rc, gp_walk_info& gwi, Item* b
ac->charsetNumber(rc->charsetNumber());
ac->orderPos(rc->orderPos());
uint32_t i;
for(i=0; i < gwi.processed.size() && !gwi.processed[i].first->eq(baseItem, false);i++)
{ }
for (i = 0; i < gwi.processed.size() && !gwi.processed[i].first->eq(baseItem, false); i++)
{
}
if (i < gwi.processed.size())
{
ac->expressionId(gwi.processed[i].second);
@ -3441,7 +3442,6 @@ ReturnedColumn* wrapIntoAggregate(ReturnedColumn* rc, gp_walk_info& gwi, Item* b
return ac;
}
ReturnedColumn* buildReturnedColumnNull(gp_walk_info& gwi)
{
if (gwi.condPush)
@ -3875,7 +3875,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
// Could have it set if there are aggregation funcs as this function arguments.
gwi.fatalParseError = false;
//ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi);
// ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi);
ReturnedColumn* rc = buildReturnedColumn(sfitempp[0], gwi, nonSupport);
if (rc)
lhs = new ParseTree(rc);
@ -3895,7 +3895,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
// Could have it set if there are aggregation funcs as this function arguments.
gwi.fatalParseError = false;
//ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi);
// ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi);
ReturnedColumn* rc = buildReturnedColumn(sfitempp[1], gwi, nonSupport);
if (rc)
rhs = new ParseTree(rc);
@ -4019,8 +4019,8 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
int32_t leftColWidth = leftColType.colWidth;
int32_t rightColWidth = rightColType.colWidth;
if ((leftColWidth == datatypes::MAXDECIMALWIDTH || rightColWidth == datatypes::MAXDECIMALWIDTH)
&& datatypes::isDecimal(mysqlType.colDataType))
if ((leftColWidth == datatypes::MAXDECIMALWIDTH || rightColWidth == datatypes::MAXDECIMALWIDTH) &&
datatypes::isDecimal(mysqlType.colDataType))
{
mysqlType.colWidth = datatypes::MAXDECIMALWIDTH;
@ -4078,7 +4078,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
strcasecmp(ac->alias().c_str(), gwi.returnedCols[i]->alias().c_str()) == 0)
{
ac->expressionId(gwi.returnedCols[i]->expressionId());
isOnSelectList = true;
isOnSelectList = true;
break;
}
}
@ -4114,7 +4114,8 @@ ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool&
return rc;
}
ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport, bool selectBetweenIn)
ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport,
bool selectBetweenIn)
{
if (get_fe_conn_info_ptr() == NULL)
{
@ -4125,7 +4126,7 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool&
cal_connection_info* ci = static_cast<cal_connection_info*>(get_fe_conn_info_ptr());
string funcName = ifp->func_name();
if ( nullptr != dynamic_cast<Item_func_concat_operator_oracle*>(ifp))
if (nullptr != dynamic_cast<Item_func_concat_operator_oracle*>(ifp))
{
// the condition above is the only way to recognize this particular case.
funcName = "concat_operator_oracle";
@ -4356,9 +4357,9 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool&
if (mayHasBoolArg && isBoolType)
rc = buildBooleanConstantColumn(ifp->arguments()[i], gwi, nonSupport);
else
{
{
rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport);
}
}
// MCOL-1510 It must be a temp table field, so find the corresponding column.
if (!rc && ifp->arguments()[i]->type() == Item::REF_ITEM)
@ -5175,7 +5176,7 @@ void analyzeForImplicitGroupBy(Item* item, gp_walk_info& gwi)
if (item->type() == Item::FUNC_ITEM)
{
Item_func* ifp = static_cast<Item_func*>(item);
for(uint32_t i = 0;i<ifp->argument_count() && !gwi.implicitExplicitGroupBy;i++)
for (uint32_t i = 0; i < ifp->argument_count() && !gwi.implicitExplicitGroupBy; i++)
{
analyzeForImplicitGroupBy(ifp->arguments()[i], gwi);
}
@ -5224,7 +5225,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi)
}
else if (isp->sum_func() == Item_sum::JSON_ARRAYAGG_FUNC)
{
ac = new JsonArrayAggColumn(gwi.sessionid);
ac = new GroupConcatColumn(gwi.sessionid, true);
}
else if (isp->sum_func() == Item_sum::UDF_SUM_FUNC)
{
@ -5402,7 +5403,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi)
}
rowCol->columnVec(selCols);
(dynamic_cast<JsonArrayAggColumn*>(ac))->orderCols(orderCols);
(dynamic_cast<GroupConcatColumn*>(ac))->orderCols(orderCols);
parm.reset(rowCol);
ac->aggParms().push_back(parm);
@ -5410,7 +5411,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi)
{
string separator;
separator.assign(gc->get_separator()->ptr(), gc->get_separator()->length());
(dynamic_cast<JsonArrayAggColumn*>(ac))->separator(separator);
(dynamic_cast<GroupConcatColumn*>(ac))->separator(separator);
}
}
else if (isSupportedAggregateWithOneConstArg(isp, sfitempp))
@ -5482,21 +5483,23 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi)
{
//@bug5229. handle constant function on aggregate argument
ac->constCol(SRCP(rc));
// XXX: this skips restoration of clauseType.
// XXX: this skips restoration of clauseType.
break;
}
// the "rc" can be in gwi.no_parm_func_list. erase it from that list and
// then delete it.
// kludge, I know.
uint32_t i;
// the "rc" can be in gwi.no_parm_func_list. erase it from that list and
// then delete it.
// kludge, I know.
uint32_t i;
for (i = 0; gwi.no_parm_func_list[i] != rc && i < gwi.no_parm_func_list.size(); i++) { }
for (i = 0; gwi.no_parm_func_list[i] != rc && i < gwi.no_parm_func_list.size(); i++)
{
}
if (i < gwi.no_parm_func_list.size())
{
if (i < gwi.no_parm_func_list.size())
{
gwi.no_parm_func_list.erase(gwi.no_parm_func_list.begin() + i);
delete rc;
}
}
}
}
@ -6003,7 +6006,7 @@ void gp_walk(const Item* item, void* arg)
if (ifp)
{
// XXX: this looks awfuly wrong.
// XXX: this looks awfuly wrong.
SimpleColumn* scp = buildSimpleColumn(ifp, *gwip);
if (!scp)
@ -6012,7 +6015,7 @@ void gp_walk(const Item* item, void* arg)
string aliasTableName(scp->tableAlias());
scp->tableAlias(aliasTableName);
gwip->rcWorkStack.push(scp->clone());
boost::shared_ptr<SimpleColumn> scsp(scp);
boost::shared_ptr<SimpleColumn> scsp(scp);
gwip->scsp = scsp;
gwip->funcName.clear();
@ -6551,7 +6554,7 @@ void gp_walk(const Item* item, void* arg)
}
else if (col->type() == Item::FIELD_ITEM && gwip->clauseType == HAVING)
{
//ReturnedColumn* rc = buildAggFrmTempField(const_cast<Item*>(item), *gwip);
// ReturnedColumn* rc = buildAggFrmTempField(const_cast<Item*>(item), *gwip);
ReturnedColumn* rc = buildReturnedColumn(const_cast<Item*>(item), *gwip, gwip->fatalParseError);
if (rc)
gwip->rcWorkStack.push(rc);
@ -6566,7 +6569,7 @@ void gp_walk(const Item* item, void* arg)
SimpleColumn* thisSC = dynamic_cast<SimpleColumn*>(rc);
if (thisSC)
{
gwip->scsp.reset(thisSC->clone());
gwip->scsp.reset(thisSC->clone());
}
if (!rc && !cando)
{
@ -6785,14 +6788,14 @@ void parse_item(Item* item, vector<Item_field*>& field_vec, bool& hasNonSupportI
// MCOL-1510. This could be a non-supported function
// argument in form of a temp_table_field, so check
// and set hasNonSupportItem if it is so.
//ReturnedColumn* rc = NULL;
//if (gwi)
// ReturnedColumn* rc = NULL;
// if (gwi)
// rc = buildAggFrmTempField(ref, *gwi);
//if (!rc)
// if (!rc)
//{
Item_field* ifp = static_cast<Item_field*>(*(ref->ref));
field_vec.push_back(ifp);
Item_field* ifp = static_cast<Item_field*>(*(ref->ref));
field_vec.push_back(ifp);
//}
break;
}
@ -7512,7 +7515,7 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s
if (!gwi.rcWorkStack.empty())
{
while(!gwi.rcWorkStack.empty())
while (!gwi.rcWorkStack.empty())
{
ReturnedColumn* t = gwi.rcWorkStack.top();
delete t;
@ -7521,7 +7524,7 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s
}
if (!gwi.ptWorkStack.empty())
{
while(!gwi.ptWorkStack.empty())
while (!gwi.ptWorkStack.empty())
{
ParseTree* t = gwi.ptWorkStack.top();
delete t;
@ -7529,7 +7532,6 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s
}
}
return 0;
}
@ -7818,7 +7820,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
}
gwi.clauseType = SELECT;
SELECT_LEX* oldSelectLex = gwi.select_lex; // XXX: SZ: should it be restored in case of error return?
SELECT_LEX* oldSelectLex = gwi.select_lex; // XXX: SZ: should it be restored in case of error return?
gwi.select_lex = &select_lex;
#ifdef DEBUG_WALK_COND
{
@ -7931,7 +7933,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
ReturnedColumn* rc = wrapIntoAggregate(sc, gwi, baseItem);
SRCP sprc(rc);
pushReturnedCol(gwi, baseItem, sprc);
pushReturnedCol(gwi, baseItem, sprc);
gwi.columnMap.insert(
CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), sprc));
@ -7968,7 +7970,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
// add this agg col to returnedColumnList
boost::shared_ptr<ReturnedColumn> spac(ac);
pushReturnedCol(gwi, item, spac);
pushReturnedCol(gwi, item, spac);
break;
}
@ -8027,7 +8029,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
if (!hasNonSupportItem && ifp->const_item() && !(parseInfo & AF_BIT) && tmpVec.size() == 0)
{
srcp.reset(buildReturnedColumn(item, gwi, gwi.fatalParseError));
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
if (ifp->name.length)
srcp->alias(ifp->name.str);
@ -8035,7 +8037,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
continue;
}
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
}
else // This was a vtable post-process block
{
@ -8057,7 +8059,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
if (ifp->name.length)
cc->alias(ifp->name.str);
pushReturnedCol(gwi, ifp, srcp);
pushReturnedCol(gwi, ifp, srcp);
// clear the error set by buildFunctionColumn
gwi.fatalParseError = false;
@ -8135,7 +8137,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
if (item->name.length)
srcp->alias(item->name.str);
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
}
break;
@ -8159,7 +8161,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
else
{
SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError));
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
if (item->name.length)
srcp->alias(item->name.str);
@ -8255,7 +8257,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
return ER_CHECK_NOT_IMPLEMENTED;
}
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
break;
}
case Item::TYPE_HOLDER:
@ -9105,7 +9107,6 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
int cp_get_table_plan(THD* thd, SCSEP& csep, cal_table_info& ti, long timeZone)
{
SubQueryChainHolder chainHolder;
bool allocated = false;
gp_walk_info* gwi;