You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-11-03 17:13:17 +03:00 
			
		
		
		
	feat(optimizer): MCOL-5250 rewrite queries with DISTINCT (#3666)
* feat(optimizer): MCOL-5250 rewrite queries with DISTINCT
... as aggregated queries.
So query
```
SELECT DISTINCT <cols list>
FROM <from list>
WHERE <where clause>
HAVING <having clause>
ORDER BY <orderby list>
LIMIT <limit>
```
will become
```
SELECT *
FROM
  (
    SELECT <cols list>
    FROM <from list>
    WHERE <where clause>
    HAVING <having clause>
  ) a
GROUP BY 1,2,3,...,N
ORDER BY <orderby list>
LIMIT <limit>
```
* move ORDER BY to the outer query
* fix test
* reuse cloneWORecursiveSelects() in clone()
* fix subselect columns processing
			
			
This commit is contained in:
		
				
					committed by
					
						
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							736ec81e4d
						
					
				
				
					commit
					cfa9a7ff2c
				
			@@ -1197,4 +1197,49 @@ execplan::SCSEP CalpontSelectExecutionPlan::cloneForTableWORecursiveSelectsGbObH
 | 
				
			|||||||
  return newPlan;
 | 
					  return newPlan;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Produces a copy of this plan in which every nested plan is cloned recursively.
					///
					/// The copy starts from cloneWORecursiveSelects(); each nested-plan container
					/// (fSubSelects, fDerivedTableList, fUnionVec, fSelectSubList, fSubSelectList) is then
					/// emptied on the copy and refilled with clones of this plan's elements.
					///
					/// Every element of the first four containers must be a CalpontSelectExecutionPlan;
					/// idbassert_s is invoked otherwise.
					///
					/// @return the newly built plan.
					SCSEP CalpontSelectExecutionPlan::clone()
					{
					  auto newPlan = cloneWORecursiveSelects();

					  // Reset the container that is about to be refilled. The previous code cleared
					  // fSelectSubList here (which is cleared again below) and never fSubSelects, so any
					  // entries already present in newPlan->fSubSelects would have been kept next to the clones.
					  newPlan->fSubSelects.clear();
					  for (const auto& subPlan : fSubSelects)
					  {
					    auto* subCSEP = dynamic_cast<CalpontSelectExecutionPlan*>(subPlan.get());
					    idbassert_s(subCSEP != nullptr, "subPlan is not a CalpontSelectExecutionPlan");
					    newPlan->fSubSelects.push_back(subCSEP->clone());
					  }

					  newPlan->fDerivedTableList.clear();
					  for (const auto& drvTable : fDerivedTableList)
					  {
					    auto* drvCSEP = dynamic_cast<CalpontSelectExecutionPlan*>(drvTable.get());
					    idbassert_s(drvCSEP != nullptr, "derivedTable is not a CalpontSelectExecutionPlan");
					    newPlan->fDerivedTableList.push_back(drvCSEP->clone());
					  }

					  newPlan->fUnionVec.clear();
					  for (const auto& subPlan : fUnionVec)
					  {
					    auto* subCSEP = dynamic_cast<CalpontSelectExecutionPlan*>(subPlan.get());
					    idbassert_s(subCSEP != nullptr, "unionVec is not a CalpontSelectExecutionPlan");
					    newPlan->fUnionVec.push_back(subCSEP->clone());
					  }

					  newPlan->fSelectSubList.clear();
					  for (const auto& subPlan : fSelectSubList)
					  {
					    auto* subCSEP = dynamic_cast<CalpontSelectExecutionPlan*>(subPlan.get());
					    idbassert_s(subCSEP != nullptr, "subPlan is not a CalpontSelectExecutionPlan");
					    newPlan->fSelectSubList.push_back(subCSEP->clone());
					  }

					  // Elements of fSubSelectList are copied through their own clone().
					  newPlan->fSubSelectList.clear();
					  for (const auto& subPlan : fSubSelectList)
					  {
					    newPlan->fSubSelectList.push_back(subPlan->clone());
					  }

					  return newPlan;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}  // namespace execplan
 | 
					}  // namespace execplan
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -168,6 +168,7 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan
 | 
				
			|||||||
  execplan::SCSEP cloneForTableWORecursiveSelectsGbObHaving(
 | 
					  execplan::SCSEP cloneForTableWORecursiveSelectsGbObHaving(
 | 
				
			||||||
      const execplan::CalpontSystemCatalog::TableAliasName& targetTableAlias, const bool withFilters = true);
 | 
					      const execplan::CalpontSystemCatalog::TableAliasName& targetTableAlias, const bool withFilters = true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SCSEP clone();
 | 
				
			||||||
  /**
 | 
					  /**
 | 
				
			||||||
   * Access and mutator methods
 | 
					   * Access and mutator methods
 | 
				
			||||||
   */
 | 
					   */
 | 
				
			||||||
@@ -495,7 +496,7 @@ class CalpontSelectExecutionPlan : public CalpontExecutionPlan
 | 
				
			|||||||
  {
 | 
					  {
 | 
				
			||||||
    return fDerivedTableList;
 | 
					    return fDerivedTableList;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  void derivedTableList(SelectList& derivedTableList)
 | 
					  void derivedTableList(const SelectList& derivedTableList)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    fDerivedTableList = derivedTableList;
 | 
					    fDerivedTableList = derivedTableList;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -100,6 +100,7 @@ ReturnedColumn::ReturnedColumn(const ReturnedColumn& rhs, const uint32_t session
 | 
				
			|||||||
 , fSessionID(sessionID)
 | 
					 , fSessionID(sessionID)
 | 
				
			||||||
 , fSequence(rhs.fSequence)
 | 
					 , fSequence(rhs.fSequence)
 | 
				
			||||||
 , fCardinality(rhs.fCardinality)
 | 
					 , fCardinality(rhs.fCardinality)
 | 
				
			||||||
 | 
					 , fAlias(rhs.alias())
 | 
				
			||||||
 , fDistinct(rhs.fDistinct)
 | 
					 , fDistinct(rhs.fDistinct)
 | 
				
			||||||
 , fJoinInfo(rhs.fJoinInfo)
 | 
					 , fJoinInfo(rhs.fJoinInfo)
 | 
				
			||||||
 , fAsc(rhs.fAsc)
 | 
					 , fAsc(rhs.fAsc)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -131,7 +131,8 @@ void getSimpleColsExtended(execplan::ParseTree* n, void* obj)
 | 
				
			|||||||
  else if (selectFilter)
 | 
					  else if (selectFilter)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    selectFilter->setSimpleColumnListExtended();
 | 
					    selectFilter->setSimpleColumnListExtended();
 | 
				
			||||||
    list->insert(list->end(), selectFilter->simpleColumnListExtended().begin(), selectFilter->simpleColumnListExtended().end());
 | 
					    list->insert(list->end(), selectFilter->simpleColumnListExtended().begin(),
 | 
				
			||||||
 | 
					                 selectFilter->simpleColumnListExtended().end());
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  else if (cf)
 | 
					  else if (cf)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
@@ -861,4 +862,37 @@ std::optional<CalpontSystemCatalog::TableAliasName> sameTableCheck(
 | 
				
			|||||||
  return tan;
 | 
					  return tan;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					std::string getSimpleColumnAlias(const ReturnedColumn& origCol, int64_t colPos)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::string alias = origCol.alias();
 | 
				
			||||||
 | 
					  if (alias.empty())
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    if (auto* sc = dynamic_cast<const SimpleColumn*>(&origCol); sc)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      alias = sc->columnName();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else if (auto* fc = dynamic_cast<const FunctionColumn*>(&origCol); fc)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      alias = fc->functionName();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else if (auto* ac = dynamic_cast<const AggregateColumn*>(&origCol); ac)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      alias = ac->functionName();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else if (auto* wc = dynamic_cast<const WindowFunctionColumn*>(&origCol); wc)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      alias = wc->functionName();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if (alias.empty())
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    alias = "`$col_" + std::to_string(colPos) + "`";
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if (alias[0] != '`')
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    alias = "`" + alias + "`";
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return alias;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}  // namespace execplan
 | 
					}  // namespace execplan
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -413,4 +413,8 @@ ParseTree* replaceRefCol(ParseTree*& n, CalpontSelectExecutionPlan::ReturnedColu
 | 
				
			|||||||
std::optional<CalpontSystemCatalog::TableAliasName> sameTableCheck(
 | 
					std::optional<CalpontSystemCatalog::TableAliasName> sameTableCheck(
 | 
				
			||||||
    std::vector<SimpleColumn*> simpleColumnList);
 | 
					    std::vector<SimpleColumn*> simpleColumnList);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// utility function for constructing a reasonable alias for a SimpleColumn copy, based on the alias/column
 | 
				
			||||||
 | 
					/// name/function name of the original column
 | 
				
			||||||
 | 
					std::string getSimpleColumnAlias(const ReturnedColumn& origCol, int64_t colPos);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}  // namespace execplan
 | 
					}  // namespace execplan
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,8 @@
 | 
				
			|||||||
set(rbo_SRCS rulebased_optimizer.cpp rbo_apply_parallel_ces.cpp rbo_predicate_pushdown.cpp)
 | 
					set(rbo_SRCS
 | 
				
			||||||
 | 
					    rulebased_optimizer.cpp
 | 
				
			||||||
 | 
					    rbo_apply_parallel_ces.cpp
 | 
				
			||||||
 | 
					    rbo_apply_rewrite_distinct.cpp
 | 
				
			||||||
 | 
					    rbo_predicate_pushdown.cpp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
columnstore_library(rbo ${rbo_SRCS})
 | 
					columnstore_library(rbo ${rbo_SRCS})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -46,8 +46,6 @@ using ExtraSRRC = std::vector<std::unique_ptr<execplan::SimpleColumn>>;
 | 
				
			|||||||
using SCAndItsProjectionPosition = std::pair<execplan::SimpleColumn*, uint32_t>;
 | 
					using SCAndItsProjectionPosition = std::pair<execplan::SimpleColumn*, uint32_t>;
 | 
				
			||||||
using SCsAndTheirProjectionPositions = std::vector<SCAndItsProjectionPosition>;
 | 
					using SCsAndTheirProjectionPositions = std::vector<SCAndItsProjectionPosition>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static const std::string RewrittenSubTableAliasPrefix = "$added_sub_";
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
namespace details
 | 
					namespace details
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -562,8 +560,7 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBO
 | 
				
			|||||||
    auto anyColumnStatistics = ctx.getGwi().findStatisticsForATable(schemaAndTableName);
 | 
					    auto anyColumnStatistics = ctx.getGwi().findStatisticsForATable(schemaAndTableName);
 | 
				
			||||||
    if (!table.isColumnstore() && anyColumnStatistics)
 | 
					    if (!table.isColumnstore() && anyColumnStatistics)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      std::string tableAlias = optimizer::RewrittenSubTableAliasPrefix + table.schema + "_" + table.table +
 | 
					      std::string tableAlias = getRewrittenSubTableAlias(table, ctx);
 | 
				
			||||||
                               "_" + std::to_string(ctx.getUniqueId());
 | 
					 | 
				
			||||||
      tableAliasToSCPositionsMap.insert({table, {tableAlias, {}, 0}});
 | 
					      tableAliasToSCPositionsMap.insert({table, {tableAlias, {}, 0}});
 | 
				
			||||||
      execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, "");
 | 
					      execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, "");
 | 
				
			||||||
      newTableList.push_back(tn);
 | 
					      newTableList.push_back(tn);
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										180
									
								
								dbcon/rbo/rbo_apply_rewrite_distinct.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										180
									
								
								dbcon/rbo/rbo_apply_rewrite_distinct.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,180 @@
 | 
				
			|||||||
 | 
					/* Copyright (C) 2025 MariaDB Corporation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   This program is free software; you can redistribute it and/or
 | 
				
			||||||
 | 
					   modify it under the terms of the GNU General Public License
 | 
				
			||||||
 | 
					   as published by the Free Software Foundation; version 2 of
 | 
				
			||||||
 | 
					   the License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					   GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   You should have received a copy of the GNU General Public License
 | 
				
			||||||
 | 
					   along with this program; if not, write to the Free Software
 | 
				
			||||||
 | 
					   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
				
			||||||
 | 
					   MA 02110-1301, USA. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "rulebased_optimizer.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "calpontselectexecutionplan.h"
 | 
				
			||||||
 | 
					#include "aggregatecolumn.h"
 | 
				
			||||||
 | 
					#include "simplecolumn.h"
 | 
				
			||||||
 | 
					#include "existsfilter.h"
 | 
				
			||||||
 | 
					#include "functioncolumn.h"
 | 
				
			||||||
 | 
					#include "logicoperator.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace optimizer
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Rule filter: the DISTINCT rewrite applies only to plans that are flagged DISTINCT
					/// and have at least one entry in their table list. The optimizer context is unused.
					bool rewriteDistinctFilter(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& /*ctx*/)
					{
					  if (!csep.distinct())
					  {
					    return false;
					  }

					  return !csep.tableList().empty();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Creates a SimpleColumn, constructed from `rc`, that addresses `rc` as a column of the
					/// derived table `tableAlias`.
					///
					/// @param rc         column of the inner query to reference; its reference count is incremented.
					/// @param tableAlias alias of the derived table; used as table name, table alias and derived table.
					/// @param colPos     value stored as the column position; also feeds the fallback column name.
					/// @return the new column.
					execplan::SRCP cloneAsSimpleColumn(const execplan::SRCP& rc, const std::string& tableAlias, int64_t colPos)
					{
					  auto outerCol = boost::make_shared<execplan::SimpleColumn>(*rc);

					  // SimpleColumn attributes: no schema, zero OID, table identity taken from the alias.
					  outerCol->schemaName("");
					  outerCol->tableName(tableAlias);
					  outerCol->oid(0);
					  outerCol->tableAlias(tableAlias);
					  outerCol->data("");

					  // ReturnedColumn attributes
					  outerCol->charsetNumber(rc->charsetNumber());

					  // TreeNode attributes
					  outerCol->derivedTable(tableAlias);
					  outerCol->derivedRefCol(rc.get());
					  outerCol->resultType(rc->resultType());
					  outerCol->operationType(rc->operationType());
					  outerCol->colPosition(colPos);

					  // Carry the time zone over from whichever concrete column type `rc` turns out to be.
					  auto* const source = rc.get();
					  if (const auto* asSimple = dynamic_cast<execplan::SimpleColumn*>(source))
					  {
					    outerCol->timeZone(asSimple->timeZone());
					  }
					  else if (const auto* asFunction = dynamic_cast<execplan::FunctionColumn*>(source))
					  {
					    outerCol->timeZone(asFunction->timeZone());
					  }
					  else if (const auto* asAggregate = dynamic_cast<execplan::AggregateColumn*>(source))
					  {
					    outerCol->timeZone(asAggregate->timeZone());
					  }
					  else if (const auto* asWindow = dynamic_cast<execplan::WindowFunctionColumn*>(source))
					  {
					    outerCol->timeZone(asWindow->timeZone());
					  }
					  rc->incRefCount();

					  // The column name comes from the original column; the alias is qualified with the table alias.
					  auto colName = getSimpleColumnAlias(*rc, colPos);
					  outerCol->columnName(colName);
					  outerCol->alias("`" + tableAlias + "`." + colName);
					  outerCol->colSource(0);

					  return outerCol;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Rewrites a DISTINCT plan in place as a GROUP BY over a derived table.
					///
					/// A clone of `csep` becomes the single derived table of `csep`. `csep` itself is stripped
					/// of its sub-selects, unions, filters, HAVING and DISTINCT flag, and its returned and
					/// GROUP BY columns are rebuilt as SimpleColumns referencing the derived table's returned
					/// columns. ORDER BY is moved to the outer plan:
					///  - an ORDER BY column equal to a returned column of the inner plan is re-expressed via
					///    the corresponding outer column;
					///  - any other ORDER BY column is appended to the inner plan's returned columns and
					///    referenced from the outer ORDER BY through a SELECT_SOME aggregate, since it is
					///    not one of the GROUP BY columns.
					///
					/// The table list of `csep` must not be empty (its first entry names the derived table).
					///
					/// @return always true.
					bool applyRewriteDistinct(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx)
					{
					  auto origCSEP = csep.clone();
					  auto tableAlias = getRewrittenSubTableAlias(csep.tableList()[0], ctx);
					  origCSEP->location(execplan::CalpontSelectExecutionPlan::FROM);
					  origCSEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS);
					  origCSEP->derivedTbAlias(tableAlias);

					  // Everything nested now lives in the inner plan only.
					  csep.subSelectList({});
					  csep.subSelects({});
					  csep.selectSubList({});
					  csep.unionVec({});

					  execplan::CalpontSelectExecutionPlan::TableList tblList;
					  tblList.push_back(execplan::make_aliasview("", "", tableAlias, ""));
					  csep.tableList(tblList);
					  execplan::CalpontSelectExecutionPlan::SelectList derivedTblList;
					  derivedTblList.emplace_back(origCSEP);
					  csep.derivedTableList(derivedTblList);

					  csep.distinct(false);
					  csep.filters(nullptr);
					  csep.having(nullptr);

					  // Outer projection and GROUP BY: one column per returned column of the inner plan.
					  csep.returnedCols({});
					  csep.groupByCols({});
					  int64_t colPos = 0;
					  for (const auto& rc : origCSEP->returnedCols())
					  {
					    auto rcCloned = cloneAsSimpleColumn(rc, tableAlias, colPos);
					    csep.returnedCols().emplace_back(rcCloned);

					    auto grpByCloned = cloneAsSimpleColumn(rc, tableAlias, colPos);
					    grpByCloned->orderPos(colPos);
					    csep.groupByCols().emplace_back(grpByCloned);

					    ++colPos;
					  }

					  // order by
					  csep.orderByCols({});
					  // Outer ORDER BY column created for each extra column appended to the inner plan;
					  // entry i belongs to origCSEP->returnedCols()[colPos + i].
					  execplan::CalpontSelectExecutionPlan::ReturnedColumnList extraOrderByCols;
					  int64_t orderByColPos = 0;
					  for (const auto& obc : origCSEP->orderByCols())
					  {
					    bool found = false;
					    int64_t retColPos = 0;
					    for (const auto& rc : origCSEP->returnedCols())
					    {
					      if (*obc == *rc)
					      {
					        // The ORDER BY column is already part of the inner result set.
					        found = true;
					        execplan::SRCP outerRC;
					        if (retColPos < colPos)
					        {
					          outerRC = csep.returnedCols()[retColPos];
					        }
					        else
					        {
					          // Matches an extra column appended for an earlier ORDER BY entry. The outer
					          // ORDER BY list also contains entries for columns found in the result set, so
					          // it cannot be indexed with (retColPos - colPos); use the recorded column.
					          // NOTE(review): the recorded column is the SELECT_SOME aggregate, which is
					          // then re-created as a SimpleColumn below - confirm this is intended.
					          outerRC = extraOrderByCols[retColPos - colPos];
					        }
					        auto obcCloned = cloneAsSimpleColumn(outerRC, tableAlias, retColPos);
					        obcCloned->asc(obc->asc());
					        obcCloned->nullsFirst(obc->nullsFirst());
					        csep.orderByCols().emplace_back(obcCloned);
					        break;
					      }
					      ++retColPos;
					    }

					    if (found)
					    {
					      continue;
					    }

					    // The ORDER BY column is not in the result set of the original query, so add it to
					    // the inner result set. This happens after the scan above has finished, so the loop
					    // over returnedCols() is never running while the list grows.
					    auto rc = boost::shared_ptr<execplan::ReturnedColumn>(obc->clone());
					    origCSEP->returnedCols().emplace_back(rc);

					    auto rcCloned = cloneAsSimpleColumn(rc, tableAlias, colPos + orderByColPos);
					    // This ORDER BY column does not belong to the GROUP BY columns, so it has to be an
					    // aggregated column.
					    auto* aggCol = new execplan::AggregateColumn();
					    auto obcCloned = boost::shared_ptr<execplan::ReturnedColumn>(aggCol);

					    aggCol->asc(obc->asc());
					    aggCol->nullsFirst(obc->nullsFirst());
					    aggCol->aggOp(execplan::AggregateColumn::SELECT_SOME);
					    aggCol->aggParms().emplace_back(rcCloned);

					    csep.orderByCols().emplace_back(obcCloned);
					    extraOrderByCols.emplace_back(obcCloned);

					    ++orderByColPos;
					  }
					  // Ordering and de-duplication are now done by the outer plan.
					  origCSEP->orderByCols().clear();
					  origCSEP->distinct(false);

					  return true;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace optimizer
 | 
				
			||||||
							
								
								
									
										31
									
								
								dbcon/rbo/rbo_apply_rewrite_distinct.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								dbcon/rbo/rbo_apply_rewrite_distinct.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,31 @@
 | 
				
			|||||||
 | 
					/* Copyright (C) 2025 MariaDB Corporation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   This program is free software; you can redistribute it and/or
 | 
				
			||||||
 | 
					   modify it under the terms of the GNU General Public License
 | 
				
			||||||
 | 
					   as published by the Free Software Foundation; version 2 of
 | 
				
			||||||
 | 
					   the License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					   GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   You should have received a copy of the GNU General Public License
 | 
				
			||||||
 | 
					   along with this program; if not, write to the Free Software
 | 
				
			||||||
 | 
					   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
				
			||||||
 | 
					   MA 02110-1301, USA. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define PREFER_MY_CONFIG_H
 | 
				
			||||||
 | 
					#include <my_config.h>
 | 
				
			||||||
 | 
					#include "../mysql/idb_mysql.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "execplan/calpontselectexecutionplan.h"
 | 
				
			||||||
 | 
					#include "rulebased_optimizer.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace optimizer
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  bool rewriteDistinctFilter(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx);
 | 
				
			||||||
 | 
					  bool applyRewriteDistinct(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -15,29 +15,31 @@
 | 
				
			|||||||
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
					   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
				
			||||||
   MA 02110-1301, USA. */
 | 
					   MA 02110-1301, USA. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <algorithm>
 | 
					 | 
				
			||||||
#include <cstddef>
 | 
					 | 
				
			||||||
#include <cstdint>
 | 
					 | 
				
			||||||
#include <limits>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "rulebased_optimizer.h"
 | 
					#include "rulebased_optimizer.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "configcpp.h"
 | 
					#include "configcpp.h"
 | 
				
			||||||
#include "constantcolumn.h"
 | 
					#include "constantcolumn.h"
 | 
				
			||||||
#include "execplan/calpontselectexecutionplan.h"
 | 
					#include "execplan/calpontselectexecutionplan.h"
 | 
				
			||||||
#include "execplan/simplecolumn.h"
 | 
					 | 
				
			||||||
#include "existsfilter.h"
 | 
					 | 
				
			||||||
#include "logicoperator.h"
 | 
					 | 
				
			||||||
#include "operator.h"
 | 
					 | 
				
			||||||
#include "predicateoperator.h"
 | 
					#include "predicateoperator.h"
 | 
				
			||||||
#include "simplefilter.h"
 | 
					 | 
				
			||||||
#include "rbo_apply_parallel_ces.h"
 | 
					#include "rbo_apply_parallel_ces.h"
 | 
				
			||||||
#include "rbo_predicate_pushdown.h"
 | 
					#include "rbo_predicate_pushdown.h"
 | 
				
			||||||
 | 
					#include "rbo_apply_rewrite_distinct.h"
 | 
				
			||||||
#include "utils/pron/pron.h"
 | 
					#include "utils/pron/pron.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "calpontsystemcatalog.h"
 | 
				
			||||||
 | 
					#include "functioncolumn.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace optimizer
 | 
					namespace optimizer
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Builds the alias given to a derived table introduced by a rewrite rule:
					/// "$added_sub_<schema>_<table>_<unique id>", the id being ctx.getUniqueId().
					std::string getRewrittenSubTableAlias(const execplan::CalpontSystemCatalog::TableAliasName& table,
					                                      const RBOptimizerContext& ctx)
					{
					  std::string alias{"$added_sub_"};
					  alias += table.schema;
					  alias += "_";
					  alias += table.table;
					  alias += "_";
					  alias += std::to_string(ctx.getUniqueId());
					  return alias;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Apply a list of rules to a CSEP
 | 
					// Apply a list of rules to a CSEP
 | 
				
			||||||
bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std::vector<Rule>& rules,
 | 
					bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std::vector<Rule>& rules,
 | 
				
			||||||
                           optimizer::RBOptimizerContext& ctx)
 | 
					                           optimizer::RBOptimizerContext& ctx)
 | 
				
			||||||
@@ -83,6 +85,10 @@ bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptim
 | 
				
			|||||||
  {
 | 
					  {
 | 
				
			||||||
    optimizer::Rule parallelCES{"parallel_ces", optimizer::parallelCESFilter, optimizer::applyParallelCES};
 | 
					    optimizer::Rule parallelCES{"parallel_ces", optimizer::parallelCESFilter, optimizer::applyParallelCES};
 | 
				
			||||||
    rules.push_back(parallelCES);
 | 
					    rules.push_back(parallelCES);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    optimizer::Rule rewriteDistinct{"rewrite_distinct", optimizer::rewriteDistinctFilter,
 | 
				
			||||||
 | 
					                                    optimizer::applyRewriteDistinct};
 | 
				
			||||||
 | 
					    rules.push_back(rewriteDistinct);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  optimizer::Rule predicatePushdown{"predicate_pushdown", optimizer::predicatePushdownFilter,
 | 
					  optimizer::Rule predicatePushdown{"predicate_pushdown", optimizer::predicatePushdownFilter,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -27,6 +27,7 @@
 | 
				
			|||||||
#include <dbcon/mysql/ha_mcs_impl_if.h>
 | 
					#include <dbcon/mysql/ha_mcs_impl_if.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "execplan/calpontselectexecutionplan.h"
 | 
					#include "execplan/calpontselectexecutionplan.h"
 | 
				
			||||||
 | 
					#include "execplan/calpontsystemcatalog.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace optimizer
 | 
					namespace optimizer
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -141,4 +142,7 @@ struct Rule
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext& ctx,
 | 
					bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext& ctx,
 | 
				
			||||||
                  bool useUnstableOptimizer);
 | 
					                  bool useUnstableOptimizer);
 | 
				
			||||||
}  // namespace optimizer
 | 
					std::string getRewrittenSubTableAlias(const execplan::CalpontSystemCatalog::TableAliasName& table,
 | 
				
			||||||
 | 
					                                      const RBOptimizerContext& ctx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -11,10 +11,10 @@ COUNT(DISTINCT col2)
 | 
				
			|||||||
5
 | 
					5
 | 
				
			||||||
SELECT DISTINCT col1 FROM t1;
 | 
					SELECT DISTINCT col1 FROM t1;
 | 
				
			||||||
col1
 | 
					col1
 | 
				
			||||||
NULL
 | 
					 | 
				
			||||||
1
 | 
					1
 | 
				
			||||||
2
 | 
					2
 | 
				
			||||||
3
 | 
					3
 | 
				
			||||||
 | 
					NULL
 | 
				
			||||||
SELECT DISTINCT col1 FROM t1 ORDER BY col1 DESC;
 | 
					SELECT DISTINCT col1 FROM t1 ORDER BY col1 DESC;
 | 
				
			||||||
col1
 | 
					col1
 | 
				
			||||||
3
 | 
					3
 | 
				
			||||||
@@ -33,10 +33,10 @@ CREATE TABLE t2(col1 INT)ENGINE=Columnstore;
 | 
				
			|||||||
INSERT INTO t2 SELECT DISTINCT col1 FROM t1;
 | 
					INSERT INTO t2 SELECT DISTINCT col1 FROM t1;
 | 
				
			||||||
SELECT * FROM t2;
 | 
					SELECT * FROM t2;
 | 
				
			||||||
col1
 | 
					col1
 | 
				
			||||||
NULL
 | 
					 | 
				
			||||||
1
 | 
					1
 | 
				
			||||||
2
 | 
					2
 | 
				
			||||||
3
 | 
					3
 | 
				
			||||||
 | 
					NULL
 | 
				
			||||||
CREATE TABLE t3 (name varchar(255));
 | 
					CREATE TABLE t3 (name varchar(255));
 | 
				
			||||||
INSERT INTO t3 VALUES ('aa'),('ab'),('ac'),('ad'),('ae');
 | 
					INSERT INTO t3 VALUES ('aa'),('ab'),('ac'),('ad'),('ae');
 | 
				
			||||||
SELECT DISTINCT * FROM t3;
 | 
					SELECT DISTINCT * FROM t3;
 | 
				
			||||||
@@ -46,7 +46,7 @@ ab
 | 
				
			|||||||
ac
 | 
					ac
 | 
				
			||||||
ad
 | 
					ad
 | 
				
			||||||
ae
 | 
					ae
 | 
				
			||||||
SELECT DISTINCT name FROM t3 LIMIT 2;
 | 
					SELECT DISTINCT name FROM t3 ORDER BY name LIMIT 2;
 | 
				
			||||||
name
 | 
					name
 | 
				
			||||||
aa
 | 
					aa
 | 
				
			||||||
ab
 | 
					ab
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -15,18 +15,22 @@ CREATE TABLE t1(col1 INT, col2 CHAR(5))ENGINE=Columnstore;
 | 
				
			|||||||
INSERT INTO t1 VALUES(NULL, NULL),(1,'a'),(1,'b'),(1,'c'),(2,'dd'),(3,'eee');
 | 
					INSERT INTO t1 VALUES(NULL, NULL),(1,'a'),(1,'b'),(1,'c'),(2,'dd'),(3,'eee');
 | 
				
			||||||
SELECT COUNT(DISTINCT col1) FROM t1;
 | 
					SELECT COUNT(DISTINCT col1) FROM t1;
 | 
				
			||||||
SELECT COUNT(DISTINCT col2) FROM t1;
 | 
					SELECT COUNT(DISTINCT col2) FROM t1;
 | 
				
			||||||
 | 
					--sorted_result
 | 
				
			||||||
SELECT DISTINCT col1 FROM t1;
 | 
					SELECT DISTINCT col1 FROM t1;
 | 
				
			||||||
SELECT DISTINCT col1 FROM t1 ORDER BY col1 DESC;
 | 
					SELECT DISTINCT col1 FROM t1 ORDER BY col1 DESC;
 | 
				
			||||||
 | 
					--sorted_result
 | 
				
			||||||
SELECT DISTINCT col2 FROM t1;
 | 
					SELECT DISTINCT col2 FROM t1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CREATE TABLE t2(col1 INT)ENGINE=Columnstore;
 | 
					CREATE TABLE t2(col1 INT)ENGINE=Columnstore;
 | 
				
			||||||
INSERT INTO t2 SELECT DISTINCT col1 FROM t1;
 | 
					INSERT INTO t2 SELECT DISTINCT col1 FROM t1;
 | 
				
			||||||
 | 
					--sorted_result
 | 
				
			||||||
SELECT * FROM t2;
 | 
					SELECT * FROM t2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CREATE TABLE t3 (name varchar(255));
 | 
					CREATE TABLE t3 (name varchar(255));
 | 
				
			||||||
INSERT INTO t3 VALUES ('aa'),('ab'),('ac'),('ad'),('ae');
 | 
					INSERT INTO t3 VALUES ('aa'),('ab'),('ac'),('ad'),('ae');
 | 
				
			||||||
 | 
					--sorted_result
 | 
				
			||||||
SELECT DISTINCT * FROM t3;
 | 
					SELECT DISTINCT * FROM t3;
 | 
				
			||||||
SELECT DISTINCT name FROM t3 LIMIT 2;
 | 
					SELECT DISTINCT name FROM t3 ORDER BY name LIMIT 2;
 | 
				
			||||||
SELECT DISTINCT 1 FROM t3 LIMIT 3;
 | 
					SELECT DISTINCT 1 FROM t3 LIMIT 3;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Clean UP
 | 
					# Clean UP
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user