mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Implement choice between hash-based and sort-based grouping for doing
DISTINCT processing on the output of an IN sub-select.
This commit is contained in:
@ -10,12 +10,13 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.132 2003/01/20 18:54:52 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.133 2003/01/22 00:07:00 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
#include "nodes/makefuncs.h"
|
#include "nodes/makefuncs.h"
|
||||||
#include "nodes/nodeFuncs.h"
|
#include "nodes/nodeFuncs.h"
|
||||||
@ -418,6 +419,7 @@ create_unique_plan(Query *root, UniquePath *best_path)
|
|||||||
Plan *plan;
|
Plan *plan;
|
||||||
Plan *subplan;
|
Plan *subplan;
|
||||||
List *sub_targetlist;
|
List *sub_targetlist;
|
||||||
|
List *my_tlist;
|
||||||
List *l;
|
List *l;
|
||||||
|
|
||||||
subplan = create_plan(root, best_path->subpath);
|
subplan = create_plan(root, best_path->subpath);
|
||||||
@ -474,21 +476,39 @@ create_unique_plan(Query *root, UniquePath *best_path)
|
|||||||
subplan->targetlist = newtlist;
|
subplan->targetlist = newtlist;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
my_tlist = new_unsorted_tlist(subplan->targetlist);
|
||||||
|
|
||||||
if (best_path->use_hash)
|
if (best_path->use_hash)
|
||||||
{
|
{
|
||||||
elog(ERROR, "create_unique_plan: hash case not implemented yet");
|
int numGroupCols = length(my_tlist);
|
||||||
plan = NULL;
|
long numGroups;
|
||||||
|
AttrNumber *groupColIdx;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
numGroups = (long) Min(best_path->rows, (double) LONG_MAX);
|
||||||
|
|
||||||
|
groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
|
||||||
|
for (i = 0; i < numGroupCols; i++)
|
||||||
|
groupColIdx[i] = i+1;
|
||||||
|
|
||||||
|
plan = (Plan *) make_agg(root,
|
||||||
|
my_tlist,
|
||||||
|
NIL,
|
||||||
|
AGG_HASHED,
|
||||||
|
numGroupCols,
|
||||||
|
groupColIdx,
|
||||||
|
numGroups,
|
||||||
|
0,
|
||||||
|
subplan);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
List *sort_tlist;
|
|
||||||
List *sortList;
|
List *sortList;
|
||||||
|
|
||||||
sort_tlist = new_unsorted_tlist(subplan->targetlist);
|
sortList = addAllTargetsToSortList(NIL, my_tlist);
|
||||||
sortList = addAllTargetsToSortList(NIL, sort_tlist);
|
plan = (Plan *) make_sort_from_sortclauses(root, my_tlist,
|
||||||
plan = (Plan *) make_sort_from_sortclauses(root, sort_tlist,
|
|
||||||
subplan, sortList);
|
subplan, sortList);
|
||||||
plan = (Plan *) make_unique(sort_tlist, plan, sortList);
|
plan = (Plan *) make_unique(my_tlist, plan, sortList);
|
||||||
}
|
}
|
||||||
|
|
||||||
plan->plan_rows = best_path->rows;
|
plan->plan_rows = best_path->rows;
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.84 2003/01/20 18:54:56 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.85 2003/01/22 00:07:00 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -16,14 +16,22 @@
|
|||||||
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "catalog/pg_operator.h"
|
||||||
#include "executor/executor.h"
|
#include "executor/executor.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
#include "nodes/plannodes.h"
|
#include "nodes/plannodes.h"
|
||||||
#include "optimizer/cost.h"
|
#include "optimizer/cost.h"
|
||||||
#include "optimizer/pathnode.h"
|
#include "optimizer/pathnode.h"
|
||||||
#include "optimizer/paths.h"
|
#include "optimizer/paths.h"
|
||||||
#include "optimizer/restrictinfo.h"
|
#include "optimizer/restrictinfo.h"
|
||||||
|
#include "parser/parse_expr.h"
|
||||||
|
#include "parser/parse_oper.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
#include "utils/selfuncs.h"
|
#include "utils/selfuncs.h"
|
||||||
|
#include "utils/syscache.h"
|
||||||
|
|
||||||
|
|
||||||
|
static bool hash_safe_tlist(List *tlist);
|
||||||
|
|
||||||
|
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
@ -506,6 +514,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
|
|||||||
{
|
{
|
||||||
UniquePath *pathnode;
|
UniquePath *pathnode;
|
||||||
Path sort_path; /* dummy for result of cost_sort */
|
Path sort_path; /* dummy for result of cost_sort */
|
||||||
|
Path agg_path; /* dummy for result of cost_agg */
|
||||||
MemoryContext oldcontext;
|
MemoryContext oldcontext;
|
||||||
List *sub_targetlist;
|
List *sub_targetlist;
|
||||||
List *l;
|
List *l;
|
||||||
@ -587,16 +596,80 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
|
|||||||
*/
|
*/
|
||||||
sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
|
sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
|
||||||
|
|
||||||
pathnode->use_hash = false; /* for now */
|
/*
|
||||||
|
* Is it safe to use a hashed implementation? If so, estimate and
|
||||||
|
* compare costs. We only try this if we know the targetlist for
|
||||||
|
* sure (else we can't be sure about the datatypes involved).
|
||||||
|
*/
|
||||||
|
pathnode->use_hash = false;
|
||||||
|
if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Estimate the overhead per hashtable entry at 64 bytes (same
|
||||||
|
* as in planner.c).
|
||||||
|
*/
|
||||||
|
int hashentrysize = rel->width + 64;
|
||||||
|
|
||||||
pathnode->path.startup_cost = sort_path.startup_cost;
|
if (hashentrysize * pathnode->rows <= SortMem * 1024L)
|
||||||
pathnode->path.total_cost = sort_path.total_cost;
|
{
|
||||||
|
cost_agg(&agg_path, root,
|
||||||
|
AGG_HASHED, 0,
|
||||||
|
numCols, pathnode->rows,
|
||||||
|
subpath->startup_cost,
|
||||||
|
subpath->total_cost,
|
||||||
|
rel->rows);
|
||||||
|
if (agg_path.total_cost < sort_path.total_cost)
|
||||||
|
pathnode->use_hash = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pathnode->use_hash)
|
||||||
|
{
|
||||||
|
pathnode->path.startup_cost = agg_path.startup_cost;
|
||||||
|
pathnode->path.total_cost = agg_path.total_cost;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pathnode->path.startup_cost = sort_path.startup_cost;
|
||||||
|
pathnode->path.total_cost = sort_path.total_cost;
|
||||||
|
}
|
||||||
|
|
||||||
rel->cheapest_unique_path = (Path *) pathnode;
|
rel->cheapest_unique_path = (Path *) pathnode;
|
||||||
|
|
||||||
return pathnode;
|
return pathnode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* hash_safe_tlist - can datatypes of given tlist be hashed?
|
||||||
|
*
|
||||||
|
* We assume hashed aggregation will work if the datatype's equality operator
|
||||||
|
* is marked hashjoinable.
|
||||||
|
*
|
||||||
|
* XXX this probably should be somewhere else. See also hash_safe_grouping
|
||||||
|
* in plan/planner.c.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
hash_safe_tlist(List *tlist)
|
||||||
|
{
|
||||||
|
List *tl;
|
||||||
|
|
||||||
|
foreach(tl, tlist)
|
||||||
|
{
|
||||||
|
Node *expr = (Node *) lfirst(tl);
|
||||||
|
Operator optup;
|
||||||
|
bool oprcanhash;
|
||||||
|
|
||||||
|
optup = equality_oper(exprType(expr), true);
|
||||||
|
if (!optup)
|
||||||
|
return false;
|
||||||
|
oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;
|
||||||
|
ReleaseSysCache(optup);
|
||||||
|
if (!oprcanhash)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* create_subqueryscan_path
|
* create_subqueryscan_path
|
||||||
* Creates a path corresponding to a sequential scan of a subquery,
|
* Creates a path corresponding to a sequential scan of a subquery,
|
||||||
|
Reference in New Issue
Block a user