mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Improve UniquePath logic to detect the case where the input is already
known unique (e.g., it is a SELECT DISTINCT ... subquery), and skip the redundant unique-ification step.
This commit is contained in:
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.226 2004/01/05 05:07:35 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.227 2004/01/05 18:04:38 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* Every node type that can appear in stored rules' parsetrees *must*
|
* Every node type that can appear in stored rules' parsetrees *must*
|
||||||
@ -1023,7 +1023,7 @@ _outUniquePath(StringInfo str, UniquePath *node)
|
|||||||
_outPathInfo(str, (Path *) node);
|
_outPathInfo(str, (Path *) node);
|
||||||
|
|
||||||
WRITE_NODE_FIELD(subpath);
|
WRITE_NODE_FIELD(subpath);
|
||||||
WRITE_BOOL_FIELD(use_hash);
|
WRITE_ENUM_FIELD(umethod, UniquePathMethod);
|
||||||
WRITE_FLOAT_FIELD(rows, "%.0f");
|
WRITE_FLOAT_FIELD(rows, "%.0f");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.162 2004/01/05 05:07:35 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.163 2004/01/05 18:04:38 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -605,10 +605,14 @@ create_unique_plan(Query *root, UniquePath *best_path)
|
|||||||
subplan->targetlist = newtlist;
|
subplan->targetlist = newtlist;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Done if we don't need to do any actual unique-ifying */
|
||||||
|
if (best_path->umethod == UNIQUE_PATH_NOOP)
|
||||||
|
return subplan;
|
||||||
|
|
||||||
/* Copy tlist again to make one we can put sorting labels on */
|
/* Copy tlist again to make one we can put sorting labels on */
|
||||||
my_tlist = copyObject(subplan->targetlist);
|
my_tlist = copyObject(subplan->targetlist);
|
||||||
|
|
||||||
if (best_path->use_hash)
|
if (best_path->umethod == UNIQUE_PATH_HASH)
|
||||||
{
|
{
|
||||||
long numGroups;
|
long numGroups;
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.159 2004/01/04 03:51:52 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.160 2004/01/05 18:04:39 tgl Exp $
|
||||||
*
|
*
|
||||||
* HISTORY
|
* HISTORY
|
||||||
* AUTHOR DATE MAJOR EVENT
|
* AUTHOR DATE MAJOR EVENT
|
||||||
@ -921,6 +921,21 @@ has_distinct_on_clause(Query *query)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test whether a query uses simple DISTINCT, ie, has a distinct-list that
|
||||||
|
* is the same as the set of output columns.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
has_distinct_clause(Query *query)
|
||||||
|
{
|
||||||
|
/* Is there a DISTINCT clause at all? */
|
||||||
|
if (query->distinctClause == NIL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* It's DISTINCT if it's not DISTINCT ON */
|
||||||
|
return !has_distinct_on_clause(query);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
* *
|
* *
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.97 2004/01/05 05:07:35 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.98 2004/01/05 18:04:39 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -20,12 +20,14 @@
|
|||||||
#include "executor/executor.h"
|
#include "executor/executor.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "nodes/plannodes.h"
|
#include "nodes/plannodes.h"
|
||||||
|
#include "optimizer/clauses.h"
|
||||||
#include "optimizer/cost.h"
|
#include "optimizer/cost.h"
|
||||||
#include "optimizer/pathnode.h"
|
#include "optimizer/pathnode.h"
|
||||||
#include "optimizer/paths.h"
|
#include "optimizer/paths.h"
|
||||||
#include "optimizer/restrictinfo.h"
|
#include "optimizer/restrictinfo.h"
|
||||||
#include "parser/parse_expr.h"
|
#include "parser/parse_expr.h"
|
||||||
#include "parser/parse_oper.h"
|
#include "parser/parse_oper.h"
|
||||||
|
#include "parser/parsetree.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
#include "utils/selfuncs.h"
|
#include "utils/selfuncs.h"
|
||||||
#include "utils/syscache.h"
|
#include "utils/syscache.h"
|
||||||
@ -546,6 +548,30 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
|
|||||||
|
|
||||||
pathnode->subpath = subpath;
|
pathnode->subpath = subpath;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the input is a subquery that uses DISTINCT, we don't need to do
|
||||||
|
* anything; its output is already unique. (Are there any other cases
|
||||||
|
* in which we can easily prove the input must be distinct?)
|
||||||
|
*/
|
||||||
|
if (rel->rtekind == RTE_SUBQUERY)
|
||||||
|
{
|
||||||
|
RangeTblEntry *rte = rt_fetch(rel->relid, root->rtable);
|
||||||
|
Query *subquery = rte->subquery;
|
||||||
|
|
||||||
|
if (has_distinct_clause(subquery))
|
||||||
|
{
|
||||||
|
pathnode->umethod = UNIQUE_PATH_NOOP;
|
||||||
|
pathnode->rows = rel->rows;
|
||||||
|
pathnode->path.startup_cost = subpath->startup_cost;
|
||||||
|
pathnode->path.total_cost = subpath->total_cost;
|
||||||
|
pathnode->path.pathkeys = subpath->pathkeys;
|
||||||
|
|
||||||
|
rel->cheapest_unique_path = (Path *) pathnode;
|
||||||
|
|
||||||
|
return pathnode;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Try to identify the targetlist that will actually be unique-ified.
|
* Try to identify the targetlist that will actually be unique-ified.
|
||||||
* In current usage, this routine is only used for sub-selects of IN
|
* In current usage, this routine is only used for sub-selects of IN
|
||||||
@ -599,7 +625,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
|
|||||||
* compare costs. We only try this if we know the targetlist for sure
|
* compare costs. We only try this if we know the targetlist for sure
|
||||||
* (else we can't be sure about the datatypes involved).
|
* (else we can't be sure about the datatypes involved).
|
||||||
*/
|
*/
|
||||||
pathnode->use_hash = false;
|
pathnode->umethod = UNIQUE_PATH_SORT;
|
||||||
if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
|
if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -617,11 +643,11 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
|
|||||||
subpath->total_cost,
|
subpath->total_cost,
|
||||||
rel->rows);
|
rel->rows);
|
||||||
if (agg_path.total_cost < sort_path.total_cost)
|
if (agg_path.total_cost < sort_path.total_cost)
|
||||||
pathnode->use_hash = true;
|
pathnode->umethod = UNIQUE_PATH_HASH;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pathnode->use_hash)
|
if (pathnode->umethod == UNIQUE_PATH_HASH)
|
||||||
{
|
{
|
||||||
pathnode->path.startup_cost = agg_path.startup_cost;
|
pathnode->path.startup_cost = agg_path.startup_cost;
|
||||||
pathnode->path.total_cost = agg_path.total_cost;
|
pathnode->path.total_cost = agg_path.total_cost;
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.91 2004/01/05 05:07:36 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.92 2004/01/05 18:04:39 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -442,15 +442,26 @@ typedef struct MaterialPath
|
|||||||
* its subpath.
|
* its subpath.
|
||||||
*
|
*
|
||||||
* This is unlike the other Path nodes in that it can actually generate
|
* This is unlike the other Path nodes in that it can actually generate
|
||||||
* two different plans: either hash-based or sort-based implementation.
|
* different plans: either hash-based or sort-based implementation, or a
|
||||||
* The decision is sufficiently localized that it's not worth having two
|
* no-op if the input path can be proven distinct already. The decision
|
||||||
* separate Path node types.
|
* is sufficiently localized that it's not worth having separate Path node
|
||||||
|
* types. (Note: in the no-op case, we could eliminate the UniquePath node
|
||||||
|
* entirely and just return the subpath; but it's convenient to have a
|
||||||
|
* UniquePath in the path tree to signal upper-level routines that the input
|
||||||
|
* is known distinct.)
|
||||||
*/
|
*/
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
UNIQUE_PATH_NOOP, /* input is known unique already */
|
||||||
|
UNIQUE_PATH_HASH, /* use hashing */
|
||||||
|
UNIQUE_PATH_SORT /* use sorting */
|
||||||
|
} UniquePathMethod;
|
||||||
|
|
||||||
typedef struct UniquePath
|
typedef struct UniquePath
|
||||||
{
|
{
|
||||||
Path path;
|
Path path;
|
||||||
Path *subpath;
|
Path *subpath;
|
||||||
bool use_hash;
|
UniquePathMethod umethod;
|
||||||
double rows; /* estimated number of result tuples */
|
double rows; /* estimated number of result tuples */
|
||||||
} UniquePath;
|
} UniquePath;
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.71 2004/01/04 03:51:52 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.72 2004/01/05 18:04:39 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -57,6 +57,7 @@ extern bool is_pseudo_constant_clause(Node *clause);
|
|||||||
extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
|
extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
|
||||||
extern List *pull_constant_clauses(List *quals, List **constantQual);
|
extern List *pull_constant_clauses(List *quals, List **constantQual);
|
||||||
|
|
||||||
|
extern bool has_distinct_clause(Query *query);
|
||||||
extern bool has_distinct_on_clause(Query *query);
|
extern bool has_distinct_on_clause(Query *query);
|
||||||
|
|
||||||
extern int NumRelids(Node *clause);
|
extern int NumRelids(Node *clause);
|
||||||
|
Reference in New Issue
Block a user