1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-21 00:42:43 +03:00

Refactor planner's pathkeys data structure to create a separate, explicit

representation of equivalence classes of variables.  This is an extensive
rewrite, but it brings a number of benefits:
* planner no longer fails in the presence of "incomplete" operator families
that don't offer operators for every possible combination of datatypes.
* avoid generating and then discarding redundant equality clauses.
* remove bogus assumption that derived equalities always use operators
named "=".
* mergejoins can work with a variety of sort orders (e.g., descending) now,
instead of tying each mergejoinable operator to exactly one sort order.
* better recognition of redundant sort columns.
* can make use of equalities appearing underneath an outer join.
This commit is contained in:
Tom Lane
2007-01-20 20:45:41 +00:00
parent 2b7334d487
commit f41803bb39
35 changed files with 3882 additions and 2719 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.51 2007/01/05 22:19:33 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.52 2007/01/20 20:45:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -33,10 +33,9 @@ static Expr *make_sub_restrictinfos(Expr *clause,
bool outerjoin_delayed,
bool pseudoconstant,
Relids required_relids);
static RestrictInfo *join_clause_is_redundant(PlannerInfo *root,
static bool join_clause_is_redundant(PlannerInfo *root,
RestrictInfo *rinfo,
List *reference_list,
bool isouterjoin);
List *reference_list);
/*
@@ -336,19 +335,17 @@ make_restrictinfo_internal(Expr *clause,
* that happens only if it appears in the right context (top level of a
* joinclause list).
*/
restrictinfo->parent_ec = NULL;
restrictinfo->eval_cost.startup = -1;
restrictinfo->this_selec = -1;
restrictinfo->mergejoinoperator = InvalidOid;
restrictinfo->left_sortop = InvalidOid;
restrictinfo->right_sortop = InvalidOid;
restrictinfo->mergeopfamily = InvalidOid;
restrictinfo->mergeopfamilies = NIL;
restrictinfo->left_pathkey = NIL;
restrictinfo->right_pathkey = NIL;
restrictinfo->left_ec = NULL;
restrictinfo->right_ec = NULL;
restrictinfo->left_mergescansel = -1;
restrictinfo->right_mergescansel = -1;
restrictinfo->outer_is_left = false;
restrictinfo->hashjoinoperator = InvalidOid;
@@ -529,78 +526,18 @@ extract_actual_join_clauses(List *restrictinfo_list,
}
}
/*
* remove_redundant_join_clauses
*
* Given a list of RestrictInfo clauses that are to be applied in a join,
* remove any duplicate or redundant clauses.
*
* We must eliminate duplicates when forming the restrictlist for a joinrel,
* since we will see many of the same clauses arriving from both input
* relations. Also, if a clause is a mergejoinable clause, it's possible that
* it is redundant with previous clauses (see optimizer/README for
* discussion). We detect that case and omit the redundant clause from the
* result list.
*
* The result is a fresh List, but it points to the same member nodes
* as were in the input.
*/
List *
remove_redundant_join_clauses(PlannerInfo *root, List *restrictinfo_list,
bool isouterjoin)
{
List *result = NIL;
ListCell *item;
QualCost cost;
/*
* If there are any redundant clauses, we want to eliminate the ones that
* are more expensive in favor of the ones that are less so. Run
* cost_qual_eval() to ensure the eval_cost fields are set up.
*/
cost_qual_eval(&cost, restrictinfo_list);
/*
* We don't have enough knowledge yet to be able to estimate the number of
* times a clause might be evaluated, so it's hard to weight the startup
* and per-tuple costs appropriately. For now just weight 'em the same.
*/
#define CLAUSECOST(r) ((r)->eval_cost.startup + (r)->eval_cost.per_tuple)
foreach(item, restrictinfo_list)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
RestrictInfo *prevrinfo;
/* is it redundant with any prior clause? */
prevrinfo = join_clause_is_redundant(root, rinfo, result, isouterjoin);
if (prevrinfo == NULL)
{
/* no, so add it to result list */
result = lappend(result, rinfo);
}
else if (CLAUSECOST(rinfo) < CLAUSECOST(prevrinfo))
{
/* keep this one, drop the previous one */
result = list_delete_ptr(result, prevrinfo);
result = lappend(result, rinfo);
}
/* else, drop this one */
}
return result;
}
/*
* select_nonredundant_join_clauses
*
* Given a list of RestrictInfo clauses that are to be applied in a join,
* select the ones that are not redundant with any clause in the
* reference_list.
* reference_list. This is used only for nestloop-with-inner-indexscan
* joins: any clauses being checked by the index should be removed from
* the qpquals list.
*
* This is similar to remove_redundant_join_clauses, but we are looking for
* redundancies with a separate list of clauses (i.e., clauses that have
* already been applied below the join itself).
* "Redundant" means either equal() or derived from the same EquivalenceClass.
* We have to check the latter because indxqual.c may select different derived
* clauses than were selected by generate_join_implied_equalities().
*
* Note that we assume the given restrictinfo_list has already been checked
* for local redundancies, so we don't check again.
@@ -608,8 +545,7 @@ remove_redundant_join_clauses(PlannerInfo *root, List *restrictinfo_list,
List *
select_nonredundant_join_clauses(PlannerInfo *root,
List *restrictinfo_list,
List *reference_list,
bool isouterjoin)
List *reference_list)
{
List *result = NIL;
ListCell *item;
@@ -619,7 +555,7 @@ select_nonredundant_join_clauses(PlannerInfo *root,
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
/* drop it if redundant with any reference clause */
if (join_clause_is_redundant(root, rinfo, reference_list, isouterjoin) != NULL)
if (join_clause_is_redundant(root, rinfo, reference_list))
continue;
/* otherwise, add it to result list */
@@ -631,79 +567,28 @@ select_nonredundant_join_clauses(PlannerInfo *root,
/*
* join_clause_is_redundant
* If rinfo is redundant with any clause in reference_list,
* return one such clause; otherwise return NULL.
*
* This is the guts of both remove_redundant_join_clauses and
* select_nonredundant_join_clauses. See the docs above for motivation.
*
* We can detect redundant mergejoinable clauses very cheaply by using their
* left and right pathkeys, which uniquely identify the sets of equijoined
* variables in question. All the members of a pathkey set that are in the
* left relation have already been forced to be equal; likewise for those in
* the right relation. So, we need to have only one clause that checks
* equality between any set member on the left and any member on the right;
* by transitivity, all the rest are then equal.
*
* However, clauses that are of the form "var expr = const expr" cannot be
* eliminated as redundant. This is because when there are const expressions
* in a pathkey set, generate_implied_equalities() suppresses "var = var"
* clauses in favor of "var = const" clauses. We cannot afford to drop any
* of the latter, even though they might seem redundant by the pathkey
* membership test.
*
* Weird special case: if we have two clauses that seem redundant
* except one is pushed down into an outer join and the other isn't,
* then they're not really redundant, because one constrains the
* joined rows after addition of null fill rows, and the other doesn't.
* Test whether rinfo is redundant with any clause in reference_list.
*/
static RestrictInfo *
static bool
join_clause_is_redundant(PlannerInfo *root,
RestrictInfo *rinfo,
List *reference_list,
bool isouterjoin)
List *reference_list)
{
ListCell *refitem;
/* always consider exact duplicates redundant */
foreach(refitem, reference_list)
{
RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem);
/* always consider exact duplicates redundant */
if (equal(rinfo, refrinfo))
return refrinfo;
return true;
/* check if derived from same EquivalenceClass */
if (rinfo->parent_ec != NULL &&
rinfo->parent_ec == refrinfo->parent_ec)
return true;
}
/* check for redundant merge clauses */
if (rinfo->mergejoinoperator != InvalidOid)
{
/* do the cheap test first: is it a "var = const" clause? */
if (bms_is_empty(rinfo->left_relids) ||
bms_is_empty(rinfo->right_relids))
return NULL; /* var = const, so not redundant */
cache_mergeclause_pathkeys(root, rinfo);
foreach(refitem, reference_list)
{
RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem);
if (refrinfo->mergejoinoperator != InvalidOid)
{
cache_mergeclause_pathkeys(root, refrinfo);
if (rinfo->left_pathkey == refrinfo->left_pathkey &&
rinfo->right_pathkey == refrinfo->right_pathkey &&
(rinfo->is_pushed_down == refrinfo->is_pushed_down ||
!isouterjoin))
{
/* Yup, it's redundant */
return refrinfo;
}
}
}
}
/* otherwise, not redundant */
return NULL;
return false;
}