1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-20 05:03:10 +03:00

Refactor planner's pathkeys data structure to create a separate, explicit

representation of equivalence classes of variables.  This is an extensive
rewrite, but it brings a number of benefits:
* planner no longer fails in the presence of "incomplete" operator families
that don't offer operators for every possible combination of datatypes.
* avoid generating and then discarding redundant equality clauses.
* remove bogus assumption that derived equalities always use operators
named "=".
* mergejoins can work with a variety of sort orders (e.g., descending) now,
instead of tying each mergejoinable operator to exactly one sort order.
* better recognition of redundant sort columns.
* can make use of equalities appearing underneath an outer join.
This commit is contained in:
Tom Lane
2007-01-20 20:45:41 +00:00
parent 2b7334d487
commit f41803bb39
35 changed files with 3882 additions and 2719 deletions

View File

@ -4,7 +4,7 @@
# Makefile for optimizer/path
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/optimizer/path/Makefile,v 1.17 2007/01/20 17:16:11 petere Exp $
# $PostgreSQL: pgsql/src/backend/optimizer/path/Makefile,v 1.18 2007/01/20 20:45:38 tgl Exp $
#
#-------------------------------------------------------------------------
@ -12,7 +12,7 @@ subdir = src/backend/optimizer/path
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = allpaths.o clausesel.o costsize.o indxpath.o \
OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \
joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o
all: SUBSYS.o

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.156 2007/01/09 02:14:12 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.157 2007/01/20 20:45:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -325,6 +325,16 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
adjust_appendrel_attrs((Node *) rel->joininfo,
appinfo);
/*
* We have to make child entries in the EquivalenceClass data
* structures as well.
*/
if (rel->has_eclass_joins)
{
add_child_rel_equivalences(root, appinfo, rel, childrel);
childrel->has_eclass_joins = true;
}
/*
* Copy the parent's attr_needed data as well, with appropriate
* adjustment of relids and attribute numbers.

View File

@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.174 2007/01/10 18:06:03 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.175 2007/01/20 20:45:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1258,8 +1258,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
Path *outer_path = path->jpath.outerjoinpath;
Path *inner_path = path->jpath.innerjoinpath;
List *mergeclauses = path->path_mergeclauses;
Oid *mergeFamilies = path->path_mergeFamilies;
int *mergeStrategies = path->path_mergeStrategies;
List *outersortkeys = path->outersortkeys;
List *innersortkeys = path->innersortkeys;
Cost startup_cost = 0;
@ -1268,7 +1266,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
Selectivity merge_selec;
QualCost merge_qual_cost;
QualCost qp_qual_cost;
RestrictInfo *firstclause;
double outer_path_rows = PATH_ROWS(outer_path);
double inner_path_rows = PATH_ROWS(inner_path);
double outer_rows,
@ -1347,32 +1344,47 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
* inputs that will actually need to be scanned. We use only the first
* (most significant) merge clause for this purpose.
*
* Since this calculation is somewhat expensive, and will be the same for
* all mergejoin paths associated with the merge clause, we cache the
* results in the RestrictInfo node. XXX that won't work anymore once
* we support multiple possible orderings!
* XXX mergejoinscansel is a bit expensive, can we cache its results?
*/
if (mergeclauses && path->jpath.jointype != JOIN_FULL)
{
firstclause = (RestrictInfo *) linitial(mergeclauses);
if (firstclause->left_mergescansel < 0) /* not computed yet? */
mergejoinscansel(root, (Node *) firstclause->clause,
mergeFamilies[0],
mergeStrategies[0],
&firstclause->left_mergescansel,
&firstclause->right_mergescansel);
RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses);
List *opathkeys;
List *ipathkeys;
PathKey *opathkey;
PathKey *ipathkey;
Selectivity leftscansel,
rightscansel;
if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids))
/* Get the input pathkeys to determine the sort-order details */
opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys;
ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys;
Assert(opathkeys);
Assert(ipathkeys);
opathkey = (PathKey *) linitial(opathkeys);
ipathkey = (PathKey *) linitial(ipathkeys);
/* debugging check */
if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
opathkey->pk_strategy != ipathkey->pk_strategy ||
opathkey->pk_nulls_first != ipathkey->pk_nulls_first)
elog(ERROR, "left and right pathkeys do not match in mergejoin");
mergejoinscansel(root, (Node *) firstclause->clause,
opathkey->pk_opfamily, opathkey->pk_strategy,
&leftscansel, &rightscansel);
if (bms_is_subset(firstclause->left_relids,
outer_path->parent->relids))
{
/* left side of clause is outer */
outerscansel = firstclause->left_mergescansel;
innerscansel = firstclause->right_mergescansel;
outerscansel = leftscansel;
innerscansel = rightscansel;
}
else
{
/* left side of clause is inner */
outerscansel = firstclause->right_mergescansel;
innerscansel = firstclause->left_mergescansel;
outerscansel = rightscansel;
innerscansel = leftscansel;
}
if (path->jpath.jointype == JOIN_LEFT)
outerscansel = 1.0;

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.215 2007/01/09 02:14:12 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.216 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -32,7 +32,6 @@
#include "optimizer/var.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
@ -72,21 +71,11 @@ static bool match_rowcompare_to_indexcol(IndexOptInfo *index,
Oid opfamily,
RowCompareExpr *clause,
Relids outer_relids);
static Relids indexable_outerrelids(RelOptInfo *rel);
static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel);
static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel,
Relids outer_relids);
static List *find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
Relids outer_relids, bool isouterjoin);
static ScanDirection match_variant_ordering(PlannerInfo *root,
IndexOptInfo *index,
List *restrictclauses);
static List *identify_ignorable_ordering_cols(PlannerInfo *root,
IndexOptInfo *index,
List *restrictclauses);
static bool match_index_to_query_keys(PlannerInfo *root,
IndexOptInfo *index,
ScanDirection indexscandir,
List *ignorables);
static bool match_boolean_index_clause(Node *clause, int indexcol,
IndexOptInfo *index);
static bool match_special_index_operator(Expr *clause, Oid opfamily,
@ -157,7 +146,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
* participate in such join clauses. We'll use this set later to
* recognize outer rels that are equivalent for joining purposes.
*/
rel->index_outer_relids = indexable_outerrelids(rel);
rel->index_outer_relids = indexable_outerrelids(root, rel);
/*
* Find all the index paths that are directly usable for this relation
@ -351,8 +340,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
if (index_is_ordered && istoplevel && outer_rel == NULL)
{
index_pathkeys = build_index_pathkeys(root, index,
ForwardScanDirection,
true);
ForwardScanDirection);
useful_pathkeys = truncate_useless_pathkeys(root, rel,
index_pathkeys);
}
@ -378,23 +366,21 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
}
/*
* 4. If the index is ordered, and there is a requested query ordering
* that we failed to match, consider variant ways of achieving the
* ordering. Again, this is only interesting at top level.
* 4. If the index is ordered, a backwards scan might be
* interesting. Again, this is only interesting at top level.
*/
if (index_is_ordered && istoplevel && outer_rel == NULL &&
root->query_pathkeys != NIL &&
pathkeys_useful_for_ordering(root, useful_pathkeys) == 0)
if (index_is_ordered && istoplevel && outer_rel == NULL)
{
ScanDirection scandir;
scandir = match_variant_ordering(root, index, restrictclauses);
if (!ScanDirectionIsNoMovement(scandir))
index_pathkeys = build_index_pathkeys(root, index,
BackwardScanDirection);
useful_pathkeys = truncate_useless_pathkeys(root, rel,
index_pathkeys);
if (useful_pathkeys != NIL)
{
ipath = create_index_path(root, index,
restrictclauses,
root->query_pathkeys,
scandir,
useful_pathkeys,
BackwardScanDirection,
outer_rel);
result = lappend(result, ipath);
}
@ -1207,19 +1193,6 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel)
List *restrictinfo_list = rel->baserestrictinfo;
ListCell *ilist;
/*
* Note: if Postgres tried to optimize queries by forming equivalence
* classes over equi-joined attributes (i.e., if it recognized that a
* qualification such as "where a.b=c.d and a.b=5" could make use of an
* index on c.d), then we could use that equivalence class info here with
* joininfo lists to do more complete tests for the usability of a partial
* index. For now, the test only uses restriction clauses (those in
* baserestrictinfo). --Nels, Dec '92
*
* XXX as of 7.1, equivalence class info *is* available. Consider
* improving this code as foreseen by Nels.
*/
foreach(ilist, rel->indexlist)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
@ -1242,18 +1215,19 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel)
* for the specified table. Returns a set of relids.
*/
static Relids
indexable_outerrelids(RelOptInfo *rel)
indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel)
{
Relids outer_relids = NULL;
ListCell *l;
bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
ListCell *lc1;
/*
* Examine each joinclause in the joininfo list to see if it matches any
* key of any index. If so, add the clause's other rels to the result.
*/
foreach(l, rel->joininfo)
foreach(lc1, rel->joininfo)
{
RestrictInfo *joininfo = (RestrictInfo *) lfirst(l);
RestrictInfo *joininfo = (RestrictInfo *) lfirst(lc1);
Relids other_rels;
other_rels = bms_difference(joininfo->required_relids, rel->relids);
@ -1263,6 +1237,71 @@ indexable_outerrelids(RelOptInfo *rel)
bms_free(other_rels);
}
/*
* We also have to look through the query's EquivalenceClasses to see
* if any of them could generate indexable join conditions for this rel.
*/
if (rel->has_eclass_joins)
{
foreach(lc1, root->eq_classes)
{
EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
Relids other_rels = NULL;
bool found_index = false;
ListCell *lc2;
/*
* Won't generate joinclauses if const or single-member (the latter
* test covers the volatile case too)
*/
if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
continue;
/*
* Note we don't test ec_broken; if we did, we'd need a separate
* code path to look through ec_sources. Checking the members
* anyway is OK as a possibly-overoptimistic heuristic.
*/
/*
* No point in searching if rel not mentioned in eclass (but we
* can't tell that for a child rel).
*/
if (!is_child_rel &&
!bms_is_subset(rel->relids, cur_ec->ec_relids))
continue;
/*
* Scan members, looking for both an index match and join
* candidates
*/
foreach(lc2, cur_ec->ec_members)
{
EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
/* Join candidate? */
if (!cur_em->em_is_child &&
!bms_overlap(cur_em->em_relids, rel->relids))
{
other_rels = bms_add_members(other_rels,
cur_em->em_relids);
continue;
}
/* Check for index match (only need one) */
if (!found_index &&
bms_equal(cur_em->em_relids, rel->relids) &&
eclass_matches_any_index(cur_ec, cur_em, rel))
found_index = true;
}
if (found_index)
outer_relids = bms_join(outer_relids, other_rels);
else
bms_free(other_rels);
}
}
return outer_relids;
}
@ -1339,6 +1378,42 @@ matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids)
return false;
}
/*
* eclass_matches_any_index
* Workhorse for indexable_outerrelids: see if an EquivalenceClass member
* can be matched to any index column of the given rel.
*
* This is also exported for use by find_eclass_clauses_for_index_join.
*/
bool
eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em,
RelOptInfo *rel)
{
ListCell *l;
foreach(l, rel->indexlist)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
int indexcol = 0;
Oid *families = index->opfamily;
do
{
Oid curFamily = families[0];
if (list_member_oid(ec->ec_opfamilies, curFamily) &&
match_index_to_operand((Node *) em->em_expr, indexcol, index))
return true;
indexcol++;
families++;
} while (!DoneMatchingIndexKeys(families));
}
return false;
}
/*
* best_inner_indexscan
* Finds the best available inner indexscan for a nestloop join
@ -1393,12 +1468,12 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
return NULL;
/*
* Otherwise, we have to do path selection in the memory context of the
* given rel, so that any created path can be safely attached to the rel's
* cache of best inner paths. (This is not currently an issue for normal
* planning, but it is an issue for GEQO planning.)
* Otherwise, we have to do path selection in the main planning context,
* so that any created path can be safely attached to the rel's cache of
* best inner paths. (This is not currently an issue for normal planning,
* but it is an issue for GEQO planning.)
*/
oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
oldcontext = MemoryContextSwitchTo(root->planner_cxt);
/*
* Intersect the given outer relids with index_outer_relids to find the
@ -1539,7 +1614,12 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
Relids join_relids;
ListCell *l;
/* Look for joinclauses that are usable with given outer_relids */
/*
* Look for joinclauses that are usable with given outer_relids. Note
* we'll take anything that's applicable to the join whether it has
* anything to do with an index or not; since we're only building a list,
* it's not worth filtering more finely here.
*/
join_relids = bms_union(rel->relids, outer_relids);
foreach(l, rel->joininfo)
@ -1557,276 +1637,27 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
bms_free(join_relids);
/* if no join clause was matched then forget it, per comments above */
/*
* Also check to see if any EquivalenceClasses can produce a relevant
* joinclause. Since all such clauses are effectively pushed-down,
* this doesn't apply to outer joins.
*/
if (!isouterjoin && rel->has_eclass_joins)
clause_list = list_concat(clause_list,
find_eclass_clauses_for_index_join(root,
rel,
outer_relids));
/* If no join clause was matched then forget it, per comments above */
if (clause_list == NIL)
return NIL;
/*
* We can also use any plain restriction clauses for the rel. We put
* these at the front of the clause list for the convenience of
* remove_redundant_join_clauses, which can never remove non-join clauses
* and hence won't be able to get rid of a non-join clause if it appears
* after a join clause it is redundant with.
*/
/* We can also use any plain restriction clauses for the rel */
clause_list = list_concat(list_copy(rel->baserestrictinfo), clause_list);
/*
* We may now have clauses that are known redundant. Get rid of 'em.
*/
if (list_length(clause_list) > 1)
{
clause_list = remove_redundant_join_clauses(root,
clause_list,
isouterjoin);
}
return clause_list;
}
/****************************************************************************
* ---- ROUTINES TO HANDLE PATHKEYS ----
****************************************************************************/
/*
* match_variant_ordering
* Try to match an index's ordering to the query's requested ordering
*
* This is used when the index is ordered but a naive comparison fails to
* match its ordering (pathkeys) to root->query_pathkeys. It may be that
* we need to scan the index backwards. Also, a less naive comparison can
* help for both forward and backward indexscans. Columns of the index
* that have an equality restriction clause can be ignored in the match;
* that is, an index on (x,y) can be considered to match the ordering of
* ... WHERE x = 42 ORDER BY y;
*
* Note: it would be possible to similarly ignore useless ORDER BY items;
* that is, an index on just y could be considered to match the ordering of
* ... WHERE x = 42 ORDER BY x, y;
* But proving that this is safe would require finding a btree opfamily
* containing both the = operator and the < or > operator in the ORDER BY
* item. That's significantly more expensive than what we do here, since
* we'd have to look at restriction clauses unrelated to the current index
* and search for opfamilies without any hint from the index. The practical
* use-cases seem to be mostly covered by ignoring index columns, so that's
* all we do for now.
*
* Inputs:
* 'index' is the index of interest.
* 'restrictclauses' is the list of sublists of restriction clauses
* matching the columns of the index (NIL if none)
*
* If able to match the requested query pathkeys, returns either
* ForwardScanDirection or BackwardScanDirection to indicate the proper index
* scan direction. If no match, returns NoMovementScanDirection.
*/
static ScanDirection
match_variant_ordering(PlannerInfo *root,
IndexOptInfo *index,
List *restrictclauses)
{
List *ignorables;
/*
* Forget the whole thing if not a btree index; our check for ignorable
* columns assumes we are dealing with btree opfamilies. (It'd be possible
* to factor out just the try for backwards indexscan, but considering
* that we presently have no orderable indexes except btrees anyway, it's
* hardly worth contorting this code for that case.)
*
* Note: if you remove this, you probably need to put in a check on
* amoptionalkey to prevent possible clauseless scan on an index that
* won't cope.
*/
if (index->relam != BTREE_AM_OID)
return NoMovementScanDirection;
/*
* Figure out which index columns can be optionally ignored because they
* have an equality constraint. This is the same set for either forward
* or backward scan, so we do it just once.
*/
ignorables = identify_ignorable_ordering_cols(root, index,
restrictclauses);
/*
* Try to match to forward scan, then backward scan. However, we can skip
* the forward-scan case if there are no ignorable columns, because
* find_usable_indexes() would have found the match already.
*/
if (ignorables &&
match_index_to_query_keys(root, index, ForwardScanDirection,
ignorables))
return ForwardScanDirection;
if (match_index_to_query_keys(root, index, BackwardScanDirection,
ignorables))
return BackwardScanDirection;
return NoMovementScanDirection;
}
/*
* identify_ignorable_ordering_cols
* Determine which index columns can be ignored for ordering purposes
*
* Returns an integer List of column numbers (1-based) of ignorable
* columns. The ignorable columns are those that have equality constraints
* against pseudoconstants.
*/
static List *
identify_ignorable_ordering_cols(PlannerInfo *root,
IndexOptInfo *index,
List *restrictclauses)
{
List *result = NIL;
int indexcol = 0; /* note this is 0-based */
ListCell *l;
/* restrictclauses is either NIL or has a sublist per column */
foreach(l, restrictclauses)
{
List *sublist = (List *) lfirst(l);
Oid opfamily = index->opfamily[indexcol];
ListCell *l2;
foreach(l2, sublist)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l2);
OpExpr *clause = (OpExpr *) rinfo->clause;
Oid clause_op;
int op_strategy;
bool varonleft;
bool ispc;
/* First check for boolean-index cases. */
if (IsBooleanOpfamily(opfamily))
{
if (match_boolean_index_clause((Node *) clause, indexcol,
index))
{
/*
* The clause means either col = TRUE or col = FALSE; we
* do not care which, it's an equality constraint either
* way.
*/
result = lappend_int(result, indexcol + 1);
break;
}
}
/* Otherwise, ignore if not a binary opclause */
if (!is_opclause(clause) || list_length(clause->args) != 2)
continue;
/* Determine left/right sides and check the operator */
clause_op = clause->opno;
if (match_index_to_operand(linitial(clause->args), indexcol,
index))
{
/* clause_op is correct */
varonleft = true;
}
else
{
Assert(match_index_to_operand(lsecond(clause->args), indexcol,
index));
/* Must flip operator to get the opfamily member */
clause_op = get_commutator(clause_op);
varonleft = false;
}
if (!OidIsValid(clause_op))
continue; /* ignore non match, per next comment */
op_strategy = get_op_opfamily_strategy(clause_op, opfamily);
/*
* You might expect to see Assert(op_strategy != 0) here, but you
* won't: the clause might contain a special indexable operator
* rather than an ordinary opfamily member. Currently none of the
* special operators are very likely to expand to an equality
* operator; we do not bother to check, but just assume no match.
*/
if (op_strategy != BTEqualStrategyNumber)
continue;
/* Now check that other side is pseudoconstant */
if (varonleft)
ispc = is_pseudo_constant_clause_relids(lsecond(clause->args),
rinfo->right_relids);
else
ispc = is_pseudo_constant_clause_relids(linitial(clause->args),
rinfo->left_relids);
if (ispc)
{
result = lappend_int(result, indexcol + 1);
break;
}
}
indexcol++;
}
return result;
}
/*
* match_index_to_query_keys
* Check a single scan direction for "intelligent" match to query keys
*
* 'index' is the index of interest.
* 'indexscandir' is the scan direction to consider
* 'ignorables' is an integer list of indexes of ignorable index columns
*
* Returns TRUE on successful match (ie, the query_pathkeys can be considered
* to match this index).
*/
static bool
match_index_to_query_keys(PlannerInfo *root,
IndexOptInfo *index,
ScanDirection indexscandir,
List *ignorables)
{
List *index_pathkeys;
ListCell *index_cell;
int index_col;
ListCell *r;
/* Get the pathkeys that exactly describe the index */
index_pathkeys = build_index_pathkeys(root, index, indexscandir, false);
/*
* Can we match to the query's requested pathkeys? The inner loop skips
* over ignorable index columns while trying to match.
*/
index_cell = list_head(index_pathkeys);
index_col = 0;
foreach(r, root->query_pathkeys)
{
List *rsubkey = (List *) lfirst(r);
for (;;)
{
List *isubkey;
if (index_cell == NULL)
return false;
isubkey = (List *) lfirst(index_cell);
index_cell = lnext(index_cell);
index_col++; /* index_col is now 1-based */
/*
* Since we are dealing with canonicalized pathkeys, pointer
* comparison is sufficient to determine a match.
*/
if (rsubkey == isubkey)
break; /* matched current query pathkey */
if (!list_member_int(ignorables, index_col))
return false; /* definite failure to match */
/* otherwise loop around and try to match to next index col */
}
}
return true;
}
/****************************************************************************
* ---- PATH CREATION UTILITIES ----

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.110 2007/01/10 18:06:03 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.111 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -16,7 +16,6 @@
#include <math.h>
#include "access/skey.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
@ -40,10 +39,6 @@ static List *select_mergejoin_clauses(RelOptInfo *joinrel,
RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype);
static void build_mergejoin_strat_arrays(List *mergeclauses,
Oid **mergefamilies,
int **mergestrategies,
bool **mergenullsfirst);
/*
@ -205,9 +200,9 @@ sort_inner_and_outer(PlannerInfo *root,
*
* Actually, it's not quite true that every mergeclause ordering will
* generate a different path order, because some of the clauses may be
* redundant. Therefore, what we do is convert the mergeclause list to a
* list of canonical pathkeys, and then consider different orderings of
* the pathkeys.
* partially redundant (refer to the same EquivalenceClasses). Therefore,
* what we do is convert the mergeclause list to a list of canonical
* pathkeys, and then consider different orderings of the pathkeys.
*
* Generating a path for *every* permutation of the pathkeys doesn't seem
* like a winning strategy; the cost in planning time is too high. For
@ -216,76 +211,59 @@ sort_inner_and_outer(PlannerInfo *root,
* mergejoin without re-sorting against any other possible mergejoin
* partner path. But if we've not guessed the right ordering of secondary
* keys, we may end up evaluating clauses as qpquals when they could have
* been done as mergeclauses. We need to figure out a better way. (Two
* possible approaches: look at all the relevant index relations to
* suggest plausible sort orders, or make just one output path and somehow
* mark it as having a sort-order that can be rearranged freely.)
* been done as mergeclauses. (In practice, it's rare that there's more
* than two or three mergeclauses, so expending a huge amount of thought
* on that is probably not worth it.)
*
* The pathkey order returned by select_outer_pathkeys_for_merge() has
* some heuristics behind it (see that function), so be sure to try it
* exactly as-is as well as making variants.
*/
all_pathkeys = make_pathkeys_for_mergeclauses(root,
mergeclause_list,
outerrel);
all_pathkeys = select_outer_pathkeys_for_merge(root,
mergeclause_list,
joinrel);
foreach(l, all_pathkeys)
{
List *front_pathkey = (List *) lfirst(l);
List *cur_pathkeys;
List *cur_mergeclauses;
Oid *mergefamilies;
int *mergestrategies;
bool *mergenullsfirst;
List *outerkeys;
List *innerkeys;
List *merge_pathkeys;
/* Make a pathkey list with this guy first. */
/* Make a pathkey list with this guy first */
if (l != list_head(all_pathkeys))
cur_pathkeys = lcons(front_pathkey,
list_delete_ptr(list_copy(all_pathkeys),
front_pathkey));
outerkeys = lcons(front_pathkey,
list_delete_ptr(list_copy(all_pathkeys),
front_pathkey));
else
cur_pathkeys = all_pathkeys; /* no work at first one... */
outerkeys = all_pathkeys; /* no work at first one... */
/*
* Select mergeclause(s) that match this sort ordering. If we had
* redundant merge clauses then we will get a subset of the original
* clause list. There had better be some match, however...
*/
/* Sort the mergeclauses into the corresponding ordering */
cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
cur_pathkeys,
outerkeys,
true,
mergeclause_list);
Assert(cur_mergeclauses != NIL);
/* Forget it if can't use all the clauses in right/full join */
if (useallclauses &&
list_length(cur_mergeclauses) != list_length(mergeclause_list))
continue;
/* Should have used them all... */
Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
/* Build sort pathkeys for the inner side */
innerkeys = make_inner_pathkeys_for_merge(root,
cur_mergeclauses,
outerkeys);
/* Build pathkeys representing output sort order */
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerkeys);
/*
* Build sort pathkeys for both sides.
* And now we can make the path.
*
* Note: it's possible that the cheapest paths will already be sorted
* properly. create_mergejoin_path will detect that case and suppress
* an explicit sort step, so we needn't do so here.
*/
outerkeys = make_pathkeys_for_mergeclauses(root,
cur_mergeclauses,
outerrel);
innerkeys = make_pathkeys_for_mergeclauses(root,
cur_mergeclauses,
innerrel);
/* Build pathkeys representing output sort order. */
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerkeys);
/* Build opfamily info for execution */
build_mergejoin_strat_arrays(cur_mergeclauses,
&mergefamilies,
&mergestrategies,
&mergenullsfirst);
/*
* And now we can make the path.
*/
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
@ -295,9 +273,6 @@ sort_inner_and_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
cur_mergeclauses,
mergefamilies,
mergestrategies,
mergenullsfirst,
outerkeys,
innerkeys));
}
@ -427,9 +402,6 @@ match_unsorted_outer(PlannerInfo *root,
Path *outerpath = (Path *) lfirst(l);
List *merge_pathkeys;
List *mergeclauses;
Oid *mergefamilies;
int *mergestrategies;
bool *mergenullsfirst;
List *innersortkeys;
List *trialsortkeys;
Path *cheapest_startup_inner;
@ -510,6 +482,7 @@ match_unsorted_outer(PlannerInfo *root,
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
true,
mergeclause_list);
/*
@ -532,15 +505,9 @@ match_unsorted_outer(PlannerInfo *root,
continue;
/* Compute the required ordering of the inner path */
innersortkeys = make_pathkeys_for_mergeclauses(root,
mergeclauses,
innerrel);
/* Build opfamily info for execution */
build_mergejoin_strat_arrays(mergeclauses,
&mergefamilies,
&mergestrategies,
&mergenullsfirst);
innersortkeys = make_inner_pathkeys_for_merge(root,
mergeclauses,
outerpath->pathkeys);
/*
* Generate a mergejoin on the basis of sorting the cheapest inner.
@ -557,9 +524,6 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
mergeclauses,
mergefamilies,
mergestrategies,
mergenullsfirst,
NIL,
innersortkeys));
@ -613,18 +577,12 @@ match_unsorted_outer(PlannerInfo *root,
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
/* Build opfamily info for execution */
build_mergejoin_strat_arrays(newclauses,
&mergefamilies,
&mergestrategies,
&mergenullsfirst);
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
@ -634,9 +592,6 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
newclauses,
mergefamilies,
mergestrategies,
mergenullsfirst,
NIL,
NIL));
cheapest_total_inner = innerpath;
@ -666,19 +621,13 @@ match_unsorted_outer(PlannerInfo *root,
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
}
/* Build opfamily info for execution */
build_mergejoin_strat_arrays(newclauses,
&mergefamilies,
&mergestrategies,
&mergenullsfirst);
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
@ -688,9 +637,6 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
newclauses,
mergefamilies,
mergestrategies,
mergenullsfirst,
NIL,
NIL));
}
@ -909,6 +855,10 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
* Select mergejoin clauses that are usable for a particular join.
* Returns a list of RestrictInfo nodes for those clauses.
*
* We also mark each selected RestrictInfo to show which side is currently
* being considered as outer. These are transient markings that are only
* good for the duration of the current add_paths_to_joinrel() call!
*
* We examine each restrictinfo clause known for the join to see
* if it is mergejoinable and involves vars from the two sub-relations
* currently of interest.
@ -939,7 +889,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
continue;
if (!restrictinfo->can_join ||
restrictinfo->mergejoinoperator == InvalidOid)
restrictinfo->mergeopfamilies == NIL)
{
have_nonmergeable_joinclause = true;
continue; /* not mergejoinable */
@ -954,11 +904,13 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
bms_is_subset(restrictinfo->right_relids, innerrel->relids))
{
/* righthand side is inner */
restrictinfo->outer_is_left = true;
}
else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
bms_is_subset(restrictinfo->right_relids, outerrel->relids))
{
/* lefthand side is inner */
restrictinfo->outer_is_left = false;
}
else
{
@ -966,7 +918,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
continue; /* no good for these input relations */
}
result_list = lcons(restrictinfo, result_list);
result_list = lappend(result_list, restrictinfo);
}
/*
@ -995,46 +947,3 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
return result_list;
}
/*
* Temporary hack to build opfamily and strategy info needed for mergejoin
* by the executor. We need to rethink the planner's handling of merge
* planning so that it can deal with multiple possible merge orders, but
* that's not done yet.
*/
static void
build_mergejoin_strat_arrays(List *mergeclauses,
Oid **mergefamilies,
int **mergestrategies,
bool **mergenullsfirst)
{
int nClauses = list_length(mergeclauses);
int i;
ListCell *l;
*mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid));
*mergestrategies = (int *) palloc(nClauses * sizeof(int));
*mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool));
i = 0;
foreach(l, mergeclauses)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/*
* We do not need to worry about whether the mergeclause will be
* commuted at runtime --- it's the same opfamily either way.
*/
(*mergefamilies)[i] = restrictinfo->mergeopfamily;
/*
* For the moment, strategy must always be LessThan --- see
* hack version of get_op_mergejoin_info
*/
(*mergestrategies)[i] = BTLessStrategyNumber;
/* And we only allow NULLS LAST, too */
(*mergenullsfirst)[i] = false;
i++;
}
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.83 2007/01/05 22:19:31 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.84 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -72,7 +72,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
other_rels = list_head(joinrels[1]); /* consider all initial
* rels */
if (old_rel->joininfo != NIL)
if (old_rel->joininfo != NIL || old_rel->has_eclass_joins)
{
/*
* Note that if all available join clauses for this rel require
@ -152,7 +152,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
* outer joins --- then we might have to force a bushy outer
* join. See have_relevant_joinclause().
*/
if (old_rel->joininfo == NIL && root->oj_info_list == NIL)
if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
root->oj_info_list == NIL)
continue;
if (k == other_level)
@ -251,8 +252,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
/*
* make_rels_by_clause_joins
* Build joins between the given relation 'old_rel' and other relations
* that are mentioned within old_rel's joininfo list (i.e., relations
* that participate in join clauses that 'old_rel' also participates in).
* that participate in join clauses that 'old_rel' also participates in.
* The join rel nodes are returned in a list.
*
* 'old_rel' is the relation entry for the relation to be joined

File diff suppressed because it is too large Load Diff