diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 454a8d08867..0a7aa250dbb 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.363 2009/07/30 02:45:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.364 2009/09/17 20:49:28 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1421,6 +1421,16 @@ _outUniquePath(StringInfo str, UniquePath *node) WRITE_FLOAT_FIELD(rows, "%.0f"); } +static void +_outNoOpPath(StringInfo str, NoOpPath *node) +{ + WRITE_NODE_TYPE("NOOPPATH"); + + _outPathInfo(str, (Path *) node); + + WRITE_NODE_FIELD(subpath); +} + static void _outNestPath(StringInfo str, NestPath *node) { @@ -2634,6 +2644,9 @@ _outNode(StringInfo str, void *obj) case T_UniquePath: _outUniquePath(str, obj); break; + case T_NoOpPath: + _outNoOpPath(str, obj); + break; case T_NestPath: _outNestPath(str, obj); break; diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index f406e642db3..560e42981a5 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.50 2009/07/21 02:02:44 tgl Exp $ +$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.51 2009/09/17 20:49:28 tgl Exp $ Optimizer ========= @@ -354,6 +354,7 @@ RelOptInfo - a relation or joined relations NestPath - nested-loop joins MergePath - merge joins HashPath - hash joins + NoOpPath - same as its input path (used when a join is removed) EquivalenceClass - a data structure representing a set of values known equal diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index ada9140552e..5b7f0ff0e3f 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.185 2009/09/02 17:52:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.186 2009/09/17 20:49:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1387,6 +1387,10 @@ print_path(PlannerInfo *root, Path *path, int indent) ptype = "Unique"; subpath = ((UniquePath *) path)->subpath; break; + case T_NoOpPath: + ptype = "NoOp"; + subpath = ((NoOpPath *) path)->subpath; + break; case T_NestPath: ptype = "NestLoop"; join = true; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 3930acf05a7..7bb5d8993cc 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.241 2009/08/04 16:08:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.242 2009/09/17 20:49:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1918,6 +1918,86 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel, return clause_list; } +/* + * relation_has_unique_index_for + * Determine whether the relation provably has at most one row satisfying + * a set of equality conditions, because the conditions constrain all + * columns of some unique index. + * + * The conditions are provided as a list of RestrictInfo nodes, where the + * caller has already determined that each condition is a mergejoinable + * equality with an expression in this relation on one side, and an + * expression not involving this relation on the other. The transient + * outer_is_left flag is used to identify which side we should look at: + * left side if outer_is_left is false, right side if it is true. + */ +bool +relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, + List *restrictlist) +{ + ListCell *ic; + + /* Short-circuit the easy case */ + if (restrictlist == NIL) + return false; + + /* Examine each index of the relation ... */ + foreach(ic, rel->indexlist) + { + IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic); + int c; + + /* + * If the index is not unique or if it's a partial index that doesn't + * match the query, it's useless here. + */ + if (!ind->unique || (ind->indpred != NIL && !ind->predOK)) + continue; + + /* + * Try to find each index column in the list of conditions. This is + * O(n^2) or worse, but we expect all the lists to be short. + */ + for (c = 0; c < ind->ncolumns; c++) + { + ListCell *lc; + + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *rexpr; + + /* + * The condition's equality operator must be a member of the + * index opfamily, else it is not asserting the right kind + * of equality behavior for this index. We check this first + * since it's probably cheaper than match_index_to_operand(). + */ + if (!list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c])) + continue; + + /* OK, see if the condition operand matches the index key */ + if (rinfo->outer_is_left) + rexpr = get_rightop(rinfo->clause); + else + rexpr = get_leftop(rinfo->clause); + + if (match_index_to_operand(rexpr, c, ind)) + break; /* found a match; column is unique */ + } + + if (lc == NULL) + break; /* no match; this index doesn't help us */ + } + + /* Matched all columns of this index? */ + if (c == ind->ncolumns) + return true; + } + + return false; +} + /**************************************************************************** * ---- PATH CREATION UTILITIES ---- diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 269c4824b6a..827e18a5028 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.123 2009/09/12 22:12:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.124 2009/09/17 20:49:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,11 @@ #include "optimizer/paths.h" +static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, JoinType jointype); +static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel); static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, @@ -78,11 +83,26 @@ add_paths_to_joinrel(PlannerInfo *root, { List *mergeclause_list = NIL; + /* + * 0. Consider join removal. This is always the most efficient strategy, + * so if it works, there's no need to consider anything further. + */ + if (join_is_removable(root, joinrel, outerrel, innerrel, + restrictlist, jointype)) + { + generate_outer_only(root, joinrel, outerrel); + return; + } + /* * Find potential mergejoin clauses. We can skip this if we are not * interested in doing a mergejoin. However, mergejoin is currently our * only way of implementing full outer joins, so override mergejoin * disable if it's a full join. + * + * Note: do this after join_is_removable(), because this sets the + * outer_is_left flags in the mergejoin clauses, while join_is_removable + * uses those flags for its own purposes. */ if (enable_mergejoin || jointype == JOIN_FULL) mergeclause_list = select_mergejoin_clauses(root, @@ -133,6 +153,180 @@ add_paths_to_joinrel(PlannerInfo *root, restrictlist, jointype, sjinfo); } +/* + * join_is_removable + * Determine whether we need not perform the join at all, because + * it will just duplicate its left input. + * + * This is true for a left join for which the join condition cannot match + * more than one inner-side row. (There are other possibly interesting + * cases, but we don't have the infrastructure to prove them.) + * + * Note: there is no need to consider the symmetrical case of duplicating the + * right input, because add_paths_to_joinrel() will be called with each rel + * on the outer side. + */ +static bool +join_is_removable(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype) +{ + List *clause_list = NIL; + ListCell *l; + int attroff; + + /* + * Currently, we only know how to remove left joins to a baserel with + * unique indexes. We can check most of these criteria pretty trivially + * to avoid doing useless extra work. But checking whether any of the + * indexes are unique would require iterating over the indexlist, so for + * now we just make sure there are indexes of some sort or other. If none + * of them are unique, join removal will still fail, just slightly later. + */ + if (jointype != JOIN_LEFT || + innerrel->reloptkind == RELOPT_JOINREL || + innerrel->rtekind != RTE_RELATION || + innerrel->indexlist == NIL) + return false; + + /* + * We can't remove the join if any inner-rel attributes are used above + * the join. + * + * As a micro-optimization, it seems better to start with max_attr and + * count down rather than starting with min_attr and counting up, on the + * theory that the system attributes are somewhat less likely to be wanted + * and should be tested last. + */ + for (attroff = innerrel->max_attr - innerrel->min_attr; + attroff >= 0; + attroff--) + { + if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids)) + return false; + } + + /* + * Search for mergejoinable clauses that constrain the inner rel against + * either the outer rel or a pseudoconstant. If an operator is + * mergejoinable then it behaves like equality for some btree opclass, + * so it's what we want. The mergejoinability test also eliminates + * clauses containing volatile functions, which we couldn't depend on. + */ + foreach(l, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * We are always considering an outer join here, so ignore pushed-down + * clauses. Also ignore anything that doesn't have a mergejoinable + * operator. + */ + if (restrictinfo->is_pushed_down) + continue; + + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause is usable with these input rels. All the vars + * needed on each side of the clause must be available from one or the + * other of the input rels. + */ + if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) && + bms_is_subset(restrictinfo->right_relids, innerrel->relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) && + bms_is_subset(restrictinfo->right_relids, outerrel->relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Now examine the rel's restriction clauses for var = const clauses */ + foreach(l, innerrel->baserestrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * Note: can_join won't be set for a restriction clause, but + * mergeopfamilies will be if it has a mergejoinable operator + * and doesn't contain volatile functions. + */ + if (restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * The clause certainly doesn't refer to anything but the given + * rel. If either side is pseudoconstant then we can use it. + */ + if (bms_is_empty(restrictinfo->left_relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_empty(restrictinfo->right_relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Now examine the indexes to see if we have a matching unique index */ + if (relation_has_unique_index_for(root, innerrel, clause_list)) + return true; + + /* + * Some day it would be nice to check for other methods of establishing + * distinctness. + */ + return false; +} + +/* + * generate_outer_only + * Generate "join" paths when we have found the join is removable. + */ +static void +generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel) +{ + ListCell *lc; + + /* + * For the moment, replicate all of the outerrel's paths as join paths. + * Some of them might not really be interesting above the join, if they + * have sort orderings that have no real use except to do a mergejoin + * for the join we've just found we don't need. But distinguishing that + * case probably isn't worth the extra code it would take. + */ + foreach(lc, outerrel->pathlist) + { + Path *outerpath = (Path *) lfirst(lc); + + add_path(joinrel, (Path *) + create_noop_path(root, joinrel, outerpath)); + } +} + /* * sort_inner_and_outer * Create mergejoin join paths by explicitly sorting both the outer and diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ac9b96229a1..0bb53d33089 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.262 2009/09/12 22:12:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.263 2009/09/17 20:49:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -164,6 +164,11 @@ create_plan(PlannerInfo *root, Path *best_path) case T_WorkTableScan: plan = create_scan_plan(root, best_path); break; + case T_Join: + /* this is only used for no-op joins */ + Assert(IsA(best_path, NoOpPath)); + plan = create_plan(root, ((NoOpPath *) best_path)->subpath); + break; case T_HashJoin: case T_MergeJoin: case T_NestLoop: diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e2de24956a2..e9733892cc2 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.153 2009/09/12 22:12:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.154 2009/09/17 20:49:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1215,6 +1215,26 @@ distinct_col_search(int colno, List *colnos, List *opids) return InvalidOid; } +/* + * create_noop_path + * Creates a path equivalent to the input subpath, but having a different + * parent rel. This is used when a join is found to be removable. + */ +NoOpPath * +create_noop_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) +{ + NoOpPath *pathnode = makeNode(NoOpPath); + + pathnode->path.pathtype = T_Join; /* by convention */ + pathnode->path.parent = rel; + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost; + pathnode->path.pathkeys = subpath->pathkeys; + pathnode->subpath = subpath; + + return pathnode; +} + /* * create_subqueryscan_path * Creates a path corresponding to a sequential scan of a subquery, diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index b29f7203cab..6be4bf39598 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.224 2009/07/30 02:45:37 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.225 2009/09/17 20:49:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -211,6 +211,7 @@ typedef enum NodeTag T_ResultPath, T_MaterialPath, T_UniquePath, + T_NoOpPath, T_EquivalenceClass, T_EquivalenceMember, T_PathKey, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 4432252a72f..9b59d63b2b3 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.174 2009/07/16 20:55:44 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.175 2009/09/17 20:49:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -783,6 +783,22 @@ typedef struct UniquePath double rows; /* estimated number of result tuples */ } UniquePath; +/* + * NoOpPath represents exactly the same plan as its subpath. This is used + * when we have determined that a join can be eliminated. The difference + * between the NoOpPath and its subpath is just that the NoOpPath's parent + * is the whole join relation while the subpath is for one of the joined + * relations (and the other one isn't needed). + * + * Note: path.pathtype is always T_Join, but this won't actually give rise + * to a Join plan node. + */ +typedef struct NoOpPath +{ + Path path; + Path *subpath; +} NoOpPath; + /* * All join-type paths share these fields. */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index b275b1dd497..8b71500407e 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.80 2009/01/01 17:24:00 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.81 2009/09/17 20:49:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -51,6 +51,8 @@ extern ResultPath *create_result_path(List *quals); extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath); extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo); +extern NoOpPath *create_noop_path(PlannerInfo *root, RelOptInfo *rel, + Path *subpath); extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys); extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel); extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index cbc8214e9e3..d9e91675f22 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.107 2009/06/11 14:49:11 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.108 2009/09/17 20:49:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -57,6 +57,8 @@ extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, JoinType jointype, Path **cheapest_startup, Path **cheapest_total); +extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, + List *restrictlist); extern List *group_clauses_by_indexkey(IndexOptInfo *index, List *clauses, List *outer_clauses, Relids outer_relids,