1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-28 18:48:04 +03:00

Major optimizer improvement for joining a large number of tables.

This commit is contained in:
Bruce Momjian
1999-02-09 03:51:42 +00:00
parent be948af2e8
commit fe35ffe7e0
21 changed files with 277 additions and 139 deletions

View File

@@ -1,10 +1,56 @@
Summary
-------
The optimizer generates optimal query plans in several steps:
Take each relation in a query, and make a RelOptInfo structure for it.
1) Take each relation in a query, and make a RelOptInfo structure for it.
Find each way of accessing the relation, called a Path, including
sequential and index scans, and add it to the RelOptInfo.pathlist
list.
2) Join each RelOptInfo to each other RelOptInfo as specified in the
WHERE clause. At this point each RelOptInfo is a single relation, so
you are joining every relation to every relation it is joined to in the
WHERE clause.
Joins occur using two RelOptInfos. One is outer, the other inner.
Outers drive lookups of values in the inner. In a nested loop, lookups
of values in the inner occur by scanning to find each matching inner
row. In a mergejoin, inner rows are ordered, and are accessed in order,
so only one scan of inner is required to perform the entire join. In a
hashjoin, inner rows are hashed for lookups.
Each unique join combination becomes a new RelOptInfo. The RelOptInfo
is now the joining of two relations. RelOptInfo.pathlist holds the various
paths to create the joined result, having different orderings depending
on the join method used.
3) At this point, each RelOptInfo is joined again, with one new
relation added to each RelOptInfo. This continues until all
relations have been joined into one RelOptInfo, at which point the
cheapest Path is chosen.
SELECT *
FROM tab1, tab2, tab3, tab4
WHERE tab1.col = tab2.col AND
tab2.col = tab3.col AND
tab3.col = tab4.col
Tables 1, 2, 3, and 4 are joined as:
{1 2},{2 3},{3 4}
{1 2 3},{2 3 4}
{1 2 3 4}
SELECT *
FROM tab1, tab2, tab3, tab4
WHERE tab1.col = tab2.col AND
tab1.col = tab3.col AND
tab1.col = tab4.col
Tables 1, 2, 3, and 4 are joined as:
{1 2},{1 3},{1 4}
{1 2 3},{1 3 4},{1 2 4}
{1 2 3 4}
Optimizer Functions
-------------------
@@ -56,28 +102,6 @@ planner()
one relation, return
find selectivity of columns used in joins
-----find_join_paths()
Summary: With OPTIMIZER_DEBUG defined, you see:
Tables 1, 2, 3, and 4 are joined as:
{1 2},{1 3},{1 4},{2 3},{2 4}
{1 2 3},{1 2 4},{2 3 4}
{1 2 3 4}
Actual output tests show combinations:
{4 2},{3 2},{1 4},{1 3},{1 2}
{4 2 3},{1 4 2},{1 3 2}
{4 2 3 1}
Cheapest join order shows:
{4 2},{3 2},{1 4},{1 3},{1 2}
{3 2 4},{1 4 2},{1 3 2}
{1 4 2 3}
It first finds the best way to join each table to every other
table. It then takes those joined table combinations, and joins
them to the other joined table combinations, until all tables are
joined.
jump to geqo if needed
again:
find_join_rels():

View File

@@ -5,7 +5,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: geqo_paths.c,v 1.13 1999/02/08 04:29:06 momjian Exp $
* $Id: geqo_paths.c,v 1.14 1999/02/09 03:51:15 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -113,7 +113,7 @@ geqo_rel_paths(RelOptInfo * rel)
{
path = (Path *) lfirst(y);
if (!path->path_order.ord.sortop)
if (!path->path_order->ord.sortop)
break;
}

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.39 1999/02/08 04:29:08 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.40 1999/02/09 03:51:17 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1290,8 +1290,9 @@ index_innerjoin(Query *root, RelOptInfo * rel, List *clausegroup_list,
pathnode->path.pathtype = T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.path_order.ordtype = SORTOP_ORDER;
pathnode->path.path_order.ord.sortop = index->ordering;
pathnode->path.path_order = makeNode(PathOrder);
pathnode->path.path_order->ordtype = SORTOP_ORDER;
pathnode->path.path_order->ord.sortop = index->ordering;
pathnode->path.keys = NIL; /* not sure about this, bjm 1998/09/21 */
pathnode->indexid = index->relids;

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.15 1999/02/08 04:29:11 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.16 1999/02/09 03:51:19 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -324,11 +324,11 @@ match_unsorted_outer(RelOptInfo * joinrel,
List *clauses = NIL;
List *matchedJoinKeys = NIL;
List *matchedJoinClauses = NIL;
MergeInfo *xmergeinfo = (MergeInfo *) NULL;
MergeInfo *xmergeinfo = (MergeInfo *) NULL;
outerpath = (Path *) lfirst(i);
outerpath_ordering = &outerpath->path_order;
outerpath_ordering = outerpath->path_order;
if (outerpath_ordering)
{
@@ -464,14 +464,14 @@ match_unsorted_inner(RelOptInfo * joinrel,
foreach(i, innerpath_list)
{
MergeInfo *xmergeinfo = (MergeInfo *) NULL;
MergeInfo *xmergeinfo = (MergeInfo *) NULL;
List *clauses = NIL;
List *matchedJoinKeys = NIL;
List *matchedJoinClauses = NIL;
innerpath = (Path *) lfirst(i);
innerpath_ordering = &innerpath->path_order;
innerpath_ordering = innerpath->path_order;
if (innerpath_ordering)
{

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/joinutils.c,v 1.11 1999/02/08 04:29:12 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/joinutils.c,v 1.12 1999/02/09 03:51:20 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -218,8 +218,7 @@ match_paths_joinkeys(List *joinkeys,
key_match = every_func(joinkeys, path->keys, which_subkey);
if (equal_path_ordering(ordering,
&path->path_order) &&
if (equal_path_ordering(ordering, path->path_order) &&
length(joinkeys) == length(path->keys) &&
key_match)
{

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/mergeutils.c,v 1.14 1999/02/08 04:29:12 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/mergeutils.c,v 1.15 1999/02/09 03:51:20 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -52,16 +52,17 @@ group_clauses_by_order(List *restrictinfo_list,
* Create a new mergeinfo node and add it to 'mergeinfo-list'
* if one does not yet exist for this merge ordering.
*/
PathOrder path_order;
MergeInfo *xmergeinfo;
PathOrder *path_order;
MergeInfo *xmergeinfo;
Expr *clause = restrictinfo->clause;
Var *leftop = get_leftop(clause);
Var *rightop = get_rightop(clause);
JoinKey *keys;
path_order.ordtype = MERGE_ORDER;
path_order.ord.merge = merge_ordering;
xmergeinfo = match_order_mergeinfo(&path_order, mergeinfo_list);
path_order = makeNode(PathOrder);
path_order->ordtype = MERGE_ORDER;
path_order->ord.merge = merge_ordering;
xmergeinfo = match_order_mergeinfo(path_order, mergeinfo_list);
if (inner_relid == leftop->varno)
{
keys = makeNode(JoinKey);

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.15 1999/02/08 04:29:12 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.16 1999/02/09 03:51:20 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -104,13 +104,14 @@ create_or_index_paths(Query *root,
pathnode->path.pathtype = T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.path_order.ordtype = SORTOP_ORDER;
pathnode->path.path_order = makeNode(PathOrder);
pathnode->path.path_order->ordtype = SORTOP_ORDER;
/*
* This is an IndexScan, but it does index lookups based
* on the order of the fields specified in the WHERE clause,
* not in any order, so the sortop is NULL.
*/
pathnode->path.path_order.ord.sortop = NULL;
pathnode->path.path_order->ord.sortop = NULL;
pathnode->path.keys = NIL; /* not sure about this, bjm 1998/09/21 */
pathnode->indexqual = lcons(clausenode, NIL);

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/prune.c,v 1.24 1999/02/08 04:29:12 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/prune.c,v 1.25 1999/02/09 03:51:21 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -109,7 +109,7 @@ prune_rel_paths(List *rel_list)
{
path = (Path *) lfirst(y);
if (!path->path_order.ord.sortop)
if (!path->path_order->ord.sortop)
break;
}
cheapest = (JoinPath *) prune_rel_path(rel, path);

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.38 1999/02/08 04:29:17 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.39 1999/02/09 03:51:21 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -529,14 +529,14 @@ create_mergejoin_node(MergePath *best_path,
outer_tlist,
inner_tlist));
opcode = get_opcode((best_path->jpath.path.path_order.ord.merge)->join_operator);
opcode = get_opcode((best_path->jpath.path.path_order->ord.merge)->join_operator);
outer_order = (Oid *) palloc(sizeof(Oid) * 2);
outer_order[0] = (best_path->jpath.path.path_order.ord.merge)->left_operator;
outer_order[0] = (best_path->jpath.path.path_order->ord.merge)->left_operator;
outer_order[1] = 0;
inner_order = (Oid *) palloc(sizeof(Oid) * 2);
inner_order[0] = (best_path->jpath.path.path_order.ord.merge)->right_operator;
inner_order[0] = (best_path->jpath.path.path_order->ord.merge)->right_operator;
inner_order[1] = 0;
/*
@@ -825,7 +825,6 @@ set_temp_tlist_operators(List *tlist, List *pathkeys, Oid *operators)
resdom = tlist_member((Var *) keys, tlist);
if (resdom)
{
/*
* Order the resdom keys and replace the operator OID for each
* key with the regproc OID.

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/Attic/keys.c,v 1.10 1999/02/01 04:20:50 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/Attic/keys.c,v 1.11 1999/02/09 03:51:26 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -125,7 +125,7 @@ samekeys(List *keys1, List *keys2)
for (key1 = keys1, key2 = keys2; key1 != NIL && key2 != NIL;
key1 = lnext(key1), key2 = lnext(key2))
if (!member(lfirst(key1), lfirst(key2)))
if (!member(lfirst((List *)lfirst(key1)), lfirst(key2)))
return false;
/* Now the result should be true if list keys2 has at least as many

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.20 1999/02/08 04:29:21 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.21 1999/02/09 03:51:27 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -164,9 +164,31 @@ better_path(Path *new_path, List *unique_paths, bool *noOther)
{
path = (Path *) lfirst(temp);
#ifdef OPTDUP_DEBUG
if (!samekeys(path->keys, new_path->keys))
{
printf("oldpath\n");
pprint(path->keys);
printf("newpath\n");
pprint(new_path->keys);
if (path->keys && new_path->keys &&
length(lfirst(path->keys)) >= 2 &&
length(lfirst(path->keys)) < length(lfirst(new_path->keys)))
sleep(0); /* set breakpoint here */
}
if (!equal_path_ordering(path->path_order,
new_path->path_order))
{
printf("oldord\n");
pprint(path->path_order);
printf("neword\n");
pprint(new_path->path_order);
}
#endif
if (samekeys(path->keys, new_path->keys) &&
equal_path_ordering(&path->path_order,
&new_path->path_order))
equal_path_ordering(path->path_order,
new_path->path_order))
{
old_path = path;
break;
@@ -207,8 +229,9 @@ create_seqscan_path(RelOptInfo * rel)
pathnode->pathtype = T_SeqScan;
pathnode->parent = rel;
pathnode->path_cost = 0.0;
pathnode->path_order.ordtype = SORTOP_ORDER;
pathnode->path_order.ord.sortop = NULL;
pathnode->path_order = makeNode(PathOrder);
pathnode->path_order->ordtype = SORTOP_ORDER;
pathnode->path_order->ord.sortop = NULL;
pathnode->keys = NIL;
/*
@@ -256,8 +279,9 @@ create_index_path(Query *root,
pathnode->path.pathtype = T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.path_order.ordtype = SORTOP_ORDER;
pathnode->path.path_order.ord.sortop = index->ordering;
pathnode->path.path_order = makeNode(PathOrder);
pathnode->path.path_order->ordtype = SORTOP_ORDER;
pathnode->path.path_order->ord.sortop = index->ordering;
pathnode->indexid = index->relids;
pathnode->indexkeys = index->indexkeys;
@@ -274,7 +298,7 @@ create_index_path(Query *root,
* The index must have an ordering for the path to have (ordering)
* keys, and vice versa.
*/
if (pathnode->path.path_order.ord.sortop)
if (pathnode->path.path_order->ord.sortop)
{
pathnode->path.keys = collect_index_pathkeys(index->indexkeys,
rel->targetlist);
@@ -286,7 +310,7 @@ create_index_path(Query *root,
* if no index keys were found, we can't order the path).
*/
if (pathnode->path.keys == NULL)
pathnode->path.path_order.ord.sortop = NULL;
pathnode->path.path_order->ord.sortop = NULL;
}
else
pathnode->path.keys = NULL;
@@ -412,23 +436,20 @@ create_nestloop_path(RelOptInfo * joinrel,
pathnode->path.joinid = NIL;
pathnode->path.outerjoincost = (Cost) 0.0;
pathnode->path.loc_restrictinfo = NIL;
pathnode->path.path_order = makeNode(PathOrder);
if (keys)
{
pathnode->path.path_order.ordtype = outer_path->path_order.ordtype;
if (outer_path->path_order.ordtype == SORTOP_ORDER)
{
pathnode->path.path_order.ord.sortop = outer_path->path_order.ord.sortop;
}
pathnode->path.path_order->ordtype = outer_path->path_order->ordtype;
if (outer_path->path_order->ordtype == SORTOP_ORDER)
pathnode->path.path_order->ord.sortop = outer_path->path_order->ord.sortop;
else
{
pathnode->path.path_order.ord.merge = outer_path->path_order.ord.merge;
}
pathnode->path.path_order->ord.merge = outer_path->path_order->ord.merge;
}
else
{
pathnode->path.path_order.ordtype = SORTOP_ORDER;
pathnode->path.path_order.ord.sortop = NULL;
pathnode->path.path_order->ordtype = SORTOP_ORDER;
pathnode->path.path_order->ord.sortop = NULL;
}
pathnode->path.path_cost = cost_nestloop(outer_path->path_cost,
@@ -487,8 +508,9 @@ create_mergejoin_path(RelOptInfo * joinrel,
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.pathinfo = joinrel->restrictinfo;
pathnode->jpath.path.keys = keys;
pathnode->jpath.path.path_order.ordtype = MERGE_ORDER;
pathnode->jpath.path.path_order.ord.merge = order;
pathnode->jpath.path.path_order = makeNode(PathOrder);
pathnode->jpath.path.path_order->ordtype = MERGE_ORDER;
pathnode->jpath.path.path_order->ord.merge = order;
pathnode->path_mergeclauses = mergeclauses;
pathnode->jpath.path.loc_restrictinfo = NIL;
pathnode->outersortkeys = outersortkeys;
@@ -552,8 +574,9 @@ create_hashjoin_path(RelOptInfo * joinrel,
pathnode->jpath.pathinfo = joinrel->restrictinfo;
pathnode->jpath.path.loc_restrictinfo = NIL;
pathnode->jpath.path.keys = keys;
pathnode->jpath.path.path_order.ordtype = SORTOP_ORDER;
pathnode->jpath.path.path_order.ord.sortop = NULL;
pathnode->jpath.path.path_order = makeNode(PathOrder);
pathnode->jpath.path.path_order->ordtype = SORTOP_ORDER;
pathnode->jpath.path.path_order->ord.sortop = NULL;
pathnode->jpath.path.outerjoincost = (Cost) 0.0;
pathnode->jpath.path.joinid = (Relid) NULL;
/* pathnode->hashjoinoperator = operator; */