mirror of
https://github.com/postgres/postgres.git
synced 2025-09-11 00:12:06 +03:00
comment line where output as too long, and update typedefs for /lib directory. Also fix case where identifiers were used as variable names in the backend, but as typedefs in ecpg (favor the backend for indenting). Backpatch to 8.1.X.
282 lines
9.0 KiB
C
282 lines
9.0 KiB
C
/*------------------------------------------------------------------------
|
|
*
|
|
* geqo_eval.c
|
|
* Routines to evaluate query trees
|
|
*
|
|
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.77.2.1 2005/11/22 18:23:10 momjian Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
/* contributed by:
|
|
=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
|
|
* Martin Utesch * Institute of Automatic Control *
|
|
= = University of Mining and Technology =
|
|
* utesch@aut.tu-freiberg.de * Freiberg, Germany *
|
|
=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include <float.h>
|
|
#include <limits.h>
|
|
#include <math.h>
|
|
|
|
#include "optimizer/geqo.h"
|
|
#include "optimizer/joininfo.h"
|
|
#include "optimizer/pathnode.h"
|
|
#include "optimizer/paths.h"
|
|
#include "utils/memutils.h"
|
|
|
|
|
|
static bool desirable_join(PlannerInfo *root,
|
|
RelOptInfo *outer_rel, RelOptInfo *inner_rel);
|
|
|
|
|
|
/*
|
|
* geqo_eval
|
|
*
|
|
* Returns cost of a query tree as an individual of the population.
|
|
*/
|
|
Cost
|
|
geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
|
|
{
|
|
MemoryContext mycontext;
|
|
MemoryContext oldcxt;
|
|
RelOptInfo *joinrel;
|
|
Cost fitness;
|
|
int savelength;
|
|
struct HTAB *savehash;
|
|
|
|
/*
|
|
* Because gimme_tree considers both left- and right-sided trees, there is
|
|
* no difference between a tour (a,b,c,d,...) and a tour (b,a,c,d,...) ---
|
|
* the same join orders will be considered. To avoid redundant cost
|
|
* calculations, we simply reject tours where tour[0] > tour[1], assigning
|
|
* them an artificially bad fitness.
|
|
*
|
|
* init_tour() is aware of this rule and so we should never reject a tour
|
|
* during the initial filling of the pool. It seems difficult to persuade
|
|
* the recombination logic never to break the rule, however.
|
|
*/
|
|
if (num_gene >= 2 && tour[0] > tour[1])
|
|
return DBL_MAX;
|
|
|
|
/*
|
|
* Create a private memory context that will hold all temp storage
|
|
* allocated inside gimme_tree().
|
|
*
|
|
* Since geqo_eval() will be called many times, we can't afford to let all
|
|
* that memory go unreclaimed until end of statement. Note we make the
|
|
* temp context a child of the planner's normal context, so that it will
|
|
* be freed even if we abort via ereport(ERROR).
|
|
*/
|
|
mycontext = AllocSetContextCreate(CurrentMemoryContext,
|
|
"GEQO",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
|
oldcxt = MemoryContextSwitchTo(mycontext);
|
|
|
|
/*
|
|
* gimme_tree will add entries to root->join_rel_list, which may or may
|
|
* not already contain some entries. The newly added entries will be
|
|
* recycled by the MemoryContextDelete below, so we must ensure that the
|
|
* list is restored to its former state before exiting. We can do this by
|
|
* truncating the list to its original length. NOTE this assumes that any
|
|
* added entries are appended at the end!
|
|
*
|
|
* We also must take care not to mess up the outer join_rel_hash, if there
|
|
* is one. We can do this by just temporarily setting the link to NULL.
|
|
* (If we are dealing with enough join rels, which we very likely are, a
|
|
* new hash table will get built and used locally.)
|
|
*/
|
|
savelength = list_length(evaldata->root->join_rel_list);
|
|
savehash = evaldata->root->join_rel_hash;
|
|
|
|
evaldata->root->join_rel_hash = NULL;
|
|
|
|
/* construct the best path for the given combination of relations */
|
|
joinrel = gimme_tree(tour, num_gene, evaldata);
|
|
|
|
/*
|
|
* compute fitness
|
|
*
|
|
* XXX geqo does not currently support optimization for partial result
|
|
* retrieval --- how to fix?
|
|
*/
|
|
if (joinrel)
|
|
fitness = joinrel->cheapest_total_path->total_cost;
|
|
else
|
|
fitness = DBL_MAX;
|
|
|
|
/*
|
|
* Restore join_rel_list to its former state, and put back original
|
|
* hashtable if any.
|
|
*/
|
|
evaldata->root->join_rel_list = list_truncate(evaldata->root->join_rel_list,
|
|
savelength);
|
|
evaldata->root->join_rel_hash = savehash;
|
|
|
|
/* release all the memory acquired within gimme_tree */
|
|
MemoryContextSwitchTo(oldcxt);
|
|
MemoryContextDelete(mycontext);
|
|
|
|
return fitness;
|
|
}
|
|
|
|
/*
|
|
* gimme_tree
|
|
* Form planner estimates for a join tree constructed in the specified
|
|
* order.
|
|
*
|
|
* 'tour' is the proposed join order, of length 'num_gene'
|
|
* 'evaldata' contains the context we need
|
|
*
|
|
* Returns a new join relation whose cheapest path is the best plan for
|
|
* this join order. NB: will return NULL if join order is invalid.
|
|
*
|
|
* The original implementation of this routine always joined in the specified
|
|
* order, and so could only build left-sided plans (and right-sided and
|
|
* mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
|
|
* It could never produce a "bushy" plan. This had a couple of big problems,
|
|
* of which the worst was that as of 7.4, there are situations involving IN
|
|
* subqueries where the only valid plans are bushy.
|
|
*
|
|
* The present implementation takes the given tour as a guideline, but
|
|
* postpones joins that seem unsuitable according to some heuristic rules.
|
|
* This allows correct bushy plans to be generated at need, and as a nice
|
|
* side-effect it seems to materially improve the quality of the generated
|
|
* plans.
|
|
*/
|
|
RelOptInfo *
|
|
gimme_tree(Gene *tour, int num_gene, GeqoEvalData *evaldata)
|
|
{
|
|
RelOptInfo **stack;
|
|
int stack_depth;
|
|
RelOptInfo *joinrel;
|
|
int rel_count;
|
|
|
|
/*
|
|
* Create a stack to hold not-yet-joined relations.
|
|
*/
|
|
stack = (RelOptInfo **) palloc(num_gene * sizeof(RelOptInfo *));
|
|
stack_depth = 0;
|
|
|
|
/*
|
|
* Push each relation onto the stack in the specified order. After
|
|
* pushing each relation, see whether the top two stack entries are
|
|
* joinable according to the desirable_join() heuristics. If so, join
|
|
* them into one stack entry, and try again to combine with the next stack
|
|
* entry down (if any). When the stack top is no longer joinable,
|
|
* continue to the next input relation. After we have pushed the last
|
|
* input relation, the heuristics are disabled and we force joining all
|
|
* the remaining stack entries.
|
|
*
|
|
* If desirable_join() always returns true, this produces a straight
|
|
* left-to-right join just like the old code. Otherwise we may produce a
|
|
* bushy plan or a left/right-sided plan that really corresponds to some
|
|
* tour other than the one given. To the extent that the heuristics are
|
|
* helpful, however, this will be a better plan than the raw tour.
|
|
*
|
|
* Also, when a join attempt fails (because of IN-clause constraints), we
|
|
* may be able to recover and produce a workable plan, where the old code
|
|
* just had to give up. This case acts the same as a false result from
|
|
* desirable_join().
|
|
*/
|
|
for (rel_count = 0; rel_count < num_gene; rel_count++)
|
|
{
|
|
int cur_rel_index;
|
|
|
|
/* Get the next input relation and push it */
|
|
cur_rel_index = (int) tour[rel_count];
|
|
stack[stack_depth] = (RelOptInfo *) list_nth(evaldata->initial_rels,
|
|
cur_rel_index - 1);
|
|
stack_depth++;
|
|
|
|
/*
|
|
* While it's feasible, pop the top two stack entries and replace with
|
|
* their join.
|
|
*/
|
|
while (stack_depth >= 2)
|
|
{
|
|
RelOptInfo *outer_rel = stack[stack_depth - 2];
|
|
RelOptInfo *inner_rel = stack[stack_depth - 1];
|
|
|
|
/*
|
|
* Don't pop if heuristics say not to join now. However, once we
|
|
* have exhausted the input, the heuristics can't prevent popping.
|
|
*/
|
|
if (rel_count < num_gene - 1 &&
|
|
!desirable_join(evaldata->root, outer_rel, inner_rel))
|
|
break;
|
|
|
|
/*
|
|
* Construct a RelOptInfo representing the join of these two input
|
|
* relations. These are always inner joins. Note that we expect
|
|
* the joinrel not to exist in root->join_rel_list yet, and so the
|
|
* paths constructed for it will only include the ones we want.
|
|
*/
|
|
joinrel = make_join_rel(evaldata->root, outer_rel, inner_rel,
|
|
JOIN_INNER);
|
|
|
|
/* Can't pop stack here if join order is not valid */
|
|
if (!joinrel)
|
|
break;
|
|
|
|
/* Find and save the cheapest paths for this rel */
|
|
set_cheapest(joinrel);
|
|
|
|
/* Pop the stack and replace the inputs with their join */
|
|
stack_depth--;
|
|
stack[stack_depth - 1] = joinrel;
|
|
}
|
|
}
|
|
|
|
/* Did we succeed in forming a single join relation? */
|
|
if (stack_depth == 1)
|
|
joinrel = stack[0];
|
|
else
|
|
joinrel = NULL;
|
|
|
|
pfree(stack);
|
|
|
|
return joinrel;
|
|
}
|
|
|
|
/*
|
|
* Heuristics for gimme_tree: do we want to join these two relations?
|
|
*/
|
|
static bool
|
|
desirable_join(PlannerInfo *root,
|
|
RelOptInfo *outer_rel, RelOptInfo *inner_rel)
|
|
{
|
|
ListCell *l;
|
|
|
|
/*
|
|
* Join if there is an applicable join clause.
|
|
*/
|
|
if (have_relevant_joinclause(outer_rel, inner_rel))
|
|
return true;
|
|
|
|
/*
|
|
* Join if the rels are members of the same IN sub-select. This is needed
|
|
* to improve the odds that we will find a valid solution in a case where
|
|
* an IN sub-select has a clauseless join.
|
|
*/
|
|
foreach(l, root->in_info_list)
|
|
{
|
|
InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
|
|
|
|
if (bms_is_subset(outer_rel->relids, ininfo->righthand) &&
|
|
bms_is_subset(inner_rel->relids, ininfo->righthand))
|
|
return true;
|
|
}
|
|
|
|
/* Otherwise postpone the join till later. */
|
|
return false;
|
|
}
|