1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-25 21:42:33 +03:00
Tom Lane febc9a613c Expand the 'special index operator' machinery to handle special cases
for boolean indexes.  Previously we would only use such an index with
WHERE clauses like 'indexkey = true' or 'indexkey = false'.  The new
code transforms the cases 'indexkey', 'NOT indexkey', 'indexkey IS TRUE',
and 'indexkey IS FALSE' into one of these.  While this is only marginally
useful in itself, I intend soon to change constant-expression simplification
so that 'foo = true' and 'foo = false' are reduced to just 'foo' and
'NOT foo' ... which would lose the ability to use boolean indexes for
such queries at all, if the indexscan machinery couldn't make the
reverse transformation.
2005-03-26 23:29:20 +00:00

418 lines
14 KiB
C

/*-------------------------------------------------------------------------
*
* orindxpath.c
* Routines to find index paths that match a set of OR clauses
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.66 2005/03/26 23:29:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/restrictinfo.h"
/*
 * Prototypes for the file-local helpers.  best_or_subclause_indexes()
 * handles a whole list of OR subclauses and relies on
 * best_or_subclause_index() to pick an index for each one.
 */
static IndexPath *best_or_subclause_indexes(Query *root,
	RelOptInfo *rel,
	List *subclauses);
static bool best_or_subclause_index(Query *root,
	RelOptInfo *rel,
	Expr *subclause,
	IndexOptInfo **retIndexInfo,
	List **retIndexClauses,
	List **retIndexQuals,
	Cost *retStartupCost,
	Cost *retTotalCost);
/*----------
* create_or_index_quals
* Examine join OR-of-AND quals to see if any useful restriction OR
* clauses can be extracted. If so, add them to the query.
*
* Although a join clause must reference other relations overall,
* an OR of ANDs clause might contain sub-clauses that reference just this
* relation and can be used to build a restriction clause.
* For example consider
* WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
* We can transform this into
* WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
* AND (a.x = 42 OR a.x = 44)
* AND (b.y = 43 OR b.z = 45);
* which opens the potential to build OR indexscans on a and b. In essence
* this is a partial transformation to CNF (AND of ORs format). It is not
* complete, however, because we do not unravel the original OR --- doing so
* would usually bloat the qualification expression to little gain.
*
* The added quals are partially redundant with the original OR, and therefore
* will cause the size of the joinrel to be underestimated when it is finally
* formed. (This would be true of a full transformation to CNF as well; the
* fault is not really in the transformation, but in clauselist_selectivity's
* inability to recognize redundant conditions.) To minimize the collateral
* damage, we want to minimize the number of quals added. Therefore we do
* not add every possible extracted restriction condition to the query.
* Instead, we search for the single restriction condition that generates
* the most useful (cheapest) OR indexscan, and add only that condition.
* This is a pretty ad-hoc heuristic, but quite useful.
*
* We can then compensate for the redundancy of the added qual by poking
* the recorded selectivity of the original OR clause, thereby ensuring
* the added qual doesn't change the estimated size of the joinrel when
* it is finally formed. This is a MAJOR HACK: it depends on the fact
* that clause selectivities are cached and on the fact that the same
* RestrictInfo node will appear in every joininfo list that might be used
* when the joinrel is formed. And it probably isn't right in cases where
* the size estimation is nonlinear (i.e., outer and IN joins). But it
* beats not doing anything.
*
* NOTE: one might think this messiness could be worked around by generating
* the indexscan path with a small path->rows value, and not touching the
* rel's baserestrictinfo or rel->rows. However, that does not work.
* The optimizer's fundamental design assumes that every general-purpose
* Path for a given relation generates the same number of rows. Without
* this assumption we'd not be able to optimize solely on the cost of Paths,
* but would have to take number of output rows into account as well.
* (Perhaps someday that'd be worth doing, but it's a pretty big change...)
*
* 'rel' is the relation entry for which quals are to be created
*
* If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
* If no quals available, returns FALSE and doesn't change rel.
*
* Note: check_partial_indexes() must have been run previously.
*----------
*/
bool
create_or_index_quals(Query *root, RelOptInfo *rel)
{
	IndexPath *cheapest_path = NULL;	/* cheapest OR path seen so far */
	RestrictInfo *source_rinfo = NULL;	/* join clause it was derived from */
	List *added_list;
	RestrictInfo *added_rinfo;
	Selectivity added_selec;
	ListCell *jcell;

	/*
	 * Walk every join clause attached to the rel and ask
	 * best_or_subclause_indexes() whether its OR arms can be turned into
	 * a multi-index scan on this rel alone.  Keep only the cheapest such
	 * path.  Clauses that are not valid_everywhere are skipped, since
	 * outer-join rules forbid pushing them down.
	 */
	foreach(jcell, rel->joininfo)
	{
		JoinInfo *jinfo = (JoinInfo *) lfirst(jcell);
		ListCell *rcell;

		foreach(rcell, jinfo->jinfo_restrictinfo)
		{
			RestrictInfo *candidate = (RestrictInfo *) lfirst(rcell);
			IndexPath *candpath;

			if (!restriction_is_or_clause(candidate))
				continue;
			if (!candidate->valid_everywhere)
				continue;

			candpath = best_or_subclause_indexes(root, rel,
				((BoolExpr *) candidate->orclause)->args);
			if (candpath == NULL)
				continue;		/* some OR arm had no usable index */

			if (cheapest_path == NULL ||
				candpath->path.total_cost < cheapest_path->path.total_cost)
			{
				cheapest_path = candpath;
				source_rinfo = candidate;
			}
		}
	}

	/* Nothing usable: report failure and leave rel untouched */
	if (cheapest_path == NULL)
		return false;

	/*
	 * Turn the winning path's indexclauses into a RestrictInfo tree (a
	 * single OR clause is expected) and append it to the rel's
	 * restriction list.
	 */
	added_list = make_restrictinfo_from_indexclauses(cheapest_path->indexclauses,
		true, true);
	Assert(list_length(added_list) == 1);
	added_rinfo = (RestrictInfo *) linitial(added_list);
	rel->baserestrictinfo = list_concat(rel->baserestrictinfo, added_list);

	/*
	 * The new qual is redundant with the join clause it came from, so
	 * divide the join clause's cached selectivity by the new qual's
	 * selectivity.  That should leave the eventual join rel's row
	 * estimate about where it would have been without the extra qual.
	 * (XXX major hack: this relies on the selectivity staying cached.)
	 */
	added_selec = clause_selectivity(root, (Node *) added_rinfo,
		0, JOIN_INNER);
	if (added_selec > 0 && added_selec < 1)
	{
		Selectivity join_selec;

		join_selec = clause_selectivity(root, (Node *) source_rinfo,
			0, JOIN_INNER);
		source_rinfo->this_selec = join_selec / added_selec;
		/* keep the adjusted value within the sane range */
		if (source_rinfo->this_selec > 1)
			source_rinfo->this_selec = 1;
	}

	/* Caller must now recompute the rel's rows estimate */
	return true;
}
/*
* create_or_index_paths
* Creates multi-scan index paths for indexes that match OR clauses.
*
* 'rel' is the relation entry for which the paths are to be created
*
* Returns nothing, but adds paths to rel->pathlist via add_path().
*
* Note: check_partial_indexes() must have been run previously.
*/
void
create_or_index_paths(Query *root, RelOptInfo *rel)
{
	ListCell *cell;

	/*
	 * Scan the rel's restriction clauses; each one that is an OR clause
	 * is offered to best_or_subclause_indexes(), and any path that comes
	 * back is handed to add_path().
	 */
	foreach(cell, rel->baserestrictinfo)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
		IndexPath *orpath;

		if (!restriction_is_or_clause(rinfo))
			continue;

		orpath = best_or_subclause_indexes(root, rel,
			((BoolExpr *) rinfo->orclause)->args);
		if (orpath != NULL)
			add_path(rel, (Path *) orpath);
	}
}
/*
* best_or_subclause_indexes
* Determine the best index to be used in conjunction with each subclause
* of an OR clause, and build a Path for a multi-index scan.
*
* 'rel' is the node of the relation to be scanned
* 'subclauses' are the subclauses of the OR clause (must be the modified
* form that includes sub-RestrictInfo clauses)
*
* Returns an IndexPath if successful, or NULL if it is not possible to
* find an index for each OR subclause.
*
* NOTE: we choose each scan on the basis of its total cost, ignoring startup
* cost. This is reasonable as long as all index types have zero or small
* startup cost, but we might have to work harder if any index types with
* nontrivial startup cost are ever invented.
*
* This routine also creates the indexqual list that will be needed by
* the executor. The indexqual list has one entry for each scan of the base
* rel, which is a sublist of indexqual conditions to apply in that scan.
* The implicit semantics are AND across each sublist of quals, and OR across
* the toplevel list (note that the executor takes care not to return any
* single tuple more than once).
*/
static IndexPath *
best_or_subclause_indexes(Query *root,
	RelOptInfo *rel,
	List *subclauses)
{
	List *scan_indexes = NIL;	/* one IndexOptInfo per OR arm */
	List *scan_clauses = NIL;	/* one indexclause list per OR arm */
	List *scan_quals = NIL;		/* one indexqual list per OR arm */
	Cost startup_sum = 0;
	Cost total_sum = 0;
	bool is_first_scan = true;
	ListCell *arm;
	IndexPath *result;

	/* Pick the best index for each arm of the OR; give up if any arm fails */
	foreach(arm, subclauses)
	{
		Expr *arm_expr = lfirst(arm);
		IndexOptInfo *arm_index;
		List *arm_clauses;
		List *arm_quals;
		Cost arm_startup;
		Cost arm_total;

		if (!best_or_subclause_index(root, rel, arm_expr,
				&arm_index,
				&arm_clauses, &arm_quals,
				&arm_startup, &arm_total))
			return NULL;		/* no index matches this arm */

		scan_indexes = lappend(scan_indexes, arm_index);
		scan_clauses = lappend(scan_clauses, arm_clauses);
		scan_quals = lappend(scan_quals, arm_quals);

		/*
		 * Only the first scan's startup cost counts as the path's startup
		 * cost; later scans pay theirs along the way, so those show up
		 * only in the total.  The total is the sum over all scans.
		 */
		if (is_first_scan)
		{
			startup_sum = arm_startup;
			is_first_scan = false;
		}
		total_sum += arm_total;
	}

	/* Every arm matched an index, so assemble the IndexPath */
	result = makeNode(IndexPath);

	/* generic Path fields */
	result->path.pathtype = T_IndexScan;
	result->path.parent = rel;
	result->path.startup_cost = startup_sum;
	result->path.total_cost = total_sum;

	/*
	 * Tuples come back from several passes (one per OR arm), so no
	 * ordering can be claimed for the combined output.
	 */
	result->path.pathkeys = NIL;

	/* per-scan lists, in OR-arm order */
	result->indexinfo = scan_indexes;
	result->indexclauses = scan_clauses;
	result->indexquals = scan_quals;

	/* Not an innerjoin path, and scan direction is irrelevant here */
	result->isjoininner = false;
	result->indexscandir = NoMovementScanDirection;

	/*
	 * Since this is not a join inner indexscan, the row count is simply
	 * the parent rel's estimate.
	 */
	result->rows = rel->rows;

	return result;
}
/*
* best_or_subclause_index
* Determines which is the best index to be used with a subclause of an
* OR clause by estimating the cost of using each index and selecting
* the least expensive (considering total cost only, for now).
*
* Returns FALSE if no index exists that can be used with this OR subclause;
* in that case the output parameters are not set.
*
* 'rel' is the node of the relation to be scanned
* 'subclause' is the OR subclause being considered
*
* '*retIndexInfo' gets the IndexOptInfo of the best index
* '*retIndexClauses' gets a list of the index clauses for the best index
* '*retIndexQuals' gets a list of the expanded indexquals for the best index
* '*retStartupCost' gets the startup cost of a scan with that index
* '*retTotalCost' gets the total cost of a scan with that index
*/
static bool
best_or_subclause_index(Query *root,
	RelOptInfo *rel,
	Expr *subclause,
	IndexOptInfo **retIndexInfo,	/* return value */
	List **retIndexClauses,			/* return value */
	List **retIndexQuals,			/* return value */
	Cost *retStartupCost,			/* return value */
	Cost *retTotalCost)				/* return value */
{
	bool have_winner = false;
	ListCell *icell;

	/* Cost out every index on the rel and remember the cheapest match */
	foreach(icell, rel->indexlist)
	{
		IndexOptInfo *candidate = (IndexOptInfo *) lfirst(icell);
		List *matched_clauses;
		List *expanded_quals;
		Path scratch_path;		/* receives cost_index() output only */

		/*
		 * A partial index whose predicate is not already known to hold
		 * (predOK false) may still be usable for this one scan if the
		 * predicate is implied by the OR subclause, possibly together
		 * with the top-level restriction clauses.  Test that here and
		 * skip the index if the proof fails.
		 *
		 * XXX this is partially redundant with logic in
		 * group_clauses_by_indexkey_for_or(); consider refactoring.
		 */
		if (candidate->indpred != NIL && !candidate->predOK)
		{
			List *pred_inputs;

			/* work on a fresh list: list_concat below modifies its first arg */
			if (and_clause((Node *) subclause))
				pred_inputs = list_copy(((BoolExpr *) subclause)->args);
			else if (IsA(subclause, RestrictInfo))
				pred_inputs = list_make1(subclause);
			else
				continue;		/* probably can't happen */

			pred_inputs = list_concat(pred_inputs, rel->baserestrictinfo);
			if (!pred_test(candidate->indpred, pred_inputs))
				continue;
		}

		/* Find the parts of the subclause this index can use */
		matched_clauses = group_clauses_by_indexkey_for_or(rel, candidate,
			subclause);

		/*
		 * With no matching clauses the index is normally useless.  The
		 * exception is a partial index that passed the proof above: its
		 * predicate effectively matches the subclause.  Partial indexes
		 * that are predOK get no such exception, else they would appear
		 * to match *every* OR subclause and yield bogus plans redundant
		 * with the plain scan on that index.
		 */
		if (matched_clauses == NIL)
		{
			if (candidate->indpred == NIL || candidate->predOK)
				continue;
		}

		/* Expand into indexquals the executor can handle, then cost it */
		expanded_quals = expand_indexqual_conditions(rel, candidate,
			matched_clauses);
		cost_index(&scratch_path, root, rel, candidate, expanded_quals, false);

		/* First usable index, or cheaper (by total cost) than the current best */
		if (!have_winner || scratch_path.total_cost < *retTotalCost)
		{
			*retIndexInfo = candidate;
			*retIndexClauses = flatten_clausegroups_list(matched_clauses);
			*retIndexQuals = expanded_quals;
			*retStartupCost = scratch_path.startup_cost;
			*retTotalCost = scratch_path.total_cost;
			have_winner = true;
		}
	}

	return have_winner;
}