mirror of
https://github.com/postgres/postgres.git
synced 2025-11-03 09:13:20 +03:00
Extract restriction OR clauses whether or not they are indexable.
It's possible to extract a restriction OR clause from a join clause that has the form of an OR-of-ANDs, if each sub-AND includes a clause that mentions only one specific relation. While PG has been aware of that idea for many years, the code previously only did it if it could extract an indexable OR clause. On reflection, though, that seems a silly limitation: adding a restriction clause can be a win by reducing the number of rows that have to be filtered at the join step, even if we have to test the clause as a plain filter clause during the scan. This should be especially useful for foreign tables, where the change can cut the number of rows that have to be retrieved from the foreign server; but testing shows it can win even on local tables. Per a suggestion from Robert Haas. As a heuristic, I made the code accept an extracted restriction clause if its estimated selectivity is less than 0.9, which will probably result in accepting extracted clauses just about always. We might need to tweak that later based on experience. Since the code no longer has even a weak connection to Path creation, remove orindxpath.c and create a new file optimizer/util/orclauses.c. There's some additional janitorial cleanup of now-dead code that needs to happen, but it seems like that's a fit subject for a separate commit.
This commit is contained in:
@@ -13,6 +13,6 @@ top_builddir = ../../../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
|
||||
OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \
|
||||
joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o
|
||||
joinpath.o joinrels.o pathkeys.o tidpath.o
|
||||
|
||||
include $(top_srcdir)/src/backend/common.mk
|
||||
|
||||
@@ -362,17 +362,6 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
||||
|
||||
/* Mark rel with estimated output rows, width, etc */
|
||||
set_baserel_size_estimates(root, rel);
|
||||
|
||||
/*
|
||||
* Check to see if we can extract any restriction conditions from join
|
||||
* quals that are OR-of-AND structures. If so, add them to the rel's
|
||||
* restriction list, and redo the above steps.
|
||||
*/
|
||||
if (create_or_index_quals(root, rel))
|
||||
{
|
||||
check_partial_indexes(root, rel);
|
||||
set_baserel_size_estimates(root, rel);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -1354,7 +1354,8 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths)
|
||||
* we can remove this limitation. (But note that this also defends
|
||||
* against flat-out duplicate input paths, which can happen because
|
||||
* match_join_clauses_to_index will find the same OR join clauses that
|
||||
* create_or_index_quals has pulled OR restriction clauses out of.)
|
||||
* extract_restriction_or_clauses has pulled OR restriction clauses out
|
||||
* of.)
|
||||
*
|
||||
* For the same reason, we reject AND combinations in which an index
|
||||
* predicate clause duplicates another clause. Here we find it necessary
|
||||
|
||||
@@ -1,187 +0,0 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* orindxpath.c
|
||||
* Routines to find index paths that match a set of OR clauses
|
||||
*
|
||||
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/backend/optimizer/path/orindxpath.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "optimizer/cost.h"
|
||||
#include "optimizer/paths.h"
|
||||
#include "optimizer/restrictinfo.h"
|
||||
|
||||
|
||||
/*----------
|
||||
* create_or_index_quals
|
||||
* Examine join OR-of-AND quals to see if any useful restriction OR
|
||||
* clauses can be extracted. If so, add them to the query.
|
||||
*
|
||||
* Although a join clause must reference other relations overall,
|
||||
* an OR of ANDs clause might contain sub-clauses that reference just this
|
||||
* relation and can be used to build a restriction clause.
|
||||
* For example consider
|
||||
* WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
|
||||
* We can transform this into
|
||||
* WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
|
||||
* AND (a.x = 42 OR a.x = 44)
|
||||
* AND (b.y = 43 OR b.z = 45);
|
||||
* which opens the potential to build OR indexscans on a and b. In essence
|
||||
* this is a partial transformation to CNF (AND of ORs format). It is not
|
||||
* complete, however, because we do not unravel the original OR --- doing so
|
||||
* would usually bloat the qualification expression to little gain.
|
||||
*
|
||||
* The added quals are partially redundant with the original OR, and therefore
|
||||
* will cause the size of the joinrel to be underestimated when it is finally
|
||||
* formed. (This would be true of a full transformation to CNF as well; the
|
||||
* fault is not really in the transformation, but in clauselist_selectivity's
|
||||
* inability to recognize redundant conditions.) To minimize the collateral
|
||||
* damage, we want to minimize the number of quals added. Therefore we do
|
||||
* not add every possible extracted restriction condition to the query.
|
||||
* Instead, we search for the single restriction condition that generates
|
||||
* the most useful (cheapest) OR indexscan, and add only that condition.
|
||||
* This is a pretty ad-hoc heuristic, but quite useful.
|
||||
*
|
||||
* We can then compensate for the redundancy of the added qual by poking
|
||||
* the recorded selectivity of the original OR clause, thereby ensuring
|
||||
* the added qual doesn't change the estimated size of the joinrel when
|
||||
* it is finally formed. This is a MAJOR HACK: it depends on the fact
|
||||
* that clause selectivities are cached and on the fact that the same
|
||||
* RestrictInfo node will appear in every joininfo list that might be used
|
||||
* when the joinrel is formed. And it probably isn't right in cases where
|
||||
* the size estimation is nonlinear (i.e., outer and IN joins). But it
|
||||
* beats not doing anything.
|
||||
*
|
||||
* NOTE: one might think this messiness could be worked around by generating
|
||||
* the indexscan path with a small path->rows value, and not touching the
|
||||
* rel's baserestrictinfo or rel->rows. However, that does not work.
|
||||
* The optimizer's fundamental design assumes that every general-purpose
|
||||
* Path for a given relation generates the same number of rows. Without
|
||||
* this assumption we'd not be able to optimize solely on the cost of Paths,
|
||||
* but would have to take number of output rows into account as well.
|
||||
* (The parameterized-paths stuff almost fixes this, but not quite...)
|
||||
*
|
||||
* 'rel' is the relation entry for which quals are to be created
|
||||
*
|
||||
* If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
|
||||
* If no quals available, returns FALSE and doesn't change rel.
|
||||
*
|
||||
* Note: check_partial_indexes() must have been run previously.
|
||||
*----------
|
||||
*/
|
||||
bool
|
||||
create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
|
||||
{
|
||||
BitmapOrPath *bestpath = NULL;
|
||||
RestrictInfo *bestrinfo = NULL;
|
||||
List *newrinfos;
|
||||
RestrictInfo *or_rinfo;
|
||||
Selectivity or_selec,
|
||||
orig_selec;
|
||||
ListCell *i;
|
||||
|
||||
/* Skip the whole mess if no indexes */
|
||||
if (rel->indexlist == NIL)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Find potentially interesting OR joinclauses. We can use any joinclause
|
||||
* that is considered safe to move to this rel by the parameterized-path
|
||||
* machinery, even though what we are going to do with it is not exactly a
|
||||
* parameterized path.
|
||||
*/
|
||||
foreach(i, rel->joininfo)
|
||||
{
|
||||
RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
|
||||
|
||||
if (restriction_is_or_clause(rinfo) &&
|
||||
join_clause_is_movable_to(rinfo, rel))
|
||||
{
|
||||
/*
|
||||
* Use the generate_bitmap_or_paths() machinery to estimate the
|
||||
* value of each OR clause. We can use regular restriction
|
||||
* clauses along with the OR clause contents to generate
|
||||
* indexquals. We pass restriction_only = true so that any
|
||||
* sub-clauses that are actually joins will be ignored.
|
||||
*/
|
||||
List *orpaths;
|
||||
ListCell *k;
|
||||
|
||||
orpaths = generate_bitmap_or_paths(root, rel,
|
||||
list_make1(rinfo),
|
||||
rel->baserestrictinfo,
|
||||
true);
|
||||
|
||||
/* Locate the cheapest OR path */
|
||||
foreach(k, orpaths)
|
||||
{
|
||||
BitmapOrPath *path = (BitmapOrPath *) lfirst(k);
|
||||
|
||||
Assert(IsA(path, BitmapOrPath));
|
||||
if (bestpath == NULL ||
|
||||
path->path.total_cost < bestpath->path.total_cost)
|
||||
{
|
||||
bestpath = path;
|
||||
bestrinfo = rinfo;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Fail if no suitable clauses found */
|
||||
if (bestpath == NULL)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Convert the path's indexclauses structure to a RestrictInfo tree. We
|
||||
* include any partial-index predicates so as to get a reasonable
|
||||
* representation of what the path is actually scanning.
|
||||
*/
|
||||
newrinfos = make_restrictinfo_from_bitmapqual((Path *) bestpath,
|
||||
true, true);
|
||||
|
||||
/* It's possible we get back something other than a single OR clause */
|
||||
if (list_length(newrinfos) != 1)
|
||||
return false;
|
||||
or_rinfo = (RestrictInfo *) linitial(newrinfos);
|
||||
Assert(IsA(or_rinfo, RestrictInfo));
|
||||
if (!restriction_is_or_clause(or_rinfo))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* OK, add it to the rel's restriction list.
|
||||
*/
|
||||
rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos);
|
||||
|
||||
/*
|
||||
* Adjust the original OR clause's cached selectivity to compensate for
|
||||
* the selectivity of the added (but redundant) lower-level qual. This
|
||||
* should result in the join rel getting approximately the same rows
|
||||
* estimate as it would have gotten without all these shenanigans. (XXX
|
||||
* major hack alert ... this depends on the assumption that the
|
||||
* selectivity will stay cached ...)
|
||||
*/
|
||||
or_selec = clause_selectivity(root, (Node *) or_rinfo,
|
||||
0, JOIN_INNER, NULL);
|
||||
if (or_selec > 0 && or_selec < 1)
|
||||
{
|
||||
orig_selec = clause_selectivity(root, (Node *) bestrinfo,
|
||||
0, JOIN_INNER, NULL);
|
||||
bestrinfo->norm_selec = orig_selec / or_selec;
|
||||
/* clamp result to sane range */
|
||||
if (bestrinfo->norm_selec > 1)
|
||||
bestrinfo->norm_selec = 1;
|
||||
/* It isn't an outer join clause, so no need to adjust outer_selec */
|
||||
}
|
||||
|
||||
/* Tell caller to recompute partial index status and rowcount estimate */
|
||||
return true;
|
||||
}
|
||||
Reference in New Issue
Block a user