mirror of
https://github.com/postgres/postgres.git
synced 2025-11-07 19:06:32 +03:00
First cut at making useful selectivity estimates for range queries
(ie, WHERE x > lowbound AND x < highbound). It's not very bright yet but it does something useful. Also, rename intltsel/intgtsel to scalarltsel/scalargtsel to reflect usage better. Extend convert_to_scalar to do something a little bit useful with string data types. Still need to make it do something with date/time datatypes, but I'll wait for Thomas's datetime unification dust to settle first. Eventually the routine ought not have any type-specific knowledge at all; it ought to be calling a type-dependent routine found via a pg_type column; but that's a task for another day.
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.29 2000/01/24 07:16:46 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -23,6 +23,23 @@
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
|
||||
/*
|
||||
* Data structure for accumulating info about possible range-query
|
||||
* clause pairs in clauselist_selectivity.
|
||||
*/
|
||||
typedef struct RangeQueryClause {
|
||||
struct RangeQueryClause *next; /* next in linked list */
|
||||
Node *var; /* The common variable of the clauses */
|
||||
bool have_lobound; /* found a low-bound clause yet? */
|
||||
bool have_hibound; /* found a high-bound clause yet? */
|
||||
Selectivity lobound; /* Selectivity of a var > something clause */
|
||||
Selectivity hibound; /* Selectivity of a var < something clause */
|
||||
} RangeQueryClause;
|
||||
|
||||
static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
|
||||
int flag, bool isLTsel, Selectivity s2);
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* ROUTINES TO COMPUTE SELECTIVITIES
|
||||
****************************************************************************/
|
||||
@@ -55,29 +72,237 @@ restrictlist_selectivity(Query *root,
|
||||
* must be returned.
|
||||
*
|
||||
* See clause_selectivity() for the meaning of the varRelid parameter.
|
||||
*
|
||||
* Our basic approach is to take the product of the selectivities of the
|
||||
* subclauses. However, that's only right if the subclauses have independent
|
||||
* probabilities, and in reality they are often NOT independent. So,
|
||||
* we want to be smarter where we can.
|
||||
|
||||
* Currently, the only extra smarts we have is to recognize "range queries",
|
||||
* such as "x > 34 AND x < 42". Clauses are recognized as possible range
|
||||
* query components if they are restriction opclauses whose operators have
|
||||
* scalarltsel() or scalargtsel() as their restriction selectivity estimator.
|
||||
* We pair up clauses of this form that refer to the same variable. An
|
||||
* unpairable clause of this kind is simply multiplied into the selectivity
|
||||
* product in the normal way. But when we find a pair, we know that the
|
||||
* selectivities represent the relative positions of the low and high bounds
|
||||
* within the column's range, so instead of figuring the selectivity as
|
||||
* hisel * losel, we can figure it as hisel + losel - 1. (To visualize this,
|
||||
* see that hisel is the fraction of the range below the high bound, while
|
||||
* losel is the fraction above the low bound; so hisel can be interpreted
|
||||
* directly as a 0..1 value but we need to convert losel to 1-losel before
|
||||
* interpreting it as a value. Then the available range is 1-losel to hisel.)
|
||||
* If the calculation yields zero or negative, however, we chicken out and
|
||||
* use the default interpretation; that probably means that one or both
|
||||
* selectivities is a default estimate rather than an actual range value.
|
||||
* Of course this is all very dependent on the behavior of
|
||||
* scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
|
||||
*/
|
||||
Selectivity
|
||||
clauselist_selectivity(Query *root,
|
||||
List *clauses,
|
||||
int varRelid)
|
||||
{
|
||||
Selectivity s1 = 1.0;
|
||||
List *clause;
|
||||
Selectivity s1 = 1.0;
|
||||
RangeQueryClause *rqlist = NULL;
|
||||
List *clist;
|
||||
|
||||
/* Use the product of the selectivities of the subclauses.
|
||||
* XXX this is too optimistic, since the subclauses
|
||||
* are very likely not independent...
|
||||
/*
|
||||
* Initial scan over clauses. Anything that doesn't look like a
|
||||
* potential rangequery clause gets multiplied into s1 and forgotten.
|
||||
* Anything that does gets inserted into an rqlist entry.
|
||||
*/
|
||||
foreach(clause, clauses)
|
||||
foreach(clist, clauses)
|
||||
{
|
||||
Selectivity s2 = clause_selectivity(root,
|
||||
(Node *) lfirst(clause),
|
||||
varRelid);
|
||||
Node *clause = (Node *) lfirst(clist);
|
||||
Selectivity s2;
|
||||
|
||||
/*
|
||||
* See if it looks like a restriction clause with a constant.
|
||||
* (If it's not a constant we can't really trust the selectivity!)
|
||||
* NB: for consistency of results, this fragment of code had
|
||||
* better match what clause_selectivity() would do.
|
||||
*/
|
||||
if (varRelid != 0 || NumRelids(clause) == 1)
|
||||
{
|
||||
int relidx;
|
||||
AttrNumber attno;
|
||||
Datum constval;
|
||||
int flag;
|
||||
|
||||
get_relattval(clause, varRelid,
|
||||
&relidx, &attno, &constval, &flag);
|
||||
if (relidx != 0 && (flag & SEL_CONSTANT))
|
||||
{
|
||||
/* if get_relattval succeeded, it must be an opclause */
|
||||
Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno;
|
||||
RegProcedure oprrest = get_oprrest(opno);
|
||||
|
||||
if (!oprrest)
|
||||
s2 = (Selectivity) 0.5;
|
||||
else
|
||||
s2 = restriction_selectivity(oprrest, opno,
|
||||
getrelid(relidx,
|
||||
root->rtable),
|
||||
attno,
|
||||
constval, flag);
|
||||
/*
|
||||
* If we reach here, we have computed the same result
|
||||
* that clause_selectivity would, so we can just use s2
|
||||
* if it's the wrong oprrest. But if it's the right
|
||||
* oprrest, add the clause to rqlist for later processing.
|
||||
*/
|
||||
switch (oprrest)
|
||||
{
|
||||
case F_SCALARLTSEL:
|
||||
addRangeClause(&rqlist, clause, flag, true, s2);
|
||||
break;
|
||||
case F_SCALARGTSEL:
|
||||
addRangeClause(&rqlist, clause, flag, false, s2);
|
||||
break;
|
||||
default:
|
||||
/* Just merge the selectivity in generically */
|
||||
s1 = s1 * s2;
|
||||
break;
|
||||
}
|
||||
continue; /* drop to loop bottom */
|
||||
}
|
||||
}
|
||||
/* Not the right form, so treat it generically. */
|
||||
s2 = clause_selectivity(root, clause, varRelid);
|
||||
s1 = s1 * s2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now scan the rangequery pair list.
|
||||
*/
|
||||
while (rqlist != NULL)
|
||||
{
|
||||
RangeQueryClause *rqnext;
|
||||
|
||||
if (rqlist->have_lobound && rqlist->have_hibound)
|
||||
{
|
||||
/* Successfully matched a pair of range clauses */
|
||||
Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
|
||||
|
||||
if (s2 > 0.0)
|
||||
{
|
||||
/* All our hard work has paid off! */
|
||||
s1 *= s2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* One or both is probably a default estimate,
|
||||
* so punt and just merge them in generically.
|
||||
*/
|
||||
s1 *= rqlist->hibound * rqlist->lobound;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Only found one of a pair, merge it in generically */
|
||||
if (rqlist->have_lobound)
|
||||
s1 *= rqlist->lobound;
|
||||
else
|
||||
s1 *= rqlist->hibound;
|
||||
}
|
||||
/* release storage and advance */
|
||||
rqnext = rqlist->next;
|
||||
pfree(rqlist);
|
||||
rqlist = rqnext;
|
||||
}
|
||||
|
||||
return s1;
|
||||
}
|
||||
|
||||
/*
|
||||
* addRangeClause --- add a new range clause for clauselist_selectivity
|
||||
*
|
||||
* Here is where we try to match up pairs of range-query clauses
|
||||
*/
|
||||
static void
|
||||
addRangeClause(RangeQueryClause **rqlist, Node *clause,
|
||||
int flag, bool isLTsel, Selectivity s2)
|
||||
{
|
||||
RangeQueryClause *rqelem;
|
||||
Node *var;
|
||||
bool is_lobound;
|
||||
|
||||
/* get_relattval sets flag&SEL_RIGHT if the var is on the LEFT. */
|
||||
if (flag & SEL_RIGHT)
|
||||
{
|
||||
var = (Node *) get_leftop((Expr *) clause);
|
||||
is_lobound = ! isLTsel; /* x < something is high bound */
|
||||
}
|
||||
else
|
||||
{
|
||||
var = (Node *) get_rightop((Expr *) clause);
|
||||
is_lobound = isLTsel; /* something < x is low bound */
|
||||
}
|
||||
|
||||
for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
|
||||
{
|
||||
/* We use full equal() here because the "var" might be a function
|
||||
* of one or more attributes of the same relation...
|
||||
*/
|
||||
if (! equal(var, rqelem->var))
|
||||
continue;
|
||||
/* Found the right group to put this clause in */
|
||||
if (is_lobound)
|
||||
{
|
||||
if (! rqelem->have_lobound)
|
||||
{
|
||||
rqelem->have_lobound = true;
|
||||
rqelem->lobound = s2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We have found two similar clauses, such as
|
||||
* x < y AND x < z. Keep only the more restrictive one.
|
||||
*/
|
||||
if (rqelem->lobound > s2)
|
||||
rqelem->lobound = s2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (! rqelem->have_hibound)
|
||||
{
|
||||
rqelem->have_hibound = true;
|
||||
rqelem->hibound = s2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We have found two similar clauses, such as
|
||||
* x > y AND x > z. Keep only the more restrictive one.
|
||||
*/
|
||||
if (rqelem->hibound > s2)
|
||||
rqelem->hibound = s2;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* No matching var found, so make a new clause-pair data structure */
|
||||
rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
|
||||
rqelem->var = var;
|
||||
if (is_lobound)
|
||||
{
|
||||
rqelem->have_lobound = true;
|
||||
rqelem->have_hibound = false;
|
||||
rqelem->lobound = s2;
|
||||
}
|
||||
else
|
||||
{
|
||||
rqelem->have_lobound = false;
|
||||
rqelem->have_hibound = true;
|
||||
rqelem->hibound = s2;
|
||||
}
|
||||
rqelem->next = *rqlist;
|
||||
*rqlist = rqelem;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* clause_selectivity -
|
||||
* Compute the selectivity of a general boolean expression clause.
|
||||
|
||||
Reference in New Issue
Block a user