mirror of
https://github.com/postgres/postgres.git
synced 2025-07-05 07:21:24 +03:00
Use a hopefully-more-reliable method of detecting default selectivity estimates when combining the estimates for a range query.

As pointed out by Miquel van Smoorenburg, the existing check for an impossible combined result would quite possibly fail to detect one default and one non-default input. It seems better to use the default range query estimate in such cases. To do so, add a check for an estimate of exactly DEFAULT_INEQ_SEL. This is a bit ugly because it introduces additional coupling between clauselist_selectivity and scalarltsel/scalargtsel, but it's not like there wasn't plenty already...
This commit is contained in:
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.70 2004/08/29 05:06:43 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.71 2004/11/09 00:34:38 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -80,9 +80,10 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
|
|||||||
* interpreting it as a value. Then the available range is 1-losel to hisel.
|
* interpreting it as a value. Then the available range is 1-losel to hisel.
|
||||||
* However, this calculation double-excludes nulls, so really we need
|
* However, this calculation double-excludes nulls, so really we need
|
||||||
* hisel + losel + null_frac - 1.)
|
* hisel + losel + null_frac - 1.)
|
||||||
* If the calculation yields zero or negative, however, we chicken out and
|
*
|
||||||
* use a default estimate; that probably means that one or both
|
* If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
|
||||||
* selectivities is a default estimate rather than an actual range value.
|
* and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
|
||||||
|
* yields an impossible (negative) result.
|
||||||
*
|
*
|
||||||
* A free side-effect is that we can recognize redundant inequalities such
|
* A free side-effect is that we can recognize redundant inequalities such
|
||||||
* as "x < 4 AND x < 5"; only the tighter constraint will be counted.
|
* as "x < 4 AND x < 5"; only the tighter constraint will be counted.
|
||||||
@ -194,37 +195,51 @@ clauselist_selectivity(Query *root,
|
|||||||
if (rqlist->have_lobound && rqlist->have_hibound)
|
if (rqlist->have_lobound && rqlist->have_hibound)
|
||||||
{
|
{
|
||||||
/* Successfully matched a pair of range clauses */
|
/* Successfully matched a pair of range clauses */
|
||||||
Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
|
Selectivity s2;
|
||||||
|
|
||||||
/* Adjust for double-exclusion of NULLs */
|
|
||||||
s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A zero or slightly negative s2 should be converted into a
|
* Exact equality to the default value probably means the
|
||||||
* small positive value; we probably are dealing with a very
|
* selectivity function punted. This is not airtight but
|
||||||
* tight range and got a bogus result due to roundoff errors.
|
* should be good enough.
|
||||||
* However, if s2 is very negative, then we probably have
|
|
||||||
* default selectivity estimates on one or both sides of the
|
|
||||||
* range. In that case, insert a not-so-wildly-optimistic
|
|
||||||
* default estimate.
|
|
||||||
*/
|
*/
|
||||||
if (s2 <= 0.0)
|
if (rqlist->hibound == DEFAULT_INEQ_SEL ||
|
||||||
|
rqlist->lobound == DEFAULT_INEQ_SEL)
|
||||||
{
|
{
|
||||||
if (s2 < -0.01)
|
s2 = DEFAULT_RANGE_INEQ_SEL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
s2 = rqlist->hibound + rqlist->lobound - 1.0;
|
||||||
|
|
||||||
|
/* Adjust for double-exclusion of NULLs */
|
||||||
|
s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A zero or slightly negative s2 should be converted into a
|
||||||
|
* small positive value; we probably are dealing with a very
|
||||||
|
* tight range and got a bogus result due to roundoff errors.
|
||||||
|
* However, if s2 is very negative, then we probably have
|
||||||
|
* default selectivity estimates on one or both sides of the
|
||||||
|
* range that we failed to recognize above for some reason.
|
||||||
|
*/
|
||||||
|
if (s2 <= 0.0)
|
||||||
{
|
{
|
||||||
/*
|
if (s2 < -0.01)
|
||||||
* No data available --- use a default estimate that
|
{
|
||||||
* is small, but not real small.
|
/*
|
||||||
*/
|
* No data available --- use a default estimate that
|
||||||
s2 = 0.005;
|
* is small, but not real small.
|
||||||
}
|
*/
|
||||||
else
|
s2 = DEFAULT_RANGE_INEQ_SEL;
|
||||||
{
|
}
|
||||||
/*
|
else
|
||||||
* It's just roundoff error; use a small positive
|
{
|
||||||
* value
|
/*
|
||||||
*/
|
* It's just roundoff error; use a small positive
|
||||||
s2 = 1.0e-10;
|
* value
|
||||||
|
*/
|
||||||
|
s2 = 1.0e-10;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Merge in the selectivity of the pair of clauses */
|
/* Merge in the selectivity of the pair of clauses */
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -111,45 +111,6 @@
|
|||||||
#include "utils/syscache.h"
|
#include "utils/syscache.h"
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Note: the default selectivity estimates are not chosen entirely at random.
|
|
||||||
* We want them to be small enough to ensure that indexscans will be used if
|
|
||||||
* available, for typical table densities of ~100 tuples/page. Thus, for
|
|
||||||
* example, 0.01 is not quite small enough, since that makes it appear that
|
|
||||||
* nearly all pages will be hit anyway. Also, since we sometimes estimate
|
|
||||||
* eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
|
|
||||||
* 1/DEFAULT_EQ_SEL.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* default selectivity estimate for equalities such as "A = b" */
|
|
||||||
#define DEFAULT_EQ_SEL 0.005
|
|
||||||
|
|
||||||
/* default selectivity estimate for inequalities such as "A < b" */
|
|
||||||
#define DEFAULT_INEQ_SEL (1.0 / 3.0)
|
|
||||||
|
|
||||||
/* default selectivity estimate for pattern-match operators such as LIKE */
|
|
||||||
#define DEFAULT_MATCH_SEL 0.005
|
|
||||||
|
|
||||||
/* default number of distinct values in a table */
|
|
||||||
#define DEFAULT_NUM_DISTINCT 200
|
|
||||||
|
|
||||||
/* default selectivity estimate for boolean and null test nodes */
|
|
||||||
#define DEFAULT_UNK_SEL 0.005
|
|
||||||
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Clamp a computed probability estimate (which may suffer from roundoff or
|
|
||||||
* estimation errors) to valid range. Argument must be a float variable.
|
|
||||||
*/
|
|
||||||
#define CLAMP_PROBABILITY(p) \
|
|
||||||
do { \
|
|
||||||
if (p < 0.0) \
|
|
||||||
p = 0.0; \
|
|
||||||
else if (p > 1.0) \
|
|
||||||
p = 1.0; \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
|
|
||||||
/* Return data from examine_variable and friends */
|
/* Return data from examine_variable and friends */
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.19 2004/08/29 05:06:59 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.20 2004/11/09 00:34:46 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -19,6 +19,49 @@
|
|||||||
#include "nodes/parsenodes.h"
|
#include "nodes/parsenodes.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: the default selectivity estimates are not chosen entirely at random.
|
||||||
|
* We want them to be small enough to ensure that indexscans will be used if
|
||||||
|
* available, for typical table densities of ~100 tuples/page. Thus, for
|
||||||
|
* example, 0.01 is not quite small enough, since that makes it appear that
|
||||||
|
* nearly all pages will be hit anyway. Also, since we sometimes estimate
|
||||||
|
* eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
|
||||||
|
* 1/DEFAULT_EQ_SEL.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* default selectivity estimate for equalities such as "A = b" */
|
||||||
|
#define DEFAULT_EQ_SEL 0.005
|
||||||
|
|
||||||
|
/* default selectivity estimate for inequalities such as "A < b" */
|
||||||
|
#define DEFAULT_INEQ_SEL 0.3333333333333333
|
||||||
|
|
||||||
|
/* default selectivity estimate for range inequalities "A > b AND A < c" */
|
||||||
|
#define DEFAULT_RANGE_INEQ_SEL 0.005
|
||||||
|
|
||||||
|
/* default selectivity estimate for pattern-match operators such as LIKE */
|
||||||
|
#define DEFAULT_MATCH_SEL 0.005
|
||||||
|
|
||||||
|
/* default number of distinct values in a table */
|
||||||
|
#define DEFAULT_NUM_DISTINCT 200
|
||||||
|
|
||||||
|
/* default selectivity estimate for boolean and null test nodes */
|
||||||
|
#define DEFAULT_UNK_SEL 0.005
|
||||||
|
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Clamp a computed probability estimate (which may suffer from roundoff or
|
||||||
|
* estimation errors) to valid range. Argument must be a float variable.
|
||||||
|
*/
|
||||||
|
#define CLAMP_PROBABILITY(p) \
|
||||||
|
do { \
|
||||||
|
if (p < 0.0) \
|
||||||
|
p = 0.0; \
|
||||||
|
else if (p > 1.0) \
|
||||||
|
p = 1.0; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
Pattern_Type_Like, Pattern_Type_Like_IC,
|
Pattern_Type_Like, Pattern_Type_Like_IC,
|
||||||
|
Reference in New Issue
Block a user