mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Modify prefix_selectivity() so that it will never estimate the selectivity
of the generated range condition var >= 'foo' AND var < 'fop' as being less
than what eqsel() would estimate for var = 'foo'. This is intuitively
reasonable and it gets rid of the need for some entirely ad-hoc coding we
formerly used to reject bogus estimates. The basic problem here is that
if the prefix is more than a few characters long, the two boundary values
are too close together to be distinguishable by comparison to the column
histogram, resulting in a selectivity estimate of zero, which is often
not very sane. Change motivated by an example from Peter Eisentraut.

Arguably this is a bug fix, but I'll refrain from back-patching it
for the moment.
This commit is contained in:
@ -15,7 +15,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.243 2008/01/01 19:45:52 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.244 2008/03/08 22:41:38 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -103,6 +103,12 @@
|
|||||||
#include "utils/syscache.h"
|
#include "utils/syscache.h"
|
||||||
|
|
||||||
|
|
||||||
|
static double var_eq_const(VariableStatData *vardata, Oid operator,
|
||||||
|
Datum constval, bool constisnull,
|
||||||
|
bool varonleft);
|
||||||
|
static double var_eq_non_const(VariableStatData *vardata, Oid operator,
|
||||||
|
Node *other,
|
||||||
|
bool varonleft);
|
||||||
static double ineq_histogram_selectivity(VariableStatData *vardata,
|
static double ineq_histogram_selectivity(VariableStatData *vardata,
|
||||||
FmgrInfo *opproc, bool isgt,
|
FmgrInfo *opproc, bool isgt,
|
||||||
Datum constval, Oid consttype);
|
Datum constval, Oid consttype);
|
||||||
@ -156,10 +162,6 @@ eqsel(PG_FUNCTION_ARGS)
|
|||||||
VariableStatData vardata;
|
VariableStatData vardata;
|
||||||
Node *other;
|
Node *other;
|
||||||
bool varonleft;
|
bool varonleft;
|
||||||
Datum *values;
|
|
||||||
int nvalues;
|
|
||||||
float4 *numbers;
|
|
||||||
int nnumbers;
|
|
||||||
double selec;
|
double selec;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -171,29 +173,55 @@ eqsel(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the something is a NULL constant, assume operator is strict and
|
* We can do a lot better if the something is a constant. (Note: the
|
||||||
|
* Const might result from estimation rather than being a simple constant
|
||||||
|
* in the query.)
|
||||||
|
*/
|
||||||
|
if (IsA(other, Const))
|
||||||
|
selec = var_eq_const(&vardata, operator,
|
||||||
|
((Const *) other)->constvalue,
|
||||||
|
((Const *) other)->constisnull,
|
||||||
|
varonleft);
|
||||||
|
else
|
||||||
|
selec = var_eq_non_const(&vardata, operator, other,
|
||||||
|
varonleft);
|
||||||
|
|
||||||
|
ReleaseVariableStats(vardata);
|
||||||
|
|
||||||
|
PG_RETURN_FLOAT8((float8) selec);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* var_eq_const --- eqsel for var = const case
|
||||||
|
*
|
||||||
|
* This is split out so that some other estimation functions can use it.
|
||||||
|
*/
|
||||||
|
static double
|
||||||
|
var_eq_const(VariableStatData *vardata, Oid operator,
|
||||||
|
Datum constval, bool constisnull,
|
||||||
|
bool varonleft)
|
||||||
|
{
|
||||||
|
double selec;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the constant is NULL, assume operator is strict and
|
||||||
* return zero, ie, operator will never return TRUE.
|
* return zero, ie, operator will never return TRUE.
|
||||||
*/
|
*/
|
||||||
if (IsA(other, Const) &&
|
if (constisnull)
|
||||||
((Const *) other)->constisnull)
|
return 0.0;
|
||||||
{
|
|
||||||
ReleaseVariableStats(vardata);
|
|
||||||
PG_RETURN_FLOAT8(0.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (HeapTupleIsValid(vardata.statsTuple))
|
if (HeapTupleIsValid(vardata->statsTuple))
|
||||||
{
|
{
|
||||||
Form_pg_statistic stats;
|
Form_pg_statistic stats;
|
||||||
|
Datum *values;
|
||||||
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
|
int nvalues;
|
||||||
|
float4 *numbers;
|
||||||
if (IsA(other, Const))
|
int nnumbers;
|
||||||
{
|
|
||||||
/* Variable is being compared to a known non-null constant */
|
|
||||||
Datum constval = ((Const *) other)->constvalue;
|
|
||||||
bool match = false;
|
bool match = false;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Is the constant "=" to any of the column's most common values?
|
* Is the constant "=" to any of the column's most common values?
|
||||||
* (Although the given operator may not really be "=", we will
|
* (Although the given operator may not really be "=", we will
|
||||||
@ -201,8 +229,8 @@ eqsel(PG_FUNCTION_ARGS)
|
|||||||
* test. If you don't like this, maybe you shouldn't be using
|
* test. If you don't like this, maybe you shouldn't be using
|
||||||
* eqsel for your operator...)
|
* eqsel for your operator...)
|
||||||
*/
|
*/
|
||||||
if (get_attstatsslot(vardata.statsTuple,
|
if (get_attstatsslot(vardata->statsTuple,
|
||||||
vardata.atttype, vardata.atttypmod,
|
vardata->atttype, vardata->atttypmod,
|
||||||
STATISTIC_KIND_MCV, InvalidOid,
|
STATISTIC_KIND_MCV, InvalidOid,
|
||||||
&values, &nvalues,
|
&values, &nvalues,
|
||||||
&numbers, &nnumbers))
|
&numbers, &nnumbers))
|
||||||
@ -264,8 +292,7 @@ eqsel(PG_FUNCTION_ARGS)
|
|||||||
* fraction equally, so we divide by the number of other
|
* fraction equally, so we divide by the number of other
|
||||||
* distinct values.
|
* distinct values.
|
||||||
*/
|
*/
|
||||||
otherdistinct = get_variable_numdistinct(&vardata)
|
otherdistinct = get_variable_numdistinct(vardata) - nnumbers;
|
||||||
- nnumbers;
|
|
||||||
if (otherdistinct > 1)
|
if (otherdistinct > 1)
|
||||||
selec /= otherdistinct;
|
selec /= otherdistinct;
|
||||||
|
|
||||||
@ -277,12 +304,43 @@ eqsel(PG_FUNCTION_ARGS)
|
|||||||
selec = numbers[nnumbers - 1];
|
selec = numbers[nnumbers - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
free_attstatsslot(vardata.atttype, values, nvalues,
|
free_attstatsslot(vardata->atttype, values, nvalues,
|
||||||
numbers, nnumbers);
|
numbers, nnumbers);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* No ANALYZE stats available, so make a guess using estimated number
|
||||||
|
* of distinct values and assuming they are equally common. (The guess
|
||||||
|
* is unlikely to be very good, but we do know a few special cases.)
|
||||||
|
*/
|
||||||
|
selec = 1.0 / get_variable_numdistinct(vardata);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* result should be in range, but make sure... */
|
||||||
|
CLAMP_PROBABILITY(selec);
|
||||||
|
|
||||||
|
return selec;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* var_eq_non_const --- eqsel for var = something-other-than-const case
|
||||||
|
*/
|
||||||
|
static double
|
||||||
|
var_eq_non_const(VariableStatData *vardata, Oid operator,
|
||||||
|
Node *other,
|
||||||
|
bool varonleft)
|
||||||
|
{
|
||||||
|
double selec;
|
||||||
|
|
||||||
|
if (HeapTupleIsValid(vardata->statsTuple))
|
||||||
|
{
|
||||||
|
Form_pg_statistic stats;
|
||||||
double ndistinct;
|
double ndistinct;
|
||||||
|
float4 *numbers;
|
||||||
|
int nnumbers;
|
||||||
|
|
||||||
|
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Search is for a value that we do not know a priori, but we will
|
* Search is for a value that we do not know a priori, but we will
|
||||||
@ -295,7 +353,7 @@ eqsel(PG_FUNCTION_ARGS)
|
|||||||
* good idea?)
|
* good idea?)
|
||||||
*/
|
*/
|
||||||
selec = 1.0 - stats->stanullfrac;
|
selec = 1.0 - stats->stanullfrac;
|
||||||
ndistinct = get_variable_numdistinct(&vardata);
|
ndistinct = get_variable_numdistinct(vardata);
|
||||||
if (ndistinct > 1)
|
if (ndistinct > 1)
|
||||||
selec /= ndistinct;
|
selec /= ndistinct;
|
||||||
|
|
||||||
@ -303,16 +361,15 @@ eqsel(PG_FUNCTION_ARGS)
|
|||||||
* Cross-check: selectivity should never be estimated as more than
|
* Cross-check: selectivity should never be estimated as more than
|
||||||
* the most common value's.
|
* the most common value's.
|
||||||
*/
|
*/
|
||||||
if (get_attstatsslot(vardata.statsTuple,
|
if (get_attstatsslot(vardata->statsTuple,
|
||||||
vardata.atttype, vardata.atttypmod,
|
vardata->atttype, vardata->atttypmod,
|
||||||
STATISTIC_KIND_MCV, InvalidOid,
|
STATISTIC_KIND_MCV, InvalidOid,
|
||||||
NULL, NULL,
|
NULL, NULL,
|
||||||
&numbers, &nnumbers))
|
&numbers, &nnumbers))
|
||||||
{
|
{
|
||||||
if (nnumbers > 0 && selec > numbers[0])
|
if (nnumbers > 0 && selec > numbers[0])
|
||||||
selec = numbers[0];
|
selec = numbers[0];
|
||||||
free_attstatsslot(vardata.atttype, NULL, 0, numbers, nnumbers);
|
free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -322,15 +379,13 @@ eqsel(PG_FUNCTION_ARGS)
|
|||||||
* of distinct values and assuming they are equally common. (The guess
|
* of distinct values and assuming they are equally common. (The guess
|
||||||
* is unlikely to be very good, but we do know a few special cases.)
|
* is unlikely to be very good, but we do know a few special cases.)
|
||||||
*/
|
*/
|
||||||
selec = 1.0 / get_variable_numdistinct(&vardata);
|
selec = 1.0 / get_variable_numdistinct(vardata);
|
||||||
}
|
}
|
||||||
|
|
||||||
ReleaseVariableStats(vardata);
|
|
||||||
|
|
||||||
/* result should be in range, but make sure... */
|
/* result should be in range, but make sure... */
|
||||||
CLAMP_PROBABILITY(selec);
|
CLAMP_PROBABILITY(selec);
|
||||||
|
|
||||||
PG_RETURN_FLOAT8((float8) selec);
|
return selec;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1047,16 +1102,11 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
|
|||||||
*/
|
*/
|
||||||
Oid eqopr = get_opfamily_member(opfamily, vartype, vartype,
|
Oid eqopr = get_opfamily_member(opfamily, vartype, vartype,
|
||||||
BTEqualStrategyNumber);
|
BTEqualStrategyNumber);
|
||||||
List *eqargs;
|
|
||||||
|
|
||||||
if (eqopr == InvalidOid)
|
if (eqopr == InvalidOid)
|
||||||
elog(ERROR, "no = operator for opfamily %u", opfamily);
|
elog(ERROR, "no = operator for opfamily %u", opfamily);
|
||||||
eqargs = list_make2(variable, prefix);
|
result = var_eq_const(&vardata, eqopr, prefix->constvalue,
|
||||||
result = DatumGetFloat8(DirectFunctionCall4(eqsel,
|
false, true);
|
||||||
PointerGetDatum(root),
|
|
||||||
ObjectIdGetDatum(eqopr),
|
|
||||||
PointerGetDatum(eqargs),
|
|
||||||
Int32GetDatum(varRelid)));
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -4430,6 +4480,7 @@ prefix_selectivity(VariableStatData *vardata,
|
|||||||
Oid cmpopr;
|
Oid cmpopr;
|
||||||
FmgrInfo opproc;
|
FmgrInfo opproc;
|
||||||
Const *greaterstrcon;
|
Const *greaterstrcon;
|
||||||
|
Selectivity eq_sel;
|
||||||
|
|
||||||
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
|
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
|
||||||
BTGreaterEqualStrategyNumber);
|
BTGreaterEqualStrategyNumber);
|
||||||
@ -4444,7 +4495,7 @@ prefix_selectivity(VariableStatData *vardata,
|
|||||||
if (prefixsel <= 0.0)
|
if (prefixsel <= 0.0)
|
||||||
{
|
{
|
||||||
/* No histogram is present ... return a suitable default estimate */
|
/* No histogram is present ... return a suitable default estimate */
|
||||||
return 0.005;
|
return DEFAULT_MATCH_SEL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*-------
|
/*-------
|
||||||
@ -4452,17 +4503,17 @@ prefix_selectivity(VariableStatData *vardata,
|
|||||||
* "x < greaterstr".
|
* "x < greaterstr".
|
||||||
*-------
|
*-------
|
||||||
*/
|
*/
|
||||||
|
greaterstrcon = make_greater_string(prefixcon, &opproc);
|
||||||
|
if (greaterstrcon)
|
||||||
|
{
|
||||||
|
Selectivity topsel;
|
||||||
|
|
||||||
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
|
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
|
||||||
BTLessStrategyNumber);
|
BTLessStrategyNumber);
|
||||||
if (cmpopr == InvalidOid)
|
if (cmpopr == InvalidOid)
|
||||||
elog(ERROR, "no < operator for opfamily %u", opfamily);
|
elog(ERROR, "no < operator for opfamily %u", opfamily);
|
||||||
fmgr_info(get_opcode(cmpopr), &opproc);
|
fmgr_info(get_opcode(cmpopr), &opproc);
|
||||||
|
|
||||||
greaterstrcon = make_greater_string(prefixcon, &opproc);
|
|
||||||
if (greaterstrcon)
|
|
||||||
{
|
|
||||||
Selectivity topsel;
|
|
||||||
|
|
||||||
topsel = ineq_histogram_selectivity(vardata, &opproc, false,
|
topsel = ineq_histogram_selectivity(vardata, &opproc, false,
|
||||||
greaterstrcon->constvalue,
|
greaterstrcon->constvalue,
|
||||||
greaterstrcon->consttype);
|
greaterstrcon->consttype);
|
||||||
@ -4477,15 +4528,29 @@ prefix_selectivity(VariableStatData *vardata,
|
|||||||
* doesn't count those anyway.
|
* doesn't count those anyway.
|
||||||
*/
|
*/
|
||||||
prefixsel = topsel + prefixsel - 1.0;
|
prefixsel = topsel + prefixsel - 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A zero or negative prefixsel should be converted into a small
|
* If the prefix is long then the two bounding values might be too
|
||||||
* positive value; we probably are dealing with a very tight range and
|
* close together for the histogram to distinguish them usefully,
|
||||||
* got a bogus result due to roundoff errors.
|
* resulting in a zero estimate (plus or minus roundoff error).
|
||||||
|
* To avoid returning a ridiculously small estimate, compute the
|
||||||
|
* estimated selectivity for "variable = 'foo'", and clamp to that.
|
||||||
|
* (Obviously, the resultant estimate should be at least that.)
|
||||||
|
*
|
||||||
|
* We apply this even if we couldn't make a greater string. That case
|
||||||
|
* suggests that the prefix is near the maximum possible, and thus
|
||||||
|
* probably off the end of the histogram, and thus we probably got a
|
||||||
|
* very small estimate from the >= condition; so we still need to clamp.
|
||||||
*/
|
*/
|
||||||
if (prefixsel <= 0.0)
|
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
|
||||||
prefixsel = 1.0e-10;
|
BTEqualStrategyNumber);
|
||||||
}
|
if (cmpopr == InvalidOid)
|
||||||
|
elog(ERROR, "no = operator for opfamily %u", opfamily);
|
||||||
|
eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
|
||||||
|
false, true);
|
||||||
|
|
||||||
|
prefixsel = Max(prefixsel, eq_sel);
|
||||||
|
|
||||||
return prefixsel;
|
return prefixsel;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user