1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-08 06:02:22 +03:00

Improve selectivity estimation for assorted match-style operators.

Quite a few matching operators such as JSONB's @> used "contsel" and
"contjoinsel" as their selectivity estimators.  That was a bad idea,
because (a) contsel is only a stub, yielding a fixed default estimate,
and (b) that default is 0.001, meaning we estimate these operators as
five times more selective than equality, which is surely pretty silly.

There's a good model for improving this in ltree's ltreeparentsel():
for any "var OP constant" query, we can try applying the operator
to all of the column's MCV and histogram values, taking the latter
as being a random sample of the non-MCV values.  That code is
actually 100% generic, except for the question of exactly what
default selectivity ought to be plugged in when we don't have stats.

Hence, migrate the guts of ltreeparentsel() into the core code, provide
wrappers "matchingsel" and "matchingjoinsel" with a more-appropriate
default estimate, and use those for the non-geometric operators that
formerly used contsel (mostly JSONB containment operators and tsquery
matching).

Also apply this code to some match-like operators in hstore, ltree, and
pg_trgm, including the former users of ltreeparentsel as well as ones
that improperly used contsel.  Since commit 911e70207 just created new
versions of those extensions that we haven't released yet, we can sneak
this change into those new versions instead of having to create an
additional generation of update scripts.

Patch by me, reviewed by Alexey Bashtanov

Discussion: https://postgr.es/m/12237.1582833074@sss.pgh.pa.us
This commit is contained in:
Tom Lane
2020-04-01 10:32:33 -04:00
parent d8653f4687
commit a80818605e
11 changed files with 332 additions and 121 deletions

View File

@@ -19,3 +19,84 @@ ADD FUNCTION 10 (ltree) ltree_gist_options (internal);
ALTER OPERATOR FAMILY gist__ltree_ops USING gist
ADD FUNCTION 10 (_ltree) _ltree_gist_options (internal);
ALTER OPERATOR < (ltree, ltree)
SET (RESTRICT = scalarltsel, JOIN = scalarltjoinsel);
ALTER OPERATOR <= (ltree, ltree)
SET (RESTRICT = scalarlesel, JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ltree, ltree)
SET (RESTRICT = scalargesel, JOIN = scalargejoinsel);
ALTER OPERATOR > (ltree, ltree)
SET (RESTRICT = scalargtsel, JOIN = scalargtjoinsel);
ALTER OPERATOR @> (ltree, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^@> (ltree, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR <@ (ltree, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^<@ (ltree, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ~ (ltree, lquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ~ (lquery, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^~ (ltree, lquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^~ (lquery, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ? (ltree, _lquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ? (_lquery, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^? (ltree, _lquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^? (_lquery, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR @ (ltree, ltxtquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR @ (ltxtquery, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^@ (ltree, ltxtquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^@ (ltxtquery, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR @> (_ltree, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR <@ (ltree, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR <@ (_ltree, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR @> (ltree, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ~ (_ltree, lquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ~ (lquery, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ? (_ltree, _lquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ? (_lquery, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR @ (_ltree, ltxtquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR @ (ltxtquery, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^@> (_ltree, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^<@ (ltree, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^<@ (_ltree, ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^@> (ltree, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^~ (_ltree, lquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^~ (lquery, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^? (_ltree, _lquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^? (_lquery, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^@ (_ltree, ltxtquery)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
ALTER OPERATOR ^@ (ltxtquery, _ltree)
SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);

View File

@@ -566,10 +566,11 @@ ltree2text(PG_FUNCTION_ARGS)
}
#define DEFAULT_PARENT_SEL 0.001
/*
* ltreeparentsel - Selectivity of parent relationship for ltree data types.
*
* This function is not used anymore, if the ltree extension has been
* updated to 1.2 or later.
*/
Datum
ltreeparentsel(PG_FUNCTION_ARGS)
@@ -578,101 +579,12 @@ ltreeparentsel(PG_FUNCTION_ARGS)
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
VariableStatData vardata;
Node *other;
bool varonleft;
double selec;
/*
* If expression is not variable <@ something or something <@ variable,
* then punt and return a default estimate.
*/
if (!get_restriction_variable(root, args, varRelid,
&vardata, &other, &varonleft))
PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);
/*
* If the something is a NULL constant, assume operator is strict and
* return zero, ie, operator will never return TRUE.
*/
if (IsA(other, Const) &&
((Const *) other)->constisnull)
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(0.0);
}
if (IsA(other, Const))
{
/* Variable is being compared to a known non-null constant */
Datum constval = ((Const *) other)->constvalue;
FmgrInfo contproc;
double mcvsum;
double mcvsel;
double nullfrac;
int hist_size;
fmgr_info(get_opcode(operator), &contproc);
/*
* Is the constant "<@" to any of the column's most common values?
*/
mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft,
&mcvsum);
/*
* If the histogram is large enough, see what fraction of it the
* constant is "<@" to, and assume that's representative of the
* non-MCV population. Otherwise use the default selectivity for the
* non-MCV population.
*/
selec = histogram_selectivity(&vardata, &contproc,
constval, varonleft,
10, 1, &hist_size);
if (selec < 0)
{
/* Nope, fall back on default */
selec = DEFAULT_PARENT_SEL;
}
else if (hist_size < 100)
{
/*
* For histogram sizes from 10 to 100, we combine the histogram
* and default selectivities, putting increasingly more trust in
* the histogram for larger sizes.
*/
double hist_weight = hist_size / 100.0;
selec = selec * hist_weight +
DEFAULT_PARENT_SEL * (1.0 - hist_weight);
}
/* In any case, don't believe extremely small or large estimates. */
if (selec < 0.0001)
selec = 0.0001;
else if (selec > 0.9999)
selec = 0.9999;
if (HeapTupleIsValid(vardata.statsTuple))
nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
else
nullfrac = 0.0;
/*
* Now merge the results from the MCV and histogram calculations,
* realizing that the histogram covers only the non-null values that
* are not listed in MCV.
*/
selec *= 1.0 - nullfrac - mcvsum;
selec += mcvsel;
}
else
selec = DEFAULT_PARENT_SEL;
ReleaseVariableStats(vardata);
/* result should be in range, but make sure... */
CLAMP_PROBABILITY(selec);
/* Use generic restriction selectivity logic, with default 0.001. */
selec = generic_restriction_selectivity(root, operator,
args, varRelid,
0.001);
PG_RETURN_FLOAT8((float8) selec);
}