1
0
mirror of https://github.com/postgres/postgres.git synced 2025-05-12 16:21:30 +03:00

Use query collation, not column's collation, while examining statistics.

Commit 5e0928005 changed the planner so that, instead of blindly using
DEFAULT_COLLATION_OID when invoking operators for selectivity estimation,
it would use the collation of the column whose statistics we're
considering.  This was recognized as still being not quite the right
thing, but it seemed like a good incremental improvement.  However,
shortly thereafter we introduced nondeterministic collations, and that
creates cases where operators can fail if they're passed the wrong
collation.  We don't want planning to fail in cases where the query itself
would work, so this means that we *must* use the query's collation when
invoking operators for estimation purposes.

The only real problem this creates is in ineq_histogram_selectivity, where
the binary search might produce a garbage answer if we perform comparisons
using a different collation than the column's histogram is ordered with.
However, when the query's collation is significantly different from the
column's default collation, the estimate we previously generated would be
pretty irrelevant anyway; so it's not clear that this will result in
noticeably worse estimates in practice.  (A follow-on patch will improve
this situation in HEAD, but it seems too invasive for back-patch.)

The patch requires changing the signatures of mcv_selectivity and allied
functions, which are exported and very possibly are used by extensions.
In HEAD, I just did that, but an API/ABI break of this sort isn't
acceptable in stable branches.  Therefore, in v12 the patch introduces
"mcv_selectivity_ext" and so on, with signatures matching HEAD, and makes
the old functions into wrappers that assume DEFAULT_COLLATION_OID should
be used.  That does not match the prior behavior, but it should avoid risk
of failure in most cases.  (In practice, I think most extension datatypes
aren't collation-aware, so the change probably doesn't matter to them.)

Per report from James Lucas.  Back-patch to v12 where the problem was
introduced.

Discussion: https://postgr.es/m/CAAFmbbOvfi=wMM=3qRsPunBSLb8BFREno2oOzSBS=mzfLPKABw@mail.gmail.com
This commit is contained in:
Tom Lane 2020-06-05 16:18:50 -04:00
parent 75f1479240
commit 022cd0bfd3
4 changed files with 167 additions and 90 deletions

View File

@ -90,7 +90,9 @@ static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
Selectivity *rest_selec);
static Selectivity prefix_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon);
Oid vartype, Oid opfamily,
Oid collation,
Const *prefixcon);
static Selectivity like_selectivity(const char *patt, int pattlen,
bool case_insensitive);
static Selectivity regex_selectivity(const char *patt, int pattlen,
@ -586,8 +588,8 @@ patternsel_common(PlannerInfo *root,
if (eqopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
result = var_eq_const(&vardata, eqopr, prefix->constvalue,
false, true, false);
result = var_eq_const_ext(&vardata, eqopr, collation,
prefix->constvalue, false, true, false);
}
else
{
@ -618,7 +620,8 @@ patternsel_common(PlannerInfo *root,
opfuncid = get_opcode(oprid);
fmgr_info(opfuncid, &opproc);
selec = histogram_selectivity(&vardata, &opproc, constval, true,
selec = histogram_selectivity_ext(&vardata, &opproc, collation,
constval, true,
10, 1, &hist_size);
/* If not at least 100 entries, use the heuristic method */
@ -629,7 +632,7 @@ patternsel_common(PlannerInfo *root,
if (pstatus == Pattern_Prefix_Partial)
prefixsel = prefix_selectivity(root, &vardata, vartype,
opfamily, prefix);
opfamily, collation, prefix);
else
prefixsel = 1.0;
heursel = prefixsel * rest_selec;
@ -661,7 +664,8 @@ patternsel_common(PlannerInfo *root,
* directly to the result selectivity. Also add up the total fraction
* represented by MCV entries.
*/
mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
mcv_selec = mcv_selectivity_ext(&vardata, &opproc, collation,
constval, true,
&sumcommon);
/*
@ -1170,12 +1174,13 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
*/
static Selectivity
prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon)
Oid vartype, Oid opfamily,
Oid collation,
Const *prefixcon)
{
Selectivity prefixsel;
Oid cmpopr;
FmgrInfo opproc;
AttStatsSlot sslot;
Const *greaterstrcon;
Selectivity eq_sel;
@ -1185,8 +1190,9 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
elog(ERROR, "no >= operator for opfamily %u", opfamily);
fmgr_info(get_opcode(cmpopr), &opproc);
prefixsel = ineq_histogram_selectivity(root, vardata,
prefixsel = ineq_histogram_selectivity_ext(root, vardata,
&opproc, true, true,
collation,
prefixcon->constvalue,
prefixcon->consttype);
@ -1196,31 +1202,22 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
return DEFAULT_MATCH_SEL;
}
/*-------
* If we can create a string larger than the prefix, say
* "x < greaterstr". We try to generate the string referencing the
* collation of the var's statistics, but if that's not available,
* use DEFAULT_COLLATION_OID.
*-------
/*
* If we can create a string larger than the prefix, say "x < greaterstr".
*/
if (HeapTupleIsValid(vardata->statsTuple) &&
get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_HISTOGRAM, InvalidOid, 0))
/* sslot.stacoll is set up */ ;
else
sslot.stacoll = DEFAULT_COLLATION_OID;
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
BTLessStrategyNumber);
if (cmpopr == InvalidOid)
elog(ERROR, "no < operator for opfamily %u", opfamily);
fmgr_info(get_opcode(cmpopr), &opproc);
greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll);
greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
if (greaterstrcon)
{
Selectivity topsel;
topsel = ineq_histogram_selectivity(root, vardata,
topsel = ineq_histogram_selectivity_ext(root, vardata,
&opproc, false, false,
collation,
greaterstrcon->constvalue,
greaterstrcon->consttype);
@ -1253,7 +1250,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
BTEqualStrategyNumber);
if (cmpopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
eq_sel = var_eq_const_ext(vardata, cmpopr, collation, prefixcon->constvalue,
false, true, false);
prefixsel = Max(prefixsel, eq_sel);

View File

@ -137,7 +137,8 @@ networksel(PG_FUNCTION_ARGS)
* by MCV entries.
*/
fmgr_info(get_opcode(operator), &proc);
mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft,
mcv_selec = mcv_selectivity_ext(&vardata, &proc, InvalidOid,
constvalue, varonleft,
&sumcommon);
/*

View File

@ -88,11 +88,7 @@
* (if any) is passed using the standard fmgr mechanism, so that the estimator
* function can fetch it with PG_GET_COLLATION(). Note, however, that all
* statistics in pg_statistic are currently built using the relevant column's
* collation. Thus, in most cases where we are looking at statistics, we
* should ignore the operator collation and use the stats entry's collation.
* We expect that the error induced by doing this is usually not large enough
* to justify complicating matters. In any case, doing otherwise would yield
* entirely garbage results for ordered stats data such as histograms.
* collation.
*----------
*/
@ -148,14 +144,14 @@ get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
static double eqjoinsel_inner(Oid opfuncoid,
static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
bool have_mcvs1, bool have_mcvs2);
static double eqjoinsel_semi(Oid opfuncoid,
static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
@ -193,10 +189,11 @@ static double convert_timevalue_to_scalar(Datum value, Oid typid,
static void examine_simple_variable(PlannerInfo *root, Var *var,
VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
Oid sortop, Datum *min, Datum *max);
Oid sortop, Oid collation,
Datum *min, Datum *max);
static bool get_actual_variable_range(PlannerInfo *root,
VariableStatData *vardata,
Oid sortop,
Oid sortop, Oid collation,
Datum *min, Datum *max);
static bool get_actual_variable_endpoint(Relation heapRel,
Relation indexRel,
@ -234,6 +231,7 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
Oid collation = PG_GET_COLLATION();
VariableStatData vardata;
Node *other;
bool varonleft;
@ -267,7 +265,7 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
* in the query.)
*/
if (IsA(other, Const))
selec = var_eq_const(&vardata, operator,
selec = var_eq_const_ext(&vardata, operator, collation,
((Const *) other)->constvalue,
((Const *) other)->constisnull,
varonleft, negate);
@ -289,6 +287,16 @@ double
var_eq_const(VariableStatData *vardata, Oid operator,
Datum constval, bool constisnull,
bool varonleft, bool negate)
{
/*
 * Compatibility wrapper that keeps the signature lacking a collation
 * argument.  It only forwards to var_eq_const_ext(), supplying
 * DEFAULT_COLLATION_OID as the collation; callers that know the
 * query's collation should call var_eq_const_ext() directly.
 */
return var_eq_const_ext(vardata, operator, DEFAULT_COLLATION_OID,
constval, constisnull,
varonleft, negate);
}
double
var_eq_const_ext(VariableStatData *vardata, Oid operator, Oid collation,
Datum constval, bool constisnull,
bool varonleft, bool negate)
{
double selec;
double nullfrac = 0.0;
@ -353,12 +361,12 @@ var_eq_const(VariableStatData *vardata, Oid operator,
/* be careful to apply operator right way 'round */
if (varonleft)
match = DatumGetBool(FunctionCall2Coll(&eqproc,
sslot.stacoll,
collation,
sslot.values[i],
constval));
else
match = DatumGetBool(FunctionCall2Coll(&eqproc,
sslot.stacoll,
collation,
constval,
sslot.values[i]));
if (match)
@ -555,6 +563,7 @@ neqsel(PG_FUNCTION_ARGS)
*/
static double
scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
Oid collation,
VariableStatData *vardata, Datum constval, Oid consttype)
{
Form_pg_statistic stats;
@ -654,15 +663,16 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
* to the result selectivity. Also add up the total fraction represented
* by MCV entries.
*/
mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
mcv_selec = mcv_selectivity_ext(vardata, &opproc, collation, constval, true,
&sumcommon);
/*
* If there is a histogram, determine which bin the constant falls in, and
* compute the resulting contribution to selectivity.
*/
hist_selec = ineq_histogram_selectivity(root, vardata,
hist_selec = ineq_histogram_selectivity_ext(root, vardata,
&opproc, isgt, iseq,
collation,
constval, consttype);
/*
@ -707,6 +717,15 @@ double
mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
double *sumcommonp)
{
/*
 * Compatibility wrapper that keeps the signature lacking a collation
 * argument.  All arguments are passed through unchanged to
 * mcv_selectivity_ext(), with DEFAULT_COLLATION_OID supplied as the
 * collation; the result is whatever that function returns.
 */
return mcv_selectivity_ext(vardata, opproc, DEFAULT_COLLATION_OID,
constval, varonleft, sumcommonp);
}
double
mcv_selectivity_ext(VariableStatData *vardata, FmgrInfo *opproc, Oid collation,
Datum constval, bool varonleft,
double *sumcommonp)
{
double mcv_selec,
sumcommon;
@ -726,11 +745,11 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
sslot.stacoll,
collation,
sslot.values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
sslot.stacoll,
collation,
constval,
sslot.values[i])))
mcv_selec += sslot.numbers[i];
@ -780,6 +799,20 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
int min_hist_size, int n_skip,
int *hist_size)
{
/*
 * Compatibility wrapper that keeps the signature lacking a collation
 * argument.  All arguments are passed through unchanged to
 * histogram_selectivity_ext(), with DEFAULT_COLLATION_OID supplied as
 * the collation.
 */
return histogram_selectivity_ext(vardata,
opproc, DEFAULT_COLLATION_OID,
constval, varonleft,
min_hist_size, n_skip,
hist_size);
}
double
histogram_selectivity_ext(VariableStatData *vardata,
FmgrInfo *opproc, Oid collation,
Datum constval, bool varonleft,
int min_hist_size, int n_skip,
int *hist_size)
{
double result;
AttStatsSlot sslot;
@ -804,11 +837,11 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
sslot.stacoll,
collation,
sslot.values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
sslot.stacoll,
collation,
constval,
sslot.values[i])))
nmatch++;
@ -848,6 +881,19 @@ ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt, bool iseq,
Datum constval, Oid consttype)
{
/*
 * Compatibility wrapper that keeps the signature lacking a collation
 * argument.  All arguments are passed through unchanged to
 * ineq_histogram_selectivity_ext(), with DEFAULT_COLLATION_OID supplied
 * as the collation.
 */
return ineq_histogram_selectivity_ext(root, vardata,
opproc, isgt, iseq,
DEFAULT_COLLATION_OID,
constval, consttype);
}
double
ineq_histogram_selectivity_ext(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt, bool iseq,
Oid collation,
Datum constval, Oid consttype)
{
double hist_selec;
AttStatsSlot sslot;
@ -860,9 +906,11 @@ ineq_histogram_selectivity(PlannerInfo *root,
* column type. However, to make that work we will need to figure out
* which staop to search for --- it's not necessarily the one we have at
* hand! (For example, we might have a '<=' operator rather than the '<'
* operator that will appear in staop.) For now, assume that whatever
* appears in pg_statistic is sorted the same way our operator sorts, or
* the reverse way if isgt is true.
* operator that will appear in staop.) The collation might not agree
* either. For now, just assume that whatever appears in pg_statistic is
* sorted the same way our operator sorts, or the reverse way if isgt is
* true. This could result in a bogus estimate, but it still seems better
* than falling back to the default estimate.
*/
if (HeapTupleIsValid(vardata->statsTuple) &&
statistic_proc_security_check(vardata, opproc->fn_oid) &&
@ -908,6 +956,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
have_end = get_actual_variable_range(root,
vardata,
sslot.staop,
collation,
&sslot.values[0],
&sslot.values[1]);
@ -925,17 +974,19 @@ ineq_histogram_selectivity(PlannerInfo *root,
have_end = get_actual_variable_range(root,
vardata,
sslot.staop,
collation,
&sslot.values[0],
NULL);
else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
have_end = get_actual_variable_range(root,
vardata,
sslot.staop,
collation,
NULL,
&sslot.values[probe]);
ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
sslot.stacoll,
collation,
sslot.values[probe],
constval));
if (isgt)
@ -1020,7 +1071,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
* values to a uniform comparison scale, and do a linear
* interpolation within this bin.
*/
if (convert_to_scalar(constval, consttype, sslot.stacoll,
if (convert_to_scalar(constval, consttype, collation,
&val,
sslot.values[i - 1], sslot.values[i],
vardata->vartype,
@ -1160,6 +1211,7 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
Oid collation = PG_GET_COLLATION();
VariableStatData vardata;
Node *other;
bool varonleft;
@ -1212,7 +1264,7 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
}
/* The rest of the work is done by scalarineqsel(). */
selec = scalarineqsel(root, operator, isgt, iseq,
selec = scalarineqsel(root, operator, isgt, iseq, collation,
&vardata, constval, consttype);
ReleaseVariableStats(vardata);
@ -1277,7 +1329,7 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
* A boolean variable V is equivalent to the clause V = 't', so we
* compute the selectivity as if that is what we have.
*/
selec = var_eq_const(&vardata, BooleanEqualOperator,
selec = var_eq_const_ext(&vardata, BooleanEqualOperator, InvalidOid,
BoolGetDatum(true), false, true, false);
}
else
@ -2003,6 +2055,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
#endif
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
Oid collation = PG_GET_COLLATION();
double selec;
double selec_inner;
VariableStatData vardata1;
@ -2053,7 +2106,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
}
/* We need to compute the inner-join selectivity in all cases */
selec_inner = eqjoinsel_inner(opfuncoid,
selec_inner = eqjoinsel_inner(opfuncoid, collation,
&vardata1, &vardata2,
nd1, nd2,
isdefault1, isdefault2,
@ -2080,7 +2133,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
if (!join_is_reversed)
selec = eqjoinsel_semi(opfuncoid,
selec = eqjoinsel_semi(opfuncoid, collation,
&vardata1, &vardata2,
nd1, nd2,
isdefault1, isdefault2,
@ -2093,7 +2146,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
Oid commop = get_commutator(operator);
Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
selec = eqjoinsel_semi(commopfuncoid,
selec = eqjoinsel_semi(commopfuncoid, collation,
&vardata2, &vardata1,
nd2, nd1,
isdefault2, isdefault1,
@ -2141,7 +2194,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
* that it's worth trying to distinguish them here.
*/
static double
eqjoinsel_inner(Oid opfuncoid,
eqjoinsel_inner(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
@ -2203,7 +2256,7 @@ eqjoinsel_inner(Oid opfuncoid,
if (hasmatch2[j])
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
sslot1->stacoll,
collation,
sslot1->values[i],
sslot2->values[j])))
{
@ -2321,7 +2374,7 @@ eqjoinsel_inner(Oid opfuncoid,
* Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
*/
static double
eqjoinsel_semi(Oid opfuncoid,
eqjoinsel_semi(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
@ -2415,7 +2468,7 @@ eqjoinsel_semi(Oid opfuncoid,
if (hasmatch2[j])
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
sslot1->stacoll,
collation,
sslot1->values[i],
sslot2->values[j])))
{
@ -2635,6 +2688,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
Oid op_lefttype;
Oid op_righttype;
Oid opno,
collation,
lsortop,
rsortop,
lstatop,
@ -2659,6 +2713,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
if (!is_opclause(clause))
return; /* shouldn't happen */
opno = ((OpExpr *) clause)->opno;
collation = ((OpExpr *) clause)->inputcollid;
left = get_leftop((Expr *) clause);
right = get_rightop((Expr *) clause);
if (!right)
@ -2792,20 +2847,20 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
/* Try to get ranges of both inputs */
if (!isgt)
{
if (!get_variable_range(root, &leftvar, lstatop,
if (!get_variable_range(root, &leftvar, lstatop, collation,
&leftmin, &leftmax))
goto fail; /* no range available from stats */
if (!get_variable_range(root, &rightvar, rstatop,
if (!get_variable_range(root, &rightvar, rstatop, collation,
&rightmin, &rightmax))
goto fail; /* no range available from stats */
}
else
{
/* need to swap the max and min */
if (!get_variable_range(root, &leftvar, lstatop,
if (!get_variable_range(root, &leftvar, lstatop, collation,
&leftmax, &leftmin))
goto fail; /* no range available from stats */
if (!get_variable_range(root, &rightvar, rstatop,
if (!get_variable_range(root, &rightvar, rstatop, collation,
&rightmax, &rightmin))
goto fail; /* no range available from stats */
}
@ -2815,13 +2870,13 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
* fraction that's <= the right-side maximum value. But only believe
* non-default estimates, else stick with our 1.0.
*/
selec = scalarineqsel(root, leop, isgt, true, &leftvar,
selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
rightmax, op_righttype);
if (selec != DEFAULT_INEQ_SEL)
*leftend = selec;
/* And similarly for the right variable. */
selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
leftmax, op_lefttype);
if (selec != DEFAULT_INEQ_SEL)
*rightend = selec;
@ -2845,13 +2900,13 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
* minimum value. But only believe non-default estimates, else stick with
* our own default.
*/
selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
rightmin, op_righttype);
if (selec != DEFAULT_INEQ_SEL)
*leftstart = selec;
/* And similarly for the right variable. */
selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
leftmin, op_lefttype);
if (selec != DEFAULT_INEQ_SEL)
*rightstart = selec;
@ -5124,9 +5179,11 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
*
* sortop is the "<" comparison operator to use. This should generally
* be "<" not ">", as only the former is likely to be found in pg_statistic.
* The collation must be specified too.
*/
static bool
get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
get_variable_range(PlannerInfo *root, VariableStatData *vardata,
Oid sortop, Oid collation,
Datum *min, Datum *max)
{
Datum tmin = 0;
@ -5146,7 +5203,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
* before enabling this.
*/
#ifdef NOT_USED
if (get_actual_variable_range(root, vardata, sortop, min, max))
if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
return true;
#endif
@ -5174,7 +5231,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
*
* If there is a histogram that is sorted with some other operator than
* the one we want, fail --- this suggests that there is data we can't
* use.
* use. XXX consider collation too.
*/
if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_HISTOGRAM, sortop,
@ -5221,14 +5278,14 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
continue;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
sslot.stacoll,
collation,
sslot.values[i], tmin)))
{
tmin = sslot.values[i];
tmin_is_mcv = true;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
sslot.stacoll,
collation,
tmax, sslot.values[i])))
{
tmax = sslot.values[i];
@ -5258,10 +5315,11 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
* If no data available, return false.
*
* sortop is the "<" comparison operator to use.
* collation is the required collation.
*/
static bool
get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
Oid sortop,
Oid sortop, Oid collation,
Datum *min, Datum *max)
{
bool have_data = false;
@ -5301,9 +5359,11 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
continue;
/*
* The first index column must match the desired variable and sort
* operator --- but we can use a descending-order index.
* The first index column must match the desired variable, sortop, and
* collation --- but we can use a descending-order index.
*/
if (collation != index->indexcollations[0])
continue; /* test first 'cause it's cheapest */
if (!match_index_to_operand(vardata->var, 0, index))
continue;
switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))

View File

@ -143,17 +143,36 @@ extern double get_variable_numdistinct(VariableStatData *vardata,
/*
 * mcv_selectivity, histogram_selectivity, ineq_histogram_selectivity and
 * var_eq_const are each declared in two forms.  The "_ext" form takes an
 * explicit collation OID, which is the collation passed along when the
 * operator function is invoked on the statistics values.  The plain form
 * has no collation argument and is implemented as a wrapper that calls
 * the "_ext" form with DEFAULT_COLLATION_OID.
 */
extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
double *sumcommonp);
extern double mcv_selectivity_ext(VariableStatData *vardata,
FmgrInfo *opproc, Oid collation,
Datum constval, bool varonleft,
double *sumcommonp);
extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
int min_hist_size, int n_skip,
int *hist_size);
extern double histogram_selectivity_ext(VariableStatData *vardata,
FmgrInfo *opproc, Oid collation,
Datum constval, bool varonleft,
int min_hist_size, int n_skip,
int *hist_size);
extern double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt, bool iseq,
Datum constval, Oid consttype);
extern double ineq_histogram_selectivity_ext(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc,
bool isgt, bool iseq,
Oid collation,
Datum constval, Oid consttype);
extern double var_eq_const(VariableStatData *vardata, Oid oproid,
Datum constval, bool constisnull,
bool varonleft, bool negate);
extern double var_eq_const_ext(VariableStatData *vardata,
Oid oproid, Oid collation,
Datum constval, bool constisnull,
bool varonleft, bool negate);
extern double var_eq_non_const(VariableStatData *vardata, Oid oproid,
Node *other,
bool varonleft, bool negate);