diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index ae5c8f084e9..bcfbaa1c3d1 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -1217,7 +1217,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, fmgr_info(get_opcode(geopr), &opproc); prefixsel = ineq_histogram_selectivity(root, vardata, - &opproc, true, true, + geopr, &opproc, true, true, collation, prefixcon->constvalue, prefixcon->consttype); @@ -1238,7 +1238,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Selectivity topsel; topsel = ineq_histogram_selectivity(root, vardata, - &opproc, false, false, + ltopr, &opproc, false, false, collation, greaterstrcon->constvalue, greaterstrcon->consttype); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 23322773078..0f02f32ffd5 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -192,6 +192,10 @@ static void examine_simple_variable(PlannerInfo *root, Var *var, static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max); +static void get_stats_slot_range(AttStatsSlot *sslot, + Oid opfuncoid, FmgrInfo *opproc, + Oid collation, int16 typLen, bool typByVal, + Datum *min, Datum *max, bool *p_have_data); static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, @@ -679,7 +683,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, * compute the resulting contribution to selectivity. */ hist_selec = ineq_histogram_selectivity(root, vardata, - &opproc, isgt, iseq, + operator, &opproc, isgt, iseq, collation, constval, consttype); @@ -1019,6 +1023,9 @@ generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation, * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST. * The isgt and iseq flags distinguish which of the four cases apply. * + * While opproc could be looked up from the operator OID, common callers + * also need to call it separately, so we make the caller pass both. + * * Returns -1 if there is no histogram (valid results will always be >= 0). * * Note that the result disregards both the most-common-values (if any) and @@ -1030,7 +1037,7 @@ generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation, double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, - FmgrInfo *opproc, bool isgt, bool iseq, + Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype) { @@ -1042,14 +1049,14 @@ ineq_histogram_selectivity(PlannerInfo *root, /* * Someday, ANALYZE might store more than one histogram per rel/att, * corresponding to more than one possible sort ordering defined for the - * column type. However, to make that work we will need to figure out - * which staop to search for --- it's not necessarily the one we have at - * hand! (For example, we might have a '<=' operator rather than the '<' - * operator that will appear in staop.) The collation might not agree - * either. For now, just assume that whatever appears in pg_statistic is - * sorted the same way our operator sorts, or the reverse way if isgt is - * true. This could result in a bogus estimate, but it still seems better - * than falling back to the default estimate. + * column type. Right now, we know there is only one, so just grab it and + * see if it matches the query. + * + * Note that we can't use opoid as search argument; the staop appearing in + * pg_statistic will be for the relevant '<' operator, but what we have + * might be some other inequality operator such as '>='. (Even if opoid + * is a '<' operator, it could be cross-type.) Hence we must use + * comparison_ops_are_compatible() to see if the operators match. */ if (HeapTupleIsValid(vardata->statsTuple) && statistic_proc_security_check(vardata, opproc->fn_oid) && @@ -1057,16 +1064,15 @@ ineq_histogram_selectivity(PlannerInfo *root, STATISTIC_KIND_HISTOGRAM, InvalidOid, ATTSTATSSLOT_VALUES)) { - if (sslot.nvalues > 1) + if (sslot.nvalues > 1 && + sslot.stacoll == collation && + comparison_ops_are_compatible(sslot.staop, opoid)) { /* * Use binary search to find the desired location, namely the * right end of the histogram bin containing the comparison value, * which is the leftmost entry for which the comparison operator - * succeeds (if isgt) or fails (if !isgt). (If the given operator - * isn't actually sort-compatible with the histogram, you'll get - * garbage results ... but probably not any more garbage-y than - * you would have from the old linear search.) + * succeeds (if isgt) or fails (if !isgt). * * In this loop, we pay no attention to whether the operator iseq * or not; that detail will be mopped up below. (We cannot tell, @@ -1332,6 +1338,50 @@ ineq_histogram_selectivity(PlannerInfo *root, hist_selec = 1.0 - cutoff; } } + else if (sslot.nvalues > 1) + { + /* + * If we get here, we have a histogram but it's not sorted the way + * we want. Do a brute-force search to see how many of the + * entries satisfy the comparison condition, and take that + * fraction as our estimate. (This is identical to the inner loop + * of histogram_selectivity; maybe share code?) + */ + LOCAL_FCINFO(fcinfo, 2); + int nmatch = 0; + + InitFunctionCallInfoData(*fcinfo, opproc, 2, collation, + NULL, NULL); + fcinfo->args[0].isnull = false; + fcinfo->args[1].isnull = false; + fcinfo->args[1].value = constval; + for (int i = 0; i < sslot.nvalues; i++) + { + Datum fresult; + + fcinfo->args[0].value = sslot.values[i]; + fcinfo->isnull = false; + fresult = FunctionCallInvoke(fcinfo); + if (!fcinfo->isnull && DatumGetBool(fresult)) + nmatch++; + } + hist_selec = ((double) nmatch) / ((double) sslot.nvalues); + + /* + * As above, clamp to a hundredth of the histogram resolution. + * This case is surely even less trustworthy than the normal one, + * so we shouldn't believe exact 0 or 1 selectivity. (Maybe the + * clamp should be more restrictive in this case?) + */ + { + double cutoff = 0.01 / (double) (sslot.nvalues - 1); + + if (hist_selec < cutoff) + hist_selec = cutoff; + else if (hist_selec > 1.0 - cutoff) + hist_selec = 1.0 - cutoff; + } + } free_attstatsslot(&sslot); } @@ -5363,8 +5413,8 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, int16 typLen; bool typByVal; Oid opfuncoid; + FmgrInfo opproc; AttStatsSlot sslot; - int i; /* * XXX It's very tempting to try to use the actual column min and max, if @@ -5395,20 +5445,19 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, (opfuncoid = get_opcode(sortop)))) return false; + opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */ + get_typlenbyval(vardata->atttype, &typLen, &typByVal); /* - * If there is a histogram, grab the first and last values. - * - * If there is a histogram that is sorted with some other operator than - * the one we want, fail --- this suggests that there is data we can't - * use. XXX consider collation too. + * If there is a histogram with the ordering we want, grab the first and + * last values. */ if (get_attstatsslot(&sslot, vardata->statsTuple, STATISTIC_KIND_HISTOGRAM, sortop, ATTSTATSSLOT_VALUES)) { - if (sslot.nvalues > 0) + if (sslot.stacoll == collation && sslot.nvalues > 0) { tmin = datumCopy(sslot.values[0], typByVal, typLen); tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen); @@ -5416,57 +5465,36 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, } free_attstatsslot(&sslot); } - else if (get_attstatsslot(&sslot, vardata->statsTuple, - STATISTIC_KIND_HISTOGRAM, InvalidOid, - 0)) + + /* + * Otherwise, if there is a histogram with some other ordering, scan it + * and get the min and max values according to the ordering we want. This + * of course may not find values that are really extremal according to our + * ordering, but it beats ignoring available data. + */ + if (!have_data && + get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + ATTSTATSSLOT_VALUES)) { + get_stats_slot_range(&sslot, opfuncoid, &opproc, + collation, typLen, typByVal, + &tmin, &tmax, &have_data); free_attstatsslot(&sslot); - return false; } /* * If we have most-common-values info, look for extreme MCVs. This is * needed even if we also have a histogram, since the histogram excludes - * the MCVs. However, usually the MCVs will not be the extreme values, so - * avoid unnecessary data copying. + * the MCVs. */ if (get_attstatsslot(&sslot, vardata->statsTuple, STATISTIC_KIND_MCV, InvalidOid, ATTSTATSSLOT_VALUES)) { - bool tmin_is_mcv = false; - bool tmax_is_mcv = false; - FmgrInfo opproc; - - fmgr_info(opfuncoid, &opproc); - - for (i = 0; i < sslot.nvalues; i++) - { - if (!have_data) - { - tmin = tmax = sslot.values[i]; - tmin_is_mcv = tmax_is_mcv = have_data = true; - continue; - } - if (DatumGetBool(FunctionCall2Coll(&opproc, - collation, - sslot.values[i], tmin))) - { - tmin = sslot.values[i]; - tmin_is_mcv = true; - } - if (DatumGetBool(FunctionCall2Coll(&opproc, - collation, - tmax, sslot.values[i]))) - { - tmax = sslot.values[i]; - tmax_is_mcv = true; - } - } - if (tmin_is_mcv) - tmin = datumCopy(tmin, typByVal, typLen); - if (tmax_is_mcv) - tmax = datumCopy(tmax, typByVal, typLen); + get_stats_slot_range(&sslot, opfuncoid, &opproc, + collation, typLen, typByVal, + &tmin, &tmax, &have_data); free_attstatsslot(&sslot); } @@ -5475,6 +5503,62 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, return have_data; } +/* + * get_stats_slot_range: scan sslot for min/max values + * + * Subroutine for get_variable_range: update min/max/have_data according + * to what we find in the statistics array. + */ +static void +get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc, + Oid collation, int16 typLen, bool typByVal, + Datum *min, Datum *max, bool *p_have_data) +{ + Datum tmin = *min; + Datum tmax = *max; + bool have_data = *p_have_data; + bool found_tmin = false; + bool found_tmax = false; + + /* Look up the comparison function, if we didn't already do so */ + if (opproc->fn_oid != opfuncoid) + fmgr_info(opfuncoid, opproc); + + /* Scan all the slot's values */ + for (int i = 0; i < sslot->nvalues; i++) + { + if (!have_data) + { + tmin = tmax = sslot->values[i]; + found_tmin = found_tmax = true; + *p_have_data = have_data = true; + continue; + } + if (DatumGetBool(FunctionCall2Coll(opproc, + collation, + sslot->values[i], tmin))) + { + tmin = sslot->values[i]; + found_tmin = true; + } + if (DatumGetBool(FunctionCall2Coll(opproc, + collation, + tmax, sslot->values[i]))) + { + tmax = sslot->values[i]; + found_tmax = true; + } + } + + /* + * Copy the slot's values, if we found new extreme values. + */ + if (found_tmin) + *min = datumCopy(tmin, typByVal, typLen); + if (found_tmax) + *max = datumCopy(tmax, typByVal, typLen); +} + /* * get_actual_variable_range diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 63d1263502d..f3bf413829f 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -731,6 +731,55 @@ equality_ops_are_compatible(Oid opno1, Oid opno2) return result; } +/* + * comparison_ops_are_compatible + * Return true if the two given comparison operators have compatible + * semantics. + * + * This is trivially true if they are the same operator. Otherwise, + * we look to see if they can be found in the same btree opfamily. + * For example, '<' and '>=' ops match if they belong to the same family. + * + * (This is identical to equality_ops_are_compatible(), except that we + * don't bother to examine hash opclasses.) + */ +bool +comparison_ops_are_compatible(Oid opno1, Oid opno2) +{ + bool result; + CatCList *catlist; + int i; + + /* Easy if they're the same operator */ + if (opno1 == opno2) + return true; + + /* + * We search through all the pg_amop entries for opno1. + */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno1)); + + result = false; + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple op_tuple = &catlist->members[i]->tuple; + Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); + + if (op_form->amopmethod == BTREE_AM_OID) + { + if (op_in_opfamily(opno2, op_form->amopfamily)) + { + result = true; + break; + } + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + /* ---------- AMPROC CACHES ---------- */ @@ -3028,19 +3077,6 @@ get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, sslot->staop = (&stats->staop1)[i]; sslot->stacoll = (&stats->stacoll1)[i]; - /* - * XXX Hopefully-temporary hack: if stacoll isn't set, inject the default - * collation. This won't matter for non-collation-aware datatypes. For - * those that are, this covers cases where stacoll has not been set. In - * the short term we need this because some code paths involving type NAME - * do not pass any collation to prefix_selectivity and related functions. - * Even when that's been fixed, it's likely that some add-on typanalyze - * functions won't get the word right away about filling stacoll during - * ANALYZE, so we'll probably need this for awhile. - */ - if (sslot->stacoll == InvalidOid) - sslot->stacoll = DEFAULT_COLLATION_OID; - if (flags & ATTSTATSSLOT_VALUES) { val = SysCacheGetAttr(STATRELATTINH, statstuple, diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 91aed1f5a51..fecfe1f4f6e 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -82,6 +82,7 @@ extern bool get_op_hash_functions(Oid opno, RegProcedure *lhs_procno, RegProcedure *rhs_procno); extern List *get_op_btree_interpretation(Oid opno); extern bool equality_ops_are_compatible(Oid opno1, Oid opno2); +extern bool comparison_ops_are_compatible(Oid opno1, Oid opno2); extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum); extern char *get_attname(Oid relid, AttrNumber attnum, bool missing_ok); diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 15d22890248..7ac4a063915 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -159,7 +159,8 @@ extern double generic_restriction_selectivity(PlannerInfo *root, double default_selectivity); extern double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, - FmgrInfo *opproc, bool isgt, bool iseq, + Oid opoid, FmgrInfo *opproc, + bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype); extern double var_eq_const(VariableStatData *vardata, diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out index c2d037b6141..7caf0c9b6b9 100644 --- a/src/test/regress/expected/privileges.out +++ b/src/test/regress/expected/privileges.out @@ -191,7 +191,10 @@ CREATE TABLE atest12 as SELECT x AS a, 10001 - x AS b FROM generate_series(1,10000) x; CREATE INDEX ON atest12 (a); CREATE INDEX ON atest12 (abs(a)); +-- results below depend on having quite accurate stats for atest12 +SET default_statistics_target = 10000; VACUUM ANALYZE atest12; +RESET default_statistics_target; CREATE FUNCTION leak(integer,integer) RETURNS boolean AS $$begin return $1 < $2; end$$ LANGUAGE plpgsql immutable; diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql index 2ba69617dce..0ab5245b1eb 100644 --- a/src/test/regress/sql/privileges.sql +++ b/src/test/regress/sql/privileges.sql @@ -136,7 +136,10 @@ CREATE TABLE atest12 as SELECT x AS a, 10001 - x AS b FROM generate_series(1,10000) x; CREATE INDEX ON atest12 (a); CREATE INDEX ON atest12 (abs(a)); +-- results below depend on having quite accurate stats for atest12 +SET default_statistics_target = 10000; VACUUM ANALYZE atest12; +RESET default_statistics_target; CREATE FUNCTION leak(integer,integer) RETURNS boolean AS $$begin return $1 < $2; end$$