From f84ff0c6d4eb4e470e55f48103a7edd269d13c49 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 18 Nov 2022 11:01:03 -0500 Subject: [PATCH] Don't read MCV stats needlessly in eqjoinsel(). eqjoinsel() currently makes use of MCV stats only when we have such stats for both sides of the clause. As coded, though, it would fetch those stats even when they're present for just one side. This can be a bit expensive with high statistics targets, leading to wasted effort in common cases such as joining a unique column to a non-unique column. So it seems worth the trouble to do a quick pre-check to confirm that both sides have MCVs before fetching either. Also, tweak the API spec for get_attstatsslot() to document the method we're using here. David Geier, Tomas Vondra, Tom Lane Discussion: https://postgr.es/m/b9846ca0-5f1c-9b26-5881-aad3f42b07f0@gmail.com --- src/backend/utils/adt/selfuncs.c | 20 ++++++++++++++++++-- src/backend/utils/cache/lsyscache.c | 4 ++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d597b7e81fc..e0aeaa69092 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2261,6 +2261,7 @@ eqjoinsel(PG_FUNCTION_ARGS) Form_pg_statistic stats2 = NULL; bool have_mcvs1 = false; bool have_mcvs2 = false; + bool get_mcv_stats; bool join_is_reversed; RelOptInfo *inner_rel; @@ -2275,11 +2276,25 @@ eqjoinsel(PG_FUNCTION_ARGS) memset(&sslot1, 0, sizeof(sslot1)); memset(&sslot2, 0, sizeof(sslot2)); + /* + * There is no use in fetching one side's MCVs if we lack MCVs for the + * other side, so do a quick check to verify that both stats exist. + */ + get_mcv_stats = (HeapTupleIsValid(vardata1.statsTuple) && + HeapTupleIsValid(vardata2.statsTuple) && + get_attstatsslot(&sslot1, vardata1.statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + 0) && + get_attstatsslot(&sslot2, vardata2.statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + 0)); + if (HeapTupleIsValid(vardata1.statsTuple)) { /* note we allow use of nullfrac regardless of security check */ stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple); - if (statistic_proc_security_check(&vardata1, opfuncoid)) + if (get_mcv_stats && + statistic_proc_security_check(&vardata1, opfuncoid)) have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple, STATISTIC_KIND_MCV, InvalidOid, ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS); @@ -2289,7 +2304,8 @@ eqjoinsel(PG_FUNCTION_ARGS) { /* note we allow use of nullfrac regardless of security check */ stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple); - if (statistic_proc_security_check(&vardata2, opfuncoid)) + if (get_mcv_stats && + statistic_proc_security_check(&vardata2, opfuncoid)) have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple, STATISTIC_KIND_MCV, InvalidOid, ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS); diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index a16a63f4957..94ca8e12303 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -3183,6 +3183,10 @@ get_attavgwidth(Oid relid, AttrNumber attnum) * * If it's desirable to call free_attstatsslot when get_attstatsslot might * not have been called, memset'ing sslot to zeroes will allow that. + * + * Passing flags=0 can be useful to quickly check if the requested slot type + * exists. In this case no arrays are extracted, so free_attstatsslot need + * not be called. */ bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple,