diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 5b0da14748a..5783f90b62e 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -744,8 +744,8 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) bool query_supports_distinctness(Query *query) { - /* we don't cope with SRFs, see comment below */ - if (query->hasTargetSRFs) + /* SRFs break distinctness except with DISTINCT, see below */ + if (query->hasTargetSRFs && query->distinctClause == NIL) return false; /* check for features we can prove distinctness with */ @@ -786,21 +786,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) Assert(list_length(colnos) == list_length(opids)); - /* - * A set-returning function in the query's targetlist can result in - * returning duplicate rows, if the SRF is evaluated after the - * de-duplication step; so we play it safe and say "no" if there are any - * SRFs. (We could be certain that it's okay if SRFs appear only in the - * specified columns, since those must be evaluated before de-duplication; - * but it doesn't presently seem worth the complication to check that.) - */ - if (query->hasTargetSRFs) - return false; - /* * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the * columns in the DISTINCT clause appear in colnos and operator semantics - * match. + * match. This is true even if there are SRFs in the DISTINCT columns or + * elsewhere in the tlist. */ if (query->distinctClause) { @@ -819,6 +809,16 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) return true; } + /* + * Otherwise, a set-returning function in the query's targetlist can + * result in returning duplicate rows, despite any grouping that might + * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY + * columns, it would be safe because they'd be expanded before grouping. + * But it doesn't currently seem worth the effort to check for that.) + */ + if (query->hasTargetSRFs) + return false; + /* * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all * the grouped columns appear in colnos and operator semantics match. diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 4bbb4a850eb..edff6da4109 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3361,6 +3361,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset) { List *varinfos = NIL; + double srf_multiplier = 1.0; double numdistinct; ListCell *l; int i; @@ -3394,6 +3395,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, foreach(l, groupExprs) { Node *groupexpr = (Node *) lfirst(l); + double this_srf_multiplier; VariableStatData vardata; List *varshere; ListCell *l2; @@ -3402,6 +3404,21 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, if (pgset && !list_member_int(*pgset, i++)) continue; + /* + * Set-returning functions in grouping columns are a bit problematic. + * The code below will effectively ignore their SRF nature and come up + * with a numdistinct estimate as though they were scalar functions. + * We compensate by scaling up the end result by the largest SRF + * rowcount estimate. (This will be an overestimate if the SRF + * produces multiple copies of any output value, but it seems best to + * assume the SRF's outputs are distinct. In any case, it's probably + * pointless to worry too much about this without much better + * estimates for SRF output rowcounts than we have today.) + */ + this_srf_multiplier = expression_returns_set_rows(groupexpr); + if (srf_multiplier < this_srf_multiplier) + srf_multiplier = this_srf_multiplier; + /* Short-circuit for expressions returning boolean */ if (exprType(groupexpr) == BOOLOID) { @@ -3467,9 +3484,15 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, */ if (varinfos == NIL) { + /* Apply SRF multiplier as we would do in the long path */ + numdistinct *= srf_multiplier; + /* Round off */ + numdistinct = ceil(numdistinct); /* Guard against out-of-range answers */ if (numdistinct > input_rows) numdistinct = input_rows; + if (numdistinct < 1.0) + numdistinct = 1.0; return numdistinct; } @@ -3638,6 +3661,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, varinfos = newvarinfos; } while (varinfos != NIL); + /* Now we can account for the effects of any SRFs */ + numdistinct *= srf_multiplier; + + /* Round off */ numdistinct = ceil(numdistinct); /* Guard against out-of-range answers */