diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index a4babed2b04..20b25935ce6 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -3087,7 +3087,7 @@ estimate_path_cost_size(PlannerInfo *root, numGroups = estimate_num_groups(root, get_sortgrouplist_exprs(root->parse->groupClause, fpinfo->grouped_tlist), - input_rows, NULL); + input_rows, NULL, NULL); /* * Get the retrieved_rows and rows estimates. If there are HAVING diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index a25b674a192..b92c9485882 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1969,7 +1969,8 @@ cost_incremental_sort(Path *path, /* Estimate number of groups with equal presorted keys. */ if (!unknown_varno) - input_groups = estimate_num_groups(root, presortedExprs, input_tuples, NULL); + input_groups = estimate_num_groups(root, presortedExprs, input_tuples, + NULL, NULL); group_tuples = input_tuples / input_groups; group_input_run_cost = input_run_cost / input_groups; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index ff536e6b24b..53b24e9e8c8 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1990,6 +1990,7 @@ adjust_rowcount_for_semijoins(PlannerInfo *root, nunique = estimate_num_groups(root, sjinfo->semi_rhs_exprs, nraw, + NULL, NULL); if (rowcount > nunique) rowcount = nunique; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index f529d107d29..0886bf4ae8f 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3702,7 +3702,8 @@ get_number_of_groups(PlannerInfo *root, double numGroups = estimate_num_groups(root, groupExprs, path_rows, - &gset); + &gset, + NULL); gs->numGroups = numGroups; rollup->numGroups += numGroups; @@ -3727,7 +3728,8 @@ 
get_number_of_groups(PlannerInfo *root, double numGroups = estimate_num_groups(root, groupExprs, path_rows, - &gset); + &gset, + NULL); gs->numGroups = numGroups; gd->dNumHashGroups += numGroups; @@ -3743,7 +3745,7 @@ get_number_of_groups(PlannerInfo *root, target_list); dNumGroups = estimate_num_groups(root, groupExprs, path_rows, - NULL); + NULL, NULL); } } else if (parse->groupingSets) @@ -4792,7 +4794,7 @@ create_distinct_paths(PlannerInfo *root, parse->targetList); numDistinctRows = estimate_num_groups(root, distinctExprs, cheapest_input_path->rows, - NULL); + NULL, NULL); } /* diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index becdcbb8725..037dfaacfd4 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -338,6 +338,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, *pNumGroups = estimate_num_groups(subroot, get_tlist_exprs(subquery->targetList, false), subpath->rows, + NULL, NULL); } } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 69b83071cf2..d5c66780ac8 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1713,6 +1713,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->path.rows = estimate_num_groups(root, sjinfo->semi_rhs_exprs, rel->rows, + NULL, NULL); numCols = list_length(sjinfo->semi_rhs_exprs); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 7e41bc56418..0963e2701cb 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3241,6 +3241,7 @@ typedef struct Node *var; /* might be an expression, not just a Var */ RelOptInfo *rel; /* relation it belongs to */ double ndistinct; /* # distinct values */ + bool isdefault; /* true if DEFAULT_NUM_DISTINCT was used */ } GroupVarInfo; static List * @@ -3287,6 +3288,7 @@ add_unique_group_var(PlannerInfo *root, List 
*varinfos, varinfo->var = var; varinfo->rel = vardata->rel; varinfo->ndistinct = ndistinct; + varinfo->isdefault = isdefault; varinfos = lappend(varinfos, varinfo); return varinfos; } @@ -3311,6 +3313,12 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, * pgset - NULL, or a List** pointing to a grouping set to filter the * groupExprs against * + * Outputs: + * estinfo - When passed as non-NULL, the function will set bits in the + * "flags" field in order to provide callers with additional information + * about the estimation. Currently, the only flag defined is + * SELFLAG_USED_DEFAULT, set when any default value was used. + * * Given the lack of any cross-correlation statistics in the system, it's * impossible to do anything really trustworthy with GROUP BY conditions * involving multiple Vars. We should however avoid assuming the worst @@ -3358,7 +3366,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, */ double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, - List **pgset) + List **pgset, EstimationInfo *estinfo) { List *varinfos = NIL; double srf_multiplier = 1.0; @@ -3366,6 +3374,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, ListCell *l; int i; + /* Zero the estinfo output parameter, if non-NULL */ + if (estinfo != NULL) + memset(estinfo, 0, sizeof(EstimationInfo)); + /* * We don't ever want to return an estimate of zero groups, as that tends * to lead to division-by-zero and other unpleasantness. The input_rows @@ -3577,6 +3589,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, if (relmaxndistinct < varinfo2->ndistinct) relmaxndistinct = varinfo2->ndistinct; relvarcount++; + + /* + * When varinfo2's isdefault is set then we'd better set + * the SELFLAG_USED_DEFAULT bit in the EstimationInfo.
+ */ + if (estinfo != NULL && varinfo2->isdefault) + estinfo->flags |= SELFLAG_USED_DEFAULT; + } /* we're done with this relation */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index f9be539602b..78cde58acc1 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -68,6 +68,20 @@ p = 1.0; \ } while (0) +/* + * A set of flags which some selectivity estimation functions can pass back to + * callers to provide further details about some assumptions which were made + * during the estimation. + */ +#define SELFLAG_USED_DEFAULT (1 << 0) /* Estimation fell back on one + * of the DEFAULTs as defined + * above. */ + +typedef struct EstimationInfo +{ + uint32 flags; /* Flags, as defined above, to mark special + * properties of the estimation. */ +} EstimationInfo; /* Return data from examine_variable and friends */ typedef struct VariableStatData @@ -197,7 +211,8 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, Selectivity *rightstart, Selectivity *rightend); extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, - double input_rows, List **pgset); + double input_rows, List **pgset, + EstimationInfo *estinfo); extern void estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,