From 97930cf578e28c01f67fe4006ffcdbb5aedf18c2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 31 Aug 2011 16:04:48 -0400 Subject: [PATCH] Improve eqjoinsel's ndistinct clamping to work for multiple levels of join. This patch fixes an oversight in my commit 7f3eba30c9d622d1981b1368f2d79ba0999cdff2 of 2008-10-23. That patch accounted for baserel restriction clauses that reduced the number of rows coming out of a table (and hence the number of possibly-distinct values of a join variable), but not for join restriction clauses that might have been applied at a lower level of join. To account for the latter, look up the sizes of the min_lefthand and min_righthand inputs of the current join, and clamp with those in the same way as for the base relations. Noted while investigating a complaint from Ben Chobot, although this in itself doesn't seem to explain his report. Back-patch to 8.4; previous versions used different estimation methods for which this heuristic isn't relevant. --- src/backend/utils/adt/selfuncs.c | 81 ++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index e0658265ecb..18bbfe9b8d7 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -142,9 +142,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root, FmgrInfo *opproc, bool isgt, Datum constval, Oid consttype); static double eqjoinsel_inner(Oid operator, - VariableStatData *vardata1, VariableStatData *vardata2); + VariableStatData *vardata1, VariableStatData *vardata2, + RelOptInfo *rel1, RelOptInfo *rel2); static double eqjoinsel_semi(Oid operator, - VariableStatData *vardata1, VariableStatData *vardata2); + VariableStatData *vardata1, VariableStatData *vardata2, + RelOptInfo *rel1, RelOptInfo *rel2); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -173,6 +175,7 @@ static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max); +static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid vartype, Oid opfamily, Const *prefixcon); @@ -2008,24 +2011,47 @@ eqjoinsel(PG_FUNCTION_ARGS) VariableStatData vardata1; VariableStatData vardata2; bool join_is_reversed; + RelOptInfo *rel1; + RelOptInfo *rel2; get_join_variables(root, args, sjinfo, &vardata1, &vardata2, &join_is_reversed); + /* + * Identify the join's direct input relations. We use the min lefthand + * and min righthand as the inputs, even though the join might actually + * get done with larger input relations. The min inputs are guaranteed to + * have been formed by now, though, and always using them ensures + * consistency of estimates. + */ + if (!join_is_reversed) + { + rel1 = find_join_input_rel(root, sjinfo->min_lefthand); + rel2 = find_join_input_rel(root, sjinfo->min_righthand); + } + else + { + rel1 = find_join_input_rel(root, sjinfo->min_righthand); + rel2 = find_join_input_rel(root, sjinfo->min_lefthand); + } + switch (sjinfo->jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_FULL: - selec = eqjoinsel_inner(operator, &vardata1, &vardata2); + selec = eqjoinsel_inner(operator, &vardata1, &vardata2, + rel1, rel2); break; case JOIN_SEMI: case JOIN_ANTI: if (!join_is_reversed) - selec = eqjoinsel_semi(operator, &vardata1, &vardata2); + selec = eqjoinsel_semi(operator, &vardata1, &vardata2, + rel1, rel2); else selec = eqjoinsel_semi(get_commutator(operator), - &vardata2, &vardata1); + &vardata2, &vardata1, + rel2, rel1); break; default: /* other values not expected here */ @@ -2051,7 +2077,8 @@ eqjoinsel(PG_FUNCTION_ARGS) */ static double eqjoinsel_inner(Oid operator, - VariableStatData *vardata1, VariableStatData *vardata2) + VariableStatData *vardata1, VariableStatData *vardata2, + RelOptInfo *rel1, RelOptInfo *rel2) { double selec; double nd1; @@ -2252,15 +2279,19 @@ eqjoinsel_inner(Oid operator, * be, providing a crude correction for the selectivity of restriction * clauses on those relations. (We don't do that in the other path * since there we are comparing the nd values to stats for the whole - * relations.) + * relations.) We can apply this clamp both with respect to the base + * relations from which the join variables come, and to the immediate + * input relations of the current join. */ double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0; double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0; if (vardata1->rel) nd1 = Min(nd1, vardata1->rel->rows); + nd1 = Min(nd1, rel1->rows); if (vardata2->rel) nd2 = Min(nd2, vardata2->rel->rows); + nd2 = Min(nd2, rel2->rows); selec = (1.0 - nullfrac1) * (1.0 - nullfrac2); if (nd1 > nd2) @@ -2287,7 +2318,8 @@ eqjoinsel_inner(Oid operator, */ static double eqjoinsel_semi(Oid operator, - VariableStatData *vardata1, VariableStatData *vardata2) + VariableStatData *vardata1, VariableStatData *vardata2, + RelOptInfo *rel1, RelOptInfo *rel2) { double selec; double nd1; @@ -2435,8 +2467,10 @@ eqjoinsel_semi(Oid operator, { if (vardata1->rel) nd1 = Min(nd1, vardata1->rel->rows); + nd1 = Min(nd1, rel1->rows); if (vardata2->rel) nd2 = Min(nd2, vardata2->rel->rows); + nd2 = Min(nd2, rel2->rows); if (nd1 <= nd2 || nd2 <= 0) selec = 1.0 - nullfrac1; @@ -4759,6 +4793,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, return have_data; } +/* + * find_join_input_rel + * Look up the input relation for a join. + * + * We assume that the input relation's RelOptInfo must have been constructed + * already. + */ +static RelOptInfo * +find_join_input_rel(PlannerInfo *root, Relids relids) +{ + RelOptInfo *rel = NULL; + + switch (bms_membership(relids)) + { + case BMS_EMPTY_SET: + /* should not happen */ + break; + case BMS_SINGLETON: + rel = find_base_rel(root, bms_singleton_member(relids)); + break; + case BMS_MULTIPLE: + rel = find_join_rel(root, relids); + break; + } + + if (rel == NULL) + elog(ERROR, "could not find RelOptInfo for given relids"); + + return rel; +} + /*------------------------------------------------------------------------- *