From 1f171a1803c28d3ae24636c9ca3352ec82c39e5f Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Mon, 27 Mar 2017 12:52:50 -0300 Subject: [PATCH] Fix thinko in estimate_num_groups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code for the reworked n-distinct estimation on commit 7b504eb282 was written differently in a previous version of the patch, prior to commit; on rewriting it, we missed updating an initializer. This caused the code to (mistakenly) apply a fudge factor even in the case where a single value is applied, leading to incorrect results. This means that the 'relvarcount' variable name is now wrong. Add a comment to try and make the situation clearer, and remove an incorrect comment I added. Problem noticed, and code patch, by Tomas Vondra. Additional commentary by Álvaro. --- src/backend/utils/adt/selfuncs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index cc24c8aeb56..5c382a2013e 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3404,7 +3404,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, RelOptInfo *rel = varinfo1->rel; double reldistinct = 1; double relmaxndistinct = reldistinct; - int relvarcount = 1; + int relvarcount = 0; List *newvarinfos = NIL; List *relvarinfos = NIL; @@ -3436,6 +3436,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, * we multiply them together. Any remaining relvarinfos after * no more multivariate matches are found are assumed independent too, * so their individual ndistinct estimates are multiplied also. + * + * While iterating, count how many separate numdistinct values we + * apply. We apply a fudge factor below, but only if we multiplied + * more than one such values. */ while (relvarinfos) { @@ -3447,7 +3451,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, reldistinct *= mvndistinct; if (relmaxndistinct < mvndistinct) relmaxndistinct = mvndistinct; - relvarcount++; /* inaccurate, but doesn't matter */ + relvarcount++; } else {