diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d4c6f6ed5a4..93e4a8906c5 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -121,6 +121,7 @@ #include "parser/parse_clause.h" #include "parser/parse_relation.h" #include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" #include "statistics/statistics.h" #include "storage/bufmgr.h" #include "utils/acl.h" @@ -3306,6 +3307,15 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, ndistinct = get_variable_numdistinct(vardata, &isdefault); + /* + * The nullingrels bits within the var could cause the same var to be + * counted multiple times if it's marked with different nullingrels. They + * could also prevent us from matching the var to the expressions in + * extended statistics (see estimate_multivariate_ndistinct). So strip + * them out first. + */ + var = remove_nulling_relids(var, root->outer_join_rels, NULL); + foreach(lc, varinfos) { varinfo = (GroupVarInfo *) lfirst(lc); @@ -5025,6 +5035,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, { Node *basenode; Relids varnos; + Relids basevarnos; RelOptInfo *onerel; /* Make sure we don't return dangling pointers in vardata */ @@ -5066,10 +5077,11 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, * relation are considered "real" vars. */ varnos = pull_varnos(root, basenode); + basevarnos = bms_difference(varnos, root->outer_join_rels); onerel = NULL; - if (bms_is_empty(varnos)) + if (bms_is_empty(basevarnos)) { /* No Vars at all ... must be pseudo-constant clause */ } @@ -5077,7 +5089,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, { int relid; - if (bms_get_singleton_member(varnos, &relid)) + /* Check if the expression is in vars of a single base relation */ + if (bms_get_singleton_member(basevarnos, &relid)) { if (varRelid == 0 || varRelid == relid) { @@ -5107,7 +5120,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, } } - bms_free(varnos); + bms_free(basevarnos); vardata->var = node; vardata->atttype = exprType(node); @@ -5132,6 +5145,14 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, ListCell *slist; Oid userid; + /* + * The nullingrels bits within the expression could prevent us from + * matching it to expressional index columns or to the expressions in + * extended statistics. So strip them out first. + */ + if (bms_overlap(varnos, root->outer_join_rels)) + node = remove_nulling_relids(node, root->outer_join_rels, NULL); + /* * Determine the user ID to use for privilege checks: either * onerel->userid if it's set (e.g., in case we're accessing the table @@ -5402,6 +5423,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, } } } + + bms_free(varnos); } /* diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 0c9b312eafd..079fcf46f0d 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2573,10 +2573,11 @@ where t1.f1 = coalesce(t2.f1, 1); -> Materialize -> Seq Scan on int4_tbl t2 Filter: (f1 > 1) - -> Seq Scan on int4_tbl t3 + -> Materialize + -> Seq Scan on int4_tbl t3 -> Materialize -> Seq Scan on int4_tbl t4 -(13 rows) +(14 rows) explain (costs off) select * from int4_tbl t1 @@ -8217,3 +8218,24 @@ SELECT * FROM rescan_bhs t1 LEFT JOIN rescan_bhs t2 ON t1.a IN RESET enable_seqscan; RESET enable_indexscan; +-- Test that we do not account for nullingrels when looking up statistics +CREATE TABLE group_tbl (a INT, b INT); +INSERT INTO group_tbl SELECT 1, 1; +CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl; +ANALYZE group_tbl; +EXPLAIN (COSTS OFF) +SELECT 1 FROM group_tbl t1 + LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE +GROUP BY s.c1, s.c2; + QUERY PLAN +-------------------------------------------- + Group + Group Key: t2.a, (COALESCE(t2.a)) + -> Sort + Sort Key: t2.a, (COALESCE(t2.a)) + -> Nested Loop Left Join + -> Seq Scan on group_tbl t1 + -> Seq Scan on group_tbl t2 +(7 rows) + +DROP TABLE group_tbl; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 8cfc1053cb7..779d56cb30f 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -3033,3 +3033,16 @@ SELECT * FROM rescan_bhs t1 LEFT JOIN rescan_bhs t2 ON t1.a IN RESET enable_seqscan; RESET enable_indexscan; + +-- Test that we do not account for nullingrels when looking up statistics +CREATE TABLE group_tbl (a INT, b INT); +INSERT INTO group_tbl SELECT 1, 1; +CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl; +ANALYZE group_tbl; + +EXPLAIN (COSTS OFF) +SELECT 1 FROM group_tbl t1 + LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE +GROUP BY s.c1, s.c2; + +DROP TABLE group_tbl;