diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index d912bd9d54b..21a2ef5ad3a 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -8337,8 +8337,9 @@ ALTER TABLE fprt2_p1 SET (autovacuum_enabled = 'false'); ALTER TABLE fprt2_p2 SET (autovacuum_enabled = 'false'); INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i; INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i; -CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250) +CREATE FOREIGN TABLE ftprt2_p1 (b int, c varchar, a int) SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true'); +ALTER TABLE fprt2 ATTACH PARTITION ftprt2_p1 FOR VALUES FROM (0) TO (250); CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500) SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true'); ANALYZE fprt2; @@ -8389,28 +8390,42 @@ SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) 8 | | (5 rows) --- with whole-row reference +-- with whole-row reference; partitionwise join does not apply EXPLAIN (COSTS OFF) -SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2; - QUERY PLAN ---------------------------------------------------------------------------------- +SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; + QUERY PLAN +-------------------------------------------------------- Sort Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2) - -> Append - -> Foreign Scan - Relations: (public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2) - -> Foreign Scan - Relations: (public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2) -(7 rows) + -> Hash Full Join + Hash Cond: (t1.a = t2.b) + -> Append + -> Foreign Scan on ftprt1_p1 t1 + -> Foreign Scan on ftprt1_p2 t1_1 + -> Hash + -> Append + -> Foreign Scan on ftprt2_p1 t2 + -> Foreign Scan on ftprt2_p2 t2_1 +(11 rows) -SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2; - t1 | t2 +SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; + wr | wr ----------------+---------------- (0,0,0000) | (0,0,0000) + (50,50,0001) | + (100,100,0002) | (150,150,0003) | (150,150,0003) + (200,200,0004) | (250,250,0005) | (250,250,0005) + (300,300,0006) | + (350,350,0007) | (400,400,0008) | (400,400,0008) -(4 rows) + (450,450,0009) | + | (75,75,0001) + | (225,225,0004) + | (325,325,0006) + | (475,475,0009) +(14 rows) -- join with lateral reference EXPLAIN (COSTS OFF) @@ -8474,6 +8489,34 @@ SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE | | 475 | t2_phv (14 rows) +-- test FOR UPDATE; partitionwise join does not apply +EXPLAIN (COSTS OFF) +SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; + QUERY PLAN +-------------------------------------------------------------- + LockRows + -> Sort + Sort Key: t1.a + -> Hash Join + Hash Cond: (t2.b = t1.a) + -> Append + -> Foreign Scan on ftprt2_p1 t2 + -> Foreign Scan on ftprt2_p2 t2_1 + -> Hash + -> Append + -> Foreign Scan on ftprt1_p1 t1 + -> Foreign Scan on ftprt1_p2 t1_1 +(12 rows) + +SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; + a | b +-----+----- + 0 | 0 + 150 | 150 + 250 | 250 + 400 | 400 +(4 rows) + RESET enable_partitionwise_join; -- =================================================================== -- test partitionwise aggregates diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index c0b0dd949b6..88c4cb4783f 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -2263,8 +2263,9 @@ ALTER TABLE fprt2_p1 SET (autovacuum_enabled = 'false'); ALTER TABLE fprt2_p2 SET (autovacuum_enabled = 'false'); INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i; INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i; -CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250) +CREATE FOREIGN TABLE ftprt2_p1 (b int, c varchar, a int) SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true'); +ALTER TABLE fprt2 ATTACH PARTITION ftprt2_p1 FOR VALUES FROM (0) TO (250); CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500) SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true'); ANALYZE fprt2; @@ -2281,10 +2282,10 @@ EXPLAIN (COSTS OFF) SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3; SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3; --- with whole-row reference +-- with whole-row reference; partitionwise join does not apply EXPLAIN (COSTS OFF) -SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2; -SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2; +SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; +SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; -- join with lateral reference EXPLAIN (COSTS OFF) @@ -2296,6 +2297,11 @@ EXPLAIN (COSTS OFF) SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b; SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b; +-- test FOR UPDATE; partitionwise join does not apply +EXPLAIN (COSTS OFF) +SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; +SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; + RESET enable_partitionwise_join; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 6269f474d2c..b5af904c183 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2368,6 +2368,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_UINT_FIELD(baserestrict_min_security); WRITE_NODE_FIELD(joininfo); WRITE_BOOL_FIELD(has_eclass_joins); + WRITE_BOOL_FIELD(consider_partitionwise_join); WRITE_BITMAPSET_FIELD(top_parent_relids); WRITE_NODE_FIELD(partitioned_child_rels); } diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 0e80aeb65cd..5db1688bf04 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -909,6 +909,17 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, did_pruning = true; } + /* + * If this is a partitioned baserel, set the consider_partitionwise_join + * flag; currently, we only consider partitionwise joins with the baserel + * if its targetlist doesn't contain a whole-row Var. + */ + if (enable_partitionwise_join && + rel->reloptkind == RELOPT_BASEREL && + rte->relkind == RELKIND_PARTITIONED_TABLE && + rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL) + rel->consider_partitionwise_join = true; + /* * Initialize to compute size estimates for whole append relation. * @@ -956,54 +967,6 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, childrel = find_base_rel(root, childRTindex); Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); - if (rel->part_scheme) - { - AttrNumber attno; - - /* - * We need attr_needed data for building targetlist of a join - * relation representing join between matching partitions for - * partitionwise join. A given attribute of a child will be needed - * in the same highest joinrel where the corresponding attribute - * of parent is needed. Hence it suffices to use the same Relids - * set for parent and child. - */ - for (attno = rel->min_attr; attno <= rel->max_attr; attno++) - { - int index = attno - rel->min_attr; - Relids attr_needed = rel->attr_needed[index]; - - /* System attributes do not need translation. */ - if (attno <= 0) - { - Assert(rel->min_attr == childrel->min_attr); - childrel->attr_needed[index] = attr_needed; - } - else - { - Var *var = list_nth_node(Var, - appinfo->translated_vars, - attno - 1); - int child_index; - - /* - * Ignore any column dropped from the parent. - * Corresponding Var won't have any translation. It won't - * have attr_needed information, since it can not be - * referenced in the query. - */ - if (var == NULL) - { - Assert(attr_needed == NULL); - continue; - } - - child_index = var->varattno - childrel->min_attr; - childrel->attr_needed[child_index] = attr_needed; - } - } - } - /* * Copy/Modify targetlist. Even if this child is deemed empty, we need * its targetlist in case it falls on nullable side in a child-join @@ -1180,6 +1143,22 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, (Node *) rel->joininfo, 1, &appinfo); + /* + * Note: we could compute appropriate attr_needed data for the child's + * variables, by transforming the parent's attr_needed through the + * translated_vars mapping. However, currently there's no need + * because attr_needed is only examined for base relations not + * otherrels. So we just leave the child's attr_needed empty. + */ + + /* + * If we consider partitionwise joins with the parent rel, do the same + * for partitioned child rels. + */ + if (rel->consider_partitionwise_join && + childRTE->relkind == RELKIND_PARTITIONED_TABLE) + childrel->consider_partitionwise_join = true; + /* * If parallelism is allowable for this query in general, see whether * it's allowable for this childrel in particular. But if we've @@ -3538,6 +3517,9 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel) if (!IS_PARTITIONED_REL(rel)) return; + /* The relation should have consider_partitionwise_join set. */ + Assert(rel->consider_partitionwise_join); + /* Guard against stack overflow due to overly deep partition hierarchy. */ check_stack_depth(); diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 7008e1318e4..d3d21fed5d4 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1322,6 +1322,9 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, if (!IS_PARTITIONED_REL(joinrel)) return; + /* The join relation should have consider_partitionwise_join set. */ + Assert(joinrel->consider_partitionwise_join); + /* * Since this join relation is partitioned, all the base relations * participating in this join must be partitioned and so are all the @@ -1330,6 +1333,10 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2)); Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2)); + /* The joining relations should have consider_partitionwise_join set. */ + Assert(rel1->consider_partitionwise_join && + rel2->consider_partitionwise_join); + /* * The partition scheme of the join relation should match that of the * joining relations. diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 69dd327f0c9..f66f39d8c68 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -41,9 +41,6 @@ typedef struct int num_vars; /* number of plain Var tlist entries */ bool has_ph_vars; /* are there PlaceHolderVar entries? */ bool has_non_vars; /* are there other entries? */ - bool has_conv_whole_rows; /* are there ConvertRowtypeExpr - * entries encapsulating a whole-row - * Var? */ tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */ } indexed_tlist; @@ -143,7 +140,6 @@ static List *set_returning_clause_references(PlannerInfo *root, int rtoffset); static bool extract_query_dependencies_walker(Node *node, PlannerInfo *context); -static bool is_converted_whole_row_reference(Node *node); /***************************************************************************** * @@ -1997,7 +1993,6 @@ build_tlist_index(List *tlist) itlist->tlist = tlist; itlist->has_ph_vars = false; itlist->has_non_vars = false; - itlist->has_conv_whole_rows = false; /* Find the Vars and fill in the index array */ vinfo = itlist->vars; @@ -2016,8 +2011,6 @@ build_tlist_index(List *tlist) } else if (tle->expr && IsA(tle->expr, PlaceHolderVar)) itlist->has_ph_vars = true; - else if (is_converted_whole_row_reference((Node *) tle->expr)) - itlist->has_conv_whole_rows = true; else itlist->has_non_vars = true; } @@ -2033,10 +2026,7 @@ build_tlist_index(List *tlist) * This is like build_tlist_index, but we only index tlist entries that * are Vars belonging to some rel other than the one specified. We will set * has_ph_vars (allowing PlaceHolderVars to be matched), but not has_non_vars - * (so nothing other than Vars and PlaceHolderVars can be matched). In case of - * DML, where this function will be used, returning lists from child relations - * will be appended similar to a simple append relation. That does not require - * fixing ConvertRowtypeExpr references. So, those are not considered here. + * (so nothing other than Vars and PlaceHolderVars can be matched). */ static indexed_tlist * build_tlist_index_other_vars(List *tlist, Index ignore_rel) @@ -2053,7 +2043,6 @@ build_tlist_index_other_vars(List *tlist, Index ignore_rel) itlist->tlist = tlist; itlist->has_ph_vars = false; itlist->has_non_vars = false; - itlist->has_conv_whole_rows = false; /* Find the desired Vars and fill in the index array */ vinfo = itlist->vars; @@ -2256,7 +2245,6 @@ static Node * fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { Var *newvar; - bool converted_whole_row; if (node == NULL) return NULL; @@ -2326,12 +2314,8 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) } if (IsA(node, Param)) return fix_param_node(context->root, (Param *) node); - /* Try matching more complex expressions too, if tlists have any */ - converted_whole_row = is_converted_whole_row_reference(node); - if (context->outer_itlist && - (context->outer_itlist->has_non_vars || - (context->outer_itlist->has_conv_whole_rows && converted_whole_row))) + if (context->outer_itlist && context->outer_itlist->has_non_vars) { newvar = search_indexed_tlist_for_non_var((Expr *) node, context->outer_itlist, @@ -2339,9 +2323,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) if (newvar) return (Node *) newvar; } - if (context->inner_itlist && - (context->inner_itlist->has_non_vars || - (context->inner_itlist->has_conv_whole_rows && converted_whole_row))) + if (context->inner_itlist && context->inner_itlist->has_non_vars) { newvar = search_indexed_tlist_for_non_var((Expr *) node, context->inner_itlist, @@ -2461,9 +2443,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) /* If no match, just fall through to process it normally */ } /* Try matching more complex expressions too, if tlist has any */ - if (context->subplan_itlist->has_non_vars || - (context->subplan_itlist->has_conv_whole_rows && - is_converted_whole_row_reference(node))) + if (context->subplan_itlist->has_non_vars) { newvar = search_indexed_tlist_for_non_var((Expr *) node, context->subplan_itlist, @@ -2670,33 +2650,3 @@ extract_query_dependencies_walker(Node *node, PlannerInfo *context) return expression_tree_walker(node, extract_query_dependencies_walker, (void *) context); } - -/* - * is_converted_whole_row_reference - * If the given node is a ConvertRowtypeExpr encapsulating a whole-row - * reference as implicit cast, return true. Otherwise return false. - */ -static bool -is_converted_whole_row_reference(Node *node) -{ - ConvertRowtypeExpr *convexpr; - - if (!node || !IsA(node, ConvertRowtypeExpr)) - return false; - - /* Traverse nested ConvertRowtypeExpr's. */ - convexpr = castNode(ConvertRowtypeExpr, node); - while (convexpr->convertformat == COERCE_IMPLICIT_CAST && - IsA(convexpr->arg, ConvertRowtypeExpr)) - convexpr = castNode(ConvertRowtypeExpr, convexpr->arg); - - if (IsA(convexpr->arg, Var)) - { - Var *var = castNode(Var, convexpr->arg); - - if (var->varattno == 0) - return true; - } - - return false; -} diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index c79d0f25d47..4dc632dcb52 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -20,7 +20,6 @@ #include "optimizer/pathnode.h" #include "optimizer/placeholder.h" #include "optimizer/planmain.h" -#include "optimizer/prep.h" #include "optimizer/var.h" #include "utils/lsyscache.h" @@ -415,10 +414,6 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, Relids relids = joinrel->relids; ListCell *lc; - /* This function is called only on the parent relations. */ - Assert(!IS_OTHER_REL(joinrel) && !IS_OTHER_REL(outer_rel) && - !IS_OTHER_REL(inner_rel)); - foreach(lc, root->placeholder_list) { PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); @@ -464,56 +459,3 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, } } } - -/* - * add_placeholders_to_child_joinrel - * Translate the PHVs in parent's targetlist and add them to the child's - * targetlist. Also adjust the cost - */ -void -add_placeholders_to_child_joinrel(PlannerInfo *root, RelOptInfo *childrel, - RelOptInfo *parentrel) -{ - ListCell *lc; - AppendRelInfo **appinfos; - int nappinfos; - - Assert(IS_JOIN_REL(childrel) && IS_JOIN_REL(parentrel)); - Assert(IS_OTHER_REL(childrel)); - - /* Nothing to do if no PHVs. */ - if (root->placeholder_list == NIL) - return; - - appinfos = find_appinfos_by_relids(root, childrel->relids, &nappinfos); - foreach(lc, parentrel->reltarget->exprs) - { - PlaceHolderVar *phv = lfirst(lc); - - if (IsA(phv, PlaceHolderVar)) - { - /* - * In case the placeholder Var refers to any of the parent - * relations, translate it to refer to the corresponding child. - */ - if (bms_overlap(phv->phrels, parentrel->relids) && - childrel->reloptkind == RELOPT_OTHER_JOINREL) - { - phv = (PlaceHolderVar *) adjust_appendrel_attrs(root, - (Node *) phv, - nappinfos, - appinfos); - } - - childrel->reltarget->exprs = lappend(childrel->reltarget->exprs, - phv); - } - } - - /* Adjust the cost and width of child targetlist. */ - childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup; - childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple; - childrel->reltarget->width = parentrel->reltarget->width; - - pfree(appinfos); -} diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index c69740eda6c..39f5729b915 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -57,6 +57,11 @@ static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel); static void build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, List *restrictlist, JoinType jointype); +static void build_child_join_reltarget(PlannerInfo *root, + RelOptInfo *parentrel, + RelOptInfo *childrel, + int nappinfos, + AppendRelInfo **appinfos); /* @@ -188,6 +193,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->baserestrict_min_security = UINT_MAX; rel->joininfo = NIL; rel->has_eclass_joins = false; + rel->consider_partitionwise_join = false; /* might get changed later */ rel->part_scheme = NULL; rel->nparts = 0; rel->boundinfo = NULL; @@ -602,6 +608,7 @@ build_join_rel(PlannerInfo *root, joinrel->baserestrict_min_security = UINT_MAX; joinrel->joininfo = NIL; joinrel->has_eclass_joins = false; + joinrel->consider_partitionwise_join = false; /* might get changed later */ joinrel->top_parent_relids = NULL; joinrel->part_scheme = NULL; joinrel->nparts = 0; @@ -732,6 +739,9 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, /* Only joins between "other" relations land here. */ Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel)); + /* The parent joinrel should have consider_partitionwise_join set. */ + Assert(parent_joinrel->consider_partitionwise_join); + joinrel->reloptkind = RELOPT_OTHER_JOINREL; joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids); joinrel->rows = 0; @@ -773,6 +783,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->baserestrictcost.per_tuple = 0; joinrel->joininfo = NIL; joinrel->has_eclass_joins = false; + joinrel->consider_partitionwise_join = false; /* might get changed later */ joinrel->top_parent_relids = NULL; joinrel->part_scheme = NULL; joinrel->nparts = 0; @@ -789,14 +800,13 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, /* Compute information relevant to foreign relations. */ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); - /* Build targetlist */ - build_joinrel_tlist(root, joinrel, outer_rel); - build_joinrel_tlist(root, joinrel, inner_rel); - /* Add placeholder variables. */ - add_placeholders_to_child_joinrel(root, joinrel, parent_joinrel); + appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos); + + /* Set up reltarget struct */ + build_child_join_reltarget(root, parent_joinrel, joinrel, + nappinfos, appinfos); /* Construct joininfo list. */ - appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos); joinrel->joininfo = (List *) adjust_appendrel_attrs(root, (Node *) parent_joinrel->joininfo, nappinfos, @@ -826,7 +836,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, /* Child joinrel is parallel safe if parent is parallel safe. */ joinrel->consider_parallel = parent_joinrel->consider_parallel; - /* Set estimates of the child-joinrel's size. */ set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, sjinfo, restrictlist); @@ -895,15 +904,9 @@ static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *input_rel) { - Relids relids; + Relids relids = joinrel->relids; ListCell *vars; - /* attrs_needed refers to parent relids and not those of a child. */ - if (joinrel->top_parent_relids) - relids = joinrel->top_parent_relids; - else - relids = joinrel->relids; - foreach(vars, input_rel->reltarget->exprs) { Var *var = (Var *) lfirst(vars); @@ -919,54 +922,23 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, /* * Otherwise, anything in a baserel or joinrel targetlist ought to be - * a Var. Children of a partitioned table may have ConvertRowtypeExpr - * translating whole-row Var of a child to that of the parent. - * Children of an inherited table or subquery child rels can not - * directly participate in a join, so other kinds of nodes here. + * a Var. (More general cases can only appear in appendrel child + * rels, which will never be seen here.) */ - if (IsA(var, Var)) - { - baserel = find_base_rel(root, var->varno); - ndx = var->varattno - baserel->min_attr; - } - else if (IsA(var, ConvertRowtypeExpr)) - { - ConvertRowtypeExpr *child_expr = (ConvertRowtypeExpr *) var; - Var *childvar = (Var *) child_expr->arg; - - /* - * Child's whole-row references are converted to look like those - * of parent using ConvertRowtypeExpr. There can be as many - * ConvertRowtypeExpr decorations as the depth of partition tree. - * The argument to the deepest ConvertRowtypeExpr is expected to - * be a whole-row reference of the child. - */ - while (IsA(childvar, ConvertRowtypeExpr)) - { - child_expr = (ConvertRowtypeExpr *) childvar; - childvar = (Var *) child_expr->arg; - } - Assert(IsA(childvar, Var) &&childvar->varattno == 0); - - baserel = find_base_rel(root, childvar->varno); - ndx = 0 - baserel->min_attr; - } - else + if (!IsA(var, Var)) elog(ERROR, "unexpected node type in rel targetlist: %d", (int) nodeTag(var)); + /* Get the Var's original base rel */ + baserel = find_base_rel(root, var->varno); - /* Is the target expression still needed above this joinrel? */ + /* Is it still needed above this joinrel? */ + ndx = var->varattno - baserel->min_attr; if (bms_nonempty_difference(baserel->attr_needed[ndx], relids)) { /* Yup, add it to the output */ joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, var); - - /* - * Vars have cost zero, so no need to adjust reltarget->cost. Even - * if it's a ConvertRowtypeExpr, it will be computed only for the - * base relation, costing nothing for a join. - */ + /* Vars have cost zero, so no need to adjust reltarget->cost */ joinrel->reltarget->width += baserel->attr_widths[ndx]; } } @@ -1626,16 +1598,18 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, /* * We can only consider this join as an input to further partitionwise - * joins if (a) the input relations are partitioned, (b) the partition - * schemes match, and (c) we can identify an equi-join between the - * partition keys. Note that if it were possible for - * have_partkey_equi_join to return different answers for the same joinrel - * depending on which join ordering we try first, this logic would break. - * That shouldn't happen, though, because of the way the query planner - * deduces implied equalities and reorders the joins. Please see - * optimizer/README for details. + * joins if (a) the input relations are partitioned and have + * consider_partitionwise_join=true, (b) the partition schemes match, and + * (c) we can identify an equi-join between the partition keys. Note that + * if it were possible for have_partkey_equi_join to return different + * answers for the same joinrel depending on which join ordering we try + * first, this logic would break. That shouldn't happen, though, because + * of the way the query planner deduces implied equalities and reorders + * the joins. Please see optimizer/README for details. */ if (!IS_PARTITIONED_REL(outer_rel) || !IS_PARTITIONED_REL(inner_rel) || + !outer_rel->consider_partitionwise_join || + !inner_rel->consider_partitionwise_join || outer_rel->part_scheme != inner_rel->part_scheme || !have_partkey_equi_join(joinrel, outer_rel, inner_rel, jointype, restrictlist)) @@ -1687,6 +1661,12 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, joinrel->part_rels = (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * joinrel->nparts); + /* + * Set the consider_partitionwise_join flag. + */ + Assert(outer_rel->consider_partitionwise_join); + Assert(inner_rel->consider_partitionwise_join); + joinrel->consider_partitionwise_join = true; /* * Construct partition keys for the join. @@ -1768,3 +1748,26 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, joinrel->nullable_partexprs[cnt] = nullable_partexpr; } } + +/* + * build_child_join_reltarget + * Set up a child-join relation's reltarget from a parent-join relation. + */ +static void +build_child_join_reltarget(PlannerInfo *root, + RelOptInfo *parentrel, + RelOptInfo *childrel, + int nappinfos, + AppendRelInfo **appinfos) +{ + /* Build the targetlist */ + childrel->reltarget->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) parentrel->reltarget->exprs, + nappinfos, appinfos); + + /* Set the cost and width fields */ + childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup; + childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple; + childrel->reltarget->width = parentrel->reltarget->width; +} diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 41caf873fb4..adb42650479 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -687,8 +687,12 @@ typedef struct RelOptInfo * involving this rel */ bool has_eclass_joins; /* T means joininfo is incomplete */ - /* used by "other" relations */ - Relids top_parent_relids; /* Relids of topmost parents */ + /* used by partitionwise joins: */ + bool consider_partitionwise_join; /* consider partitionwise + * join paths? (if + * partitioned rel) */ + Relids top_parent_relids; /* Relids of topmost parents (if "other" + * rel) */ /* used for partitioned relations */ PartitionScheme part_scheme; /* Partitioning scheme. */ diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index d286050c9aa..6bc106831ee 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -449,6 +449,38 @@ SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2 24 | 900 | 100 (5 rows) +-- Check with whole-row reference; partitionwise aggregation does not apply +EXPLAIN (COSTS OFF) +SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t1.x, (sum(t1.y)), (count(((t1.*)::pagg_tab1))) + -> HashAggregate + Group Key: t1.x + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Append + -> Seq Scan on pagg_tab1_p1 t1 + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Seq Scan on pagg_tab1_p3 t1_2 + -> Hash + -> Append + -> Seq Scan on pagg_tab2_p1 t2 + -> Seq Scan on pagg_tab2_p2 t2_1 + -> Seq Scan on pagg_tab2_p3 t2_2 +(15 rows) + +SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + x | sum | count +----+------+------- + 0 | 500 | 100 + 6 | 1100 | 100 + 12 | 700 | 100 + 18 | 1300 | 100 + 24 | 900 | 100 +(5 rows) + -- GROUP BY having other matching key EXPLAIN (COSTS OFF) SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3; diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 7d04d12c6e2..3ba3aaf2d86 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -62,33 +62,28 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 450 | 0450 | 450 | 0450 (4 rows) --- left outer join, with whole-row reference +-- left outer join, with whole-row reference; partitionwise join does not apply EXPLAIN (COSTS OFF) SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; QUERY PLAN -------------------------------------------------- Sort Sort Key: t1.a, t2.b - -> Append - -> Hash Right Join - Hash Cond: (t2.b = t1.a) + -> Hash Right Join + Hash Cond: (t2.b = t1.a) + -> Append -> Seq Scan on prt2_p1 t2 - -> Hash + -> Seq Scan on prt2_p2 t2_1 + -> Seq Scan on prt2_p3 t2_2 + -> Hash + -> Append -> Seq Scan on prt1_p1 t1 Filter: (b = 0) - -> Hash Right Join - Hash Cond: (t2_1.b = t1_1.a) - -> Seq Scan on prt2_p2 t2_1 - -> Hash -> Seq Scan on prt1_p2 t1_1 Filter: (b = 0) - -> Hash Right Join - Hash Cond: (t2_2.b = t1_2.a) - -> Seq Scan on prt2_p3 t2_2 - -> Hash -> Seq Scan on prt1_p3 t1_2 Filter: (b = 0) -(21 rows) +(16 rows) SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; t1 | t2 @@ -1042,6 +1037,40 @@ SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * 400 | (9 rows) +-- merge join when expression with whole-row reference needs to be sorted; +-- partitionwise join does not apply +EXPLAIN (COSTS OFF) +SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a; + QUERY PLAN +----------------------------------------------------------------------------------------- + Merge Join + Merge Cond: ((t1.a = t2.b) AND (((((t1.*)::prt1))::text) = ((((t2.*)::prt2))::text))) + -> Sort + Sort Key: t1.a, ((((t1.*)::prt1))::text) + -> Result + -> Append + -> Seq Scan on prt1_p1 t1 + -> Seq Scan on prt1_p2 t1_1 + -> Seq Scan on prt1_p3 t1_2 + -> Sort + Sort Key: t2.b, ((((t2.*)::prt2))::text) + -> Result + -> Append + -> Seq Scan on prt2_p1 t2 + -> Seq Scan on prt2_p2 t2_1 + -> Seq Scan on prt2_p3 t2_2 +(16 rows) + +SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a; + a | b +----+---- + 0 | 0 + 6 | 6 + 12 | 12 + 18 | 18 + 24 | 24 +(5 rows) + RESET enable_hashjoin; RESET enable_nestloop; -- diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql index 6d8b73964a1..c387d64db3a 100644 --- a/src/test/regress/sql/partition_aggregate.sql +++ b/src/test/regress/sql/partition_aggregate.sql @@ -111,6 +111,11 @@ EXPLAIN (COSTS OFF) SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; +-- Check with whole-row reference; partitionwise aggregation does not apply +EXPLAIN (COSTS OFF) +SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; +SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + -- GROUP BY having other matching key EXPLAIN (COSTS OFF) SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3; diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index d001420b061..c1c98596515 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -34,7 +34,7 @@ EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b; SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b; --- left outer join, with whole-row reference +-- left outer join, with whole-row reference; partitionwise join does not apply EXPLAIN (COSTS OFF) SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; @@ -160,6 +160,12 @@ EXPLAIN (COSTS OFF) SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; +-- merge join when expression with whole-row reference needs to be sorted; +-- partitionwise join does not apply +EXPLAIN (COSTS OFF) +SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a; +SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a; + RESET enable_hashjoin; RESET enable_nestloop;