diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index d5883c98d15..4562a5121d4 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2469,7 +2469,7 @@ CopyFrom(CopyState cstate) PartitionTupleRouting *proute; proute = cstate->partition_tuple_routing = - ExecSetupPartitionTupleRouting(NULL, cstate->rel, 1, estate); + ExecSetupPartitionTupleRouting(NULL, cstate->rel); /* * If we are capturing transition tuples, they may need to be @@ -2606,6 +2606,14 @@ CopyFrom(CopyState cstate) */ saved_resultRelInfo = resultRelInfo; resultRelInfo = proute->partitions[leaf_part_index]; + if (resultRelInfo == NULL) + { + resultRelInfo = ExecInitPartitionInfo(NULL, + saved_resultRelInfo, + proute, estate, + leaf_part_index); + Assert(resultRelInfo != NULL); + } /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index beb2362ab0f..54efc9e5452 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -44,21 +44,25 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, * * Note that all the relations in the partition tree are locked using the * RowExclusiveLock mode upon return from this function. + * + * While we allocate the arrays of pointers of ResultRelInfo and + * TupleConversionMap for all partitions here, actual objects themselves are + * lazily allocated for a given partition if a tuple is actually routed to it; + * see ExecInitPartitionInfo. However, if the function is invoked for update + * tuple routing, caller would already have initialized ResultRelInfo's for + * some of the partitions, which are reused and assigned to their respective + * slot in the aforementioned array. 
*/ PartitionTupleRouting * -ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, - Relation rel, Index resultRTindex, - EState *estate) +ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) { TupleDesc tupDesc = RelationGetDescr(rel); List *leaf_parts; ListCell *cell; int i; - ResultRelInfo *leaf_part_arr = NULL, - *update_rri = NULL; + ResultRelInfo *update_rri = NULL; int num_update_rri = 0, update_rri_index = 0; - bool is_update = false; PartitionTupleRouting *proute; /* @@ -76,13 +80,14 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, proute->parent_child_tupconv_maps = (TupleConversionMap **) palloc0(proute->num_partitions * sizeof(TupleConversionMap *)); + proute->partition_oids = (Oid *) palloc(proute->num_partitions * + sizeof(Oid)); /* Set up details specific to the type of tuple routing we are doing. */ if (mtstate && mtstate->operation == CMD_UPDATE) { ModifyTable *node = (ModifyTable *) mtstate->ps.plan; - is_update = true; update_rri = mtstate->resultRelInfo; num_update_rri = list_length(node->plans); proute->subplan_partition_offsets = @@ -95,16 +100,6 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, */ proute->root_tuple_slot = MakeTupleTableSlot(NULL); } - else - { - /* - * Since we are inserting tuples, we need to create all new result - * rels. Avoid repeated pallocs by allocating memory for all the - * result rels in bulk. 
- */ - leaf_part_arr = (ResultRelInfo *) palloc0(proute->num_partitions * - sizeof(ResultRelInfo)); - } /* * Initialize an empty slot that will be used to manipulate tuples of any @@ -117,117 +112,68 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, i = 0; foreach(cell, leaf_parts) { - ResultRelInfo *leaf_part_rri; - Relation partrel = NULL; - TupleDesc part_tupdesc; + ResultRelInfo *leaf_part_rri = NULL; Oid leaf_oid = lfirst_oid(cell); - if (is_update) - { - /* - * If the leaf partition is already present in the per-subplan - * result rels, we re-use that rather than initialize a new result - * rel. The per-subplan resultrels and the resultrels of the leaf - * partitions are both in the same canonical order. So while going - * through the leaf partition oids, we need to keep track of the - * next per-subplan result rel to be looked for in the leaf - * partition resultrels. - */ - if (update_rri_index < num_update_rri && - RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid) - { - leaf_part_rri = &update_rri[update_rri_index]; - partrel = leaf_part_rri->ri_RelationDesc; - - /* - * This is required in order to convert the partition's tuple - * to be compatible with the root partitioned table's tuple - * descriptor. When generating the per-subplan result rels, - * this was not set. - */ - leaf_part_rri->ri_PartitionRoot = rel; - - /* Remember the subplan offset for this ResultRelInfo */ - proute->subplan_partition_offsets[update_rri_index] = i; - - update_rri_index++; - } - else - leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo)); - } - else - { - /* For INSERTs, we already have an array of result rels allocated */ - leaf_part_rri = &leaf_part_arr[i]; - } + proute->partition_oids[i] = leaf_oid; /* - * If we didn't open the partition rel, it means we haven't - * initialized the result rel either. 
+ * If the leaf partition is already present in the per-subplan result + * rels, we re-use that rather than initialize a new result rel. The + * per-subplan resultrels and the resultrels of the leaf partitions + * are both in the same canonical order. So while going through the + * leaf partition oids, we need to keep track of the next per-subplan + * result rel to be looked for in the leaf partition resultrels. */ - if (!partrel) + if (update_rri_index < num_update_rri && + RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid) { - /* - * We locked all the partitions above including the leaf - * partitions. Note that each of the newly opened relations in - * proute->partitions are eventually closed by the caller. - */ - partrel = heap_open(leaf_oid, NoLock); - InitResultRelInfo(leaf_part_rri, - partrel, - resultRTindex, - rel, - estate->es_instrument); + Relation partrel; + TupleDesc part_tupdesc; + + leaf_part_rri = &update_rri[update_rri_index]; + partrel = leaf_part_rri->ri_RelationDesc; /* - * Since we've just initialized this ResultRelInfo, it's not in - * any list attached to the estate as yet. Add it, so that it can - * be found later. + * This is required in order to convert the partition's tuple to + * be compatible with the root partitioned table's tuple + * descriptor. When generating the per-subplan result rels, this + * was not set. */ - estate->es_tuple_routing_result_relations = - lappend(estate->es_tuple_routing_result_relations, - leaf_part_rri); + leaf_part_rri->ri_PartitionRoot = rel; + + /* Remember the subplan offset for this ResultRelInfo */ + proute->subplan_partition_offsets[update_rri_index] = i; + + update_rri_index++; + + part_tupdesc = RelationGetDescr(partrel); + + /* + * Save a tuple conversion map to convert a tuple routed to this + * partition from the parent's type to the partition's. 
+ */ + proute->parent_child_tupconv_maps[i] = + convert_tuples_by_name(tupDesc, part_tupdesc, + gettext_noop("could not convert row type")); + + /* + * Verify result relation is a valid target for an INSERT. An + * UPDATE of a partition-key becomes a DELETE+INSERT operation, so + * this check is required even when the operation is CMD_UPDATE. + */ + CheckValidResultRel(leaf_part_rri, CMD_INSERT); } - part_tupdesc = RelationGetDescr(partrel); - - /* - * Save a tuple conversion map to convert a tuple routed to this - * partition from the parent's type to the partition's. - */ - proute->parent_child_tupconv_maps[i] = - convert_tuples_by_name(tupDesc, part_tupdesc, - gettext_noop("could not convert row type")); - - /* - * Verify result relation is a valid target for an INSERT. An UPDATE - * of a partition-key becomes a DELETE+INSERT operation, so this check - * is still required when the operation is CMD_UPDATE. - */ - CheckValidResultRel(leaf_part_rri, CMD_INSERT); - - /* - * Open partition indices. The user may have asked to check for - * conflicts within this leaf partition and do "nothing" instead of - * throwing an error. Be prepared in that case by initializing the - * index information needed by ExecInsert() to perform speculative - * insertions. - */ - if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex && - leaf_part_rri->ri_IndexRelationDescs == NULL) - ExecOpenIndices(leaf_part_rri, - mtstate != NULL && - mtstate->mt_onconflict != ONCONFLICT_NONE); - proute->partitions[i] = leaf_part_rri; i++; } /* * For UPDATE, we should have found all the per-subplan resultrels in the - * leaf partitions. + * leaf partitions. (If this is an INSERT, both values will be zero.) 
*/ - Assert(!is_update || update_rri_index == num_update_rri); + Assert(update_rri_index == num_update_rri); return proute; } @@ -351,6 +297,211 @@ ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, return result; } +/* + * ExecInitPartitionInfo + * Initialize ResultRelInfo and other information for a partition if not + * already done + * + * mtstate may be NULL (e.g. when called from COPY), in which case no WITH + * CHECK OPTION or RETURNING state is built and range table index 1 is used. + * resultRelInfo is the root partitioned table's ResultRelInfo; partidx is the + * partition's index in proute->partitions. + * + * Returns the newly built ResultRelInfo, which is also stored in + * proute->partitions[partidx]. + */ +ResultRelInfo * +ExecInitPartitionInfo(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + PartitionTupleRouting *proute, + EState *estate, int partidx) +{ + Relation rootrel = resultRelInfo->ri_RelationDesc, + partrel; + ResultRelInfo *leaf_part_rri; + ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL; + MemoryContext oldContext; + + /* + * We locked all the partitions in ExecSetupPartitionTupleRouting + * including the leaf partitions. + */ + partrel = heap_open(proute->partition_oids[partidx], NoLock); + + /* + * Keep ResultRelInfo and other information for this partition in the + * per-query memory context so they'll survive throughout the query. + */ + oldContext = MemoryContextSwitchTo(estate->es_query_cxt); + + leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo)); + InitResultRelInfo(leaf_part_rri, + partrel, + node ? node->nominalRelation : 1, + rootrel, + estate->es_instrument); + + /* + * Verify result relation is a valid target for an INSERT. An UPDATE of a + * partition-key becomes a DELETE+INSERT operation, so this check is still + * required when the operation is CMD_UPDATE. + */ + CheckValidResultRel(leaf_part_rri, CMD_INSERT); + + /* + * Since we've just initialized this ResultRelInfo, it's not in any list + * attached to the estate as yet. Add it, so that it can be found later. + * + * Note that the entries in this list appear in no predetermined order, + * because partition result rels are initialized as and when they're + * needed. 
+ */ + estate->es_tuple_routing_result_relations = + lappend(estate->es_tuple_routing_result_relations, + leaf_part_rri); + + /* + * Open partition indices. The user may have asked to check for conflicts + * within this leaf partition and do "nothing" instead of throwing an + * error. Be prepared in that case by initializing the index information + * needed by ExecInsert() to perform speculative insertions. + */ + if (partrel->rd_rel->relhasindex && + leaf_part_rri->ri_IndexRelationDescs == NULL) + ExecOpenIndices(leaf_part_rri, + (mtstate != NULL && + mtstate->mt_onconflict != ONCONFLICT_NONE)); + + /* + * Build WITH CHECK OPTION constraints for the partition. Note that we + * didn't build the withCheckOptionList for partitions within the planner, + * but simple translation of varattnos will suffice. This only occurs for + * the INSERT case or in the case of UPDATE tuple routing where we didn't + * find a result rel to reuse in ExecSetupPartitionTupleRouting(). + */ + if (node && node->withCheckOptionLists != NIL) + { + List *wcoList; + List *wcoExprs = NIL; + ListCell *ll; + int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + + /* + * In the case of INSERT on a partitioned table, there is only one + * plan. Likewise, there is only one WCO list, not one per partition. + * For UPDATE, there are as many WCO lists as there are plans. + */ + Assert((node->operation == CMD_INSERT && + list_length(node->withCheckOptionLists) == 1 && + list_length(node->plans) == 1) || + (node->operation == CMD_UPDATE && + list_length(node->withCheckOptionLists) == + list_length(node->plans))); + + /* + * Use the WCO list of the first plan as a reference to calculate + * attno's for the WCO list of this partition. In the INSERT case, + * that refers to the root partitioned table, whereas in the UPDATE + * tuple routing case, that refers to the first partition in the + * mtstate->resultRelInfo array. 
In any case, both that relation and + * this partition should have the same columns, so we should be able + * to map attributes successfully. + */ + wcoList = linitial(node->withCheckOptionLists); + + /* + * Convert Vars in it to contain this partition's attribute numbers. + */ + wcoList = map_partition_varattnos(wcoList, firstVarno, + partrel, firstResultRel, NULL); + foreach(ll, wcoList) + { + /* + * Use the ModifyTable node itself as the parent, so that any + * SubPlans in the qual all end up attached to that one node. + */ + WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); + ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), + &mtstate->ps); + + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + leaf_part_rri->ri_WithCheckOptions = wcoList; + leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs; + } + + /* + * Build the RETURNING projection for the partition. Note that we didn't + * build the returningList for partitions within the planner, but simple + * translation of varattnos will suffice. This only occurs for the INSERT + * case or in the case of UPDATE tuple routing where we didn't find a + * result rel to reuse in ExecSetupPartitionTupleRouting(). + */ + if (node && node->returningLists != NIL) + { + TupleTableSlot *slot; + ExprContext *econtext; + List *returningList; + int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + + /* See the comment above for WCO lists. */ + Assert((node->operation == CMD_INSERT && + list_length(node->returningLists) == 1 && + list_length(node->plans) == 1) || + (node->operation == CMD_UPDATE && + list_length(node->returningLists) == + list_length(node->plans))); + + /* + * Use the RETURNING list of the first plan as a reference to + * calculate attno's for the RETURNING list of this partition. See + * the comment above for WCO lists for more details on why this is + * okay. + */ + returningList = linitial(node->returningLists); + + /* + * Convert Vars in it to contain this partition's attribute numbers. 
+ */ + returningList = map_partition_varattnos(returningList, firstVarno, + partrel, firstResultRel, + NULL); + + /* + * Initialize the projection itself. + * + * Use the slot and the expression context that would have been set up + * in ExecInitModifyTable() for projection's output. + */ + Assert(mtstate->ps.ps_ResultTupleSlot != NULL); + slot = mtstate->ps.ps_ResultTupleSlot; + Assert(mtstate->ps.ps_ExprContext != NULL); + econtext = mtstate->ps.ps_ExprContext; + leaf_part_rri->ri_projectReturning = + ExecBuildProjectionInfo(returningList, econtext, slot, + &mtstate->ps, RelationGetDescr(partrel)); + } + + Assert(proute->partitions[partidx] == NULL); + proute->partitions[partidx] = leaf_part_rri; + + /* + * Save a tuple conversion map to convert a tuple routed to this partition + * from the parent's type to the partition's. + */ + proute->parent_child_tupconv_maps[partidx] = + convert_tuples_by_name(RelationGetDescr(rootrel), + RelationGetDescr(partrel), + gettext_noop("could not convert row type")); + + MemoryContextSwitchTo(oldContext); + + return leaf_part_rri; +} + /* * ExecSetupChildParentMapForLeaf -- Initialize the per-leaf-partition * child-to-root tuple conversion map array. @@ -477,6 +628,10 @@ ExecCleanupTupleRouting(PartitionTupleRouting *proute) { ResultRelInfo *resultRelInfo = proute->partitions[i]; + /* skip further processing for uninitialized partitions */ + if (resultRelInfo == NULL) + continue; + /* * If this result rel is one of the UPDATE subplan result rels, let * ExecEndPlan() close it. For INSERT or COPY, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 93c03cfb071..c32928d9bd7 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -306,10 +306,18 @@ ExecInsert(ModifyTableState *mtstate, /* * Save the old ResultRelInfo and switch to the one corresponding to - * the selected partition. + * the selected partition. 
(We might need to initialize it first.) */ saved_resultRelInfo = resultRelInfo; resultRelInfo = proute->partitions[leaf_part_index]; + if (resultRelInfo == NULL) + { + resultRelInfo = ExecInitPartitionInfo(mtstate, + saved_resultRelInfo, + proute, estate, + leaf_part_index); + Assert(resultRelInfo != NULL); + } /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) @@ -2098,14 +2106,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ResultRelInfo *saved_resultRelInfo; ResultRelInfo *resultRelInfo; Plan *subplan; - int firstVarno = 0; - Relation firstResultRel = NULL; ListCell *l; int i; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; - PartitionTupleRouting *proute = NULL; - int num_partitions = 0; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -2228,20 +2232,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && (operation == CMD_INSERT || update_tuple_routing_needed)) - { - proute = mtstate->mt_partition_tuple_routing = - ExecSetupPartitionTupleRouting(mtstate, - rel, node->nominalRelation, - estate); - num_partitions = proute->num_partitions; - - /* - * Below are required as reference objects for mapping partition - * attno's in expressions such as WithCheckOptions and RETURNING. - */ - firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; - firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; - } + mtstate->mt_partition_tuple_routing = + ExecSetupPartitionTupleRouting(mtstate, rel); /* * Build state for collecting transition tuples. This requires having a @@ -2287,70 +2279,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) i++; } - /* - * Build WITH CHECK OPTION constraints for each leaf partition rel. 
Note - * that we didn't build the withCheckOptionList for each partition within - * the planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case or for UPDATE row - * movement. DELETEs and local UPDATEs are handled above. - */ - if (node->withCheckOptionLists != NIL && num_partitions > 0) - { - List *first_wcoList; - - /* - * In case of INSERT on partitioned tables, there is only one plan. - * Likewise, there is only one WITH CHECK OPTIONS list, not one per - * partition. Whereas for UPDATE, there are as many WCOs as there are - * plans. So in either case, use the WCO expression of the first - * resultRelInfo as a reference to calculate attno's for the WCO - * expression of each of the partitions. We make a copy of the WCO - * qual for each partition. Note that, if there are SubPlans in there, - * they all end up attached to the one parent Plan node. - */ - Assert(update_tuple_routing_needed || - (operation == CMD_INSERT && - list_length(node->withCheckOptionLists) == 1 && - mtstate->mt_nplans == 1)); - - first_wcoList = linitial(node->withCheckOptionLists); - for (i = 0; i < num_partitions; i++) - { - Relation partrel; - List *mapped_wcoList; - List *wcoExprs = NIL; - ListCell *ll; - - resultRelInfo = proute->partitions[i]; - - /* - * If we are referring to a resultRelInfo from one of the update - * result rels, that result rel would already have - * WithCheckOptions initialized. 
- */ - if (resultRelInfo->ri_WithCheckOptions) - continue; - - partrel = resultRelInfo->ri_RelationDesc; - - mapped_wcoList = map_partition_varattnos(first_wcoList, - firstVarno, - partrel, firstResultRel, - NULL); - foreach(ll, mapped_wcoList) - { - WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); - ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), - &mtstate->ps); - - wcoExprs = lappend(wcoExprs, wcoExpr); - } - - resultRelInfo->ri_WithCheckOptions = mapped_wcoList; - resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; - } - } - /* * Initialize RETURNING projections if needed. */ @@ -2358,7 +2286,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) { TupleTableSlot *slot; ExprContext *econtext; - List *firstReturningList; /* * Initialize result tuple slot and assign its rowtype using the first @@ -2388,44 +2315,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) resultRelInfo->ri_RelationDesc->rd_att); resultRelInfo++; } - - /* - * Build a projection for each leaf partition rel. Note that we - * didn't build the returningList for each partition within the - * planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case or for UPDATE - * row movement. DELETEs and local UPDATEs are handled above. - */ - firstReturningList = linitial(node->returningLists); - for (i = 0; i < num_partitions; i++) - { - Relation partrel; - List *rlist; - - resultRelInfo = proute->partitions[i]; - - /* - * If we are referring to a resultRelInfo from one of the update - * result rels, that result rel would already have a returningList - * built. - */ - if (resultRelInfo->ri_projectReturning) - continue; - - partrel = resultRelInfo->ri_RelationDesc; - - /* - * Use the returning expression of the first resultRelInfo as a - * reference to calculate attno's for the returning expression of - * each of the partitions. 
- */ - rlist = map_partition_varattnos(firstReturningList, - firstVarno, - partrel, firstResultRel, NULL); - resultRelInfo->ri_projectReturning = - ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, - resultRelInfo->ri_RelationDesc->rd_att); - } } else { diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 3df9c498bbf..e94718608fb 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -58,6 +58,7 @@ typedef struct PartitionDispatchData *PartitionDispatch; * partition tree. * num_dispatch number of partitioned tables in the partition * tree (= length of partition_dispatch_info[]) + * partition_oids Array of leaf partitions OIDs * partitions Array of ResultRelInfo* objects with one entry * for every leaf partition in the partition tree. * num_partitions Number of leaf partitions in the partition tree @@ -91,6 +92,7 @@ typedef struct PartitionTupleRouting { PartitionDispatch *partition_dispatch_info; int num_dispatch; + Oid *partition_oids; ResultRelInfo **partitions; int num_partitions; TupleConversionMap **parent_child_tupconv_maps; @@ -103,12 +105,15 @@ typedef struct PartitionTupleRouting } PartitionTupleRouting; extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, - Relation rel, Index resultRTindex, - EState *estate); + Relation rel); extern int ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, TupleTableSlot *slot, EState *estate); +extern ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + PartitionTupleRouting *proute, + EState *estate, int partidx); extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute); extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute, ResultRelInfo *rootRelInfo, int leaf_index);