Revert "Don't lock partitions pruned by initial pruning"

As pointed out by Tom Lane, the patch introduced fragile and invasive design around plan invalidation handling when locking of prunable partitions was deferred from plancache.c to the executor. In particular, it violated assumptions about CachedPlan immutability and altered executor APIs in ways that are difficult to justify given the added complexity and overhead. This also removes the firstResultRels field added to PlannedStmt in commit 28317de72, which was intended to support deferred locking of certain ModifyTable result relations. Reported-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/605328.1747710381@sss.pgh.pa.us
2025-08-19 23:22:23 +03:00 · 2025-05-22 14:17:24 +09:00
parent f3622b6476
commit 1722d5eb05
33 changed files with 90 additions and 673 deletions
--- a/src/backend/executor/README
+++ b/src/backend/executor/README
@@ -285,28 +285,6 @@ are typically reset to empty once per tuple.  Per-tuple contexts are usually
 associated with ExprContexts, and commonly each PlanState node has its own
 ExprContext to evaluate its qual and targetlist expressions in.

-Relation Locking
----------------
-
-When the executor initializes a plan tree for execution, it doesn't lock
-non-index relations if the plan tree is freshly generated and not derived
-from a CachedPlan. This is because such locks have already been established
-during the query's parsing, rewriting, and planning phases. However, with a
-cached plan tree, some relations may remain unlocked. The function
-AcquireExecutorLocks() only locks unprunable relations in the plan, deferring
-the locking of prunable ones to executor initialization. This avoids
-unnecessary locking of relations that will be pruned during "initial" runtime
-pruning in ExecDoInitialPruning().
-
-This approach creates a window where a cached plan tree with child tables
-could become outdated if another backend modifies these tables before
-ExecDoInitialPruning() locks them. As a result, the executor has the added duty
-to verify the plan tree's validity whenever it locks a child table after
-doing initial pruning. This validation is done by checking the CachedPlan.is_valid
-flag. If the plan tree is outdated (is_valid = false), the executor stops
-further initialization, cleans up anything in EState that would have been
-allocated up to that point, and retries execution after recreating the
-invalid plan in the CachedPlan.  See ExecutorStartCachedPlan().

 Query Processing Control Flow
 -----------------------------
@@ -315,13 +293,11 @@ This is a sketch of control flow for full query processing:

 	CreateQueryDesc

-	ExecutorStart or ExecutorStartCachedPlan
+	ExecutorStart
 		CreateExecutorState
 			creates per-query context
-		switch to per-query context to run ExecDoInitialPruning and ExecInitNode
+		switch to per-query context to run ExecInitNode
 		AfterTriggerBeginQuery
-		ExecDoInitialPruning
-			does initial pruning and locks surviving partitions if needed
 		ExecInitNode --- recursively scans plan tree
 			ExecInitNode
 				recurse into subsidiary nodes
@@ -345,12 +321,7 @@ This is a sketch of control flow for full query processing:

 	FreeQueryDesc

-As mentioned in the "Relation Locking" section, if the plan tree is found to
-be stale after locking partitions in ExecDoInitialPruning(), the control is
-immediately returned to ExecutorStartCachedPlan(), which will create a new plan
-tree and perform the steps starting from CreateExecutorState() again.
-
-Per above comments, it's not really critical for ExecEndPlan to free any
+Per above comments, it's not really critical for ExecEndNode to free any
 memory; it'll all go away in FreeExecutorState anyway.  However, we do need to
 be careful to close relations, drop buffer pins, etc, so we do need to scan
 the plan state tree to find these sorts of resources.
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -55,13 +55,11 @@
 #include "parser/parse_relation.h"
 #include "pgstat.h"
 #include "rewrite/rewriteHandler.h"
-#include "storage/lmgr.h"
 #include "tcop/utility.h"
 #include "utils/acl.h"
 #include "utils/backend_status.h"
 #include "utils/lsyscache.h"
 #include "utils/partcache.h"
-#include "utils/plancache.h"
 #include "utils/rls.h"
 #include "utils/snapmgr.h"

@@ -119,16 +117,11 @@ static void ReportNotNullViolationError(ResultRelInfo *resultRelInfo,
 * get control when ExecutorStart is called.  Such a plugin would
 * normally call standard_ExecutorStart().
 *
- * Return value indicates if the plan has been initialized successfully so
- * that queryDesc->planstate contains a valid PlanState tree.  It may not
- * if the plan got invalidated during InitPlan().
 * ----------------------------------------------------------------
 */
-bool
+void
 ExecutorStart(QueryDesc *queryDesc, int eflags)
 {
-	bool		plan_valid;
-
 	/*
 	 * In some cases (e.g. an EXECUTE statement or an execute message with the
 	 * extended query protocol) the query_id won't be reported, so do it now.
@@ -140,14 +133,12 @@ ExecutorStart(QueryDesc *queryDesc, int eflags)
 	pgstat_report_query_id(queryDesc->plannedstmt->queryId, false);

 	if (ExecutorStart_hook)
-		plan_valid = (*ExecutorStart_hook) (queryDesc, eflags);
+		(*ExecutorStart_hook) (queryDesc, eflags);
 	else
-		plan_valid = standard_ExecutorStart(queryDesc, eflags);
-
-	return plan_valid;
+		standard_ExecutorStart(queryDesc, eflags);
 }

-bool
+void
 standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
 {
 	EState	   *estate;
@@ -271,64 +262,6 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
 	InitPlan(queryDesc, eflags);

 	MemoryContextSwitchTo(oldcontext);
-
-	return ExecPlanStillValid(queryDesc->estate);
-}
-
-/*
- * ExecutorStartCachedPlan
- *		Start execution for a given query in the CachedPlanSource, replanning
- *		if the plan is invalidated due to deferred locks taken during the
- *		plan's initialization
- *
- * This function handles cases where the CachedPlan given in queryDesc->cplan
- * might become invalid during the initialization of the plan given in
- * queryDesc->plannedstmt, particularly when prunable relations in it are
- * locked after performing initial pruning. If the locks invalidate the plan,
- * the function calls UpdateCachedPlan() to replan all queries in the
- * CachedPlan, and then retries initialization.
- *
- * The function repeats the process until ExecutorStart() successfully
- * initializes the plan, that is without the CachedPlan becoming invalid.
- */
-void
-ExecutorStartCachedPlan(QueryDesc *queryDesc, int eflags,
-						CachedPlanSource *plansource,
-						int query_index)
-{
-	if (unlikely(queryDesc->cplan == NULL))
-		elog(ERROR, "ExecutorStartCachedPlan(): missing CachedPlan");
-	if (unlikely(plansource == NULL))
-		elog(ERROR, "ExecutorStartCachedPlan(): missing CachedPlanSource");
-
-	/*
-	 * Loop and retry with an updated plan until no further invalidation
-	 * occurs.
-	 */
-	while (1)
-	{
-		if (!ExecutorStart(queryDesc, eflags))
-		{
-			/*
-			 * Clean up the current execution state before creating the new
-			 * plan to retry ExecutorStart().  Mark execution as aborted to
-			 * ensure that AFTER trigger state is properly reset.
-			 */
-			queryDesc->estate->es_aborted = true;
-			ExecutorEnd(queryDesc);
-
-			/* Retry ExecutorStart() with an updated plan tree. */
-			queryDesc->plannedstmt = UpdateCachedPlan(plansource, query_index,
-													  queryDesc->queryEnv);
-		}
-		else
-
-			/*
-			 * Exit the loop if the plan is initialized successfully and no
-			 * sinval messages were received that invalidated the CachedPlan.
-			 */
-			break;
-	}
 }

 /* ----------------------------------------------------------------
@@ -387,7 +320,6 @@ standard_ExecutorRun(QueryDesc *queryDesc,
 	estate = queryDesc->estate;

 	Assert(estate != NULL);
-	Assert(!estate->es_aborted);
 	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

 	/* caller must ensure the query's snapshot is active */
@@ -494,11 +426,8 @@ standard_ExecutorFinish(QueryDesc *queryDesc)
 	Assert(estate != NULL);
 	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

-	/*
-	 * This should be run once and only once per Executor instance and never
-	 * if the execution was aborted.
-	 */
-	Assert(!estate->es_finished && !estate->es_aborted);
+	/* This should be run once and only once per Executor instance */
+	Assert(!estate->es_finished);

 	/* Switch into per-query memory context */
 	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
@@ -561,10 +490,11 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
 											 (PgStat_Counter) estate->es_parallel_workers_launched);

 	/*
-	 * Check that ExecutorFinish was called, unless in EXPLAIN-only mode or if
-	 * execution was aborted.
+	 * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
+	 * Assert is needed because ExecutorFinish is new as of 9.1, and callers
+	 * might forget to call it.
 	 */
-	Assert(estate->es_finished || estate->es_aborted ||
+	Assert(estate->es_finished ||
 		   (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

 	/*
@@ -578,14 +508,6 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
 	UnregisterSnapshot(estate->es_snapshot);
 	UnregisterSnapshot(estate->es_crosscheck_snapshot);

-	/*
-	 * Reset AFTER trigger module if the query execution was aborted.
-	 */
-	if (estate->es_aborted &&
-		!(estate->es_top_eflags &
-		  (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
-		AfterTriggerAbortQuery();
-
 	/*
 	 * Must switch out of context before destroying it
 	 */
@@ -684,21 +606,6 @@ ExecCheckPermissions(List *rangeTable, List *rteperminfos,
 				   (rte->rtekind == RTE_SUBQUERY &&
 					rte->relkind == RELKIND_VIEW));

-			/*
-			 * Ensure that we have at least an AccessShareLock on relations
-			 * whose permissions need to be checked.
-			 *
-			 * Skip this check in a parallel worker because locks won't be
-			 * taken until ExecInitNode() performs plan initialization.
-			 *
-			 * XXX: ExecCheckPermissions() in a parallel worker may be
-			 * redundant with the checks done in the leader process, so this
-			 * should be reviewed to ensure it’s necessary.
-			 */
-			Assert(IsParallelWorker() ||
-				   CheckRelationOidLockedByMe(rte->relid, AccessShareLock,
-											  true));
-
 			(void) getRTEPermissionInfo(rteperminfos, rte);
 			/* Many-to-one mapping not allowed */
 			Assert(!bms_is_member(rte->perminfoindex, indexset));
@@ -924,12 +831,6 @@ ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
- *
- *		If the plan originates from a CachedPlan (given in queryDesc->cplan),
- *		it can become invalid during runtime "initial" pruning when the
- *		remaining set of locks is taken.  The function returns early in that
- *		case without initializing the plan, and the caller is expected to
- *		retry with a new valid plan.
 * ----------------------------------------------------------------
 */
 static void
@@ -937,7 +838,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 {
 	CmdType		operation = queryDesc->operation;
 	PlannedStmt *plannedstmt = queryDesc->plannedstmt;
-	CachedPlan *cachedplan = queryDesc->cplan;
 	Plan	   *plan = plannedstmt->planTree;
 	List	   *rangeTable = plannedstmt->rtable;
 	EState	   *estate = queryDesc->estate;
@@ -958,7 +858,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 					   bms_copy(plannedstmt->unprunableRelids));

 	estate->es_plannedstmt = plannedstmt;
-	estate->es_cachedplan = cachedplan;
 	estate->es_part_prune_infos = plannedstmt->partPruneInfos;

 	/*
@@ -972,9 +871,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 	 */
 	ExecDoInitialPruning(estate);

-	if (!ExecPlanStillValid(estate))
-		return;
-
 	/*
 	 * Next, build the ExecRowMark array from the PlanRowMark(s), if any.
 	 */
@@ -3092,9 +2988,6 @@ EvalPlanQualStart(EPQState *epqstate, Plan *planTree)
 	 * the snapshot, rangetable, and external Param info.  They need their own
 	 * copies of local state, including a tuple table, es_param_exec_vals,
 	 * result-rel info, etc.
-	 *
-	 * es_cachedplan is not copied because EPQ plan execution does not acquire
-	 * any new locks that could invalidate the CachedPlan.
 	 */
 	rcestate->es_direction = ForwardScanDirection;
 	rcestate->es_snapshot = parentestate->es_snapshot;
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -1278,15 +1278,8 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver,
 	paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMLISTINFO, false);
 	paramLI = RestoreParamList(&paramspace);

-	/*
-	 * Create a QueryDesc for the query.  We pass NULL for cachedplan, because
-	 * we don't have a pointer to the CachedPlan in the leader's process. It's
-	 * fine because the only reason the executor needs to see it is to decide
-	 * if it should take locks on certain relations, but parallel workers
-	 * always take locks anyway.
-	 */
+	/* Create a QueryDesc for the query. */
 	return CreateQueryDesc(pstmt,
-						   NULL,
 						   queryString,
 						   GetActiveSnapshot(), InvalidSnapshot,
 						   receiver, paramLI, NULL, instrument_options);
@@ -1471,8 +1464,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)

 	/* Start up the executor */
 	queryDesc->plannedstmt->jitFlags = fpes->jit_flags;
-	if (!ExecutorStart(queryDesc, fpes->eflags))
-		elog(ERROR, "ExecutorStart() failed unexpectedly");
+	ExecutorStart(queryDesc, fpes->eflags);

 	/* Special executor initialization steps for parallel workers */
 	queryDesc->planstate->state->es_query_dsa = area;
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -26,7 +26,6 @@
 #include "partitioning/partdesc.h"
 #include "partitioning/partprune.h"
 #include "rewrite/rewriteManip.h"
-#include "storage/lmgr.h"
 #include "utils/acl.h"
 #include "utils/lsyscache.h"
 #include "utils/partcache.h"
@@ -1771,8 +1770,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
 * ExecDoInitialPruning:
 *		Perform runtime "initial" pruning, if necessary, to determine the set
 *		of child subnodes that need to be initialized during ExecInitNode() for
- *		all plan nodes that contain a PartitionPruneInfo.  This also locks the
- *		leaf partitions whose subnodes will be initialized if needed.
+ *		all plan nodes that contain a PartitionPruneInfo.
 *
 * ExecInitPartitionExecPruning:
 *		Updates the PartitionPruneState found at given part_prune_index in
@@ -1798,8 +1796,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
 * ExecDoInitialPruning
 *		Perform runtime "initial" pruning, if necessary, to determine the set
 *		of child subnodes that need to be initialized during ExecInitNode() for
- *		plan nodes that support partition pruning.  This also locks the leaf
- *		partitions whose subnodes will be initialized if needed.
+ *		plan nodes that support partition pruning.
 *
 * This function iterates over each PartitionPruneInfo entry in
 * estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState
@@ -1821,9 +1818,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
 void
 ExecDoInitialPruning(EState *estate)
 {
-	PlannedStmt *stmt = estate->es_plannedstmt;
 	ListCell   *lc;
-	List	   *locked_relids = NIL;

 	foreach(lc, estate->es_part_prune_infos)
 	{
@@ -1849,68 +1844,11 @@ ExecDoInitialPruning(EState *estate)
 		else
 			validsubplan_rtis = all_leafpart_rtis;

-		if (ExecShouldLockRelations(estate))
-		{
-			int			rtindex = -1;
-
-			while ((rtindex = bms_next_member(validsubplan_rtis,
-											  rtindex)) >= 0)
-			{
-				RangeTblEntry *rte = exec_rt_fetch(rtindex, estate);
-
-				Assert(rte->rtekind == RTE_RELATION &&
-					   rte->rellockmode != NoLock);
-				LockRelationOid(rte->relid, rte->rellockmode);
-				locked_relids = lappend_int(locked_relids, rtindex);
-			}
-		}
 		estate->es_unpruned_relids = bms_add_members(estate->es_unpruned_relids,
 													 validsubplan_rtis);
 		estate->es_part_prune_results = lappend(estate->es_part_prune_results,
 												validsubplans);
 	}
-
-	/*
-	 * Lock the first result relation of each ModifyTable node, even if it was
-	 * pruned.  This is required for ExecInitModifyTable(), which keeps its
-	 * first result relation if all other result relations have been pruned,
-	 * because some executor paths (e.g., in nodeModifyTable.c and
-	 * execPartition.c) rely on there being at least one result relation.
-	 *
-	 * There's room for improvement here --- we actually only need to do this
-	 * if all other result relations of the ModifyTable node were pruned, but
-	 * we don't have an easy way to tell that here.
-	 */
-	if (stmt->resultRelations && ExecShouldLockRelations(estate))
-	{
-		foreach(lc, stmt->firstResultRels)
-		{
-			Index		firstResultRel = lfirst_int(lc);
-
-			if (!bms_is_member(firstResultRel, estate->es_unpruned_relids))
-			{
-				RangeTblEntry *rte = exec_rt_fetch(firstResultRel, estate);
-
-				Assert(rte->rtekind == RTE_RELATION && rte->rellockmode != NoLock);
-				LockRelationOid(rte->relid, rte->rellockmode);
-				locked_relids = lappend_int(locked_relids, firstResultRel);
-			}
-		}
-	}
-
-	/*
-	 * Release the useless locks if the plan won't be executed.  This is the
-	 * same as what CheckCachedPlan() in plancache.c does.
-	 */
-	if (!ExecPlanStillValid(estate))
-	{
-		foreach(lc, locked_relids)
-		{
-			RangeTblEntry *rte = exec_rt_fetch(lfirst_int(lc), estate);
-
-			UnlockRelationOid(rte->relid, rte->rellockmode);
-		}
-	}
 }

 /*
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -147,7 +147,6 @@ CreateExecutorState(void)
 	estate->es_top_eflags = 0;
 	estate->es_instrument = 0;
 	estate->es_finished = false;
-	estate->es_aborted = false;

 	estate->es_exprcontexts = NIL;

--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -34,6 +34,7 @@
 #include "utils/funccache.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
+#include "utils/plancache.h"
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"

@@ -1338,7 +1339,6 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache)
 		dest = None_Receiver;

 	es->qd = CreateQueryDesc(es->stmt,
-							 NULL,
 							 fcache->func->src,
 							 GetActiveSnapshot(),
 							 InvalidSnapshot,
@@ -1363,8 +1363,7 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache)
 			eflags = EXEC_FLAG_SKIP_TRIGGERS;
 		else
 			eflags = 0;			/* default run-to-completion flags */
-		if (!ExecutorStart(es->qd, eflags))
-			elog(ERROR, "ExecutorStart() failed unexpectedly");
+		ExecutorStart(es->qd, eflags);
 	}

 	es->status = F_EXEC_RUN;
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -70,8 +70,7 @@ static int	_SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
 static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes,
 										 Datum *Values, const char *Nulls);

-static int	_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount,
-						CachedPlanSource *plansource, int query_index);
+static int	_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount);

 static void _SPI_error_callback(void *arg);

@@ -1686,8 +1685,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan,
 					  query_string,
 					  plansource->commandTag,
 					  stmt_list,
-					  cplan,
-					  plansource);
+					  cplan);

 	/*
 	 * Set up options for portal.  Default SCROLL type is chosen the same way
@@ -2502,7 +2500,6 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
 		CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1);
 		List	   *stmt_list;
 		ListCell   *lc2;
-		int			query_index = 0;

 		spicallbackarg.query = plansource->query_string;

@@ -2693,16 +2690,14 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
 					snap = InvalidSnapshot;

 				qdesc = CreateQueryDesc(stmt,
-										cplan,
 										plansource->query_string,
 										snap, crosscheck_snapshot,
 										dest,
 										options->params,
 										_SPI_current->queryEnv,
 										0);
-
-				res = _SPI_pquery(qdesc, fire_triggers, canSetTag ? options->tcount : 0,
-								  plansource, query_index);
+				res = _SPI_pquery(qdesc, fire_triggers,
+								  canSetTag ? options->tcount : 0);
 				FreeQueryDesc(qdesc);
 			}
 			else
@@ -2799,8 +2794,6 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
 				my_res = res;
 				goto fail;
 			}
-
-			query_index++;
 		}

 		/* Done with this plan, so release refcount */
@@ -2878,8 +2871,7 @@ _SPI_convert_params(int nargs, Oid *argtypes,
 }

 static int
-_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount,
-			CachedPlanSource *plansource, int query_index)
+_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount)
 {
 	int			operation = queryDesc->operation;
 	int			eflags;
@@ -2935,16 +2927,7 @@ _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount,
 	else
 		eflags = EXEC_FLAG_SKIP_TRIGGERS;

-	if (queryDesc->cplan)
-	{
-		ExecutorStartCachedPlan(queryDesc, eflags, plansource, query_index);
-		Assert(queryDesc->planstate);
-	}
-	else
-	{
-		if (!ExecutorStart(queryDesc, eflags))
-			elog(ERROR, "ExecutorStart() failed unexpectedly");
-	}
+	ExecutorStart(queryDesc, eflags);

 	ExecutorRun(queryDesc, ForwardScanDirection, tcount);