1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-19 23:22:23 +03:00

Revert "Don't lock partitions pruned by initial pruning"

As pointed out by Tom Lane, the patch introduced fragile and invasive
design around plan invalidation handling when locking of prunable
partitions was deferred from plancache.c to the executor. In
particular, it violated assumptions about CachedPlan immutability and
altered executor APIs in ways that are difficult to justify given the
added complexity and overhead.

This also removes the firstResultRels field added to PlannedStmt in
commit 28317de72, which was intended to support deferred locking of
certain ModifyTable result relations.

Reported-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/605328.1747710381@sss.pgh.pa.us
This commit is contained in:
Amit Langote
2025-05-22 14:17:24 +09:00
parent f3622b6476
commit 1722d5eb05
33 changed files with 90 additions and 673 deletions

View File

@@ -285,28 +285,6 @@ are typically reset to empty once per tuple. Per-tuple contexts are usually
associated with ExprContexts, and commonly each PlanState node has its own
ExprContext to evaluate its qual and targetlist expressions in.
Relation Locking
----------------
When the executor initializes a plan tree for execution, it doesn't lock
non-index relations if the plan tree is freshly generated and not derived
from a CachedPlan. This is because such locks have already been established
during the query's parsing, rewriting, and planning phases. However, with a
cached plan tree, some relations may remain unlocked. The function
AcquireExecutorLocks() only locks unprunable relations in the plan, deferring
the locking of prunable ones to executor initialization. This avoids
unnecessary locking of relations that will be pruned during "initial" runtime
pruning in ExecDoInitialPruning().
This approach creates a window where a cached plan tree with child tables
could become outdated if another backend modifies these tables before
ExecDoInitialPruning() locks them. As a result, the executor has the added duty
to verify the plan tree's validity whenever it locks a child table after
doing initial pruning. This validation is done by checking the CachedPlan.is_valid
flag. If the plan tree is outdated (is_valid = false), the executor stops
further initialization, cleans up anything in EState that would have been
allocated up to that point, and retries execution after recreating the
invalid plan in the CachedPlan. See ExecutorStartCachedPlan().
Query Processing Control Flow
-----------------------------
@@ -315,13 +293,11 @@ This is a sketch of control flow for full query processing:
CreateQueryDesc
ExecutorStart or ExecutorStartCachedPlan
ExecutorStart
CreateExecutorState
creates per-query context
switch to per-query context to run ExecDoInitialPruning and ExecInitNode
switch to per-query context to run ExecInitNode
AfterTriggerBeginQuery
ExecDoInitialPruning
does initial pruning and locks surviving partitions if needed
ExecInitNode --- recursively scans plan tree
ExecInitNode
recurse into subsidiary nodes
@@ -345,12 +321,7 @@ This is a sketch of control flow for full query processing:
FreeQueryDesc
As mentioned in the "Relation Locking" section, if the plan tree is found to
be stale after locking partitions in ExecDoInitialPruning(), the control is
immediately returned to ExecutorStartCachedPlan(), which will create a new plan
tree and perform the steps starting from CreateExecutorState() again.
Per above comments, it's not really critical for ExecEndPlan to free any
Per above comments, it's not really critical for ExecEndNode to free any
memory; it'll all go away in FreeExecutorState anyway. However, we do need to
be careful to close relations, drop buffer pins, etc, so we do need to scan
the plan state tree to find these sorts of resources.

View File

@@ -55,13 +55,11 @@
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "rewrite/rewriteHandler.h"
#include "storage/lmgr.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/backend_status.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/plancache.h"
#include "utils/rls.h"
#include "utils/snapmgr.h"
@@ -119,16 +117,11 @@ static void ReportNotNullViolationError(ResultRelInfo *resultRelInfo,
* get control when ExecutorStart is called. Such a plugin would
* normally call standard_ExecutorStart().
*
* Return value indicates if the plan has been initialized successfully so
* that queryDesc->planstate contains a valid PlanState tree. It may not
* if the plan got invalidated during InitPlan().
* ----------------------------------------------------------------
*/
bool
void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
bool plan_valid;
/*
* In some cases (e.g. an EXECUTE statement or an execute message with the
* extended query protocol) the query_id won't be reported, so do it now.
@@ -140,14 +133,12 @@ ExecutorStart(QueryDesc *queryDesc, int eflags)
pgstat_report_query_id(queryDesc->plannedstmt->queryId, false);
if (ExecutorStart_hook)
plan_valid = (*ExecutorStart_hook) (queryDesc, eflags);
(*ExecutorStart_hook) (queryDesc, eflags);
else
plan_valid = standard_ExecutorStart(queryDesc, eflags);
return plan_valid;
standard_ExecutorStart(queryDesc, eflags);
}
bool
void
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
EState *estate;
@@ -271,64 +262,6 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
InitPlan(queryDesc, eflags);
MemoryContextSwitchTo(oldcontext);
return ExecPlanStillValid(queryDesc->estate);
}
/*
* ExecutorStartCachedPlan
* Start execution for a given query in the CachedPlanSource, replanning
* if the plan is invalidated due to deferred locks taken during the
* plan's initialization
*
* This function handles cases where the CachedPlan given in queryDesc->cplan
* might become invalid during the initialization of the plan given in
* queryDesc->plannedstmt, particularly when prunable relations in it are
* locked after performing initial pruning. If the locks invalidate the plan,
* the function calls UpdateCachedPlan() to replan all queries in the
* CachedPlan, and then retries initialization.
*
* The function repeats the process until ExecutorStart() successfully
* initializes the plan, that is without the CachedPlan becoming invalid.
*/
void
ExecutorStartCachedPlan(QueryDesc *queryDesc, int eflags,
CachedPlanSource *plansource,
int query_index)
{
if (unlikely(queryDesc->cplan == NULL))
elog(ERROR, "ExecutorStartCachedPlan(): missing CachedPlan");
if (unlikely(plansource == NULL))
elog(ERROR, "ExecutorStartCachedPlan(): missing CachedPlanSource");
/*
* Loop and retry with an updated plan until no further invalidation
* occurs.
*/
while (1)
{
if (!ExecutorStart(queryDesc, eflags))
{
/*
* Clean up the current execution state before creating the new
* plan to retry ExecutorStart(). Mark execution as aborted to
* ensure that AFTER trigger state is properly reset.
*/
queryDesc->estate->es_aborted = true;
ExecutorEnd(queryDesc);
/* Retry ExecutorStart() with an updated plan tree. */
queryDesc->plannedstmt = UpdateCachedPlan(plansource, query_index,
queryDesc->queryEnv);
}
else
/*
* Exit the loop if the plan is initialized successfully and no
* sinval messages were received that invalidated the CachedPlan.
*/
break;
}
}
/* ----------------------------------------------------------------
@@ -387,7 +320,6 @@ standard_ExecutorRun(QueryDesc *queryDesc,
estate = queryDesc->estate;
Assert(estate != NULL);
Assert(!estate->es_aborted);
Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
/* caller must ensure the query's snapshot is active */
@@ -494,11 +426,8 @@ standard_ExecutorFinish(QueryDesc *queryDesc)
Assert(estate != NULL);
Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
/*
* This should be run once and only once per Executor instance and never
* if the execution was aborted.
*/
Assert(!estate->es_finished && !estate->es_aborted);
/* This should be run once and only once per Executor instance */
Assert(!estate->es_finished);
/* Switch into per-query memory context */
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
@@ -561,10 +490,11 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
(PgStat_Counter) estate->es_parallel_workers_launched);
/*
* Check that ExecutorFinish was called, unless in EXPLAIN-only mode or if
* execution was aborted.
* Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
* Assert is needed because ExecutorFinish is new as of 9.1, and callers
* might forget to call it.
*/
Assert(estate->es_finished || estate->es_aborted ||
Assert(estate->es_finished ||
(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
/*
@@ -578,14 +508,6 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
UnregisterSnapshot(estate->es_snapshot);
UnregisterSnapshot(estate->es_crosscheck_snapshot);
/*
* Reset AFTER trigger module if the query execution was aborted.
*/
if (estate->es_aborted &&
!(estate->es_top_eflags &
(EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
AfterTriggerAbortQuery();
/*
* Must switch out of context before destroying it
*/
@@ -684,21 +606,6 @@ ExecCheckPermissions(List *rangeTable, List *rteperminfos,
(rte->rtekind == RTE_SUBQUERY &&
rte->relkind == RELKIND_VIEW));
/*
* Ensure that we have at least an AccessShareLock on relations
* whose permissions need to be checked.
*
* Skip this check in a parallel worker because locks won't be
* taken until ExecInitNode() performs plan initialization.
*
* XXX: ExecCheckPermissions() in a parallel worker may be
* redundant with the checks done in the leader process, so this
* should be reviewed to ensure its necessary.
*/
Assert(IsParallelWorker() ||
CheckRelationOidLockedByMe(rte->relid, AccessShareLock,
true));
(void) getRTEPermissionInfo(rteperminfos, rte);
/* Many-to-one mapping not allowed */
Assert(!bms_is_member(rte->perminfoindex, indexset));
@@ -924,12 +831,6 @@ ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
*
* Initializes the query plan: open files, allocate storage
* and start up the rule manager
*
* If the plan originates from a CachedPlan (given in queryDesc->cplan),
* it can become invalid during runtime "initial" pruning when the
* remaining set of locks is taken. The function returns early in that
* case without initializing the plan, and the caller is expected to
* retry with a new valid plan.
* ----------------------------------------------------------------
*/
static void
@@ -937,7 +838,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
{
CmdType operation = queryDesc->operation;
PlannedStmt *plannedstmt = queryDesc->plannedstmt;
CachedPlan *cachedplan = queryDesc->cplan;
Plan *plan = plannedstmt->planTree;
List *rangeTable = plannedstmt->rtable;
EState *estate = queryDesc->estate;
@@ -958,7 +858,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
bms_copy(plannedstmt->unprunableRelids));
estate->es_plannedstmt = plannedstmt;
estate->es_cachedplan = cachedplan;
estate->es_part_prune_infos = plannedstmt->partPruneInfos;
/*
@@ -972,9 +871,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
*/
ExecDoInitialPruning(estate);
if (!ExecPlanStillValid(estate))
return;
/*
* Next, build the ExecRowMark array from the PlanRowMark(s), if any.
*/
@@ -3092,9 +2988,6 @@ EvalPlanQualStart(EPQState *epqstate, Plan *planTree)
* the snapshot, rangetable, and external Param info. They need their own
* copies of local state, including a tuple table, es_param_exec_vals,
* result-rel info, etc.
*
* es_cachedplan is not copied because EPQ plan execution does not acquire
* any new locks that could invalidate the CachedPlan.
*/
rcestate->es_direction = ForwardScanDirection;
rcestate->es_snapshot = parentestate->es_snapshot;

View File

@@ -1278,15 +1278,8 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver,
paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMLISTINFO, false);
paramLI = RestoreParamList(&paramspace);
/*
* Create a QueryDesc for the query. We pass NULL for cachedplan, because
* we don't have a pointer to the CachedPlan in the leader's process. It's
* fine because the only reason the executor needs to see it is to decide
* if it should take locks on certain relations, but parallel workers
* always take locks anyway.
*/
/* Create a QueryDesc for the query. */
return CreateQueryDesc(pstmt,
NULL,
queryString,
GetActiveSnapshot(), InvalidSnapshot,
receiver, paramLI, NULL, instrument_options);
@@ -1471,8 +1464,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
/* Start up the executor */
queryDesc->plannedstmt->jitFlags = fpes->jit_flags;
if (!ExecutorStart(queryDesc, fpes->eflags))
elog(ERROR, "ExecutorStart() failed unexpectedly");
ExecutorStart(queryDesc, fpes->eflags);
/* Special executor initialization steps for parallel workers */
queryDesc->planstate->state->es_query_dsa = area;

View File

@@ -26,7 +26,6 @@
#include "partitioning/partdesc.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "storage/lmgr.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
@@ -1771,8 +1770,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
* ExecDoInitialPruning:
* Perform runtime "initial" pruning, if necessary, to determine the set
* of child subnodes that need to be initialized during ExecInitNode() for
* all plan nodes that contain a PartitionPruneInfo. This also locks the
* leaf partitions whose subnodes will be initialized if needed.
* all plan nodes that contain a PartitionPruneInfo.
*
* ExecInitPartitionExecPruning:
* Updates the PartitionPruneState found at given part_prune_index in
@@ -1798,8 +1796,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
* ExecDoInitialPruning
* Perform runtime "initial" pruning, if necessary, to determine the set
* of child subnodes that need to be initialized during ExecInitNode() for
* plan nodes that support partition pruning. This also locks the leaf
* partitions whose subnodes will be initialized if needed.
* plan nodes that support partition pruning.
*
* This function iterates over each PartitionPruneInfo entry in
* estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState
@@ -1821,9 +1818,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
void
ExecDoInitialPruning(EState *estate)
{
PlannedStmt *stmt = estate->es_plannedstmt;
ListCell *lc;
List *locked_relids = NIL;
foreach(lc, estate->es_part_prune_infos)
{
@@ -1849,68 +1844,11 @@ ExecDoInitialPruning(EState *estate)
else
validsubplan_rtis = all_leafpart_rtis;
if (ExecShouldLockRelations(estate))
{
int rtindex = -1;
while ((rtindex = bms_next_member(validsubplan_rtis,
rtindex)) >= 0)
{
RangeTblEntry *rte = exec_rt_fetch(rtindex, estate);
Assert(rte->rtekind == RTE_RELATION &&
rte->rellockmode != NoLock);
LockRelationOid(rte->relid, rte->rellockmode);
locked_relids = lappend_int(locked_relids, rtindex);
}
}
estate->es_unpruned_relids = bms_add_members(estate->es_unpruned_relids,
validsubplan_rtis);
estate->es_part_prune_results = lappend(estate->es_part_prune_results,
validsubplans);
}
/*
* Lock the first result relation of each ModifyTable node, even if it was
* pruned. This is required for ExecInitModifyTable(), which keeps its
* first result relation if all other result relations have been pruned,
* because some executor paths (e.g., in nodeModifyTable.c and
* execPartition.c) rely on there being at least one result relation.
*
* There's room for improvement here --- we actually only need to do this
* if all other result relations of the ModifyTable node were pruned, but
* we don't have an easy way to tell that here.
*/
if (stmt->resultRelations && ExecShouldLockRelations(estate))
{
foreach(lc, stmt->firstResultRels)
{
Index firstResultRel = lfirst_int(lc);
if (!bms_is_member(firstResultRel, estate->es_unpruned_relids))
{
RangeTblEntry *rte = exec_rt_fetch(firstResultRel, estate);
Assert(rte->rtekind == RTE_RELATION && rte->rellockmode != NoLock);
LockRelationOid(rte->relid, rte->rellockmode);
locked_relids = lappend_int(locked_relids, firstResultRel);
}
}
}
/*
* Release the useless locks if the plan won't be executed. This is the
* same as what CheckCachedPlan() in plancache.c does.
*/
if (!ExecPlanStillValid(estate))
{
foreach(lc, locked_relids)
{
RangeTblEntry *rte = exec_rt_fetch(lfirst_int(lc), estate);
UnlockRelationOid(rte->relid, rte->rellockmode);
}
}
}
/*

View File

@@ -147,7 +147,6 @@ CreateExecutorState(void)
estate->es_top_eflags = 0;
estate->es_instrument = 0;
estate->es_finished = false;
estate->es_aborted = false;
estate->es_exprcontexts = NIL;

View File

@@ -34,6 +34,7 @@
#include "utils/funccache.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/plancache.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
@@ -1338,7 +1339,6 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache)
dest = None_Receiver;
es->qd = CreateQueryDesc(es->stmt,
NULL,
fcache->func->src,
GetActiveSnapshot(),
InvalidSnapshot,
@@ -1363,8 +1363,7 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache)
eflags = EXEC_FLAG_SKIP_TRIGGERS;
else
eflags = 0; /* default run-to-completion flags */
if (!ExecutorStart(es->qd, eflags))
elog(ERROR, "ExecutorStart() failed unexpectedly");
ExecutorStart(es->qd, eflags);
}
es->status = F_EXEC_RUN;

View File

@@ -70,8 +70,7 @@ static int _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes,
Datum *Values, const char *Nulls);
static int _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount,
CachedPlanSource *plansource, int query_index);
static int _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount);
static void _SPI_error_callback(void *arg);
@@ -1686,8 +1685,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan,
query_string,
plansource->commandTag,
stmt_list,
cplan,
plansource);
cplan);
/*
* Set up options for portal. Default SCROLL type is chosen the same way
@@ -2502,7 +2500,6 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1);
List *stmt_list;
ListCell *lc2;
int query_index = 0;
spicallbackarg.query = plansource->query_string;
@@ -2693,16 +2690,14 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
snap = InvalidSnapshot;
qdesc = CreateQueryDesc(stmt,
cplan,
plansource->query_string,
snap, crosscheck_snapshot,
dest,
options->params,
_SPI_current->queryEnv,
0);
res = _SPI_pquery(qdesc, fire_triggers, canSetTag ? options->tcount : 0,
plansource, query_index);
res = _SPI_pquery(qdesc, fire_triggers,
canSetTag ? options->tcount : 0);
FreeQueryDesc(qdesc);
}
else
@@ -2799,8 +2794,6 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
my_res = res;
goto fail;
}
query_index++;
}
/* Done with this plan, so release refcount */
@@ -2878,8 +2871,7 @@ _SPI_convert_params(int nargs, Oid *argtypes,
}
static int
_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount,
CachedPlanSource *plansource, int query_index)
_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount)
{
int operation = queryDesc->operation;
int eflags;
@@ -2935,16 +2927,7 @@ _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount,
else
eflags = EXEC_FLAG_SKIP_TRIGGERS;
if (queryDesc->cplan)
{
ExecutorStartCachedPlan(queryDesc, eflags, plansource, query_index);
Assert(queryDesc->planstate);
}
else
{
if (!ExecutorStart(queryDesc, eflags))
elog(ERROR, "ExecutorStart() failed unexpectedly");
}
ExecutorStart(queryDesc, eflags);
ExecutorRun(queryDesc, ForwardScanDirection, tcount);