mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Don't scan partitioned tables.
Partitioned tables do not contain any data; only their unpartitioned descendants need to be scanned. However, the partitioned tables still need to be locked, even though they're not scanned. To make that work, Append and MergeAppend relations now need to carry a list of (unscanned) partitioned relations that must be locked, and InitPlan must lock all partitioned result relations. Aside from the obvious advantage of avoiding some work at execution time, this has two other advantages. First, it may improve the planner's decision-making in some cases since the empty relation might throw things off. Second, it paves the way to getting rid of the storage for partitioned tables altogether. Amit Langote, reviewed by me. Discussion: http://postgr.es/m/6837c359-45c4-8044-34d1-736756335a15@lab.ntt.co.jp
This commit is contained in:
@ -844,6 +844,22 @@ InitPlan(QueryDesc *queryDesc, int eflags)
|
||||
estate->es_num_result_relations = numResultRelations;
|
||||
/* es_result_relation_info is NULL except when within ModifyTable */
|
||||
estate->es_result_relation_info = NULL;
|
||||
|
||||
/*
|
||||
* In the partitioned result relation case, lock the non-leaf result
|
||||
* relations too. We don't however need ResultRelInfos for them.
|
||||
*/
|
||||
if (plannedstmt->nonleafResultRelations)
|
||||
{
|
||||
foreach(l, plannedstmt->nonleafResultRelations)
|
||||
{
|
||||
Index resultRelationIndex = lfirst_int(l);
|
||||
Oid resultRelationOid;
|
||||
|
||||
resultRelationOid = getrelid(resultRelationIndex, rangeTable);
|
||||
LockRelationOid(resultRelationOid, RowExclusiveLock);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -858,7 +874,11 @@ InitPlan(QueryDesc *queryDesc, int eflags)
|
||||
/*
|
||||
* Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE
|
||||
* before we initialize the plan tree, else we'd be risking lock upgrades.
|
||||
* While we are at it, build the ExecRowMark list.
|
||||
* While we are at it, build the ExecRowMark list. Any partitioned child
|
||||
* tables are ignored here (because isParent=true) and will be locked by
|
||||
* the first Append or MergeAppend node that references them. (Note that
|
||||
* the RowMarks corresponding to partitioned child tables are present in
|
||||
* the same list as the rest, i.e., plannedstmt->rowMarks.)
|
||||
*/
|
||||
estate->es_rowMarks = NIL;
|
||||
foreach(l, plannedstmt->rowMarks)
|
||||
|
@ -161,6 +161,7 @@ ExecSerializePlan(Plan *plan, EState *estate)
|
||||
pstmt->planTree = plan;
|
||||
pstmt->rtable = estate->es_range_table;
|
||||
pstmt->resultRelations = NIL;
|
||||
pstmt->nonleafResultRelations = NIL;
|
||||
pstmt->subplans = estate->es_plannedstmt->subplans;
|
||||
pstmt->rewindPlanIDs = NULL;
|
||||
pstmt->rowMarks = NIL;
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "executor/executor.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "parser/parsetree.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/rel.h"
|
||||
|
||||
@ -953,3 +954,58 @@ ShutdownExprContext(ExprContext *econtext, bool isCommit)
|
||||
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
}
|
||||
|
||||
/*
|
||||
* ExecLockNonLeafAppendTables
|
||||
*
|
||||
* Locks, if necessary, the tables indicated by the RT indexes contained in
|
||||
* the partitioned_rels list. These are the non-leaf tables in the partition
|
||||
* tree controlled by a given Append or MergeAppend node.
|
||||
*/
|
||||
void
|
||||
ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate)
|
||||
{
|
||||
PlannedStmt *stmt = estate->es_plannedstmt;
|
||||
ListCell *lc;
|
||||
|
||||
foreach(lc, partitioned_rels)
|
||||
{
|
||||
ListCell *l;
|
||||
Index rti = lfirst_int(lc);
|
||||
bool is_result_rel = false;
|
||||
Oid relid = getrelid(rti, estate->es_range_table);
|
||||
|
||||
/* If this is a result relation, already locked in InitPlan */
|
||||
foreach(l, stmt->nonleafResultRelations)
|
||||
{
|
||||
if (rti == lfirst_int(l))
|
||||
{
|
||||
is_result_rel = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Not a result relation; check if there is a RowMark that requires
|
||||
* taking a RowShareLock on this rel.
|
||||
*/
|
||||
if (!is_result_rel)
|
||||
{
|
||||
PlanRowMark *rc = NULL;
|
||||
|
||||
foreach(l, stmt->rowMarks)
|
||||
{
|
||||
if (((PlanRowMark *) lfirst(l))->rti == rti)
|
||||
{
|
||||
rc = lfirst(l);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (rc && RowMarkRequiresRowShareLock(rc->markType))
|
||||
LockRelationOid(relid, RowShareLock);
|
||||
else
|
||||
LockRelationOid(relid, AccessShareLock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -128,6 +128,12 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
|
||||
/* check for unsupported flags */
|
||||
Assert(!(eflags & EXEC_FLAG_MARK));
|
||||
|
||||
/*
|
||||
* Lock the non-leaf tables in the partition tree controlled by this
|
||||
* node. It's a no-op for non-partitioned parent tables.
|
||||
*/
|
||||
ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
|
||||
|
||||
/*
|
||||
* Set up empty vector of subplan states
|
||||
*/
|
||||
|
@ -71,6 +71,12 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
|
||||
/* check for unsupported flags */
|
||||
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
|
||||
|
||||
/*
|
||||
* Lock the non-leaf tables in the partition tree controlled by this
|
||||
* node. It's a no-op for non-partitioned parent tables.
|
||||
*/
|
||||
ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
|
||||
|
||||
/*
|
||||
* Set up empty vector of subplan states
|
||||
*/
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include "foreign/fdwapi.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "parser/parsetree.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/builtins.h"
|
||||
@ -1725,8 +1726,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
|
||||
|
||||
estate->es_result_relation_info = saved_resultRelInfo;
|
||||
|
||||
/* The root table RT index is at the head of the partitioned_rels list */
|
||||
if (node->partitioned_rels)
|
||||
{
|
||||
Index root_rti;
|
||||
Oid root_oid;
|
||||
|
||||
root_rti = linitial_int(node->partitioned_rels);
|
||||
root_oid = getrelid(root_rti, estate->es_range_table);
|
||||
rel = heap_open(root_oid, NoLock); /* locked by InitPlan */
|
||||
}
|
||||
else
|
||||
rel = mtstate->resultRelInfo->ri_RelationDesc;
|
||||
|
||||
/* Build state for INSERT tuple routing */
|
||||
rel = mtstate->resultRelInfo->ri_RelationDesc;
|
||||
if (operation == CMD_INSERT &&
|
||||
rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
|
||||
{
|
||||
@ -1897,6 +1910,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
|
||||
mtstate->ps.ps_ExprContext = NULL;
|
||||
}
|
||||
|
||||
/* Close the root partitioned rel if we opened it above. */
|
||||
if (rel != mtstate->resultRelInfo->ri_RelationDesc)
|
||||
heap_close(rel, NoLock);
|
||||
|
||||
/*
|
||||
* If needed, Initialize target list, projection and qual for ON CONFLICT
|
||||
* DO UPDATE.
|
||||
|
Reference in New Issue
Block a user