1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Don't scan partitioned tables.

Partitioned tables do not contain any data; only their unpartitioned
descendents need to be scanned.  However, the partitioned tables still
need to be locked, even though they're not scanned.  To make that
work, Append and MergeAppend relations now need to carry a list of
(unscanned) partitioned relations that must be locked, and InitPlan
must lock all partitioned result relations.

Aside from the obvious advantage of avoiding some work at execution
time, this has two other advantages.  First, it may improve the
planner's decision-making in some cases since the empty relation
might throw things off.  Second, it paves the way to getting rid of
the storage for partitioned tables altogether.

Amit Langote, reviewed by me.

Discussion: http://postgr.es/m/6837c359-45c4-8044-34d1-736756335a15@lab.ntt.co.jp
This commit is contained in:
Robert Haas
2017-03-21 09:48:04 -04:00
parent d5286aa905
commit d3cc37f1d8
27 changed files with 537 additions and 164 deletions

View File

@ -844,6 +844,22 @@ InitPlan(QueryDesc *queryDesc, int eflags)
estate->es_num_result_relations = numResultRelations;
/* es_result_relation_info is NULL except when within ModifyTable */
estate->es_result_relation_info = NULL;
/*
* In the partitioned result relation case, lock the non-leaf result
* relations too. We don't however need ResultRelInfos for them.
*/
if (plannedstmt->nonleafResultRelations)
{
foreach(l, plannedstmt->nonleafResultRelations)
{
Index resultRelationIndex = lfirst_int(l);
Oid resultRelationOid;
resultRelationOid = getrelid(resultRelationIndex, rangeTable);
LockRelationOid(resultRelationOid, RowExclusiveLock);
}
}
}
else
{
@ -858,7 +874,11 @@ InitPlan(QueryDesc *queryDesc, int eflags)
/*
* Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE
* before we initialize the plan tree, else we'd be risking lock upgrades.
* While we are at it, build the ExecRowMark list.
* While we are at it, build the ExecRowMark list. Any partitioned child
* tables are ignored here (because isParent=true) and will be locked by
* the first Append or MergeAppend node that references them. (Note that
* the RowMarks corresponding to partitioned child tables are present in
* the same list as the rest, i.e., plannedstmt->rowMarks.)
*/
estate->es_rowMarks = NIL;
foreach(l, plannedstmt->rowMarks)

View File

@ -161,6 +161,7 @@ ExecSerializePlan(Plan *plan, EState *estate)
pstmt->planTree = plan;
pstmt->rtable = estate->es_range_table;
pstmt->resultRelations = NIL;
pstmt->nonleafResultRelations = NIL;
pstmt->subplans = estate->es_plannedstmt->subplans;
pstmt->rewindPlanIDs = NULL;
pstmt->rowMarks = NIL;

View File

@ -43,6 +43,7 @@
#include "executor/executor.h"
#include "nodes/nodeFuncs.h"
#include "parser/parsetree.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@ -953,3 +954,58 @@ ShutdownExprContext(ExprContext *econtext, bool isCommit)
MemoryContextSwitchTo(oldcontext);
}
/*
* ExecLockNonLeafAppendTables
*
* Locks, if necessary, the tables indicated by the RT indexes contained in
* the partitioned_rels list. These are the non-leaf tables in the partition
* tree controlled by a given Append or MergeAppend node.
*/
void
ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate)
{
PlannedStmt *stmt = estate->es_plannedstmt;
ListCell *lc;
foreach(lc, partitioned_rels)
{
ListCell *l;
Index rti = lfirst_int(lc);
bool is_result_rel = false;
Oid relid = getrelid(rti, estate->es_range_table);
/* If this is a result relation, already locked in InitPlan */
foreach(l, stmt->nonleafResultRelations)
{
if (rti == lfirst_int(l))
{
is_result_rel = true;
break;
}
}
/*
* Not a result relation; check if there is a RowMark that requires
* taking a RowShareLock on this rel.
*/
if (!is_result_rel)
{
PlanRowMark *rc = NULL;
foreach(l, stmt->rowMarks)
{
if (((PlanRowMark *) lfirst(l))->rti == rti)
{
rc = lfirst(l);
break;
}
}
if (rc && RowMarkRequiresRowShareLock(rc->markType))
LockRelationOid(relid, RowShareLock);
else
LockRelationOid(relid, AccessShareLock);
}
}
}

View File

@ -128,6 +128,12 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
/* check for unsupported flags */
Assert(!(eflags & EXEC_FLAG_MARK));
/*
* Lock the non-leaf tables in the partition tree controlled by this
* node. It's a no-op for non-partitioned parent tables.
*/
ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
/*
* Set up empty vector of subplan states
*/

View File

@ -71,6 +71,12 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
/*
* Lock the non-leaf tables in the partition tree controlled by this
* node. It's a no-op for non-partitioned parent tables.
*/
ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
/*
* Set up empty vector of subplan states
*/

View File

@ -45,6 +45,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "parser/parsetree.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
@ -1725,8 +1726,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
estate->es_result_relation_info = saved_resultRelInfo;
/* The root table RT index is at the head of the partitioned_rels list */
if (node->partitioned_rels)
{
Index root_rti;
Oid root_oid;
root_rti = linitial_int(node->partitioned_rels);
root_oid = getrelid(root_rti, estate->es_range_table);
rel = heap_open(root_oid, NoLock); /* locked by InitPlan */
}
else
rel = mtstate->resultRelInfo->ri_RelationDesc;
/* Build state for INSERT tuple routing */
rel = mtstate->resultRelInfo->ri_RelationDesc;
if (operation == CMD_INSERT &&
rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
@ -1897,6 +1910,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
mtstate->ps.ps_ExprContext = NULL;
}
/* Close the root partitioned rel if we opened it above. */
if (rel != mtstate->resultRelInfo->ri_RelationDesc)
heap_close(rel, NoLock);
/*
* If needed, Initialize target list, projection and qual for ON CONFLICT
* DO UPDATE.