mirror of
https://github.com/postgres/postgres.git
synced 2025-07-08 11:42:09 +03:00
Expand partitioned tables in PartDesc order.
Previously, we expanded the inheritance hierarchy in the order in which find_all_inheritors had locked the tables, but that turns out to block quite a bit of useful optimization. For example, a partition-wise join can't count on two tables with matching bounds to get expanded in the same order. Where possible, this change results in expanding partitioned tables in *bound* order. Bound order isn't well-defined for a list-partitioned table with a null-accepting partition or for a list-partitioned table where the bounds for a single partition are interleaved with other partitions. However, when expansion in bound order is possible, it opens up further opportunities for optimization, such as strength-reducing MergeAppend to Append when the expansion order matches the desired sort order. Patch by me, with cosmetic revisions by Ashutosh Bapat. Discussion: http://postgr.es/m/CA+TgmoZrKj7kEzcMSum3aXV4eyvvbh9WD=c6m=002WMheDyE3A@mail.gmail.com
This commit is contained in:
@ -33,6 +33,7 @@
|
|||||||
#include "access/heapam.h"
|
#include "access/heapam.h"
|
||||||
#include "access/htup_details.h"
|
#include "access/htup_details.h"
|
||||||
#include "access/sysattr.h"
|
#include "access/sysattr.h"
|
||||||
|
#include "catalog/partition.h"
|
||||||
#include "catalog/pg_inherits_fn.h"
|
#include "catalog/pg_inherits_fn.h"
|
||||||
#include "catalog/pg_type.h"
|
#include "catalog/pg_type.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
@ -100,6 +101,19 @@ static List *generate_append_tlist(List *colTypes, List *colCollations,
|
|||||||
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
|
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
|
||||||
static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
|
static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
|
||||||
Index rti);
|
Index rti);
|
||||||
|
static void expand_partitioned_rtentry(PlannerInfo *root,
|
||||||
|
RangeTblEntry *parentrte,
|
||||||
|
Index parentRTindex, Relation parentrel,
|
||||||
|
PlanRowMark *parentrc, PartitionDesc partdesc,
|
||||||
|
LOCKMODE lockmode,
|
||||||
|
bool *has_child, List **appinfos,
|
||||||
|
List **partitioned_child_rels);
|
||||||
|
static void expand_single_inheritance_child(PlannerInfo *root,
|
||||||
|
RangeTblEntry *parentrte,
|
||||||
|
Index parentRTindex, Relation parentrel,
|
||||||
|
PlanRowMark *parentrc, Relation childrel,
|
||||||
|
bool *has_child, List **appinfos,
|
||||||
|
List **partitioned_child_rels);
|
||||||
static void make_inh_translation_list(Relation oldrelation,
|
static void make_inh_translation_list(Relation oldrelation,
|
||||||
Relation newrelation,
|
Relation newrelation,
|
||||||
Index newvarno,
|
Index newvarno,
|
||||||
@ -1455,13 +1469,34 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
|
|||||||
/* Scan the inheritance set and expand it */
|
/* Scan the inheritance set and expand it */
|
||||||
appinfos = NIL;
|
appinfos = NIL;
|
||||||
has_child = false;
|
has_child = false;
|
||||||
|
if (RelationGetPartitionDesc(oldrelation) != NULL)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If this table has partitions, recursively expand them in the order
|
||||||
|
* in which they appear in the PartitionDesc. But first, expand the
|
||||||
|
* parent itself.
|
||||||
|
*/
|
||||||
|
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
|
||||||
|
oldrelation,
|
||||||
|
&has_child, &appinfos,
|
||||||
|
&partitioned_child_rels);
|
||||||
|
expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
|
||||||
|
RelationGetPartitionDesc(oldrelation),
|
||||||
|
lockmode,
|
||||||
|
&has_child, &appinfos,
|
||||||
|
&partitioned_child_rels);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* This table has no partitions. Expand any plain inheritance
|
||||||
|
* children in the order the OIDs were returned by
|
||||||
|
* find_all_inheritors.
|
||||||
|
*/
|
||||||
foreach(l, inhOIDs)
|
foreach(l, inhOIDs)
|
||||||
{
|
{
|
||||||
Oid childOID = lfirst_oid(l);
|
Oid childOID = lfirst_oid(l);
|
||||||
Relation newrelation;
|
Relation newrelation;
|
||||||
RangeTblEntry *childrte;
|
|
||||||
Index childRTindex;
|
|
||||||
AppendRelInfo *appinfo;
|
|
||||||
|
|
||||||
/* Open rel if needed; we already have required locks */
|
/* Open rel if needed; we already have required locks */
|
||||||
if (childOID != parentOID)
|
if (childOID != parentOID)
|
||||||
@ -1472,8 +1507,8 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
|
|||||||
/*
|
/*
|
||||||
* It is possible that the parent table has children that are temp
|
* It is possible that the parent table has children that are temp
|
||||||
* tables of other backends. We cannot safely access such tables
|
* tables of other backends. We cannot safely access such tables
|
||||||
* (because of buffering issues), and the best thing to do seems to be
|
* (because of buffering issues), and the best thing to do seems
|
||||||
* to silently ignore them.
|
* to be to silently ignore them.
|
||||||
*/
|
*/
|
||||||
if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
|
if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
|
||||||
{
|
{
|
||||||
@ -1481,106 +1516,16 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
|
||||||
* Build an RTE for the child, and attach to query's rangetable list.
|
newrelation,
|
||||||
* We copy most fields of the parent's RTE, but replace relation OID
|
&has_child, &appinfos,
|
||||||
* and relkind, and set inh = false. Also, set requiredPerms to zero
|
&partitioned_child_rels);
|
||||||
* since all required permissions checks are done on the original RTE.
|
|
||||||
* Likewise, set the child's securityQuals to empty, because we only
|
|
||||||
* want to apply the parent's RLS conditions regardless of what RLS
|
|
||||||
* properties individual children may have. (This is an intentional
|
|
||||||
* choice to make inherited RLS work like regular permissions checks.)
|
|
||||||
* The parent securityQuals will be propagated to children along with
|
|
||||||
* other base restriction clauses, so we don't need to do it here.
|
|
||||||
*/
|
|
||||||
childrte = copyObject(rte);
|
|
||||||
childrte->relid = childOID;
|
|
||||||
childrte->relkind = newrelation->rd_rel->relkind;
|
|
||||||
childrte->inh = false;
|
|
||||||
childrte->requiredPerms = 0;
|
|
||||||
childrte->securityQuals = NIL;
|
|
||||||
parse->rtable = lappend(parse->rtable, childrte);
|
|
||||||
childRTindex = list_length(parse->rtable);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Build an AppendRelInfo for this parent and child, unless the child
|
|
||||||
* is a partitioned table.
|
|
||||||
*/
|
|
||||||
if (childrte->relkind != RELKIND_PARTITIONED_TABLE)
|
|
||||||
{
|
|
||||||
/* Remember if we saw a real child. */
|
|
||||||
if (childOID != parentOID)
|
|
||||||
has_child = true;
|
|
||||||
|
|
||||||
appinfo = makeNode(AppendRelInfo);
|
|
||||||
appinfo->parent_relid = rti;
|
|
||||||
appinfo->child_relid = childRTindex;
|
|
||||||
appinfo->parent_reltype = oldrelation->rd_rel->reltype;
|
|
||||||
appinfo->child_reltype = newrelation->rd_rel->reltype;
|
|
||||||
make_inh_translation_list(oldrelation, newrelation, childRTindex,
|
|
||||||
&appinfo->translated_vars);
|
|
||||||
appinfo->parent_reloid = parentOID;
|
|
||||||
appinfos = lappend(appinfos, appinfo);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Translate the column permissions bitmaps to the child's attnums
|
|
||||||
* (we have to build the translated_vars list before we can do
|
|
||||||
* this). But if this is the parent table, leave copyObject's
|
|
||||||
* result alone.
|
|
||||||
*
|
|
||||||
* Note: we need to do this even though the executor won't run any
|
|
||||||
* permissions checks on the child RTE. The
|
|
||||||
* insertedCols/updatedCols bitmaps may be examined for
|
|
||||||
* trigger-firing purposes.
|
|
||||||
*/
|
|
||||||
if (childOID != parentOID)
|
|
||||||
{
|
|
||||||
childrte->selectedCols = translate_col_privs(rte->selectedCols,
|
|
||||||
appinfo->translated_vars);
|
|
||||||
childrte->insertedCols = translate_col_privs(rte->insertedCols,
|
|
||||||
appinfo->translated_vars);
|
|
||||||
childrte->updatedCols = translate_col_privs(rte->updatedCols,
|
|
||||||
appinfo->translated_vars);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
partitioned_child_rels = lappend_int(partitioned_child_rels,
|
|
||||||
childRTindex);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
|
|
||||||
*/
|
|
||||||
if (oldrc)
|
|
||||||
{
|
|
||||||
PlanRowMark *newrc = makeNode(PlanRowMark);
|
|
||||||
|
|
||||||
newrc->rti = childRTindex;
|
|
||||||
newrc->prti = rti;
|
|
||||||
newrc->rowmarkId = oldrc->rowmarkId;
|
|
||||||
/* Reselect rowmark type, because relkind might not match parent */
|
|
||||||
newrc->markType = select_rowmark_type(childrte, oldrc->strength);
|
|
||||||
newrc->allMarkTypes = (1 << newrc->markType);
|
|
||||||
newrc->strength = oldrc->strength;
|
|
||||||
newrc->waitPolicy = oldrc->waitPolicy;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We mark RowMarks for partitioned child tables as parent
|
|
||||||
* RowMarks so that the executor ignores them (except their
|
|
||||||
* existence means that the child tables be locked using
|
|
||||||
* appropriate mode).
|
|
||||||
*/
|
|
||||||
newrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
|
|
||||||
|
|
||||||
/* Include child's rowmark type in parent's allMarkTypes */
|
|
||||||
oldrc->allMarkTypes |= newrc->allMarkTypes;
|
|
||||||
|
|
||||||
root->rowMarks = lappend(root->rowMarks, newrc);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Close child relations, but keep locks */
|
/* Close child relations, but keep locks */
|
||||||
if (childOID != parentOID)
|
if (childOID != parentOID)
|
||||||
heap_close(newrelation, NoLock);
|
heap_close(newrelation, NoLock);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
heap_close(oldrelation, NoLock);
|
heap_close(oldrelation, NoLock);
|
||||||
|
|
||||||
@ -1620,6 +1565,169 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
|
|||||||
root->append_rel_list = list_concat(root->append_rel_list, appinfos);
|
root->append_rel_list = list_concat(root->append_rel_list, appinfos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
|
||||||
|
Index parentRTindex, Relation parentrel,
|
||||||
|
PlanRowMark *parentrc, PartitionDesc partdesc,
|
||||||
|
LOCKMODE lockmode,
|
||||||
|
bool *has_child, List **appinfos,
|
||||||
|
List **partitioned_child_rels)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
check_stack_depth();
|
||||||
|
|
||||||
|
for (i = 0; i < partdesc->nparts; i++)
|
||||||
|
{
|
||||||
|
Oid childOID = partdesc->oids[i];
|
||||||
|
Relation childrel;
|
||||||
|
|
||||||
|
/* Open rel; we already have required locks */
|
||||||
|
childrel = heap_open(childOID, NoLock);
|
||||||
|
|
||||||
|
/* As in expand_inherited_rtentry, skip non-local temp tables */
|
||||||
|
if (RELATION_IS_OTHER_TEMP(childrel))
|
||||||
|
{
|
||||||
|
heap_close(childrel, lockmode);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
expand_single_inheritance_child(root, parentrte, parentRTindex,
|
||||||
|
parentrel, parentrc, childrel,
|
||||||
|
has_child, appinfos,
|
||||||
|
partitioned_child_rels);
|
||||||
|
|
||||||
|
/* If this child is itself partitioned, recurse */
|
||||||
|
if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
|
||||||
|
expand_partitioned_rtentry(root, parentrte, parentRTindex,
|
||||||
|
parentrel, parentrc,
|
||||||
|
RelationGetPartitionDesc(childrel),
|
||||||
|
lockmode,
|
||||||
|
has_child, appinfos,
|
||||||
|
partitioned_child_rels);
|
||||||
|
|
||||||
|
/* Close child relation, but keep locks */
|
||||||
|
heap_close(childrel, NoLock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* expand_single_inheritance_child
|
||||||
|
* Expand a single inheritance child, if needed.
|
||||||
|
*
|
||||||
|
* If this is a temp table of another backend, we'll return without doing
|
||||||
|
* anything at all. Otherwise, we'll set "has_child" to true, build a
|
||||||
|
* RangeTblEntry and either a PartitionedChildRelInfo or AppendRelInfo as
|
||||||
|
* appropriate, plus maybe a PlanRowMark.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
|
||||||
|
Index parentRTindex, Relation parentrel,
|
||||||
|
PlanRowMark *parentrc, Relation childrel,
|
||||||
|
bool *has_child, List **appinfos,
|
||||||
|
List **partitioned_child_rels)
|
||||||
|
{
|
||||||
|
Query *parse = root->parse;
|
||||||
|
Oid parentOID = RelationGetRelid(parentrel);
|
||||||
|
Oid childOID = RelationGetRelid(childrel);
|
||||||
|
RangeTblEntry *childrte;
|
||||||
|
Index childRTindex;
|
||||||
|
AppendRelInfo *appinfo;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Build an RTE for the child, and attach to query's rangetable list. We
|
||||||
|
* copy most fields of the parent's RTE, but replace relation OID and
|
||||||
|
* relkind, and set inh = false. Also, set requiredPerms to zero since
|
||||||
|
* all required permissions checks are done on the original RTE. Likewise,
|
||||||
|
* set the child's securityQuals to empty, because we only want to apply
|
||||||
|
* the parent's RLS conditions regardless of what RLS properties
|
||||||
|
* individual children may have. (This is an intentional choice to make
|
||||||
|
* inherited RLS work like regular permissions checks.) The parent
|
||||||
|
* securityQuals will be propagated to children along with other base
|
||||||
|
* restriction clauses, so we don't need to do it here.
|
||||||
|
*/
|
||||||
|
childrte = copyObject(parentrte);
|
||||||
|
childrte->relid = childOID;
|
||||||
|
childrte->relkind = childrel->rd_rel->relkind;
|
||||||
|
childrte->inh = false;
|
||||||
|
childrte->requiredPerms = 0;
|
||||||
|
childrte->securityQuals = NIL;
|
||||||
|
parse->rtable = lappend(parse->rtable, childrte);
|
||||||
|
childRTindex = list_length(parse->rtable);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Build an AppendRelInfo for this parent and child, unless the child is a
|
||||||
|
* partitioned table.
|
||||||
|
*/
|
||||||
|
if (childrte->relkind != RELKIND_PARTITIONED_TABLE)
|
||||||
|
{
|
||||||
|
/* Remember if we saw a real child. */
|
||||||
|
if (childOID != parentOID)
|
||||||
|
*has_child = true;
|
||||||
|
|
||||||
|
appinfo = makeNode(AppendRelInfo);
|
||||||
|
appinfo->parent_relid = parentRTindex;
|
||||||
|
appinfo->child_relid = childRTindex;
|
||||||
|
appinfo->parent_reltype = parentrel->rd_rel->reltype;
|
||||||
|
appinfo->child_reltype = childrel->rd_rel->reltype;
|
||||||
|
make_inh_translation_list(parentrel, childrel, childRTindex,
|
||||||
|
&appinfo->translated_vars);
|
||||||
|
appinfo->parent_reloid = parentOID;
|
||||||
|
*appinfos = lappend(*appinfos, appinfo);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Translate the column permissions bitmaps to the child's attnums (we
|
||||||
|
* have to build the translated_vars list before we can do this). But
|
||||||
|
* if this is the parent table, leave copyObject's result alone.
|
||||||
|
*
|
||||||
|
* Note: we need to do this even though the executor won't run any
|
||||||
|
* permissions checks on the child RTE. The insertedCols/updatedCols
|
||||||
|
* bitmaps may be examined for trigger-firing purposes.
|
||||||
|
*/
|
||||||
|
if (childOID != parentOID)
|
||||||
|
{
|
||||||
|
childrte->selectedCols = translate_col_privs(parentrte->selectedCols,
|
||||||
|
appinfo->translated_vars);
|
||||||
|
childrte->insertedCols = translate_col_privs(parentrte->insertedCols,
|
||||||
|
appinfo->translated_vars);
|
||||||
|
childrte->updatedCols = translate_col_privs(parentrte->updatedCols,
|
||||||
|
appinfo->translated_vars);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
*partitioned_child_rels = lappend_int(*partitioned_child_rels,
|
||||||
|
childRTindex);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
|
||||||
|
*/
|
||||||
|
if (parentrc)
|
||||||
|
{
|
||||||
|
PlanRowMark *childrc = makeNode(PlanRowMark);
|
||||||
|
|
||||||
|
childrc->rti = childRTindex;
|
||||||
|
childrc->prti = parentRTindex;
|
||||||
|
childrc->rowmarkId = parentrc->rowmarkId;
|
||||||
|
/* Reselect rowmark type, because relkind might not match parent */
|
||||||
|
childrc->markType = select_rowmark_type(childrte, parentrc->strength);
|
||||||
|
childrc->allMarkTypes = (1 << childrc->markType);
|
||||||
|
childrc->strength = parentrc->strength;
|
||||||
|
childrc->waitPolicy = parentrc->waitPolicy;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We mark RowMarks for partitioned child tables as parent RowMarks so
|
||||||
|
* that the executor ignores them (except their existence means that
|
||||||
|
* the child tables be locked using appropriate mode).
|
||||||
|
*/
|
||||||
|
childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
|
||||||
|
|
||||||
|
/* Include child's rowmark type in parent's allMarkTypes */
|
||||||
|
parentrc->allMarkTypes |= childrc->allMarkTypes;
|
||||||
|
|
||||||
|
root->rowMarks = lappend(root->rowMarks, childrc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* make_inh_translation_list
|
* make_inh_translation_list
|
||||||
* Build the list of translations from parent Vars to child Vars for
|
* Build the list of translations from parent Vars to child Vars for
|
||||||
|
@ -278,12 +278,12 @@ select tableoid::regclass, * from list_parted;
|
|||||||
-------------+----+----
|
-------------+----+----
|
||||||
part_aa_bb | aA |
|
part_aa_bb | aA |
|
||||||
part_cc_dd | cC | 1
|
part_cc_dd | cC | 1
|
||||||
part_null | | 0
|
|
||||||
part_null | | 1
|
|
||||||
part_ee_ff1 | ff | 1
|
part_ee_ff1 | ff | 1
|
||||||
part_ee_ff1 | EE | 1
|
part_ee_ff1 | EE | 1
|
||||||
part_ee_ff2 | ff | 11
|
part_ee_ff2 | ff | 11
|
||||||
part_ee_ff2 | EE | 10
|
part_ee_ff2 | EE | 10
|
||||||
|
part_null | | 0
|
||||||
|
part_null | | 1
|
||||||
(8 rows)
|
(8 rows)
|
||||||
|
|
||||||
-- some more tests to exercise tuple-routing with multi-level partitioning
|
-- some more tests to exercise tuple-routing with multi-level partitioning
|
||||||
|
Reference in New Issue
Block a user