1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-07 19:06:32 +03:00

Faster partition pruning

Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning.  The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.

At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.

This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.

Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
This commit is contained in:
Alvaro Herrera
2018-04-06 16:23:04 -03:00
parent 11523e860f
commit 9fdb675fc5
27 changed files with 3993 additions and 415 deletions

View File

@@ -18,7 +18,8 @@ top_builddir = ../..
include $(top_builddir)/src/Makefile.global
SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
main nodes optimizer port postmaster regex replication rewrite \
main nodes optimizer partitioning port postmaster \
regex replication rewrite \
statistics storage tcop tsearch utils $(top_builddir)/src/timezone \
jit

View File

@@ -41,6 +41,7 @@
#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "parser/parse_coerce.h"
#include "partitioning/partbounds.h"
#include "rewrite/rewriteManip.h"
#include "storage/lmgr.h"
#include "utils/array.h"
@@ -55,89 +56,6 @@
#include "utils/ruleutils.h"
#include "utils/syscache.h"
/*
* Information about bounds of a partitioned relation
*
* A list partition datum that is known to be NULL is never put into the
* datums array. Instead, it is tracked using the null_index field.
*
* In the case of range partitioning, ndatums will typically be far less than
* 2 * nparts, because a partition's upper bound and the next partition's lower
* bound are the same in most common cases, and we only store one of them (the
* upper bound). In case of hash partitioning, ndatums will be same as the
* number of partitions.
*
* For range and list partitioned tables, datums is an array of datum-tuples
* with key->partnatts datums each. For hash partitioned tables, it is an array
* of datum-tuples with 2 datums, modulus and remainder, corresponding to a
* given partition.
*
* The datums in datums array are arranged in increasing order as defined by
* functions qsort_partition_rbound_cmp(), qsort_partition_list_value_cmp() and
* qsort_partition_hbound_cmp() for range, list and hash partitioned tables
* respectively. For range and list partitions this simply means that the
* datums in the datums array are arranged in increasing order as defined by
* the partition key's operator classes and collations.
*
* In the case of list partitioning, the indexes array stores one entry for
* every datum, which is the index of the partition that accepts a given datum.
* In case of range partitioning, it stores one entry per distinct range
* datum, which is the index of the partition for which a given datum
* is an upper bound. In the case of hash partitioning, the number of the
* entries in the indexes array is same as the greatest modulus amongst all
* partitions. For a given partition key datum-tuple, the index of the
* partition which would accept that datum-tuple would be given by the entry
* pointed by remainder produced when hash value of the datum-tuple is divided
* by the greatest modulus.
*/
typedef struct PartitionBoundInfoData
{
char strategy; /* hash, list or range? */
int ndatums; /* Length of the datums following array */
Datum **datums;
PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
* NULL for hash and list partitioned
* tables */
int *indexes; /* Partition indexes */
int null_index; /* Index of the null-accepting partition; -1
* if there isn't one */
int default_index; /* Index of the default partition; -1 if there
* isn't one */
} PartitionBoundInfoData;
#define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)
#define partition_bound_has_default(bi) ((bi)->default_index != -1)
/*
* When qsort'ing partition bounds after reading from the catalog, each bound
* is represented with one of the following structs.
*/
/* One bound of a hash partition */
typedef struct PartitionHashBound
{
int modulus;
int remainder;
int index;
} PartitionHashBound;
/* One value coming from some (index'th) list partition */
typedef struct PartitionListValue
{
int index;
Datum value;
} PartitionListValue;
/* One bound of a range partition */
typedef struct PartitionRangeBound
{
int index;
Datum *datums; /* range bound datums */
PartitionRangeDatumKind *kind; /* the kind of each datum */
bool lower; /* this is the lower (vs upper) bound */
} PartitionRangeBound;
static Oid get_partition_parent_worker(Relation inhRel, Oid relid);
static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
@@ -173,29 +91,9 @@ static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation, Datum *datums1,
PartitionRangeDatumKind *kind1, bool lower1,
PartitionRangeBound *b2);
static int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc,
Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums);
static int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal);
static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
PartitionRangeBound *probe, bool *is_equal);
static int partition_range_datum_bsearch(FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal);
static int partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder);
static int get_partition_bound_num_indexes(PartitionBoundInfo b);
static int get_greatest_modulus(PartitionBoundInfo b);
static uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull);
/*
* RelationBuildPartitionDesc
@@ -765,13 +663,13 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
if (b1->strategy == PARTITION_STRATEGY_HASH)
{
int greatest_modulus = get_greatest_modulus(b1);
int greatest_modulus = get_hash_partition_greatest_modulus(b1);
/*
* If two hash partitioned tables have different greatest moduli,
* their partition schemes don't match.
*/
if (greatest_modulus != get_greatest_modulus(b2))
if (greatest_modulus != get_hash_partition_greatest_modulus(b2))
return false;
/*
@@ -1029,7 +927,7 @@ check_new_partition_bound(char *relname, Relation parent,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
greatest_modulus = get_greatest_modulus(boundinfo);
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
remainder = spec->remainder;
/*
@@ -1620,7 +1518,6 @@ get_partition_qual_relid(Oid relid)
return result;
}
/* Module-local functions */
/*
* get_partition_operator
@@ -2637,7 +2534,7 @@ get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
case PARTITION_STRATEGY_HASH:
{
PartitionBoundInfo boundinfo = partdesc->boundinfo;
int greatest_modulus = get_greatest_modulus(boundinfo);
int greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
uint64 rowHash = compute_hash_value(key->partnatts,
key->partsupfunc,
values, isnull);
@@ -2971,7 +2868,7 @@ partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
* of attributes resp.
*
*/
static int32
int32
partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums)
@@ -3005,7 +2902,7 @@ partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
* *is_equal is set to true if the bound datum at the returned index is equal
* to the input value.
*/
static int
int
partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal)
@@ -3048,7 +2945,7 @@ partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
* *is_equal is set to true if the range bound at the returned index is equal
* to the input range bound
*/
static int
int
partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
@@ -3093,7 +2990,7 @@ partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
* *is_equal is set to true if the range bound at the returned index is equal
* to the input tuple.
*/
static int
int
partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal)
@@ -3136,7 +3033,7 @@ partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
* less than or equal to the given (modulus, remainder) pair or -1 if
* all of them are greater
*/
static int
int
partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder)
{
@@ -3294,7 +3191,7 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
* The number of the entries in the indexes array is same as the
* greatest modulus.
*/
num_indexes = get_greatest_modulus(bound);
num_indexes = get_hash_partition_greatest_modulus(bound);
break;
case PARTITION_STRATEGY_LIST:
@@ -3315,14 +3212,14 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
}
/*
* get_greatest_modulus
* get_hash_partition_greatest_modulus
*
* Returns the greatest modulus of the hash partition bound. The greatest
* modulus will be at the end of the datums array because hash partitions are
* arranged in the ascending order of their modulus and remainders.
*/
static int
get_greatest_modulus(PartitionBoundInfo bound)
int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
{
Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
Assert(bound->datums && bound->ndatums > 0);
@@ -3336,7 +3233,7 @@ get_greatest_modulus(PartitionBoundInfo bound)
*
* Compute the hash value for given not null partition key values.
*/
static uint64
uint64
compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull)
{

View File

@@ -2150,6 +2150,38 @@ _copyMergeAction(const MergeAction *from)
return newnode;
}
/*
* _copyPartitionPruneStepOp
*/
static PartitionPruneStepOp *
_copyPartitionPruneStepOp(const PartitionPruneStepOp *from)
{
PartitionPruneStepOp *newnode = makeNode(PartitionPruneStepOp);
COPY_SCALAR_FIELD(step.step_id);
COPY_SCALAR_FIELD(opstrategy);
COPY_NODE_FIELD(exprs);
COPY_NODE_FIELD(cmpfns);
COPY_BITMAPSET_FIELD(nullkeys);
return newnode;
}
/*
* _copyPartitionPruneStepCombine
*/
static PartitionPruneStepCombine *
_copyPartitionPruneStepCombine(const PartitionPruneStepCombine *from)
{
PartitionPruneStepCombine *newnode = makeNode(PartitionPruneStepCombine);
COPY_SCALAR_FIELD(step.step_id);
COPY_SCALAR_FIELD(combineOp);
COPY_NODE_FIELD(source_stepids);
return newnode;
}
/* ****************************************************************
* relation.h copy functions
*
@@ -2277,21 +2309,6 @@ _copyAppendRelInfo(const AppendRelInfo *from)
return newnode;
}
/*
* _copyPartitionedChildRelInfo
*/
static PartitionedChildRelInfo *
_copyPartitionedChildRelInfo(const PartitionedChildRelInfo *from)
{
PartitionedChildRelInfo *newnode = makeNode(PartitionedChildRelInfo);
COPY_SCALAR_FIELD(parent_relid);
COPY_NODE_FIELD(child_rels);
COPY_SCALAR_FIELD(part_cols_updated);
return newnode;
}
/*
* _copyPlaceHolderInfo
*/
@@ -5076,6 +5093,12 @@ copyObjectImpl(const void *from)
case T_MergeAction:
retval = _copyMergeAction(from);
break;
case T_PartitionPruneStepOp:
retval = _copyPartitionPruneStepOp(from);
break;
case T_PartitionPruneStepCombine:
retval = _copyPartitionPruneStepCombine(from);
break;
/*
* RELATION NODES
@@ -5095,9 +5118,6 @@ copyObjectImpl(const void *from)
case T_AppendRelInfo:
retval = _copyAppendRelInfo(from);
break;
case T_PartitionedChildRelInfo:
retval = _copyPartitionedChildRelInfo(from);
break;
case T_PlaceHolderInfo:
retval = _copyPlaceHolderInfo(from);
break;

View File

@@ -915,16 +915,6 @@ _equalAppendRelInfo(const AppendRelInfo *a, const AppendRelInfo *b)
return true;
}
static bool
_equalPartitionedChildRelInfo(const PartitionedChildRelInfo *a, const PartitionedChildRelInfo *b)
{
COMPARE_SCALAR_FIELD(parent_relid);
COMPARE_NODE_FIELD(child_rels);
COMPARE_SCALAR_FIELD(part_cols_updated);
return true;
}
static bool
_equalPlaceHolderInfo(const PlaceHolderInfo *a, const PlaceHolderInfo *b)
{
@@ -3230,9 +3220,6 @@ equal(const void *a, const void *b)
case T_AppendRelInfo:
retval = _equalAppendRelInfo(a, b);
break;
case T_PartitionedChildRelInfo:
retval = _equalPartitionedChildRelInfo(a, b);
break;
case T_PlaceHolderInfo:
retval = _equalPlaceHolderInfo(a, b);
break;

View File

@@ -2156,6 +2156,17 @@ expression_tree_walker(Node *node,
return true;
}
break;
case T_PartitionPruneStepOp:
{
PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node;
if (walker((Node *) opstep->exprs, context))
return true;
}
break;
case T_PartitionPruneStepCombine:
/* no expression subnodes */
break;
case T_JoinExpr:
{
JoinExpr *join = (JoinExpr *) node;
@@ -2958,6 +2969,20 @@ expression_tree_mutator(Node *node,
return (Node *) newnode;
}
break;
case T_PartitionPruneStepOp:
{
PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node;
PartitionPruneStepOp *newnode;
FLATCOPY(newnode, opstep, PartitionPruneStepOp);
MUTATE(newnode->exprs, opstep->exprs, List *);
return (Node *) newnode;
}
break;
case T_PartitionPruneStepCombine:
/* no expression sub-nodes */
return (Node *) copyObject(node);
case T_JoinExpr:
{
JoinExpr *join = (JoinExpr *) node;

View File

@@ -1710,6 +1710,28 @@ _outFromExpr(StringInfo str, const FromExpr *node)
WRITE_NODE_FIELD(quals);
}
static void
_outPartitionPruneStepOp(StringInfo str, const PartitionPruneStepOp *node)
{
WRITE_NODE_TYPE("PARTITIONPRUNESTEPOP");
WRITE_INT_FIELD(step.step_id);
WRITE_INT_FIELD(opstrategy);
WRITE_NODE_FIELD(exprs);
WRITE_NODE_FIELD(cmpfns);
WRITE_BITMAPSET_FIELD(nullkeys);
}
static void
_outPartitionPruneStepCombine(StringInfo str, const PartitionPruneStepCombine *node)
{
WRITE_NODE_TYPE("PARTITIONPRUNESTEPCOMBINE");
WRITE_INT_FIELD(step.step_id);
WRITE_ENUM_FIELD(combineOp, PartitionPruneCombineOp);
WRITE_NODE_FIELD(source_stepids);
}
static void
_outOnConflictExpr(StringInfo str, const OnConflictExpr *node)
{
@@ -2261,7 +2283,6 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_NODE_FIELD(full_join_clauses);
WRITE_NODE_FIELD(join_info_list);
WRITE_NODE_FIELD(append_rel_list);
WRITE_NODE_FIELD(pcinfo_list);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(placeholder_list);
WRITE_NODE_FIELD(fkey_list);
@@ -2286,6 +2307,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_INT_FIELD(wt_param_id);
WRITE_BITMAPSET_FIELD(curOuterRels);
WRITE_NODE_FIELD(curOuterParams);
WRITE_BOOL_FIELD(partColsUpdated);
}
static void
@@ -2335,6 +2357,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
WRITE_BITMAPSET_FIELD(top_parent_relids);
WRITE_NODE_FIELD(partitioned_child_rels);
}
static void
@@ -2559,16 +2582,6 @@ _outAppendRelInfo(StringInfo str, const AppendRelInfo *node)
WRITE_OID_FIELD(parent_reloid);
}
static void
_outPartitionedChildRelInfo(StringInfo str, const PartitionedChildRelInfo *node)
{
WRITE_NODE_TYPE("PARTITIONEDCHILDRELINFO");
WRITE_UINT_FIELD(parent_relid);
WRITE_NODE_FIELD(child_rels);
WRITE_BOOL_FIELD(part_cols_updated);
}
static void
_outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node)
{
@@ -3973,6 +3986,12 @@ outNode(StringInfo str, const void *obj)
case T_MergeAction:
_outMergeAction(str, obj);
break;
case T_PartitionPruneStepOp:
_outPartitionPruneStepOp(str, obj);
break;
case T_PartitionPruneStepCombine:
_outPartitionPruneStepCombine(str, obj);
break;
case T_Path:
_outPath(str, obj);
break;
@@ -4114,9 +4133,6 @@ outNode(StringInfo str, const void *obj)
case T_AppendRelInfo:
_outAppendRelInfo(str, obj);
break;
case T_PartitionedChildRelInfo:
_outPartitionedChildRelInfo(str, obj);
break;
case T_PlaceHolderInfo:
_outPlaceHolderInfo(str, obj);
break;

View File

@@ -1331,6 +1331,32 @@ _readOnConflictExpr(void)
READ_DONE();
}
static PartitionPruneStepOp *
_readPartitionPruneStepOp(void)
{
READ_LOCALS(PartitionPruneStepOp);
READ_INT_FIELD(step.step_id);
READ_INT_FIELD(opstrategy);
READ_NODE_FIELD(exprs);
READ_NODE_FIELD(cmpfns);
READ_BITMAPSET_FIELD(nullkeys);
READ_DONE();
}
static PartitionPruneStepCombine *
_readPartitionPruneStepCombine(void)
{
READ_LOCALS(PartitionPruneStepCombine);
READ_INT_FIELD(step.step_id);
READ_ENUM_FIELD(combineOp, PartitionPruneCombineOp);
READ_NODE_FIELD(source_stepids);
READ_DONE();
}
/*
* _readMergeAction
*/
@@ -2615,6 +2641,10 @@ parseNodeString(void)
return_value = _readOnConflictExpr();
else if (MATCH("MERGEACTION", 11))
return_value = _readMergeAction();
else if (MATCH("PARTITIONPRUNESTEPOP", 20))
return_value = _readPartitionPruneStepOp();
else if (MATCH("PARTITIONPRUNESTEPCOMBINE", 25))
return_value = _readPartitionPruneStepCombine();
else if (MATCH("RTE", 3))
return_value = _readRangeTblEntry();
else if (MATCH("RANGETBLFUNCTION", 16))

View File

@@ -43,6 +43,7 @@
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
@@ -874,12 +875,39 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
double *parent_attrsizes;
int nattrs;
ListCell *l;
Relids live_children = NULL;
bool did_pruning = false;
/* Guard against stack overflow due to overly deep inheritance tree. */
check_stack_depth();
Assert(IS_SIMPLE_REL(rel));
/*
* Initialize partitioned_child_rels to contain this RT index.
*
* Note that during the set_append_rel_pathlist() phase, we will bubble up
* the indexes of partitioned relations that appear down in the tree, so
* that when we've created Paths for all the children, the root
* partitioned table's list will contain all such indexes.
*/
if (rte->relkind == RELKIND_PARTITIONED_TABLE)
rel->partitioned_child_rels = list_make1_int(rti);
/*
* If the partitioned relation has any baserestrictinfo quals then we
* attempt to use these quals to prune away partitions that cannot
* possibly contain any tuples matching these quals. In this case we'll
* store the relids of all partitions which could possibly contain a
* matching tuple, and skip anything else in the loop below.
*/
if (rte->relkind == RELKIND_PARTITIONED_TABLE &&
rel->baserestrictinfo != NIL)
{
live_children = prune_append_rel_partitions(rel);
did_pruning = true;
}
/*
* Initialize to compute size estimates for whole append relation.
*
@@ -1128,6 +1156,13 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
continue;
}
if (did_pruning && !bms_is_member(appinfo->child_relid, live_children))
{
/* This partition was pruned; skip it. */
set_dummy_rel_pathlist(childrel);
continue;
}
if (relation_excluded_by_constraints(root, childrel, childRTE))
{
/*
@@ -1309,6 +1344,12 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
if (IS_DUMMY_REL(childrel))
continue;
/* Bubble up childrel's partitioned children. */
if (rel->part_scheme)
rel->partitioned_child_rels =
list_concat(rel->partitioned_child_rels,
list_copy(childrel->partitioned_child_rels));
/*
* Child is live, so add it to the live_childrels list for use below.
*/
@@ -1346,49 +1387,55 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
List *all_child_outers = NIL;
ListCell *l;
List *partitioned_rels = NIL;
RangeTblEntry *rte;
bool build_partitioned_rels = false;
double partial_rows = -1;
if (IS_SIMPLE_REL(rel))
/*
* AppendPath generated for partitioned tables must record the RT indexes
* of partitioned tables that are direct or indirect children of this
* Append rel.
*
* AppendPath may be for a sub-query RTE (UNION ALL), in which case, 'rel'
* itself does not represent a partitioned relation, but the child sub-
* queries may contain references to partitioned relations. The loop
* below will look for such children and collect them in a list to be
* passed to the path creation function. (This assumes that we don't need
* to look through multiple levels of subquery RTEs; if we ever do, we
* could consider stuffing the list we generate here into sub-query RTE's
* RelOptInfo, just like we do for partitioned rels, which would be used
* when populating our parent rel with paths. For the present, that
* appears to be unnecessary.)
*/
if (rel->part_scheme != NULL)
{
/*
* A root partition will already have a PartitionedChildRelInfo, and a
* non-root partitioned table doesn't need one, because its Append
* paths will get flattened into the parent anyway. For a subquery
* RTE, no PartitionedChildRelInfo exists; we collect all
* partitioned_rels associated with any child. (This assumes that we
* don't need to look through multiple levels of subquery RTEs; if we
* ever do, we could create a PartitionedChildRelInfo with the
* accumulated list of partitioned_rels which would then be found when
* populated our parent rel with paths. For the present, that appears
* to be unnecessary.)
*/
rte = planner_rt_fetch(rel->relid, root);
switch (rte->rtekind)
if (IS_SIMPLE_REL(rel))
partitioned_rels = rel->partitioned_child_rels;
else if (IS_JOIN_REL(rel))
{
case RTE_RELATION:
if (rte->relkind == RELKIND_PARTITIONED_TABLE)
partitioned_rels =
get_partitioned_child_rels(root, rel->relid, NULL);
break;
case RTE_SUBQUERY:
build_partitioned_rels = true;
break;
default:
elog(ERROR, "unexpected rtekind: %d", (int) rte->rtekind);
int relid = -1;
/*
* For a partitioned joinrel, concatenate the component rels'
* partitioned_child_rels lists.
*/
while ((relid = bms_next_member(rel->relids, relid)) >= 0)
{
RelOptInfo *component;
Assert(relid >= 1 && relid < root->simple_rel_array_size);
component = root->simple_rel_array[relid];
Assert(component->part_scheme != NULL);
Assert(list_length(component->partitioned_child_rels) >= 1);
partitioned_rels =
list_concat(partitioned_rels,
list_copy(component->partitioned_child_rels));
}
}
Assert(list_length(partitioned_rels) >= 1);
}
else if (rel->reloptkind == RELOPT_JOINREL && rel->part_scheme)
{
/*
* Associate PartitionedChildRelInfo of the root partitioned tables
* being joined with the root partitioned join (indicated by
* RELOPT_JOINREL).
*/
partitioned_rels = get_partitioned_child_rels_for_join(root,
rel->relids);
}
else if (rel->rtekind == RTE_SUBQUERY)
build_partitioned_rels = true;
/*
* For every non-dummy child, remember the cheapest path. Also, identify
@@ -1407,9 +1454,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
*/
if (build_partitioned_rels)
{
List *cprels;
List *cprels = childrel->partitioned_child_rels;
cprels = get_partitioned_child_rels(root, childrel->relid, NULL);
partitioned_rels = list_concat(partitioned_rels,
list_copy(cprels));
}

View File

@@ -40,9 +40,7 @@
#include "utils/selfuncs.h"
#define IsBooleanOpfamily(opfamily) \
((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID)
/* XXX see PartCollMatchesExprColl */
#define IndexCollMatchesExprColl(idxcollation, exprcollation) \
((idxcollation) == InvalidOid || (idxcollation) == (exprcollation))

View File

@@ -616,7 +616,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->multiexpr_params = NIL;
root->eq_classes = NIL;
root->append_rel_list = NIL;
root->pcinfo_list = NIL;
root->rowMarks = NIL;
memset(root->upper_rels, 0, sizeof(root->upper_rels));
memset(root->upper_targets, 0, sizeof(root->upper_targets));
@@ -631,6 +630,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
else
root->wt_param_id = -1;
root->non_recursive_path = NULL;
root->partColsUpdated = false;
/*
* If there is a WITH list, process each WITH query and build an initplan
@@ -1191,12 +1191,12 @@ inheritance_planner(PlannerInfo *root)
ListCell *lc;
Index rti;
RangeTblEntry *parent_rte;
Relids partitioned_relids = NULL;
List *partitioned_rels = NIL;
PlannerInfo *parent_root;
Query *parent_parse;
Bitmapset *parent_relids = bms_make_singleton(top_parentRTindex);
PlannerInfo **parent_roots = NULL;
bool partColsUpdated = false;
Assert(parse->commandType != CMD_INSERT);
@@ -1268,10 +1268,12 @@ inheritance_planner(PlannerInfo *root)
if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
{
nominalRelation = top_parentRTindex;
partitioned_rels = get_partitioned_child_rels(root, top_parentRTindex,
&partColsUpdated);
/* The root partitioned table is included as a child rel */
Assert(list_length(partitioned_rels) >= 1);
/*
* Root parent's RT index is always present in the partitioned_rels of
* the ModifyTable node, if one is needed at all.
*/
partitioned_relids = bms_make_singleton(top_parentRTindex);
}
/*
@@ -1502,6 +1504,15 @@ inheritance_planner(PlannerInfo *root)
if (IS_DUMMY_PATH(subpath))
continue;
/*
* Add the current parent's RT index to the partitione_rels set if
* we're going to create the ModifyTable path for a partitioned root
* table.
*/
if (partitioned_relids)
partitioned_relids = bms_add_member(partitioned_relids,
appinfo->parent_relid);
/*
* If this is the first non-excluded child, its post-planning rtable
* becomes the initial contents of final_rtable; otherwise, append
@@ -1603,6 +1614,21 @@ inheritance_planner(PlannerInfo *root)
else
rowMarks = root->rowMarks;
if (partitioned_relids)
{
int i;
i = -1;
while ((i = bms_next_member(partitioned_relids, i)) >= 0)
partitioned_rels = lappend_int(partitioned_rels, i);
/*
* If we're going to create ModifyTable at all, the list should
* contain at least one member, that is, the root parent's index.
*/
Assert(list_length(partitioned_rels) >= 1);
}
/* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
add_path(final_rel, (Path *)
create_modifytable_path(root, final_rel,
@@ -1610,7 +1636,7 @@ inheritance_planner(PlannerInfo *root)
parse->canSetTag,
nominalRelation,
partitioned_rels,
partColsUpdated,
root->partColsUpdated,
resultRelations,
0,
subpaths,
@@ -6144,65 +6170,6 @@ done:
return parallel_workers;
}
/*
* get_partitioned_child_rels
* Returns a list of the RT indexes of the partitioned child relations
* with rti as the root parent RT index. Also sets
* *part_cols_updated to true if any of the root rte's updated
* columns is used in the partition key either of the relation whose RTI
* is specified or of any child relation.
*
* Note: This function might get called even for range table entries that
* are not partitioned tables; in such a case, it will simply return NIL.
*/
List *
get_partitioned_child_rels(PlannerInfo *root, Index rti,
bool *part_cols_updated)
{
List *result = NIL;
ListCell *l;
if (part_cols_updated)
*part_cols_updated = false;
foreach(l, root->pcinfo_list)
{
PartitionedChildRelInfo *pc = lfirst_node(PartitionedChildRelInfo, l);
if (pc->parent_relid == rti)
{
result = pc->child_rels;
if (part_cols_updated)
*part_cols_updated = pc->part_cols_updated;
break;
}
}
return result;
}
/*
* get_partitioned_child_rels_for_join
* Build and return a list containing the RTI of every partitioned
* relation which is a child of some rel included in the join.
*/
List *
get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
{
List *result = NIL;
ListCell *l;
foreach(l, root->pcinfo_list)
{
PartitionedChildRelInfo *pc = lfirst(l);
if (bms_is_member(pc->parent_relid, join_relids))
result = list_concat(result, list_copy(pc->child_rels));
}
return result;
}
/*
* add_paths_to_grouping_rel
*

View File

@@ -104,8 +104,7 @@ static void expand_partitioned_rtentry(PlannerInfo *root,
RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
PlanRowMark *top_parentrc, LOCKMODE lockmode,
List **appinfos, List **partitioned_child_rels,
bool *part_cols_updated);
List **appinfos);
static void expand_single_inheritance_child(PlannerInfo *root,
RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
@@ -1587,9 +1586,6 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
/* Scan the inheritance set and expand it */
if (RelationGetPartitionDesc(oldrelation) != NULL)
{
List *partitioned_child_rels = NIL;
bool part_cols_updated = false;
Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
/*
@@ -1598,28 +1594,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
* extract the partition key columns of all the partitioned tables.
*/
expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
lockmode, &root->append_rel_list,
&partitioned_child_rels,
&part_cols_updated);
/*
* We keep a list of objects in root, each of which maps a root
* partitioned parent RT index to the list of RT indexes of descendant
* partitioned child tables. When creating an Append or a ModifyTable
* path for the parent, we copy the child RT index list verbatim to
* the path so that it could be carried over to the executor so that
* the latter could identify the partitioned child tables.
*/
if (rte->inh && partitioned_child_rels != NIL)
{
PartitionedChildRelInfo *pcinfo;
pcinfo = makeNode(PartitionedChildRelInfo);
pcinfo->parent_relid = rti;
pcinfo->child_rels = partitioned_child_rels;
pcinfo->part_cols_updated = part_cols_updated;
root->pcinfo_list = lappend(root->pcinfo_list, pcinfo);
}
lockmode, &root->append_rel_list);
}
else
{
@@ -1694,8 +1669,7 @@ static void
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
PlanRowMark *top_parentrc, LOCKMODE lockmode,
List **appinfos, List **partitioned_child_rels,
bool *part_cols_updated)
List **appinfos)
{
int i;
RangeTblEntry *childrte;
@@ -1717,8 +1691,8 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
* parentrte already has the root partrel's updatedCols translated to match
* the attribute ordering of parentrel.
*/
if (!*part_cols_updated)
*part_cols_updated =
if (!root->partColsUpdated)
root->partColsUpdated =
has_partition_attrs(parentrel, parentrte->updatedCols, NULL);
/* First expand the partitioned table itself. */
@@ -1726,14 +1700,6 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
top_parentrc, parentrel,
appinfos, &childrte, &childRTindex);
/*
* The partitioned table does not have data for itself but still need to
* be locked. Update given list of partitioned children with RTI of this
* partitioned relation.
*/
*partitioned_child_rels = lappend_int(*partitioned_child_rels,
childRTindex);
for (i = 0; i < partdesc->nparts; i++)
{
Oid childOID = partdesc->oids[i];
@@ -1760,8 +1726,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
expand_partitioned_rtentry(root, childrte, childRTindex,
childrel, top_parentrc, lockmode,
appinfos, partitioned_child_rels,
part_cols_updated);
appinfos);
/* Close child relation, but keep locks */
heap_close(childrel, NoLock);

View File

@@ -1171,7 +1171,6 @@ get_relation_constraints(PlannerInfo *root,
Index varno = rel->relid;
Relation relation;
TupleConstr *constr;
List *pcqual;
/*
* We assume the relation has already been safely locked.
@@ -1257,24 +1256,34 @@ get_relation_constraints(PlannerInfo *root,
}
}
/* Append partition predicates, if any */
pcqual = RelationGetPartitionQual(relation);
if (pcqual)
/*
* Append partition predicates, if any.
*
* For selects, partition pruning uses the parent table's partition bound
* descriptor, instead of constraint exclusion which is driven by the
* individual partition's partition constraint.
*/
if (root->parse->commandType != CMD_SELECT)
{
/*
* Run the partition quals through const-simplification similar to
* check constraints. We skip canonicalize_qual, though, because
* partition quals should be in canonical form already; also, since
* the qual is in implicit-AND format, we'd have to explicitly convert
* it to explicit-AND format and back again.
*/
pcqual = (List *) eval_const_expressions(root, (Node *) pcqual);
List *pcqual = RelationGetPartitionQual(relation);
/* Fix Vars to have the desired varno */
if (varno != 1)
ChangeVarNodes((Node *) pcqual, 1, varno, 0);
if (pcqual)
{
/*
* Run the partition quals through const-simplification similar to
* check constraints. We skip canonicalize_qual, though, because
* partition quals should be in canonical form already; also,
* since the qual is in implicit-AND format, we'd have to
* explicitly convert it to explicit-AND format and back again.
*/
pcqual = (List *) eval_const_expressions(root, (Node *) pcqual);
result = list_concat(result, pcqual);
/* Fix Vars to have the desired varno */
if (varno != 1)
ChangeVarNodes((Node *) pcqual, 1, varno, 0);
result = list_concat(result, pcqual);
}
}
heap_close(relation, NoLock);
@@ -1869,6 +1878,7 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
rel->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey);
rel->nparts = partdesc->nparts;
set_baserel_partition_key_exprs(relation, rel);
rel->partition_qual = RelationGetPartitionQual(relation);
}
/*
@@ -1881,7 +1891,8 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
{
PartitionKey partkey = RelationGetPartitionKey(relation);
ListCell *lc;
int partnatts;
int partnatts,
i;
PartitionScheme part_scheme;
/* A partitioned table should have a partition key. */
@@ -1899,7 +1910,7 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
partnatts != part_scheme->partnatts)
continue;
/* Match the partition key types. */
/* Match partition key type properties. */
if (memcmp(partkey->partopfamily, part_scheme->partopfamily,
sizeof(Oid) * partnatts) != 0 ||
memcmp(partkey->partopcintype, part_scheme->partopcintype,
@@ -1917,6 +1928,19 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
Assert(memcmp(partkey->parttypbyval, part_scheme->parttypbyval,
sizeof(bool) * partnatts) == 0);
/*
* If partopfamily and partopcintype matched, must have the same
* partition comparison functions. Note that we cannot reliably
* Assert the equality of function structs themselves for they might
* be different across PartitionKey's, so just Assert for the function
* OIDs.
*/
#ifdef USE_ASSERT_CHECKING
for (i = 0; i < partkey->partnatts; i++)
Assert(partkey->partsupfunc[i].fn_oid ==
part_scheme->partsupfunc[i].fn_oid);
#endif
/* Found matching partition scheme. */
return part_scheme;
}
@@ -1951,6 +1975,12 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
memcpy(part_scheme->parttypbyval, partkey->parttypbyval,
sizeof(bool) * partnatts);
part_scheme->partsupfunc = (FmgrInfo *)
palloc(sizeof(FmgrInfo) * partnatts);
for (i = 0; i < partnatts; i++)
fmgr_info_copy(&part_scheme->partsupfunc[i], &partkey->partsupfunc[i],
CurrentMemoryContext);
/* Add the partitioning scheme to PlannerInfo. */
root->part_schemes = lappend(root->part_schemes, part_scheme);

View File

@@ -154,9 +154,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->part_scheme = NULL;
rel->nparts = 0;
rel->boundinfo = NULL;
rel->partition_qual = NIL;
rel->part_rels = NULL;
rel->partexprs = NULL;
rel->nullable_partexprs = NULL;
rel->partitioned_child_rels = NIL;
/*
* Pass top parent's relids down the inheritance hierarchy. If the parent
@@ -567,9 +569,11 @@ build_join_rel(PlannerInfo *root,
joinrel->part_scheme = NULL;
joinrel->nparts = 0;
joinrel->boundinfo = NULL;
joinrel->partition_qual = NIL;
joinrel->part_rels = NULL;
joinrel->partexprs = NULL;
joinrel->nullable_partexprs = NULL;
joinrel->partitioned_child_rels = NIL;
/* Compute information relevant to the foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
@@ -734,9 +738,13 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->has_eclass_joins = false;
joinrel->top_parent_relids = NULL;
joinrel->part_scheme = NULL;
joinrel->nparts = 0;
joinrel->boundinfo = NULL;
joinrel->partition_qual = NIL;
joinrel->part_rels = NULL;
joinrel->partexprs = NULL;
joinrel->nullable_partexprs = NULL;
joinrel->partitioned_child_rels = NIL;
joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids,
inner_rel->top_parent_relids);

View File

@@ -0,0 +1,17 @@
#-------------------------------------------------------------------------
#
# Makefile--
# Makefile for backend/partitioning
#
# IDENTIFICATION
# src/backend/partitioning/Makefile
#
#-------------------------------------------------------------------------
subdir = src/backend/partitioning
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = partprune.o
include $(top_srcdir)/src/backend/common.mk

File diff suppressed because it is too large Load Diff