mirror of
https://github.com/postgres/postgres.git
synced 2025-11-01 21:31:19 +03:00
Implement partition-wise grouping/aggregation.
If the partition keys of the input relation are part of the GROUP BY clause, all the rows belonging to a given group come from a single partition. This allows aggregation/grouping over a partitioned relation to be broken down into aggregation/grouping on each partition. This should be no worse, and often better, than the normal approach. If the GROUP BY clause does not contain all the partition keys, we can still perform partial aggregation for each partition and then finalize aggregation after appending the partial results. This is less certain to be a win, but it's still useful. Jeevan Chalke, Ashutosh Bapat, Robert Haas. The larger patch series of which this patch is a part was also reviewed and tested by Antonin Houska, Rajkumar Raghuwanshi, David Rowley, Dilip Kumar, Konstantin Knizhnik, Pascal Legrand, and Rafia Sabih. Discussion: http://postgr.es/m/CAM2+6=V64_xhstVHie0Rz=KPEQnLJMZt_e314P0jaT_oJ9MR8A@mail.gmail.com
This commit is contained in:
@@ -1079,6 +1079,7 @@ busy for a long time to come.
|
||||
|
||||
Partitionwise joins
|
||||
-------------------
|
||||
|
||||
A join between two similarly partitioned tables can be broken down into joins
|
||||
between their matching partitions if there exists an equi-join condition
|
||||
between the partition keys of the joining tables. The equi-join between
|
||||
@@ -1102,3 +1103,16 @@ any two partitioned relations with same partitioning scheme point to the same
|
||||
PartitionSchemeData object. This reduces memory consumed by
|
||||
PartitionSchemeData objects and makes it easy to compare the partition schemes
|
||||
of joining relations.
|
||||
|
||||
Partitionwise aggregates/grouping
|
||||
---------------------------------
|
||||
|
||||
If the GROUP BY clause contains all of the partition keys, all the rows
|
||||
that belong to a given group must come from a single partition; therefore,
|
||||
aggregation can be done completely separately for each partition. Otherwise,
|
||||
partial aggregates can be computed for each partition, and then finalized
|
||||
after appending the results from the individual partitions. This technique of
|
||||
breaking down aggregation or grouping over a partitioned relation into
|
||||
aggregation or grouping over its partitions is called partitionwise
|
||||
aggregation. Especially when the partition keys match the GROUP BY clause,
|
||||
this can be significantly faster than the regular method.
|
||||
|
||||
@@ -134,8 +134,6 @@ static void subquery_push_qual(Query *subquery,
|
||||
static void recurse_push_qual(Node *setOp, Query *topquery,
|
||||
RangeTblEntry *rte, Index rti, Node *qual);
|
||||
static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
|
||||
static void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
|
||||
List *live_childrels);
|
||||
|
||||
|
||||
/*
|
||||
@@ -1326,7 +1324,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
||||
* parameterization or ordering. Similarly it collects partial paths from
|
||||
* non-dummy children to create partial append paths.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
|
||||
List *live_childrels)
|
||||
{
|
||||
@@ -1413,8 +1411,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
|
||||
* If child has an unparameterized cheapest-total path, add that to
|
||||
* the unparameterized Append path we are constructing for the parent.
|
||||
* If not, there's no workable unparameterized path.
|
||||
*
|
||||
* With partitionwise aggregates, the child rel's pathlist may be
|
||||
* empty, so don't assume that a path exists here.
|
||||
*/
|
||||
if (childrel->cheapest_total_path->param_info == NULL)
|
||||
if (childrel->pathlist != NIL &&
|
||||
childrel->cheapest_total_path->param_info == NULL)
|
||||
accumulate_append_subpath(childrel->cheapest_total_path,
|
||||
&subpaths, NULL);
|
||||
else
|
||||
@@ -1682,6 +1684,13 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
|
||||
RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
|
||||
Path *subpath;
|
||||
|
||||
if (childrel->pathlist == NIL)
|
||||
{
|
||||
/* failed to make a suitable path for this child */
|
||||
subpaths_valid = false;
|
||||
break;
|
||||
}
|
||||
|
||||
subpath = get_cheapest_parameterized_child_path(root,
|
||||
childrel,
|
||||
required_outer);
|
||||
|
||||
@@ -135,6 +135,7 @@ bool enable_mergejoin = true;
|
||||
bool enable_hashjoin = true;
|
||||
bool enable_gathermerge = true;
|
||||
bool enable_partitionwise_join = false;
|
||||
bool enable_partitionwise_aggregate = false;
|
||||
bool enable_parallel_append = true;
|
||||
bool enable_parallel_hash = true;
|
||||
|
||||
|
||||
@@ -1670,7 +1670,15 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags)
|
||||
subplan = create_plan_recurse(root, best_path->subpath,
|
||||
flags | CP_SMALL_TLIST);
|
||||
|
||||
plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys, NULL);
|
||||
/*
|
||||
* make_sort_from_pathkeys() indirectly calls find_ec_member_for_tle(),
|
||||
* which will ignore any child EC members that don't belong to the given
|
||||
* relids. Thus, if this sort path is based on a child relation, we must
|
||||
* pass its relids.
|
||||
*/
|
||||
plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys,
|
||||
IS_OTHER_REL(best_path->subpath->parent) ?
|
||||
best_path->path.parent->relids : NULL);
|
||||
|
||||
copy_generic_path_info(&plan->plan, (Path *) best_path);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user