1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Implement Incremental Sort

Incremental Sort is an optimized variant of multikey sort for cases when
the input is already sorted by a prefix of the requested sort keys. For
example when the relation is already sorted by (key1, key2) and we need
to sort it by (key1, key2, key3) we can simply split the input rows into
groups having equal values in (key1, key2), and only sort/compare the
remaining column key3.

This has a number of benefits:

- Reduced memory consumption, because only a single group (determined by
  values in the sorted prefix) needs to be kept in memory. This may also
  eliminate the need to spill to disk.

- Lower startup cost, because Incremental Sort produces results after each
  prefix group, which is beneficial for plans where startup cost matters
  (like for example queries with LIMIT clause).

We consider both Sort and Incremental Sort, and decide based on costing.

The implemented algorithm operates in two different modes:

- Fetching a minimum number of tuples without checking equality on the
  prefix keys, and sorting on all columns when safe.

- Fetching all tuples for a single prefix group and then sorting by
  comparing only the remaining (non-prefix) keys.

We always start in the first mode, and employ a heuristic to switch into
the second mode if we believe it's beneficial - the goal is to minimize
the number of unnecessary comparisons while keeping memory consumption
below work_mem.

This is a very old patch series. The idea was originally proposed by
Alexander Korotkov back in 2013, and then revived in 2017. In 2018 the
patch was taken over by James Coleman, who wrote and rewrote most of the
current code.

There were many reviewers/contributors since 2013 - I've done my best to
pick the most active ones, and listed them in this commit message.

Author: James Coleman, Alexander Korotkov
Reviewed-by: Tomas Vondra, Andreas Karlsson, Marti Raudsepp, Peter Geoghegan, Robert Haas, Thomas Munro, Antonin Houska, Andres Freund, Alexander Kuzmenkov
Discussion: https://postgr.es/m/CAPpHfdscOX5an71nHd8WSUH6GNOCf=V7wgDaTXdDd9=goN-gfA@mail.gmail.com
Discussion: https://postgr.es/m/CAPpHfds1waRZ=NOmueYq0sx1ZSCnt+5QJvizT8ndT2=etZEeAQ@mail.gmail.com
This commit is contained in:
Tomas Vondra
2020-04-06 21:33:28 +02:00
parent 3c8553547b
commit d2d8a229bc
41 changed files with 4244 additions and 160 deletions

View File

@ -98,6 +98,8 @@ static Plan *create_projection_plan(PlannerInfo *root,
int flags);
static Plan *inject_projection_plan(Plan *subplan, List *tlist, bool parallel_safe);
static Sort *create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags);
static IncrementalSort *create_incrementalsort_plan(PlannerInfo *root,
IncrementalSortPath *best_path, int flags);
static Group *create_group_plan(PlannerInfo *root, GroupPath *best_path);
static Unique *create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path,
int flags);
@ -244,6 +246,10 @@ static MergeJoin *make_mergejoin(List *tlist,
static Sort *make_sort(Plan *lefttree, int numCols,
AttrNumber *sortColIdx, Oid *sortOperators,
Oid *collations, bool *nullsFirst);
static IncrementalSort *make_incrementalsort(Plan *lefttree,
int numCols, int nPresortedCols,
AttrNumber *sortColIdx, Oid *sortOperators,
Oid *collations, bool *nullsFirst);
static Plan *prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
Relids relids,
const AttrNumber *reqColIdx,
@ -258,6 +264,8 @@ static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec,
Relids relids);
static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
Relids relids);
static IncrementalSort *make_incrementalsort_from_pathkeys(Plan *lefttree,
List *pathkeys, Relids relids, int nPresortedCols);
static Sort *make_sort_from_groupcols(List *groupcls,
AttrNumber *grpColIdx,
Plan *lefttree);
@ -460,6 +468,11 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
(SortPath *) best_path,
flags);
break;
case T_IncrementalSort:
plan = (Plan *) create_incrementalsort_plan(root,
(IncrementalSortPath *) best_path,
flags);
break;
case T_Group:
plan = (Plan *) create_group_plan(root,
(GroupPath *) best_path);
@ -1994,6 +2007,32 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags)
return plan;
}
/*
 * create_incrementalsort_plan
 *
 *	  Build an IncrementalSort plan node for 'best_path'.  The plan for the
 *	  path's input subpath is built first (recursively), then wrapped in an
 *	  IncrementalSort node.  This is the IncrementalSort counterpart of
 *	  create_sort_plan.
 */
static IncrementalSort *
create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path,
							int flags)
{
	Path	   *input_path = best_path->spath.subpath;
	Plan	   *input_plan;
	Relids		sort_relids;
	IncrementalSort *result;

	/*
	 * Recurse with CP_SMALL_TLIST added to the caller's flags; see the
	 * comments in create_sort_plan() for the reasoning.
	 */
	input_plan = create_plan_recurse(root, input_path,
									 flags | CP_SMALL_TLIST);

	/*
	 * Relids are handed down only when the input's parent rel is an "other"
	 * rel; in every other case NULL is passed.
	 */
	if (IS_OTHER_REL(input_path->parent))
		sort_relids = best_path->spath.path.parent->relids;
	else
		sort_relids = NULL;

	result = make_incrementalsort_from_pathkeys(input_plan,
												best_path->spath.path.pathkeys,
												sort_relids,
												best_path->nPresortedCols);

	/* Copy the generic Path fields into the embedded Plan node */
	copy_generic_path_info(&result->sort.plan, (Path *) best_path);

	return result;
}
/*
* create_group_plan
*
@ -5090,6 +5129,12 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples)
Plan *lefttree = plan->plan.lefttree;
Path sort_path; /* dummy for result of cost_sort */
/*
* This function shouldn't have to deal with IncrementalSort plans because
* they are only created from corresponding Path nodes.
*/
Assert(IsA(plan, Sort));
cost_sort(&sort_path, root, NIL,
lefttree->total_cost,
lefttree->plan_rows,
@ -5677,9 +5722,12 @@ make_sort(Plan *lefttree, int numCols,
AttrNumber *sortColIdx, Oid *sortOperators,
Oid *collations, bool *nullsFirst)
{
Sort *node = makeNode(Sort);
Plan *plan = &node->plan;
Sort *node;
Plan *plan;
node = makeNode(Sort);
plan = &node->plan;
plan->targetlist = lefttree->targetlist;
plan->qual = NIL;
plan->lefttree = lefttree;
@ -5693,6 +5741,37 @@ make_sort(Plan *lefttree, int numCols,
return node;
}
/*
 * make_incrementalsort --- low-level constructor for an IncrementalSort node
 *
 * The sortColIdx, sortOperators, collations, and nullsFirst arrays must
 * already have been built by the caller; they are stored in the node as-is
 * (the pointers are assigned, not copied).
 *
 * 'lefttree' becomes the node's only input, and its targetlist is reused as
 * the targetlist of the new node.
 */
static IncrementalSort *
make_incrementalsort(Plan *lefttree, int numCols, int nPresortedCols,
					 AttrNumber *sortColIdx, Oid *sortOperators,
					 Oid *collations, bool *nullsFirst)
{
	IncrementalSort *isort = makeNode(IncrementalSort);
	Plan	   *base = &isort->sort.plan;

	/* Sort-specific fields */
	isort->sort.numCols = numCols;
	isort->sort.sortColIdx = sortColIdx;
	isort->sort.sortOperators = sortOperators;
	isort->sort.collations = collations;
	isort->sort.nullsFirst = nullsFirst;
	isort->nPresortedCols = nPresortedCols;

	/* Generic Plan fields: no quals, single (left) input */
	base->lefttree = lefttree;
	base->righttree = NULL;
	base->targetlist = lefttree->targetlist;
	base->qual = NIL;

	return isort;
}
/*
* prepare_sort_from_pathkeys
* Prepare to sort according to given pathkeys
@ -6039,6 +6118,42 @@ make_sort_from_pathkeys(Plan *lefttree, List *pathkeys, Relids relids)
collations, nullsFirst);
}
/*
 * make_incrementalsort_from_pathkeys
 *	  Build an IncrementalSort plan node that sorts by the given pathkeys
 *
 *	  'lefttree' is the plan node supplying the input tuples
 *	  'pathkeys' is the list of pathkeys describing the required output order
 *	  'relids' is the set of relations passed on to prepare_sort_from_pathkeys()
 *	  'nPresortedCols' is the number of leading sort columns on which the
 *		input is already sorted
 */
static IncrementalSort *
make_incrementalsort_from_pathkeys(Plan *lefttree, List *pathkeys,
								   Relids relids, int nPresortedCols)
{
	Plan	   *input;
	int			nkeys;
	AttrNumber *colIdx;
	Oid		   *operators;
	Oid		   *colls;
	bool	   *nullsFirstFlags;
	IncrementalSort *result;

	/*
	 * Derive the per-column sort information from the pathkeys.  The call
	 * returns the plan to use as input, which may differ from the 'lefttree'
	 * we were given.
	 */
	input = prepare_sort_from_pathkeys(lefttree, pathkeys,
									   relids,
									   NULL,
									   false,
									   &nkeys,
									   &colIdx,
									   &operators,
									   &colls,
									   &nullsFirstFlags);

	/* Wrap the (possibly adjusted) input in an IncrementalSort node */
	result = make_incrementalsort(input, nkeys, nPresortedCols,
								  colIdx, operators,
								  colls, nullsFirstFlags);

	return result;
}
/*
* make_sort_from_sortclauses
* Create sort plan to sort according to given sortclauses
@ -6774,6 +6889,7 @@ is_projection_capable_path(Path *path)
case T_Hash:
case T_Material:
case T_Sort:
case T_IncrementalSort:
case T_Unique:
case T_SetOp:
case T_LockRows: