1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-25 13:17:41 +03:00

Consider explicit incremental sort for Append and MergeAppend

For an ordered Append or MergeAppend, we need to inject an explicit
sort into any subpath that is not already well enough ordered.
Currently, only explicit full sorts are considered; incremental sorts
are not yet taken into account.

In this patch, for subpaths of an ordered Append or MergeAppend, we
choose to use explicit incremental sort if it is enabled and there are
presorted keys; otherwise we continue to use an explicit full sort.

The rationale is based on the assumption that incremental sort is
always faster than full sort when there are presorted keys, a premise
that has been applied in various parts of the code.  In addition, the
current cost model tends to favor incremental sort as being cheaper
than full sort in the presence of presorted keys, making it reasonable
not to consider full sort in such cases.

Not back-patched, as this could result in plan changes.

Author: Richard Guo <guofenglinux@gmail.com>
Reviewed-by: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Discussion: https://postgr.es/m/CAMbWs4_V7a2enTR+T3pOY_YZ-FU8ZsFYym2swOz4jNMqmSgyuw@mail.gmail.com
This commit is contained in:
Richard Guo
2025-07-08 10:21:44 +09:00
parent 7376e60854
commit 55a780e947
7 changed files with 217 additions and 52 deletions

View File

@@ -2247,7 +2247,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
* Determines and returns the cost of an Append node.
*/
void
cost_append(AppendPath *apath)
cost_append(AppendPath *apath, PlannerInfo *root)
{
ListCell *l;
@@ -2309,26 +2309,52 @@ cost_append(AppendPath *apath)
foreach(l, apath->subpaths)
{
Path *subpath = (Path *) lfirst(l);
Path sort_path; /* dummy for result of cost_sort */
int presorted_keys;
Path sort_path; /* dummy for result of
* cost_sort/cost_incremental_sort */
if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
&presorted_keys))
{
/*
* We'll need to insert a Sort node, so include costs for
* that. We can use the parent's LIMIT if any, since we
* that. We choose to use incremental sort if it is
* enabled and there are presorted keys; otherwise we use
* full sort.
*
* We can use the parent's LIMIT if any, since we
* certainly won't pull more than that many tuples from
* any child.
*/
cost_sort(&sort_path,
NULL, /* doesn't currently need root */
pathkeys,
subpath->disabled_nodes,
subpath->total_cost,
subpath->rows,
subpath->pathtarget->width,
0.0,
work_mem,
apath->limit_tuples);
if (enable_incremental_sort && presorted_keys > 0)
{
cost_incremental_sort(&sort_path,
root,
pathkeys,
presorted_keys,
subpath->disabled_nodes,
subpath->startup_cost,
subpath->total_cost,
subpath->rows,
subpath->pathtarget->width,
0.0,
work_mem,
apath->limit_tuples);
}
else
{
cost_sort(&sort_path,
root,
pathkeys,
subpath->disabled_nodes,
subpath->total_cost,
subpath->rows,
subpath->pathtarget->width,
0.0,
work_mem,
apath->limit_tuples);
}
subpath = &sort_path;
}

View File

@@ -1318,6 +1318,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
int presorted_keys;
/*
* Compute sort column info, and adjust subplan's tlist as needed.
@@ -1353,14 +1354,38 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
&presorted_keys))
{
Sort *sort = make_sort(subplan, numsortkeys,
Plan *sort_plan;
/*
* We choose to use incremental sort if it is enabled and
* there are presorted keys; otherwise we use full sort.
*/
if (enable_incremental_sort && presorted_keys > 0)
{
sort_plan = (Plan *)
make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
label_sort_with_costsize(root, sort, best_path->limit_tuples);
subplan = (Plan *) sort;
label_incrementalsort_with_costsize(root,
(IncrementalSort *) sort_plan,
pathkeys,
best_path->limit_tuples);
}
else
{
sort_plan = (Plan *) make_sort(subplan, numsortkeys,
sortColIdx, sortOperators,
collations, nullsFirst);
label_sort_with_costsize(root, (Sort *) sort_plan,
best_path->limit_tuples);
}
subplan = sort_plan;
}
}
@@ -1491,6 +1516,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
int presorted_keys;
/* Build the child plan */
/* Must insist that all children return the same tlist */
@@ -1525,14 +1551,38 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
&presorted_keys))
{
Sort *sort = make_sort(subplan, numsortkeys,
Plan *sort_plan;
/*
* We choose to use incremental sort if it is enabled and there
* are presorted keys; otherwise we use full sort.
*/
if (enable_incremental_sort && presorted_keys > 0)
{
sort_plan = (Plan *)
make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
label_sort_with_costsize(root, sort, best_path->limit_tuples);
subplan = (Plan *) sort;
label_incrementalsort_with_costsize(root,
(IncrementalSort *) sort_plan,
pathkeys,
best_path->limit_tuples);
}
else
{
sort_plan = (Plan *) make_sort(subplan, numsortkeys,
sortColIdx, sortOperators,
collations, nullsFirst);
label_sort_with_costsize(root, (Sort *) sort_plan,
best_path->limit_tuples);
}
subplan = sort_plan;
}
subplans = lappend(subplans, subplan);

View File

@@ -1404,12 +1404,12 @@ create_append_path(PlannerInfo *root,
pathnode->path.total_cost = child->total_cost;
}
else
cost_append(pathnode);
cost_append(pathnode, root);
/* Must do this last, else cost_append complains */
pathnode->path.pathkeys = child->pathkeys;
}
else
cost_append(pathnode);
cost_append(pathnode, root);
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)
@@ -1515,6 +1515,9 @@ create_merge_append_path(PlannerInfo *root,
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
int presorted_keys;
Path sort_path; /* dummy for result of
* cost_sort/cost_incremental_sort */
/* All child paths should be unparameterized */
Assert(bms_is_empty(PATH_REQ_OUTER(subpath)));
@@ -1523,32 +1526,52 @@ create_merge_append_path(PlannerInfo *root,
pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
subpath->parallel_safe;
if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
&presorted_keys))
{
/* Subpath is adequately ordered, we won't need to sort it */
input_disabled_nodes += subpath->disabled_nodes;
input_startup_cost += subpath->startup_cost;
input_total_cost += subpath->total_cost;
}
else
{
/* We'll need to insert a Sort node, so include cost for that */
Path sort_path; /* dummy for result of cost_sort */
/*
* We'll need to insert a Sort node, so include costs for that. We
* choose to use incremental sort if it is enabled and there are
* presorted keys; otherwise we use full sort.
*
* We can use the parent's LIMIT if any, since we certainly won't
* pull more than that many tuples from any child.
*/
if (enable_incremental_sort && presorted_keys > 0)
{
cost_incremental_sort(&sort_path,
root,
pathkeys,
presorted_keys,
subpath->disabled_nodes,
subpath->startup_cost,
subpath->total_cost,
subpath->rows,
subpath->pathtarget->width,
0.0,
work_mem,
pathnode->limit_tuples);
}
else
{
cost_sort(&sort_path,
root,
pathkeys,
subpath->disabled_nodes,
subpath->total_cost,
subpath->rows,
subpath->pathtarget->width,
0.0,
work_mem,
pathnode->limit_tuples);
}
cost_sort(&sort_path,
root,
pathkeys,
subpath->disabled_nodes,
subpath->total_cost,
subpath->rows,
subpath->pathtarget->width,
0.0,
work_mem,
pathnode->limit_tuples);
input_disabled_nodes += sort_path.disabled_nodes;
input_startup_cost += sort_path.startup_cost;
input_total_cost += sort_path.total_cost;
subpath = &sort_path;
}
input_disabled_nodes += subpath->disabled_nodes;
input_startup_cost += subpath->startup_cost;
input_total_cost += subpath->total_cost;
}
/*