
Teach tuplesort.c about "top N" sorting, in which only the first N tuples
need be returned.  We keep a heap of the current best N tuples and sift-up
new tuples into it as we scan the input.  For M input tuples this means
only about M*log(N) comparisons instead of M*log(M), not to mention a lot
less workspace when N is small --- avoiding spill-to-disk for large M
is actually the most attractive thing about it.  Patch includes planner
and executor support for invoking this facility in ORDER BY ... LIMIT
queries.  Greg Stark, with some editorialization by moi.
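
As a rough standalone illustration of the bounded-heap idea described above (not the tuplesort.c code itself; the int array, function names, and fixed N here are invented for the example), a "top N smallest" pass over M values can look like this in C:

/*
 * Minimal sketch of a bounded "top N" heap: keep a max-heap of the N
 * smallest values seen so far, so an ascending sort bounded by LIMIT N
 * needs only about M*log(N) comparisons and O(N) workspace for M inputs.
 */
#include <stdio.h>

static void
sift_down(int *heap, int nheap, int i)
{
    /* Restore the max-heap property below slot i. */
    for (;;)
    {
        int     largest = i;
        int     left = 2 * i + 1;
        int     right = 2 * i + 2;

        if (left < nheap && heap[left] > heap[largest])
            largest = left;
        if (right < nheap && heap[right] > heap[largest])
            largest = right;
        if (largest == i)
            break;
        int     tmp = heap[i];
        heap[i] = heap[largest];
        heap[largest] = tmp;
        i = largest;
    }
}

/* Collect the n smallest of vals[] into heap[]; returns how many were kept. */
static int
top_n(const int *vals, int nvals, int *heap, int n)
{
    int     nheap = 0;

    for (int i = 0; i < nvals; i++)
    {
        if (nheap < n)
        {
            /* Heap not full yet: append the value and sift it up. */
            int     j = nheap++;

            heap[j] = vals[i];
            while (j > 0 && heap[(j - 1) / 2] < heap[j])
            {
                int     tmp = heap[j];
                heap[j] = heap[(j - 1) / 2];
                heap[(j - 1) / 2] = tmp;
                j = (j - 1) / 2;
            }
        }
        else if (vals[i] < heap[0])
        {
            /* Beats the current worst of the best N: replace the root. */
            heap[0] = vals[i];
            sift_down(heap, nheap, 0);
        }
        /* else: not in the top N, discard immediately */
    }
    return nheap;
}

int
main(void)
{
    int     vals[] = {42, 7, 99, 3, 58, 21, 64, 5, 80, 12};
    int     heap[3];
    int     nkept = top_n(vals, 10, heap, 3);

    /* heap[] now holds the 3 smallest values (3, 5, 7) in heap order;
     * a final in-memory sort of these N survivors yields the output order. */
    for (int i = 0; i < nkept; i++)
        printf("%d\n", heap[i]);
    return 0;
}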
Tom Lane
2007-05-04 01:13:45 +00:00
parent 0fef38da21
commit d26559dbf3
13 changed files with 465 additions and 59 deletions

src/backend/optimizer/path/costsize.c

@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.181 2007/04/21 21:01:44 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.182 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -922,6 +922,10 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
* disk traffic = 2 * relsize * ceil(logM(p / (2*work_mem)))
* cpu = comparison_cost * t * log2(t)
*
* If the sort is bounded (i.e., only the first k result tuples are needed)
* and k tuples can fit into work_mem, we use a heap method that keeps only
* k tuples in the heap; this will require about t*log2(k) tuple comparisons.
*
* The disk traffic is assumed to be 3/4ths sequential and 1/4th random
* accesses (XXX can't we refine that guess?)
*
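
To put rough numbers on the comparison-count claim in the new comment above (illustrative figures, not taken from the patch): for t = 1,000,000 input tuples and a bound of k = 100,

    t * log2(t) ≈ 1,000,000 * 19.9 ≈ 20 million comparisons (full sort)
    t * log2(k) ≈ 1,000,000 * 6.6  ≈ 6.6 million comparisons (bounded heap)

and, just as importantly, the heap only ever holds k tuples, so a result that would otherwise spill to disk can stay within work_mem.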
@@ -932,6 +936,7 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
* 'input_cost' is the total cost for reading the input data
* 'tuples' is the number of tuples in the relation
* 'width' is the average tuple width in bytes
* 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
*
* NOTE: some callers currently pass NIL for pathkeys because they
* can't conveniently supply the sort keys. Since this routine doesn't
@@ -942,11 +947,14 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
*/
void
cost_sort(Path *path, PlannerInfo *root,
List *pathkeys, Cost input_cost, double tuples, int width)
List *pathkeys, Cost input_cost, double tuples, int width,
double limit_tuples)
{
Cost startup_cost = input_cost;
Cost run_cost = 0;
double nbytes = relation_byte_size(tuples, width);
double input_bytes = relation_byte_size(tuples, width);
double output_bytes;
double output_tuples;
long work_mem_bytes = work_mem * 1024L;
if (!enable_sort)
@@ -959,23 +967,39 @@ cost_sort(Path *path, PlannerInfo *root,
if (tuples < 2.0)
tuples = 2.0;
/*
* CPU costs
*
* Assume about two operator evals per tuple comparison and N log2 N
* comparisons
*/
startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
/* disk costs */
if (nbytes > work_mem_bytes)
/* Do we have a useful LIMIT? */
if (limit_tuples > 0 && limit_tuples < tuples)
{
double npages = ceil(nbytes / BLCKSZ);
double nruns = (nbytes / work_mem_bytes) * 0.5;
output_tuples = limit_tuples;
output_bytes = relation_byte_size(output_tuples, width);
}
else
{
output_tuples = tuples;
output_bytes = input_bytes;
}
if (output_bytes > work_mem_bytes)
{
/*
* We'll have to use a disk-based sort of all the tuples
*/
double npages = ceil(input_bytes / BLCKSZ);
double nruns = (input_bytes / work_mem_bytes) * 0.5;
double mergeorder = tuplesort_merge_order(work_mem_bytes);
double log_runs;
double npageaccesses;
/*
* CPU costs
*
* Assume about two operator evals per tuple comparison and N log2 N
* comparisons
*/
startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
/* Disk costs */
/* Compute logM(r) as log(r) / log(M) */
if (nruns > mergeorder)
log_runs = ceil(log(nruns) / log(mergeorder));
@@ -986,10 +1010,27 @@ cost_sort(Path *path, PlannerInfo *root,
startup_cost += npageaccesses *
(seq_page_cost * 0.75 + random_page_cost * 0.25);
}
else if (tuples > 2 * output_tuples || input_bytes > work_mem_bytes)
{
/*
* We'll use a bounded heap-sort keeping just K tuples in memory,
* for a total number of tuple comparisons of N log2 K; but the
* constant factor is a bit higher than for quicksort. Tweak it
* so that the cost curve is continuous at the crossover point.
*/
startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(2.0 * output_tuples);
}
else
{
/* We'll use plain quicksort on all the input tuples */
startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
}
/*
* Also charge a small amount (arbitrarily set equal to operator cost) per
* extracted tuple.
* extracted tuple. Note it's correct to use tuples not output_tuples
* here --- the upper LIMIT will pro-rate the run cost so we'd be double
* counting the LIMIT otherwise.
*/
run_cost += cpu_operator_cost * tuples;
@@ -1431,7 +1472,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
outersortkeys,
outer_path->total_cost,
outer_path_rows,
outer_path->parent->width);
outer_path->parent->width,
-1.0);
startup_cost += sort_path.startup_cost;
run_cost += (sort_path.total_cost - sort_path.startup_cost)
* outerscansel;
@@ -1450,7 +1492,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
innersortkeys,
inner_path->total_cost,
inner_path_rows,
inner_path->parent->width);
inner_path->parent->width,
-1.0);
startup_cost += sort_path.startup_cost;
run_cost += (sort_path.total_cost - sort_path.startup_cost)
* innerscansel * rescanratio;
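
A note on the 2.0 * output_tuples factor in the bounded-heap branch of cost_sort above: that branch is taken only when tuples > 2 * output_tuples (or the input exceeds work_mem), and its CPU cost is charged as

    2.0 * cpu_operator_cost * tuples * LOG2(2.0 * output_tuples)

At the crossover point tuples = 2 * output_tuples, LOG2(2.0 * output_tuples) equals LOG2(tuples), so this expression coincides exactly with the plain-quicksort charge of 2.0 * cpu_operator_cost * tuples * LOG2(tuples), which is what the "continuous at the crossover point" comment is ensuring.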