
Teach CLUSTER to use seqscan-and-sort when it's faster than indexscan.

... or at least, when the planner's cost estimates say it will be faster.

Leonardo Francalanci, reviewed by Itagaki Takahiro and Tom Lane
Tom Lane
2010-10-07 20:00:28 -04:00
parent 694c56af2b
commit 3ba11d3df2
14 changed files with 716 additions and 141 deletions

src/backend/optimizer/path/costsize.c

@@ -1071,33 +1071,37 @@ cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm)
  * Determines and returns the cost of sorting a relation, including
  * the cost of reading the input data.
  *
- * If the total volume of data to sort is less than work_mem, we will do
+ * If the total volume of data to sort is less than sort_mem, we will do
  * an in-memory sort, which requires no I/O and about t*log2(t) tuple
  * comparisons for t tuples.
  *
- * If the total volume exceeds work_mem, we switch to a tape-style merge
+ * If the total volume exceeds sort_mem, we switch to a tape-style merge
  * algorithm.  There will still be about t*log2(t) tuple comparisons in
  * total, but we will also need to write and read each tuple once per
  * merge pass.  We expect about ceil(logM(r)) merge passes where r is the
  * number of initial runs formed and M is the merge order used by tuplesort.c.
- * Since the average initial run should be about twice work_mem, we have
- *     disk traffic = 2 * relsize * ceil(logM(p / (2*work_mem)))
+ * Since the average initial run should be about twice sort_mem, we have
+ *     disk traffic = 2 * relsize * ceil(logM(p / (2*sort_mem)))
  *     cpu = comparison_cost * t * log2(t)
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
- * and k tuples can fit into work_mem, we use a heap method that keeps only
+ * and k tuples can fit into sort_mem, we use a heap method that keeps only
  * k tuples in the heap; this will require about t*log2(k) tuple comparisons.
  *
  * The disk traffic is assumed to be 3/4ths sequential and 1/4th random
  * accesses (XXX can't we refine that guess?)
  *
- * We charge two operator evals per tuple comparison, which should be in
- * the right ballpark in most cases.
+ * By default, we charge two operator evals per tuple comparison, which should
+ * be in the right ballpark in most cases.  The caller can tweak this by
+ * specifying nonzero comparison_cost; typically that's used for any extra
+ * work that has to be done to prepare the inputs to the comparison operators.
  *
  * 'pathkeys' is a list of sort keys
  * 'input_cost' is the total cost for reading the input data
  * 'tuples' is the number of tuples in the relation
  * 'width' is the average tuple width in bytes
+ * 'comparison_cost' is the extra cost per comparison, if any
+ * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
  *
  * NOTE: some callers currently pass NIL for pathkeys because they
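
Read on its own, the model above boils down to one CPU formula. A minimal
standalone C sketch of that charge (our illustration, not part of the patch;
the function name is hypothetical):

    #include <math.h>

    #define LOG2(x) (log(x) / log(2.0))

    /*
     * Sketch of the CPU charge described in the comment above; not
     * PostgreSQL source.  effective_comparison_cost is the caller's
     * comparison_cost plus the default 2 * cpu_operator_cost.
     */
    static double
    sort_cpu_cost(double tuples, double effective_comparison_cost)
    {
        /* roughly t * log2(t) comparisons for a full sort of t tuples */
        return effective_comparison_cost * tuples * LOG2(tuples);
    }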
@@ -1110,6 +1114,7 @@ cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm)
 void
 cost_sort(Path *path, PlannerInfo *root,
           List *pathkeys, Cost input_cost, double tuples, int width,
+          Cost comparison_cost, int sort_mem,
           double limit_tuples)
 {
     Cost        startup_cost = input_cost;
@@ -1117,7 +1122,7 @@ cost_sort(Path *path, PlannerInfo *root,
     double      input_bytes = relation_byte_size(tuples, width);
     double      output_bytes;
     double      output_tuples;
-    long        work_mem_bytes = work_mem * 1024L;
+    long        sort_mem_bytes = sort_mem * 1024L;
 
     if (!enable_sort)
         startup_cost += disable_cost;
@@ -1129,6 +1134,9 @@ cost_sort(Path *path, PlannerInfo *root,
     if (tuples < 2.0)
         tuples = 2.0;
 
+    /* Include the default cost-per-comparison */
+    comparison_cost += 2.0 * cpu_operator_cost;
+
     /* Do we have a useful LIMIT? */
     if (limit_tuples > 0 && limit_tuples < tuples)
     {
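
So the charge per comparison is the caller's extra cost plus two operator
evaluations. With the stock cpu_operator_cost of 0.0025 and no caller extra,
that works out as follows (the defaults are real; the snippet is our
illustration):

    double cpu_operator_cost = 0.0025;          /* planner default */
    double comparison_cost = 0.0;               /* caller passed no extra cost */
    comparison_cost += 2.0 * cpu_operator_cost; /* effective: 0.005 per comparison */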
@@ -1141,24 +1149,23 @@ cost_sort(Path *path, PlannerInfo *root,
         output_bytes = input_bytes;
     }
 
-    if (output_bytes > work_mem_bytes)
+    if (output_bytes > sort_mem_bytes)
     {
         /*
          * We'll have to use a disk-based sort of all the tuples
         */
         double      npages = ceil(input_bytes / BLCKSZ);
-        double      nruns = (input_bytes / work_mem_bytes) * 0.5;
-        double      mergeorder = tuplesort_merge_order(work_mem_bytes);
+        double      nruns = (input_bytes / sort_mem_bytes) * 0.5;
+        double      mergeorder = tuplesort_merge_order(sort_mem_bytes);
         double      log_runs;
         double      npageaccesses;
 
         /*
          * CPU costs
          *
-         * Assume about two operator evals per tuple comparison and N log2 N
-         * comparisons
+         * Assume about N log2 N comparisons
         */
-        startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
+        startup_cost += comparison_cost * tuples * LOG2(tuples);
 
         /* Disk costs */
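
To see what the run and merge-pass arithmetic does in practice, here is a
self-contained sketch with made-up sizes (the merge order is a stand-in
value; the real one comes from tuplesort_merge_order()):

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        double input_bytes = 1024.0 * 1024.0 * 1024.0;   /* 1 GB of sort input */
        double sort_mem_bytes = 64.0 * 1024.0 * 1024.0;  /* 64 MB sort budget */
        double mergeorder = 400.0;                       /* stand-in merge order */
        double nruns = (input_bytes / sort_mem_bytes) * 0.5;   /* ~8 initial runs */
        double log_runs;

        /* one merge pass suffices unless there are more runs than the merge order */
        if (nruns > mergeorder)
            log_runs = ceil(log(nruns) / log(mergeorder));
        else
            log_runs = 1.0;
        printf("runs=%.0f, merge passes=%.0f\n", nruns, log_runs);
        return 0;
    }

Since each pass writes and reads every page once, disk traffic comes out to
about 2 * npages * log_runs page accesses.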
@@ -1172,7 +1179,7 @@ cost_sort(Path *path, PlannerInfo *root,
         startup_cost += npageaccesses *
             (seq_page_cost * 0.75 + random_page_cost * 0.25);
     }
-    else if (tuples > 2 * output_tuples || input_bytes > work_mem_bytes)
+    else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
     {
         /*
          * We'll use a bounded heap-sort keeping just K tuples in memory, for
@@ -1180,12 +1187,12 @@ cost_sort(Path *path, PlannerInfo *root,
          * factor is a bit higher than for quicksort.  Tweak it so that the
          * cost curve is continuous at the crossover point.
         */
-        startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(2.0 * output_tuples);
+        startup_cost += comparison_cost * tuples * LOG2(2.0 * output_tuples);
     }
     else
     {
         /* We'll use plain quicksort on all the input tuples */
-        startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
+        startup_cost += comparison_cost * tuples * LOG2(tuples);
     }
 
     /*
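
The LOG2(2.0 * output_tuples) factor is what makes the curve continuous: at
the crossover point tuples == 2 * output_tuples, the heap-sort charge equals
the quicksort charge. A quick numeric check (values are arbitrary; the
program is our illustration):

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        double output_tuples = 1000.0;        /* hypothetical bound k */
        double tuples = 2.0 * output_tuples;  /* the crossover point t = 2k */
        double cost = 0.005;                  /* effective cost per comparison */
        double heap = cost * tuples * (log(2.0 * output_tuples) / log(2.0));
        double quick = cost * tuples * (log(tuples) / log(2.0));

        printf("heap=%.3f quicksort=%.3f\n", heap, quick);  /* identical */
        return 0;
    }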
@@ -1786,6 +1793,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                   outer_path->total_cost,
                   outer_path_rows,
                   outer_path->parent->width,
+                  0.0,
+                  work_mem,
                   -1.0);
         startup_cost += sort_path.startup_cost;
         startup_cost += (sort_path.total_cost - sort_path.startup_cost)
@@ -1810,6 +1819,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                   inner_path->total_cost,
                   inner_path_rows,
                   inner_path->parent->width,
+                  0.0,
+                  work_mem,
                   -1.0);
         startup_cost += sort_path.startup_cost;
         startup_cost += (sort_path.total_cost - sort_path.startup_cost)
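
These mergejoin call sites keep the old behavior: zero extra comparison cost
and the ordinary work_mem budget. The payoff of the new parameters is
elsewhere in the commit, where the CLUSTER planning code can cost a
seqscan-and-sort using maintenance_work_mem instead. A hedged sketch of such
a call (path and relation variable names are ours, not copied from the
patch):

    cost_sort(&seqsort_path, root, NIL,
              seqscan_path.total_cost,     /* cost of reading the heap */
              rel->tuples, rel->width,
              0.0,                         /* extra comparison cost, if any */
              maintenance_work_mem,        /* CLUSTER's sort budget */
              -1.0);                       /* no LIMIT bound */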