Fix costing for disk-based hash aggregation.
Report and suggestions from Richard Guo and Tomas Vondra.

Discussion: https://postgr.es/m/CAMbWs4_W8fYbAn8KxgidAaZHON_Oo08OYn9ze=7remJymLqo5g@mail.gmail.com
@@ -1728,6 +1728,8 @@ hash_agg_set_limits(double hashentrysize, uint64 input_groups, int used_bits,
 	/* if not expected to spill, use all of work_mem */
 	if (input_groups * hashentrysize < work_mem * 1024L)
 	{
+		if (num_partitions != NULL)
+			*num_partitions = 0;
 		*mem_limit = work_mem * 1024L;
 		*ngroups_limit = *mem_limit / hashentrysize;
 		return;
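
For readers of the hunk above, a minimal, self-contained sketch (not PostgreSQL source): after this commit cost_agg() computes its recursion depth unconditionally, so it needs *num_partitions to hold a defined value even on the no-spill path, which previously returned without writing it. Parameter names loosely follow nodeAgg.c; the stubbed spill path and the numbers in main() are illustrative assumptions.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Reduced stand-in for hash_agg_set_limits(): only the "expected to fit in
 * memory" branch from the hunk above is kept; the spill path is a placeholder.
 */
static void
set_limits_sketch(double hashentrysize, uint64_t input_groups, long work_mem,
				  size_t *mem_limit, uint64_t *ngroups_limit,
				  int *num_partitions)
{
	/* if not expected to spill, use all of work_mem */
	if (input_groups * hashentrysize < work_mem * 1024L)
	{
		/* the fix: callers that passed a pointer get a defined value back */
		if (num_partitions != NULL)
			*num_partitions = 0;
		*mem_limit = (size_t) (work_mem * 1024L);
		*ngroups_limit = (uint64_t) (*mem_limit / hashentrysize);
		return;
	}

	/* spill expected: placeholder partition count and limits */
	if (num_partitions != NULL)
		*num_partitions = 4;
	*mem_limit = (size_t) (work_mem * 1024L);
	*ngroups_limit = (uint64_t) (*mem_limit / hashentrysize);
}

int
main(void)
{
	size_t		mem_limit;
	uint64_t	ngroups_limit;
	int			num_partitions = -1;	/* sentinel: must be overwritten */

	/* mimic cost_agg(): pass a real pointer and read it back */
	set_limits_sketch(64.0, 1000, 4096, &mem_limit, &ngroups_limit,
					  &num_partitions);
	printf("mem_limit=%zu ngroups_limit=%llu num_partitions=%d\n",
		   mem_limit, (unsigned long long) ngroups_limit, num_partitions);
	return 0;
}

On the in-memory path the sketch reports num_partitions = 0, which the caller then clamps to 2 before taking a logarithm, as the last hunk below shows.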
@@ -2257,6 +2257,7 @@ cost_agg(Path *path, PlannerInfo *root,
 	 */
 	if (aggstrategy == AGG_HASHED || aggstrategy == AGG_MIXED)
 	{
+		double		pages;
 		double		pages_written = 0.0;
 		double		pages_read = 0.0;
 		double		hashentrysize;
@@ -2264,7 +2265,7 @@ cost_agg(Path *path, PlannerInfo *root,
 		Size		mem_limit;
 		uint64		ngroups_limit;
 		int			num_partitions;
+		int			depth;
 
 		/*
 		 * Estimate number of batches based on the computed limits. If less
@@ -2279,25 +2280,22 @@ cost_agg(Path *path, PlannerInfo *root,
 		nbatches = Max( (numGroups * hashentrysize) / mem_limit,
 					   numGroups / ngroups_limit );
 
+		nbatches = Max(ceil(nbatches), 1.0);
+		num_partitions = Max(num_partitions, 2);
+
+		/*
+		 * The number of partitions can change at different levels of
+		 * recursion; but for the purposes of this calculation assume it stays
+		 * constant.
+		 */
+		depth = ceil( log(nbatches) / log(num_partitions) );
+
 		/*
 		 * Estimate number of pages read and written. For each level of
 		 * recursion, a tuple must be written and then later read.
 		 */
-		if (nbatches > 1.0)
-		{
-			double		depth;
-			double		pages;
-
-			pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
-
-			/*
-			 * The number of partitions can change at different levels of
-			 * recursion; but for the purposes of this calculation assume it
-			 * stays constant.
-			 */
-			depth = ceil( log(nbatches - 1) / log(num_partitions) );
-			pages_written = pages_read = pages * depth;
-		}
+		pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
+		pages_written = pages_read = pages * depth;
 
 		startup_cost += pages_written * random_page_cost;
 		total_cost += pages_written * random_page_cost;
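
To make the effect of the last hunk concrete, here is a hedged, standalone sketch of the new batch and recursion-depth arithmetic with made-up planner inputs (group count, entry size, memory budget and page count are assumptions, not values from any real plan). It shows the two clamps doing their job: when everything fits in memory, nbatches rounds up to 1 and the depth comes out as 0, so no page I/O is costed; and with num_partitions forced to at least 2, log(num_partitions) can no longer be log(0).

#include <math.h>
#include <stdio.h>

#define Max(a, b)	((a) > (b) ? (a) : (b))

/*
 * Hedged sketch of cost_agg()'s spill estimate as it reads after this commit.
 * All inputs are invented; relation_byte_size() and the planner state are
 * replaced by plain numbers.
 */
static void
estimate(double numGroups, double hashentrysize, double mem_limit,
		 double ngroups_limit, int num_partitions, double pages)
{
	double		nbatches;
	int			depth;

	nbatches = Max((numGroups * hashentrysize) / mem_limit,
				   numGroups / ngroups_limit);

	/* the commit's clamps: at least one batch, fan-out of at least two */
	nbatches = Max(ceil(nbatches), 1.0);
	num_partitions = Max(num_partitions, 2);

	/* assume the partition fan-out stays constant across recursion levels */
	depth = (int) ceil(log(nbatches) / log(num_partitions));

	printf("groups=%.0f -> nbatches=%.0f depth=%d pages written/read=%.0f\n",
		   numGroups, nbatches, depth, pages * depth);
}

int
main(void)
{
	double		mem_limit = 4.0 * 1024 * 1024;	/* assumed 4MB budget */
	double		hashentrysize = 64.0;			/* assumed bytes per entry */
	double		ngroups_limit = mem_limit / hashentrysize;

	/* fits in memory: num_partitions reported as 0, depth ends up 0 */
	estimate(10000, hashentrysize, mem_limit, ngroups_limit, 0, 10000.0);

	/* spills: several batches, so a few levels of write-then-read I/O */
	estimate(5e6, hashentrysize, mem_limit, ngroups_limit, 4, 10000.0);
	return 0;
}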