Fixes for Disk-based Hash Aggregation.
Justin Pryzby raised a couple issues with commit 1f39bce0. Fixed.
Also, tweak the way the size of a hash entry is estimated and the
number of buckets is estimated when calling BuildTupleHashTableExt().
Discussion: https://www.postgresql.org/message-id/20200319064222.GR26184@telsasoft.com
@@ -2778,7 +2778,7 @@ static void
 show_hashagg_info(AggState *aggstate, ExplainState *es)
 {
     Agg *agg = (Agg *)aggstate->ss.ps.plan;
-    long memPeakKb = (aggstate->hash_mem_peak + 1023) / 1024;
+    int64 memPeakKb = (aggstate->hash_mem_peak + 1023) / 1024;
 
     Assert(IsA(aggstate, AggState));
 
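The hunk above only widens the type of memPeakKb: PostgreSQL's int64 is 64 bits on every platform, while long is only 32 bits on some (notably 64-bit Windows). The rounding itself is unchanged. A minimal standalone sketch of that round-up-to-kilobytes arithmetic, using int64_t from <stdint.h> in place of the PostgreSQL typedef and an illustrative peak value:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Illustrative peak in bytes (not a value from the commit). */
        int64_t hash_mem_peak = INT64_C(5000000000);    /* about 4.7 GB */

        /* Same arithmetic as the hunk: round up to whole kilobytes. */
        int64_t memPeakKb = (hash_mem_peak + 1023) / 1024;

        printf("peak = %" PRId64 " kB\n", memPeakKb);
        return 0;
    }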
@@ -1873,17 +1873,12 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
         aggstate->hash_disk_used = disk_used;
     }
 
-    /*
-     * Update hashentrysize estimate based on contents. Don't include meta_mem
-     * in the memory used, because empty buckets would inflate the per-entry
-     * cost. An underestimate of the per-entry size is better than an
-     * overestimate, because an overestimate could compound with each level of
-     * recursion.
-     */
+    /* update hashentrysize estimate based on contents */
     if (aggstate->hash_ngroups_current > 0)
     {
         aggstate->hashentrysize =
-            hash_mem / (double)aggstate->hash_ngroups_current;
+            sizeof(TupleHashEntryData) +
+            (hash_mem / (double)aggstate->hash_ngroups_current);
     }
 }
 
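The comment removed above spells out the intent: hash_mem deliberately leaves out the table's metadata so that empty buckets cannot inflate the per-entry cost, and an underestimate is preferred because an overestimate would compound at each level of recursion. The code change appears to add the fixed per-entry header, sizeof(TupleHashEntryData), back on top of that measured average. A rough standalone sketch of the arithmetic with illustrative numbers (the 24-byte header is a stand-in, not the real sizeof):

    #include <stdio.h>

    int main(void)
    {
        /* Illustrative inputs: memory used by group keys and transition
         * values, and the number of groups seen so far. */
        double hash_mem = 4.0 * 1024 * 1024;    /* 4 MB */
        double ngroups = 10000.0;
        double entry_header = 24.0;    /* stand-in for sizeof(TupleHashEntryData) */

        /* Old estimate: data only.  New estimate: header plus data. */
        double old_estimate = hash_mem / ngroups;
        double new_estimate = entry_header + hash_mem / ngroups;

        printf("old ~%.0f bytes/entry, new ~%.0f bytes/entry\n",
               old_estimate, new_estimate);
        return 0;
    }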
@@ -1899,10 +1894,10 @@ hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory)
     max_nbuckets = memory / hashentrysize;
 
     /*
-     * Leave room for slop to avoid a case where the initial hash table size
-     * exceeds the memory limit (though that may still happen in edge cases).
+     * Underestimating is better than overestimating. Too many buckets crowd
+     * out space for group keys and transition state values.
      */
-    max_nbuckets *= 0.75;
+    max_nbuckets >>= 1;
 
     if (nbuckets > max_nbuckets)
         nbuckets = max_nbuckets;
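The new comment states the policy directly: underestimate the bucket count, because too many buckets crowd out space for group keys and transition state values. A small standalone sketch of the capping step with illustrative numbers (requested_nbuckets and the constants are not from the source, and the real function does further adjustment around this):

    #include <stdio.h>

    int main(void)
    {
        long memory = 4 * 1024 * 1024;      /* illustrative memory budget */
        double hashentrysize = 443.0;       /* illustrative bytes per entry */
        long requested_nbuckets = 16384;    /* illustrative group estimate */

        /* How many entries would fit, then halve to underestimate
         * (the commit replaces "*= 0.75" with ">>= 1"). */
        long max_nbuckets = (long) (memory / hashentrysize);
        max_nbuckets >>= 1;

        long nbuckets = requested_nbuckets;
        if (nbuckets > max_nbuckets)
            nbuckets = max_nbuckets;

        printf("cap = %ld, chosen nbuckets = %ld\n", max_nbuckets, nbuckets);
        return 0;
    }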
@@ -3548,7 +3543,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
      * reasonable.
      */
     for (i = 0; i < aggstate->num_hashes; i++)
-        totalGroups = aggstate->perhash[i].aggnode->numGroups;
+        totalGroups += aggstate->perhash[i].aggnode->numGroups;
 
     hash_agg_set_limits(aggstate->hashentrysize, totalGroups, 0,
                         &aggstate->hash_mem_limit,
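The last hunk fixes a plain bug: with multiple grouping sets each perhash entry carries its own numGroups estimate, and using "=" instead of "+=" kept only the last one when computing the total passed to hash_agg_set_limits(). A tiny standalone illustration with made-up group counts (the array and names are not from the source):

    #include <stdio.h>

    int main(void)
    {
        /* Illustrative per-grouping-set group estimates. */
        double numGroups[] = {100.0, 200.0, 300.0};
        int num_hashes = 3;
        double buggy = 0.0;
        double fixed = 0.0;

        for (int i = 0; i < num_hashes; i++)
        {
            buggy = numGroups[i];     /* "=": only the last estimate survives */
            fixed += numGroups[i];    /* "+=": sum across grouping sets */
        }

        printf("buggy total = %.0f, fixed total = %.0f\n", buggy, fixed);
        return 0;
    }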