mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Adjust hash table sizing algorithm to avoid integer overflow in
ExecHashJoinGetBatch(). Fixes core dump on large hash joins, as in example from Rae Stiening.
This commit is contained in:
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.71 2002/12/15 16:17:46 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -20,6 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include "access/hash.h"
|
#include "access/hash.h"
|
||||||
@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
|
|||||||
{
|
{
|
||||||
int tupsize;
|
int tupsize;
|
||||||
double inner_rel_bytes;
|
double inner_rel_bytes;
|
||||||
double hash_table_bytes;
|
long hash_table_bytes;
|
||||||
|
double dtmp;
|
||||||
int nbatch;
|
int nbatch;
|
||||||
int nbuckets;
|
int nbuckets;
|
||||||
int totalbuckets;
|
int totalbuckets;
|
||||||
@ -362,20 +364,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
|
|||||||
inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
|
inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Target hashtable size is SortMem kilobytes, but not less than
|
* Target in-memory hashtable size is SortMem kilobytes.
|
||||||
* sqrt(estimated inner rel size), so as to avoid horrible
|
|
||||||
* performance.
|
|
||||||
*/
|
*/
|
||||||
hash_table_bytes = sqrt(inner_rel_bytes);
|
hash_table_bytes = SortMem * 1024L;
|
||||||
if (hash_table_bytes < (SortMem * 1024L))
|
|
||||||
hash_table_bytes = SortMem * 1024L;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Count the number of hash buckets we want for the whole relation,
|
* Count the number of hash buckets we want for the whole relation,
|
||||||
* for an average bucket load of NTUP_PER_BUCKET (per virtual
|
* for an average bucket load of NTUP_PER_BUCKET (per virtual
|
||||||
* bucket!).
|
* bucket!). It has to fit in an int, however.
|
||||||
*/
|
*/
|
||||||
totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
|
dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
|
||||||
|
if (dtmp < INT_MAX)
|
||||||
|
totalbuckets = (int) dtmp;
|
||||||
|
else
|
||||||
|
totalbuckets = INT_MAX;
|
||||||
|
if (totalbuckets <= 0)
|
||||||
|
totalbuckets = 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Count the number of buckets we think will actually fit in the
|
* Count the number of buckets we think will actually fit in the
|
||||||
@ -409,10 +413,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
|
|||||||
* that nbatch doesn't have to have anything to do with the ratio
|
* that nbatch doesn't have to have anything to do with the ratio
|
||||||
* totalbuckets/nbuckets; in fact, it is the number of groups we
|
* totalbuckets/nbuckets; in fact, it is the number of groups we
|
||||||
* will use for the part of the data that doesn't fall into the
|
* will use for the part of the data that doesn't fall into the
|
||||||
* first nbuckets hash buckets.
|
* first nbuckets hash buckets. We try to set it to make all the
|
||||||
|
* batches the same size. But we have to keep nbatch small
|
||||||
|
* enough to avoid integer overflow in ExecHashJoinGetBatch().
|
||||||
*/
|
*/
|
||||||
nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
|
dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
|
||||||
hash_table_bytes);
|
hash_table_bytes);
|
||||||
|
if (dtmp < INT_MAX / totalbuckets)
|
||||||
|
nbatch = (int) dtmp;
|
||||||
|
else
|
||||||
|
nbatch = INT_MAX / totalbuckets;
|
||||||
if (nbatch <= 0)
|
if (nbatch <= 0)
|
||||||
nbatch = 1;
|
nbatch = 1;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user