mirror of
https://github.com/postgres/postgres.git
synced 2025-07-02 09:02:37 +03:00
Improve bit perturbation in TupleHashTableHash.
The changes in b81b5a96f4
did not fully address the issue, because the bit-mixing of the IV into the final hash-key didn't prevent clustering in the input data from surviving in the output data. This didn't cause a lot of problems because of the additional growth conditions added in d4c62a6b62
. But as we want to rein those in due to explosive growth in some edge cases, this needs to be fixed. Author: Andres Freund Discussion: https://postgr.es/m/20171127185700.1470.20362@wrigleys.postgresql.org Backpatch: 10, where simplehash was introduced
This commit is contained in:
@ -23,6 +23,7 @@
|
||||
#include "executor/executor.h"
|
||||
#include "miscadmin.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/hashutils.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
|
||||
@ -326,7 +327,7 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
|
||||
* underestimated.
|
||||
*/
|
||||
if (use_variable_hash_iv)
|
||||
hashtable->hash_iv = hash_uint32(ParallelWorkerNumber);
|
||||
hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
|
||||
else
|
||||
hashtable->hash_iv = 0;
|
||||
|
||||
@ -510,7 +511,13 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
|
||||
}
|
||||
}
|
||||
|
||||
return hashkey;
|
||||
/*
|
||||
* The way hashes are combined above, among each other and with the IV,
|
||||
* doesn't lead to good bit perturbation. As the IV's goal is to
|
||||
* achieve that, perform a round of hashing of the combined hash -
|
||||
* resulting in near perfect perturbation.
|
||||
*/
|
||||
return murmurhash32(hashkey);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Reference in New Issue
Block a user