mirror of
https://github.com/postgres/postgres.git
synced 2025-06-29 10:41:53 +03:00
Improve bit perturbation in TupleHashTableHash.
The changes in b81b5a96f4 did not fully address the issue, because the bit-mixing of the IV into the final hash key didn't prevent clustering in the input data from surviving in the output data. This didn't cause a lot of problems because of the additional growth conditions added in d4c62a6b62. But as we want to rein those in due to explosive growth in some edge cases, this needs to be fixed.
Author: Andres Freund
Discussion: https://postgr.es/m/20171127185700.1470.20362@wrigleys.postgresql.org
Backpatch: 10, where simplehash was introduced
This commit is contained in:
@ -23,6 +23,7 @@
|
||||
#include "executor/executor.h"
|
||||
#include "miscadmin.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/hashutils.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
|
||||
@ -326,7 +327,7 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
|
||||
* underestimated.
|
||||
*/
|
||||
if (use_variable_hash_iv)
|
||||
hashtable->hash_iv = hash_uint32(ParallelWorkerNumber);
|
||||
hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
|
||||
else
|
||||
hashtable->hash_iv = 0;
|
||||
|
||||
@ -510,7 +511,13 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
|
||||
}
|
||||
}
|
||||
|
||||
return hashkey;
|
||||
/*
|
||||
* The way hashes are combined above, among each other and with the IV,
|
||||
* doesn't lead to good bit perturbation. As the IV's goal is to
|
||||
* achieve that, perform a round of hashing of the combined hash -
|
||||
* resulting in near perfect perturbation.
|
||||
*/
|
||||
return murmurhash32(hashkey);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1183,29 +1183,33 @@ explain (costs off)
|
||||
-- simple rescan tests
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by grouping sets (a,b);
|
||||
group by grouping sets (a,b)
|
||||
order by 1, 2, 3;
|
||||
a | b | sum
|
||||
---+---+-----
|
||||
2 | | 6
|
||||
1 | | 3
|
||||
2 | | 6
|
||||
| 1 | 3
|
||||
| 2 | 3
|
||||
| 3 | 3
|
||||
| 1 | 3
|
||||
(5 rows)
|
||||
|
||||
explain (costs off)
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by grouping sets (a,b);
|
||||
QUERY PLAN
|
||||
------------------------------------------
|
||||
HashAggregate
|
||||
Hash Key: gstest_data.a
|
||||
Hash Key: gstest_data.b
|
||||
-> Nested Loop
|
||||
-> Values Scan on "*VALUES*"
|
||||
-> Function Scan on gstest_data
|
||||
(6 rows)
|
||||
group by grouping sets (a,b)
|
||||
order by 3, 1, 2;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (sum("*VALUES*".column1)), gstest_data.a, gstest_data.b
|
||||
-> HashAggregate
|
||||
Hash Key: gstest_data.a
|
||||
Hash Key: gstest_data.b
|
||||
-> Nested Loop
|
||||
-> Values Scan on "*VALUES*"
|
||||
-> Function Scan on gstest_data
|
||||
(8 rows)
|
||||
|
||||
select *
|
||||
from (values (1),(2)) v(x),
|
||||
|
@ -342,12 +342,13 @@ explain (costs off)
|
||||
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by grouping sets (a,b);
|
||||
group by grouping sets (a,b)
|
||||
order by 1, 2, 3;
|
||||
explain (costs off)
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by grouping sets (a,b);
|
||||
|
||||
group by grouping sets (a,b)
|
||||
order by 3, 1, 2;
|
||||
select *
|
||||
from (values (1),(2)) v(x),
|
||||
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
|
||||
|
Reference in New Issue
Block a user