mirror of
https://github.com/postgres/postgres.git
synced 2025-06-30 21:42:05 +03:00
Improve bit perturbation in TupleHashTableHash.
The changes in b81b5a96f4 did not fully address the issue, because the bit-mixing of the IV into the final hash-key didn't prevent clustering in the input data from surviving in the output data. This didn't cause a lot of problems because of the additional growth conditions added in d4c62a6b62.
But as we want to rein those in due to explosive growth in some edge cases, this needs to be fixed. Author: Andres Freund Discussion: https://postgr.es/m/20171127185700.1470.20362@wrigleys.postgresql.org Backpatch: 10, where simplehash was introduced
This commit is contained in:
@ -23,6 +23,7 @@
|
|||||||
#include "executor/executor.h"
|
#include "executor/executor.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "utils/lsyscache.h"
|
#include "utils/lsyscache.h"
|
||||||
|
#include "utils/hashutils.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
|
|
||||||
static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
|
static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
|
||||||
@ -326,7 +327,7 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
|
|||||||
* underestimated.
|
* underestimated.
|
||||||
*/
|
*/
|
||||||
if (use_variable_hash_iv)
|
if (use_variable_hash_iv)
|
||||||
hashtable->hash_iv = hash_uint32(ParallelWorkerNumber);
|
hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
|
||||||
else
|
else
|
||||||
hashtable->hash_iv = 0;
|
hashtable->hash_iv = 0;
|
||||||
|
|
||||||
@ -510,7 +511,13 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return hashkey;
|
/*
|
||||||
|
* The way hashes are combined above, among each other and with the IV,
|
||||||
|
* doesn't lead to good bit perturbation. As the IV's goal is to
|
||||||
|
* achieve that, perform a round of hashing of the combined hash -
|
||||||
|
* resulting in near perfect perturbation.
|
||||||
|
*/
|
||||||
|
return murmurhash32(hashkey);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1183,29 +1183,33 @@ explain (costs off)
|
|||||||
-- simple rescan tests
|
-- simple rescan tests
|
||||||
select a, b, sum(v.x)
|
select a, b, sum(v.x)
|
||||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||||
group by grouping sets (a,b);
|
group by grouping sets (a,b)
|
||||||
|
order by 1, 2, 3;
|
||||||
a | b | sum
|
a | b | sum
|
||||||
---+---+-----
|
---+---+-----
|
||||||
2 | | 6
|
|
||||||
1 | | 3
|
1 | | 3
|
||||||
|
2 | | 6
|
||||||
|
| 1 | 3
|
||||||
| 2 | 3
|
| 2 | 3
|
||||||
| 3 | 3
|
| 3 | 3
|
||||||
| 1 | 3
|
|
||||||
(5 rows)
|
(5 rows)
|
||||||
|
|
||||||
explain (costs off)
|
explain (costs off)
|
||||||
select a, b, sum(v.x)
|
select a, b, sum(v.x)
|
||||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||||
group by grouping sets (a,b);
|
group by grouping sets (a,b)
|
||||||
QUERY PLAN
|
order by 3, 1, 2;
|
||||||
------------------------------------------
|
QUERY PLAN
|
||||||
HashAggregate
|
---------------------------------------------------------------------
|
||||||
Hash Key: gstest_data.a
|
Sort
|
||||||
Hash Key: gstest_data.b
|
Sort Key: (sum("*VALUES*".column1)), gstest_data.a, gstest_data.b
|
||||||
-> Nested Loop
|
-> HashAggregate
|
||||||
-> Values Scan on "*VALUES*"
|
Hash Key: gstest_data.a
|
||||||
-> Function Scan on gstest_data
|
Hash Key: gstest_data.b
|
||||||
(6 rows)
|
-> Nested Loop
|
||||||
|
-> Values Scan on "*VALUES*"
|
||||||
|
-> Function Scan on gstest_data
|
||||||
|
(8 rows)
|
||||||
|
|
||||||
select *
|
select *
|
||||||
from (values (1),(2)) v(x),
|
from (values (1),(2)) v(x),
|
||||||
|
@ -342,12 +342,13 @@ explain (costs off)
|
|||||||
|
|
||||||
select a, b, sum(v.x)
|
select a, b, sum(v.x)
|
||||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||||
group by grouping sets (a,b);
|
group by grouping sets (a,b)
|
||||||
|
order by 1, 2, 3;
|
||||||
explain (costs off)
|
explain (costs off)
|
||||||
select a, b, sum(v.x)
|
select a, b, sum(v.x)
|
||||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||||
group by grouping sets (a,b);
|
group by grouping sets (a,b)
|
||||||
|
order by 3, 1, 2;
|
||||||
select *
|
select *
|
||||||
from (values (1),(2)) v(x),
|
from (values (1),(2)) v(x),
|
||||||
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
|
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
|
||||||
|
Reference in New Issue
Block a user