mirror of
https://github.com/postgres/postgres.git
synced 2025-05-09 18:21:05 +03:00
Improve hash method for bitmapsets: some examination of actual outputs
shows that adding a circular shift between words greatly improves the distribution of hash outputs.
This commit is contained in:
parent
1f01d59e06
commit
e5a11a8879
@@ -14,7 +14,7 @@
|
|||||||
* Copyright (c) 2003-2005, PostgreSQL Global Development Group
|
* Copyright (c) 2003-2005, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.8 2005/06/08 23:02:04 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.9 2005/06/15 16:24:07 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@@ -769,22 +769,36 @@ bms_first_member(Bitmapset *a)
|
|||||||
*
|
*
|
||||||
* Note: we must ensure that any two bitmapsets that are bms_equal() will
|
* Note: we must ensure that any two bitmapsets that are bms_equal() will
|
||||||
* hash to the same value; in practice this means that trailing all-zero
|
* hash to the same value; in practice this means that trailing all-zero
|
||||||
* words cannot affect the result. Longitudinal XOR provides a reasonable
|
* words cannot affect the result. The circular-shift-and-XOR hash method
|
||||||
* hash value that has this property.
|
* used here has this property, so long as we work from back to front.
|
||||||
|
*
|
||||||
|
* Note: you might wonder why we bother with the circular shift; at first
|
||||||
|
* glance a straight longitudinal XOR seems as good and much simpler. The
|
||||||
|
* reason is empirical: this gives a better distribution of hash values on
|
||||||
|
* the bitmapsets actually generated by the planner. A common way to have
|
||||||
|
* multiword bitmapsets is "a JOIN b JOIN c JOIN d ...", which gives rise
|
||||||
|
* to rangetables in which base tables and JOIN nodes alternate; so
|
||||||
|
* bitmapsets of base table RT indexes tend to use only odd-numbered or only
|
||||||
|
* even-numbered bits. A straight longitudinal XOR would preserve this
|
||||||
|
* property, leading to a much smaller set of possible outputs than if
|
||||||
|
* we include a shift.
|
||||||
*/
|
*/
|
||||||
uint32
|
uint32
|
||||||
bms_hash_value(const Bitmapset *a)
|
bms_hash_value(const Bitmapset *a)
|
||||||
{
|
{
|
||||||
bitmapword result = 0;
|
bitmapword result = 0;
|
||||||
int nwords;
|
|
||||||
int wordnum;
|
int wordnum;
|
||||||
|
|
||||||
if (a == NULL)
|
if (a == NULL || a->nwords <= 0)
|
||||||
return 0; /* All empty sets hash to 0 */
|
return 0; /* All empty sets hash to 0 */
|
||||||
nwords = a->nwords;
|
for (wordnum = a->nwords; --wordnum > 0; )
|
||||||
for (wordnum = 0; wordnum < nwords; wordnum++)
|
|
||||||
{
|
{
|
||||||
result ^= a->words[wordnum];
|
result ^= a->words[wordnum];
|
||||||
|
if (result & ((bitmapword) 1 << (BITS_PER_BITMAPWORD - 1)))
|
||||||
|
result = (result << 1) | 1;
|
||||||
|
else
|
||||||
|
result = (result << 1);
|
||||||
}
|
}
|
||||||
|
result ^= a->words[0];
|
||||||
return (uint32) result;
|
return (uint32) result;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user