mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Improve hash method for bitmapsets: some examination of actual outputs
shows that adding a circular shift between words greatly improves the distribution of hash outputs.
This commit is contained in:
		| @@ -14,7 +14,7 @@ | |||||||
|  * Copyright (c) 2003-2005, PostgreSQL Global Development Group |  * Copyright (c) 2003-2005, PostgreSQL Global Development Group | ||||||
|  * |  * | ||||||
|  * IDENTIFICATION |  * IDENTIFICATION | ||||||
|  *	  $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.8 2005/06/08 23:02:04 tgl Exp $ |  *	  $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.9 2005/06/15 16:24:07 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| @@ -769,22 +769,36 @@ bms_first_member(Bitmapset *a) | |||||||
|  * |  * | ||||||
|  * Note: we must ensure that any two bitmapsets that are bms_equal() will |  * Note: we must ensure that any two bitmapsets that are bms_equal() will | ||||||
|  * hash to the same value; in practice this means that trailing all-zero |  * hash to the same value; in practice this means that trailing all-zero | ||||||
|  * words cannot affect the result.  Longitudinal XOR provides a reasonable |  * words cannot affect the result.  The circular-shift-and-XOR hash method | ||||||
|  * hash value that has this property. |  * used here has this property, so long as we work from back to front. | ||||||
|  |  * | ||||||
|  |  * Note: you might wonder why we bother with the circular shift; at first | ||||||
|  |  * glance a straight longitudinal XOR seems as good and much simpler.  The | ||||||
|  |  * reason is empirical: this gives a better distribution of hash values on | ||||||
|  |  * the bitmapsets actually generated by the planner.  A common way to have | ||||||
|  |  * multiword bitmapsets is "a JOIN b JOIN c JOIN d ...", which gives rise | ||||||
|  |  * to rangetables in which base tables and JOIN nodes alternate; so | ||||||
|  |  * bitmapsets of base table RT indexes tend to use only odd-numbered or only | ||||||
|  |  * even-numbered bits.  A straight longitudinal XOR would preserve this | ||||||
|  |  * property, leading to a much smaller set of possible outputs than if | ||||||
|  |  * we include a shift. | ||||||
|  */ |  */ | ||||||
| uint32 | uint32 | ||||||
| bms_hash_value(const Bitmapset *a) | bms_hash_value(const Bitmapset *a) | ||||||
| { | { | ||||||
| 	bitmapword	result = 0; | 	bitmapword	result = 0; | ||||||
| 	int			nwords; |  | ||||||
| 	int			wordnum; | 	int			wordnum; | ||||||
|  |  | ||||||
| 	if (a == NULL) | 	if (a == NULL || a->nwords <= 0) | ||||||
| 		return 0;				/* All empty sets hash to 0 */ | 		return 0;				/* All empty sets hash to 0 */ | ||||||
| 	nwords = a->nwords; | 	for (wordnum = a->nwords; --wordnum > 0; ) | ||||||
| 	for (wordnum = 0; wordnum < nwords; wordnum++) |  | ||||||
| 	{ | 	{ | ||||||
| 		result ^= a->words[wordnum]; | 		result ^= a->words[wordnum]; | ||||||
|  | 		if (result & ((bitmapword) 1 << (BITS_PER_BITMAPWORD - 1))) | ||||||
|  | 			result = (result << 1) | 1; | ||||||
|  | 		else | ||||||
|  | 			result = (result << 1); | ||||||
| 	} | 	} | ||||||
|  | 	result ^= a->words[0]; | ||||||
| 	return (uint32) result; | 	return (uint32) result; | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user