mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Code review for improved-hashing patch. Fix some portability issues
(char != unsigned char, Datum != uint32); make use of new hash code in dynahash hash tables and hash joins.
This commit is contained in:
		| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.56 2002/03/09 17:35:35 tgl Exp $ | ||||
|  * | ||||
|  * NOTES | ||||
|  *	  This file contains only the public interface routines. | ||||
| @@ -164,6 +164,9 @@ hashinsert(PG_FUNCTION_ARGS) | ||||
| 	Datum	   *datum = (Datum *) PG_GETARG_POINTER(1); | ||||
| 	char	   *nulls = (char *) PG_GETARG_POINTER(2); | ||||
| 	ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); | ||||
| #ifdef NOT_USED | ||||
| 	Relation	heapRel = (Relation) PG_GETARG_POINTER(4); | ||||
| #endif | ||||
|  | ||||
| 	InsertIndexResult res; | ||||
| 	HashItem	hitem; | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.33 2002/03/09 17:35:35 tgl Exp $ | ||||
|  * | ||||
|  * NOTES | ||||
|  *	  These functions are stored in pg_amproc.	For each operator class | ||||
| @@ -58,7 +58,7 @@ hashfloat4(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	float4		key = PG_GETARG_FLOAT4(0); | ||||
|  | ||||
| 	return hash_any((char *) &key, sizeof(key)); | ||||
| 	return hash_any((unsigned char *) &key, sizeof(key)); | ||||
| } | ||||
|  | ||||
| Datum | ||||
| @@ -66,7 +66,7 @@ hashfloat8(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	float8		key = PG_GETARG_FLOAT8(0); | ||||
|  | ||||
| 	return hash_any((char *) &key, sizeof(key)); | ||||
| 	return hash_any((unsigned char *) &key, sizeof(key)); | ||||
| } | ||||
|  | ||||
| Datum | ||||
| @@ -74,7 +74,7 @@ hashoidvector(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	Oid		   *key = (Oid *) PG_GETARG_POINTER(0); | ||||
|  | ||||
| 	return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(Oid)); | ||||
| 	return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(Oid)); | ||||
| } | ||||
|  | ||||
| /* | ||||
| @@ -87,17 +87,18 @@ hashint2vector(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	int16	   *key = (int16 *) PG_GETARG_POINTER(0); | ||||
|  | ||||
| 	return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(int16)); | ||||
| 	return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(int16)); | ||||
| } | ||||
|  | ||||
| Datum | ||||
| hashname(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	char	   *key = NameStr(*PG_GETARG_NAME(0)); | ||||
| 	int			keylen = strlen(key); | ||||
|  | ||||
| 	Assert(strlen(key) <= NAMEDATALEN); | ||||
| 	Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */ | ||||
|  | ||||
| 	return hash_any(key, strlen(key)); | ||||
| 	return hash_any((unsigned char *) key, keylen); | ||||
| } | ||||
|  | ||||
| /* | ||||
| @@ -110,7 +111,8 @@ hashvarlena(PG_FUNCTION_ARGS) | ||||
| 	struct varlena *key = PG_GETARG_VARLENA_P(0); | ||||
| 	Datum		result; | ||||
|  | ||||
| 	result = hash_any(VARDATA(key), VARSIZE(key) - VARHDRSZ); | ||||
| 	result = hash_any((unsigned char *) VARDATA(key), | ||||
| 					  VARSIZE(key) - VARHDRSZ); | ||||
|  | ||||
| 	/* Avoid leaking memory for toasted inputs */ | ||||
| 	PG_FREE_IF_COPY(key, 0); | ||||
| @@ -118,13 +120,15 @@ hashvarlena(PG_FUNCTION_ARGS) | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
| /* This hash function was written by Bob Jenkins | ||||
| /* | ||||
|  * This hash function was written by Bob Jenkins | ||||
|  * (bob_jenkins@burtleburtle.net), and superficially adapted | ||||
|  * for PostgreSQL by Neil Conway. For more information on this | ||||
|  * hash function, see http://burtleburtle.net/bob/hash/doobs.html | ||||
|  * hash function, see http://burtleburtle.net/bob/hash/doobs.html, | ||||
|  * or Bob's article in Dr. Dobb's Journal, Sept. 1997. | ||||
|  */ | ||||
|  | ||||
| /* | ||||
| /*---------- | ||||
|  * mix -- mix 3 32-bit values reversibly. | ||||
|  * For every delta with one or two bits set, and the deltas of all three | ||||
|  * high bits or all three low bits, whether the original value of a,b,c | ||||
| @@ -133,6 +137,7 @@ hashvarlena(PG_FUNCTION_ARGS) | ||||
|  *   have at least 1/4 probability of changing. | ||||
|  * - If mix() is run forward, every bit of c will change between 1/3 and | ||||
|  *   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.) | ||||
|  *---------- | ||||
|  */ | ||||
| #define mix(a,b,c) \ | ||||
| { \ | ||||
| @@ -151,56 +156,52 @@ hashvarlena(PG_FUNCTION_ARGS) | ||||
|  * hash_any() -- hash a variable-length key into a 32-bit value | ||||
|  *      k       : the key (the unaligned variable-length array of bytes) | ||||
|  *      len     : the length of the key, counting by bytes | ||||
|  * Returns a 32-bit value.  Every bit of the key affects every bit of | ||||
|  * | ||||
|  * Returns a uint32 value.  Every bit of the key affects every bit of | ||||
|  * the return value.  Every 1-bit and 2-bit delta achieves avalanche. | ||||
|  * About 6*len+35 instructions. The best hash table sizes are powers | ||||
|  * of 2.  There is no need to do mod a prime (mod is sooo slow!). | ||||
|  * If you need less than 32 bits, use a bitmask. | ||||
|  */ | ||||
| Datum | ||||
| hash_any(register const char *k, register int keylen) | ||||
| hash_any(register const unsigned char *k, register int keylen) | ||||
| { | ||||
|    register Datum a,b,c,len; | ||||
| 	register uint32 a,b,c,len; | ||||
|  | ||||
| 	/* Set up the internal state */ | ||||
| 	len = keylen; | ||||
| 	a = b = 0x9e3779b9;			/* the golden ratio; an arbitrary value */ | ||||
|    /* Another arbitrary value. If the hash function is called | ||||
|     * multiple times, this could be the previously generated | ||||
|     * hash value; however, the interface currently doesn't allow | ||||
|     * this. AFAIK this isn't a big deal. | ||||
|     */ | ||||
|    c = 3923095; | ||||
| 	c = 3923095;				/* initialize with an arbitrary value */ | ||||
|  | ||||
| 	/* handle most of the key */ | ||||
| 	while (len >= 12) | ||||
| 	{ | ||||
|       a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24)); | ||||
|       b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24)); | ||||
|       c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24)); | ||||
| 		a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24)); | ||||
| 		b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24)); | ||||
| 		c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24)); | ||||
| 		mix(a,b,c); | ||||
| 		k += 12; len -= 12; | ||||
| 	} | ||||
|  | ||||
| 	/* handle the last 11 bytes */ | ||||
| 	c += keylen; | ||||
|    switch(len)              /* all the case statements fall through */ | ||||
| 	switch (len)				/* all the case statements fall through */ | ||||
| 	{ | ||||
|    case 11: c+=((Datum)k[10]<<24); | ||||
|    case 10: c+=((Datum)k[9]<<16); | ||||
|    case 9 : c+=((Datum)k[8]<<8); | ||||
| 		case 11: c+=((uint32)k[10]<<24); | ||||
| 		case 10: c+=((uint32)k[9]<<16); | ||||
| 		case 9 : c+=((uint32)k[8]<<8); | ||||
| 			/* the first byte of c is reserved for the length */ | ||||
|    case 8 : b+=((Datum)k[7]<<24); | ||||
|    case 7 : b+=((Datum)k[6]<<16); | ||||
|    case 6 : b+=((Datum)k[5]<<8); | ||||
| 		case 8 : b+=((uint32)k[7]<<24); | ||||
| 		case 7 : b+=((uint32)k[6]<<16); | ||||
| 		case 6 : b+=((uint32)k[5]<<8); | ||||
| 		case 5 : b+=k[4]; | ||||
|    case 4 : a+=((Datum)k[3]<<24); | ||||
|    case 3 : a+=((Datum)k[2]<<16); | ||||
|    case 2 : a+=((Datum)k[1]<<8); | ||||
| 		case 4 : a+=((uint32)k[3]<<24); | ||||
| 		case 3 : a+=((uint32)k[2]<<16); | ||||
| 		case 2 : a+=((uint32)k[1]<<8); | ||||
| 		case 1 : a+=k[0]; | ||||
| 			/* case 0: nothing left to add */ | ||||
| 	} | ||||
| 	mix(a,b,c); | ||||
| 	/* report the result */ | ||||
|    return c; | ||||
| 	return UInt32GetDatum(c); | ||||
| } | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * | ||||
|  *	$Id: nodeHash.c,v 1.61 2002/03/06 20:49:44 momjian Exp $ | ||||
|  *	$Id: nodeHash.c,v 1.62 2002/03/09 17:35:35 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -22,6 +22,7 @@ | ||||
| #include <sys/types.h> | ||||
| #include <math.h> | ||||
|  | ||||
| #include "access/hash.h" | ||||
| #include "executor/execdebug.h" | ||||
| #include "executor/nodeHash.h" | ||||
| #include "executor/nodeHashjoin.h" | ||||
| @@ -31,7 +32,7 @@ | ||||
| #include "utils/lsyscache.h" | ||||
|  | ||||
|  | ||||
| static int	hashFunc(Datum key, int len, bool byVal); | ||||
| static uint32	hashFunc(Datum key, int len, bool byVal); | ||||
|  | ||||
| /* ---------------------------------------------------------------- | ||||
|  *		ExecHash | ||||
| @@ -553,7 +554,7 @@ ExecHashGetBucket(HashJoinTable hashtable, | ||||
| 		bucketno = hashFunc(keyval, | ||||
| 							(int) hashtable->typLen, | ||||
| 							hashtable->typByVal) | ||||
| 			% hashtable->totalbuckets; | ||||
| 			% (uint32) hashtable->totalbuckets; | ||||
| 	} | ||||
|  | ||||
| #ifdef HJDEBUG | ||||
| @@ -624,30 +625,29 @@ ExecScanHashBucket(HashJoinState *hjstate, | ||||
| /* ---------------------------------------------------------------- | ||||
|  *		hashFunc | ||||
|  * | ||||
|  *		the hash function, copied from Margo | ||||
|  *		the hash function for hash joins | ||||
|  * | ||||
|  *		XXX this probably ought to be replaced with datatype-specific | ||||
|  *		hash functions, such as those already implemented for hash indexes. | ||||
|  * ---------------------------------------------------------------- | ||||
|  */ | ||||
| static int | ||||
| static uint32 | ||||
| hashFunc(Datum key, int len, bool byVal) | ||||
| { | ||||
| 	unsigned int h = 0; | ||||
| 	unsigned char *k; | ||||
|  | ||||
| 	if (byVal) | ||||
| 	{ | ||||
| 		/* | ||||
| 		 * If it's a by-value data type, use the 'len' least significant | ||||
| 		 * bytes of the Datum value.  This should do the right thing on | ||||
| 		 * either bigendian or littleendian hardware --- see the Datum | ||||
| 		 * access macros in c.h. | ||||
| 		 * If it's a by-value data type, just hash the whole Datum value. | ||||
| 		 * This assumes that datatypes narrower than Datum are consistently | ||||
| 		 * padded (either zero-extended or sign-extended, but not random | ||||
| 		 * bits) to fill Datum; see the XXXGetDatum macros in postgres.h. | ||||
| 		 * NOTE: it would not work to do hash_any(&key, len) since this | ||||
| 		 * would get the wrong bytes on a big-endian machine. | ||||
| 		 */ | ||||
| 		while (len-- > 0) | ||||
| 		{ | ||||
| 			h = (h * PRIME1) ^ (key & 0xFF); | ||||
| 			key >>= 8; | ||||
| 		} | ||||
| 		k = (unsigned char *) &key; | ||||
| 		len = sizeof(Datum); | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| @@ -662,8 +662,6 @@ hashFunc(Datum key, int len, bool byVal) | ||||
| 		 * freeing the detoasted copy; that happens for free when the | ||||
| 		 * per-tuple memory context is reset in ExecHashGetBucket.) | ||||
| 		 */ | ||||
| 		unsigned char *k; | ||||
|  | ||||
| 		if (len < 0) | ||||
| 		{ | ||||
| 			struct varlena *vkey = PG_DETOAST_DATUM(key); | ||||
| @@ -673,12 +671,9 @@ hashFunc(Datum key, int len, bool byVal) | ||||
| 		} | ||||
| 		else | ||||
| 			k = (unsigned char *) DatumGetPointer(key); | ||||
|  | ||||
| 		while (len-- > 0) | ||||
| 			h = (h * PRIME1) ^ (*k++); | ||||
| 	} | ||||
|  | ||||
| 	return h % PRIME2; | ||||
| 	return DatumGetUInt32(hash_any(k, len)); | ||||
| } | ||||
|  | ||||
| /* ---------------------------------------------------------------- | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.64 2001/11/21 05:57:33 thomas Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.65 2002/03/09 17:35:35 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -1116,7 +1116,7 @@ timetz_hash(PG_FUNCTION_ARGS) | ||||
| 	 * sizeof(TimeTzADT), so that any garbage pad bytes in the structure | ||||
| 	 * won't be included in the hash! | ||||
| 	 */ | ||||
| 	return hash_any((char *) key, sizeof(double) + sizeof(int4)); | ||||
| 	return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4)); | ||||
| } | ||||
|  | ||||
| Datum | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| /* | ||||
|  *	PostgreSQL type definitions for MAC addresses. | ||||
|  * | ||||
|  *	$Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.21 2001/08/21 21:23:21 tgl Exp $ | ||||
|  *	$Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.22 2002/03/09 17:35:35 tgl Exp $ | ||||
|  */ | ||||
|  | ||||
| #include "postgres.h" | ||||
| @@ -230,7 +230,7 @@ hashmacaddr(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	macaddr    *key = PG_GETARG_MACADDR_P(0); | ||||
|  | ||||
| 	return hash_any((char *) key, sizeof(macaddr)); | ||||
| 	return hash_any((unsigned char *) key, sizeof(macaddr)); | ||||
| } | ||||
|  | ||||
| /* | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.64 2002/03/06 06:10:18 momjian Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.65 2002/03/09 17:35:36 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -1017,7 +1017,7 @@ interval_hash(PG_FUNCTION_ARGS) | ||||
| 	 * sizeof(Interval), so that any garbage pad bytes in the structure | ||||
| 	 * won't be included in the hash! | ||||
| 	 */ | ||||
| 	return hash_any((char *) key, sizeof(double) + sizeof(int4)); | ||||
| 	return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4)); | ||||
| } | ||||
|  | ||||
| /* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator. | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.87 2001/11/18 12:07:07 ishii Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.88 2002/03/09 17:35:36 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -791,7 +791,7 @@ hashbpchar(PG_FUNCTION_ARGS) | ||||
| 	keydata = VARDATA(key); | ||||
| 	keylen = bcTruelen(key); | ||||
|  | ||||
| 	result = hash_any(keydata, keylen); | ||||
| 	result = hash_any((unsigned char *) keydata, keylen); | ||||
|  | ||||
| 	/* Avoid leaking memory for toasted inputs */ | ||||
| 	PG_FREE_IF_COPY(key, 0); | ||||
|   | ||||
| @@ -9,7 +9,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.41 2002/03/02 21:39:33 momjian Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.42 2002/03/09 17:35:36 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -329,8 +329,7 @@ init_htab(HTAB *hashp, long nelem) | ||||
| 	} | ||||
|  | ||||
| #if HASH_DEBUG | ||||
| 	fprintf(stderr, "%s\n%s%p\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n", | ||||
| 			"init_htab:", | ||||
| 	fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n%s%ld\n", | ||||
| 			"TABLE POINTER   ", hashp, | ||||
| 			"DIRECTORY SIZE  ", hctl->dsize, | ||||
| 			"SEGMENT SIZE    ", hctl->ssize, | ||||
| @@ -453,7 +452,7 @@ hash_stats(const char *where, HTAB *hashp) | ||||
| 	fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n", | ||||
| 			where, hashp->hctl->accesses, hashp->hctl->collisions); | ||||
|  | ||||
| 	fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %d segmentcount %d\n", | ||||
| 	fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n", | ||||
| 			hashp->hctl->nentries, hashp->hctl->keysize, | ||||
| 			hashp->hctl->max_bucket, hashp->hctl->nsegs); | ||||
| 	fprintf(stderr, "%s: total accesses %ld total collisions %ld\n", | ||||
| @@ -470,7 +469,7 @@ static uint32 | ||||
| call_hash(HTAB *hashp, void *k) | ||||
| { | ||||
| 	HASHHDR    *hctl = hashp->hctl; | ||||
| 	long		hash_val, | ||||
| 	uint32		hash_val, | ||||
| 				bucket; | ||||
|  | ||||
| 	hash_val = hashp->hash(k, (int) hctl->keysize); | ||||
| @@ -479,7 +478,7 @@ call_hash(HTAB *hashp, void *k) | ||||
| 	if (bucket > hctl->max_bucket) | ||||
| 		bucket = bucket & hctl->low_mask; | ||||
|  | ||||
| 	return (uint32) bucket; | ||||
| 	return bucket; | ||||
| } | ||||
|  | ||||
| /*---------- | ||||
| @@ -647,7 +646,7 @@ hash_search(HTAB *hashp, | ||||
| 			/* caller is expected to fill the data field on return */ | ||||
|  | ||||
| 			/* Check if it is time to split the segment */ | ||||
| 			if (++hctl->nentries / (hctl->max_bucket + 1) > hctl->ffactor) | ||||
| 			if (++hctl->nentries / (long) (hctl->max_bucket + 1) > hctl->ffactor) | ||||
| 			{ | ||||
| 				/* | ||||
| 				 * NOTE: failure to expand table is not a fatal error, it | ||||
| @@ -795,10 +794,10 @@ expand_table(HTAB *hashp) | ||||
| 	/* | ||||
| 	 * If we crossed a power of 2, readjust masks. | ||||
| 	 */ | ||||
| 	if (new_bucket > hctl->high_mask) | ||||
| 	if ((uint32) new_bucket > hctl->high_mask) | ||||
| 	{ | ||||
| 		hctl->low_mask = hctl->high_mask; | ||||
| 		hctl->high_mask = new_bucket | hctl->low_mask; | ||||
| 		hctl->high_mask = (uint32) new_bucket | hctl->low_mask; | ||||
| 	} | ||||
|  | ||||
| 	/* | ||||
|   | ||||
| @@ -9,14 +9,16 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.15 2001/10/25 05:49:51 momjian Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.16 2002/03/09 17:35:36 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| #include "postgres.h" | ||||
|  | ||||
| #include "access/hash.h" | ||||
| #include "utils/hsearch.h" | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * string_hash: hash function for keys that are null-terminated strings. | ||||
|  * | ||||
| @@ -27,91 +29,17 @@ | ||||
|  * | ||||
|  * NOTE: this is the default hash function if none is specified. | ||||
|  */ | ||||
| long | ||||
| uint32 | ||||
| string_hash(void *key, int keysize) | ||||
| { | ||||
| 	unsigned char *k = (unsigned char *) key; | ||||
| 	long		h = 0; | ||||
|  | ||||
| 	while (*k) | ||||
| 		h = (h * PRIME1) ^ (*k++); | ||||
|  | ||||
| 	h %= PRIME2; | ||||
|  | ||||
| 	return h; | ||||
| 	return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key))); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * tag_hash: hash function for fixed-size tag values | ||||
|  * | ||||
|  * NB: we assume that the supplied key is aligned at least on an 'int' | ||||
|  * boundary, if its size is >= sizeof(int). | ||||
|  */ | ||||
| long | ||||
| uint32 | ||||
| tag_hash(void *key, int keysize) | ||||
| { | ||||
| 	int		   *k = (int *) key; | ||||
| 	long		h = 0; | ||||
|  | ||||
| 	/* | ||||
| 	 * Use four byte chunks in a "jump table" to go a little faster. | ||||
| 	 * | ||||
| 	 * Currently the maximum keysize is 16 (mar 17 1992).  I have put in | ||||
| 	 * cases for up to 32.	Bigger than this will resort to a for loop | ||||
| 	 * (see the default case). | ||||
| 	 */ | ||||
| 	switch (keysize) | ||||
| 	{ | ||||
| 		case 8 * sizeof(int): | ||||
| 			h = (h * PRIME1) ^(*k++); | ||||
| 			/* fall through */ | ||||
|  | ||||
| 		case 7 * sizeof(int): | ||||
| 			h = (h * PRIME1) ^(*k++); | ||||
| 			/* fall through */ | ||||
|  | ||||
| 		case 6 * sizeof(int): | ||||
| 			h = (h * PRIME1) ^(*k++); | ||||
| 			/* fall through */ | ||||
|  | ||||
| 		case 5 * sizeof(int): | ||||
| 			h = (h * PRIME1) ^(*k++); | ||||
| 			/* fall through */ | ||||
|  | ||||
| 		case 4 * sizeof(int): | ||||
| 			h = (h * PRIME1) ^(*k++); | ||||
| 			/* fall through */ | ||||
|  | ||||
| 		case 3 * sizeof(int): | ||||
| 			h = (h * PRIME1) ^(*k++); | ||||
| 			/* fall through */ | ||||
|  | ||||
| 		case 2 * sizeof(int): | ||||
| 			h = (h * PRIME1) ^(*k++); | ||||
| 			/* fall through */ | ||||
|  | ||||
| 		case sizeof(int): | ||||
| 			h = (h * PRIME1) ^(*k++); | ||||
| 			break; | ||||
|  | ||||
| 		default: | ||||
| 			/* Do an int at a time */ | ||||
| 			for (; keysize >= (int) sizeof(int); keysize -= sizeof(int)) | ||||
| 				h = (h * PRIME1) ^ (*k++); | ||||
|  | ||||
| 			/* Cope with any partial-int leftover bytes */ | ||||
| 			if (keysize > 0) | ||||
| 			{ | ||||
| 				unsigned char *keybyte = (unsigned char *) k; | ||||
|  | ||||
| 				do | ||||
| 					h = (h * PRIME1) ^ (*keybyte++); | ||||
| 				while (--keysize > 0); | ||||
| 			} | ||||
| 			break; | ||||
| 	} | ||||
|  | ||||
| 	h %= PRIME2; | ||||
|  | ||||
| 	return h; | ||||
| 	return DatumGetUInt32(hash_any((unsigned char *) key, keysize)); | ||||
| } | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $Id: hash.h,v 1.44 2002/03/06 20:49:45 momjian Exp $ | ||||
|  * $Id: hash.h,v 1.45 2002/03/09 17:35:37 tgl Exp $ | ||||
|  * | ||||
|  * NOTES | ||||
|  *		modeled after Margo Seltzer's hash implementation for unix. | ||||
| @@ -252,7 +252,8 @@ extern Datum hashbulkdelete(PG_FUNCTION_ARGS); | ||||
|  * Datatype-specific hash functions in hashfunc.c. | ||||
|  * | ||||
|  * NOTE: some of these are also used by catcache operations, without | ||||
|  * any direct connection to hash indexes. | ||||
|  * any direct connection to hash indexes.  Also, the common hash_any | ||||
|  * routine is used by dynahash tables and hash joins. | ||||
|  */ | ||||
| extern Datum hashchar(PG_FUNCTION_ARGS); | ||||
| extern Datum hashint2(PG_FUNCTION_ARGS); | ||||
| @@ -265,7 +266,7 @@ extern Datum hashoidvector(PG_FUNCTION_ARGS); | ||||
| extern Datum hashint2vector(PG_FUNCTION_ARGS); | ||||
| extern Datum hashname(PG_FUNCTION_ARGS); | ||||
| extern Datum hashvarlena(PG_FUNCTION_ARGS); | ||||
| extern Datum hash_any(register const char *k, register int keylen); | ||||
| extern Datum hash_any(register const unsigned char *k, register int keylen); | ||||
|  | ||||
| /* private routines */ | ||||
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $Id: hsearch.h,v 1.25 2001/11/05 17:46:36 momjian Exp $ | ||||
|  * $Id: hsearch.h,v 1.26 2002/03/09 17:35:37 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -35,9 +35,6 @@ | ||||
| #define DEF_DIRSIZE			   256 | ||||
| #define DEF_FFACTOR			   1	/* default fill factor */ | ||||
|  | ||||
| #define PRIME1				   37		/* for the hash function */ | ||||
| #define PRIME2				   1048583 | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * HASHELEMENT is the private part of a hashtable entry.  The caller's data | ||||
| @@ -60,10 +57,10 @@ typedef struct HASHHDR | ||||
| { | ||||
| 	long		dsize;			/* Directory Size */ | ||||
| 	long		ssize;			/* Segment Size --- must be power of 2 */ | ||||
| 	long		sshift;			/* Segment shift */ | ||||
| 	long		max_bucket;		/* ID of Maximum bucket in use */ | ||||
| 	long		high_mask;		/* Mask to modulo into entire table */ | ||||
| 	long		low_mask;		/* Mask to modulo into lower half of table */ | ||||
| 	int			sshift;			/* Segment shift = log2(ssize) */ | ||||
| 	uint32		max_bucket;		/* ID of Maximum bucket in use */ | ||||
| 	uint32		high_mask;		/* Mask to modulo into entire table */ | ||||
| 	uint32		low_mask;		/* Mask to modulo into lower half of table */ | ||||
| 	long		ffactor;		/* Fill factor */ | ||||
| 	long		nentries;		/* Number of entries in hash table */ | ||||
| 	long		nsegs;			/* Number of allocated segments */ | ||||
| @@ -86,7 +83,7 @@ typedef struct HTAB | ||||
| { | ||||
| 	HASHHDR    *hctl;			/* shared control information */ | ||||
| 	HASHSEGMENT *dir;			/* directory of segment starts */ | ||||
| 	long		(*hash) (void *key, int keysize);		/* Hash Function */ | ||||
| 	uint32		(*hash) (void *key, int keysize);		/* Hash Function */ | ||||
| 	void	   *(*alloc) (Size);	/* memory allocator */ | ||||
| 	MemoryContext hcxt;			/* memory context if default allocator | ||||
| 								 * used */ | ||||
| @@ -101,7 +98,7 @@ typedef struct HASHCTL | ||||
| 	long		ssize;			/* Segment Size */ | ||||
| 	long		dsize;			/* (initial) Directory Size */ | ||||
| 	long		ffactor;		/* Fill factor */ | ||||
| 	long		(*hash) (void *key, int keysize);		/* Hash Function */ | ||||
| 	uint32		(*hash) (void *key, int keysize);		/* Hash Function */ | ||||
| 	long		keysize;		/* hash key length in bytes */ | ||||
| 	long		entrysize;		/* total user element size in bytes */ | ||||
| 	long		max_dsize;		/* limit to dsize if directory size is | ||||
| @@ -143,7 +140,7 @@ typedef enum | ||||
| typedef struct | ||||
| { | ||||
| 	HTAB	   *hashp; | ||||
| 	long		curBucket;		/* index of current bucket */ | ||||
| 	uint32		curBucket;		/* index of current bucket */ | ||||
| 	HASHELEMENT *curEntry;		/* current entry in bucket */ | ||||
| } HASH_SEQ_STATUS; | ||||
|  | ||||
| @@ -164,7 +161,7 @@ extern long hash_select_dirsize(long num_entries); | ||||
| /* | ||||
|  * prototypes for functions in hashfn.c | ||||
|  */ | ||||
| extern long string_hash(void *key, int keysize); | ||||
| extern long tag_hash(void *key, int keysize); | ||||
| extern uint32 string_hash(void *key, int keysize); | ||||
| extern uint32 tag_hash(void *key, int keysize); | ||||
|  | ||||
| #endif   /* HSEARCH_H */ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user