mirror of
https://github.com/postgres/postgres.git
synced 2025-11-09 06:21:09 +03:00
Code review for improved-hashing patch. Fix some portability issues
(char != unsigned char, Datum != uint32); make use of new hash code in dynahash hash tables and hash joins.
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.56 2002/03/09 17:35:35 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* This file contains only the public interface routines.
|
||||
@@ -164,6 +164,9 @@ hashinsert(PG_FUNCTION_ARGS)
|
||||
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
|
||||
char *nulls = (char *) PG_GETARG_POINTER(2);
|
||||
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
|
||||
#ifdef NOT_USED
|
||||
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
|
||||
#endif
|
||||
|
||||
InsertIndexResult res;
|
||||
HashItem hitem;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.33 2002/03/09 17:35:35 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* These functions are stored in pg_amproc. For each operator class
|
||||
@@ -58,7 +58,7 @@ hashfloat4(PG_FUNCTION_ARGS)
|
||||
{
|
||||
float4 key = PG_GETARG_FLOAT4(0);
|
||||
|
||||
return hash_any((char *) &key, sizeof(key));
|
||||
return hash_any((unsigned char *) &key, sizeof(key));
|
||||
}
|
||||
|
||||
Datum
|
||||
@@ -66,7 +66,7 @@ hashfloat8(PG_FUNCTION_ARGS)
|
||||
{
|
||||
float8 key = PG_GETARG_FLOAT8(0);
|
||||
|
||||
return hash_any((char *) &key, sizeof(key));
|
||||
return hash_any((unsigned char *) &key, sizeof(key));
|
||||
}
|
||||
|
||||
Datum
|
||||
@@ -74,7 +74,7 @@ hashoidvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid *key = (Oid *) PG_GETARG_POINTER(0);
|
||||
|
||||
return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(Oid));
|
||||
return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(Oid));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -87,17 +87,18 @@ hashint2vector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int16 *key = (int16 *) PG_GETARG_POINTER(0);
|
||||
|
||||
return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(int16));
|
||||
return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(int16));
|
||||
}
|
||||
|
||||
Datum
|
||||
hashname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *key = NameStr(*PG_GETARG_NAME(0));
|
||||
int keylen = strlen(key);
|
||||
|
||||
Assert(strlen(key) <= NAMEDATALEN);
|
||||
Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */
|
||||
|
||||
return hash_any(key, strlen(key));
|
||||
return hash_any((unsigned char *) key, keylen);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -110,7 +111,8 @@ hashvarlena(PG_FUNCTION_ARGS)
|
||||
struct varlena *key = PG_GETARG_VARLENA_P(0);
|
||||
Datum result;
|
||||
|
||||
result = hash_any(VARDATA(key), VARSIZE(key) - VARHDRSZ);
|
||||
result = hash_any((unsigned char *) VARDATA(key),
|
||||
VARSIZE(key) - VARHDRSZ);
|
||||
|
||||
/* Avoid leaking memory for toasted inputs */
|
||||
PG_FREE_IF_COPY(key, 0);
|
||||
@@ -118,13 +120,15 @@ hashvarlena(PG_FUNCTION_ARGS)
|
||||
return result;
|
||||
}
|
||||
|
||||
/* This hash function was written by Bob Jenkins
|
||||
/*
|
||||
* This hash function was written by Bob Jenkins
|
||||
* (bob_jenkins@burtleburtle.net), and superficially adapted
|
||||
* for PostgreSQL by Neil Conway. For more information on this
|
||||
* hash function, see http://burtleburtle.net/bob/hash/doobs.html
|
||||
* hash function, see http://burtleburtle.net/bob/hash/doobs.html,
|
||||
* or Bob's article in Dr. Dobb's Journal, Sept. 1997.
|
||||
*/
|
||||
|
||||
/*
|
||||
/*----------
|
||||
* mix -- mix 3 32-bit values reversibly.
|
||||
* For every delta with one or two bits set, and the deltas of all three
|
||||
* high bits or all three low bits, whether the original value of a,b,c
|
||||
@@ -133,6 +137,7 @@ hashvarlena(PG_FUNCTION_ARGS)
|
||||
* have at least 1/4 probability of changing.
|
||||
* - If mix() is run forward, every bit of c will change between 1/3 and
|
||||
* 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
|
||||
*----------
|
||||
*/
|
||||
#define mix(a,b,c) \
|
||||
{ \
|
||||
@@ -151,56 +156,52 @@ hashvarlena(PG_FUNCTION_ARGS)
|
||||
* hash_any() -- hash a variable-length key into a 32-bit value
|
||||
* k : the key (the unaligned variable-length array of bytes)
|
||||
* len : the length of the key, counting by bytes
|
||||
* Returns a 32-bit value. Every bit of the key affects every bit of
|
||||
*
|
||||
* Returns a uint32 value. Every bit of the key affects every bit of
|
||||
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
|
||||
* About 6*len+35 instructions. The best hash table sizes are powers
|
||||
* of 2. There is no need to do mod a prime (mod is sooo slow!).
|
||||
* If you need less than 32 bits, use a bitmask.
|
||||
*/
|
||||
Datum
|
||||
hash_any(register const char *k, register int keylen)
|
||||
hash_any(register const unsigned char *k, register int keylen)
|
||||
{
|
||||
register Datum a,b,c,len;
|
||||
register uint32 a,b,c,len;
|
||||
|
||||
/* Set up the internal state */
|
||||
len = keylen;
|
||||
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
||||
/* Another arbitrary value. If the hash function is called
|
||||
* multiple times, this could be the previously generated
|
||||
* hash value; however, the interface currently doesn't allow
|
||||
* this. AFAIK this isn't a big deal.
|
||||
*/
|
||||
c = 3923095;
|
||||
/* Set up the internal state */
|
||||
len = keylen;
|
||||
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
||||
c = 3923095; /* initialize with an arbitrary value */
|
||||
|
||||
/* handle most of the key */
|
||||
while (len >= 12)
|
||||
{
|
||||
a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24));
|
||||
b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24));
|
||||
c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24));
|
||||
mix(a,b,c);
|
||||
k += 12; len -= 12;
|
||||
}
|
||||
/* handle most of the key */
|
||||
while (len >= 12)
|
||||
{
|
||||
a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
|
||||
b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
|
||||
c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
|
||||
mix(a,b,c);
|
||||
k += 12; len -= 12;
|
||||
}
|
||||
|
||||
/* handle the last 11 bytes */
|
||||
c += keylen;
|
||||
switch(len) /* all the case statements fall through */
|
||||
{
|
||||
case 11: c+=((Datum)k[10]<<24);
|
||||
case 10: c+=((Datum)k[9]<<16);
|
||||
case 9 : c+=((Datum)k[8]<<8);
|
||||
/* the first byte of c is reserved for the length */
|
||||
case 8 : b+=((Datum)k[7]<<24);
|
||||
case 7 : b+=((Datum)k[6]<<16);
|
||||
case 6 : b+=((Datum)k[5]<<8);
|
||||
case 5 : b+=k[4];
|
||||
case 4 : a+=((Datum)k[3]<<24);
|
||||
case 3 : a+=((Datum)k[2]<<16);
|
||||
case 2 : a+=((Datum)k[1]<<8);
|
||||
case 1 : a+=k[0];
|
||||
/* case 0: nothing left to add */
|
||||
}
|
||||
mix(a,b,c);
|
||||
/* report the result */
|
||||
return c;
|
||||
/* handle the last 11 bytes */
|
||||
c += keylen;
|
||||
switch (len) /* all the case statements fall through */
|
||||
{
|
||||
case 11: c+=((uint32)k[10]<<24);
|
||||
case 10: c+=((uint32)k[9]<<16);
|
||||
case 9 : c+=((uint32)k[8]<<8);
|
||||
/* the first byte of c is reserved for the length */
|
||||
case 8 : b+=((uint32)k[7]<<24);
|
||||
case 7 : b+=((uint32)k[6]<<16);
|
||||
case 6 : b+=((uint32)k[5]<<8);
|
||||
case 5 : b+=k[4];
|
||||
case 4 : a+=((uint32)k[3]<<24);
|
||||
case 3 : a+=((uint32)k[2]<<16);
|
||||
case 2 : a+=((uint32)k[1]<<8);
|
||||
case 1 : a+=k[0];
|
||||
/* case 0: nothing left to add */
|
||||
}
|
||||
mix(a,b,c);
|
||||
/* report the result */
|
||||
return UInt32GetDatum(c);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user