mirror of
https://github.com/postgres/postgres.git
synced 2025-04-25 21:42:33 +03:00
Code review for improved-hashing patch. Fix some portability issues
(char != unsigned char, Datum != uint32); make use of new hash code in dynahash hash tables and hash joins.
This commit is contained in:
parent
1eb31d197d
commit
c422b5ca6b
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.56 2002/03/09 17:35:35 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* This file contains only the public interface routines.
|
* This file contains only the public interface routines.
|
||||||
@ -164,6 +164,9 @@ hashinsert(PG_FUNCTION_ARGS)
|
|||||||
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
|
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
|
||||||
char *nulls = (char *) PG_GETARG_POINTER(2);
|
char *nulls = (char *) PG_GETARG_POINTER(2);
|
||||||
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
|
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
|
||||||
|
#ifdef NOT_USED
|
||||||
|
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
|
||||||
|
#endif
|
||||||
|
|
||||||
InsertIndexResult res;
|
InsertIndexResult res;
|
||||||
HashItem hitem;
|
HashItem hitem;
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.33 2002/03/09 17:35:35 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* These functions are stored in pg_amproc. For each operator class
|
* These functions are stored in pg_amproc. For each operator class
|
||||||
@ -58,7 +58,7 @@ hashfloat4(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
float4 key = PG_GETARG_FLOAT4(0);
|
float4 key = PG_GETARG_FLOAT4(0);
|
||||||
|
|
||||||
return hash_any((char *) &key, sizeof(key));
|
return hash_any((unsigned char *) &key, sizeof(key));
|
||||||
}
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
@ -66,7 +66,7 @@ hashfloat8(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
float8 key = PG_GETARG_FLOAT8(0);
|
float8 key = PG_GETARG_FLOAT8(0);
|
||||||
|
|
||||||
return hash_any((char *) &key, sizeof(key));
|
return hash_any((unsigned char *) &key, sizeof(key));
|
||||||
}
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
@ -74,7 +74,7 @@ hashoidvector(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
Oid *key = (Oid *) PG_GETARG_POINTER(0);
|
Oid *key = (Oid *) PG_GETARG_POINTER(0);
|
||||||
|
|
||||||
return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(Oid));
|
return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(Oid));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -87,17 +87,18 @@ hashint2vector(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
int16 *key = (int16 *) PG_GETARG_POINTER(0);
|
int16 *key = (int16 *) PG_GETARG_POINTER(0);
|
||||||
|
|
||||||
return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(int16));
|
return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(int16));
|
||||||
}
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
hashname(PG_FUNCTION_ARGS)
|
hashname(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
char *key = NameStr(*PG_GETARG_NAME(0));
|
char *key = NameStr(*PG_GETARG_NAME(0));
|
||||||
|
int keylen = strlen(key);
|
||||||
|
|
||||||
Assert(strlen(key) <= NAMEDATALEN);
|
Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */
|
||||||
|
|
||||||
return hash_any(key, strlen(key));
|
return hash_any((unsigned char *) key, keylen);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -110,7 +111,8 @@ hashvarlena(PG_FUNCTION_ARGS)
|
|||||||
struct varlena *key = PG_GETARG_VARLENA_P(0);
|
struct varlena *key = PG_GETARG_VARLENA_P(0);
|
||||||
Datum result;
|
Datum result;
|
||||||
|
|
||||||
result = hash_any(VARDATA(key), VARSIZE(key) - VARHDRSZ);
|
result = hash_any((unsigned char *) VARDATA(key),
|
||||||
|
VARSIZE(key) - VARHDRSZ);
|
||||||
|
|
||||||
/* Avoid leaking memory for toasted inputs */
|
/* Avoid leaking memory for toasted inputs */
|
||||||
PG_FREE_IF_COPY(key, 0);
|
PG_FREE_IF_COPY(key, 0);
|
||||||
@ -118,13 +120,15 @@ hashvarlena(PG_FUNCTION_ARGS)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This hash function was written by Bob Jenkins
|
/*
|
||||||
|
* This hash function was written by Bob Jenkins
|
||||||
* (bob_jenkins@burtleburtle.net), and superficially adapted
|
* (bob_jenkins@burtleburtle.net), and superficially adapted
|
||||||
* for PostgreSQL by Neil Conway. For more information on this
|
* for PostgreSQL by Neil Conway. For more information on this
|
||||||
* hash function, see http://burtleburtle.net/bob/hash/doobs.html
|
* hash function, see http://burtleburtle.net/bob/hash/doobs.html,
|
||||||
|
* or Bob's article in Dr. Dobb's Journal, Sept. 1997.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*----------
|
||||||
* mix -- mix 3 32-bit values reversibly.
|
* mix -- mix 3 32-bit values reversibly.
|
||||||
* For every delta with one or two bits set, and the deltas of all three
|
* For every delta with one or two bits set, and the deltas of all three
|
||||||
* high bits or all three low bits, whether the original value of a,b,c
|
* high bits or all three low bits, whether the original value of a,b,c
|
||||||
@ -133,6 +137,7 @@ hashvarlena(PG_FUNCTION_ARGS)
|
|||||||
* have at least 1/4 probability of changing.
|
* have at least 1/4 probability of changing.
|
||||||
* - If mix() is run forward, every bit of c will change between 1/3 and
|
* - If mix() is run forward, every bit of c will change between 1/3 and
|
||||||
* 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
|
* 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
|
||||||
|
*----------
|
||||||
*/
|
*/
|
||||||
#define mix(a,b,c) \
|
#define mix(a,b,c) \
|
||||||
{ \
|
{ \
|
||||||
@ -151,56 +156,52 @@ hashvarlena(PG_FUNCTION_ARGS)
|
|||||||
* hash_any() -- hash a variable-length key into a 32-bit value
|
* hash_any() -- hash a variable-length key into a 32-bit value
|
||||||
* k : the key (the unaligned variable-length array of bytes)
|
* k : the key (the unaligned variable-length array of bytes)
|
||||||
* len : the length of the key, counting by bytes
|
* len : the length of the key, counting by bytes
|
||||||
* Returns a 32-bit value. Every bit of the key affects every bit of
|
*
|
||||||
|
* Returns a uint32 value. Every bit of the key affects every bit of
|
||||||
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
|
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
|
||||||
* About 6*len+35 instructions. The best hash table sizes are powers
|
* About 6*len+35 instructions. The best hash table sizes are powers
|
||||||
* of 2. There is no need to do mod a prime (mod is sooo slow!).
|
* of 2. There is no need to do mod a prime (mod is sooo slow!).
|
||||||
* If you need less than 32 bits, use a bitmask.
|
* If you need less than 32 bits, use a bitmask.
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
hash_any(register const char *k, register int keylen)
|
hash_any(register const unsigned char *k, register int keylen)
|
||||||
{
|
{
|
||||||
register Datum a,b,c,len;
|
register uint32 a,b,c,len;
|
||||||
|
|
||||||
/* Set up the internal state */
|
/* Set up the internal state */
|
||||||
len = keylen;
|
len = keylen;
|
||||||
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
||||||
/* Another arbitrary value. If the hash function is called
|
c = 3923095; /* initialize with an arbitrary value */
|
||||||
* multiple times, this could be the previously generated
|
|
||||||
* hash value; however, the interface currently doesn't allow
|
|
||||||
* this. AFAIK this isn't a big deal.
|
|
||||||
*/
|
|
||||||
c = 3923095;
|
|
||||||
|
|
||||||
/* handle most of the key */
|
/* handle most of the key */
|
||||||
while (len >= 12)
|
while (len >= 12)
|
||||||
{
|
{
|
||||||
a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24));
|
a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
|
||||||
b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24));
|
b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
|
||||||
c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24));
|
c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
|
||||||
mix(a,b,c);
|
mix(a,b,c);
|
||||||
k += 12; len -= 12;
|
k += 12; len -= 12;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* handle the last 11 bytes */
|
/* handle the last 11 bytes */
|
||||||
c += keylen;
|
c += keylen;
|
||||||
switch(len) /* all the case statements fall through */
|
switch (len) /* all the case statements fall through */
|
||||||
{
|
{
|
||||||
case 11: c+=((Datum)k[10]<<24);
|
case 11: c+=((uint32)k[10]<<24);
|
||||||
case 10: c+=((Datum)k[9]<<16);
|
case 10: c+=((uint32)k[9]<<16);
|
||||||
case 9 : c+=((Datum)k[8]<<8);
|
case 9 : c+=((uint32)k[8]<<8);
|
||||||
/* the first byte of c is reserved for the length */
|
/* the first byte of c is reserved for the length */
|
||||||
case 8 : b+=((Datum)k[7]<<24);
|
case 8 : b+=((uint32)k[7]<<24);
|
||||||
case 7 : b+=((Datum)k[6]<<16);
|
case 7 : b+=((uint32)k[6]<<16);
|
||||||
case 6 : b+=((Datum)k[5]<<8);
|
case 6 : b+=((uint32)k[5]<<8);
|
||||||
case 5 : b+=k[4];
|
case 5 : b+=k[4];
|
||||||
case 4 : a+=((Datum)k[3]<<24);
|
case 4 : a+=((uint32)k[3]<<24);
|
||||||
case 3 : a+=((Datum)k[2]<<16);
|
case 3 : a+=((uint32)k[2]<<16);
|
||||||
case 2 : a+=((Datum)k[1]<<8);
|
case 2 : a+=((uint32)k[1]<<8);
|
||||||
case 1 : a+=k[0];
|
case 1 : a+=k[0];
|
||||||
/* case 0: nothing left to add */
|
/* case 0: nothing left to add */
|
||||||
}
|
}
|
||||||
mix(a,b,c);
|
mix(a,b,c);
|
||||||
/* report the result */
|
/* report the result */
|
||||||
return c;
|
return UInt32GetDatum(c);
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* $Id: nodeHash.c,v 1.61 2002/03/06 20:49:44 momjian Exp $
|
* $Id: nodeHash.c,v 1.62 2002/03/09 17:35:35 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -22,6 +22,7 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "access/hash.h"
|
||||||
#include "executor/execdebug.h"
|
#include "executor/execdebug.h"
|
||||||
#include "executor/nodeHash.h"
|
#include "executor/nodeHash.h"
|
||||||
#include "executor/nodeHashjoin.h"
|
#include "executor/nodeHashjoin.h"
|
||||||
@ -31,7 +32,7 @@
|
|||||||
#include "utils/lsyscache.h"
|
#include "utils/lsyscache.h"
|
||||||
|
|
||||||
|
|
||||||
static int hashFunc(Datum key, int len, bool byVal);
|
static uint32 hashFunc(Datum key, int len, bool byVal);
|
||||||
|
|
||||||
/* ----------------------------------------------------------------
|
/* ----------------------------------------------------------------
|
||||||
* ExecHash
|
* ExecHash
|
||||||
@ -553,7 +554,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
|
|||||||
bucketno = hashFunc(keyval,
|
bucketno = hashFunc(keyval,
|
||||||
(int) hashtable->typLen,
|
(int) hashtable->typLen,
|
||||||
hashtable->typByVal)
|
hashtable->typByVal)
|
||||||
% hashtable->totalbuckets;
|
% (uint32) hashtable->totalbuckets;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HJDEBUG
|
#ifdef HJDEBUG
|
||||||
@ -624,30 +625,29 @@ ExecScanHashBucket(HashJoinState *hjstate,
|
|||||||
/* ----------------------------------------------------------------
|
/* ----------------------------------------------------------------
|
||||||
* hashFunc
|
* hashFunc
|
||||||
*
|
*
|
||||||
* the hash function, copied from Margo
|
* the hash function for hash joins
|
||||||
*
|
*
|
||||||
* XXX this probably ought to be replaced with datatype-specific
|
* XXX this probably ought to be replaced with datatype-specific
|
||||||
* hash functions, such as those already implemented for hash indexes.
|
* hash functions, such as those already implemented for hash indexes.
|
||||||
* ----------------------------------------------------------------
|
* ----------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
static int
|
static uint32
|
||||||
hashFunc(Datum key, int len, bool byVal)
|
hashFunc(Datum key, int len, bool byVal)
|
||||||
{
|
{
|
||||||
unsigned int h = 0;
|
unsigned char *k;
|
||||||
|
|
||||||
if (byVal)
|
if (byVal)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* If it's a by-value data type, use the 'len' least significant
|
* If it's a by-value data type, just hash the whole Datum value.
|
||||||
* bytes of the Datum value. This should do the right thing on
|
* This assumes that datatypes narrower than Datum are consistently
|
||||||
* either bigendian or littleendian hardware --- see the Datum
|
* padded (either zero-extended or sign-extended, but not random
|
||||||
* access macros in c.h.
|
* bits) to fill Datum; see the XXXGetDatum macros in postgres.h.
|
||||||
|
* NOTE: it would not work to do hash_any(&key, len) since this
|
||||||
|
* would get the wrong bytes on a big-endian machine.
|
||||||
*/
|
*/
|
||||||
while (len-- > 0)
|
k = (unsigned char *) &key;
|
||||||
{
|
len = sizeof(Datum);
|
||||||
h = (h * PRIME1) ^ (key & 0xFF);
|
|
||||||
key >>= 8;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -662,8 +662,6 @@ hashFunc(Datum key, int len, bool byVal)
|
|||||||
* freeing the detoasted copy; that happens for free when the
|
* freeing the detoasted copy; that happens for free when the
|
||||||
* per-tuple memory context is reset in ExecHashGetBucket.)
|
* per-tuple memory context is reset in ExecHashGetBucket.)
|
||||||
*/
|
*/
|
||||||
unsigned char *k;
|
|
||||||
|
|
||||||
if (len < 0)
|
if (len < 0)
|
||||||
{
|
{
|
||||||
struct varlena *vkey = PG_DETOAST_DATUM(key);
|
struct varlena *vkey = PG_DETOAST_DATUM(key);
|
||||||
@ -673,12 +671,9 @@ hashFunc(Datum key, int len, bool byVal)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
k = (unsigned char *) DatumGetPointer(key);
|
k = (unsigned char *) DatumGetPointer(key);
|
||||||
|
|
||||||
while (len-- > 0)
|
|
||||||
h = (h * PRIME1) ^ (*k++);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return h % PRIME2;
|
return DatumGetUInt32(hash_any(k, len));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------
|
/* ----------------------------------------------------------------
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.64 2001/11/21 05:57:33 thomas Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.65 2002/03/09 17:35:35 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -1116,7 +1116,7 @@ timetz_hash(PG_FUNCTION_ARGS)
|
|||||||
* sizeof(TimeTzADT), so that any garbage pad bytes in the structure
|
* sizeof(TimeTzADT), so that any garbage pad bytes in the structure
|
||||||
* won't be included in the hash!
|
* won't be included in the hash!
|
||||||
*/
|
*/
|
||||||
return hash_any((char *) key, sizeof(double) + sizeof(int4));
|
return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4));
|
||||||
}
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* PostgreSQL type definitions for MAC addresses.
|
* PostgreSQL type definitions for MAC addresses.
|
||||||
*
|
*
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.21 2001/08/21 21:23:21 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.22 2002/03/09 17:35:35 tgl Exp $
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
@ -230,7 +230,7 @@ hashmacaddr(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
macaddr *key = PG_GETARG_MACADDR_P(0);
|
macaddr *key = PG_GETARG_MACADDR_P(0);
|
||||||
|
|
||||||
return hash_any((char *) key, sizeof(macaddr));
|
return hash_any((unsigned char *) key, sizeof(macaddr));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.64 2002/03/06 06:10:18 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.65 2002/03/09 17:35:36 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -1017,7 +1017,7 @@ interval_hash(PG_FUNCTION_ARGS)
|
|||||||
* sizeof(Interval), so that any garbage pad bytes in the structure
|
* sizeof(Interval), so that any garbage pad bytes in the structure
|
||||||
* won't be included in the hash!
|
* won't be included in the hash!
|
||||||
*/
|
*/
|
||||||
return hash_any((char *) key, sizeof(double) + sizeof(int4));
|
return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator.
|
/* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator.
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.87 2001/11/18 12:07:07 ishii Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.88 2002/03/09 17:35:36 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -791,7 +791,7 @@ hashbpchar(PG_FUNCTION_ARGS)
|
|||||||
keydata = VARDATA(key);
|
keydata = VARDATA(key);
|
||||||
keylen = bcTruelen(key);
|
keylen = bcTruelen(key);
|
||||||
|
|
||||||
result = hash_any(keydata, keylen);
|
result = hash_any((unsigned char *) keydata, keylen);
|
||||||
|
|
||||||
/* Avoid leaking memory for toasted inputs */
|
/* Avoid leaking memory for toasted inputs */
|
||||||
PG_FREE_IF_COPY(key, 0);
|
PG_FREE_IF_COPY(key, 0);
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.41 2002/03/02 21:39:33 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.42 2002/03/09 17:35:36 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -329,8 +329,7 @@ init_htab(HTAB *hashp, long nelem)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if HASH_DEBUG
|
#if HASH_DEBUG
|
||||||
fprintf(stderr, "%s\n%s%p\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
|
fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n%s%ld\n",
|
||||||
"init_htab:",
|
|
||||||
"TABLE POINTER ", hashp,
|
"TABLE POINTER ", hashp,
|
||||||
"DIRECTORY SIZE ", hctl->dsize,
|
"DIRECTORY SIZE ", hctl->dsize,
|
||||||
"SEGMENT SIZE ", hctl->ssize,
|
"SEGMENT SIZE ", hctl->ssize,
|
||||||
@ -453,7 +452,7 @@ hash_stats(const char *where, HTAB *hashp)
|
|||||||
fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
|
fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
|
||||||
where, hashp->hctl->accesses, hashp->hctl->collisions);
|
where, hashp->hctl->accesses, hashp->hctl->collisions);
|
||||||
|
|
||||||
fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %d segmentcount %d\n",
|
fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
|
||||||
hashp->hctl->nentries, hashp->hctl->keysize,
|
hashp->hctl->nentries, hashp->hctl->keysize,
|
||||||
hashp->hctl->max_bucket, hashp->hctl->nsegs);
|
hashp->hctl->max_bucket, hashp->hctl->nsegs);
|
||||||
fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
|
fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
|
||||||
@ -470,7 +469,7 @@ static uint32
|
|||||||
call_hash(HTAB *hashp, void *k)
|
call_hash(HTAB *hashp, void *k)
|
||||||
{
|
{
|
||||||
HASHHDR *hctl = hashp->hctl;
|
HASHHDR *hctl = hashp->hctl;
|
||||||
long hash_val,
|
uint32 hash_val,
|
||||||
bucket;
|
bucket;
|
||||||
|
|
||||||
hash_val = hashp->hash(k, (int) hctl->keysize);
|
hash_val = hashp->hash(k, (int) hctl->keysize);
|
||||||
@ -479,7 +478,7 @@ call_hash(HTAB *hashp, void *k)
|
|||||||
if (bucket > hctl->max_bucket)
|
if (bucket > hctl->max_bucket)
|
||||||
bucket = bucket & hctl->low_mask;
|
bucket = bucket & hctl->low_mask;
|
||||||
|
|
||||||
return (uint32) bucket;
|
return bucket;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------
|
/*----------
|
||||||
@ -647,7 +646,7 @@ hash_search(HTAB *hashp,
|
|||||||
/* caller is expected to fill the data field on return */
|
/* caller is expected to fill the data field on return */
|
||||||
|
|
||||||
/* Check if it is time to split the segment */
|
/* Check if it is time to split the segment */
|
||||||
if (++hctl->nentries / (hctl->max_bucket + 1) > hctl->ffactor)
|
if (++hctl->nentries / (long) (hctl->max_bucket + 1) > hctl->ffactor)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* NOTE: failure to expand table is not a fatal error, it
|
* NOTE: failure to expand table is not a fatal error, it
|
||||||
@ -795,10 +794,10 @@ expand_table(HTAB *hashp)
|
|||||||
/*
|
/*
|
||||||
* If we crossed a power of 2, readjust masks.
|
* If we crossed a power of 2, readjust masks.
|
||||||
*/
|
*/
|
||||||
if (new_bucket > hctl->high_mask)
|
if ((uint32) new_bucket > hctl->high_mask)
|
||||||
{
|
{
|
||||||
hctl->low_mask = hctl->high_mask;
|
hctl->low_mask = hctl->high_mask;
|
||||||
hctl->high_mask = new_bucket | hctl->low_mask;
|
hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -9,14 +9,16 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.15 2001/10/25 05:49:51 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.16 2002/03/09 17:35:36 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "access/hash.h"
|
||||||
#include "utils/hsearch.h"
|
#include "utils/hsearch.h"
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* string_hash: hash function for keys that are null-terminated strings.
|
* string_hash: hash function for keys that are null-terminated strings.
|
||||||
*
|
*
|
||||||
@ -27,91 +29,17 @@
|
|||||||
*
|
*
|
||||||
* NOTE: this is the default hash function if none is specified.
|
* NOTE: this is the default hash function if none is specified.
|
||||||
*/
|
*/
|
||||||
long
|
uint32
|
||||||
string_hash(void *key, int keysize)
|
string_hash(void *key, int keysize)
|
||||||
{
|
{
|
||||||
unsigned char *k = (unsigned char *) key;
|
return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key)));
|
||||||
long h = 0;
|
|
||||||
|
|
||||||
while (*k)
|
|
||||||
h = (h * PRIME1) ^ (*k++);
|
|
||||||
|
|
||||||
h %= PRIME2;
|
|
||||||
|
|
||||||
return h;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* tag_hash: hash function for fixed-size tag values
|
* tag_hash: hash function for fixed-size tag values
|
||||||
*
|
|
||||||
* NB: we assume that the supplied key is aligned at least on an 'int'
|
|
||||||
* boundary, if its size is >= sizeof(int).
|
|
||||||
*/
|
*/
|
||||||
long
|
uint32
|
||||||
tag_hash(void *key, int keysize)
|
tag_hash(void *key, int keysize)
|
||||||
{
|
{
|
||||||
int *k = (int *) key;
|
return DatumGetUInt32(hash_any((unsigned char *) key, keysize));
|
||||||
long h = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Use four byte chunks in a "jump table" to go a little faster.
|
|
||||||
*
|
|
||||||
* Currently the maximum keysize is 16 (mar 17 1992). I have put in
|
|
||||||
* cases for up to 32. Bigger than this will resort to a for loop
|
|
||||||
* (see the default case).
|
|
||||||
*/
|
|
||||||
switch (keysize)
|
|
||||||
{
|
|
||||||
case 8 * sizeof(int):
|
|
||||||
h = (h * PRIME1) ^(*k++);
|
|
||||||
/* fall through */
|
|
||||||
|
|
||||||
case 7 * sizeof(int):
|
|
||||||
h = (h * PRIME1) ^(*k++);
|
|
||||||
/* fall through */
|
|
||||||
|
|
||||||
case 6 * sizeof(int):
|
|
||||||
h = (h * PRIME1) ^(*k++);
|
|
||||||
/* fall through */
|
|
||||||
|
|
||||||
case 5 * sizeof(int):
|
|
||||||
h = (h * PRIME1) ^(*k++);
|
|
||||||
/* fall through */
|
|
||||||
|
|
||||||
case 4 * sizeof(int):
|
|
||||||
h = (h * PRIME1) ^(*k++);
|
|
||||||
/* fall through */
|
|
||||||
|
|
||||||
case 3 * sizeof(int):
|
|
||||||
h = (h * PRIME1) ^(*k++);
|
|
||||||
/* fall through */
|
|
||||||
|
|
||||||
case 2 * sizeof(int):
|
|
||||||
h = (h * PRIME1) ^(*k++);
|
|
||||||
/* fall through */
|
|
||||||
|
|
||||||
case sizeof(int):
|
|
||||||
h = (h * PRIME1) ^(*k++);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
/* Do an int at a time */
|
|
||||||
for (; keysize >= (int) sizeof(int); keysize -= sizeof(int))
|
|
||||||
h = (h * PRIME1) ^ (*k++);
|
|
||||||
|
|
||||||
/* Cope with any partial-int leftover bytes */
|
|
||||||
if (keysize > 0)
|
|
||||||
{
|
|
||||||
unsigned char *keybyte = (unsigned char *) k;
|
|
||||||
|
|
||||||
do
|
|
||||||
h = (h * PRIME1) ^ (*keybyte++);
|
|
||||||
while (--keysize > 0);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
h %= PRIME2;
|
|
||||||
|
|
||||||
return h;
|
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $Id: hash.h,v 1.44 2002/03/06 20:49:45 momjian Exp $
|
* $Id: hash.h,v 1.45 2002/03/09 17:35:37 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* modeled after Margo Seltzer's hash implementation for unix.
|
* modeled after Margo Seltzer's hash implementation for unix.
|
||||||
@ -252,7 +252,8 @@ extern Datum hashbulkdelete(PG_FUNCTION_ARGS);
|
|||||||
* Datatype-specific hash functions in hashfunc.c.
|
* Datatype-specific hash functions in hashfunc.c.
|
||||||
*
|
*
|
||||||
* NOTE: some of these are also used by catcache operations, without
|
* NOTE: some of these are also used by catcache operations, without
|
||||||
* any direct connection to hash indexes.
|
* any direct connection to hash indexes. Also, the common hash_any
|
||||||
|
* routine is used by dynahash tables and hash joins.
|
||||||
*/
|
*/
|
||||||
extern Datum hashchar(PG_FUNCTION_ARGS);
|
extern Datum hashchar(PG_FUNCTION_ARGS);
|
||||||
extern Datum hashint2(PG_FUNCTION_ARGS);
|
extern Datum hashint2(PG_FUNCTION_ARGS);
|
||||||
@ -265,7 +266,7 @@ extern Datum hashoidvector(PG_FUNCTION_ARGS);
|
|||||||
extern Datum hashint2vector(PG_FUNCTION_ARGS);
|
extern Datum hashint2vector(PG_FUNCTION_ARGS);
|
||||||
extern Datum hashname(PG_FUNCTION_ARGS);
|
extern Datum hashname(PG_FUNCTION_ARGS);
|
||||||
extern Datum hashvarlena(PG_FUNCTION_ARGS);
|
extern Datum hashvarlena(PG_FUNCTION_ARGS);
|
||||||
extern Datum hash_any(register const char *k, register int keylen);
|
extern Datum hash_any(register const unsigned char *k, register int keylen);
|
||||||
|
|
||||||
/* private routines */
|
/* private routines */
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $Id: hsearch.h,v 1.25 2001/11/05 17:46:36 momjian Exp $
|
* $Id: hsearch.h,v 1.26 2002/03/09 17:35:37 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -35,9 +35,6 @@
|
|||||||
#define DEF_DIRSIZE 256
|
#define DEF_DIRSIZE 256
|
||||||
#define DEF_FFACTOR 1 /* default fill factor */
|
#define DEF_FFACTOR 1 /* default fill factor */
|
||||||
|
|
||||||
#define PRIME1 37 /* for the hash function */
|
|
||||||
#define PRIME2 1048583
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* HASHELEMENT is the private part of a hashtable entry. The caller's data
|
* HASHELEMENT is the private part of a hashtable entry. The caller's data
|
||||||
@ -60,10 +57,10 @@ typedef struct HASHHDR
|
|||||||
{
|
{
|
||||||
long dsize; /* Directory Size */
|
long dsize; /* Directory Size */
|
||||||
long ssize; /* Segment Size --- must be power of 2 */
|
long ssize; /* Segment Size --- must be power of 2 */
|
||||||
long sshift; /* Segment shift */
|
int sshift; /* Segment shift = log2(ssize) */
|
||||||
long max_bucket; /* ID of Maximum bucket in use */
|
uint32 max_bucket; /* ID of Maximum bucket in use */
|
||||||
long high_mask; /* Mask to modulo into entire table */
|
uint32 high_mask; /* Mask to modulo into entire table */
|
||||||
long low_mask; /* Mask to modulo into lower half of table */
|
uint32 low_mask; /* Mask to modulo into lower half of table */
|
||||||
long ffactor; /* Fill factor */
|
long ffactor; /* Fill factor */
|
||||||
long nentries; /* Number of entries in hash table */
|
long nentries; /* Number of entries in hash table */
|
||||||
long nsegs; /* Number of allocated segments */
|
long nsegs; /* Number of allocated segments */
|
||||||
@ -86,7 +83,7 @@ typedef struct HTAB
|
|||||||
{
|
{
|
||||||
HASHHDR *hctl; /* shared control information */
|
HASHHDR *hctl; /* shared control information */
|
||||||
HASHSEGMENT *dir; /* directory of segment starts */
|
HASHSEGMENT *dir; /* directory of segment starts */
|
||||||
long (*hash) (void *key, int keysize); /* Hash Function */
|
uint32 (*hash) (void *key, int keysize); /* Hash Function */
|
||||||
void *(*alloc) (Size); /* memory allocator */
|
void *(*alloc) (Size); /* memory allocator */
|
||||||
MemoryContext hcxt; /* memory context if default allocator
|
MemoryContext hcxt; /* memory context if default allocator
|
||||||
* used */
|
* used */
|
||||||
@ -101,7 +98,7 @@ typedef struct HASHCTL
|
|||||||
long ssize; /* Segment Size */
|
long ssize; /* Segment Size */
|
||||||
long dsize; /* (initial) Directory Size */
|
long dsize; /* (initial) Directory Size */
|
||||||
long ffactor; /* Fill factor */
|
long ffactor; /* Fill factor */
|
||||||
long (*hash) (void *key, int keysize); /* Hash Function */
|
uint32 (*hash) (void *key, int keysize); /* Hash Function */
|
||||||
long keysize; /* hash key length in bytes */
|
long keysize; /* hash key length in bytes */
|
||||||
long entrysize; /* total user element size in bytes */
|
long entrysize; /* total user element size in bytes */
|
||||||
long max_dsize; /* limit to dsize if directory size is
|
long max_dsize; /* limit to dsize if directory size is
|
||||||
@ -143,7 +140,7 @@ typedef enum
|
|||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
HTAB *hashp;
|
HTAB *hashp;
|
||||||
long curBucket; /* index of current bucket */
|
uint32 curBucket; /* index of current bucket */
|
||||||
HASHELEMENT *curEntry; /* current entry in bucket */
|
HASHELEMENT *curEntry; /* current entry in bucket */
|
||||||
} HASH_SEQ_STATUS;
|
} HASH_SEQ_STATUS;
|
||||||
|
|
||||||
@ -164,7 +161,7 @@ extern long hash_select_dirsize(long num_entries);
|
|||||||
/*
|
/*
|
||||||
* prototypes for functions in hashfn.c
|
* prototypes for functions in hashfn.c
|
||||||
*/
|
*/
|
||||||
extern long string_hash(void *key, int keysize);
|
extern uint32 string_hash(void *key, int keysize);
|
||||||
extern long tag_hash(void *key, int keysize);
|
extern uint32 tag_hash(void *key, int keysize);
|
||||||
|
|
||||||
#endif /* HSEARCH_H */
|
#endif /* HSEARCH_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user