1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-12 05:01:15 +03:00

Improve dynahash.c's API so that the caller can specify the comparison function
as well as the hash function (formerly the comparison function was hardwired
as memcmp()).  This makes it possible to eliminate the special-purpose
hashtable management code in execGrouping.c in favor of using dynahash to
manage tuple hashtables, which is a win because dynahash knows how to expand
a hashtable when the original size estimate was too small, whereas the
special-purpose code was too stupid to do that.  (See recent gripe from
Stephan Szabo about poor performance when hash table size estimate is way
off.)  Free side benefit: when using string_hash, the default comparison
function is now strncmp() instead of memcmp().  This should eliminate some
part of the overhead associated with larger NAMEDATALEN values.
This commit is contained in:
Tom Lane
2003-08-19 01:13:41 +00:00
parent 23e10843db
commit 80860c32d9
9 changed files with 263 additions and 194 deletions

View File

@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.47 2003/08/04 02:40:06 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.48 2003/08/19 01:13:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -44,7 +44,6 @@
#include "postgres.h"
#include "utils/dynahash.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
@@ -63,7 +62,6 @@
* Private function prototypes
*/
static void *DynaHashAlloc(Size size);
static uint32 call_hash(HTAB *hashp, void *k);
static HASHSEGMENT seg_alloc(HTAB *hashp);
static bool element_alloc(HTAB *hashp);
static bool dir_realloc(HTAB *hashp);
@@ -133,6 +131,19 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
else
hashp->hash = string_hash; /* default hash function */
/*
* If you don't specify a match function, it defaults to strncmp() if
* you used string_hash (either explicitly or by default) and to
* memcmp() otherwise. (Prior to PostgreSQL 7.4, memcmp() was always
* used.)
*/
if (flags & HASH_COMPARE)
hashp->match = info->match;
else if (hashp->hash == string_hash)
hashp->match = (HashCompareFunc) strncmp;
else
hashp->match = memcmp;
if (flags & HASH_SHARED_MEM)
{
/*
@@ -155,7 +166,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
hashp->hctl = NULL;
hashp->dir = NULL;
hashp->alloc = MEM_ALLOC;
hashp->hcxt = DynaHashCxt;
hashp->hcxt = CurrentDynaHashCxt;
hashp->isshared = false;
}
@@ -207,26 +218,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
hashp->alloc = info->alloc;
else
{
if (flags & HASH_CONTEXT)
{
/* hash table structures live in child of given context */
CurrentDynaHashCxt = AllocSetContextCreate(info->hcxt,
"DynaHashTable",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
hashp->hcxt = CurrentDynaHashCxt;
}
else
{
/* hash table structures live in child of DynaHashCxt */
CurrentDynaHashCxt = AllocSetContextCreate(DynaHashCxt,
"DynaHashTable",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
hashp->hcxt = CurrentDynaHashCxt;
}
/* remaining hash table structures live in child of given context */
hashp->hcxt = AllocSetContextCreate(CurrentDynaHashCxt,
"DynaHashTable",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
CurrentDynaHashCxt = hashp->hcxt;
}
if (!init_htab(hashp, nelem))
@@ -351,7 +349,7 @@ init_htab(HTAB *hashp, long nelem)
* NB: assumes that all hash structure parameters have default values!
*/
long
hash_estimate_size(long num_entries, long entrysize)
hash_estimate_size(long num_entries, Size entrysize)
{
long size = 0;
long nBuckets,
@@ -447,7 +445,6 @@ void
hash_stats(const char *where, HTAB *hashp)
{
#if HASH_STATISTICS
fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
where, hashp->hctl->accesses, hashp->hctl->collisions);
@@ -459,19 +456,16 @@ hash_stats(const char *where, HTAB *hashp)
fprintf(stderr, "hash_stats: total expansions %ld\n",
hash_expansions);
#endif
}
/*******************************SEARCH ROUTINES *****************************/
static uint32
call_hash(HTAB *hashp, void *k)
{
HASHHDR *hctl = hashp->hctl;
uint32 hash_val,
bucket;
hash_val = hashp->hash(k, (int) hctl->keysize);
/* Convert a hash value to a bucket number */
static inline uint32
calc_bucket(HASHHDR *hctl, uint32 hash_val)
{
uint32 bucket;
bucket = hash_val & hctl->high_mask;
if (bucket > hctl->max_bucket)
@@ -506,11 +500,12 @@ call_hash(HTAB *hashp, void *k)
*/
void *
hash_search(HTAB *hashp,
void *keyPtr,
const void *keyPtr,
HASHACTION action,
bool *foundPtr)
{
HASHHDR *hctl = hashp->hctl;
uint32 hashvalue = 0;
uint32 bucket;
long segment_num;
long segment_ndx;
@@ -545,7 +540,12 @@ hash_search(HTAB *hashp,
}
else
{
bucket = call_hash(hashp, keyPtr);
HashCompareFunc match;
Size keysize = hctl->keysize;
hashvalue = hashp->hash(keyPtr, keysize);
bucket = calc_bucket(hctl, hashvalue);
segment_num = bucket >> hctl->sshift;
segment_ndx = MOD(bucket, hctl->ssize);
@@ -560,9 +560,11 @@ hash_search(HTAB *hashp,
/*
* Follow collision chain looking for matching key
*/
match = hashp->match; /* save one fetch in inner loop */
while (currBucket != NULL)
{
if (memcmp(ELEMENTKEY(currBucket), keyPtr, hctl->keysize) == 0)
if (currBucket->hashvalue == hashvalue &&
match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
break;
prevBucketPtr = &(currBucket->link);
currBucket = *prevBucketPtr;
@@ -641,6 +643,7 @@ hash_search(HTAB *hashp,
currBucket->link = NULL;
/* copy key into record */
currBucket->hashvalue = hashvalue;
memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize);
/* caller is expected to fill the data field on return */
@@ -802,7 +805,7 @@ expand_table(HTAB *hashp)
/*
* Relocate records to the new bucket. NOTE: because of the way the
* hash masking is done in call_hash, only one old bucket can need to
* hash masking is done in calc_bucket, only one old bucket can need to
* be split at this point. With a different way of reducing the hash
* value, that might not be true!
*/
@@ -820,8 +823,7 @@ expand_table(HTAB *hashp)
currElement = nextElement)
{
nextElement = currElement->link;
if ((long) call_hash(hashp, (void *) ELEMENTKEY(currElement))
== old_bucket)
if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
{
*oldlink = currElement;
oldlink = &currElement->link;

View File

@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.18 2003/08/04 02:40:06 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.19 2003/08/19 01:13:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -22,24 +22,21 @@
/*
* string_hash: hash function for keys that are null-terminated strings.
*
* NOTE: since dynahash.c backs this up with a fixed-length memcmp(),
* the key must actually be zero-padded to the specified maximum length
* to work correctly. However, if it is known that nothing after the
* first zero byte is interesting, this is the right hash function to use.
*
* NOTE: this is the default hash function if none is specified.
*/
uint32
string_hash(void *key, int keysize)
string_hash(const void *key, Size keysize)
{
return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key)));
return DatumGetUInt32(hash_any((const unsigned char *) key,
(int) strlen((const char *) key)));
}
/*
* tag_hash: hash function for fixed-size tag values
*/
uint32
tag_hash(void *key, int keysize)
tag_hash(const void *key, Size keysize)
{
return DatumGetUInt32(hash_any((unsigned char *) key, keysize));
return DatumGetUInt32(hash_any((const unsigned char *) key,
(int) keysize));
}