1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

Improve dynahash.c's API so that caller can specify the comparison function

as well as the hash function (formerly the comparison function was hardwired
as memcmp()).  This makes it possible to eliminate the special-purpose
hashtable management code in execGrouping.c in favor of using dynahash to
manage tuple hashtables, which is a win because dynahash knows how to expand
a hashtable when the original size estimate was too small, whereas the
special-purpose code was too stupid to do that.  (See recent gripe from
Stephan Szabo about poor performance when hash table size estimate is way
off.)  Free side benefit: when using string_hash, the default comparison
function is now strncmp() instead of memcmp().  This should eliminate some
part of the overhead associated with larger NAMEDATALEN values.
This commit is contained in:
Tom Lane
2003-08-19 01:13:41 +00:00
parent 23e10843db
commit 80860c32d9
9 changed files with 263 additions and 194 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.7 2003/08/08 21:41:34 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.8 2003/08/19 01:13:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -23,6 +23,13 @@
#include "utils/syscache.h"
static TupleHashTable CurTupleHashTable = NULL;
static uint32 TupleHashTableHash(const void *key, Size keysize);
static int TupleHashTableMatch(const void *key1, const void *key2,
Size keysize);
/*****************************************************************************
* Utility routines for grouping tuples together
*****************************************************************************/
@ -272,7 +279,7 @@ execTuplesHashPrepare(TupleDesc tupdesc,
* numCols, keyColIdx: identify the tuple fields to use as lookup key
* eqfunctions: equality comparison functions to use
* hashfunctions: datatype-specific hashing functions to use
* nbuckets: number of buckets to make
* nbuckets: initial estimate of hashtable size
* entrysize: size of each entry (at least sizeof(TupleHashEntryData))
* tablecxt: memory context in which to store table and table entries
* tempcxt: short-lived context for evaluating hash and comparison functions
@ -290,14 +297,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
MemoryContext tablecxt, MemoryContext tempcxt)
{
TupleHashTable hashtable;
Size tabsize;
HASHCTL hash_ctl;
Assert(nbuckets > 0);
Assert(entrysize >= sizeof(TupleHashEntryData));
tabsize = sizeof(TupleHashTableData) +
(nbuckets - 1) *sizeof(TupleHashEntry);
hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
sizeof(TupleHashTableData));
hashtable->numCols = numCols;
hashtable->keyColIdx = keyColIdx;
@ -306,7 +312,20 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
hashtable->tablecxt = tablecxt;
hashtable->tempcxt = tempcxt;
hashtable->entrysize = entrysize;
hashtable->nbuckets = nbuckets;
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
hash_ctl.keysize = sizeof(TupleHashEntryData);
hash_ctl.entrysize = entrysize;
hash_ctl.hash = TupleHashTableHash;
hash_ctl.match = TupleHashTableMatch;
hash_ctl.hcxt = tablecxt;
hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
&hash_ctl,
HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
if (hashtable->hashtab == NULL)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
return hashtable;
}
@ -327,19 +346,93 @@ TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
bool *isnew)
{
int numCols = hashtable->numCols;
AttrNumber *keyColIdx = hashtable->keyColIdx;
HeapTuple tuple = slot->val;
TupleDesc tupdesc = slot->ttc_tupleDescriptor;
uint32 hashkey = 0;
int i;
int bucketno;
TupleHashEntry entry;
MemoryContext oldContext;
TupleHashTable saveCurHT;
bool found;
/* Need to run the hash function in short-lived context */
/* Need to run the hash functions in short-lived context */
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
/*
* Set up data needed by hash and match functions
*
* We save and restore CurTupleHashTable just in case someone manages
* to invoke this code re-entrantly.
*/
hashtable->tupdesc = tupdesc;
saveCurHT = CurTupleHashTable;
CurTupleHashTable = hashtable;
/* Search the hash table */
entry = (TupleHashEntry) hash_search(hashtable->hashtab,
&tuple,
isnew ? HASH_ENTER : HASH_FIND,
&found);
if (isnew)
{
if (found)
{
/* found pre-existing entry */
*isnew = false;
}
else
{
/* created new entry ... we hope */
if (entry == NULL)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
/*
* Zero any caller-requested space in the entry. (This zaps
* the "key data" dynahash.c copied into the new entry, but
* we don't care since we're about to overwrite it anyway.)
*/
MemSet(entry, 0, hashtable->entrysize);
/* Copy the first tuple into the table context */
MemoryContextSwitchTo(hashtable->tablecxt);
entry->firstTuple = heap_copytuple(tuple);
*isnew = true;
}
}
CurTupleHashTable = saveCurHT;
MemoryContextSwitchTo(oldContext);
return entry;
}
/*
* Compute the hash value for a tuple
*
* The passed-in key is a pointer to a HeapTuple pointer -- this is either
* the firstTuple field of a TupleHashEntry struct, or the key value passed
* to hash_search. We ignore the keysize.
*
* CurTupleHashTable must be set before calling this, since dynahash.c
* doesn't provide any API that would let us get at the hashtable otherwise.
*
* Also, the caller must select an appropriate memory context for running
* the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
*/
static uint32
TupleHashTableHash(const void *key, Size keysize)
{
HeapTuple tuple = *(const HeapTuple *) key;
TupleHashTable hashtable = CurTupleHashTable;
int numCols = hashtable->numCols;
AttrNumber *keyColIdx = hashtable->keyColIdx;
TupleDesc tupdesc = hashtable->tupdesc;
uint32 hashkey = 0;
int i;
for (i = 0; i < numCols; i++)
{
AttrNumber att = keyColIdx[i];
@ -360,72 +453,36 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
hashkey ^= hkey;
}
}
bucketno = hashkey % (uint32) hashtable->nbuckets;
for (entry = hashtable->buckets[bucketno];
entry != NULL;
entry = entry->next)
{
/* Quick check using hashkey */
if (entry->hashkey != hashkey)
continue;
if (execTuplesMatch(entry->firstTuple,
tuple,
tupdesc,
numCols, keyColIdx,
hashtable->eqfunctions,
hashtable->tempcxt))
{
if (isnew)
*isnew = false;
MemoryContextSwitchTo(oldContext);
return entry;
}
}
/* Not there, so build a new one if requested */
if (isnew)
{
MemoryContextSwitchTo(hashtable->tablecxt);
entry = (TupleHashEntry) palloc0(hashtable->entrysize);
entry->hashkey = hashkey;
entry->firstTuple = heap_copytuple(tuple);
entry->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = entry;
*isnew = true;
}
MemoryContextSwitchTo(oldContext);
return entry;
return hashkey;
}
/*
* Walk through all the entries of a hash table, in no special order.
* Returns NULL when no more entries remain.
* See whether two tuples (presumably of the same hash value) match
*
* Iterator state must be initialized with ResetTupleHashIterator() macro.
* As above, the passed pointers are pointers to HeapTuple pointers.
*
* CurTupleHashTable must be set before calling this, since dynahash.c
* doesn't provide any API that would let us get at the hashtable otherwise.
*
* Also, the caller must select an appropriate memory context for running
* the compare functions. (dynahash.c doesn't change CurrentMemoryContext.)
*/
TupleHashEntry
ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
static int
TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
{
TupleHashEntry entry;
HeapTuple tuple1 = *(const HeapTuple *) key1;
HeapTuple tuple2 = *(const HeapTuple *) key2;
TupleHashTable hashtable = CurTupleHashTable;
entry = state->next_entry;
while (entry == NULL)
{
if (state->next_bucket >= hashtable->nbuckets)
{
/* No more entries in hashtable, so done */
return NULL;
}
entry = hashtable->buckets[state->next_bucket++];
}
state->next_entry = entry->next;
return entry;
if (execTuplesMatch(tuple1,
tuple2,
hashtable->tupdesc,
hashtable->numCols,
hashtable->keyColIdx,
hashtable->eqfunctions,
hashtable->tempcxt))
return 0;
else
return 1;
}