1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Change hash indexes to store only the hash code rather than the whole indexed

value.  This means that hash index lookups are always lossy and have to be
rechecked when the heap is visited; however, the gain in index compactness
outweighs this when the indexed values are wide.  Also, we only need to
perform datatype comparisons when the hash codes match exactly, rather than
for every entry in the hash bucket; so it could also win for datatypes that
have expensive comparison functions.  A small additional win is gained by
keeping hash index pages sorted by hash code and using binary search to reduce
the number of index tuples we have to look at.

Xiao Meng

This commit also incorporates Zdenek Kotala's patch to isolate hash metapages
and hash bitmaps a bit better from the page header datastructures.
This commit is contained in:
Tom Lane
2008-09-15 18:43:41 +00:00
parent 440b3384b0
commit 4adc2f72a4
13 changed files with 313 additions and 129 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashutil.c,v 1.56 2008/07/13 20:45:47 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashutil.c,v 1.57 2008/09/15 18:43:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -28,12 +28,21 @@
bool
_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
{
/*
* Currently, we can't check any of the scan conditions since we do
* not have the original index entry value to supply to the sk_func.
* Always return true; we expect that hashgettuple already set the
* recheck flag to make the main indexscan code do it.
*/
#ifdef NOT_USED
TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
ScanKey key = scan->keyData;
int scanKeySize = scan->numberOfKeys;
#endif
IncrIndexProcessed();
#ifdef NOT_USED
while (scanKeySize > 0)
{
Datum datum;
@@ -59,6 +68,7 @@ _hash_checkqual(IndexScanDesc scan, IndexTuple itup)
key++;
scanKeySize--;
}
#endif
return true;
}
@@ -190,7 +200,7 @@ _hash_checkpage(Relation rel, Buffer buf, int flags)
*/
if (flags == LH_META_PAGE)
{
HashMetaPage metap = (HashMetaPage) page;
HashMetaPage metap = HashPageGetMeta(page);
if (metap->hashm_magic != HASH_MAGIC)
ereport(ERROR,
@@ -221,3 +231,123 @@ hashoptions(PG_FUNCTION_ARGS)
PG_RETURN_BYTEA_P(result);
PG_RETURN_NULL();
}
/*
* _hash_get_indextuple_hashkey - get the hash index tuple's hash key value
*/
uint32
_hash_get_indextuple_hashkey(IndexTuple itup)
{
char *attp;
/*
* We assume the hash key is the first attribute and can't be null,
* so this can be done crudely but very very cheaply ...
*/
attp = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
return *((uint32 *) attp);
}
/*
* _hash_form_tuple - form an index tuple containing hash code only
*/
IndexTuple
_hash_form_tuple(Relation index, Datum *values, bool *isnull)
{
IndexTuple itup;
uint32 hashkey;
Datum hashkeydatum;
TupleDesc hashdesc;
if (isnull[0])
hashkeydatum = (Datum) 0;
else
{
hashkey = _hash_datum2hashkey(index, values[0]);
hashkeydatum = UInt32GetDatum(hashkey);
}
hashdesc = RelationGetDescr(index);
Assert(hashdesc->natts == 1);
itup = index_form_tuple(hashdesc, &hashkeydatum, isnull);
return itup;
}
/*
* _hash_binsearch - Return the offset number in the page where the
* specified hash value should be sought or inserted.
*
* We use binary search, relying on the assumption that the existing entries
* are ordered by hash key.
*
* Returns the offset of the first index entry having hashkey >= hash_value,
* or the page's max offset plus one if hash_value is greater than all
* existing hash keys in the page. This is the appropriate place to start
* a search, or to insert a new item.
*/
OffsetNumber
_hash_binsearch(Page page, uint32 hash_value)
{
OffsetNumber upper;
OffsetNumber lower;
/* Loop invariant: lower <= desired place <= upper */
upper = PageGetMaxOffsetNumber(page) + 1;
lower = FirstOffsetNumber;
while (upper > lower)
{
OffsetNumber off;
IndexTuple itup;
uint32 hashkey;
off = (upper + lower) / 2;
Assert(OffsetNumberIsValid(off));
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
hashkey = _hash_get_indextuple_hashkey(itup);
if (hashkey < hash_value)
lower = off + 1;
else
upper = off;
}
return lower;
}
/*
* _hash_binsearch_last
*
* Same as above, except that if there are multiple matching items in the
* page, we return the offset of the last one instead of the first one,
* and the possible range of outputs is 0..maxoffset not 1..maxoffset+1.
* This is handy for starting a new page in a backwards scan.
*/
OffsetNumber
_hash_binsearch_last(Page page, uint32 hash_value)
{
OffsetNumber upper;
OffsetNumber lower;
/* Loop invariant: lower <= desired place <= upper */
upper = PageGetMaxOffsetNumber(page);
lower = FirstOffsetNumber - 1;
while (upper > lower)
{
IndexTuple itup;
OffsetNumber off;
uint32 hashkey;
off = (upper + lower + 1) / 2;
Assert(OffsetNumberIsValid(off));
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
hashkey = _hash_get_indextuple_hashkey(itup);
if (hashkey > hash_value)
upper = off - 1;
else
lower = off;
}
return lower;
}