mirror of https://github.com/postgres/postgres.git

Postgres95 1.01 Distribution - Virgin Sources
18
src/backend/access/hash/Makefile.inc
Normal file
@@ -0,0 +1,18 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
#    Makefile for access/hash (hash access method)
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
#    $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
#
#-------------------------------------------------------------------------

SUBSRCS+= hash.c hashfunc.c hashinsert.c hashovfl.c hashpage.c hashscan.c \
          hashsearch.c hashstrat.c hashutil.c

467
src/backend/access/hash/hash.c
Normal file
@@ -0,0 +1,467 @@
/*-------------------------------------------------------------------------
 *
 * hash.c--
 *    Implementation of Margo Seltzer's Hashing package for postgres.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
 *
 * NOTES
 *    This file contains only the public interface routines.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/sdir.h"
#include "access/hash.h"
#include "access/funcindex.h"
#include "nodes/execnodes.h"
#include "nodes/plannodes.h"
#include "executor/executor.h"
#include "executor/tuptable.h"
#include "catalog/index.h"


bool BuildingHash = false;

/*
 *  hashbuild() -- build a new hash index.
 *
 *      We use a global variable to record the fact that we're creating
 *      a new index.  This is used to avoid high-concurrency locking,
 *      since the index won't be visible until this transaction commits
 *      and since building is guaranteed to be single-threaded.
 */
void
hashbuild(Relation heap,
          Relation index,
          int natts,
          AttrNumber *attnum,
          IndexStrategy istrat,
          uint16 pcount,
          Datum *params,
          FuncIndexInfo *finfo,
          PredInfo *predInfo)
{
    HeapScanDesc hscan;
    Buffer buffer;
    HeapTuple htup;
    IndexTuple itup;
    TupleDesc htupdesc, itupdesc;
    Datum *attdata;
    bool *nulls;
    InsertIndexResult res;
    int nhtups, nitups;
    int i;
    HashItem hitem;
    ExprContext *econtext;
    TupleTable tupleTable;
    TupleTableSlot *slot;
    Oid hrelid, irelid;
    Node *pred, *oldPred;

    /* note that this is a new hash index */
    BuildingHash = true;

    pred = predInfo->pred;
    oldPred = predInfo->oldPred;

    /* initialize the hash index metadata page (if this is a new index) */
    if (oldPred == NULL)
        _hash_metapinit(index);

    /* get tuple descriptors for heap and index relations */
    htupdesc = RelationGetTupleDescriptor(heap);
    itupdesc = RelationGetTupleDescriptor(index);

    /* get space for data items that'll appear in the index tuple */
    attdata = (Datum *) palloc(natts * sizeof(Datum));
    nulls = (bool *) palloc(natts * sizeof(bool));

    /*
     * If this is a predicate (partial) index, we will need to evaluate the
     * predicate using ExecQual, which requires the current tuple to be in a
     * slot of a TupleTable.  In addition, ExecQual must have an ExprContext
     * referring to that slot.  Here, we initialize dummy TupleTable and
     * ExprContext objects for this purpose. --Nels, Feb '92
     */
#ifndef OMIT_PARTIAL_INDEX
    if (pred != NULL || oldPred != NULL) {
        tupleTable = ExecCreateTupleTable(1);
        slot = ExecAllocTableSlot(tupleTable);
        econtext = makeNode(ExprContext);
        FillDummyExprContext(econtext, slot, htupdesc, buffer);
    }
#endif /* OMIT_PARTIAL_INDEX */

    /* start a heap scan */
    hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
    htup = heap_getnext(hscan, 0, &buffer);

    /* build the index */
    nhtups = nitups = 0;

    for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {

        nhtups++;

        /*
         * If oldPred != NULL, this is an EXTEND INDEX command, so skip
         * this tuple if it was already in the existing partial index
         */
        if (oldPred != NULL) {
            /*SetSlotContents(slot, htup); */
#ifndef OMIT_PARTIAL_INDEX
            slot->val = htup;
            if (ExecQual((List*)oldPred, econtext) == true) {
                nitups++;
                continue;
            }
#endif /* OMIT_PARTIAL_INDEX */
        }

        /* Skip this tuple if it doesn't satisfy the partial-index predicate */
        if (pred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
            /*SetSlotContents(slot, htup); */
            slot->val = htup;
            if (ExecQual((List*)pred, econtext) == false)
                continue;
#endif /* OMIT_PARTIAL_INDEX */
        }

        nitups++;

        /*
         * For the current heap tuple, extract all the attributes
         * we use in this index, and note which are null.
         */
        for (i = 1; i <= natts; i++) {
            int attoff;
            bool attnull;

            /*
             * Offsets are from the start of the tuple, and are
             * zero-based; indices are one-based.  The next call
             * returns i - 1.  That's data hiding for you.
             */

            /* attoff = i - 1 */
            attoff = AttrNumberGetAttrOffset(i);

            /* below, attdata[attoff] is set to equal some datum, and
             * attnull is changed to indicate whether or not the attribute
             * is null for this tuple
             */
            attdata[attoff] = GetIndexValue(htup,
                                            htupdesc,
                                            attoff,
                                            attnum,
                                            finfo,
                                            &attnull,
                                            buffer);
            nulls[attoff] = (attnull ? 'n' : ' ');
        }

        /* form an index tuple and point it at the heap tuple */
        itup = index_formtuple(itupdesc, attdata, nulls);

        /*
         * If the single index key is null, we don't insert it into
         * the index.  Hash tables support scans on '='.
         * Relational algebra says that A = B
         * returns null if either A or B is null.  This
         * means that no qualification used in an index scan could ever
         * return true on a null attribute.  It also means that indices
         * can't be used by ISNULL or NOTNULL scans, but that's an
         * artifact of the strategy map architecture chosen in 1986, not
         * of the way nulls are handled here.
         */

        if (itup->t_info & INDEX_NULL_MASK) {
            pfree(itup);
            continue;
        }

        itup->t_tid = htup->t_ctid;
        hitem = _hash_formitem(itup);
        res = _hash_doinsert(index, hitem);
        pfree(hitem);
        pfree(itup);
        pfree(res);
    }

    /* okay, all heap tuples are indexed */
    heap_endscan(hscan);

    if (pred != NULL || oldPred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
        ExecDestroyTupleTable(tupleTable, true);
        pfree(econtext);
#endif /* OMIT_PARTIAL_INDEX */
    }

    /*
     * Since we just counted the tuples in the heap, we update its
     * stats in pg_class to guarantee that the planner takes advantage
     * of the index we just created.  Finally, only update statistics
     * during normal index definitions, not for indices on system catalogs
     * created during bootstrap processing.  We must close the relations
     * before updating statistics to guarantee that the relcache entries
     * are flushed when we increment the command counter in UpdateStats().
     */
    if (IsNormalProcessingMode())
    {
        hrelid = heap->rd_id;
        irelid = index->rd_id;
        heap_close(heap);
        index_close(index);
        UpdateStats(hrelid, nhtups, true);
        UpdateStats(irelid, nitups, false);
        if (oldPred != NULL) {
            if (nitups == nhtups) pred = NULL;
            UpdateIndexPredicate(irelid, oldPred, pred);
        }
    }

    /* be tidy */
    pfree(nulls);
    pfree(attdata);

    /* all done */
    BuildingHash = false;
}

/*
 *  hashinsert() -- insert an index tuple into a hash table.
 *
 *      Hash on the index tuple's key, find the appropriate location
 *      for the new tuple, put it there, and return an InsertIndexResult
 *      to the caller.
 */
InsertIndexResult
hashinsert(Relation rel, IndexTuple itup)
{
    HashItem hitem;
    InsertIndexResult res;

    if (itup->t_info & INDEX_NULL_MASK)
        return ((InsertIndexResult) NULL);

    hitem = _hash_formitem(itup);

    res = _hash_doinsert(rel, hitem);

    pfree(hitem);

    return (res);
}


/*
 *  hashgettuple() -- Get the next tuple in the scan.
 */
char *
hashgettuple(IndexScanDesc scan, ScanDirection dir)
{
    RetrieveIndexResult res;

    /*
     * If we've already initialized this scan, we can just advance it
     * in the appropriate direction.  If we haven't done so yet, we
     * call a routine to get the first item in the scan.
     */

    if (ItemPointerIsValid(&(scan->currentItemData)))
        res = _hash_next(scan, dir);
    else
        res = _hash_first(scan, dir);

    return ((char *) res);
}


/*
 *  hashbeginscan() -- start a scan on a hash index
 */
char *
hashbeginscan(Relation rel,
              bool fromEnd,
              uint16 keysz,
              ScanKey scankey)
{
    IndexScanDesc scan;
    HashScanOpaque so;

    scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
    so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
    so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer;
    scan->opaque = so;
    scan->flags = 0x0;

    /* register scan in case we change pages it's using */
    _hash_regscan(scan);

    return ((char *) scan);
}

/*
 *  hashrescan() -- rescan an index relation
 */
void
hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
{
    ItemPointer iptr;
    HashScanOpaque so;

    so = (HashScanOpaque) scan->opaque;

    /* we hold a read lock on the current page in the scan */
    if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
        _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
        so->hashso_curbuf = InvalidBuffer;
        ItemPointerSetInvalid(iptr);
    }
    if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
        _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
        so->hashso_mrkbuf = InvalidBuffer;
        ItemPointerSetInvalid(iptr);
    }

    /* reset the scan key */
    if (scan->numberOfKeys > 0) {
        memmove(scan->keyData,
                scankey,
                scan->numberOfKeys * sizeof(ScanKeyData));
    }
}

/*
 *  hashendscan() -- close down a scan
 */
void
hashendscan(IndexScanDesc scan)
{

    ItemPointer iptr;
    HashScanOpaque so;

    so = (HashScanOpaque) scan->opaque;

    /* release any locks we still hold */
    if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
        _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
        so->hashso_curbuf = InvalidBuffer;
        ItemPointerSetInvalid(iptr);
    }

    if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
        if (BufferIsValid(so->hashso_mrkbuf))
            _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
        so->hashso_mrkbuf = InvalidBuffer;
        ItemPointerSetInvalid(iptr);
    }

    /* don't need scan registered anymore */
    _hash_dropscan(scan);

    /* be tidy */
#ifdef PERFECT_MMGR
    pfree (scan->opaque);
#endif /* PERFECT_MMGR */
}

/*
 *  hashmarkpos() -- save current scan position
 */
void
hashmarkpos(IndexScanDesc scan)
{
    ItemPointer iptr;
    HashScanOpaque so;

    /* see if we ever call this code.  if we do, then so_mrkbuf is a
     * useful element in the scan->opaque structure.  if this procedure
     * is never called, so_mrkbuf should be removed from the scan->opaque
     * structure.
     */
    elog(NOTICE, "Hashmarkpos() called.");

    so = (HashScanOpaque) scan->opaque;

    /* release lock on old marked data, if any */
    if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
        _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
        so->hashso_mrkbuf = InvalidBuffer;
        ItemPointerSetInvalid(iptr);
    }

    /* bump lock on currentItemData and copy to currentMarkData */
    if (ItemPointerIsValid(&(scan->currentItemData))) {
        so->hashso_mrkbuf = _hash_getbuf(scan->relation,
                                         BufferGetBlockNumber(so->hashso_curbuf),
                                         HASH_READ);
        scan->currentMarkData = scan->currentItemData;
    }
}

/*
 *  hashrestrpos() -- restore scan to last saved position
 */
void
hashrestrpos(IndexScanDesc scan)
{
    ItemPointer iptr;
    HashScanOpaque so;

    /* see if we ever call this code.  if we do, then so_mrkbuf is a
     * useful element in the scan->opaque structure.  if this procedure
     * is never called, so_mrkbuf should be removed from the scan->opaque
     * structure.
     */
    elog(NOTICE, "Hashrestrpos() called.");

    so = (HashScanOpaque) scan->opaque;

    /* release lock on current data, if any */
    if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
        _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
        so->hashso_curbuf = InvalidBuffer;
        ItemPointerSetInvalid(iptr);
    }

    /* bump lock on currentMarkData and copy to currentItemData */
    if (ItemPointerIsValid(&(scan->currentMarkData))) {
        so->hashso_curbuf =
            _hash_getbuf(scan->relation,
                         BufferGetBlockNumber(so->hashso_mrkbuf),
                         HASH_READ);

        scan->currentItemData = scan->currentMarkData;
    }
}

/* stubs */
void
hashdelete(Relation rel, ItemPointer tid)
{
    /* adjust any active scans that will be affected by this deletion */
    _hash_adjscans(rel, tid);

    /* delete the data from the page */
    _hash_pagedel(rel, tid);
}

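An aside on hashgettuple() above: it relies on the usual access-method cursor convention that an invalid currentItemData means the scan has not yet been positioned, so the first call goes through _hash_first() and every later call goes through _hash_next(). A minimal standalone sketch of that dispatch with toy stand-ins (ToyScan and toy_gettuple are hypothetical names, not part of this distribution):

#include <stdbool.h>
#include <stdio.h>

/* Toy cursor illustrating the dispatch in hashgettuple(): an invalid
 * current position (-1 here, like !ItemPointerIsValid()) means the scan
 * has not started, so we take the "first" path; otherwise, "next".
 */
typedef struct ToyScan {
    int current;        /* -1 == not yet positioned */
    int nitems;
} ToyScan;

static bool
toy_gettuple(ToyScan *scan, int *out)
{
    if (scan->current >= 0)
        scan->current++;        /* analogous to _hash_next() */
    else
        scan->current = 0;      /* analogous to _hash_first() */

    if (scan->current >= scan->nitems)
        return false;           /* scan exhausted */
    *out = scan->current;
    return true;
}

int
main(void)
{
    ToyScan scan = { -1, 3 };
    int     item;

    while (toy_gettuple(&scan, &item))
        printf("got item %d\n", item);
    return 0;
}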
276
src/backend/access/hash/hashfunc.c
Normal file
@@ -0,0 +1,276 @@
/*-------------------------------------------------------------------------
 *
 * hashfunc.c--
 *    Comparison functions for hash access method.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
 *
 * NOTES
 *    These functions are stored in pg_amproc.  For each operator class
 *    defined on hash tables, they compute the hash value of the argument.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include "utils/nabstime.h"

uint32 hashint2(int16 key)
{
    return ((uint32) ~key);
}

uint32 hashint4(uint32 key)
{
    return (~key);
}

/* Hash function from Chris Torek. */
uint32 hashfloat4(float32 keyp)
{
    int len;
    int loop;
    uint32 h;
    char *kp = (char *) keyp;

    len = sizeof(float32data);

#define HASH4a   h = (h << 5) - h + *kp++;
#define HASH4b   h = (h << 5) + h + *kp++;
#define HASH4 HASH4b


    h = 0;
    if (len > 0) {
        loop = (len + 8 - 1) >> 3;

        switch (len & (8 - 1)) {
        case 0:
            do {                /* All fall throughs */
                HASH4;
            case 7:
                HASH4;
            case 6:
                HASH4;
            case 5:
                HASH4;
            case 4:
                HASH4;
            case 3:
                HASH4;
            case 2:
                HASH4;
            case 1:
                HASH4;
            } while (--loop);
        }
    }
    return (h);
}


uint32 hashfloat8(float64 keyp)
{
    int len;
    int loop;
    uint32 h;
    char *kp = (char *) keyp;

    len = sizeof(float64data);

#define HASH4a   h = (h << 5) - h + *kp++;
#define HASH4b   h = (h << 5) + h + *kp++;
#define HASH4 HASH4b


    h = 0;
    if (len > 0) {
        loop = (len + 8 - 1) >> 3;

        switch (len & (8 - 1)) {
        case 0:
            do {                /* All fall throughs */
                HASH4;
            case 7:
                HASH4;
            case 6:
                HASH4;
            case 5:
                HASH4;
            case 4:
                HASH4;
            case 3:
                HASH4;
            case 2:
                HASH4;
            case 1:
                HASH4;
            } while (--loop);
        }
    }
    return (h);
}


uint32 hashoid(Oid key)
{
    return ((uint32) ~key);
}


uint32 hashchar(char key)
{
    int len;
    uint32 h;

    len = sizeof(char);

#define PRIME1          37
#define PRIME2          1048583

    h = 0;
    /* Convert char to integer */
    h = h * PRIME1 ^ (key - ' ');
    h %= PRIME2;

    return (h);
}

uint32 hashchar2(uint16 intkey)
{
    uint32 h;
    int len;
    char *key = (char *) &intkey;

    h = 0;
    len = sizeof(uint16);
    /* Convert string to integer */
    while (len--)
        h = h * PRIME1 ^ (*key++ - ' ');
    h %= PRIME2;

    return (h);
}

uint32 hashchar4(uint32 intkey)
{
    uint32 h;
    int len;
    char *key = (char *) &intkey;

    h = 0;
    len = sizeof(uint32);
    /* Convert string to integer */
    while (len--)
        h = h * PRIME1 ^ (*key++ - ' ');
    h %= PRIME2;

    return (h);
}

uint32 hashchar8(char *key)
{
    uint32 h;
    int len;

    h = 0;
    len = sizeof(char8);
    /* Convert string to integer */
    while (len--)
        h = h * PRIME1 ^ (*key++ - ' ');
    h %= PRIME2;

    return (h);
}

uint32 hashname(NameData *n)
{
    uint32 h;
    int len;
    char *key;

    key = n->data;

    h = 0;
    len = NAMEDATALEN;
    /* Convert string to integer */
    while (len--)
        h = h * PRIME1 ^ (*key++ - ' ');
    h %= PRIME2;

    return (h);
}


uint32 hashchar16(char *key)
{
    uint32 h;
    int len;

    h = 0;
    len = sizeof(char16);
    /* Convert string to integer */
    while (len--)
        h = h * PRIME1 ^ (*key++ - ' ');
    h %= PRIME2;

    return (h);
}


/*
 * (Comment from the original db3 hashing code: )
 *
 * "This is INCREDIBLY ugly, but fast.  We break the string up into 8 byte
 * units.  On the first time through the loop we get the 'leftover bytes'
 * (strlen % 8).  On every other iteration, we perform 8 HASHC's so we handle
 * all 8 bytes.  Essentially, this saves us 7 cmp & branch instructions.  If
 * this routine is heavily used enough, it's worth the ugly coding.
 *
 * "OZ's original sdbm hash"
 */
uint32 hashtext(struct varlena *key)
{
    int keylen;
    char *keydata;
    uint32 n;
    int loop;

    keydata = VARDATA(key);
    keylen = VARSIZE(key);

    /* keylen includes the four bytes in which string keylength is stored */
    keylen -= sizeof(VARSIZE(key));

#define HASHC   n = *keydata++ + 65599 * n

    n = 0;
    if (keylen > 0) {
        loop = (keylen + 8 - 1) >> 3;

        switch (keylen & (8 - 1)) {
        case 0:
            do {                /* All fall throughs */
                HASHC;
            case 7:
                HASHC;
            case 6:
                HASHC;
            case 5:
                HASHC;
            case 4:
                HASHC;
            case 3:
                HASHC;
            case 2:
                HASHC;
            case 1:
                HASHC;
            } while (--loop);
        }
    }
    return (n);
}
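The unrolled switch in hashtext() is a Duff's-device rendering of the sdbm string hash noted in the comment above: stripped of the unrolling, it applies n = *keydata++ + 65599 * n once per byte. A minimal sketch in plain-loop form, computing the same value (sdbm_hash and the driver are illustrative names only, not part of the distribution):

#include <stdio.h>

/* Straight-loop form of the HASHC recurrence used in hashtext():
 * n = *keydata++ + 65599 * n, applied once per byte, in order.
 * The real code unrolls this eight ways to save compare-and-branch
 * overhead; both forms produce identical results for the same input.
 */
static unsigned int
sdbm_hash(const char *keydata, int keylen)
{
    unsigned int n = 0;

    while (keylen-- > 0)
        n = *keydata++ + 65599 * n;
    return n;
}

int
main(void)
{
    printf("%u\n", sdbm_hash("postgres", 8));
    return 0;
}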
239
src/backend/access/hash/hashinsert.c
Normal file
@@ -0,0 +1,239 @@
/*-------------------------------------------------------------------------
 *
 * hashinsert.c--
 *    Item insertion in hash tables for Postgres.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/bufpage.h"

#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"

#include "access/heapam.h"
#include "access/genam.h"
#include "access/hash.h"

static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz,
                                          ScanKey scankey, HashItem hitem,
                                          Buffer metabuf);
static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz,
                                   ScanKey itup_scankey, Size itemsize,
                                   HashItem hitem);

/*
 *  _hash_doinsert() -- Handle insertion of a single HashItem in the table.
 *
 *      This routine is called by the public interface routines, hashbuild
 *      and hashinsert.  By here, hashitem is filled in, and has a unique
 *      (xid, seqno) pair.  The datum to be used as a "key" is in the
 *      hashitem.
 */
InsertIndexResult
_hash_doinsert(Relation rel, HashItem hitem)
{
    Buffer buf;
    Buffer metabuf;
    BlockNumber blkno;
    HashMetaPage metap;
    IndexTuple itup;
    InsertIndexResult res;
    ScanKey itup_scankey;
    int natts;
    Page page;

    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
    metap = (HashMetaPage) BufferGetPage(metabuf);
    _hash_checkpage((Page) metap, LH_META_PAGE);

    /* we need a scan key to do our search, so build one */
    itup = &(hitem->hash_itup);
    if ((natts = rel->rd_rel->relnatts) != 1)
        elog(WARN, "Hash indices valid for only one index key.");
    itup_scankey = _hash_mkscankey(rel, itup, metap);

    /*
     * find the first page in the bucket chain containing this key and
     * place it in buf.  _hash_search obtains a read lock for us.
     */
    _hash_search(rel, natts, itup_scankey, &buf, metap);
    page = BufferGetPage(buf);
    _hash_checkpage(page, LH_BUCKET_PAGE);

    /*
     * trade in our read lock for a write lock so that we can do the
     * insertion.
     */
    blkno = BufferGetBlockNumber(buf);
    _hash_relbuf(rel, buf, HASH_READ);
    buf = _hash_getbuf(rel, blkno, HASH_WRITE);


    /*
     * XXX btree comment (haven't decided what to do in hash): don't
     * think the bucket can be split while we're reading the metapage.
     *
     * If the page was split between the time that we surrendered our
     * read lock and acquired our write lock, then this page may no
     * longer be the right place for the key we want to insert.
     */

    /* do the insertion */
    res = _hash_insertonpg(rel, buf, natts, itup_scankey,
                           hitem, metabuf);

    /* be tidy */
    _hash_freeskey(itup_scankey);

    return (res);
}

/*
 *  _hash_insertonpg() -- Insert a tuple on a particular page in the table.
 *
 *      This recursive procedure does the following things:
 *
 *          +  if necessary, splits the target page.
 *          +  inserts the tuple.
 *
 *      On entry, we must have the right buffer on which to do the
 *      insertion, and the buffer must be pinned and locked.  On return,
 *      we will have dropped both the pin and the write lock on the buffer.
 *
 */
static InsertIndexResult
_hash_insertonpg(Relation rel,
                 Buffer buf,
                 int keysz,
                 ScanKey scankey,
                 HashItem hitem,
                 Buffer metabuf)
{
    InsertIndexResult res;
    Page page;
    BlockNumber itup_blkno;
    OffsetNumber itup_off;
    int itemsz;
    HashPageOpaque pageopaque;
    bool do_expand = false;
    Buffer ovflbuf;
    HashMetaPage metap;
    Bucket bucket;

    metap = (HashMetaPage) BufferGetPage(metabuf);
    _hash_checkpage((Page) metap, LH_META_PAGE);

    page = BufferGetPage(buf);
    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
    pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
    bucket = pageopaque->hasho_bucket;

    itemsz = IndexTupleDSize(hitem->hash_itup)
        + (sizeof(HashItemData) - sizeof(IndexTupleData));
    itemsz = DOUBLEALIGN(itemsz);

    while (PageGetFreeSpace(page) < itemsz) {
        /*
         * no space on this page; check for an overflow page
         */
        if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) {
            /*
             * ovfl page exists; go get it.  if it doesn't have room,
             * we'll find out next pass through the loop test above.
             */
            ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno,
                                   HASH_WRITE);
            _hash_relbuf(rel, buf, HASH_WRITE);
            buf = ovflbuf;
            page = BufferGetPage(buf);
        } else {
            /*
             * we're at the end of the bucket chain and we haven't
             * found a page with enough room.  allocate a new overflow
             * page.
             */
            do_expand = true;
            ovflbuf = _hash_addovflpage(rel, &metabuf, buf);
            _hash_relbuf(rel, buf, HASH_WRITE);
            buf = ovflbuf;
            page = BufferGetPage(buf);

            if (PageGetFreeSpace(page) < itemsz) {
                /* it doesn't fit on an empty page -- give up */
                elog(WARN, "hash item too large");
            }
        }
        _hash_checkpage(page, LH_OVERFLOW_PAGE);
        pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
        Assert(pageopaque->hasho_bucket == bucket);
    }

    itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem);
    itup_blkno = BufferGetBlockNumber(buf);

    /* by here, the new tuple is inserted */
    res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));

    ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);

    if (res != NULL) {
        /*
         * Increment the number of keys in the table.
         * We switch lock access type just for a moment
         * to allow greater accessibility to the metapage.
         */
        metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
                                                  HASH_READ, HASH_WRITE);
        metap->hashm_nkeys += 1;
        metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
                                                  HASH_WRITE, HASH_READ);

    }

    _hash_wrtbuf(rel, buf);

    if (do_expand ||
        (metap->hashm_nkeys / (metap->hashm_maxbucket + 1))
        > metap->hashm_ffactor) {
        _hash_expandtable(rel, metabuf);
    }
    _hash_relbuf(rel, metabuf, HASH_READ);
    return (res);
}

/*
 *  _hash_pgaddtup() -- add a tuple to a particular page in the index.
 *
 *      This routine adds the tuple to the page as requested, and keeps the
 *      write lock and reference associated with the page's buffer.  It is
 *      an error to call pgaddtup() without a write lock and reference.
 */
static OffsetNumber
_hash_pgaddtup(Relation rel,
               Buffer buf,
               int keysz,
               ScanKey itup_scankey,
               Size itemsize,
               HashItem hitem)
{
    OffsetNumber itup_off;
    Page page;

    page = BufferGetPage(buf);
    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);

    itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
    (void) PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED);

    /* write the buffer, but hold our lock */
    _hash_wrtnorelbuf(rel, buf);

    return (itup_off);
}
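The central loop of _hash_insertonpg() above walks the bucket chain until it finds a page with room for the item, appending a fresh overflow page at the end of the chain when none has room. A toy in-memory sketch of that walk under assumed types (ToyPage and find_page_with_room are hypothetical; the real code works through the buffer manager and _hash_addovflpage, and never holds pages as malloc'd structs):

#include <stdlib.h>

/* Toy model of a bucket chain: each "page" tracks only its free space
 * and a pointer to the next overflow page.  Allocations are never freed;
 * this is illustration, not a real allocator.
 */
typedef struct ToyPage {
    int             free_space;
    struct ToyPage *next;
} ToyPage;

/* Find (or create) a page in the chain with at least 'itemsz' free,
 * mirroring the while-loop in _hash_insertonpg(), then consume the space.
 */
static ToyPage *
find_page_with_room(ToyPage *page, int itemsz, int pagesz)
{
    while (page->free_space < itemsz) {
        if (page->next != NULL) {
            /* an overflow page exists; test it on the next pass */
            page = page->next;
        } else {
            /* end of chain: append a new, empty overflow page */
            ToyPage *ovfl = malloc(sizeof(ToyPage));

            ovfl->free_space = pagesz;
            ovfl->next = NULL;
            page->next = ovfl;
            page = ovfl;
            if (page->free_space < itemsz)
                return NULL;    /* item too large even for an empty page */
        }
    }
    page->free_space -= itemsz;
    return page;
}

int
main(void)
{
    ToyPage bucket = { 100, NULL };

    (void) find_page_with_room(&bucket, 60, 100);  /* fits on the bucket page */
    (void) find_page_with_room(&bucket, 60, 100);  /* forces an overflow page */
    return 0;
}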
614
src/backend/access/hash/hashovfl.c
Normal file
@@ -0,0 +1,614 @@
/*-------------------------------------------------------------------------
 *
 * hashovfl.c--
 *    Overflow page management code for the Postgres hash access method
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
 *
 * NOTES
 *    Overflow pages look like ordinary relation pages.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/bufpage.h"

#include "utils/elog.h"
#include "utils/rel.h"
#include "utils/excid.h"

#include "access/genam.h"
#include "access/hash.h"

static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer *metabufp);
static uint32 _hash_firstfreebit(uint32 map);

/*
 *  _hash_addovflpage
 *
 *  Add an overflow page to the page currently pointed to by the buffer
 *  argument 'buf'.
 *
 *  *Metabufp has a read lock upon entering the function; buf has a
 *  write lock.
 *
 */
Buffer
_hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf)
{

    OverflowPageAddress oaddr;
    BlockNumber ovflblkno;
    Buffer ovflbuf;
    HashMetaPage metap;
    HashPageOpaque ovflopaque;
    HashPageOpaque pageopaque;
    Page page;
    Page ovflpage;

    /* this had better be the last page in a bucket chain */
    page = BufferGetPage(buf);
    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
    pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
    Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno));

    metap = (HashMetaPage) BufferGetPage(*metabufp);
    _hash_checkpage((Page) metap, LH_META_PAGE);

    /* allocate an empty overflow page */
    oaddr = _hash_getovfladdr(rel, metabufp);
    if (oaddr == InvalidOvflAddress) {
        elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr.");
    }
    ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr)));
    Assert(BlockNumberIsValid(ovflblkno));
    ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
    Assert(BufferIsValid(ovflbuf));
    ovflpage = BufferGetPage(ovflbuf);

    /* initialize the new overflow page */
    _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
    ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
    ovflopaque->hasho_nextblkno = InvalidBlockNumber;
    ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
    ovflopaque->hasho_oaddr = oaddr;
    ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
    _hash_wrtnorelbuf(rel, ovflbuf);

    /* logically chain overflow page to previous page */
    pageopaque->hasho_nextblkno = ovflblkno;
    _hash_wrtnorelbuf(rel, buf);
    return (ovflbuf);
}

/*
 *  _hash_getovfladdr()
 *
 *  Find an available overflow page and return its address.
 *
 *  When we enter this function, we have a read lock on *metabufp which
 *  we change to a write lock immediately.  Before exiting, the write lock
 *  is exchanged for a read lock.
 *
 */
static OverflowPageAddress
_hash_getovfladdr(Relation rel, Buffer *metabufp)
{
    HashMetaPage metap;
    Buffer mapbuf;
    BlockNumber blkno;
    PageOffset offset;
    OverflowPageAddress oaddr;
    SplitNumber splitnum;
    uint32 *freep;
    uint32 max_free;
    uint32 bit;
    uint32 first_page;
    uint32 free_bit;
    uint32 free_page;
    uint32 in_use_bits;
    uint32 i, j;

    metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE);

    splitnum = metap->OVFL_POINT;
    max_free = metap->SPARES[splitnum];

    free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT);
    free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1);

    /* Look through all the free maps to find the first free block */
    first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT);
    for ( i = first_page; i <= free_page; i++ ) {
        Page mappage;

        blkno = metap->hashm_mapp[i];
        mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
        mappage = BufferGetPage(mapbuf);
        _hash_checkpage(mappage, LH_BITMAP_PAGE);
        freep = HashPageGetBitmap(mappage);
        Assert(freep);

        if (i == free_page)
            in_use_bits = free_bit;
        else
            in_use_bits = BMPGSZ_BIT(metap) - 1;

        if (i == first_page) {
            bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1);
            j = bit / BITS_PER_MAP;
            bit = bit & ~(BITS_PER_MAP - 1);
        } else {
            bit = 0;
            j = 0;
        }
        for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
            if (freep[j] != ALL_SET)
                goto found;
    }

    /* No Free Page Found - have to allocate a new page */
    metap->LAST_FREED = metap->SPARES[splitnum];
    metap->SPARES[splitnum]++;
    offset = metap->SPARES[splitnum] -
        (splitnum ? metap->SPARES[splitnum - 1] : 0);

#define OVMSG "HASH: Out of overflow pages.  Out of luck.\n"

    if (offset > SPLITMASK) {
        if (++splitnum >= NCACHED) {
            elog(WARN, OVMSG);
        }
        metap->OVFL_POINT = splitnum;
        metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
        metap->SPARES[splitnum-1]--;
        offset = 0;
    }

    /* Check if we need to allocate a new bitmap page */
    if (free_bit == BMPGSZ_BIT(metap) - 1) {
        /* won't be needing old map page */

        _hash_relbuf(rel, mapbuf, HASH_WRITE);

        free_page++;
        if (free_page >= NCACHED) {
            elog(WARN, OVMSG);
        }

        /*
         * This is tricky.  The 1 indicates that you want the new page
         * allocated with 1 clear bit.  Actually, you are going to
         * allocate 2 pages from this map.  The first is going to be
         * the map page, the second is the overflow page we were
         * looking for.  The init_bitmap routine automatically sets
         * the first bit of itself to indicate that the bitmap itself
         * is in use.  We would explicitly set the second bit, but
         * don't have to if we tell init_bitmap not to leave it clear
         * in the first place.
         */
        if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset),
                             1, free_page)) {
            elog(WARN, "overflow_page: problem with _hash_initbitmap.");
        }
        metap->SPARES[splitnum]++;
        offset++;
        if (offset > SPLITMASK) {
            if (++splitnum >= NCACHED) {
                elog(WARN, OVMSG);
            }
            metap->OVFL_POINT = splitnum;
            metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
            metap->SPARES[splitnum-1]--;
            offset = 0;
        }
    } else {

        /*
         * Free_bit addresses the last used bit.  Bump it to address
         * the first available bit.
         */
        free_bit++;
        SETBIT(freep, free_bit);
        _hash_wrtbuf(rel, mapbuf);
    }

    /* Calculate address of the new overflow page */
    oaddr = OADDR_OF(splitnum, offset);
    _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
    return (oaddr);

found:
    bit = bit + _hash_firstfreebit(freep[j]);
    SETBIT(freep, bit);
    _hash_wrtbuf(rel, mapbuf);

    /*
     * Bits are addressed starting with 0, but overflow pages are addressed
     * beginning at 1.  Bit is a bit address number, so we need to increment
     * it to convert it to a page number.
     */

    bit = 1 + bit + (i * BMPGSZ_BIT(metap));
    if (bit >= metap->LAST_FREED) {
        metap->LAST_FREED = bit - 1;
    }

    /* Calculate the split number for this page */
    for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++)
        ;
    offset = (i ? bit - metap->SPARES[i - 1] : bit);
    if (offset >= SPLITMASK) {
        elog(WARN, OVMSG);
    }

    /* initialize this page */
    oaddr = OADDR_OF(i, offset);
    _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
    return (oaddr);
}

/*
 *  _hash_firstfreebit()
 *
 *  Return the first bit that is not set in the argument 'map'.  This
 *  function is used to find an available overflow page within a
 *  splitnumber.
 *
 */
static uint32
_hash_firstfreebit(uint32 map)
{
    uint32 i, mask;

    mask = 0x1;
    for (i = 0; i < BITS_PER_MAP; i++) {
        if (!(mask & map))
            return (i);
        mask = mask << 1;
    }
    return (i);
}

/*
 *  _hash_freeovflpage() -
 *
 *  Mark this overflow page as free and return a buffer with
 *  the page that follows it (which may be defined as
 *  InvalidBuffer).
 *
 */
Buffer
_hash_freeovflpage(Relation rel, Buffer ovflbuf)
{
    HashMetaPage metap;
    Buffer metabuf;
    Buffer mapbuf;
    BlockNumber prevblkno;
    BlockNumber blkno;
    BlockNumber nextblkno;
    HashPageOpaque ovflopaque;
    Page ovflpage;
    Page mappage;
    OverflowPageAddress addr;
    SplitNumber splitnum;
    uint32 *freep;
    uint32 ovflpgno;
    int32 bitmappage, bitmapbit;
    Bucket bucket;

    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
    metap = (HashMetaPage) BufferGetPage(metabuf);
    _hash_checkpage((Page) metap, LH_META_PAGE);

    ovflpage = BufferGetPage(ovflbuf);
    _hash_checkpage(ovflpage, LH_OVERFLOW_PAGE);
    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
    addr = ovflopaque->hasho_oaddr;
    nextblkno = ovflopaque->hasho_nextblkno;
    prevblkno = ovflopaque->hasho_prevblkno;
    bucket = ovflopaque->hasho_bucket;
    (void) memset(ovflpage, 0, BufferGetPageSize(ovflbuf));
    _hash_wrtbuf(rel, ovflbuf);

    /*
     * fix up the bucket chain.  this is a doubly-linked list, so we
     * must fix up the bucket chain members behind and ahead of the
     * overflow page being deleted.
     *
     * XXX this should look like:
     *      - lock prev/next
     *      - modify/write prev/next (how to do write ordering with a
     *        doubly-linked list???)
     *      - unlock prev/next
     */
    if (BlockNumberIsValid(prevblkno)) {
        Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE);
        Page prevpage = BufferGetPage(prevbuf);
        HashPageOpaque prevopaque =
            (HashPageOpaque) PageGetSpecialPointer(prevpage);

        _hash_checkpage(prevpage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
        Assert(prevopaque->hasho_bucket == bucket);
        prevopaque->hasho_nextblkno = nextblkno;
        _hash_wrtbuf(rel, prevbuf);
    }
    if (BlockNumberIsValid(nextblkno)) {
        Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE);
        Page nextpage = BufferGetPage(nextbuf);
        HashPageOpaque nextopaque =
            (HashPageOpaque) PageGetSpecialPointer(nextpage);

        _hash_checkpage(nextpage, LH_OVERFLOW_PAGE);
        Assert(nextopaque->hasho_bucket == bucket);
        nextopaque->hasho_prevblkno = prevblkno;
        _hash_wrtbuf(rel, nextbuf);
    }

    /*
     * Fix up the overflow page bitmap that tracks this particular
     * overflow page.  The bitmap can be found in the MetaPageData
     * array element hashm_mapp[bitmappage].
     */
    splitnum = (addr >> SPLITSHIFT);
    ovflpgno =
        (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;

    if (ovflpgno < metap->LAST_FREED) {
        metap->LAST_FREED = ovflpgno;
    }

    bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT));
    bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1);

    blkno = metap->hashm_mapp[bitmappage];
    mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
    mappage = BufferGetPage(mapbuf);
    _hash_checkpage(mappage, LH_BITMAP_PAGE);
    freep = HashPageGetBitmap(mappage);
    CLRBIT(freep, bitmapbit);
    _hash_wrtbuf(rel, mapbuf);

    _hash_relbuf(rel, metabuf, HASH_WRITE);

    /*
     * now instantiate the page that replaced this one,
     * if it exists, and return that buffer with a write lock.
     */
    if (BlockNumberIsValid(nextblkno)) {
        return (_hash_getbuf(rel, nextblkno, HASH_WRITE));
    } else {
        return (InvalidBuffer);
    }
}


/*
 *  _hash_initbitmap()
 *
 *  Initialize a new bitmap page.  The metapage has a write-lock upon
 *  entering the function.
 *
 *  'pnum' is the OverflowPageAddress of the new bitmap page.
 *  'nbits' is how many bits to clear (i.e., make available) in the new
 *  bitmap page.  the remainder of the bits (as well as the first bit,
 *  representing the bitmap page itself) will be set.
 *  'ndx' is the 0-based offset of the new bitmap page within the
 *  metapage's array of bitmap page OverflowPageAddresses.
 */

#define INT_MASK        ((1 << INT_TO_BIT) -1)

int32
_hash_initbitmap(Relation rel,
                 HashMetaPage metap,
                 int32 pnum,
                 int32 nbits,
                 int32 ndx)
{
    Buffer buf;
    BlockNumber blkno;
    Page pg;
    HashPageOpaque op;
    uint32 *freep;
    int clearbytes, clearints;

    blkno = OADDR_TO_BLKNO(pnum);
    buf = _hash_getbuf(rel, blkno, HASH_WRITE);
    pg = BufferGetPage(buf);
    _hash_pageinit(pg, BufferGetPageSize(buf));
    op = (HashPageOpaque) PageGetSpecialPointer(pg);
    op->hasho_oaddr = InvalidOvflAddress;
    op->hasho_prevblkno = InvalidBlockNumber;
    op->hasho_nextblkno = InvalidBlockNumber;
    op->hasho_flag = LH_BITMAP_PAGE;
    op->hasho_bucket = -1;

    freep = HashPageGetBitmap(pg);

    /* set all of the bits above 'nbits' to 1 */
    clearints = ((nbits - 1) >> INT_TO_BIT) + 1;
    clearbytes = clearints << INT_TO_BYTE;
    (void) memset((char *) freep, 0, clearbytes);
    (void) memset(((char *) freep) + clearbytes, 0xFF,
                  BMPGSZ_BYTE(metap) - clearbytes);
    freep[clearints - 1] = ALL_SET << (nbits & INT_MASK);

    /* bit 0 represents the new bitmap page */
    SETBIT(freep, 0);

    /* metapage already has a write lock */
    metap->hashm_nmaps++;
    metap->hashm_mapp[ndx] = blkno;

    /* write out the new bitmap page (releasing its locks) */
    _hash_wrtbuf(rel, buf);

    return (0);
}


/*
 *  _hash_squeezebucket(rel, bucket)
 *
 *  Try to squeeze the tuples onto pages occurring earlier in the
 *  bucket chain in an attempt to free overflow pages.  When we start
 *  the "squeezing", the page from which we start taking tuples (the
 *  "read" page) is the last bucket in the bucket chain and the page
 *  onto which we start squeezing tuples (the "write" page) is the
 *  first page in the bucket chain.  The read page works backward and
 *  the write page works forward; the procedure terminates when the
 *  read page and write page are the same page.
 */
void
_hash_squeezebucket(Relation rel,
                    HashMetaPage metap,
                    Bucket bucket)
{
    Buffer wbuf;
    Buffer rbuf;
    BlockNumber wblkno;
    BlockNumber rblkno;
    Page wpage;
    Page rpage;
    HashPageOpaque wopaque;
    HashPageOpaque ropaque;
    OffsetNumber woffnum;
    OffsetNumber roffnum;
    HashItem hitem;
    int itemsz;

    /* elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */

    /*
     * start squeezing into the base bucket page.
     */
    wblkno = BUCKET_TO_BLKNO(bucket);
    wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
    wpage = BufferGetPage(wbuf);
    _hash_checkpage(wpage, LH_BUCKET_PAGE);
    wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);

    /*
     * if there aren't any overflow pages, there's nothing to squeeze.
     */
    if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) {
        _hash_relbuf(rel, wbuf, HASH_WRITE);
        return;
    }

    /*
     * find the last page in the bucket chain by starting at the base
     * bucket page and working forward.
     *
     * XXX if chains tend to be long, we should probably move forward
     * using HASH_READ and then _hash_chgbufaccess to HASH_WRITE when
     * we reach the end.  if they are short we probably don't care
     * very much.  if the hash function is working at all, they had
     * better be short..
     */
    ropaque = wopaque;
    do {
        rblkno = ropaque->hasho_nextblkno;
        if (ropaque != wopaque) {
            _hash_relbuf(rel, rbuf, HASH_WRITE);
        }
        rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
        rpage = BufferGetPage(rbuf);
        _hash_checkpage(rpage, LH_OVERFLOW_PAGE);
        Assert(!PageIsEmpty(rpage));
        ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
        Assert(ropaque->hasho_bucket == bucket);
    } while (BlockNumberIsValid(ropaque->hasho_nextblkno));

    /*
     * squeeze the tuples.
     */
    roffnum = FirstOffsetNumber;
    for(;;) {
        hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum));
        itemsz = IndexTupleDSize(hitem->hash_itup)
            + (sizeof(HashItemData) - sizeof(IndexTupleData));
        itemsz = DOUBLEALIGN(itemsz);

        /*
         * walk up the bucket chain, looking for a page big enough for
         * this item.
         */
        while (PageGetFreeSpace(wpage) < itemsz) {
            wblkno = wopaque->hasho_nextblkno;

            _hash_wrtbuf(rel, wbuf);

            if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) {
                _hash_wrtbuf(rel, rbuf);
                /* wbuf is already released */
                return;
            }

            wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
            wpage = BufferGetPage(wbuf);
            _hash_checkpage(wpage, LH_OVERFLOW_PAGE);
            Assert(!PageIsEmpty(wpage));
            wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
            Assert(wopaque->hasho_bucket == bucket);
        }

        /*
         * if we're here, we have found room so insert on the "write"
         * page.
         */
        woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage));
        (void) PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED);

        /*
         * delete the tuple from the "read" page.
         * PageIndexTupleDelete repacks the ItemId array, so 'roffnum'
         * will be "advanced" to the "next" ItemId.
         */
        PageIndexTupleDelete(rpage, roffnum);
        _hash_wrtnorelbuf(rel, rbuf);

        /*
         * if the "read" page is now empty because of the deletion,
         * free it.
         */
        if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) {
            rblkno = ropaque->hasho_prevblkno;
            Assert(BlockNumberIsValid(rblkno));

            /*
             * free this overflow page.  the extra _hash_relbuf is
             * because _hash_freeovflpage gratuitously returns the
             * next page (we want the previous page and will get it
             * ourselves later).
             */
            rbuf = _hash_freeovflpage(rel, rbuf);
            if (BufferIsValid(rbuf)) {
                _hash_relbuf(rel, rbuf, HASH_WRITE);
            }

            if (rblkno == wblkno) {
                /* rbuf is already released */
                _hash_wrtbuf(rel, wbuf);
                return;
            }

            rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
            rpage = BufferGetPage(rbuf);
            _hash_checkpage(rpage, LH_OVERFLOW_PAGE);
            Assert(!PageIsEmpty(rpage));
            ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
            Assert(ropaque->hasho_bucket == bucket);

            roffnum = FirstOffsetNumber;
        }
    }
}
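_hash_firstfreebit() above scans one 32-bit word of the overflow bitmap for the lowest clear bit. A self-contained sketch of the same scan (first_free_bit is an illustrative name; BITS_PER_MAP is assumed to be 32, the bit width of one uint32 map word):

#include <stdio.h>

#define BITS_PER_MAP 32  /* assumed: bits per uint32 bitmap word */

/* Return the index of the lowest zero bit in 'map', or BITS_PER_MAP
 * if every bit is set -- the same contract as _hash_firstfreebit().
 */
static unsigned int
first_free_bit(unsigned int map)
{
    unsigned int i, mask = 0x1;

    for (i = 0; i < BITS_PER_MAP; i++) {
        if (!(mask & map))
            return i;
        mask <<= 1;
    }
    return i;
}

int
main(void)
{
    /* 0x17 = binary 10111: bits 0-2 and 4 set, so bit 3 is the first free */
    printf("%u\n", first_free_bit(0x17));   /* prints 3 */
    return 0;
}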
669
src/backend/access/hash/hashpage.c
Normal file
@@ -0,0 +1,669 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* hashpage.c--
|
||||
* Hash table page management code for the Postgres hash access method
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres hash pages look like ordinary relation pages. The opaque
|
||||
* data at high addresses includes information about the page including
|
||||
* whether a page is an overflow page or a true bucket, the block
|
||||
* numbers of the preceding and following pages, and the overflow
|
||||
* address of the page if it is an overflow page.
|
||||
*
|
||||
* The first page in a hash relation, page zero, is special -- it stores
|
||||
* information describing the hash table; it is referred to as teh
|
||||
* "meta page." Pages one and higher store the actual data.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/bufpage.h"
|
||||
|
||||
#include "utils/elog.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/excid.h"
|
||||
|
||||
#include "access/genam.h"
|
||||
#include "access/hash.h"
|
||||
|
||||
static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access);
|
||||
static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access);
|
||||
static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket);
|
||||
|
||||
/*
|
||||
* We use high-concurrency locking on hash indices. There are two cases in
|
||||
* which we don't do locking. One is when we're building the index.
|
||||
* Since the creating transaction has not committed, no one can see
|
||||
* the index, and there's no reason to share locks. The second case
|
||||
* is when we're just starting up the database system. We use some
|
||||
* special-purpose initialization code in the relation cache manager
|
||||
* (see utils/cache/relcache.c) to allow us to do indexed scans on
|
||||
* the system catalogs before we'd normally be able to. This happens
|
||||
* before the lock table is fully initialized, so we can't use it.
|
||||
* Strictly speaking, this violates 2pl, but we don't do 2pl on the
|
||||
* system catalogs anyway.
|
||||
*/
|
||||
|
||||
|
||||
#define USELOCKING (!BuildingHash && !IsInitProcessingMode())
|
||||
|
||||
|
||||
/*
|
||||
* _hash_metapinit() -- Initialize the metadata page of a hash index,
|
||||
* the two buckets that we begin with and the initial
|
||||
* bitmap page.
|
 */
void
_hash_metapinit(Relation rel)
{
    HashMetaPage metap;
    HashPageOpaque pageopaque;
    Buffer metabuf;
    Buffer buf;
    Page pg;
    int nbuckets;
    uint32 nelem;               /* number elements */
    uint32 lg2nelem;            /* _hash_log2(nelem) */
    uint32 nblocks;
    uint16 i;

    /* can't be sharing this with anyone, now... */
    if (USELOCKING)
        RelationSetLockForWrite(rel);

    if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) {
        elog(WARN, "Cannot initialize non-empty hash table %s",
             RelationGetRelationName(rel));
    }

    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
    pg = BufferGetPage(metabuf);
    metap = (HashMetaPage) pg;
    _hash_pageinit(pg, BufferGetPageSize(metabuf));

    metap->hashm_magic = HASH_MAGIC;
    metap->hashm_version = HASH_VERSION;
    metap->hashm_nkeys = 0;
    metap->hashm_nmaps = 0;
    metap->hashm_ffactor = DEFAULT_FFACTOR;
    metap->hashm_bsize = BufferGetPageSize(metabuf);
    metap->hashm_bshift = _hash_log2(metap->hashm_bsize);
    for (i = metap->hashm_bshift; i > 0; --i) {
        if ((1 << i) < (metap->hashm_bsize -
                        (DOUBLEALIGN(sizeof(PageHeaderData)) +
                         DOUBLEALIGN(sizeof(HashPageOpaqueData))))) {
            break;
        }
    }
    Assert(i);
    metap->hashm_bmsize = 1 << i;
    metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);

    /*
     * Make nelem = 2 rather than 0 so that we end up allocating space
     * for the next greater power of two number of buckets.
     */
    nelem = 2;
    lg2nelem = 1;               /* _hash_log2(MAX(nelem, 2)) */
    nbuckets = 2;               /* 1 << lg2nelem */

    memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares));
    memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

    metap->hashm_spares[lg2nelem] = 2;          /* lg2nelem + 1 */
    metap->hashm_spares[lg2nelem + 1] = 2;      /* lg2nelem + 1 */
    metap->hashm_ovflpoint = 1;                 /* lg2nelem */
    metap->hashm_lastfreed = 2;

    metap->hashm_maxbucket = metap->hashm_lowmask = 1;  /* nbuckets - 1 */
    metap->hashm_highmask = 3;                  /* (nbuckets << 1) - 1 */

    pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
    pageopaque->hasho_oaddr = InvalidOvflAddress;
    pageopaque->hasho_prevblkno = InvalidBlockNumber;
    pageopaque->hasho_nextblkno = InvalidBlockNumber;
    pageopaque->hasho_flag = LH_META_PAGE;
    pageopaque->hasho_bucket = -1;

    /*
     * First bitmap page is at: splitpoint lg2nelem page offset 1 which
     * turns out to be page 3. Couldn't initialize page 3 until we created
     * the first two buckets above.
     */
    if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0))
        elog(WARN, "Problem with _hash_initbitmap.");

    /* all done */
    _hash_wrtnorelbuf(rel, metabuf);

    /*
     * initialize the first two buckets
     */
    for (i = 0; i <= 1; i++) {
        buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE);
        pg = BufferGetPage(buf);
        _hash_pageinit(pg, BufferGetPageSize(buf));
        pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
        pageopaque->hasho_oaddr = InvalidOvflAddress;
        pageopaque->hasho_prevblkno = InvalidBlockNumber;
        pageopaque->hasho_nextblkno = InvalidBlockNumber;
        pageopaque->hasho_flag = LH_BUCKET_PAGE;
        pageopaque->hasho_bucket = i;
        _hash_wrtbuf(rel, buf);
    }

    _hash_relbuf(rel, metabuf, HASH_WRITE);

    if (USELOCKING)
        RelationUnsetLockForWrite(rel);
}
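The constants hard-coded above (maxbucket = lowmask = 1, highmask = 3) are just the linear-hashing masks for the two initial buckets. A minimal standalone sketch (demo only, not part of the distribution) that recomputes them:

#include <stdio.h>

int main(void)
{
    unsigned nbuckets = 2;                           /* 1 << lg2nelem, with lg2nelem = 1 */

    printf("maxbucket = %u\n", nbuckets - 1);        /* 1, matches hashm_maxbucket */
    printf("lowmask   = %u\n", nbuckets - 1);        /* 1, matches hashm_lowmask */
    printf("highmask  = %u\n", (nbuckets << 1) - 1); /* 3, matches hashm_highmask */
    return 0;
}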
/*
 * _hash_getbuf() -- Get a buffer by block number for read or write.
 *
 *      When this routine returns, the appropriate lock is set on the
 *      requested buffer and its reference count is correct.
 *
 *      XXX P_NEW is not used because, unlike the tree structures, we
 *      need the bucket blocks to be at certain block numbers.  we must
 *      depend on the caller to call _hash_pageinit on the block if it
 *      knows that this is a new block.
 */
Buffer
_hash_getbuf(Relation rel, BlockNumber blkno, int access)
{
    Buffer buf;

    if (blkno == P_NEW) {
        elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW");
    }
    switch (access) {
    case HASH_WRITE:
    case HASH_READ:
        _hash_setpagelock(rel, blkno, access);
        break;
    default:
        elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %.*s",
             access, NAMEDATALEN, RelationGetRelationName(rel));
        break;
    }
    buf = ReadBuffer(rel, blkno);

    /* ref count and lock type are correct */
    return (buf);
}
/*
 * _hash_relbuf() -- release a locked buffer.
 */
void
_hash_relbuf(Relation rel, Buffer buf, int access)
{
    BlockNumber blkno;

    blkno = BufferGetBlockNumber(buf);

    switch (access) {
    case HASH_WRITE:
    case HASH_READ:
        _hash_unsetpagelock(rel, blkno, access);
        break;
    default:
        elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %.*s",
             access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
    }

    ReleaseBuffer(buf);
}

/*
 * _hash_wrtbuf() -- write a hash page to disk.
 *
 *      This routine releases the lock held on the buffer and our reference
 *      to it.  It is an error to call _hash_wrtbuf() without a write lock
 *      or a reference to the buffer.
 */
void
_hash_wrtbuf(Relation rel, Buffer buf)
{
    BlockNumber blkno;

    blkno = BufferGetBlockNumber(buf);
    WriteBuffer(buf);
    _hash_unsetpagelock(rel, blkno, HASH_WRITE);
}

/*
 * _hash_wrtnorelbuf() -- write a hash page to disk, but do not release
 *                        our reference or lock.
 *
 *      It is an error to call _hash_wrtnorelbuf() without a write lock
 *      or a reference to the buffer.
 */
void
_hash_wrtnorelbuf(Relation rel, Buffer buf)
{
    BlockNumber blkno;

    blkno = BufferGetBlockNumber(buf);
    WriteNoReleaseBuffer(buf);
}

Page
_hash_chgbufaccess(Relation rel,
                   Buffer *bufp,
                   int from_access,
                   int to_access)
{
    BlockNumber blkno;

    blkno = BufferGetBlockNumber(*bufp);

    switch (from_access) {
    case HASH_WRITE:
        _hash_wrtbuf(rel, *bufp);
        break;
    case HASH_READ:
        _hash_relbuf(rel, *bufp, from_access);
        break;
    default:
        elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %.*s",
             from_access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
        break;
    }
    *bufp = _hash_getbuf(rel, blkno, to_access);
    return (BufferGetPage(*bufp));
}
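_hash_chgbufaccess is the lock-juggling primitive the rest of this file leans on: release the buffer at one access level, then re-acquire the same block at another. A hedged usage sketch (hypothetical caller; assumes this file's includes and types) of the read-to-write-and-back idiom that _hash_expandtable uses below:

static void
example_bump_nkeys(Relation rel, Buffer metabuf)    /* demo only */
{
    HashMetaPage metap;

    /* upgrade the metapage from read to write access */
    metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
                                              HASH_READ, HASH_WRITE);
    metap->hashm_nkeys++;       /* any metapage update goes here */
    /* drop back to read access; metap must be refetched, the page may move */
    metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
                                              HASH_WRITE, HASH_READ);
}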
/*
 * _hash_pageinit() -- Initialize a new page.
 */
void
_hash_pageinit(Page page, Size size)
{
    Assert(((PageHeader) page)->pd_lower == 0);
    Assert(((PageHeader) page)->pd_upper == 0);
    Assert(((PageHeader) page)->pd_special == 0);

    /*
     * Cargo-cult programming -- don't really need this to be zero, but
     * creating new pages is an infrequent occurrence and it makes me feel
     * good when I know they're empty.
     */
    memset(page, 0, size);

    PageInit(page, size, sizeof(HashPageOpaqueData));
}

static void
_hash_setpagelock(Relation rel,
                  BlockNumber blkno,
                  int access)
{
    ItemPointerData iptr;

    if (USELOCKING) {
        ItemPointerSet(&iptr, blkno, 1);

        switch (access) {
        case HASH_WRITE:
            RelationSetSingleWLockPage(rel, &iptr);
            break;
        case HASH_READ:
            RelationSetSingleRLockPage(rel, &iptr);
            break;
        default:
            elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %.*s",
                 access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
            break;
        }
    }
}

static void
_hash_unsetpagelock(Relation rel,
                    BlockNumber blkno,
                    int access)
{
    ItemPointerData iptr;

    if (USELOCKING) {
        ItemPointerSet(&iptr, blkno, 1);

        switch (access) {
        case HASH_WRITE:
            RelationUnsetSingleWLockPage(rel, &iptr);
            break;
        case HASH_READ:
            RelationUnsetSingleRLockPage(rel, &iptr);
            break;
        default:
            elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %.*s",
                 access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
            break;
        }
    }
}
void
_hash_pagedel(Relation rel, ItemPointer tid)
{
    Buffer buf;
    Buffer metabuf;
    Page page;
    BlockNumber blkno;
    OffsetNumber offno;
    HashMetaPage metap;
    HashPageOpaque opaque;

    blkno = ItemPointerGetBlockNumber(tid);
    offno = ItemPointerGetOffsetNumber(tid);

    buf = _hash_getbuf(rel, blkno, HASH_WRITE);
    page = BufferGetPage(buf);
    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
    opaque = (HashPageOpaque) PageGetSpecialPointer(page);

    PageIndexTupleDelete(page, offno);
    _hash_wrtnorelbuf(rel, buf);

    if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) {
        buf = _hash_freeovflpage(rel, buf);
        if (BufferIsValid(buf)) {
            _hash_relbuf(rel, buf, HASH_WRITE);
        }
    } else {
        _hash_relbuf(rel, buf, HASH_WRITE);
    }

    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
    metap = (HashMetaPage) BufferGetPage(metabuf);
    _hash_checkpage((Page) metap, LH_META_PAGE);
    --metap->hashm_nkeys;       /* a tuple was just deleted: decrement, not increment */
    _hash_wrtbuf(rel, metabuf);
}
void
_hash_expandtable(Relation rel, Buffer metabuf)
{
    HashMetaPage metap;
    Bucket old_bucket;
    Bucket new_bucket;
    uint32 spare_ndx;

    /* elog(DEBUG, "_hash_expandtable: expanding..."); */

    metap = (HashMetaPage) BufferGetPage(metabuf);
    _hash_checkpage((Page) metap, LH_META_PAGE);

    metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
    new_bucket = ++metap->MAX_BUCKET;
    metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
    old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK);

    /*
     * If the split point is increasing (MAX_BUCKET's log base 2
     * increases), we need to copy the current contents of the spare
     * split bucket to the next bucket.
     */
    spare_ndx = _hash_log2(metap->MAX_BUCKET + 1);
    if (spare_ndx > metap->OVFL_POINT) {
        metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
        metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT];
        metap->OVFL_POINT = spare_ndx;
        metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
    }

    if (new_bucket > metap->HIGH_MASK) {
        /* Starting a new doubling */
        metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
        metap->LOW_MASK = metap->HIGH_MASK;
        metap->HIGH_MASK = new_bucket | metap->LOW_MASK;
        metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
    }
    /* Relocate records to the new bucket */
    _hash_splitpage(rel, metabuf, old_bucket, new_bucket);
}
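The mask updates above follow the usual linear-hashing schedule: each expansion creates bucket new_bucket = ++maxbucket, splits old_bucket = new_bucket & lowmask, and doubles both masks whenever new_bucket outgrows highmask. A standalone trace (demo only) of the first few splits from the initial metapage state:

#include <stdio.h>

int main(void)
{
    unsigned maxbucket = 1, lowmask = 1, highmask = 3;  /* initial metapage state */
    unsigned i;

    for (i = 0; i < 6; i++) {
        unsigned new_bucket = ++maxbucket;
        unsigned old_bucket = new_bucket & lowmask;     /* bucket being split */

        if (new_bucket > highmask) {                    /* starting a new doubling */
            lowmask = highmask;
            highmask = new_bucket | lowmask;
        }
        printf("split %u -> %u (lowmask=%u highmask=%u)\n",
               old_bucket, new_bucket, lowmask, highmask);
    }
    return 0;   /* prints 0->2, 1->3, 0->4, 1->5, 2->6, 3->7 */
}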
/*
 * _hash_splitpage -- split 'obucket' into 'obucket' and 'nbucket'
 *
 *      this routine is actually misnamed -- we are splitting a bucket that
 *      consists of a base bucket page and zero or more overflow (bucket
 *      chain) pages.
 */
static void
_hash_splitpage(Relation rel,
                Buffer metabuf,
                Bucket obucket,
                Bucket nbucket)
{
    Bucket bucket;
    Buffer obuf;
    Buffer nbuf;
    Buffer ovflbuf;
    BlockNumber oblkno;
    BlockNumber nblkno;
    bool null;
    Datum datum;
    HashItem hitem;
    HashPageOpaque oopaque;
    HashPageOpaque nopaque;
    HashMetaPage metap;
    IndexTuple itup;
    int itemsz;
    OffsetNumber ooffnum;
    OffsetNumber noffnum;
    OffsetNumber omaxoffnum;
    Page opage;
    Page npage;
    TupleDesc itupdesc;

    /* elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d",
           obucket, obucket, nbucket);
    */
    metap = (HashMetaPage) BufferGetPage(metabuf);
    _hash_checkpage((Page) metap, LH_META_PAGE);

    /* get the buffers & pages */
    oblkno = BUCKET_TO_BLKNO(obucket);
    nblkno = BUCKET_TO_BLKNO(nbucket);
    obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
    nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
    opage = BufferGetPage(obuf);
    npage = BufferGetPage(nbuf);

    /* initialize the new bucket */
    _hash_pageinit(npage, BufferGetPageSize(nbuf));
    nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
    nopaque->hasho_prevblkno = InvalidBlockNumber;
    nopaque->hasho_nextblkno = InvalidBlockNumber;
    nopaque->hasho_flag = LH_BUCKET_PAGE;
    nopaque->hasho_oaddr = InvalidOvflAddress;
    nopaque->hasho_bucket = nbucket;
    _hash_wrtnorelbuf(rel, nbuf);

    /*
     * make sure the old bucket isn't empty.  advance 'opage' and
     * friends through the overflow bucket chain until we find a
     * non-empty page.
     *
     * XXX we should only need this once, if we are careful to
     * preserve the invariant that overflow pages are never empty.
     */
    _hash_checkpage(opage, LH_BUCKET_PAGE);
    oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
    if (PageIsEmpty(opage)) {
        oblkno = oopaque->hasho_nextblkno;
        _hash_relbuf(rel, obuf, HASH_WRITE);
        if (!BlockNumberIsValid(oblkno)) {
            /*
             * the old bucket is completely empty; of course, the new
             * bucket will be as well, but since it's a base bucket
             * page we don't care.
             */
            _hash_relbuf(rel, nbuf, HASH_WRITE);
            return;
        }
        obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
        opage = BufferGetPage(obuf);
        _hash_checkpage(opage, LH_OVERFLOW_PAGE);
        if (PageIsEmpty(opage)) {
            elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno);
        }
        oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
    }

    /*
     * we are now guaranteed that 'opage' is not empty.  partition the
     * tuples in the old bucket between the old bucket and the new
     * bucket, advancing along their respective overflow bucket chains
     * and adding overflow pages as needed.
     */
    ooffnum = FirstOffsetNumber;
    omaxoffnum = PageGetMaxOffsetNumber(opage);
    for (;;) {
        /*
         * at each iteration through this loop, each of these variables
         * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum
         */

        /* check if we're at the end of the page */
        if (ooffnum > omaxoffnum) {
            /* at end of page, but check for overflow page */
            oblkno = oopaque->hasho_nextblkno;
            if (BlockNumberIsValid(oblkno)) {
                /*
                 * we ran out of tuples on this particular page, but
                 * we have more overflow pages; re-init values.
                 */
                _hash_wrtbuf(rel, obuf);
                obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
                opage = BufferGetPage(obuf);
                _hash_checkpage(opage, LH_OVERFLOW_PAGE);
                oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);

                /* we're guaranteed that an ovfl page has at least 1 tuple */
                if (PageIsEmpty(opage)) {
                    elog(WARN, "_hash_splitpage: empty ovfl page %d!",
                         oblkno);
                }
                ooffnum = FirstOffsetNumber;
                omaxoffnum = PageGetMaxOffsetNumber(opage);
            } else {
                /*
                 * we're at the end of the bucket chain, so now we're
                 * really done with everything.  before quitting, call
                 * _hash_squeezebucket to ensure the tuples in the
                 * bucket (including the overflow pages) are packed as
                 * tightly as possible.
                 */
                _hash_wrtbuf(rel, obuf);
                _hash_wrtbuf(rel, nbuf);
                _hash_squeezebucket(rel, metap, obucket);
                return;
            }
        }

        /* hash on the tuple */
        hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));
        itup = &(hitem->hash_itup);
        itupdesc = RelationGetTupleDescriptor(rel);
        datum = index_getattr(itup, 1, itupdesc, &null);
        bucket = _hash_call(rel, metap, datum);

        if (bucket == nbucket) {
            /*
             * insert the tuple into the new bucket.  if it doesn't
             * fit on the current page in the new bucket, we must
             * allocate a new overflow page and place the tuple on
             * that page instead.
             */
            itemsz = IndexTupleDSize(hitem->hash_itup)
                + (sizeof(HashItemData) - sizeof(IndexTupleData));

            itemsz = DOUBLEALIGN(itemsz);

            if (PageGetFreeSpace(npage) < itemsz) {
                ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf);
                _hash_wrtbuf(rel, nbuf);
                nbuf = ovflbuf;
                npage = BufferGetPage(nbuf);
                _hash_checkpage(npage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
            }

            noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
            (void) PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED);
            _hash_wrtnorelbuf(rel, nbuf);

            /*
             * now delete the tuple from the old bucket.  after this
             * section of code, 'ooffnum' will actually point to the
             * ItemId to which we would point if we had advanced it
             * before the deletion (PageIndexTupleDelete repacks the
             * ItemId array).  this also means that 'omaxoffnum' is
             * exactly one less than it used to be, so we really can
             * just decrement it instead of calling
             * PageGetMaxOffsetNumber.
             */
            PageIndexTupleDelete(opage, ooffnum);
            _hash_wrtnorelbuf(rel, obuf);
            omaxoffnum = OffsetNumberPrev(omaxoffnum);

            /*
             * tidy up.  if the old page was an overflow page and it
             * is now empty, we must free it (we want to preserve the
             * invariant that overflow pages cannot be empty).
             */
            if (PageIsEmpty(opage) &&
                (oopaque->hasho_flag & LH_OVERFLOW_PAGE)) {
                obuf = _hash_freeovflpage(rel, obuf);

                /* check that we're not through the bucket chain */
                if (BufferIsInvalid(obuf)) {
                    _hash_wrtbuf(rel, nbuf);
                    _hash_squeezebucket(rel, metap, obucket);
                    return;
                }

                /*
                 * re-init.  again, we're guaranteed that an ovfl page
                 * has at least one tuple.
                 */
                opage = BufferGetPage(obuf);
                _hash_checkpage(opage, LH_OVERFLOW_PAGE);
                oblkno = BufferGetBlockNumber(obuf);
                oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
                if (PageIsEmpty(opage)) {
                    elog(WARN, "_hash_splitpage: empty overflow page %d",
                         oblkno);
                }
                ooffnum = FirstOffsetNumber;
                omaxoffnum = PageGetMaxOffsetNumber(opage);
            }
        } else {
            /*
             * the tuple stays on this page.  we didn't move anything,
             * so we didn't delete anything and therefore we don't
             * have to change 'omaxoffnum'.
             *
             * XXX any hash value from [0, nbucket-1] will map to this
             * bucket, which doesn't make sense to me.
             */
            ooffnum = OffsetNumberNext(ooffnum);
        }
    }
    /*NOTREACHED*/
}
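One detail worth pinning down in the split loop above is the itemsz computation: the moved item is the IndexTuple body plus the HashItemData header overhead, rounded up by DOUBLEALIGN before being compared against PageGetFreeSpace. A standalone sketch (the tuple size, overhead, and 8-byte alignment are illustrative assumptions, not the real struct sizes):

#include <stdio.h>

#define MY_DOUBLEALIGN(len) (((len) + 7) & ~7)  /* assumes 8-byte alignment */

int main(void)
{
    unsigned tuplen = 21;   /* hypothetical IndexTupleDSize() result */
    unsigned overhead = 4;  /* stand-in for sizeof(HashItemData) - sizeof(IndexTupleData) */
    unsigned itemsz = MY_DOUBLEALIGN(tuplen + overhead);

    printf("itemsz = %u\n", itemsz);    /* 32: what PageGetFreeSpace is compared against */
    return 0;
}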
172
src/backend/access/hash/hashscan.c
Normal file
@@ -0,0 +1,172 @@
/*-------------------------------------------------------------------------
 *
 * hashscan.c--
 *      manage scans on hash tables
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
 *
 * NOTES
 *      Because we can be doing an index scan on a relation while we
 *      update it, we need to avoid missing data that moves around in
 *      the index.  The routines and global variables in this file
 *      guarantee that all scans in the local address space stay
 *      correctly positioned.  This is all we need to worry about, since
 *      write locking guarantees that no one else will be on the same
 *      page at the same time as we are.
 *
 *      The scheme is to manage a list of active scans in the current
 *      backend.  Whenever we add or remove records from an index, we
 *      check the list of active scans to see if any has been affected.
 *      A scan is affected only if it is on the same relation, and the
 *      same page, as the update.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/bufpage.h"

#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"

#include "access/heapam.h"
#include "access/genam.h"
#include "access/sdir.h"
#include "access/hash.h"

static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);

typedef struct HashScanListData {
    IndexScanDesc hashsl_scan;
    struct HashScanListData *hashsl_next;
} HashScanListData;

typedef HashScanListData *HashScanList;

static HashScanList HashScans = (HashScanList) NULL;

/*
 * _hash_regscan() -- register a new scan.
 */
void
_hash_regscan(IndexScanDesc scan)
{
    HashScanList new_el;

    new_el = (HashScanList) palloc(sizeof(HashScanListData));
    new_el->hashsl_scan = scan;
    new_el->hashsl_next = HashScans;
    HashScans = new_el;
}

/*
 * _hash_dropscan() -- drop a scan from the scan list
 */
void
_hash_dropscan(IndexScanDesc scan)
{
    HashScanList chk, last;

    last = (HashScanList) NULL;
    for (chk = HashScans;
         chk != (HashScanList) NULL && chk->hashsl_scan != scan;
         chk = chk->hashsl_next) {
        last = chk;
    }

    if (chk == (HashScanList) NULL)
        elog(WARN, "hash scan list trashed; can't find 0x%lx", (long) scan);

    if (last == (HashScanList) NULL)
        HashScans = chk->hashsl_next;
    else
        last->hashsl_next = chk->hashsl_next;

#ifdef PERFECT_MEM
    pfree(chk);
#endif /* PERFECT_MEM */
}
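_hash_regscan and _hash_dropscan together maintain a plain head-inserted singly-linked list, with deletion done by walking a trailing pointer. The same discipline in a standalone sketch (demo types, not the real IndexScanDesc):

#include <stdlib.h>
#include <assert.h>

typedef struct Node { int scan; struct Node *next; } Node;
static Node *head = NULL;

static void reg(int scan)               /* push on the head, like _hash_regscan */
{
    Node *n = malloc(sizeof(Node));
    n->scan = scan;
    n->next = head;
    head = n;
}

static void drop(int scan)              /* unlink with a trailing pointer */
{
    Node *chk, *last = NULL;

    for (chk = head; chk != NULL && chk->scan != scan; chk = chk->next)
        last = chk;
    assert(chk != NULL);                /* the elog(WARN) case above */
    if (last == NULL)
        head = chk->next;
    else
        last->next = chk->next;
    free(chk);
}

int main(void)
{
    reg(1); reg(2); reg(3);
    drop(2); drop(3); drop(1);
    assert(head == NULL);
    return 0;
}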
void
_hash_adjscans(Relation rel, ItemPointer tid)
{
    HashScanList l;
    Oid relid;

    relid = rel->rd_id;
    for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next) {
        if (relid == l->hashsl_scan->relation->rd_id)
            _hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid),
                          ItemPointerGetOffsetNumber(tid));
    }
}

static void
_hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{
    ItemPointer current;
    Buffer buf;
    Buffer metabuf;
    HashScanOpaque so;

    if (!_hash_scantouched(scan, blkno, offno))
        return;

    metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ);

    so = (HashScanOpaque) scan->opaque;
    buf = so->hashso_curbuf;

    current = &(scan->currentItemData);
    if (ItemPointerIsValid(current)
        && ItemPointerGetBlockNumber(current) == blkno
        && ItemPointerGetOffsetNumber(current) >= offno) {
        _hash_step(scan, &buf, BackwardScanDirection, metabuf);
        so->hashso_curbuf = buf;
    }

    current = &(scan->currentMarkData);
    if (ItemPointerIsValid(current)
        && ItemPointerGetBlockNumber(current) == blkno
        && ItemPointerGetOffsetNumber(current) >= offno) {
        ItemPointerData tmp;
        tmp = *current;
        *current = scan->currentItemData;
        scan->currentItemData = tmp;
        _hash_step(scan, &buf, BackwardScanDirection, metabuf);
        so->hashso_mrkbuf = buf;
        tmp = *current;
        *current = scan->currentItemData;
        scan->currentItemData = tmp;
    }
}

static bool
_hash_scantouched(IndexScanDesc scan,
                  BlockNumber blkno,
                  OffsetNumber offno)
{
    ItemPointer current;

    current = &(scan->currentItemData);
    if (ItemPointerIsValid(current)
        && ItemPointerGetBlockNumber(current) == blkno
        && ItemPointerGetOffsetNumber(current) >= offno)
        return (true);

    current = &(scan->currentMarkData);
    if (ItemPointerIsValid(current)
        && ItemPointerGetBlockNumber(current) == blkno
        && ItemPointerGetOffsetNumber(current) >= offno)
        return (true);

    return (false);
}
425
src/backend/access/hash/hashsearch.c
Normal file
@@ -0,0 +1,425 @@
/*-------------------------------------------------------------------------
 *
 * hashsearch.c--
 *      search code for postgres hash tables
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/bufpage.h"

#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"

#include "fmgr.h"

#include "access/heapam.h"
#include "access/genam.h"
#include "access/skey.h"
#include "access/sdir.h"
#include "access/hash.h"

/*
 * _hash_search() -- Finds the page/bucket that contains the
 *      scankey and loads it into *bufP.  the buffer has a read lock.
 */
void
_hash_search(Relation rel,
             int keysz,
             ScanKey scankey,
             Buffer *bufP,
             HashMetaPage metap)
{
    BlockNumber blkno;
    Datum keyDatum;
    Bucket bucket;

    if (scankey == (ScanKey) NULL ||
        (keyDatum = scankey[0].sk_argument) == (Datum) NULL) {
        /*
         * If the scankey argument is NULL, all tuples will satisfy
         * the scan so we start the scan at the first bucket (bucket
         * 0).
         */
        bucket = 0;
    } else {
        bucket = _hash_call(rel, metap, keyDatum);
    }

    blkno = BUCKET_TO_BLKNO(bucket);

    *bufP = _hash_getbuf(rel, blkno, HASH_READ);
}
/*
 * _hash_next() -- Get the next item in a scan.
 *
 *      On entry, we have a valid currentItemData in the scan, and a
 *      read lock on the page that contains that item.  We do not have
 *      the page pinned.  We return the next item in the scan.  On
 *      exit, we have the page containing the next item locked but not
 *      pinned.
 */
RetrieveIndexResult
_hash_next(IndexScanDesc scan, ScanDirection dir)
{
    Relation rel;
    Buffer buf;
    Buffer metabuf;
    Page page;
    OffsetNumber offnum;
    RetrieveIndexResult res;
    ItemPointer current;
    ItemPointer iptr;
    HashItem hitem;
    IndexTuple itup;
    HashScanOpaque so;

    rel = scan->relation;
    so = (HashScanOpaque) scan->opaque;
    current = &(scan->currentItemData);

    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);

    /*
     * XXX 10 may 91: somewhere there's a bug in our management of the
     * cached buffer for this scan.  wei discovered it.  the following
     * is a workaround so he can work until i figure out what's going on.
     */
    if (!BufferIsValid(so->hashso_curbuf)) {
        so->hashso_curbuf = _hash_getbuf(rel,
                                         ItemPointerGetBlockNumber(current),
                                         HASH_READ);
    }

    /* we still have the buffer pinned and locked */
    buf = so->hashso_curbuf;

    /*
     * step to next valid tuple.  note that _hash_step releases our
     * lock on 'metabuf'; if we switch to a new 'buf' while looking
     * for the next tuple, we come back with a lock on that buffer.
     */
    if (!_hash_step(scan, &buf, dir, metabuf)) {
        return ((RetrieveIndexResult) NULL);
    }

    /* if we're here, _hash_step found a valid tuple */
    current = &(scan->currentItemData);
    offnum = ItemPointerGetOffsetNumber(current);
    page = BufferGetPage(buf);
    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
    hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
    itup = &hitem->hash_itup;
    iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
    memmove((char *) iptr, (char *) &(itup->t_tid), sizeof(ItemPointerData));
    res = FormRetrieveIndexResult(current, iptr);

    return (res);
}
static void
_hash_readnext(Relation rel,
               Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
    BlockNumber blkno;

    blkno = (*opaquep)->hasho_nextblkno;
    _hash_relbuf(rel, *bufp, HASH_READ);
    *bufp = InvalidBuffer;
    if (BlockNumberIsValid(blkno)) {
        *bufp = _hash_getbuf(rel, blkno, HASH_READ);
        *pagep = BufferGetPage(*bufp);
        _hash_checkpage(*pagep, LH_OVERFLOW_PAGE);
        *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
        Assert(!PageIsEmpty(*pagep));
    }
}

static void
_hash_readprev(Relation rel,
               Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
    BlockNumber blkno;

    blkno = (*opaquep)->hasho_prevblkno;
    _hash_relbuf(rel, *bufp, HASH_READ);
    *bufp = InvalidBuffer;
    if (BlockNumberIsValid(blkno)) {
        *bufp = _hash_getbuf(rel, blkno, HASH_READ);
        *pagep = BufferGetPage(*bufp);
        _hash_checkpage(*pagep, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
        *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
        if (PageIsEmpty(*pagep)) {
            Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE);
            _hash_relbuf(rel, *bufp, HASH_READ);
            *bufp = InvalidBuffer;
        }
    }
}
/*
 * _hash_first() -- Find the first item in a scan.
 *
 *      Return the RetrieveIndexResult of the first item in the tree that
 *      satisfies the qualification associated with the scan descriptor. On
 *      exit, the page containing the current index tuple is read locked
 *      and pinned, and the scan's opaque data entry is updated to
 *      include the buffer.
 */
RetrieveIndexResult
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
    Relation rel;
    Buffer buf;
    Buffer metabuf;
    Page page;
    HashPageOpaque opaque;
    HashMetaPage metap;
    HashItem hitem;
    IndexTuple itup;
    ItemPointer current;
    ItemPointer iptr;
    OffsetNumber offnum;
    RetrieveIndexResult res;
    HashScanOpaque so;

    rel = scan->relation;
    so = (HashScanOpaque) scan->opaque;
    current = &(scan->currentItemData);

    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
    metap = (HashMetaPage) BufferGetPage(metabuf);
    _hash_checkpage((Page) metap, LH_META_PAGE);

    /*
     * XXX -- The attribute number stored in the scan key is the attno
     *        in the heap relation.  We need to transmogrify this into
     *        the index relation attno here.  For the moment, we have
     *        hardwired attno == 1.
     */

    /* find the correct bucket page and load it into buf */
    _hash_search(rel, 1, scan->keyData, &buf, metap);
    page = BufferGetPage(buf);
    _hash_checkpage(page, LH_BUCKET_PAGE);
    opaque = (HashPageOpaque) PageGetSpecialPointer(page);

    /*
     * if we are scanning forward, we need to find the first non-empty
     * page (if any) in the bucket chain.  since overflow pages are
     * never empty, this had better be either the bucket page or the
     * first overflow page.
     *
     * if we are scanning backward, we always go all the way to the
     * end of the bucket chain.
     */
    if (PageIsEmpty(page)) {
        if (BlockNumberIsValid(opaque->hasho_nextblkno)) {
            _hash_readnext(rel, &buf, &page, &opaque);
        } else {
            ItemPointerSetInvalid(current);
            so->hashso_curbuf = InvalidBuffer;
            return ((RetrieveIndexResult) NULL);
        }
    }
    if (ScanDirectionIsBackward(dir)) {
        while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
            _hash_readnext(rel, &buf, &page, &opaque);
        }
    }

    if (!_hash_step(scan, &buf, dir, metabuf)) {
        return ((RetrieveIndexResult) NULL);
    }

    /* if we're here, _hash_step found a valid tuple */
    current = &(scan->currentItemData);
    offnum = ItemPointerGetOffsetNumber(current);
    page = BufferGetPage(buf);
    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
    hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
    itup = &hitem->hash_itup;
    iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
    memmove((char *) iptr, (char *) &(itup->t_tid), sizeof(ItemPointerData));
    res = FormRetrieveIndexResult(current, iptr);

    return (res);
}
/*
 * _hash_step() -- step to the next valid item in a scan in the bucket.
 *
 *      If no valid record exists in the requested direction, return
 *      false.  Else, return true and set the CurrentItemData for the
 *      scan to the right thing.
 *
 *      'bufP' points to the buffer which contains the current page
 *      that we'll step through.
 *
 *      'metabuf' is released when this returns.
 */
bool
_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
{
    Relation rel;
    ItemPointer current;
    HashScanOpaque so;
    int allbuckets;
    HashMetaPage metap;
    Buffer buf;
    Page page;
    HashPageOpaque opaque;
    OffsetNumber maxoff;
    OffsetNumber offnum;
    Bucket bucket;
    BlockNumber blkno;
    HashItem hitem;
    IndexTuple itup;

    rel = scan->relation;
    current = &(scan->currentItemData);
    so = (HashScanOpaque) scan->opaque;
    allbuckets = (scan->numberOfKeys < 1);

    metap = (HashMetaPage) BufferGetPage(metabuf);
    _hash_checkpage((Page) metap, LH_META_PAGE);

    buf = *bufP;
    page = BufferGetPage(buf);
    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
    opaque = (HashPageOpaque) PageGetSpecialPointer(page);

    /*
     * If _hash_step is called from _hash_first, current will not be
     * valid, so we can't dereference it.  However, in that case, we
     * presumably want to start at the beginning/end of the page...
     */
    maxoff = PageGetMaxOffsetNumber(page);
    if (ItemPointerIsValid(current)) {
        offnum = ItemPointerGetOffsetNumber(current);
    } else {
        offnum = InvalidOffsetNumber;
    }

    /*
     * 'offnum' now points to the last tuple we have seen (if any).
     *
     * continue to step through tuples until:
     *  1) we get to the end of the bucket chain or
     *  2) we find a valid tuple.
     */
    do {
        bucket = opaque->hasho_bucket;

        switch (dir) {
        case ForwardScanDirection:
            if (offnum != InvalidOffsetNumber) {
                offnum = OffsetNumberNext(offnum);      /* move forward */
            } else {
                offnum = FirstOffsetNumber;             /* new page */
            }
            while (offnum > maxoff) {
                /*
                 * either this page is empty (maxoff ==
                 * InvalidOffsetNumber) or we ran off the end.
                 */
                _hash_readnext(rel, &buf, &page, &opaque);
                if (BufferIsInvalid(buf)) {             /* end of chain */
                    if (allbuckets && bucket < metap->hashm_maxbucket) {
                        ++bucket;
                        blkno = BUCKET_TO_BLKNO(bucket);
                        buf = _hash_getbuf(rel, blkno, HASH_READ);
                        page = BufferGetPage(buf);
                        _hash_checkpage(page, LH_BUCKET_PAGE);
                        opaque = (HashPageOpaque) PageGetSpecialPointer(page);
                        Assert(opaque->hasho_bucket == bucket);
                        while (PageIsEmpty(page) &&
                               BlockNumberIsValid(opaque->hasho_nextblkno)) {
                            _hash_readnext(rel, &buf, &page, &opaque);
                        }
                        maxoff = PageGetMaxOffsetNumber(page);
                        offnum = FirstOffsetNumber;
                    } else {
                        maxoff = offnum = InvalidOffsetNumber;
                        break;  /* while */
                    }
                } else {
                    /* _hash_readnext never returns an empty page */
                    maxoff = PageGetMaxOffsetNumber(page);
                    offnum = FirstOffsetNumber;
                }
            }
            break;
        case BackwardScanDirection:
            if (offnum != InvalidOffsetNumber) {
                offnum = OffsetNumberPrev(offnum);      /* move back */
            } else {
                offnum = maxoff;                        /* new page */
            }
            while (offnum < FirstOffsetNumber) {
                /*
                 * either this page is empty (offnum ==
                 * InvalidOffsetNumber) or we ran off the end.
                 */
                _hash_readprev(rel, &buf, &page, &opaque);
                if (BufferIsInvalid(buf)) {             /* end of chain */
                    if (allbuckets && bucket > 0) {
                        --bucket;
                        blkno = BUCKET_TO_BLKNO(bucket);
                        buf = _hash_getbuf(rel, blkno, HASH_READ);
                        page = BufferGetPage(buf);
                        _hash_checkpage(page, LH_BUCKET_PAGE);
                        opaque = (HashPageOpaque) PageGetSpecialPointer(page);
                        Assert(opaque->hasho_bucket == bucket);
                        while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
                            _hash_readnext(rel, &buf, &page, &opaque);
                        }
                        maxoff = offnum = PageGetMaxOffsetNumber(page);
                    } else {
                        maxoff = offnum = InvalidOffsetNumber;
                        break;  /* while */
                    }
                } else {
                    /* _hash_readprev never returns an empty page */
                    maxoff = offnum = PageGetMaxOffsetNumber(page);
                }
            }
            break;
        default:
            /* NoMovementScanDirection */
            /* this should not be reached */
            break;
        }

        /* we ran off the end of the world without finding a match */
        if (offnum == InvalidOffsetNumber) {
            _hash_relbuf(rel, metabuf, HASH_READ);
            *bufP = so->hashso_curbuf = InvalidBuffer;
            ItemPointerSetInvalid(current);
            return (false);
        }

        /* get ready to check this tuple */
        hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
        itup = &hitem->hash_itup;
    } while (!_hash_checkqual(scan, itup));

    /* if we made it to here, we've found a valid tuple */
    _hash_relbuf(rel, metabuf, HASH_READ);
    blkno = BufferGetBlockNumber(buf);
    *bufP = so->hashso_curbuf = buf;
    ItemPointerSet(current, blkno, offnum);
    return (true);
}
104
src/backend/access/hash/hashstrat.c
Normal file
@@ -0,0 +1,104 @@
/*-------------------------------------------------------------------------
 *
 * hashstrat.c--
 *      Strategy map entries for the hash indexed access method
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/hashstrat.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufpage.h"

#include "utils/elog.h"
#include "utils/rel.h"
#include "utils/excid.h"

#include "access/genam.h"
#include "access/hash.h"

/*
 * only one valid strategy for hash tables: equality.
 */

static StrategyNumber HTNegate[1] = {
    InvalidStrategy
};

static StrategyNumber HTCommute[1] = {
    HTEqualStrategyNumber
};

static StrategyNumber HTNegateCommute[1] = {
    InvalidStrategy
};

static StrategyEvaluationData HTEvaluationData = {
    /* XXX static for simplicity */

    HTMaxStrategyNumber,
    (StrategyTransformMap) HTNegate,
    (StrategyTransformMap) HTCommute,
    (StrategyTransformMap) HTNegateCommute,
    {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
};

/* ----------------------------------------------------------------
 *      RelationGetHashStrategy
 * ----------------------------------------------------------------
 */

StrategyNumber
_hash_getstrat(Relation rel,
               AttrNumber attno,
               RegProcedure proc)
{
    StrategyNumber strat;

    strat = RelationGetStrategy(rel, attno, &HTEvaluationData, proc);

    Assert(StrategyNumberIsValid(strat));

    return (strat);
}

bool
_hash_invokestrat(Relation rel,
                  AttrNumber attno,
                  StrategyNumber strat,
                  Datum left,
                  Datum right)
{
    return (RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat,
                                   left, right));
}
147
src/backend/access/hash/hashutil.c
Normal file
@@ -0,0 +1,147 @@
/*-------------------------------------------------------------------------
 *
 * hashutil.c--
 *      Utility code for Postgres hash implementation.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/bufpage.h"

#include "fmgr.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"

#include "access/heapam.h"
#include "access/genam.h"
#include "access/iqual.h"
#include "access/hash.h"

ScanKey
_hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap)
{
    ScanKey skey;
    TupleDesc itupdesc;
    int natts;
    AttrNumber i;
    Datum arg;
    RegProcedure proc;
    bool null;

    natts = rel->rd_rel->relnatts;
    itupdesc = RelationGetTupleDescriptor(rel);

    skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));

    for (i = 0; i < natts; i++) {
        arg = index_getattr(itup, i + 1, itupdesc, &null);
        proc = metap->hashm_procid;
        ScanKeyEntryInitialize(&skey[i],
                               0x0, (AttrNumber) (i + 1), proc, arg);
    }

    return (skey);
}

void
_hash_freeskey(ScanKey skey)
{
    pfree(skey);
}

bool
_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
{
    if (scan->numberOfKeys > 0)
        return (index_keytest(itup,
                              RelationGetTupleDescriptor(scan->relation),
                              scan->numberOfKeys, scan->keyData));
    else
        return (true);
}

HashItem
_hash_formitem(IndexTuple itup)
{
    int nbytes_hitem;
    HashItem hitem;
    Size tuplen;

    /* disallow nulls in hash keys */
    if (itup->t_info & INDEX_NULL_MASK)
        elog(WARN, "hash indices cannot include null keys");

    /* make a copy of the index tuple with room for the sequence number */
    tuplen = IndexTupleSize(itup);
    nbytes_hitem = tuplen +
        (sizeof(HashItemData) - sizeof(IndexTupleData));

    hitem = (HashItem) palloc(nbytes_hitem);
    memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen);

    return (hitem);
}

Bucket
_hash_call(Relation rel, HashMetaPage metap, Datum key)
{
    uint32 n;
    Bucket bucket;
    RegProcedure proc;

    proc = metap->hashm_procid;
    n = (uint32) fmgr(proc, key);
    bucket = n & metap->hashm_highmask;
    if (bucket > metap->hashm_maxbucket)
        bucket = bucket & metap->hashm_lowmask;
    return (bucket);
}
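_hash_call is the heart of the linear-hashing lookup: mask the hash value with highmask, and if that names a bucket past maxbucket (one that has not been split into existence yet), fall back to lowmask. A standalone trace (demo values only):

#include <stdio.h>

int main(void)
{
    unsigned maxbucket = 5, lowmask = 3, highmask = 7;  /* 6 buckets: 0..5 */
    unsigned h;

    for (h = 0; h < 8; h++) {
        unsigned bucket = h & highmask;
        if (bucket > maxbucket)             /* bucket not split yet */
            bucket = bucket & lowmask;
        printf("hash %u -> bucket %u\n", h, bucket);
    }
    return 0;   /* hashes 6 and 7 fold back to buckets 2 and 3 */
}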
/*
 * _hash_log2 -- returns ceil(lg2(num))
 */
uint32
_hash_log2(uint32 num)
{
    uint32 i, limit;

    limit = 1;
    for (i = 0; limit < num; limit = limit << 1, i++)
        ;
    return (i);
}
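A quick standalone check (demo only) that this loop really computes ceil(lg2(num)), including the round-up cases:

#include <assert.h>

static unsigned my_log2(unsigned num)   /* same loop as _hash_log2 above */
{
    unsigned i, limit;
    for (i = 0, limit = 1; limit < num; limit <<= 1, i++)
        ;
    return i;
}

int main(void)
{
    assert(my_log2(1) == 0);
    assert(my_log2(2) == 1);
    assert(my_log2(3) == 2);            /* rounds up */
    assert(my_log2(1024) == 10);
    assert(my_log2(1025) == 11);
    return 0;
}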
/*
 * _hash_checkpage -- sanity checks on the format of all hash pages
 */
void
_hash_checkpage(Page page, int flags)
{
    PageHeader ph = (PageHeader) page;
    HashPageOpaque opaque;

    Assert(page);
    Assert(ph->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData)));
#if 1
    Assert(ph->pd_upper <=
           (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
    Assert(ph->pd_special ==
           (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
    Assert(ph->pd_opaque.od_pagesize == BLCKSZ);
#endif
    if (flags) {
        opaque = (HashPageOpaque) PageGetSpecialPointer(page);
        Assert(opaque->hasho_flag & flags);
    }
}