Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in different AMs' ambuild routines has been moved out to a common routine in index.c; this means that all index types now do the right things about inserting recently-dead tuples, etc. (I also removed support for EXTEND INDEX in the ambuild routines, since that's about to go away anyway, and it cluttered the code a lot.) The retail indextuple deletion routines have been replaced by a "bulk delete" routine in which the indexscan is inside the access method. I haven't pushed this change as far as it should go yet, but it should allow considerable simplification of the internal bookkeeping for deletions. Also, add flag columns to pg_am to eliminate various hardcoded tests on AM OIDs, and remove unused pg_am columns. Fix rtree and gist index types to not attempt to store NULLs; before this, gist usually crashed, while rtree managed not to crash but computed wacko bounding boxes for NULL entries (which might have had something to do with the performance problems we've heard about occasionally). Add AtEOXact routines to hash, rtree, and gist, all of which have static state that needs to be reset after an error. We discovered this need long ago for btree, but missed the other guys. Oh, one more thing: concurrent VACUUM is now the default.
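
The new "bulk delete" control flow is visible in the hashbulkdelete() hunk below: instead of VACUUM calling a retail delete routine once per tuple, VACUUM hands the access method a callback, and the AM scans its own index, asking per entry whether the referenced heap tuple is being deleted. The following is a minimal, self-contained C sketch of that inversion — not the committed code. Only the general shape of IndexBulkDeleteCallback and IndexBulkDeleteResult is taken from the diff; every Toy* name, toy_bulkdelete, and dead_on_block7 are invented stand-ins for illustration.

/* toy_bulkdelete.c -- illustrative sketch only; Toy* names are stand-ins. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* A heap tuple address, standing in for PostgreSQL's ItemPointerData. */
typedef struct
{
	unsigned	block;
	unsigned	offset;
} ToyItemPointer;

/* Same shape as the IndexBulkDeleteCallback used in hashbulkdelete():
 * given a heap tuple's address, report whether it is being deleted. */
typedef bool (*ToyBulkDeleteCallback) (ToyItemPointer *htid, void *state);

/* Statistics handed back to VACUUM, like IndexBulkDeleteResult below. */
typedef struct
{
	double		tuples_removed;
	double		num_index_tuples;
} ToyBulkDeleteResult;

/* The AM-side loop: walk the entire index, consult the callback for each
 * entry, and "delete" (here, just count) entries whose heap tuples are
 * going away.  Mirrors the while-loop in the hashbulkdelete() hunk,
 * minus the page-level machinery. */
static ToyBulkDeleteResult
toy_bulkdelete(const ToyItemPointer *entries, size_t nentries,
			   ToyBulkDeleteCallback callback, void *callback_state)
{
	ToyBulkDeleteResult result = {0.0, 0.0};
	size_t		i;

	for (i = 0; i < nentries; i++)
	{
		ToyItemPointer htid = entries[i];

		if (callback(&htid, callback_state))
			result.tuples_removed += 1;		/* a real AM deletes the entry */
		else
			result.num_index_tuples += 1;	/* entry survives */
	}
	return result;
}

/* VACUUM-side callback for this demo: every tuple on block 7 is dead. */
static bool
dead_on_block7(ToyItemPointer *htid, void *state)
{
	(void) state;				/* no extra state needed here */
	return htid->block == 7;
}

int
main(void)
{
	ToyItemPointer index_entries[] = {
		{3, 1}, {7, 1}, {7, 2}, {9, 4}
	};
	ToyBulkDeleteResult r;

	r = toy_bulkdelete(index_entries, 4, dead_on_block7, NULL);
	printf("removed=%.0f remaining=%.0f\n",
		   r.tuples_removed, r.num_index_tuples);	/* removed=2 remaining=2 */
	return 0;
}

The point of the restructuring is that the per-entry scan now lives inside the access method, so each AM can later replace the generic index scan with something smarter — as the "XXX generic implementation --- should be improved!" comment in the hunk below anticipates.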
src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.52 2001/07/15 22:48:15 tgl Exp $
  *
  * NOTES
  *	  This file contains only the public interface routines.
@@ -21,13 +21,27 @@
 #include "access/genam.h"
 #include "access/hash.h"
 #include "access/heapam.h"
+#include "access/xlogutils.h"
+#include "catalog/index.h"
 #include "executor/executor.h"
 #include "miscadmin.h"
 
 
 bool		BuildingHash = false;
 
-#include "access/xlogutils.h"
 
+/* Working state for hashbuild and its callback */
+typedef struct
+{
+	double		indtuples;
+} HashBuildState;
+
+static void hashbuildCallback(Relation index,
+				  HeapTuple htup,
+				  Datum *attdata,
+				  char *nulls,
+				  bool tupleIsAlive,
+				  void *state);
+
 
 /*
@@ -44,161 +58,32 @@ hashbuild(PG_FUNCTION_ARGS)
 	Relation	heap = (Relation) PG_GETARG_POINTER(0);
 	Relation	index = (Relation) PG_GETARG_POINTER(1);
 	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
-	Node	   *oldPred = (Node *) PG_GETARG_POINTER(3);
+	double		reltuples;
+	HashBuildState buildstate;
 
-#ifdef NOT_USED
-	IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4);
-
-#endif
-	HeapScanDesc hscan;
-	HeapTuple	htup;
-	IndexTuple	itup;
-	TupleDesc	htupdesc,
-				itupdesc;
-	Datum		attdata[INDEX_MAX_KEYS];
-	char		nulls[INDEX_MAX_KEYS];
-	double		nhtups,
-				nitups;
-	HashItem	hitem;
-	Node	   *pred = indexInfo->ii_Predicate;
-
-#ifndef OMIT_PARTIAL_INDEX
-	TupleTable	tupleTable;
-	TupleTableSlot *slot;
-
-#endif
-	ExprContext *econtext;
-	InsertIndexResult res = NULL;
-
-	/* note that this is a new hash */
 	/* set flag to disable locking */
 	BuildingHash = true;
 
-	/* initialize the hash index metadata page (if this is a new index) */
-	if (oldPred == NULL)
-		_hash_metapinit(index);
-
-	/* get tuple descriptors for heap and index relations */
-	htupdesc = RelationGetDescr(heap);
-	itupdesc = RelationGetDescr(index);
-
 	/*
-	 * If this is a predicate (partial) index, we will need to evaluate
-	 * the predicate using ExecQual, which requires the current tuple to
-	 * be in a slot of a TupleTable.  In addition, ExecQual must have an
-	 * ExprContext referring to that slot.  Here, we initialize dummy
-	 * TupleTable and ExprContext objects for this purpose. --Nels, Feb 92
-	 *
-	 * We construct the ExprContext anyway since we need a per-tuple
-	 * temporary memory context for function evaluation -- tgl July 00
+	 * We expect to be called exactly once for any index relation. If
+	 * that's not the case, big trouble's what we have.
 	 */
-#ifndef OMIT_PARTIAL_INDEX
-	if (pred != NULL || oldPred != NULL)
-	{
-		tupleTable = ExecCreateTupleTable(1);
-		slot = ExecAllocTableSlot(tupleTable);
-		ExecSetSlotDescriptor(slot, htupdesc, false);
-	}
-	else
-	{
-		tupleTable = NULL;
-		slot = NULL;
-	}
-	econtext = MakeExprContext(slot, TransactionCommandContext);
-#else
-	econtext = MakeExprContext(NULL, TransactionCommandContext);
-#endif	 /* OMIT_PARTIAL_INDEX */
+	if (RelationGetNumberOfBlocks(index) != 0)
+		elog(ERROR, "%s already contains data",
+			 RelationGetRelationName(index));
+
+	/* initialize the hash index metadata page */
+	_hash_metapinit(index);
 
 	/* build the index */
-	nhtups = nitups = 0.0;
+	buildstate.indtuples = 0;
 
-	/* start a heap scan */
-	hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
+	/* do the heap scan */
+	reltuples = IndexBuildHeapScan(heap, index, indexInfo,
+								   hashbuildCallback, (void *) &buildstate);
 
-	while (HeapTupleIsValid(htup = heap_getnext(hscan, 0)))
-	{
-		MemoryContextReset(econtext->ecxt_per_tuple_memory);
-
-		nhtups += 1.0;
-
-#ifndef OMIT_PARTIAL_INDEX
-
-		/*
-		 * If oldPred != NULL, this is an EXTEND INDEX command, so skip
-		 * this tuple if it was already in the existing partial index
-		 */
-		if (oldPred != NULL)
-		{
-			slot->val = htup;
-			if (ExecQual((List *) oldPred, econtext, false))
-			{
-				nitups += 1.0;
-				continue;
-			}
-		}
-
-		/*
-		 * Skip this tuple if it doesn't satisfy the partial-index
-		 * predicate
-		 */
-		if (pred != NULL)
-		{
-			slot->val = htup;
-			if (!ExecQual((List *) pred, econtext, false))
-				continue;
-		}
-#endif	 /* OMIT_PARTIAL_INDEX */
-
-		nitups += 1.0;
-
-		/*
-		 * For the current heap tuple, extract all the attributes we use
-		 * in this index, and note which are null.
-		 */
-		FormIndexDatum(indexInfo,
-					   htup,
-					   htupdesc,
-					   econtext->ecxt_per_tuple_memory,
-					   attdata,
-					   nulls);
-
-		/* form an index tuple and point it at the heap tuple */
-		itup = index_formtuple(itupdesc, attdata, nulls);
-
-		/*
-		 * If the single index key is null, we don't insert it into the
-		 * index.  Hash tables support scans on '='.  Relational algebra
-		 * says that A = B returns null if either A or B is null.  This
-		 * means that no qualification used in an index scan could ever
-		 * return true on a null attribute.  It also means that indices
-		 * can't be used by ISNULL or NOTNULL scans, but that's an
-		 * artifact of the strategy map architecture chosen in 1986, not
-		 * of the way nulls are handled here.
-		 */
-
-		if (IndexTupleHasNulls(itup))
-		{
-			pfree(itup);
-			continue;
-		}
-
-		itup->t_tid = htup->t_self;
-		hitem = _hash_formitem(itup);
-
-		res = _hash_doinsert(index, hitem);
-
-		pfree(hitem);
-		pfree(itup);
-		pfree(res);
-	}
-
-	/* okay, all heap tuples are indexed */
-	heap_endscan(hscan);
-
-#ifndef OMIT_PARTIAL_INDEX
-	if (pred != NULL || oldPred != NULL)
-		ExecDropTupleTable(tupleTable, true);
-#endif	 /* OMIT_PARTIAL_INDEX */
-	FreeExprContext(econtext);
+	/* all done */
+	BuildingHash = false;
 
 	/*
 	 * Since we just counted the tuples in the heap, we update its stats
@@ -218,22 +103,53 @@ hashbuild(PG_FUNCTION_ARGS)
 
 		heap_close(heap, NoLock);
 		index_close(index);
-		UpdateStats(hrelid, nhtups);
-		UpdateStats(irelid, nitups);
-		if (oldPred != NULL)
-		{
-			if (nitups == nhtups)
-				pred = NULL;
-			UpdateIndexPredicate(irelid, oldPred, pred);
-		}
+		UpdateStats(hrelid, reltuples);
+		UpdateStats(irelid, buildstate.indtuples);
 	}
 
-	/* all done */
-	BuildingHash = false;
-
 	PG_RETURN_VOID();
 }
 
+/*
+ * Per-tuple callback from IndexBuildHeapScan
+ */
+static void
+hashbuildCallback(Relation index,
+				  HeapTuple htup,
+				  Datum *attdata,
+				  char *nulls,
+				  bool tupleIsAlive,
+				  void *state)
+{
+	HashBuildState *buildstate = (HashBuildState *) state;
+	IndexTuple	itup;
+	HashItem	hitem;
+	InsertIndexResult res;
+
+	/* form an index tuple and point it at the heap tuple */
+	itup = index_formtuple(RelationGetDescr(index), attdata, nulls);
+	itup->t_tid = htup->t_self;
+
+	/* Hash indexes don't index nulls, see notes in hashinsert */
+	if (IndexTupleHasNulls(itup))
+	{
+		pfree(itup);
+		return;
+	}
+
+	hitem = _hash_formitem(itup);
+
+	res = _hash_doinsert(index, hitem);
+
+	if (res)
+		pfree(res);
+
+	buildstate->indtuples += 1;
+
+	pfree(hitem);
+	pfree(itup);
+}
+
 /*
  * hashinsert() -- insert an index tuple into a hash table.
  *
@@ -248,10 +164,8 @@ hashinsert(PG_FUNCTION_ARGS)
 	Datum	   *datum = (Datum *) PG_GETARG_POINTER(1);
 	char	   *nulls = (char *) PG_GETARG_POINTER(2);
 	ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
-
 #ifdef NOT_USED
 	Relation	heapRel = (Relation) PG_GETARG_POINTER(4);
-
 #endif
 	InsertIndexResult res;
 	HashItem	hitem;
@@ -261,8 +175,21 @@ hashinsert(PG_FUNCTION_ARGS)
 	itup = index_formtuple(RelationGetDescr(rel), datum, nulls);
 	itup->t_tid = *ht_ctid;
 
+	/*
+	 * If the single index key is null, we don't insert it into the
+	 * index.  Hash tables support scans on '='.  Relational algebra
+	 * says that A = B returns null if either A or B is null.  This
+	 * means that no qualification used in an index scan could ever
+	 * return true on a null attribute.  It also means that indices
+	 * can't be used by ISNULL or NOTNULL scans, but that's an
+	 * artifact of the strategy map architecture chosen in 1986, not
+	 * of the way nulls are handled here.
+	 */
+	if (IndexTupleHasNulls(itup))
+	{
+		pfree(itup);
+		PG_RETURN_POINTER((InsertIndexResult) NULL);
+	}
+
 	hitem = _hash_formitem(itup);
 
@@ -471,22 +398,74 @@ hashrestrpos(PG_FUNCTION_ARGS)
 	PG_RETURN_VOID();
 }
 
-/* stubs */
+/*
+ * Bulk deletion of all index entries pointing to a set of heap tuples.
+ * The set of target tuples is specified via a callback routine that tells
+ * whether any given heap tuple (identified by ItemPointer) is being deleted.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
 Datum
-hashdelete(PG_FUNCTION_ARGS)
+hashbulkdelete(PG_FUNCTION_ARGS)
 {
 	Relation	rel = (Relation) PG_GETARG_POINTER(0);
-	ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1);
+	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
+	void	   *callback_state = (void *) PG_GETARG_POINTER(2);
+	IndexBulkDeleteResult *result;
+	BlockNumber num_pages;
+	double		tuples_removed;
+	double		num_index_tuples;
+	RetrieveIndexResult res;
+	IndexScanDesc iscan;
 
-	/* adjust any active scans that will be affected by this deletion */
-	_hash_adjscans(rel, tid);
+	tuples_removed = 0;
+	num_index_tuples = 0;
 
-	/* delete the data from the page */
-	_hash_pagedel(rel, tid);
+	/*
+	 * XXX generic implementation --- should be improved!
+	 */
 
-	PG_RETURN_VOID();
+	/* walk through the entire index */
+	iscan = index_beginscan(rel, false, 0, (ScanKey) NULL);
+
+	while ((res = index_getnext(iscan, ForwardScanDirection))
+		   != (RetrieveIndexResult) NULL)
+	{
+		ItemPointer heapptr = &res->heap_iptr;
+
+		if (callback(heapptr, callback_state))
+		{
+			ItemPointer indexptr = &res->index_iptr;
+
+			/* adjust any active scans that will be affected by deletion */
+			/* (namely, my own scan) */
+			_hash_adjscans(rel, indexptr);
+
+			/* delete the data from the page */
+			_hash_pagedel(rel, indexptr);
+
+			tuples_removed += 1;
+		}
+		else
+			num_index_tuples += 1;
+
+		pfree(res);
+	}
+
+	index_endscan(iscan);
+
+	/* return statistics */
+	num_pages = RelationGetNumberOfBlocks(rel);
+
+	result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
+	result->num_pages = num_pages;
+	result->tuples_removed = tuples_removed;
+	result->num_index_tuples = num_index_tuples;
+
+	PG_RETURN_POINTER(result);
 }
 
+
 void
 hash_redo(XLogRecPtr lsn, XLogRecord *record)
 {
src/backend/access/hash/hashovfl.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.29 2001/03/07 21:20:26 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.30 2001/07/15 22:48:15 tgl Exp $
  *
  * NOTES
  *	  Overflow pages look like ordinary relation pages.
@@ -112,14 +112,14 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
 
 	metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE);
 
-	splitnum = metap->OVFL_POINT;
-	max_free = metap->SPARES[splitnum];
+	splitnum = metap->hashm_ovflpoint;
+	max_free = metap->hashm_spares[splitnum];
 
 	free_page = (max_free - 1) >> (metap->hashm_bshift + BYTE_TO_BIT);
 	free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1);
 
 	/* Look through all the free maps to find the first free block */
-	first_page = metap->LAST_FREED >> (metap->hashm_bshift + BYTE_TO_BIT);
+	first_page = metap->hashm_lastfreed >> (metap->hashm_bshift + BYTE_TO_BIT);
 	for (i = first_page; i <= free_page; i++)
 	{
 		Page		mappage;
@@ -138,7 +138,7 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
 
 		if (i == first_page)
 		{
-			bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1);
+			bit = metap->hashm_lastfreed & (BMPGSZ_BIT(metap) - 1);
 			j = bit / BITS_PER_MAP;
 			bit = bit & ~(BITS_PER_MAP - 1);
 		}
@@ -153,10 +153,10 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
 	}
 
 	/* No Free Page Found - have to allocate a new page */
-	metap->LAST_FREED = metap->SPARES[splitnum];
-	metap->SPARES[splitnum]++;
-	offset = metap->SPARES[splitnum] -
-		(splitnum ? metap->SPARES[splitnum - 1] : 0);
+	metap->hashm_lastfreed = metap->hashm_spares[splitnum];
+	metap->hashm_spares[splitnum]++;
+	offset = metap->hashm_spares[splitnum] -
+		(splitnum ? metap->hashm_spares[splitnum - 1] : 0);
 
 #define OVMSG "HASH: Out of overflow pages. Out of luck.\n"
 
@@ -164,9 +164,9 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
 	{
 		if (++splitnum >= NCACHED)
 			elog(ERROR, OVMSG);
-		metap->OVFL_POINT = splitnum;
-		metap->SPARES[splitnum] = metap->SPARES[splitnum - 1];
-		metap->SPARES[splitnum - 1]--;
+		metap->hashm_ovflpoint = splitnum;
+		metap->hashm_spares[splitnum] = metap->hashm_spares[splitnum - 1];
+		metap->hashm_spares[splitnum - 1]--;
 		offset = 0;
 	}
 
@@ -194,15 +194,15 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
 		if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset),
 							 1, free_page))
 			elog(ERROR, "overflow_page: problem with _hash_initbitmap.");
-		metap->SPARES[splitnum]++;
+		metap->hashm_spares[splitnum]++;
 		offset++;
 		if (offset > SPLITMASK)
 		{
 			if (++splitnum >= NCACHED)
 				elog(ERROR, OVMSG);
-			metap->OVFL_POINT = splitnum;
-			metap->SPARES[splitnum] = metap->SPARES[splitnum - 1];
-			metap->SPARES[splitnum - 1]--;
+			metap->hashm_ovflpoint = splitnum;
+			metap->hashm_spares[splitnum] = metap->hashm_spares[splitnum - 1];
+			metap->hashm_spares[splitnum - 1]--;
 			offset = 0;
 		}
 	}
@@ -235,13 +235,13 @@ found:
 	 */
 
 	bit = 1 + bit + (i * BMPGSZ_BIT(metap));
-	if (bit >= metap->LAST_FREED)
-		metap->LAST_FREED = bit - 1;
+	if (bit >= metap->hashm_lastfreed)
+		metap->hashm_lastfreed = bit - 1;
 
 	/* Calculate the split number for this page */
-	for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++)
+	for (i = 0; (i < splitnum) && (bit > metap->hashm_spares[i]); i++)
 		;
-	offset = (i ? bit - metap->SPARES[i - 1] : bit);
+	offset = (i ? bit - metap->hashm_spares[i - 1] : bit);
 	if (offset >= SPLITMASK)
 		elog(ERROR, OVMSG);
 
@@ -355,10 +355,10 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 	 * element hashm_mapp[bitmappage].
 	 */
 	splitnum = (addr >> SPLITSHIFT);
-	ovflpgno = (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;
+	ovflpgno = (splitnum ? metap->hashm_spares[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;
 
-	if (ovflpgno < metap->LAST_FREED)
-		metap->LAST_FREED = ovflpgno;
+	if (ovflpgno < metap->hashm_lastfreed)
+		metap->hashm_lastfreed = ovflpgno;
 
 	bitmappage = (ovflpgno >> (metap->hashm_bshift + BYTE_TO_BIT));
 	bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1);
src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.31 2001/06/27 23:31:37 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.32 2001/07/15 22:48:15 tgl Exp $
 *
 * NOTES
 *	  Postgres hash pages look like ordinary relation pages.  The opaque
@@ -18,7 +18,7 @@
 *	  address of the page if it is an overflow page.
 *
 *	  The first page in a hash relation, page zero, is special -- it stores
- *	  information describing the hash table; it is referred to as teh
+ *	  information describing the hash table; it is referred to as the
 *	  "meta page." Pages one and higher store the actual data.
 *
 *-------------------------------------------------------------------------
@@ -48,6 +48,19 @@ static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket
 * before the lock table is fully initialized, so we can't use it.
 * Strictly speaking, this violates 2pl, but we don't do 2pl on the
 * system catalogs anyway.
+ *
+ * Note that our page locks are actual lockmanager locks, not buffer
+ * locks (as are used by btree, for example).  This is a good idea because
+ * the algorithms are not deadlock-free, and we'd better be able to detect
+ * and recover from deadlocks.
+ *
+ * Another important difference from btree is that a hash indexscan
+ * retains both a lock and a buffer pin on the current index page
+ * between hashgettuple() calls (btree keeps only a buffer pin).
+ * Because of this, it's safe to do item deletions with only a regular
+ * write lock on a hash page --- there cannot be an indexscan stopped on
+ * the page being deleted, other than an indexscan of our own backend,
+ * which will be taken care of by _hash_adjscans.
 */
 
 
@@ -350,6 +363,16 @@ _hash_unsetpagelock(Relation rel,
 	}
 }
 
+/*
+ * Delete a hash index item.
+ *
+ * It is safe to delete an item after acquiring a regular WRITE lock on
+ * the page, because no other backend can hold a READ lock on the page,
+ * and that means no other backend currently has an indexscan stopped on
+ * any item of the item being deleted.  Our own backend might have such
+ * an indexscan (in fact *will*, since that's how VACUUM found the item
+ * in the first place), but _hash_adjscans will fix the scan position.
+ */
 void
 _hash_pagedel(Relation rel, ItemPointer tid)
 {
@@ -384,7 +407,7 @@ _hash_pagedel(Relation rel, ItemPointer tid)
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
 	_hash_checkpage((Page) metap, LH_META_PAGE);
-	++metap->hashm_nkeys;
+	metap->hashm_nkeys--;
 	_hash_wrtbuf(rel, metabuf);
 }
 
@@ -402,32 +425,32 @@ _hash_expandtable(Relation rel, Buffer metabuf)
 	_hash_checkpage((Page) metap, LH_META_PAGE);
 
 	metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
-	new_bucket = ++metap->MAX_BUCKET;
+	new_bucket = ++metap->hashm_maxbucket;
 	metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
-	old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK);
+	old_bucket = (metap->hashm_maxbucket & metap->hashm_lowmask);
 
 	/*
-	 * If the split point is increasing (MAX_BUCKET's log base 2
+	 * If the split point is increasing (hashm_maxbucket's log base 2
 	 * increases), we need to copy the current contents of the spare split
 	 * bucket to the next bucket.
 	 */
-	spare_ndx = _hash_log2(metap->MAX_BUCKET + 1);
-	if (spare_ndx > metap->OVFL_POINT)
+	spare_ndx = _hash_log2(metap->hashm_maxbucket + 1);
+	if (spare_ndx > metap->hashm_ovflpoint)
 	{
 
 		metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
-		metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT];
-		metap->OVFL_POINT = spare_ndx;
+		metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
+		metap->hashm_ovflpoint = spare_ndx;
 		metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
 	}
 
-	if (new_bucket > metap->HIGH_MASK)
+	if (new_bucket > metap->hashm_highmask)
 	{
 
 		/* Starting a new doubling */
 		metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
-		metap->LOW_MASK = metap->HIGH_MASK;
-		metap->HIGH_MASK = new_bucket | metap->LOW_MASK;
+		metap->hashm_lowmask = metap->hashm_highmask;
+		metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
 		metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
 
 	}
src/backend/access/hash/hashscan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.24 2001/01/24 19:42:47 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.25 2001/07/15 22:48:15 tgl Exp $
 *
 * NOTES
 *	  Because we can be doing an index scan on a relation while we
@@ -45,6 +45,31 @@ typedef HashScanListData *HashScanList;
 
 static HashScanList HashScans = (HashScanList) NULL;
 
+
+/*
+ * AtEOXact_hash() --- clean up hash subsystem at xact abort or commit.
+ *
+ * This is here because it needs to touch this module's static var HashScans.
+ */
+void
+AtEOXact_hash(void)
+{
+	/*
+	 * Note: these actions should only be necessary during xact abort; but
+	 * they can't hurt during a commit.
+	 */
+
+	/*
+	 * Reset the active-scans list to empty.  We do not need to free the
+	 * list elements, because they're all palloc()'d, so they'll go away
+	 * at end of transaction anyway.
+	 */
+	HashScans = NULL;
+
+	/* If we were building a hash, we ain't anymore. */
+	BuildingHash = false;
+}
+
 /*
  *	_hash_regscan() -- register a new scan.
  */