1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

GiST improvements:

- make sure we always invoke user-supplied GiST methods in a short-lived
  memory context. This means the backend isn't exposed to any memory leaks
  that be in those methods (in fact, it is probably a net loss for most
  GiST methods to bother manually freeing memory now). This also means
  we can do away with a lot of ugly manual memory management in the
  GiST code itself.

- keep the current page of a GiST index scan pinned, rather than doing a
  ReadBuffer() for each tuple produced by the scan. Since ReadBuffer() is
  expensive, this is a perf. win

- implement dead tuple killing for GiST indexes (which is easy to do, now
  that we keep a pin on the current scan page). Now all the builtin indexes
  implement dead tuple killing.

- cleanup a lot of ugly code in GiST
This commit is contained in:
Neil Conway
2005-05-17 00:59:30 +00:00
parent 818bfda1e2
commit eda6dd32d1
5 changed files with 625 additions and 724 deletions

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.45 2005/03/27 23:52:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.46 2005/05/17 00:59:30 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@ -16,41 +16,71 @@
#include "access/gist.h"
#include "executor/execdebug.h"
#include "utils/memutils.h"
static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n,
ScanDirection dir);
static bool gistnext(IndexScanDesc scan, ScanDirection dir);
static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan,
OffsetNumber offset);
static OffsetNumber gistfindnext(IndexScanDesc s, Page p, OffsetNumber n,
ScanDirection dir);
static bool gistscancache(IndexScanDesc s, ScanDirection dir);
static bool gistfirst(IndexScanDesc s, ScanDirection dir);
static bool gistnext(IndexScanDesc s, ScanDirection dir);
static bool gistindex_keytest(IndexTuple tuple,
int scanKeySize, ScanKey key, GISTSTATE *giststate,
Relation r, Page p, OffsetNumber offset);
/*
* gistgettuple() -- Get the next tuple in the scan
*/
Datum
gistgettuple(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
bool res;
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
Page page;
OffsetNumber offnum;
GISTScanOpaque so;
/* if we have it cached in the scan desc, just return the value */
if (gistscancache(s, dir))
PG_RETURN_BOOL(true);
so = (GISTScanOpaque) scan->opaque;
/* not cached, so we'll have to do some work */
if (ItemPointerIsValid(&(s->currentItemData)))
res = gistnext(s, dir);
else
res = gistfirst(s, dir);
PG_RETURN_BOOL(res);
/*
* If we have produced an index tuple in the past and the executor
* has informed us we need to mark it as "killed", do so now.
*
* XXX: right now there is no concurrent access. In the
* future, we should (a) get a read lock on the page (b) check
* that the location of the previously-fetched tuple hasn't
* changed due to concurrent insertions.
*/
if (scan->kill_prior_tuple && ItemPointerIsValid(&(scan->currentItemData)))
{
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->curbuf);
PageGetItemId(page, offnum)->lp_flags |= LP_DELETE;
SetBufferCommitInfoNeedsSave(so->curbuf);
}
/*
* Get the next tuple that matches the search key. If asked to
* skip killed tuples, continue looping until we find a non-killed
* tuple that matches the search key.
*/
for (;;)
{
bool res = gistnext(scan, dir);
if (res == true && scan->ignore_killed_tuples)
{
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->curbuf);
if (ItemIdDeleted(PageGetItemId(page, offnum)))
continue;
}
PG_RETURN_BOOL(res);
}
}
Datum
gistgetmulti(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
@ -60,13 +90,10 @@ gistgetmulti(PG_FUNCTION_ARGS)
/* XXX generic implementation: loop around guts of gistgettuple */
while (ntids < max_tids)
{
if (ItemPointerIsValid(&(s->currentItemData)))
res = gistnext(s, ForwardScanDirection);
else
res = gistfirst(s, ForwardScanDirection);
res = gistnext(scan, ForwardScanDirection);
if (!res)
break;
tids[ntids] = s->xs_ctup.t_self;
tids[ntids] = scan->xs_ctup.t_self;
ntids++;
}
@ -74,166 +101,123 @@ gistgetmulti(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(res);
}
/*
* Fetch a tuple that matchs the search key; this can be invoked
* either to fetch the first such tuple or subsequent matching
* tuples. Returns true iff a matching tuple was found.
*/
static bool
gistfirst(IndexScanDesc s, ScanDirection dir)
gistnext(IndexScanDesc scan, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
OffsetNumber maxoff;
GISTPageOpaque po;
GISTScanOpaque so;
GISTSTACK *stk;
BlockNumber blk;
IndexTuple it;
so = (GISTScanOpaque) s->opaque;
so = (GISTScanOpaque) scan->opaque;
b = ReadBuffer(s->indexRelation, GISTP_ROOT);
p = BufferGetPage(b);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
for (;;)
if (ItemPointerIsValid(&scan->currentItemData) == false)
{
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir))
n = gistfindnext(s, p, maxoff, dir);
else
n = gistfindnext(s, p, FirstOffsetNumber, dir);
while (n < FirstOffsetNumber || n > maxoff)
{
stk = so->s_stack;
if (stk == NULL)
{
ReleaseBuffer(b);
return false;
}
b = ReleaseAndReadBuffer(b, s->indexRelation, stk->gs_blk);
p = BufferGetPage(b);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(stk->gs_child);
else
n = OffsetNumberNext(stk->gs_child);
so->s_stack = stk->gs_parent;
pfree(stk);
n = gistfindnext(s, p, n, dir);
}
if (po->flags & F_LEAF)
{
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
s->xs_ctup.t_self = it->t_tid;
ReleaseBuffer(b);
return true;
}
else
{
stk = (GISTSTACK *) palloc(sizeof(GISTSTACK));
stk->gs_child = n;
stk->gs_blk = BufferGetBlockNumber(b);
stk->gs_parent = so->s_stack;
so->s_stack = stk;
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
blk = ItemPointerGetBlockNumber(&(it->t_tid));
b = ReleaseAndReadBuffer(b, s->indexRelation, blk);
p = BufferGetPage(b);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
}
/* Being asked to fetch the first entry, so start at the root */
Assert(so->curbuf == InvalidBuffer);
so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);
}
}
static bool
gistnext(IndexScanDesc s, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
OffsetNumber maxoff;
GISTPageOpaque po;
GISTScanOpaque so;
GISTSTACK *stk;
BlockNumber blk;
IndexTuple it;
so = (GISTScanOpaque) s->opaque;
blk = ItemPointerGetBlockNumber(&(s->currentItemData));
n = ItemPointerGetOffsetNumber(&(s->currentItemData));
if (ScanDirectionIsForward(dir))
n = OffsetNumberNext(n);
else
n = OffsetNumberPrev(n);
b = ReadBuffer(s->indexRelation, blk);
p = BufferGetPage(b);
p = BufferGetPage(so->curbuf);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
if (ItemPointerIsValid(&scan->currentItemData) == false)
{
if (ScanDirectionIsBackward(dir))
n = PageGetMaxOffsetNumber(p);
else
n = FirstOffsetNumber;
}
else
{
n = ItemPointerGetOffsetNumber(&(scan->currentItemData));
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(n);
else
n = OffsetNumberNext(n);
}
for (;;)
{
maxoff = PageGetMaxOffsetNumber(p);
n = gistfindnext(s, p, n, dir);
n = gistfindnext(scan, n, dir);
while (n < FirstOffsetNumber || n > maxoff)
if (!OffsetNumberIsValid(n))
{
stk = so->s_stack;
if (stk == NULL)
/*
* We ran out of matching index entries on the current
* page, so pop the top stack entry and use it to continue
* the search.
*/
/* If we're out of stack entries, we're done */
if (so->stack == NULL)
{
ReleaseBuffer(b);
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
return false;
}
b = ReleaseAndReadBuffer(b, s->indexRelation, stk->gs_blk);
p = BufferGetPage(b);
stk = so->stack;
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
stk->block);
p = BufferGetPage(so->curbuf);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(stk->gs_child);
n = OffsetNumberPrev(stk->offset);
else
n = OffsetNumberNext(stk->gs_child);
n = OffsetNumberNext(stk->offset);
so->s_stack = stk->gs_parent;
so->stack = stk->parent;
pfree(stk);
n = gistfindnext(s, p, n, dir);
continue;
}
if (po->flags & F_LEAF)
{
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
/*
* We've found a matching index entry in a leaf page, so
* return success. Note that we keep "curbuf" pinned so
* that we can efficiently resume the index scan later.
*/
ItemPointerSet(&(scan->currentItemData),
BufferGetBlockNumber(so->curbuf), n);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
s->xs_ctup.t_self = it->t_tid;
ReleaseBuffer(b);
scan->xs_ctup.t_self = it->t_tid;
return true;
}
else
{
/*
* We've found an entry in an internal node whose key is
* consistent with the search key, so continue the search
* in the pointed-to child node (i.e. we search depth
* first). Push the current node onto the stack so we
* resume searching from this node later.
*/
BlockNumber child_block;
stk = (GISTSTACK *) palloc(sizeof(GISTSTACK));
stk->gs_child = n;
stk->gs_blk = BufferGetBlockNumber(b);
stk->gs_parent = so->s_stack;
so->s_stack = stk;
stk->offset = n;
stk->block = BufferGetBlockNumber(so->curbuf);
stk->parent = so->stack;
so->stack = stk;
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
blk = ItemPointerGetBlockNumber(&(it->t_tid));
child_block = ItemPointerGetBlockNumber(&(it->t_tid));
b = ReleaseAndReadBuffer(b, s->indexRelation, blk);
p = BufferGetPage(b);
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
child_block);
p = BufferGetPage(so->curbuf);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
if (ScanDirectionIsBackward(dir))
@ -244,19 +228,34 @@ gistnext(IndexScanDesc s, ScanDirection dir)
}
}
/* Similar to index_keytest, but decompresses the key in the IndexTuple */
/*
* Similar to index_keytest, but first decompress the key in the
* IndexTuple before passing it to the sk_func (and we have previously
* overwritten the sk_func to use the user-defined Consistent method,
* so we actually invoke that). Note that this function is always
* invoked in a short-lived memory context, so we don't need to worry
* about cleaning up allocated memory (either here or in the
* implementation of any Consistent methods).
*/
static bool
gistindex_keytest(IndexTuple tuple,
int scanKeySize,
ScanKey key,
GISTSTATE *giststate,
Relation r,
Page p,
IndexScanDesc scan,
OffsetNumber offset)
{
int keySize = scan->numberOfKeys;
ScanKey key = scan->keyData;
Relation r = scan->indexRelation;
GISTScanOpaque so;
Page p;
GISTSTATE *giststate;
so = (GISTScanOpaque) scan->opaque;
giststate = so->giststate;
p = BufferGetPage(so->curbuf);
IncrIndexProcessed();
while (scanKeySize > 0)
while (keySize > 0)
{
Datum datum;
bool isNull;
@ -297,53 +296,57 @@ gistindex_keytest(IndexTuple tuple,
Int32GetDatum(key->sk_strategy),
ObjectIdGetDatum(key->sk_subtype));
/* if index datum had to be decompressed, free it */
if (de.key != datum && !isAttByVal(giststate, key->sk_attno - 1))
if (DatumGetPointer(de.key) != NULL)
pfree(DatumGetPointer(de.key));
if (!DatumGetBool(test))
return false;
scanKeySize--;
keySize--;
key++;
}
return true;
}
/*
* Return the offset of the first index entry that is consistent with
* the search key after offset 'n' in the current page. If there are
* no more consistent entries, return InvalidOffsetNumber.
*/
static OffsetNumber
gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir)
gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)
{
OffsetNumber maxoff;
IndexTuple it;
GISTPageOpaque po;
GISTScanOpaque so;
GISTSTATE *giststate;
OffsetNumber maxoff;
IndexTuple it;
GISTPageOpaque po;
GISTScanOpaque so;
MemoryContext oldcxt;
Page p;
so = (GISTScanOpaque) scan->opaque;
p = BufferGetPage(so->curbuf);
maxoff = PageGetMaxOffsetNumber(p);
po = (GISTPageOpaque) PageGetSpecialPointer(p);
so = (GISTScanOpaque) s->opaque;
giststate = so->giststate;
/*
* Make sure we're in a short-lived memory context when we invoke
* a user-supplied GiST method in gistindex_keytest(), so we don't
* leak memory
*/
oldcxt = MemoryContextSwitchTo(so->tempCxt);
/*
* If we modified the index during the scan, we may have a pointer to
* a ghost tuple, before the scan. If this is the case, back up one.
*/
if (so->s_flags & GS_CURBEFORE)
if (so->flags & GS_CURBEFORE)
{
so->s_flags &= ~GS_CURBEFORE;
so->flags &= ~GS_CURBEFORE;
n = OffsetNumberPrev(n);
}
while (n >= FirstOffsetNumber && n <= maxoff)
{
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
if (gistindex_keytest(it,
s->numberOfKeys, s->keyData, giststate,
s->indexRelation, p, n))
if (gistindex_keytest(it, scan, n))
break;
if (ScanDirectionIsBackward(dir))
@ -352,28 +355,16 @@ gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir)
n = OffsetNumberNext(n);
}
return n;
}
static bool
gistscancache(IndexScanDesc s, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
IndexTuple it;
if (!(ScanDirectionIsNoMovement(dir)
&& ItemPointerIsValid(&(s->currentItemData))))
return false;
b = ReadBuffer(s->indexRelation,
ItemPointerGetBlockNumber(&(s->currentItemData)));
p = BufferGetPage(b);
n = ItemPointerGetOffsetNumber(&(s->currentItemData));
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
s->xs_ctup.t_self = it->t_tid;
ReleaseBuffer(b);
return true;
MemoryContextSwitchTo(oldcxt);
MemoryContextReset(so->tempCxt);
/*
* If we found a matching entry, return its offset; otherwise
* return InvalidOffsetNumber to inform the caller to go to the
* next page.
*/
if (n >= FirstOffsetNumber && n <= maxoff)
return n;
else
return InvalidOffsetNumber;
}