1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

Concurrency for GiST

- full concurrency for insert/update/select/vacuum:
        - select and vacuum never lock more than one page simultaneously
        - select (gettuple) holds no lock across its calls
        - insert never locks more than two pages simultaneously:
                - during the search for the leaf to insert into, it locks only
                  one page at a time
                - while walking upward to the root, it locks only the parent
                  (which may be a non-direct parent) and the child. One of them
                  is X-locked; the other may be S- or X-locked
- 'vacuum full' locks index
- improve gistgetmulti
- simplify XLOG records

Fix bug in index_beginscan_internal: LockRelation may clear the
  rd_aminfo structure, so move GET_REL_PROCEDURE after LockRelation
This commit is contained in:
Teodor Sigaev
2005-06-27 12:45:23 +00:00
parent c3be085ab7
commit e8cab5fe49
12 changed files with 994 additions and 598 deletions

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.49 2005/06/20 10:29:36 teodor Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.50 2005/06/27 12:45:22 teodor Exp $
*
*-------------------------------------------------------------------------
*/
@ -21,10 +21,63 @@
static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n,
ScanDirection dir);
static bool gistnext(IndexScanDesc scan, ScanDirection dir);
static int gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples);
static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan,
OffsetNumber offset);
/*
 * killtuple() -- flag one index entry as dead (LP_DELETE).
 *
 * The search starts on the scan's current buffer, so->curbuf.  If the LSN
 * of that page still equals the LSN remembered in so->stack->lsn, the
 * offset number stored in iptr is used directly as the position on the
 * page.  Otherwise every item of the page is compared against iptr via its
 * t_tid, and on a miss the search continues on the page named by the
 * rightlink, until a match is found or the rightlink is InvalidBlockNumber.
 *
 * A page is share-locked only while it is being examined, so no more than
 * one page is locked at any moment.  Buffers obtained here with ReadBuffer
 * are released again before returning; so->curbuf itself is never released
 * by this function.
 *
 * Following rightlinks may end up reading many pages when the entry has
 * already been removed by somebody else (e.g. a concurrent vacuum); in
 * that case the function returns without marking anything.
 *
 * NOTE(review): the unchanged-page path interprets iptr as a position on
 * the index page, while the search path compares iptr with each tuple's
 * t_tid.  Confirm that callers pass a pointer that is meaningful for both
 * interpretations.
 */
static void
killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr)
{
	Buffer		cur = so->curbuf;

	for (;;)
	{
		Page		page;
		OffsetNumber off;
		BlockNumber next = InvalidBlockNumber;
		bool		hit = false;

		LockBuffer(cur, GIST_SHARE);
		page = (Page) BufferGetPage(cur);

		if (cur == so->curbuf && XLByteEQ(so->stack->lsn, PageGetLSN(page)))
		{
			/* page is as the scan left it: the stored offset is still good */
			off = ItemPointerGetOffsetNumber(iptr);
			hit = true;
		}
		else
		{
			/* page changed (or is a right sibling): look the entry up */
			OffsetNumber last = PageGetMaxOffsetNumber(page);

			for (off = FirstOffsetNumber; off <= last; off = OffsetNumberNext(off))
			{
				IndexTuple	candidate = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));

				if (ItemPointerEquals(&(candidate->t_tid), iptr))
				{
					hit = true;
					break;
				}
			}
		}

		if (hit)
		{
			PageGetItemId(page, off)->lp_flags |= LP_DELETE;
			SetBufferCommitInfoNeedsSave(cur);
		}
		else
		{
			/* remember where to go next while the page is still locked */
			next = GistPageGetOpaque(page)->rightlink;
		}

		LockBuffer(cur, GIST_UNLOCK);
		if (cur != so->curbuf)
			ReleaseBuffer(cur);

		/* done on success, or when there is no right sibling left to try */
		if (hit || next == InvalidBlockNumber)
			return;

		cur = ReadBuffer(r, next);
	}
}
/*
* gistgettuple() -- Get the next tuple in the scan
@ -34,48 +87,27 @@ gistgettuple(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
Page page;
OffsetNumber offnum;
GISTScanOpaque so;
ItemPointerData tid;
bool res;
so = (GISTScanOpaque) scan->opaque;
/*
* If we have produced an index tuple in the past and the executor
* has informed us we need to mark it as "killed", do so now.
*
* XXX: right now there is no concurrent access. In the
* future, we should (a) get a read lock on the page (b) check
* that the location of the previously-fetched tuple hasn't
* changed due to concurrent insertions.
*/
if (scan->kill_prior_tuple && ItemPointerIsValid(&(scan->currentItemData)))
{
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->curbuf);
PageGetItemId(page, offnum)->lp_flags |= LP_DELETE;
SetBufferCommitInfoNeedsSave(so->curbuf);
}
if (scan->kill_prior_tuple && ItemPointerIsValid(&(scan->currentItemData)))
killtuple(scan->indexRelation, so, &(scan->currentItemData));
/*
* Get the next tuple that matches the search key. If asked to
* skip killed tuples, continue looping until we find a non-killed
* tuple that matches the search key.
*/
for (;;)
{
bool res = gistnext(scan, dir);
res = ( gistnext(scan, dir, &tid, 1, scan->ignore_killed_tuples) ) ? true : false;
if (res == true && scan->ignore_killed_tuples)
{
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->curbuf);
if (ItemIdDeleted(PageGetItemId(page, offnum)))
continue;
}
PG_RETURN_BOOL(res);
}
PG_RETURN_BOOL(res);
}
Datum
@ -85,36 +117,28 @@ gistgetmulti(PG_FUNCTION_ARGS)
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
bool res = true;
int32 ntids = 0;
/* XXX generic implementation: loop around guts of gistgettuple */
while (ntids < max_tids)
{
res = gistnext(scan, ForwardScanDirection);
if (!res)
break;
tids[ntids] = scan->xs_ctup.t_self;
ntids++;
}
*returned_tids = ntids;
PG_RETURN_BOOL(res);
*returned_tids = gistnext(scan, ForwardScanDirection, tids, max_tids, false);
PG_RETURN_BOOL(*returned_tids == max_tids);
}
/*
* Fetch a tuple that matchs the search key; this can be invoked
* Fetch tuples that match the search key; this can be invoked
* either to fetch the first such tuples or subsequent matching
* tuples. Returns the number of matching tuples stored in tids.
*/
static bool
gistnext(IndexScanDesc scan, ScanDirection dir)
static int
gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples)
{
Page p;
OffsetNumber n;
GISTScanOpaque so;
GISTSTACK *stk;
GISTSearchStack *stk;
IndexTuple it;
GISTPageOpaque opaque;
bool resetoffset=false;
int ntids=0;
so = (GISTScanOpaque) scan->opaque;
@ -122,107 +146,164 @@ gistnext(IndexScanDesc scan, ScanDirection dir)
{
/* Being asked to fetch the first entry, so start at the root */
Assert(so->curbuf == InvalidBuffer);
Assert(so->stack == NULL);
so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);
stk = so->stack = (GISTSearchStack*) palloc0( sizeof(GISTSearchStack) );
stk->next = NULL;
stk->block = GIST_ROOT_BLKNO;
} else if ( so->curbuf == InvalidBuffer ) {
return 0;
}
p = BufferGetPage(so->curbuf);
for(;;) {
/* First of all, we need lock buffer */
Assert( so->curbuf != InvalidBuffer );
LockBuffer( so->curbuf, GIST_SHARE );
p = BufferGetPage(so->curbuf);
opaque = GistPageGetOpaque( p );
resetoffset = false;
if ( XLogRecPtrIsInvalid( so->stack->lsn ) || !XLByteEQ( so->stack->lsn, PageGetLSN(p) ) ) {
/* page changed from last visit or visit first time , reset offset */
so->stack->lsn = PageGetLSN(p);
resetoffset = true;
if (ItemPointerIsValid(&scan->currentItemData) == false)
{
if (ScanDirectionIsBackward(dir))
n = PageGetMaxOffsetNumber(p);
else
n = FirstOffsetNumber;
}
else
{
n = ItemPointerGetOffsetNumber(&(scan->currentItemData));
/* check for a page split that occurred since the last visit or since the visit to the parent */
if ( !XLogRecPtrIsInvalid( so->stack->parentlsn ) &&
XLByteLT( so->stack->parentlsn, opaque->nsn ) &&
opaque->rightlink != InvalidBlockNumber /* sanity check */ &&
(so->stack->next==NULL || so->stack->next->block != opaque->rightlink) /* check if already added */) {
/* detect page split, follow right link to add pages */
stk = (GISTSearchStack*) palloc( sizeof(GISTSearchStack) );
stk->next = so->stack->next;
stk->block = opaque->rightlink;
stk->parentlsn = so->stack->parentlsn;
memset( &(stk->lsn), 0, sizeof(GistNSN) );
so->stack->next = stk;
}
}
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(n);
else
n = OffsetNumberNext(n);
}
/* if page is empty, then just skip it */
if ( PageIsEmpty(p) ) {
LockBuffer( so->curbuf, GIST_UNLOCK );
stk = so->stack->next;
pfree( so->stack );
so->stack = stk;
for (;;)
{
n = gistfindnext(scan, n, dir);
if (!OffsetNumberIsValid(n))
{
/*
* We ran out of matching index entries on the current
* page, so pop the top stack entry and use it to continue
* the search.
*/
/* If we're out of stack entries, we're done */
if (so->stack == NULL)
{
if (so->stack == NULL) {
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
return false;
return ntids;
}
stk = so->stack;
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
stk->block);
p = BufferGetPage(so->curbuf);
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(stk->offset);
else
n = OffsetNumberNext(stk->offset);
so->stack = stk->parent;
pfree(stk);
stk->block);
continue;
}
if (GistPageIsLeaf(p))
if (!GistPageIsLeaf(p) || resetoffset || ItemPointerIsValid(&scan->currentItemData) == false)
{
/*
* We've found a matching index entry in a leaf page, so
* return success. Note that we keep "curbuf" pinned so
* that we can efficiently resume the index scan later.
*/
ItemPointerSet(&(scan->currentItemData),
BufferGetBlockNumber(so->curbuf), n);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
scan->xs_ctup.t_self = it->t_tid;
return true;
}
else
{
/*
* We've found an entry in an internal node whose key is
* consistent with the search key, so continue the search
* in the pointed-to child node (i.e. we search depth
* first). Push the current node onto the stack so we
* resume searching from this node later.
*/
BlockNumber child_block;
stk = (GISTSTACK *) palloc(sizeof(GISTSTACK));
stk->offset = n;
stk->block = BufferGetBlockNumber(so->curbuf);
stk->parent = so->stack;
so->stack = stk;
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
child_block = ItemPointerGetBlockNumber(&(it->t_tid));
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
child_block);
p = BufferGetPage(so->curbuf);
if (ScanDirectionIsBackward(dir))
n = PageGetMaxOffsetNumber(p);
else
n = FirstOffsetNumber;
}
else
{
n = ItemPointerGetOffsetNumber(&(scan->currentItemData));
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(n);
else
n = OffsetNumberNext(n);
}
/* wonderful, we can look at the page */
for(;;)
{
n = gistfindnext(scan, n, dir);
if (!OffsetNumberIsValid(n))
{
/*
* We ran out of matching index entries on the current
* page, so pop the top stack entry and use it to continue
* the search.
*/
LockBuffer( so->curbuf, GIST_UNLOCK );
stk = so->stack->next;
pfree( so->stack );
so->stack = stk;
/* If we're out of stack entries, we're done */
if (so->stack == NULL)
{
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
return ntids;
}
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
stk->block);
/* XXX go up */
break;
}
if (GistPageIsLeaf(p))
{
/*
* We've found a matching index entry in a leaf page, so
* return success. Note that we keep "curbuf" pinned so
* that we can efficiently resume the index scan later.
*/
ItemPointerSet(&(scan->currentItemData),
BufferGetBlockNumber(so->curbuf), n);
if ( ! ( ignore_killed_tuples && ItemIdDeleted(PageGetItemId(p, n)) ) ) {
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
tids[ntids] = scan->xs_ctup.t_self = it->t_tid;
ntids++;
if ( ntids == maxtids ) {
LockBuffer( so->curbuf, GIST_UNLOCK );
return ntids;
}
}
}
else
{
/*
* We've found an entry in an internal node whose key is
* consistent with the search key, so push it to stack
*/
stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
stk->block = ItemPointerGetBlockNumber(&(it->t_tid));
memset( &(stk->lsn), 0, sizeof(GistNSN) );
stk->parentlsn = so->stack->lsn;
stk->next = so->stack->next;
so->stack->next = stk;
}
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(n);
else
n = OffsetNumberNext(n);
}
}
return ntids;
}
/*
@ -313,6 +394,7 @@ gistindex_keytest(IndexTuple tuple,
* Return the offset of the first index entry that is consistent with
* the search key after offset 'n' in the current page. If there are
* no more consistent entries, return InvalidOffsetNumber.
* Page should be locked....
*/
static OffsetNumber
gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)