1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-13 16:22:44 +03:00

Mark index entries "killed" when they are no longer visible to any
transaction, so as to avoid returning them out of the index AM.  Saves
repeated heap_fetch operations on frequently-updated rows.  Also detect
queries on unique keys (equality to all columns of a unique index), and
don't bother continuing scan once we have found first match.

Killing is implemented in the btree and hash AMs, but not yet in rtree
or gist, because there isn't an equally convenient place to do it in
those AMs (the outer amgetnext routine can't do it without re-pinning
the index page).

Did some small cleanup on APIs of HeapTupleSatisfies, heap_fetch, and
index_insert to make this a little easier.
This commit is contained in:
Tom Lane
2002-05-24 18:57:57 +00:00
parent 2f2d05763d
commit 3f4d488022
30 changed files with 498 additions and 273 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.90 2002/03/06 06:09:17 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.91 2002/05/24 18:57:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -176,7 +176,6 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
Page page;
BTPageOpaque opaque;
Buffer nbuf = InvalidBuffer;
bool chtup = true;
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -194,70 +193,85 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
for (;;)
{
HeapTupleData htup;
Buffer buffer;
Buffer hbuffer;
ItemId curitemid;
BTItem cbti;
BlockNumber nblkno;
/*
* _bt_compare returns 0 for (1,NULL) and (1,NULL) - this's how we
* handling NULLs - and so we must not use _bt_compare in real
* comparison, but only for ordering/finding items on pages. -
* vadim 03/24/97
*
* make sure the offset points to an actual key before trying to
* compare it...
*/
if (offset <= maxoff)
{
/*
* _bt_compare returns 0 for (1,NULL) and (1,NULL) - this is how we
* handle NULLs - and so we must not use _bt_compare for real
* comparison, but only for ordering/finding items on pages. -
* vadim 03/24/97
*/
if (!_bt_isequal(itupdesc, page, offset, natts, itup_scankey))
break; /* we're past all the equal tuples */
curitemid = PageGetItemId(page, offset);
/*
* Have to check is inserted heap tuple deleted one (i.e. just
* moved to another place by vacuum)! We only need to do this
* once, but don't want to do it at all unless we see equal
* tuples, so as not to slow down unequal case.
* We can skip the heap fetch if the item is marked killed.
*/
if (chtup)
if (!ItemIdDeleted(curitemid))
{
htup.t_self = btitem->bti_itup.t_tid;
heap_fetch(heapRel, SnapshotDirty, &htup, &buffer, NULL);
if (htup.t_data == NULL) /* YES! */
break;
/* Live tuple is being inserted, so continue checking */
ReleaseBuffer(buffer);
chtup = false;
}
cbti = (BTItem) PageGetItem(page, PageGetItemId(page, offset));
htup.t_self = cbti->bti_itup.t_tid;
heap_fetch(heapRel, SnapshotDirty, &htup, &buffer, NULL);
if (htup.t_data != NULL) /* it is a duplicate */
{
TransactionId xwait =
(TransactionIdIsValid(SnapshotDirty->xmin)) ?
SnapshotDirty->xmin : SnapshotDirty->xmax;
/*
* If this tuple is being updated by other transaction
* then we have to wait for its commit/abort.
*/
ReleaseBuffer(buffer);
if (TransactionIdIsValid(xwait))
cbti = (BTItem) PageGetItem(page, curitemid);
htup.t_self = cbti->bti_itup.t_tid;
if (heap_fetch(heapRel, SnapshotDirty, &htup, &hbuffer,
true, NULL))
{
if (nbuf != InvalidBuffer)
_bt_relbuf(rel, nbuf);
/* Tell _bt_doinsert to wait... */
return xwait;
}
/* it is a duplicate */
TransactionId xwait =
(TransactionIdIsValid(SnapshotDirty->xmin)) ?
SnapshotDirty->xmin : SnapshotDirty->xmax;
/*
* Otherwise we have a definite conflict.
*/
elog(ERROR, "Cannot insert a duplicate key into unique index %s",
RelationGetRelationName(rel));
ReleaseBuffer(hbuffer);
/*
* If this tuple is being updated by another transaction,
* then we have to wait for its commit/abort.
*/
if (TransactionIdIsValid(xwait))
{
if (nbuf != InvalidBuffer)
_bt_relbuf(rel, nbuf);
/* Tell _bt_doinsert to wait... */
return xwait;
}
/*
* Otherwise we have a definite conflict.
*/
elog(ERROR, "Cannot insert a duplicate key into unique index %s",
RelationGetRelationName(rel));
}
else
{
/*
* Hmm, if we can't see the tuple, maybe it can be
* marked killed. This logic should match index_getnext
* and btgettuple.
*/
uint16 sv_infomask;
LockBuffer(hbuffer, BUFFER_LOCK_SHARE);
sv_infomask = htup.t_data->t_infomask;
if (HeapTupleSatisfiesVacuum(htup.t_data,
RecentGlobalXmin) ==
HEAPTUPLE_DEAD)
{
curitemid->lp_flags |= LP_DELETE;
SetBufferCommitInfoNeedsSave(buf);
}
if (sv_infomask != htup.t_data->t_infomask)
SetBufferCommitInfoNeedsSave(hbuffer);
LockBuffer(hbuffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(hbuffer);
}
}
/* htup null so no buffer to release */
}
/*

View File

@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.89 2002/05/20 23:51:41 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.90 2002/05/24 18:57:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -271,6 +271,7 @@ btinsert(PG_FUNCTION_ARGS)
char *nulls = (char *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
bool checkUnique = PG_GETARG_BOOL(5);
InsertIndexResult res;
BTItem btitem;
IndexTuple itup;
@@ -280,7 +281,7 @@ btinsert(PG_FUNCTION_ARGS)
itup->t_tid = *ht_ctid;
btitem = _bt_formitem(itup);
res = _bt_doinsert(rel, btitem, rel->rd_uniqueindex, heapRel);
res = _bt_doinsert(rel, btitem, checkUnique, heapRel);
pfree(btitem);
pfree(itup);
@@ -296,14 +297,16 @@ btgettuple(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
bool res;
BTScanOpaque so = (BTScanOpaque) scan->opaque;
Page page;
OffsetNumber offnum;
bool res;
/*
* If we've already initialized this scan, we can just advance it in
* the appropriate direction. If we haven't done so yet, we call a
* routine to get the first item in the scan.
*/
if (ItemPointerIsValid(&(scan->currentItemData)))
{
/*
@@ -312,11 +315,47 @@ btgettuple(PG_FUNCTION_ARGS)
* buffer, too.
*/
_bt_restscan(scan);
/*
* Check to see if we should kill the previously-fetched tuple.
*/
if (scan->kill_prior_tuple)
{
/*
* Yes, so mark it by setting the LP_DELETE bit in the item flags.
*/
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->btso_curbuf);
PageGetItemId(page, offnum)->lp_flags |= LP_DELETE;
/*
* Since this can be redone later if needed, it's treated the
* same as a commit-hint-bit status update for heap tuples:
* we mark the buffer dirty but don't make a WAL log entry.
*/
SetBufferCommitInfoNeedsSave(so->btso_curbuf);
}
/*
* Now continue the scan.
*/
res = _bt_next(scan, dir);
}
else
res = _bt_first(scan, dir);
/*
* Skip killed tuples if asked to.
*/
if (scan->ignore_killed_tuples)
{
while (res)
{
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->btso_curbuf);
if (!ItemIdDeleted(PageGetItemId(page, offnum)))
break;
res = _bt_next(scan, dir);
}
}
/*
* Save heap TID to use it in _bt_restscan. Then release the read
* lock on the buffer so that we aren't blocking other backends.

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.70 2002/05/20 23:51:41 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.71 2002/05/24 18:57:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -425,7 +425,8 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
bool
_bt_first(IndexScanDesc scan, ScanDirection dir)
{
Relation rel;
Relation rel = scan->indexRelation;
BTScanOpaque so = (BTScanOpaque) scan->opaque;
Buffer buf;
Page page;
BTStack stack;
@@ -437,7 +438,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
StrategyNumber strat;
bool res;
int32 result;
BTScanOpaque so;
bool scanFromEnd;
bool continuescan;
ScanKey scankeys = NULL;
@@ -447,14 +447,11 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
j;
StrategyNumber strat_total;
rel = scan->indexRelation;
so = (BTScanOpaque) scan->opaque;
/*
* Order the scan keys in our canonical fashion and eliminate any
* redundant keys.
*/
_bt_orderkeys(rel, so);
_bt_orderkeys(scan);
/*
* Quit now if _bt_orderkeys() discovered that the scan keys can never

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.48 2002/05/20 23:51:41 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.49 2002/05/24 18:57:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -22,6 +22,9 @@
#include "executor/execdebug.h"
static int _bt_getstrategynumber(RegProcedure sk_procedure, StrategyMap map);
/*
* _bt_mkscankey
* Build a scan key that contains comparison data from itup
@@ -174,6 +177,11 @@ _bt_formitem(IndexTuple itup)
* attribute, which can be seen to be correct by considering the above
* example.
*
* Furthermore, we detect the case where the index is unique and we have
* equality quals for all columns. In this case there can be at most one
* (visible) matching tuple. index_getnext uses this to avoid uselessly
* continuing the scan after finding one match.
*
* The initial ordering of the keys is expected to be by attribute already
* (see group_clauses_by_indexkey() in indxpath.c). The task here is to
* standardize the appearance of multiple keys for the same attribute.
@@ -191,8 +199,10 @@ _bt_formitem(IndexTuple itup)
*----------
*/
void
_bt_orderkeys(Relation relation, BTScanOpaque so)
_bt_orderkeys(IndexScanDesc scan)
{
Relation relation = scan->indexRelation;
BTScanOpaque so = (BTScanOpaque) scan->opaque;
ScanKeyData xform[BTMaxStrategyNumber];
bool init[BTMaxStrategyNumber];
int numberOfKeys = so->numberOfKeys;
@@ -208,6 +218,7 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
so->qual_ok = true;
so->numberOfRequiredKeys = 0;
scan->keys_are_unique = false;
if (numberOfKeys < 1)
return; /* done if qual-less scan */
@@ -228,6 +239,17 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
*/
if (cur->sk_flags & SK_ISNULL)
so->qual_ok = false;
else if (relation->rd_index->indisunique &&
relation->rd_rel->relnatts == 1)
{
/* it's a unique index, do we have an equality qual? */
map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
BTMaxStrategyNumber,
1);
j = _bt_getstrategynumber(cur->sk_procedure, map);
if (j == (BTEqualStrategyNumber - 1))
scan->keys_are_unique = true;
}
so->numberOfRequiredKeys = 1;
return;
}
@@ -390,17 +412,8 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
MemSet(init, 0, sizeof(init));
}
/*
* OK, figure out which strategy this key corresponds to
*/
for (j = BTMaxStrategyNumber; --j >= 0;)
{
if (cur->sk_procedure == map->entry[j].sk_procedure)
break;
}
if (j < 0)
elog(ERROR, "_bt_orderkeys: unable to identify operator %u",
cur->sk_procedure);
/* figure out which strategy this key's operator corresponds to */
j = _bt_getstrategynumber(cur->sk_procedure, map);
/* have we seen one of these before? */
if (init[j])
@@ -424,6 +437,34 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
}
so->numberOfKeys = new_numberOfKeys;
/*
* If unique index and we have equality keys for all columns,
* set keys_are_unique flag for higher levels.
*/
if (allEqualSoFar && relation->rd_index->indisunique &&
relation->rd_rel->relnatts == new_numberOfKeys)
scan->keys_are_unique = true;
}
/*
 * _bt_getstrategynumber
 *		Find the btree strategy slot whose procedure matches sk_procedure.
 *
 * The entries of "map" are probed from index BTMaxStrategyNumber - 1 down
 * to 0, and the index of the first entry whose sk_procedure equals the
 * given one is returned.  That index is the strategy number minus 1.
 *
 * If no entry matches, the failure is reported through elog(ERROR).
 */
static int
_bt_getstrategynumber(RegProcedure sk_procedure, StrategyMap map)
{
	int			slot = BTMaxStrategyNumber;

	/* walk the slots from highest to lowest; first hit wins */
	while (slot-- > 0)
	{
		if (map->entry[slot].sk_procedure == sk_procedure)
			return slot;
	}

	elog(ERROR, "_bt_getstrategynumber: unable to identify operator %u",
		 sk_procedure);
	return -1;					/* not a valid slot; keeps the compiler quiet */
}
/*