mirror of
https://github.com/postgres/postgres.git
synced 2025-08-27 07:42:10 +03:00
1. Vacuum is updated for MVCC.
2. Much faster btree tuple deletion in the case when the first index tuple on a page is deleted (no movement to the left page(s)). 3. Remember the blkno of the new root page in the BTPageOpaque of the left/right siblings when the root page is split.
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.41 1999/02/13 23:14:22 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.42 1999/03/28 20:31:56 vadim Exp $
|
||||
*
|
||||
*
|
||||
* INTERFACE ROUTINES
|
||||
@@ -1270,7 +1270,7 @@ l2:
|
||||
newtup->t_data->t_cmin = GetCurrentCommandId();
|
||||
StoreInvalidTransactionId(&(newtup->t_data->t_xmax));
|
||||
newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
|
||||
newtup->t_data->t_infomask |= HEAP_XMAX_INVALID;
|
||||
newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
|
||||
|
||||
/* logically delete old item */
|
||||
TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax));
|
||||
|
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.35 1999/02/13 23:14:34 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.36 1999/03/28 20:31:56 vadim Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -853,6 +853,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright)
|
||||
lopaque->btpo_next = BufferGetBlockNumber(rbuf);
|
||||
ropaque->btpo_next = oopaque->btpo_next;
|
||||
|
||||
lopaque->btpo_parent = ropaque->btpo_parent = oopaque->btpo_parent;
|
||||
|
||||
/*
|
||||
* If the page we're splitting is not the rightmost page at its level
|
||||
* in the tree, then the first (0) entry on the page is the high key
|
||||
@@ -1103,6 +1105,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
|
||||
/* get a new root page */
|
||||
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
|
||||
rootpage = BufferGetPage(rootbuf);
|
||||
rootbknum = BufferGetBlockNumber(rootbuf);
|
||||
_bt_pageinit(rootpage, BufferGetPageSize(rootbuf));
|
||||
|
||||
/* set btree special data */
|
||||
@@ -1119,6 +1122,10 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
|
||||
lpage = BufferGetPage(lbuf);
|
||||
rpage = BufferGetPage(rbuf);
|
||||
|
||||
((BTPageOpaque) PageGetSpecialPointer(lpage))->btpo_parent =
|
||||
((BTPageOpaque) PageGetSpecialPointer(rpage))->btpo_parent =
|
||||
rootbknum;
|
||||
|
||||
/*
|
||||
* step over the high key on the left page while building the left
|
||||
* page pointer.
|
||||
@@ -1156,11 +1163,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
|
||||
pfree(new_item);
|
||||
|
||||
/* write and let go of the root buffer */
|
||||
rootbknum = BufferGetBlockNumber(rootbuf);
|
||||
_bt_wrtbuf(rel, rootbuf);
|
||||
|
||||
/* update metadata page with new root block number */
|
||||
_bt_metaproot(rel, rootbknum, 0);
|
||||
|
||||
WriteNoReleaseBuffer(lbuf);
|
||||
WriteNoReleaseBuffer(rbuf);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1559,6 +1568,7 @@ _bt_shift(Relation rel, Buffer buf, BTStack stack, int keysz,
|
||||
pageop->btpo_flags |= BTP_CHAIN;
|
||||
pageop->btpo_prev = npageop->btpo_prev; /* restore prev */
|
||||
pageop->btpo_next = nbknum; /* next points to the new page */
|
||||
pageop->btpo_parent = npageop->btpo_parent;
|
||||
|
||||
/* init shifted page opaque */
|
||||
npageop->btpo_prev = bknum = BufferGetBlockNumber(buf);
|
||||
|
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.18 1999/02/13 23:14:35 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.19 1999/03/28 20:31:57 vadim Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||
@@ -421,6 +421,8 @@ _bt_pageinit(Page page, Size size)
|
||||
MemSet(page, 0, size);
|
||||
|
||||
PageInit(page, size, sizeof(BTPageOpaqueData));
|
||||
((BTPageOpaque) PageGetSpecialPointer(page))->btpo_parent =
|
||||
InvalidBlockNumber;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.36 1999/02/21 03:48:27 scrappy Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.37 1999/03/28 20:31:58 vadim Exp $
|
||||
*
|
||||
* NOTES
|
||||
* This file contains only the public interface routines.
|
||||
@@ -372,11 +372,6 @@ btinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation
|
||||
pfree(btitem);
|
||||
pfree(itup);
|
||||
|
||||
#ifdef NOT_USED
|
||||
/* adjust any active scans that will be affected by this insertion */
|
||||
_bt_adjscans(rel, &(res->pointerData), BT_INSERT);
|
||||
#endif
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -396,15 +391,9 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
|
||||
|
||||
if (ItemPointerIsValid(&(scan->currentItemData)))
|
||||
{
|
||||
|
||||
/*
|
||||
* Now we don't adjust scans on insertion (comments in
|
||||
* nbtscan.c:_bt_scandel()) and I hope that we will unlock current
|
||||
* index page before leaving index in LLL: this means that current
|
||||
* index tuple could be moved right before we get here and we have
|
||||
* to restore our scan position. We save heap TID pointed by
|
||||
* current index tuple and use it. This will work until we start
|
||||
* to re-use (move heap tuples) without vacuum... - vadim 07/29/98
|
||||
* Restore scan position using heap TID returned
|
||||
* by previous call to btgettuple().
|
||||
*/
|
||||
_bt_restscan(scan);
|
||||
res = _bt_next(scan, dir);
|
||||
@@ -612,16 +601,12 @@ void
|
||||
btdelete(Relation rel, ItemPointer tid)
|
||||
{
|
||||
/* adjust any active scans that will be affected by this deletion */
|
||||
_bt_adjscans(rel, tid, BT_DELETE);
|
||||
_bt_adjscans(rel, tid);
|
||||
|
||||
/* delete the data from the page */
|
||||
_bt_pagedel(rel, tid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reasons are in btgettuple... We have to find index item that
|
||||
* points to heap tuple returned by previous call to btgettuple().
|
||||
*/
|
||||
static void
|
||||
_bt_restscan(IndexScanDesc scan)
|
||||
{
|
||||
@@ -637,6 +622,20 @@ _bt_restscan(IndexScanDesc scan)
|
||||
BTItem item;
|
||||
BlockNumber blkno;
|
||||
|
||||
/*
|
||||
* We use this as flag when first index tuple on page
|
||||
* is deleted but we do not move left (this would
|
||||
* slow down vacuum) - so we set current->ip_posid
|
||||
* before first index tuple on the current page
|
||||
* (_bt_step will move it right)...
|
||||
*/
|
||||
if (!ItemPointerIsValid(&target))
|
||||
{
|
||||
ItemPointerSetOffsetNumber(&(scan->currentItemData),
|
||||
OffsetNumberPrev(P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY));
|
||||
return;
|
||||
}
|
||||
|
||||
if (maxoff >= offnum)
|
||||
{
|
||||
|
||||
|
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.19 1999/02/13 23:14:36 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.20 1999/03/28 20:31:58 vadim Exp $
|
||||
*
|
||||
*
|
||||
* NOTES
|
||||
@@ -43,8 +43,7 @@ typedef BTScanListData *BTScanList;
|
||||
|
||||
static BTScanList BTScans = (BTScanList) NULL;
|
||||
|
||||
static void _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno);
|
||||
static bool _bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
|
||||
static void _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
|
||||
|
||||
/*
|
||||
* _bt_regscan() -- register a new scan.
|
||||
@@ -91,7 +90,7 @@ _bt_dropscan(IndexScanDesc scan)
|
||||
* for a given deletion or insertion
|
||||
*/
|
||||
void
|
||||
_bt_adjscans(Relation rel, ItemPointer tid, int op)
|
||||
_bt_adjscans(Relation rel, ItemPointer tid)
|
||||
{
|
||||
BTScanList l;
|
||||
Oid relid;
|
||||
@@ -100,41 +99,25 @@ _bt_adjscans(Relation rel, ItemPointer tid, int op)
|
||||
for (l = BTScans; l != (BTScanList) NULL; l = l->btsl_next)
|
||||
{
|
||||
if (relid == RelationGetRelid(l->btsl_scan->relation))
|
||||
_bt_scandel(l->btsl_scan, op,
|
||||
_bt_scandel(l->btsl_scan,
|
||||
ItemPointerGetBlockNumber(tid),
|
||||
ItemPointerGetOffsetNumber(tid));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* _bt_scandel() -- adjust a single scan
|
||||
* _bt_scandel() -- adjust a single scan on deletion
|
||||
*
|
||||
* because each index page is always maintained as an ordered array of
|
||||
* index tuples, the index tuples on a given page shift beneath any
|
||||
* given scan. an index modification "behind" a scan position (i.e.,
|
||||
* same page, lower or equal offset number) will therefore force us to
|
||||
* adjust the scan in the following ways:
|
||||
*
|
||||
* - on insertion, we shift the scan forward by one item.
|
||||
* - on deletion, we shift the scan backward by one item.
|
||||
*
|
||||
* note that:
|
||||
*
|
||||
* - we need not worry about the actual ScanDirection of the scan
|
||||
* itself, since the problem is that the "current" scan position has
|
||||
* shifted.
|
||||
* - modifications "ahead" of our scan position do not change the
|
||||
* array index of the current scan position and so can be ignored.
|
||||
*/
|
||||
static void
|
||||
_bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno)
|
||||
_bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
|
||||
{
|
||||
ItemPointer current;
|
||||
Buffer buf;
|
||||
BTScanOpaque so;
|
||||
|
||||
if (!_bt_scantouched(scan, blkno, offno))
|
||||
return;
|
||||
ItemPointer current;
|
||||
Buffer buf;
|
||||
BTScanOpaque so;
|
||||
OffsetNumber start;
|
||||
Page page;
|
||||
BTPageOpaque opaque;
|
||||
|
||||
so = (BTScanOpaque) scan->opaque;
|
||||
buf = so->btso_curbuf;
|
||||
@@ -144,33 +127,23 @@ _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno)
|
||||
&& ItemPointerGetBlockNumber(current) == blkno
|
||||
&& ItemPointerGetOffsetNumber(current) >= offno)
|
||||
{
|
||||
switch (op)
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
|
||||
if (ItemPointerGetOffsetNumber(current) == start)
|
||||
ItemPointerSetInvalid(&(so->curHeapIptr));
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Problems occur when the current scan page is split!
|
||||
* We saw "Non-functional updates" (ie index tuples were read twice)
|
||||
* and partial updates ("good" tuples were not read at all) - due to
|
||||
* losing scan position here. Look @ nbtree.c:btgettuple()
|
||||
* what we do now... - vadim 07/29/98
|
||||
case BT_INSERT:
|
||||
_bt_step(scan, &buf, ForwardScanDirection);
|
||||
break;
|
||||
*/
|
||||
case BT_DELETE:
|
||||
_bt_step(scan, &buf, BackwardScanDirection);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "_bt_scandel: bad operation '%d'", op);
|
||||
/* NOTREACHED */
|
||||
}
|
||||
so->btso_curbuf = buf;
|
||||
if (ItemPointerIsValid(current))
|
||||
{
|
||||
Page page = BufferGetPage(buf);
|
||||
BTItem btitem = (BTItem) PageGetItem(page,
|
||||
PageGetItemId(page, ItemPointerGetOffsetNumber(current)));
|
||||
_bt_step(scan, &buf, BackwardScanDirection);
|
||||
so->btso_curbuf = buf;
|
||||
if (ItemPointerIsValid(current))
|
||||
{
|
||||
Page pg = BufferGetPage(buf);
|
||||
BTItem btitem = (BTItem) PageGetItem(pg,
|
||||
PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
|
||||
|
||||
so->curHeapIptr = btitem->bti_itup.t_tid;
|
||||
so->curHeapIptr = btitem->bti_itup.t_tid;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -179,65 +152,39 @@ _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno)
|
||||
&& ItemPointerGetBlockNumber(current) == blkno
|
||||
&& ItemPointerGetOffsetNumber(current) >= offno)
|
||||
{
|
||||
ItemPointerData tmp;
|
||||
|
||||
tmp = *current;
|
||||
*current = scan->currentItemData;
|
||||
scan->currentItemData = tmp;
|
||||
so->btso_curbuf = so->btso_mrkbuf;
|
||||
so->btso_mrkbuf = buf;
|
||||
buf = so->btso_curbuf;
|
||||
switch (op)
|
||||
{
|
||||
/*
|
||||
* ...comments are above...
|
||||
case BT_INSERT:
|
||||
_bt_step(scan, &buf, ForwardScanDirection);
|
||||
break;
|
||||
*/
|
||||
case BT_DELETE:
|
||||
_bt_step(scan, &buf, BackwardScanDirection);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "_bt_scandel: bad operation '%d'", op);
|
||||
/* NOTREACHED */
|
||||
}
|
||||
so->btso_curbuf = so->btso_mrkbuf;
|
||||
so->btso_mrkbuf = buf;
|
||||
tmp = *current;
|
||||
*current = scan->currentItemData;
|
||||
scan->currentItemData = tmp;
|
||||
if (ItemPointerIsValid(current))
|
||||
{
|
||||
Page page = BufferGetPage(buf);
|
||||
BTItem btitem = (BTItem) PageGetItem(page,
|
||||
PageGetItemId(page, ItemPointerGetOffsetNumber(current)));
|
||||
page = BufferGetPage(so->btso_mrkbuf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
|
||||
|
||||
so->mrkHeapIptr = btitem->bti_itup.t_tid;
|
||||
if (ItemPointerGetOffsetNumber(current) == start)
|
||||
ItemPointerSetInvalid(&(so->mrkHeapIptr));
|
||||
else
|
||||
{
|
||||
ItemPointerData tmp;
|
||||
|
||||
tmp = *current;
|
||||
*current = scan->currentItemData;
|
||||
scan->currentItemData = tmp;
|
||||
so->btso_curbuf = so->btso_mrkbuf;
|
||||
so->btso_mrkbuf = buf;
|
||||
buf = so->btso_curbuf;
|
||||
|
||||
_bt_step(scan, &buf, BackwardScanDirection);
|
||||
|
||||
so->btso_curbuf = so->btso_mrkbuf;
|
||||
so->btso_mrkbuf = buf;
|
||||
tmp = *current;
|
||||
*current = scan->currentItemData;
|
||||
scan->currentItemData = tmp;
|
||||
if (ItemPointerIsValid(current))
|
||||
{
|
||||
Page pg = BufferGetPage(buf);
|
||||
BTItem btitem = (BTItem) PageGetItem(pg,
|
||||
PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
|
||||
|
||||
so->mrkHeapIptr = btitem->bti_itup.t_tid;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* _bt_scantouched() -- check to see if a scan is affected by a given
|
||||
* change to the index
|
||||
*/
|
||||
static bool
|
||||
_bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
|
||||
{
|
||||
ItemPointer current;
|
||||
|
||||
current = &(scan->currentItemData);
|
||||
if (ItemPointerIsValid(current)
|
||||
&& ItemPointerGetBlockNumber(current) == blkno
|
||||
&& ItemPointerGetOffsetNumber(current) >= offno)
|
||||
return true;
|
||||
|
||||
current = &(scan->currentMarkData);
|
||||
if (ItemPointerIsValid(current)
|
||||
&& ItemPointerGetBlockNumber(current) == blkno
|
||||
&& ItemPointerGetOffsetNumber(current) >= offno)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.41 1999/02/21 03:48:27 scrappy Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.42 1999/03/28 20:31:58 vadim Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -706,15 +706,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
|
||||
so = (BTScanOpaque) scan->opaque;
|
||||
current = &(scan->currentItemData);
|
||||
|
||||
/*
|
||||
* XXX 10 may 91: somewhere there's a bug in our management of the
|
||||
* cached buffer for this scan. wei discovered it. the following is
|
||||
* a workaround so he can work until i figure out what's going on.
|
||||
*/
|
||||
|
||||
if (!BufferIsValid(so->btso_curbuf))
|
||||
so->btso_curbuf = _bt_getbuf(rel, ItemPointerGetBlockNumber(current),
|
||||
BT_READ);
|
||||
Assert (BufferIsValid(so->btso_curbuf));
|
||||
|
||||
/* we still have the buffer pinned and locked */
|
||||
buf = so->btso_curbuf;
|
||||
@@ -1069,7 +1061,11 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
|
||||
rel = scan->relation;
|
||||
current = &(scan->currentItemData);
|
||||
offnum = ItemPointerGetOffsetNumber(current);
|
||||
/*
|
||||
* Don't use ItemPointerGetOffsetNumber here: it could trip an
|
||||
* assertion, because ip_posid may be equal to 0.
|
||||
*/
|
||||
offnum = current->ip_posid;
|
||||
page = BufferGetPage(*bufP);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
so = (BTScanOpaque) scan->opaque;
|
||||
|
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.32 1999/02/13 23:14:49 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.33 1999/03/28 20:31:59 vadim Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Transaction aborts can now occur two ways:
|
||||
@@ -933,7 +933,10 @@ CommitTransaction()
|
||||
* 11/26/96
|
||||
*/
|
||||
if (MyProc != (PROC *) NULL)
|
||||
{
|
||||
MyProc->xid = InvalidTransactionId;
|
||||
MyProc->xmin = InvalidTransactionId;
|
||||
}
|
||||
}
|
||||
|
||||
/* --------------------------------
|
||||
@@ -951,7 +954,10 @@ AbortTransaction()
|
||||
* 11/26/96
|
||||
*/
|
||||
if (MyProc != (PROC *) NULL)
|
||||
{
|
||||
MyProc->xid = InvalidTransactionId;
|
||||
MyProc->xmin = InvalidTransactionId;
|
||||
}
|
||||
|
||||
/* ----------------
|
||||
* check the current transaction state
|
||||
|
Reference in New Issue
Block a user