mirror of
https://github.com/postgres/postgres.git
synced 2025-11-15 03:41:20 +03:00
pgindent run.
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.102 2003/07/28 00:09:14 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.103 2003/08/04 00:43:15 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -432,9 +432,9 @@ _bt_insertonpg(Relation rel,
|
||||
*
|
||||
* must write-lock that page before releasing write lock on
|
||||
* current page; else someone else's _bt_check_unique scan
|
||||
* could fail to see our insertion. write locks on intermediate
|
||||
* dead pages won't do because we don't know when they will get
|
||||
* de-linked from the tree.
|
||||
* could fail to see our insertion. write locks on
|
||||
* intermediate dead pages won't do because we don't know when
|
||||
* they will get de-linked from the tree.
|
||||
*/
|
||||
Buffer rbuf = InvalidBuffer;
|
||||
|
||||
@@ -523,9 +523,10 @@ _bt_insertonpg(Relation rel,
|
||||
/*
|
||||
* If we are doing this insert because we split a page that was
|
||||
* the only one on its tree level, but was not the root, it may
|
||||
* have been the "fast root". We need to ensure that the fast root
|
||||
* link points at or above the current page. We can safely acquire
|
||||
* a lock on the metapage here --- see comments for _bt_newroot().
|
||||
* have been the "fast root". We need to ensure that the fast
|
||||
* root link points at or above the current page. We can safely
|
||||
* acquire a lock on the metapage here --- see comments for
|
||||
* _bt_newroot().
|
||||
*/
|
||||
if (split_only_page)
|
||||
{
|
||||
@@ -1135,7 +1136,7 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
|
||||
*
|
||||
* On entry, buf and rbuf are the left and right split pages, which we
|
||||
* still hold write locks on per the L&Y algorithm. We release the
|
||||
* write locks once we have write lock on the parent page. (Any sooner,
|
||||
* write locks once we have write lock on the parent page. (Any sooner,
|
||||
* and it'd be possible for some other process to try to split or delete
|
||||
* one of these pages, and get confused because it cannot find the downlink.)
|
||||
*
|
||||
@@ -1155,19 +1156,19 @@ _bt_insert_parent(Relation rel,
|
||||
bool is_only)
|
||||
{
|
||||
/*
|
||||
* Here we have to do something Lehman and Yao don't talk about:
|
||||
* deal with a root split and construction of a new root. If our
|
||||
* stack is empty then we have just split a node on what had been
|
||||
* the root level when we descended the tree. If it was still the
|
||||
* root then we perform a new-root construction. If it *wasn't*
|
||||
* the root anymore, search to find the next higher level that
|
||||
* someone constructed meanwhile, and find the right place to insert
|
||||
* as for the normal case.
|
||||
* Here we have to do something Lehman and Yao don't talk about: deal
|
||||
* with a root split and construction of a new root. If our stack is
|
||||
* empty then we have just split a node on what had been the root
|
||||
* level when we descended the tree. If it was still the root then we
|
||||
* perform a new-root construction. If it *wasn't* the root anymore,
|
||||
* search to find the next higher level that someone constructed
|
||||
* meanwhile, and find the right place to insert as for the normal
|
||||
* case.
|
||||
*
|
||||
* If we have to search for the parent level, we do so by
|
||||
* re-descending from the root. This is not super-efficient,
|
||||
* but it's rare enough not to matter. (This path is also taken
|
||||
* when called from WAL recovery --- we have no stack in that case.)
|
||||
* If we have to search for the parent level, we do so by re-descending
|
||||
* from the root. This is not super-efficient, but it's rare enough
|
||||
* not to matter. (This path is also taken when called from WAL
|
||||
* recovery --- we have no stack in that case.)
|
||||
*/
|
||||
if (is_root)
|
||||
{
|
||||
@@ -1222,9 +1223,9 @@ _bt_insert_parent(Relation rel,
|
||||
/*
|
||||
* Find the parent buffer and get the parent page.
|
||||
*
|
||||
* Oops - if we were moved right then we need to change stack
|
||||
* item! We want to find parent pointing to where we are,
|
||||
* right ? - vadim 05/27/97
|
||||
* Oops - if we were moved right then we need to change stack item!
|
||||
* We want to find parent pointing to where we are, right ? -
|
||||
* vadim 05/27/97
|
||||
*/
|
||||
ItemPointerSet(&(stack->bts_btitem.bti_itup.t_tid),
|
||||
bknum, P_HIKEY);
|
||||
@@ -1296,16 +1297,16 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
|
||||
|
||||
/*
|
||||
* start = InvalidOffsetNumber means "search the whole page".
|
||||
* We need this test anyway due to possibility that
|
||||
* page has a high key now when it didn't before.
|
||||
* We need this test anyway due to possibility that page has a
|
||||
* high key now when it didn't before.
|
||||
*/
|
||||
if (start < minoff)
|
||||
start = minoff;
|
||||
|
||||
/*
|
||||
* These loops will check every item on the page --- but in an
|
||||
* order that's attuned to the probability of where it actually
|
||||
* is. Scan to the right first, then to the left.
|
||||
* order that's attuned to the probability of where it
|
||||
* actually is. Scan to the right first, then to the left.
|
||||
*/
|
||||
for (offnum = start;
|
||||
offnum <= maxoff;
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.66 2003/07/21 20:29:39 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.67 2003/08/04 00:43:15 momjian Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||
@@ -181,8 +181,8 @@ _bt_getroot(Relation rel, int access)
|
||||
/*
|
||||
* Metadata initialized by someone else. In order to
|
||||
* guarantee no deadlocks, we have to release the metadata
|
||||
* page and start all over again. (Is that really true?
|
||||
* But it's hardly worth trying to optimize this case.)
|
||||
* page and start all over again. (Is that really true? But
|
||||
* it's hardly worth trying to optimize this case.)
|
||||
*/
|
||||
_bt_relbuf(rel, metabuf);
|
||||
return _bt_getroot(rel, access);
|
||||
@@ -190,8 +190,8 @@ _bt_getroot(Relation rel, int access)
|
||||
|
||||
/*
|
||||
* Get, initialize, write, and leave a lock of the appropriate
|
||||
* type on the new root page. Since this is the first page in
|
||||
* the tree, it's a leaf as well as the root.
|
||||
* type on the new root page. Since this is the first page in the
|
||||
* tree, it's a leaf as well as the root.
|
||||
*/
|
||||
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
|
||||
rootblkno = BufferGetBlockNumber(rootbuf);
|
||||
@@ -240,7 +240,7 @@ _bt_getroot(Relation rel, int access)
|
||||
_bt_wrtnorelbuf(rel, rootbuf);
|
||||
|
||||
/*
|
||||
* swap root write lock for read lock. There is no danger of
|
||||
* swap root write lock for read lock. There is no danger of
|
||||
* anyone else accessing the new root page while it's unlocked,
|
||||
* since no one else knows where it is yet.
|
||||
*/
|
||||
@@ -284,8 +284,8 @@ _bt_getroot(Relation rel, int access)
|
||||
}
|
||||
|
||||
/*
|
||||
* By here, we have a pin and read lock on the root page, and no
|
||||
* lock set on the metadata page. Return the root page's buffer.
|
||||
* By here, we have a pin and read lock on the root page, and no lock
|
||||
* set on the metadata page. Return the root page's buffer.
|
||||
*/
|
||||
return rootbuf;
|
||||
}
|
||||
@@ -299,7 +299,7 @@ _bt_getroot(Relation rel, int access)
|
||||
* By the time we acquire lock on the root page, it might have been split and
|
||||
* not be the true root anymore. This is okay for the present uses of this
|
||||
* routine; we only really need to be able to move up at least one tree level
|
||||
* from whatever non-root page we were at. If we ever do need to lock the
|
||||
* from whatever non-root page we were at. If we ever do need to lock the
|
||||
* one true root page, we could loop here, re-reading the metapage on each
|
||||
* failure. (Note that it wouldn't do to hold the lock on the metapage while
|
||||
* moving to the root --- that'd deadlock against any concurrent root split.)
|
||||
@@ -406,9 +406,9 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
|
||||
* First see if the FSM knows of any free pages.
|
||||
*
|
||||
* We can't trust the FSM's report unreservedly; we have to check
|
||||
* that the page is still free. (For example, an already-free page
|
||||
* could have been re-used between the time the last VACUUM scanned
|
||||
* it and the time the VACUUM made its FSM updates.)
|
||||
* that the page is still free. (For example, an already-free
|
||||
* page could have been re-used between the time the last VACUUM
|
||||
* scanned it and the time the VACUUM made its FSM updates.)
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
@@ -431,10 +431,10 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
|
||||
/*
|
||||
* Extend the relation by one page.
|
||||
*
|
||||
* We have to use a lock to ensure no one else is extending the rel at
|
||||
* the same time, else we will both try to initialize the same new
|
||||
* page. We can skip locking for new or temp relations, however,
|
||||
* since no one else could be accessing them.
|
||||
* We have to use a lock to ensure no one else is extending the rel
|
||||
* at the same time, else we will both try to initialize the same
|
||||
* new page. We can skip locking for new or temp relations,
|
||||
* however, since no one else could be accessing them.
|
||||
*/
|
||||
needLock = !(rel->rd_isnew || rel->rd_istemp);
|
||||
|
||||
@@ -444,8 +444,8 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
|
||||
buf = ReadBuffer(rel, P_NEW);
|
||||
|
||||
/*
|
||||
* Release the file-extension lock; it's now OK for someone else to
|
||||
* extend the relation some more.
|
||||
* Release the file-extension lock; it's now OK for someone else
|
||||
* to extend the relation some more.
|
||||
*/
|
||||
if (needLock)
|
||||
UnlockPage(rel, 0, ExclusiveLock);
|
||||
@@ -484,7 +484,7 @@ _bt_relbuf(Relation rel, Buffer buf)
|
||||
* and a pin on the buffer.
|
||||
*
|
||||
* NOTE: actually, the buffer manager just marks the shared buffer page
|
||||
* dirty here; the real I/O happens later. This is okay since we are not
|
||||
* dirty here; the real I/O happens later. This is okay since we are not
|
||||
* relying on write ordering anyway. The WAL mechanism is responsible for
|
||||
* guaranteeing correctness after a crash.
|
||||
*/
|
||||
@@ -534,13 +534,14 @@ _bt_page_recyclable(Page page)
|
||||
BTPageOpaque opaque;
|
||||
|
||||
/*
|
||||
* It's possible to find an all-zeroes page in an index --- for example,
|
||||
* a backend might successfully extend the relation one page and then
|
||||
* crash before it is able to make a WAL entry for adding the page.
|
||||
* If we find a zeroed page then reclaim it.
|
||||
* It's possible to find an all-zeroes page in an index --- for
|
||||
* example, a backend might successfully extend the relation one page
|
||||
* and then crash before it is able to make a WAL entry for adding the
|
||||
* page. If we find a zeroed page then reclaim it.
|
||||
*/
|
||||
if (PageIsNew(page))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Otherwise, recycle if deleted and too old to have any processes
|
||||
* interested in it.
|
||||
@@ -565,7 +566,7 @@ _bt_page_recyclable(Page page)
|
||||
* mistake. On exit, metapage data is correct and we no longer have
|
||||
* a pin or lock on the metapage.
|
||||
*
|
||||
* Actually this is not used for splitting on-the-fly anymore. It's only used
|
||||
* Actually this is not used for splitting on-the-fly anymore. It's only used
|
||||
* in nbtsort.c at the completion of btree building, where we know we have
|
||||
* sole access to the index anyway.
|
||||
*/
|
||||
@@ -623,7 +624,7 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level)
|
||||
/*
|
||||
* Delete item(s) from a btree page.
|
||||
*
|
||||
* This must only be used for deleting leaf items. Deleting an item on a
|
||||
* This must only be used for deleting leaf items. Deleting an item on a
|
||||
* non-leaf page has to be done as part of an atomic action that includes
|
||||
* deleting the page it points to.
|
||||
*
|
||||
@@ -646,9 +647,7 @@ _bt_delitems(Relation rel, Buffer buf,
|
||||
* adjusting item numbers for previous deletions.
|
||||
*/
|
||||
for (i = nitems - 1; i >= 0; i--)
|
||||
{
|
||||
PageIndexTupleDelete(page, itemnos[i]);
|
||||
}
|
||||
|
||||
/* XLOG stuff */
|
||||
if (!rel->rd_istemp)
|
||||
@@ -666,8 +665,8 @@ _bt_delitems(Relation rel, Buffer buf,
|
||||
rdata[0].next = &(rdata[1]);
|
||||
|
||||
/*
|
||||
* The target-offsets array is not in the buffer, but pretend
|
||||
* that it is. When XLogInsert stores the whole buffer, the offsets
|
||||
* The target-offsets array is not in the buffer, but pretend that
|
||||
* it is. When XLogInsert stores the whole buffer, the offsets
|
||||
* array need not be stored too.
|
||||
*/
|
||||
rdata[1].buffer = buf;
|
||||
@@ -701,7 +700,7 @@ _bt_delitems(Relation rel, Buffer buf,
|
||||
* may currently be trying to follow links leading to the page; they have to
|
||||
* be allowed to use its right-link to recover. See nbtree/README.
|
||||
*
|
||||
* On entry, the target buffer must be pinned and read-locked. This lock and
|
||||
* On entry, the target buffer must be pinned and read-locked. This lock and
|
||||
* pin will be dropped before exiting.
|
||||
*
|
||||
* Returns the number of pages successfully deleted (zero on failure; could
|
||||
@@ -714,7 +713,7 @@ _bt_delitems(Relation rel, Buffer buf,
|
||||
int
|
||||
_bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
{
|
||||
BlockNumber target,
|
||||
BlockNumber target,
|
||||
leftsib,
|
||||
rightsib,
|
||||
parent;
|
||||
@@ -740,17 +739,18 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
BTPageOpaque opaque;
|
||||
|
||||
/*
|
||||
* We can never delete rightmost pages nor root pages. While at it,
|
||||
* We can never delete rightmost pages nor root pages. While at it,
|
||||
* check that page is not already deleted and is empty.
|
||||
*/
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
|
||||
P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page))
|
||||
P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page))
|
||||
{
|
||||
_bt_relbuf(rel, buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Save info about page, including a copy of its high key (it must
|
||||
* have one, being non-rightmost).
|
||||
@@ -760,12 +760,13 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
leftsib = opaque->btpo_prev;
|
||||
itemid = PageGetItemId(page, P_HIKEY);
|
||||
targetkey = CopyBTItem((BTItem) PageGetItem(page, itemid));
|
||||
|
||||
/*
|
||||
* We need to get an approximate pointer to the page's parent page.
|
||||
* Use the standard search mechanism to search for the page's high key;
|
||||
* this will give us a link to either the current parent or someplace
|
||||
* to its left (if there are multiple equal high keys). To avoid
|
||||
* deadlocks, we'd better drop the target page lock first.
|
||||
* Use the standard search mechanism to search for the page's high
|
||||
* key; this will give us a link to either the current parent or
|
||||
* someplace to its left (if there are multiple equal high keys). To
|
||||
* avoid deadlocks, we'd better drop the target page lock first.
|
||||
*/
|
||||
_bt_relbuf(rel, buf);
|
||||
/* we need a scan key to do our search, so build one */
|
||||
@@ -775,9 +776,11 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
&lbuf, BT_READ);
|
||||
/* don't need a pin on that either */
|
||||
_bt_relbuf(rel, lbuf);
|
||||
|
||||
/*
|
||||
* If we are trying to delete an interior page, _bt_search did more
|
||||
* than we needed. Locate the stack item pointing to our parent level.
|
||||
* than we needed. Locate the stack item pointing to our parent
|
||||
* level.
|
||||
*/
|
||||
ilevel = 0;
|
||||
for (;;)
|
||||
@@ -789,10 +792,12 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
stack = stack->bts_parent;
|
||||
ilevel++;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have to lock the pages we need to modify in the standard order:
|
||||
* moving right, then up. Else we will deadlock against other writers.
|
||||
*
|
||||
* moving right, then up. Else we will deadlock against other
|
||||
* writers.
|
||||
*
|
||||
* So, we need to find and write-lock the current left sibling of the
|
||||
* target page. The sibling that was current a moment ago could have
|
||||
* split, so we may have to move right. This search could fail if
|
||||
@@ -823,21 +828,24 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
}
|
||||
else
|
||||
lbuf = InvalidBuffer;
|
||||
|
||||
/*
|
||||
* Next write-lock the target page itself. It should be okay to take just
|
||||
* a write lock not a superexclusive lock, since no scans would stop on an
|
||||
* empty page.
|
||||
* Next write-lock the target page itself. It should be okay to take
|
||||
* just a write lock not a superexclusive lock, since no scans would
|
||||
* stop on an empty page.
|
||||
*/
|
||||
buf = _bt_getbuf(rel, target, BT_WRITE);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* Check page is still empty etc, else abandon deletion. The empty check
|
||||
* is necessary since someone else might have inserted into it while
|
||||
* we didn't have it locked; the others are just for paranoia's sake.
|
||||
* Check page is still empty etc, else abandon deletion. The empty
|
||||
* check is necessary since someone else might have inserted into it
|
||||
* while we didn't have it locked; the others are just for paranoia's
|
||||
* sake.
|
||||
*/
|
||||
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
|
||||
P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page))
|
||||
P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page))
|
||||
{
|
||||
_bt_relbuf(rel, buf);
|
||||
if (BufferIsValid(lbuf))
|
||||
@@ -846,14 +854,17 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
}
|
||||
if (opaque->btpo_prev != leftsib)
|
||||
elog(ERROR, "left link changed unexpectedly");
|
||||
|
||||
/*
|
||||
* And next write-lock the (current) right sibling.
|
||||
*/
|
||||
rightsib = opaque->btpo_next;
|
||||
rbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
|
||||
|
||||
/*
|
||||
* Next find and write-lock the current parent of the target page.
|
||||
* This is essentially the same as the corresponding step of splitting.
|
||||
* This is essentially the same as the corresponding step of
|
||||
* splitting.
|
||||
*/
|
||||
ItemPointerSet(&(stack->bts_btitem.bti_itup.t_tid),
|
||||
target, P_HIKEY);
|
||||
@@ -863,10 +874,11 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
RelationGetRelationName(rel));
|
||||
parent = stack->bts_blkno;
|
||||
poffset = stack->bts_offset;
|
||||
|
||||
/*
|
||||
* If the target is the rightmost child of its parent, then we can't
|
||||
* delete, unless it's also the only child --- in which case the parent
|
||||
* changes to half-dead status.
|
||||
* delete, unless it's also the only child --- in which case the
|
||||
* parent changes to half-dead status.
|
||||
*/
|
||||
page = BufferGetPage(pbuf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
@@ -893,12 +905,13 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
if (OffsetNumberNext(P_FIRSTDATAKEY(opaque)) == maxoff)
|
||||
parent_one_child = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are deleting the next-to-last page on the target's level,
|
||||
* then the rightsib is a candidate to become the new fast root.
|
||||
* (In theory, it might be possible to push the fast root even further
|
||||
* down, but the odds of doing so are slim, and the locking considerations
|
||||
* daunting.)
|
||||
* then the rightsib is a candidate to become the new fast root. (In
|
||||
* theory, it might be possible to push the fast root even further
|
||||
* down, but the odds of doing so are slim, and the locking
|
||||
* considerations daunting.)
|
||||
*
|
||||
* We can safely acquire a lock on the metapage here --- see comments for
|
||||
* _bt_newroot().
|
||||
@@ -914,12 +927,13 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
|
||||
metapg = BufferGetPage(metabuf);
|
||||
metad = BTPageGetMeta(metapg);
|
||||
|
||||
/*
|
||||
* The expected case here is btm_fastlevel == targetlevel+1;
|
||||
* if the fastlevel is <= targetlevel, something is wrong, and we
|
||||
* choose to overwrite it to fix it.
|
||||
* if the fastlevel is <= targetlevel, something is wrong, and
|
||||
* we choose to overwrite it to fix it.
|
||||
*/
|
||||
if (metad->btm_fastlevel > targetlevel+1)
|
||||
if (metad->btm_fastlevel > targetlevel + 1)
|
||||
{
|
||||
/* no update wanted */
|
||||
_bt_relbuf(rel, metabuf);
|
||||
@@ -937,9 +951,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
|
||||
/*
|
||||
* Update parent. The normal case is a tad tricky because we want to
|
||||
* delete the target's downlink and the *following* key. Easiest way is
|
||||
* to copy the right sibling's downlink over the target downlink, and then
|
||||
* delete the following item.
|
||||
* delete the target's downlink and the *following* key. Easiest way
|
||||
* is to copy the right sibling's downlink over the target downlink,
|
||||
* and then delete the following item.
|
||||
*/
|
||||
page = BufferGetPage(pbuf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
@@ -950,7 +964,7 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
}
|
||||
else
|
||||
{
|
||||
OffsetNumber nextoffset;
|
||||
OffsetNumber nextoffset;
|
||||
|
||||
itemid = PageGetItemId(page, poffset);
|
||||
btitem = (BTItem) PageGetItem(page, itemid);
|
||||
@@ -968,8 +982,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
}
|
||||
|
||||
/*
|
||||
* Update siblings' side-links. Note the target page's side-links will
|
||||
* continue to point to the siblings.
|
||||
* Update siblings' side-links. Note the target page's side-links
|
||||
* will continue to point to the siblings.
|
||||
*/
|
||||
if (BufferIsValid(lbuf))
|
||||
{
|
||||
@@ -1096,10 +1110,11 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
|
||||
_bt_wrtbuf(rel, lbuf);
|
||||
|
||||
/*
|
||||
* If parent became half dead, recurse to try to delete it. Otherwise,
|
||||
* if right sibling is empty and is now the last child of the parent,
|
||||
* recurse to try to delete it. (These cases cannot apply at the same
|
||||
* time, though the second case might itself recurse to the first.)
|
||||
* If parent became half dead, recurse to try to delete it.
|
||||
* Otherwise, if right sibling is empty and is now the last child of
|
||||
* the parent, recurse to try to delete it. (These cases cannot apply
|
||||
* at the same time, though the second case might itself recurse to
|
||||
* the first.)
|
||||
*/
|
||||
if (parent_half_dead)
|
||||
{
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.103 2003/07/21 20:29:39 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.104 2003/08/04 00:43:15 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -580,19 +580,20 @@ btbulkdelete(PG_FUNCTION_ARGS)
|
||||
|
||||
/*
|
||||
* The outer loop iterates over index leaf pages, the inner over items
|
||||
* on a leaf page. We issue just one _bt_delitems() call per page,
|
||||
* so as to minimize WAL traffic.
|
||||
* on a leaf page. We issue just one _bt_delitems() call per page, so
|
||||
* as to minimize WAL traffic.
|
||||
*
|
||||
* Note that we exclusive-lock every leaf page containing data items,
|
||||
* in sequence left to right. It sounds attractive to only exclusive-lock
|
||||
* those containing items we need to delete, but unfortunately that
|
||||
* is not safe: we could then pass a stopped indexscan, which could
|
||||
* in rare cases lead to deleting the item it needs to find when it
|
||||
* resumes. (See _bt_restscan --- this could only happen if an indexscan
|
||||
* stops on a deletable item and then a page split moves that item
|
||||
* into a page further to its right, which the indexscan will have no
|
||||
* pin on.) We can skip obtaining exclusive lock on empty pages
|
||||
* though, since no indexscan could be stopped on those.
|
||||
* Note that we exclusive-lock every leaf page containing data items, in
|
||||
* sequence left to right. It sounds attractive to only
|
||||
* exclusive-lock those containing items we need to delete, but
|
||||
* unfortunately that is not safe: we could then pass a stopped
|
||||
* indexscan, which could in rare cases lead to deleting the item it
|
||||
* needs to find when it resumes. (See _bt_restscan --- this could
|
||||
* only happen if an indexscan stops on a deletable item and then a
|
||||
* page split moves that item into a page further to its right, which
|
||||
* the indexscan will have no pin on.) We can skip obtaining
|
||||
* exclusive lock on empty pages though, since no indexscan could be
|
||||
* stopped on those.
|
||||
*/
|
||||
buf = _bt_get_endpoint(rel, 0, false);
|
||||
if (BufferIsValid(buf)) /* check for empty index */
|
||||
@@ -604,7 +605,7 @@ btbulkdelete(PG_FUNCTION_ARGS)
|
||||
OffsetNumber offnum,
|
||||
minoff,
|
||||
maxoff;
|
||||
BlockNumber nextpage;
|
||||
BlockNumber nextpage;
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
@@ -622,12 +623,14 @@ btbulkdelete(PG_FUNCTION_ARGS)
|
||||
*/
|
||||
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
||||
LockBufferForCleanup(buf);
|
||||
|
||||
/*
|
||||
* Recompute minoff/maxoff, both of which could have changed
|
||||
* while we weren't holding the lock.
|
||||
* Recompute minoff/maxoff, both of which could have
|
||||
* changed while we weren't holding the lock.
|
||||
*/
|
||||
minoff = P_FIRSTDATAKEY(opaque);
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
|
||||
/*
|
||||
* Scan over all items to see which ones need to be deleted
|
||||
* according to the callback function.
|
||||
@@ -640,7 +643,7 @@ btbulkdelete(PG_FUNCTION_ARGS)
|
||||
ItemPointer htup;
|
||||
|
||||
btitem = (BTItem) PageGetItem(page,
|
||||
PageGetItemId(page, offnum));
|
||||
PageGetItemId(page, offnum));
|
||||
htup = &(btitem->bti_itup.t_tid);
|
||||
if (callback(htup, callback_state))
|
||||
{
|
||||
@@ -651,6 +654,7 @@ btbulkdelete(PG_FUNCTION_ARGS)
|
||||
num_index_tuples += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we need to delete anything, do it and write the buffer;
|
||||
* else just release the buffer.
|
||||
@@ -662,9 +666,7 @@ btbulkdelete(PG_FUNCTION_ARGS)
|
||||
_bt_wrtbuf(rel, buf);
|
||||
}
|
||||
else
|
||||
{
|
||||
_bt_relbuf(rel, buf);
|
||||
}
|
||||
/* And advance to next page, if any */
|
||||
if (nextpage == P_NONE)
|
||||
break;
|
||||
@@ -712,7 +714,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
/* No point in remembering more than MaxFSMPages pages */
|
||||
maxFreePages = MaxFSMPages;
|
||||
if ((BlockNumber) maxFreePages > num_pages)
|
||||
maxFreePages = (int) num_pages + 1; /* +1 to avoid palloc(0) */
|
||||
maxFreePages = (int) num_pages + 1; /* +1 to avoid palloc(0) */
|
||||
freePages = (BlockNumber *) palloc(maxFreePages * sizeof(BlockNumber));
|
||||
nFreePages = 0;
|
||||
|
||||
@@ -728,10 +730,10 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
* after we start the scan will not be examined; this should be fine,
|
||||
* since they can't possibly be empty.)
|
||||
*/
|
||||
for (blkno = BTREE_METAPAGE+1; blkno < num_pages; blkno++)
|
||||
for (blkno = BTREE_METAPAGE + 1; blkno < num_pages; blkno++)
|
||||
{
|
||||
Buffer buf;
|
||||
Page page;
|
||||
Buffer buf;
|
||||
Page page;
|
||||
BTPageOpaque opaque;
|
||||
|
||||
buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
@@ -753,7 +755,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page))
|
||||
{
|
||||
/* Empty, try to delete */
|
||||
int ndel;
|
||||
int ndel;
|
||||
|
||||
/* Run pagedel in a temp context to avoid memory leakage */
|
||||
MemoryContextReset(mycontext);
|
||||
@@ -768,7 +770,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
/*
|
||||
* During VACUUM FULL it's okay to recycle deleted pages
|
||||
* immediately, since there can be no other transactions
|
||||
* scanning the index. Note that we will only recycle the
|
||||
* scanning the index. Note that we will only recycle the
|
||||
* current page and not any parent pages that _bt_pagedel
|
||||
* might have recursed to; this seems reasonable in the name
|
||||
* of simplicity. (Trying to do otherwise would mean we'd
|
||||
@@ -787,16 +789,16 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
/*
|
||||
* During VACUUM FULL, we truncate off any recyclable pages at the
|
||||
* end of the index. In a normal vacuum it'd be unsafe to do this
|
||||
* except by acquiring exclusive lock on the index and then rechecking
|
||||
* all the pages; doesn't seem worth it.
|
||||
* During VACUUM FULL, we truncate off any recyclable pages at the end
|
||||
* of the index. In a normal vacuum it'd be unsafe to do this except
|
||||
* by acquiring exclusive lock on the index and then rechecking all
|
||||
* the pages; doesn't seem worth it.
|
||||
*/
|
||||
if (info->vacuum_full && nFreePages > 0)
|
||||
{
|
||||
BlockNumber new_pages = num_pages;
|
||||
BlockNumber new_pages = num_pages;
|
||||
|
||||
while (nFreePages > 0 && freePages[nFreePages-1] == new_pages-1)
|
||||
while (nFreePages > 0 && freePages[nFreePages - 1] == new_pages - 1)
|
||||
{
|
||||
new_pages--;
|
||||
pages_deleted--;
|
||||
@@ -810,9 +812,10 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
* Okay to truncate.
|
||||
*
|
||||
* First, flush any shared buffers for the blocks we intend to
|
||||
* delete. FlushRelationBuffers is a bit more than we need for
|
||||
* this, since it will also write out dirty buffers for blocks we
|
||||
* aren't deleting, but it's the closest thing in bufmgr's API.
|
||||
* delete. FlushRelationBuffers is a bit more than we need
|
||||
* for this, since it will also write out dirty buffers for
|
||||
* blocks we aren't deleting, but it's the closest thing in
|
||||
* bufmgr's API.
|
||||
*/
|
||||
i = FlushRelationBuffers(rel, new_pages);
|
||||
if (i < 0)
|
||||
@@ -822,7 +825,8 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
* Do the physical truncation.
|
||||
*/
|
||||
new_pages = smgrtruncate(DEFAULT_SMGR, rel, new_pages);
|
||||
rel->rd_nblocks = new_pages; /* update relcache immediately */
|
||||
rel->rd_nblocks = new_pages; /* update relcache
|
||||
* immediately */
|
||||
rel->rd_targblock = InvalidBlockNumber;
|
||||
num_pages = new_pages;
|
||||
}
|
||||
@@ -856,7 +860,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
* and so no deletion can have occurred on that page.
|
||||
*
|
||||
* On entry, we have a pin but no read lock on the buffer that contained
|
||||
* the index tuple we stopped the scan on. On exit, we have pin and read
|
||||
* the index tuple we stopped the scan on. On exit, we have pin and read
|
||||
* lock on the buffer that now contains that index tuple, and the scandesc's
|
||||
* current position is updated to point at it.
|
||||
*/
|
||||
@@ -877,8 +881,8 @@ _bt_restscan(IndexScanDesc scan)
|
||||
BlockNumber blkno;
|
||||
|
||||
/*
|
||||
* Reacquire read lock on the buffer. (We should still have
|
||||
* a reference-count pin on it, so need not get that.)
|
||||
* Reacquire read lock on the buffer. (We should still have a
|
||||
* reference-count pin on it, so need not get that.)
|
||||
*/
|
||||
LockBuffer(buf, BT_READ);
|
||||
|
||||
@@ -921,11 +925,11 @@ _bt_restscan(IndexScanDesc scan)
|
||||
|
||||
/*
|
||||
* The item we're looking for moved right at least one page, so
|
||||
* move right. We are careful here to pin and read-lock the next
|
||||
* non-dead page before releasing the current one. This ensures that
|
||||
* a concurrent btbulkdelete scan cannot pass our position --- if it
|
||||
* did, it might be able to reach and delete our target item before
|
||||
* we can find it again.
|
||||
* move right. We are careful here to pin and read-lock the next
|
||||
* non-dead page before releasing the current one. This ensures
|
||||
* that a concurrent btbulkdelete scan cannot pass our position
|
||||
* --- if it did, it might be able to reach and delete our target
|
||||
* item before we can find it again.
|
||||
*/
|
||||
if (P_RIGHTMOST(opaque))
|
||||
elog(ERROR, "failed to re-find previous key in \"%s\"",
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.77 2003/07/29 22:18:38 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.78 2003/08/04 00:43:15 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -64,8 +64,8 @@ _bt_search(Relation rel, int keysz, ScanKey scankey,
|
||||
|
||||
/*
|
||||
* Race -- the page we just grabbed may have split since we read
|
||||
* its pointer in the parent (or metapage). If it has, we may need
|
||||
* to move right to its new sibling. Do that.
|
||||
* its pointer in the parent (or metapage). If it has, we may
|
||||
* need to move right to its new sibling. Do that.
|
||||
*/
|
||||
*bufP = _bt_moveright(rel, *bufP, keysz, scankey, BT_READ);
|
||||
|
||||
@@ -87,14 +87,14 @@ _bt_search(Relation rel, int keysz, ScanKey scankey,
|
||||
par_blkno = BufferGetBlockNumber(*bufP);
|
||||
|
||||
/*
|
||||
* We need to save the location of the index entry we chose in
|
||||
* the parent page on a stack. In case we split the tree, we'll
|
||||
* use the stack to work back up to the parent page. We also save
|
||||
* the actual downlink (TID) to uniquely identify the index entry,
|
||||
* in case it moves right while we're working lower in the
|
||||
* tree. See the paper by Lehman and Yao for how this is detected
|
||||
* and handled. (We use the child link to disambiguate duplicate
|
||||
* keys in the index -- Lehman and Yao disallow duplicate keys.)
|
||||
* We need to save the location of the index entry we chose in the
|
||||
* parent page on a stack. In case we split the tree, we'll use
|
||||
* the stack to work back up to the parent page. We also save the
|
||||
* actual downlink (TID) to uniquely identify the index entry, in
|
||||
* case it moves right while we're working lower in the tree. See
|
||||
* the paper by Lehman and Yao for how this is detected and
|
||||
* handled. (We use the child link to disambiguate duplicate keys
|
||||
* in the index -- Lehman and Yao disallow duplicate keys.)
|
||||
*/
|
||||
new_stack = (BTStack) palloc(sizeof(BTStackData));
|
||||
new_stack->bts_blkno = par_blkno;
|
||||
@@ -151,8 +151,8 @@ _bt_moveright(Relation rel,
|
||||
* might not need to move right; have to scan the page first anyway.)
|
||||
* It could even have split more than once, so scan as far as needed.
|
||||
*
|
||||
* We also have to move right if we followed a link that brought us to
|
||||
* a dead page.
|
||||
* We also have to move right if we followed a link that brought us to a
|
||||
* dead page.
|
||||
*/
|
||||
while (!P_RIGHTMOST(opaque) &&
|
||||
(P_IGNORE(opaque) ||
|
||||
@@ -599,8 +599,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
/*
|
||||
* At this point we are positioned at the first item >= scan key, or
|
||||
* possibly at the end of a page on which all the existing items are
|
||||
* less than the scan key and we know that everything on later
|
||||
* pages is greater than or equal to scan key.
|
||||
* less than the scan key and we know that everything on later pages
|
||||
* is greater than or equal to scan key.
|
||||
*
|
||||
* We could step forward in the latter case, but that'd be a waste of
|
||||
* time if we want to scan backwards. So, it's now time to examine
|
||||
@@ -851,7 +851,8 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
}
|
||||
}
|
||||
}
|
||||
else /* backwards scan */
|
||||
else
|
||||
/* backwards scan */
|
||||
{
|
||||
if (offnum > P_FIRSTDATAKEY(opaque))
|
||||
offnum = OffsetNumberPrev(offnum);
|
||||
@@ -860,9 +861,9 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
/*
|
||||
* Walk left to the next page with data. This is much more
|
||||
* complex than the walk-right case because of the possibility
|
||||
* that the page to our left splits while we are in flight to it,
|
||||
* plus the possibility that the page we were on gets deleted
|
||||
* after we leave it. See nbtree/README for details.
|
||||
* that the page to our left splits while we are in flight to
|
||||
* it, plus the possibility that the page we were on gets
|
||||
* deleted after we leave it. See nbtree/README for details.
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
@@ -877,10 +878,11 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
}
|
||||
page = BufferGetPage(*bufP);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* Okay, we managed to move left to a non-deleted page.
|
||||
* Done if it's not half-dead and not empty. Else loop back
|
||||
* and do it all again.
|
||||
* Done if it's not half-dead and not empty. Else loop
|
||||
* back and do it all again.
|
||||
*/
|
||||
if (!P_IGNORE(opaque))
|
||||
{
|
||||
@@ -946,17 +948,18 @@ _bt_walk_left(Relation rel, Buffer buf)
|
||||
buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* If this isn't the page we want, walk right till we find
|
||||
* what we want --- but go no more than four hops (an
|
||||
* arbitrary limit). If we don't find the correct page by then,
|
||||
* the most likely bet is that the original page got deleted
|
||||
* and isn't in the sibling chain at all anymore, not that its
|
||||
* left sibling got split more than four times.
|
||||
* If this isn't the page we want, walk right till we find what we
|
||||
* want --- but go no more than four hops (an arbitrary limit).
|
||||
* If we don't find the correct page by then, the most likely bet
|
||||
* is that the original page got deleted and isn't in the sibling
|
||||
* chain at all anymore, not that its left sibling got split more
|
||||
* than four times.
|
||||
*
|
||||
* Note that it is correct to test P_ISDELETED not P_IGNORE
|
||||
* here, because half-dead pages are still in the sibling
|
||||
* chain. Caller must reject half-dead pages if wanted.
|
||||
* Note that it is correct to test P_ISDELETED not P_IGNORE here,
|
||||
* because half-dead pages are still in the sibling chain. Caller
|
||||
* must reject half-dead pages if wanted.
|
||||
*/
|
||||
tries = 0;
|
||||
for (;;)
|
||||
@@ -983,8 +986,8 @@ _bt_walk_left(Relation rel, Buffer buf)
|
||||
if (P_ISDELETED(opaque))
|
||||
{
|
||||
/*
|
||||
* It was deleted. Move right to first nondeleted page
|
||||
* (there must be one); that is the page that has acquired the
|
||||
* It was deleted. Move right to first nondeleted page (there
|
||||
* must be one); that is the page that has acquired the
|
||||
* deleted one's keyspace, so stepping left from it will take
|
||||
* us where we want to be.
|
||||
*/
|
||||
@@ -1001,18 +1004,18 @@ _bt_walk_left(Relation rel, Buffer buf)
|
||||
if (!P_ISDELETED(opaque))
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now return to top of loop, resetting obknum to
|
||||
* point to this nondeleted page, and try again.
|
||||
* Now return to top of loop, resetting obknum to point to
|
||||
* this nondeleted page, and try again.
|
||||
*/
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* It wasn't deleted; the explanation had better be
|
||||
* that the page to the left got split or deleted.
|
||||
* Without this check, we'd go into an infinite loop
|
||||
* if there's anything wrong.
|
||||
* It wasn't deleted; the explanation had better be that the
|
||||
* page to the left got split or deleted. Without this check,
|
||||
* we'd go into an infinite loop if there's anything wrong.
|
||||
*/
|
||||
if (opaque->btpo_prev == lblkno)
|
||||
elog(ERROR, "could not find left sibling in \"%s\"",
|
||||
@@ -1028,7 +1031,7 @@ _bt_walk_left(Relation rel, Buffer buf)
|
||||
* _bt_get_endpoint() -- Find the first or last page on a given tree level
|
||||
*
|
||||
* If the index is empty, we will return InvalidBuffer; any other failure
|
||||
* condition causes ereport(). We will not return a dead page.
|
||||
* condition causes ereport(). We will not return a dead page.
|
||||
*
|
||||
* The returned buffer is pinned and read-locked.
|
||||
*/
|
||||
@@ -1045,8 +1048,8 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
|
||||
|
||||
/*
|
||||
* If we are looking for a leaf page, okay to descend from fast root;
|
||||
* otherwise better descend from true root. (There is no point in being
|
||||
* smarter about intermediate levels.)
|
||||
* otherwise better descend from true root. (There is no point in
|
||||
* being smarter about intermediate levels.)
|
||||
*/
|
||||
if (level == 0)
|
||||
buf = _bt_getroot(rel, BT_READ);
|
||||
@@ -1066,9 +1069,9 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
|
||||
{
|
||||
/*
|
||||
* If we landed on a deleted page, step right to find a live page
|
||||
* (there must be one). Also, if we want the rightmost page,
|
||||
* step right if needed to get to it (this could happen if the
|
||||
* page split since we obtained a pointer to it).
|
||||
* (there must be one). Also, if we want the rightmost page, step
|
||||
* right if needed to get to it (this could happen if the page
|
||||
* split since we obtained a pointer to it).
|
||||
*/
|
||||
while (P_IGNORE(opaque) ||
|
||||
(rightmost && !P_RIGHTMOST(opaque)))
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.73 2003/07/21 20:29:39 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.74 2003/08/04 00:43:15 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -93,7 +93,7 @@ typedef struct BTPageState
|
||||
|
||||
|
||||
static void _bt_blnewpage(Relation index, Buffer *buf, Page *page,
|
||||
uint32 level);
|
||||
uint32 level);
|
||||
static BTPageState *_bt_pagestate(Relation index, uint32 level);
|
||||
static void _bt_slideleft(Relation index, Buffer buf, Page page);
|
||||
static void _bt_sortaddtup(Page page, Size itemsize,
|
||||
@@ -469,7 +469,7 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti)
|
||||
|
||||
oopaque->btpo_next = BufferGetBlockNumber(nbuf);
|
||||
nopaque->btpo_prev = BufferGetBlockNumber(obuf);
|
||||
nopaque->btpo_next = P_NONE; /* redundant */
|
||||
nopaque->btpo_next = P_NONE; /* redundant */
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.3 2003/02/23 22:43:08 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.4 2003/08/04 00:43:15 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -29,10 +29,10 @@
|
||||
typedef struct bt_incomplete_split
|
||||
{
|
||||
RelFileNode node; /* the index */
|
||||
BlockNumber leftblk; /* left half of split */
|
||||
BlockNumber rightblk; /* right half of split */
|
||||
BlockNumber leftblk; /* left half of split */
|
||||
BlockNumber rightblk; /* right half of split */
|
||||
bool is_root; /* we split the root */
|
||||
} bt_incomplete_split;
|
||||
} bt_incomplete_split;
|
||||
|
||||
static List *incomplete_splits;
|
||||
|
||||
@@ -107,7 +107,7 @@ _bt_restore_page(Page page, char *from, int len)
|
||||
}
|
||||
|
||||
static void
|
||||
_bt_restore_meta(Relation reln, XLogRecPtr lsn,
|
||||
_bt_restore_meta(Relation reln, XLogRecPtr lsn,
|
||||
BlockNumber root, uint32 level,
|
||||
BlockNumber fastroot, uint32 fastlevel)
|
||||
{
|
||||
@@ -172,7 +172,7 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta,
|
||||
if (!redo || !(record->xl_info & XLR_BKP_BLOCK_1))
|
||||
{
|
||||
buffer = XLogReadBuffer(false, reln,
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)));
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)));
|
||||
if (!BufferIsValid(buffer))
|
||||
elog(PANIC, "btree_insert_%sdo: block unfound", (redo) ? "re" : "un");
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
@@ -183,13 +183,11 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta,
|
||||
if (redo)
|
||||
{
|
||||
if (XLByteLE(lsn, PageGetLSN(page)))
|
||||
{
|
||||
UnlockAndReleaseBuffer(buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (PageAddItem(page, (Item) datapos, datalen,
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
||||
LP_USED) == InvalidOffsetNumber)
|
||||
elog(PANIC, "btree_insert_redo: failed to add item");
|
||||
|
||||
@@ -204,13 +202,9 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta,
|
||||
elog(PANIC, "btree_insert_undo: bad page LSN");
|
||||
|
||||
if (!P_ISLEAF(pageop))
|
||||
{
|
||||
UnlockAndReleaseBuffer(buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
elog(PANIC, "btree_insert_undo: unimplemented");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -226,8 +220,8 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta,
|
||||
if (redo && !isleaf && incomplete_splits != NIL)
|
||||
{
|
||||
forget_matching_split(reln, xlrec->target.node,
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
||||
false);
|
||||
}
|
||||
}
|
||||
@@ -238,9 +232,9 @@ btree_xlog_split(bool redo, bool onleft, bool isroot,
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
|
||||
Relation reln;
|
||||
BlockNumber targetblk;
|
||||
BlockNumber leftsib;
|
||||
BlockNumber rightsib;
|
||||
BlockNumber targetblk;
|
||||
BlockNumber leftsib;
|
||||
BlockNumber rightsib;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
BTPageOpaque pageop;
|
||||
@@ -338,9 +332,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot,
|
||||
elog(PANIC, "btree_split_redo: uninitialized next right page");
|
||||
|
||||
if (XLByteLE(lsn, PageGetLSN(page)))
|
||||
{
|
||||
UnlockAndReleaseBuffer(buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
@@ -357,8 +349,8 @@ btree_xlog_split(bool redo, bool onleft, bool isroot,
|
||||
if (redo && xlrec->level > 0 && incomplete_splits != NIL)
|
||||
{
|
||||
forget_matching_split(reln, xlrec->target.node,
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
||||
false);
|
||||
}
|
||||
|
||||
@@ -422,10 +414,10 @@ btree_xlog_delete_page(bool redo, bool ismeta,
|
||||
{
|
||||
xl_btree_delete_page *xlrec = (xl_btree_delete_page *) XLogRecGetData(record);
|
||||
Relation reln;
|
||||
BlockNumber parent;
|
||||
BlockNumber target;
|
||||
BlockNumber leftsib;
|
||||
BlockNumber rightsib;
|
||||
BlockNumber parent;
|
||||
BlockNumber target;
|
||||
BlockNumber leftsib;
|
||||
BlockNumber rightsib;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
BTPageOpaque pageop;
|
||||
@@ -451,9 +443,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
|
||||
if (PageIsNew((PageHeader) page))
|
||||
elog(PANIC, "btree_delete_page_redo: uninitialized parent page");
|
||||
if (XLByteLE(lsn, PageGetLSN(page)))
|
||||
{
|
||||
UnlockAndReleaseBuffer(buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
OffsetNumber poffset;
|
||||
@@ -469,7 +459,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
|
||||
{
|
||||
ItemId itemid;
|
||||
BTItem btitem;
|
||||
OffsetNumber nextoffset;
|
||||
OffsetNumber nextoffset;
|
||||
|
||||
itemid = PageGetItemId(page, poffset);
|
||||
btitem = (BTItem) PageGetItem(page, itemid);
|
||||
@@ -494,9 +484,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
|
||||
if (PageIsNew((PageHeader) page))
|
||||
elog(PANIC, "btree_delete_page_redo: uninitialized right sibling");
|
||||
if (XLByteLE(lsn, PageGetLSN(page)))
|
||||
{
|
||||
UnlockAndReleaseBuffer(buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
@@ -520,9 +508,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
|
||||
if (PageIsNew((PageHeader) page))
|
||||
elog(PANIC, "btree_delete_page_redo: uninitialized left sibling");
|
||||
if (XLByteLE(lsn, PageGetLSN(page)))
|
||||
{
|
||||
UnlockAndReleaseBuffer(buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
@@ -799,116 +785,116 @@ btree_desc(char *buf, uint8 xl_info, char *rec)
|
||||
switch (info)
|
||||
{
|
||||
case XLOG_BTREE_INSERT_LEAF:
|
||||
{
|
||||
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
||||
{
|
||||
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
||||
|
||||
strcat(buf, "insert: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
break;
|
||||
}
|
||||
strcat(buf, "insert: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_INSERT_UPPER:
|
||||
{
|
||||
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
||||
{
|
||||
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
||||
|
||||
strcat(buf, "insert_upper: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
break;
|
||||
}
|
||||
strcat(buf, "insert_upper: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_INSERT_META:
|
||||
{
|
||||
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
||||
{
|
||||
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
||||
|
||||
strcat(buf, "insert_meta: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
break;
|
||||
}
|
||||
strcat(buf, "insert_meta: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_SPLIT_L:
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
||||
|
||||
strcat(buf, "split_l: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
strcat(buf, "split_l: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_SPLIT_R:
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
||||
|
||||
strcat(buf, "split_r: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
strcat(buf, "split_r: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_SPLIT_L_ROOT:
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
||||
|
||||
strcat(buf, "split_l_root: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
strcat(buf, "split_l_root: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_SPLIT_R_ROOT:
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
||||
|
||||
strcat(buf, "split_r_root: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
strcat(buf, "split_r_root: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_DELETE:
|
||||
{
|
||||
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
|
||||
{
|
||||
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
|
||||
|
||||
sprintf(buf + strlen(buf), "delete: node %u/%u; blk %u",
|
||||
xlrec->node.tblNode, xlrec->node.relNode, xlrec->block);
|
||||
break;
|
||||
}
|
||||
sprintf(buf + strlen(buf), "delete: node %u/%u; blk %u",
|
||||
xlrec->node.tblNode, xlrec->node.relNode, xlrec->block);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_DELETE_PAGE:
|
||||
case XLOG_BTREE_DELETE_PAGE_META:
|
||||
{
|
||||
xl_btree_delete_page *xlrec = (xl_btree_delete_page *) rec;
|
||||
{
|
||||
xl_btree_delete_page *xlrec = (xl_btree_delete_page *) rec;
|
||||
|
||||
strcat(buf, "delete_page: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; dead %u; left %u; right %u",
|
||||
xlrec->deadblk, xlrec->leftblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
strcat(buf, "delete_page: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
sprintf(buf + strlen(buf), "; dead %u; left %u; right %u",
|
||||
xlrec->deadblk, xlrec->leftblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_NEWROOT:
|
||||
{
|
||||
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
|
||||
{
|
||||
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
|
||||
|
||||
sprintf(buf + strlen(buf), "newroot: node %u/%u; root %u lev %u",
|
||||
xlrec->node.tblNode, xlrec->node.relNode,
|
||||
xlrec->rootblk, xlrec->level);
|
||||
break;
|
||||
}
|
||||
sprintf(buf + strlen(buf), "newroot: node %u/%u; root %u lev %u",
|
||||
xlrec->node.tblNode, xlrec->node.relNode,
|
||||
xlrec->rootblk, xlrec->level);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_NEWMETA:
|
||||
{
|
||||
xl_btree_newmeta *xlrec = (xl_btree_newmeta *) rec;
|
||||
{
|
||||
xl_btree_newmeta *xlrec = (xl_btree_newmeta *) rec;
|
||||
|
||||
sprintf(buf + strlen(buf), "newmeta: node %u/%u; root %u lev %u fast %u lev %u",
|
||||
xlrec->node.tblNode, xlrec->node.relNode,
|
||||
xlrec->meta.root, xlrec->meta.level,
|
||||
xlrec->meta.fastroot, xlrec->meta.fastlevel);
|
||||
break;
|
||||
}
|
||||
sprintf(buf + strlen(buf), "newmeta: node %u/%u; root %u lev %u fast %u lev %u",
|
||||
xlrec->node.tblNode, xlrec->node.relNode,
|
||||
xlrec->meta.root, xlrec->meta.level,
|
||||
xlrec->meta.fastroot, xlrec->meta.fastlevel);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_NEWPAGE:
|
||||
{
|
||||
xl_btree_newpage *xlrec = (xl_btree_newpage *) rec;
|
||||
{
|
||||
xl_btree_newpage *xlrec = (xl_btree_newpage *) rec;
|
||||
|
||||
sprintf(buf + strlen(buf), "newpage: node %u/%u; page %u",
|
||||
xlrec->node.tblNode, xlrec->node.relNode,
|
||||
xlrec->blkno);
|
||||
break;
|
||||
}
|
||||
sprintf(buf + strlen(buf), "newpage: node %u/%u; page %u",
|
||||
xlrec->node.tblNode, xlrec->node.relNode,
|
||||
xlrec->blkno);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
strcat(buf, "UNKNOWN");
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user