mirror of https://github.com/postgres/postgres.git
pgindent run for 9.4
This includes removing tabs after periods in C comments, which was applied to back branches, so this change should not affect backpatching.
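For reference, the kind of change this run makes can be seen in the _bt_unlink_halfdead_page hunk further down: pgindent re-wraps multi-line C comments to the standard width and normalizes the whitespace after sentence-ending periods (the exact whitespace is not recoverable from this rendering, so only the re-wrapping is shown). A sketch of that before/after, reconstructed from that hunk:

    /* Before pgindent: */
    /*
     * Check page is still empty etc, else abandon deletion. This is just
     * for paranoia's sake; a half-dead page cannot resurrect because there
     * can be only one vacuum process running at a time.
     */

    /* After pgindent: */
    /*
     * Check page is still empty etc, else abandon deletion. This is just for
     * paranoia's sake; a half-dead page cannot resurrect because there can be
     * only one vacuum process running at a time.
     */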
@@ -25,7 +25,7 @@
 * Although any negative int32 (except INT_MIN) is acceptable for reporting
 * "<", and any positive int32 is acceptable for reporting ">", routines
 * that work on 32-bit or wider datatypes can't just return "a - b".
 * That could overflow and give the wrong answer. Also, one must not
 * return INT_MIN to report "<", since some callers will negate the result.
 *
 * NOTE: it is critical that the comparison function impose a total order
@@ -90,7 +90,7 @@ static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
 * By here, itup is filled in, including the TID.
 *
 * If checkUnique is UNIQUE_CHECK_NO or UNIQUE_CHECK_PARTIAL, this
 * will allow duplicates. Otherwise (UNIQUE_CHECK_YES or
 * UNIQUE_CHECK_EXISTING) it will throw error for a duplicate.
 * For UNIQUE_CHECK_EXISTING we merely run the duplicate check, and
 * don't actually insert.
@@ -129,7 +129,7 @@ top:
 * If the page was split between the time that we surrendered our read
 * lock and acquired our write lock, then this page may no longer be the
 * right place for the key we want to insert. In this case, we need to
 * move right in the tree. See Lehman and Yao for an excruciatingly
 * precise description.
 */
 buf = _bt_moveright(rel, buf, natts, itup_scankey, false,
@@ -211,7 +211,7 @@ top:
 * is the first tuple on the next page.
 *
 * Returns InvalidTransactionId if there is no conflict, else an xact ID
 * we must wait for to see if it commits a conflicting tuple. If an actual
 * conflict is detected, no return --- just ereport().
 *
 * However, if checkUnique == UNIQUE_CHECK_PARTIAL, we always return
@@ -293,7 +293,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,

 /*
 * If we are doing a recheck, we expect to find the tuple we
 * are rechecking. It's not a duplicate, but we have to keep
 * scanning.
 */
 if (checkUnique == UNIQUE_CHECK_EXISTING &&
@@ -482,7 +482,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 * If the new key is equal to one or more existing keys, we can
 * legitimately place it anywhere in the series of equal keys --- in fact,
 * if the new key is equal to the page's "high key" we can place it on
 * the next page. If it is equal to the high key, and there's not room
 * to insert the new tuple on the current page without splitting, then
 * we can move right hoping to find more free space and avoid a split.
 * (We should not move right indefinitely, however, since that leads to
@@ -494,7 +494,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 * removing any LP_DEAD tuples.
 *
 * On entry, *buf and *offsetptr point to the first legal position
 * where the new tuple could be inserted. The caller should hold an
 * exclusive lock on *buf. *offsetptr can also be set to
 * InvalidOffsetNumber, in which case the function will search for the
 * right location within the page if needed. On exit, they point to the
@@ -564,7 +564,7 @@ _bt_findinsertloc(Relation rel,
 * on every insert. We implement "get tired" as a random choice,
 * since stopping after scanning a fixed number of pages wouldn't work
 * well (we'd never reach the right-hand side of previously split
 * pages). Currently the probability of moving right is set at 0.99,
 * which may seem too high to change the behavior much, but it does an
 * excellent job of preventing O(N^2) behavior with many equal keys.
 *----------
@@ -574,7 +574,7 @@ _bt_findinsertloc(Relation rel,
 while (PageGetFreeSpace(page) < itemsz)
 {
 Buffer rbuf;
 BlockNumber rblkno;

 /*
 * before considering moving right, see if we can obtain enough space
@@ -620,10 +620,10 @@ _bt_findinsertloc(Relation rel,
 lpageop = (BTPageOpaque) PageGetSpecialPointer(page);

 /*
-* If this page was incompletely split, finish the split now.
-* We do this while holding a lock on the left sibling, which
-* is not good because finishing the split could be a fairly
-* lengthy operation. But this should happen very seldom.
+* If this page was incompletely split, finish the split now. We
+* do this while holding a lock on the left sibling, which is not
+* good because finishing the split could be a fairly lengthy
+* operation. But this should happen very seldom.
 */
 if (P_INCOMPLETE_SPLIT(lpageop))
 {
@@ -681,7 +681,7 @@ _bt_findinsertloc(Relation rel,
 * + updates the metapage if a true root or fast root is split.
 *
 * On entry, we must have the correct buffer in which to do the
 * insertion, and the buffer must be pinned and write-locked. On return,
 * we will have dropped both the pin and the lock on the buffer.
 *
 * When inserting to a non-leaf page, 'cbuf' is the left-sibling of the
@@ -978,7 +978,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 * origpage is the original page to be split. leftpage is a temporary
 * buffer that receives the left-sibling data, which will be copied back
 * into origpage on success. rightpage is the new page that receives the
 * right-sibling data. If we fail before reaching the critical section,
 * origpage hasn't been modified and leftpage is only workspace. In
 * principle we shouldn't need to worry about rightpage either, because it
 * hasn't been linked into the btree page structure; but to avoid leaving
@@ -1196,7 +1196,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 * page. If you're confused, imagine that page A splits to A B and
 * then again, yielding A C B, while vacuum is in progress. Tuples
 * originally in A could now be in either B or C, hence vacuum must
 * examine both pages. But if D, our right sibling, has a different
 * cycleid then it could not contain any tuples that were in A when
 * the vacuum started.
 */
@@ -1330,11 +1330,10 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 lastrdata++;

 /*
-* Although we don't need to WAL-log anything on the left page,
-* we still need XLogInsert to consider storing a full-page image
-* of the left page, so make an empty entry referencing that
-* buffer. This also ensures that the left page is always backup
-* block 1.
+* Although we don't need to WAL-log anything on the left page, we
+* still need XLogInsert to consider storing a full-page image of
+* the left page, so make an empty entry referencing that buffer.
+* This also ensures that the left page is always backup block 1.
 */
 lastrdata->data = NULL;
 lastrdata->len = 0;
@@ -1448,7 +1447,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 *
 * We return the index of the first existing tuple that should go on the
 * righthand page, plus a boolean indicating whether the new tuple goes on
 * the left or right page. The bool is necessary to disambiguate the case
 * where firstright == newitemoff.
 */
 static OffsetNumber
@@ -1684,7 +1683,7 @@ _bt_checksplitloc(FindSplitData *state,
 *
 * On entry, buf and rbuf are the left and right split pages, which we
 * still hold write locks on per the L&Y algorithm. We release the
 * write locks once we have write lock on the parent page. (Any sooner,
 * and it'd be possible for some other process to try to split or delete
 * one of these pages, and get confused because it cannot find the downlink.)
 *
@@ -1705,7 +1704,7 @@ _bt_insert_parent(Relation rel,
 * Here we have to do something Lehman and Yao don't talk about: deal with
 * a root split and construction of a new root. If our stack is empty
 * then we have just split a node on what had been the root level when we
 * descended the tree. If it was still the root then we perform a
 * new-root construction. If it *wasn't* the root anymore, search to find
 * the next higher level that someone constructed meanwhile, and find the
 * right place to insert as for the normal case.
@@ -1917,7 +1916,7 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
 /*
 * These loops will check every item on the page --- but in an
 * order that's attuned to the probability of where it actually
 * is. Scan to the right first, then to the left.
 */
 for (offnum = start;
 offnum <= maxoff;
@@ -12,7 +12,7 @@
 * src/backend/access/nbtree/nbtpage.c
 *
 * NOTES
 * Postgres btree pages look like ordinary relation pages. The opaque
 * data at high addresses includes pointers to left and right siblings
 * and flag data describing page state. The first page in a btree, page
 * zero, is special -- it stores meta-information describing the tree.
@@ -36,7 +36,7 @@ static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
 static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
 BTStack stack, Buffer *topparent, OffsetNumber *topoff,
 BlockNumber *target, BlockNumber *rightsib);
 static void _bt_log_reuse_page(Relation rel, BlockNumber blkno,
 TransactionId latestRemovedXid);

 /*
@@ -62,7 +62,7 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
 metaopaque->btpo_flags = BTP_META;

 /*
 * Set pd_lower just past the end of the metadata. This is not essential
 * but it makes the page look compressible to xlog.c.
 */
 ((PageHeader) page)->pd_lower =
@@ -80,7 +80,7 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
 *
 * The access type parameter (BT_READ or BT_WRITE) controls whether
 * a new root page will be created or not. If access = BT_READ,
 * and no root page exists, we just return InvalidBuffer. For
 * BT_WRITE, we try to create the root page if it doesn't exist.
 * NOTE that the returned root page will have only a read lock set
 * on it even if access = BT_WRITE!
@@ -197,7 +197,7 @@ _bt_getroot(Relation rel, int access)
 /*
 * Metadata initialized by someone else. In order to guarantee no
 * deadlocks, we have to release the metadata page and start all
 * over again. (Is that really true? But it's hardly worth trying
 * to optimize this case.)
 */
 _bt_relbuf(rel, metabuf);
@@ -254,7 +254,7 @@ _bt_getroot(Relation rel, int access)
 END_CRIT_SECTION();

 /*
 * swap root write lock for read lock. There is no danger of anyone
 * else accessing the new root page while it's unlocked, since no one
 * else knows where it is yet.
 */
@@ -322,7 +322,7 @@ _bt_getroot(Relation rel, int access)
 * By the time we acquire lock on the root page, it might have been split and
 * not be the true root anymore. This is okay for the present uses of this
 * routine; we only really need to be able to move up at least one tree level
 * from whatever non-root page we were at. If we ever do need to lock the
 * one true root page, we could loop here, re-reading the metapage on each
 * failure. (Note that it wouldn't do to hold the lock on the metapage while
 * moving to the root --- that'd deadlock against any concurrent root split.)
@@ -497,7 +497,7 @@ _bt_checkpage(Relation rel, Buffer buf)
 /*
 * ReadBuffer verifies that every newly-read page passes
 * PageHeaderIsValid, which means it either contains a reasonably sane
 * page header or is all-zero. We have to defend against the all-zero
 * case, however.
 */
 if (PageIsNew(page))
@@ -564,7 +564,7 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedX
 /*
 * _bt_getbuf() -- Get a buffer by block number for read or write.
 *
 * blkno == P_NEW means to get an unallocated index page. The page
 * will be initialized before returning it.
 *
 * When this routine returns, the appropriate lock is set on the
@@ -595,7 +595,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 * First see if the FSM knows of any free pages.
 *
 * We can't trust the FSM's report unreservedly; we have to check that
 * the page is still free. (For example, an already-free page could
 * have been re-used between the time the last VACUUM scanned it and
 * the time the VACUUM made its FSM updates.)
 *
@@ -774,7 +774,7 @@ _bt_page_recyclable(Page page)
 /*
 * Delete item(s) from a btree page during VACUUM.
 *
 * This must only be used for deleting leaf items. Deleting an item on a
 * non-leaf page has to be done as part of an atomic action that includes
 * deleting the page it points to.
 *
@@ -842,7 +842,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,

 /*
 * The target-offsets array is not in the buffer, but pretend that it
 * is. When XLogInsert stores the whole buffer, the offsets array
 * need not be stored too.
 */
 if (nitems > 0)
@@ -1049,11 +1049,12 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
 lbuf = _bt_getbuf(rel, leftsib, BT_READ);
 lpage = BufferGetPage(lbuf);
 lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);

 /*
 * If the left sibling was concurrently split, so that its
-* next-pointer doesn't point to the current page anymore,
-* the split that created the current page must be completed.
-* (We don't allow splitting an incompletely split page again
+* next-pointer doesn't point to the current page anymore, the
+* split that created the current page must be completed. (We
+* don't allow splitting an incompletely split page again
 * until the previous split has been completed)
 */
 if (lopaque->btpo_next == parent &&
@@ -1066,7 +1067,7 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
 }

 return _bt_lock_branch_parent(rel, parent, stack->bts_parent,
 topparent, topoff, target, rightsib);
 }
 else
 {
@@ -1112,6 +1113,7 @@ _bt_pagedel(Relation rel, Buffer buf)
 bool rightsib_empty;
 Page page;
 BTPageOpaque opaque;

 /*
 * "stack" is a search stack leading (approximately) to the target page.
 * It is initially NULL, but when iterating, we keep it to avoid
@@ -1140,24 +1142,24 @@ _bt_pagedel(Relation rel, Buffer buf)
 * was never supposed to leave half-dead pages in the tree, it was
 * just a transient state, but it was nevertheless possible in
 * error scenarios. We don't know how to deal with them here. They
-* are harmless as far as searches are considered, but inserts into
-* the deleted keyspace could add out-of-order downlinks in the
-* upper levels. Log a notice, hopefully the admin will notice and
-* reindex.
+* are harmless as far as searches are considered, but inserts
+* into the deleted keyspace could add out-of-order downlinks in
+* the upper levels. Log a notice, hopefully the admin will notice
+* and reindex.
 */
 if (P_ISHALFDEAD(opaque))
 ereport(LOG,
 (errcode(ERRCODE_INDEX_CORRUPTED),
 errmsg("index \"%s\" contains a half-dead internal page",
 RelationGetRelationName(rel)),
 errhint("This can be caused by an interrupt VACUUM in version 9.3 or older, before upgrade. Please REINDEX it.")));
 _bt_relbuf(rel, buf);
 return ndeleted;
 }

 /*
-* We can never delete rightmost pages nor root pages. While at
-* it, check that page is not already deleted and is empty.
+* We can never delete rightmost pages nor root pages. While at it,
+* check that page is not already deleted and is empty.
 *
 * To keep the algorithm simple, we also never delete an incompletely
 * split page (they should be rare enough that this doesn't make any
@@ -1167,10 +1169,10 @@ _bt_pagedel(Relation rel, Buffer buf)
 * left half of an incomplete split, but ensuring that it's not the
 * right half is more complicated. For that, we have to check that
 * the left sibling doesn't have its INCOMPLETE_SPLIT flag set. On
-* the first iteration, we temporarily release the lock on the
-* current page, and check the left sibling and also construct a
-* search stack to. On subsequent iterations, we know we stepped right
-* from a page that passed these tests, so it's OK.
+* the first iteration, we temporarily release the lock on the current
+* page, and check the left sibling and also construct a search stack
+* to. On subsequent iterations, we know we stepped right from a page
+* that passed these tests, so it's OK.
 */
 if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
 P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
@@ -1184,9 +1186,9 @@ _bt_pagedel(Relation rel, Buffer buf)
 }

 /*
-* First, remove downlink pointing to the page (or a parent of the page,
-* if we are going to delete a taller branch), and mark the page as
-* half-dead.
+* First, remove downlink pointing to the page (or a parent of the
+* page, if we are going to delete a taller branch), and mark the page
+* as half-dead.
 */
 if (!P_ISHALFDEAD(opaque))
 {
@@ -1205,7 +1207,7 @@ _bt_pagedel(Relation rel, Buffer buf)
 ItemId itemid;
 IndexTuple targetkey;
 Buffer lbuf;
 BlockNumber leftsib;

 itemid = PageGetItemId(page, P_HIKEY);
 targetkey = CopyIndexTuple((IndexTuple) PageGetItem(page, itemid));
@@ -1219,9 +1221,9 @@ _bt_pagedel(Relation rel, Buffer buf)
 LockBuffer(buf, BUFFER_LOCK_UNLOCK);

 /*
-* Fetch the left sibling, to check that it's not marked
-* with INCOMPLETE_SPLIT flag. That would mean that the
-* page to-be-deleted doesn't have a downlink, and the page
+* Fetch the left sibling, to check that it's not marked with
+* INCOMPLETE_SPLIT flag. That would mean that the page
+* to-be-deleted doesn't have a downlink, and the page
 * deletion algorithm isn't prepared to handle that.
 */
 if (!P_LEFTMOST(opaque))
@@ -1267,7 +1269,7 @@ _bt_pagedel(Relation rel, Buffer buf)

 /*
 * Then unlink it from its siblings. Each call to
-*_bt_unlink_halfdead_page unlinks the topmost page from the branch,
+* _bt_unlink_halfdead_page unlinks the topmost page from the branch,
 * making it shallower. Iterate until the leaf page is gone.
 */
 rightsib_empty = false;
@@ -1291,8 +1293,8 @@ _bt_pagedel(Relation rel, Buffer buf)
 * is that it was the rightmost child of the parent. Now that we
 * removed the downlink for this page, the right sibling might now be
 * the only child of the parent, and could be removed. It would be
-* picked up by the next vacuum anyway, but might as well try to remove
-* it now, so loop back to process the right sibling.
+* picked up by the next vacuum anyway, but might as well try to
+* remove it now, so loop back to process the right sibling.
 */
 if (!rightsib_empty)
 break;
@@ -1310,9 +1312,9 @@ _bt_pagedel(Relation rel, Buffer buf)
 static bool
 _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
 {
 BlockNumber leafblkno;
 BlockNumber leafrightsib;
 BlockNumber target;
 BlockNumber rightsib;
 ItemId itemid;
 Page page;
@@ -1351,7 +1353,7 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)

 /*
 * Check that the parent-page index items we're about to delete/overwrite
 * contain what we expect. This can fail if the index has become corrupt
 * for some reason. We want to throw any error before entering the
 * critical section --- otherwise it'd be a PANIC.
 *
@@ -1490,9 +1492,9 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
 BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
 BlockNumber leafleftsib;
 BlockNumber leafrightsib;
 BlockNumber target;
 BlockNumber leftsib;
 BlockNumber rightsib;
 Buffer lbuf = InvalidBuffer;
 Buffer buf;
 Buffer rbuf;
@@ -1506,7 +1508,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
 int targetlevel;
 ItemPointer leafhikey;
 BlockNumber nextchild;
 BlockNumber topblkno;

 page = BufferGetPage(leafbuf);
 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1596,7 +1598,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
 lbuf = InvalidBuffer;

 /*
 * Next write-lock the target page itself. It should be okay to take just
 * a write lock not a superexclusive lock, since no scans would stop on an
 * empty page.
 */
@@ -1605,9 +1607,9 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
 opaque = (BTPageOpaque) PageGetSpecialPointer(page);

 /*
-* Check page is still empty etc, else abandon deletion. This is just
-* for paranoia's sake; a half-dead page cannot resurrect because there
-* can be only one vacuum process running at a time.
+* Check page is still empty etc, else abandon deletion. This is just for
+* paranoia's sake; a half-dead page cannot resurrect because there can be
+* only one vacuum process running at a time.
 */
 if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque))
 {
@@ -1733,7 +1735,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
 * we're in VACUUM and would not otherwise have an XID. Having already
 * updated links to the target, ReadNewTransactionId() suffices as an
 * upper bound. Any scan having retained a now-stale link is advertising
 * in its PGXACT an xmin less than or equal to the value we read here. It
 * will continue to do so, holding back RecentGlobalXmin, for the duration
 * of that scan.
 */
@@ -208,7 +208,7 @@ btbuildempty(PG_FUNCTION_ARGS)
 metapage = (Page) palloc(BLCKSZ);
 _bt_initmetapage(metapage, P_NONE, 0);

 /* Write the page. If archiving/streaming, XLOG it. */
 PageSetChecksumInplace(metapage, BTREE_METAPAGE);
 smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
 (char *) metapage, true);
@@ -427,7 +427,7 @@ btbeginscan(PG_FUNCTION_ARGS)

 /*
 * We don't know yet whether the scan will be index-only, so we do not
 * allocate the tuple workspace arrays until btrescan. However, we set up
 * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
 */
 so->currTuples = so->markTuples = NULL;
@@ -472,7 +472,7 @@ btrescan(PG_FUNCTION_ARGS)

 /*
 * Allocate tuple workspace arrays, if needed for an index-only scan and
 * not already done in a previous rescan call. To save on palloc
 * overhead, both workspaces are allocated as one palloc block; only this
 * function and btendscan know that.
 *
@@ -952,7 +952,7 @@ restart:
 vstate->lastBlockLocked = blkno;

 /*
 * Check whether we need to recurse back to earlier pages. What we
 * are concerned about is a page split that happened since we started
 * the vacuum scan. If the split moved some tuples to a lower page
 * then we might have missed 'em. If so, set up for tail recursion.
@@ -50,7 +50,7 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
 *
 * NOTE that the returned buffer is read-locked regardless of the access
 * parameter. However, access = BT_WRITE will allow an empty root page
 * to be created and returned. When access = BT_READ, an empty index
 * will result in *bufP being set to InvalidBuffer. Also, in BT_WRITE mode,
 * any incomplete splits encountered during the search will be finished.
 */
@@ -271,7 +271,7 @@ _bt_moveright(Relation rel,
 * (or leaf keys > given scankey when nextkey is true).
 *
 * This procedure is not responsible for walking right, it just examines
 * the given page. _bt_binsrch() has no lock or refcount side effects
 * on the buffer.
 */
 OffsetNumber
@@ -403,7 +403,7 @@ _bt_compare(Relation rel,
 /*
 * The scan key is set up with the attribute number associated with each
 * term in the key. It is important that, if the index is multi-key, the
 * scan contain the first k key attributes, and that they be in order. If
 * you think about how multi-key ordering works, you'll understand why
 * this is.
 *
@@ -442,7 +442,7 @@ _bt_compare(Relation rel,
 /*
 * The sk_func needs to be passed the index value as left arg and
 * the sk_argument as right arg (they might be of different
 * types). Since it is convenient for callers to think of
 * _bt_compare as comparing the scankey to the index item, we have
 * to flip the sign of the comparison result. (Unless it's a DESC
 * column, in which case we *don't* flip the sign.)
@@ -471,7 +471,7 @@ _bt_compare(Relation rel,
 * _bt_first() -- Find the first item in a scan.
 *
 * We need to be clever about the direction of scan, the search
 * conditions, and the tree ordering. We find the first item (or,
 * if backwards scan, the last item) in the tree that satisfies the
 * qualifications in the scan key. On success exit, the page containing
 * the current index tuple is pinned but not locked, and data about
@@ -527,7 +527,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 * We want to identify the keys that can be used as starting boundaries;
 * these are =, >, or >= keys for a forward scan or =, <, <= keys for
 * a backwards scan. We can use keys for multiple attributes so long as
 * the prior attributes had only =, >= (resp. =, <=) keys. Once we accept
 * a > or < boundary or find an attribute with no boundary (which can be
 * thought of as the same as "> -infinity"), we can't use keys for any
 * attributes to its right, because it would break our simplistic notion
@@ -742,7 +742,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 * even if the row comparison is of ">" or "<" type, because the
 * condition applied to all but the last row member is effectively
 * ">=" or "<=", and so the extra keys don't break the positioning
 * scheme. But, by the same token, if we aren't able to use all
 * the row members, then the part of the row comparison that we
 * did use has to be treated as just a ">=" or "<=" condition, and
 * so we'd better adjust strat_total accordingly.
@@ -861,7 +861,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)

 /*
 * Find first item >= scankey, then back up one to arrive at last
 * item < scankey. (Note: this positioning strategy is only used
 * for a backward scan, so that is always the correct starting
 * position.)
 */
@@ -910,7 +910,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 case BTGreaterEqualStrategyNumber:

 /*
 * Find first item >= scankey. (This is only used for forward
 * scans.)
 */
 nextkey = false;
@@ -988,7 +988,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 *
 * The actually desired starting point is either this item or the prior
 * one, or in the end-of-page case it's the first item on the next page or
 * the last item on this page. Adjust the starting offset if needed. (If
 * this results in an offset before the first item or after the last one,
 * _bt_readpage will report no items found, and then we'll step to the
 * next page as needed.)
@@ -1304,7 +1304,7 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
 * than the walk-right case because of the possibility that the page
 * to our left splits while we are in flight to it, plus the
 * possibility that the page we were on gets deleted after we leave
 * it. See nbtree/README for details.
 */
 for (;;)
 {
@@ -1399,7 +1399,7 @@ _bt_walk_left(Relation rel, Buffer buf)
 * anymore, not that its left sibling got split more than four times.
 *
 * Note that it is correct to test P_ISDELETED not P_IGNORE here,
 * because half-dead pages are still in the sibling chain. Caller
 * must reject half-dead pages if wanted.
 */
 tries = 0;
@@ -1425,7 +1425,7 @@ _bt_walk_left(Relation rel, Buffer buf)
 if (P_ISDELETED(opaque))
 {
 /*
 * It was deleted. Move right to first nondeleted page (there
 * must be one); that is the page that has acquired the deleted
 * one's keyspace, so stepping left from it will take us where we
 * want to be.
@@ -1469,7 +1469,7 @@ _bt_walk_left(Relation rel, Buffer buf)
 * _bt_get_endpoint() -- Find the first or last page on a given tree level
 *
 * If the index is empty, we will return InvalidBuffer; any other failure
 * condition causes ereport(). We will not return a dead page.
 *
 * The returned buffer is pinned and read-locked.
 */
@@ -7,7 +7,7 @@
 *
 * We use tuplesort.c to sort the given index tuples into order.
 * Then we scan the index tuples in order and build the btree pages
 * for each level. We load source tuples into leaf-level pages.
 * Whenever we fill a page at one level, we add a link to it to its
 * parent level (starting a new parent level if necessary). When
 * done, we write out each final page on each level, adding it to
@@ -42,11 +42,11 @@
 *
 * Since the index will never be used unless it is completely built,
 * from a crash-recovery point of view there is no need to WAL-log the
 * steps of the build. After completing the index build, we can just sync
 * the whole file to disk using smgrimmedsync() before exiting this module.
 * This can be seen to be sufficient for crash recovery by considering that
 * it's effectively equivalent to what would happen if a CHECKPOINT occurred
 * just after the index build. However, it is clearly not sufficient if the
 * DBA is using the WAL log for PITR or replication purposes, since another
 * machine would not be able to reconstruct the index from WAL. Therefore,
 * we log the completed index pages to WAL if and only if WAL archiving is
@@ -89,7 +89,7 @@ struct BTSpool
 };

 /*
 * Status record for a btree page being built. We have one of these
 * for each active tree level.
 *
 * The reason we need to store a copy of the minimum key is that we'll
@@ -160,7 +160,7 @@ _bt_spoolinit(Relation heap, Relation index, bool isunique, bool isdead)
 * We size the sort area as maintenance_work_mem rather than work_mem to
 * speed index creation. This should be OK since a single backend can't
 * run multiple index creations in parallel. Note that creation of a
 * unique index actually requires two BTSpool objects. We expect that the
 * second one (for dead tuples) won't get very full, so we give it only
 * work_mem.
 */
@@ -298,7 +298,7 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
 PageSetChecksumInplace(page, blkno);

 /*
 * Now write the page. There's no need for smgr to schedule an fsync for
 * this write; we'll do it ourselves before ending the build.
 */
 if (blkno == wstate->btws_pages_written)
@@ -423,14 +423,14 @@ _bt_sortaddtup(Page page,
 * A leaf page being built looks like:
 *
 * +----------------+---------------------------------+
 * | PageHeaderData | linp0 linp1 linp2 ...           |
 * +-----------+----+---------------------------------+
 * | ... linpN |                                      |
 * +-----------+--------------------------------------+
 * |     ^ last                                       |
 * |                                                  |
 * +-------------+------------------------------------+
 * |             | itemN ...                          |
 * +-------------+------------------+-----------------+
 * |       ... item3 item2 item1    | "special space" |
 * +--------------------------------+-----------------+
@@ -492,9 +492,9 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 RelationGetRelationName(wstate->index))));

 /*
 * Check to see if page is "full". It's definitely full if the item won't
 * fit. Otherwise, compare to the target freespace derived from the
 * fillfactor. However, we must put at least two items on each page, so
 * disregard fillfactor if we don't have that many.
 */
 if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY))
@@ -567,7 +567,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 }

 /*
 * Write out the old page. We never need to touch it again, so we can
 * free the opage workspace too.
 */
 _bt_blwritepage(wstate, opage, oblkno);
@@ -804,7 +804,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)

 /*
 * If the index is WAL-logged, we must fsync it down to disk before it's
 * safe to commit the transaction. (For a non-WAL-logged index we don't
 * care since the index will be uninteresting after a crash anyway.)
 *
 * It's obvious that we must do this when not WAL-logging the build. It's
@@ -107,7 +107,7 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
 * comparison data ultimately used must match the key datatypes.
 *
 * The result cannot be used with _bt_compare(), unless comparison
 * data is first stored into the key entries. Currently this
 * routine is only called by nbtsort.c and tuplesort.c, which have
 * their own comparison routines.
 */
@@ -269,7 +269,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 continue;

 /*
 * First, deconstruct the array into elements. Anything allocated
 * here (including a possibly detoasted array value) is in the
 * workspace context.
 */
@@ -283,7 +283,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 &elem_values, &elem_nulls, &num_elems);

 /*
 * Compress out any null elements. We can ignore them since we assume
 * all btree operators are strict.
 */
 num_nonnulls = 0;
@@ -517,7 +517,7 @@ _bt_compare_array_elements(const void *a, const void *b, void *arg)
 * _bt_start_array_keys() -- Initialize array keys at start of a scan
 *
 * Set up the cur_elem counters and fill in the first sk_argument value for
 * each array scankey. We can't do this until we know the scan direction.
 */
 void
 _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
@@ -670,8 +670,8 @@ _bt_restore_array_keys(IndexScanDesc scan)
 * so that the index sorts in the desired direction.
 *
 * One key purpose of this routine is to discover which scan keys must be
 * satisfied to continue the scan. It also attempts to eliminate redundant
 * keys and detect contradictory keys. (If the index opfamily provides
 * incomplete sets of cross-type operators, we may fail to detect redundant
 * or contradictory keys, but we can survive that.)
 *
@@ -702,7 +702,7 @@ _bt_restore_array_keys(IndexScanDesc scan)
 * that's the only one returned. (So, we return either a single = key,
 * or one or two boundary-condition keys for each attr.) However, if we
 * cannot compare two keys for lack of a suitable cross-type operator,
 * we cannot eliminate either. If there are two such keys of the same
 * operator strategy, the second one is just pushed into the output array
 * without further processing here. We may also emit both >/>= or both
 * </<= keys if we can't compare them. The logic about required keys still
@@ -737,7 +737,7 @@ _bt_restore_array_keys(IndexScanDesc scan)
 * Note: the reason we have to copy the preprocessed scan keys into private
 * storage is that we are modifying the array based on comparisons of the
 * key argument values, which could change on a rescan or after moving to
 * new elements of array keys. Therefore we can't overwrite the source data.
 */
 void
 _bt_preprocess_keys(IndexScanDesc scan)
@@ -919,7 +919,7 @@ _bt_preprocess_keys(IndexScanDesc scan)

 /*
 * Emit the cleaned-up keys into the outkeys[] array, and then
 * mark them if they are required. They are required (possibly
 * only in one direction) if all attrs before this one had "=".
 */
 for (j = BTMaxStrategyNumber; --j >= 0;)
@@ -1017,7 +1017,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 * and amoplefttype/amoprighttype equal to the two argument datatypes.
 *
 * If the opfamily doesn't supply a complete set of cross-type operators we
 * may not be able to make the comparison. If we can make the comparison
 * we store the operator result in *result and return TRUE. We return FALSE
 * if the comparison could not be made.
 *
@@ -1043,7 +1043,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
 StrategyNumber strat;

 /*
 * First, deal with cases where one or both args are NULL. This should
 * only happen when the scankeys represent IS NULL/NOT NULL conditions.
 */
 if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ISNULL)
@@ -1183,7 +1183,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
 *
 * Lastly, for ordinary scankeys (not IS NULL/NOT NULL), we check for a
 * NULL comparison value. Since all btree operators are assumed strict,
 * a NULL means that the qual cannot be satisfied. We return TRUE if the
 * comparison value isn't NULL, or FALSE if the scan should be abandoned.
 *
 * This function is applied to the *input* scankey structure; therefore
@@ -1212,7 +1212,7 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
 * --- we can treat IS NULL as an equality operator for purposes of search
 * strategy.
 *
 * Likewise, "x IS NOT NULL" is supported. We treat that as either "less
 * than NULL" in a NULLS LAST index, or "greater than NULL" in a NULLS
 * FIRST index.
 *
@@ -1284,7 +1284,7 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
 * Mark a scankey as "required to continue the scan".
 *
 * Depending on the operator type, the key may be required for both scan
 * directions or just one. Also, if the key is a row comparison header,
 * we have to mark the appropriate subsidiary ScanKeys as required. In
 * such cases, the first subsidiary key is required, but subsequent ones
 * are required only as long as they correspond to successive index columns
@@ -1296,7 +1296,7 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
 * scribbling on a data structure belonging to the index AM's caller, not on
 * our private copy. This should be OK because the marking will not change
 * from scan to scan within a query, and so we'd just re-mark the same way
 * anyway on a rescan. Something to keep an eye on though.
 */
 static void
 _bt_mark_scankey_required(ScanKey skey)
@@ -1482,7 +1482,7 @@ _bt_checkkeys(IndexScanDesc scan,
 /*
 * Since NULLs are sorted before non-NULLs, we know we have
 * reached the lower limit of the range of values for this
 * index attr. On a backward scan, we can stop if this qual
 * is one of the "must match" subset. We can stop regardless
 * of whether the qual is > or <, so long as it's required,
 * because it's not possible for any future tuples to pass. On
@@ -1498,8 +1498,8 @@ _bt_checkkeys(IndexScanDesc scan,
 /*
 * Since NULLs are sorted after non-NULLs, we know we have
 * reached the upper limit of the range of values for this
 * index attr. On a forward scan, we can stop if this qual is
 * one of the "must match" subset. We can stop regardless of
 * whether the qual is > or <, so long as it's required,
 * because it's not possible for any future tuples to pass. On
 * a backward scan, however, we must keep going, because we
@@ -1593,7 +1593,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
 /*
 * Since NULLs are sorted before non-NULLs, we know we have
 * reached the lower limit of the range of values for this
 * index attr. On a backward scan, we can stop if this qual
 * is one of the "must match" subset. We can stop regardless
 * of whether the qual is > or <, so long as it's required,
 * because it's not possible for any future tuples to pass. On
@@ -1609,8 +1609,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
 /*
 * Since NULLs are sorted after non-NULLs, we know we have
 * reached the upper limit of the range of values for this
 * index attr. On a forward scan, we can stop if this qual is
 * one of the "must match" subset. We can stop regardless of
 * whether the qual is > or <, so long as it's required,
 * because it's not possible for any future tuples to pass. On
 * a backward scan, however, we must keep going, because we
@@ -1631,7 +1631,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
 {
 /*
 * Unlike the simple-scankey case, this isn't a disallowed case.
 * But it can never match. If all the earlier row comparison
 * columns are required for the scan direction, we can stop the
 * scan, because there can't be another tuple that will succeed.
 */
@@ -1696,7 +1696,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
 /*
 * Tuple fails this qual. If it's a required qual for the current
 * scan direction, then we can conclude no further tuples will pass,
 * either. Note we have to look at the deciding column, not
 * necessarily the first or last column of the row condition.
 */
 if ((subkey->sk_flags & SK_BT_REQFWD) &&
@@ -1722,7 +1722,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
 * is sufficient for setting LP_DEAD status (which is only a hint).
 *
 * We match items by heap TID before assuming they are the right ones to
 * delete. We cope with cases where items have moved right due to insertions.
 * If an item has moved off the current page due to a split, we'll fail to
 * find it and do nothing (this is not an error case --- we assume the item
 * will eventually get marked in a future indexscan). Note that because we
@@ -1806,8 +1806,8 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
 /*
 * The following routines manage a shared-memory area in which we track
 * assignment of "vacuum cycle IDs" to currently-active btree vacuuming
 * operations. There is a single counter which increments each time we
 * start a vacuum to assign it a cycle ID. Since multiple vacuums could
 * be active concurrently, we have to track the cycle ID for each active
 * vacuum; this requires at most MaxBackends entries (usually far fewer).
 * We assume at most one vacuum can be active for a given index.
@@ -40,9 +40,9 @@ _bt_restore_page(Page page, char *from, int len)
 int nitems;

 /*
-* To get the items back in the original order, we add them to the page
-* in reverse. To figure out where one tuple ends and another begins,
-* we have to scan them in forward order first.
+* To get the items back in the original order, we add them to the page in
+* reverse. To figure out where one tuple ends and another begins, we
+* have to scan them in forward order first.
 */
 i = 0;
 while (from < end)
@@ -97,7 +97,7 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
 pageop->btpo_flags = BTP_META;

 /*
 * Set pd_lower just past the end of the metadata. This is not essential
 * but it makes the page look compressible to xlog.c.
 */
 ((PageHeader) metapg)->pd_lower =
@@ -118,7 +118,7 @@ static void
 _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
 RelFileNode rnode, BlockNumber cblock)
 {
 Buffer buf;

 buf = XLogReadBuffer(rnode, cblock, false);
 if (BufferIsValid(buf))
@@ -128,6 +128,7 @@ _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
 if (lsn > PageGetLSN(page))
 {
 BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);

 Assert((pageop->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0);
 pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
@@ -153,6 +154,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,

 datapos = (char *) xlrec + SizeOfBtreeInsert;
 datalen = record->xl_len - SizeOfBtreeInsert;

 /*
 * if this insert finishes a split at lower level, extract the block
 * number of the (left) child.
@@ -172,10 +174,10 @@ btree_xlog_insert(bool isleaf, bool ismeta,
 }

 /*
-* Insertion to an internal page finishes an incomplete split at the
-* child level. Clear the incomplete-split flag in the child. Note:
-* during normal operation, the child and parent pages are locked at the
-* same time, so that clearing the flag and inserting the downlink appear
+* Insertion to an internal page finishes an incomplete split at the child
+* level. Clear the incomplete-split flag in the child. Note: during
+* normal operation, the child and parent pages are locked at the same
+* time, so that clearing the flag and inserting the downlink appear
 * atomic to other backends. We don't bother with that during replay,
 * because readers don't care about the incomplete-split flag and there
 * cannot be updates happening.
@@ -279,9 +281,10 @@ btree_xlog_split(bool onleft, bool isroot,
 datapos += left_hikeysz;
 datalen -= left_hikeysz;
 }

 /*
-* If this insertion finishes an incomplete split, get the block number
-* of the child.
+* If this insertion finishes an incomplete split, get the block number of
+* the child.
 */
 if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(1)))
 {
@@ -439,7 +442,7 @@ btree_xlog_split(bool onleft, bool isroot,
 * the backup block containing right sibling is 2 or 3, depending
 * whether this was a leaf or internal page.
 */
 int rnext_index = isleaf ? 2 : 3;

 if (record->xl_info & XLR_BKP_BLOCK(rnext_index))
 (void) RestoreBackupBlock(lsn, record, rnext_index, false, false);
@@ -620,7 +623,7 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)

 /*
 * In what follows, we have to examine the previous state of the index
 * page, as well as the heap page(s) it points to. This is only valid if
 * WAL replay has reached a consistent database state; which means that
 * the preceding check is not just an optimization, but is *necessary*. We
 * won't have let in any user sessions before we reach consistency.
@@ -629,9 +632,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
 elog(PANIC, "btree_xlog_delete_get_latestRemovedXid: cannot operate with inconsistent data");

 /*
 * Get index page. If the DB is consistent, this should not fail, nor
 * should any of the heap page fetches below. If one does, we return
 * InvalidTransactionId to cancel all HS transactions. That's probably
 * overkill, but it's safe, and certainly better than panicking here.
 */
 ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
@@ -716,9 +719,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
 /*
 * If all heap tuples were LP_DEAD then we will be returning
 * InvalidTransactionId here, which avoids conflicts. This matches
-* existing logic which assumes that LP_DEAD tuples must already be
-* older than the latestRemovedXid on the cleanup record that
-* set them as LP_DEAD, hence must already have generated a conflict.
+* existing logic which assumes that LP_DEAD tuples must already be older
+* than the latestRemovedXid on the cleanup record that set them as
+* LP_DEAD, hence must already have generated a conflict.
 */
 return latestRemovedXid;
 }
@@ -735,7 +738,7 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
 * If we have any conflict processing to do, it must happen before we
 * update the page.
 *
 * Btree delete records can conflict with standby queries. You might
 * think that vacuum records would conflict as well, but we've handled
 * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
 * cleaned by the vacuum of the heap and so we can resolve any conflicts
@@ -828,7 +831,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
 ItemId itemid;
 IndexTuple itup;
 OffsetNumber nextoffset;
 BlockNumber rightsib;

 poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));