Relocate _bt_readpage and related functions.

Quite a bit of code within nbtutils.c is only called by _bt_readpage.
Move _bt_readpage and all of the nbtutils.c functions it depends on
into a new .c file, nbtreadpage.c.  Also reorder some of the functions
within the new file for clarity.

This commit has no functional impact.  It is strictly mechanical.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Victor Yegorov <vyegorov@gmail.com>
Discussion: https://postgr.es/m/CAH2-WzmwMwcwKFgaf+mYPwiz3iL4AqpXnwtW_O0vqpWPXRom9Q@mail.gmail.com
src/backend/access/nbtree/Makefile
@@ -18,6 +18,7 @@ OBJS = \
 	nbtinsert.o \
 	nbtpage.o \
 	nbtpreprocesskeys.o \
+	nbtreadpage.o \
 	nbtree.o \
 	nbtsearch.o \
 	nbtsort.o \
src/backend/access/nbtree/meson.build
@@ -6,6 +6,7 @@ backend_sources += files(
   'nbtinsert.c',
   'nbtpage.c',
   'nbtpreprocesskeys.c',
+  'nbtreadpage.c',
   'nbtree.c',
   'nbtsearch.c',
   'nbtsort.c',
src/backend/access/nbtree/nbtreadpage.c (new file, 3726 lines added)
(File diff suppressed because it is too large.)
src/backend/access/nbtree/nbtree.c
@@ -93,6 +93,7 @@ typedef struct BTParallelScanDescData
 
 typedef struct BTParallelScanDescData *BTParallelScanDesc;
 
+static bool _bt_start_prim_scan(IndexScanDesc scan);
 static void _bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btscan,
                                           BTScanOpaque so);
 static void _bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan,
@@ -276,7 +277,7 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
         if (res)
             break;
         /* ... otherwise see if we need another primitive index scan */
-    } while (so->numArrayKeys && _bt_start_prim_scan(scan, dir));
+    } while (so->numArrayKeys && _bt_start_prim_scan(scan));
 
     return res;
 }
@@ -324,7 +325,7 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
             }
         }
         /* Now see if we need another primitive index scan */
-    } while (so->numArrayKeys && _bt_start_prim_scan(scan, ForwardScanDirection));
+    } while (so->numArrayKeys && _bt_start_prim_scan(scan));
 
     return ntids;
 }
@@ -654,6 +655,75 @@ btestimateparallelscan(Relation rel, int nkeys, int norderbys)
     return estnbtreeshared;
 }
 
+/*
+ * _bt_start_prim_scan() -- start scheduled primitive index scan?
+ *
+ * Returns true if _bt_checkkeys scheduled another primitive index scan, just
+ * as the last one ended.  Otherwise returns false, indicating that the array
+ * keys are now fully exhausted.
+ *
+ * Only call here during scans with one or more equality type array scan keys,
+ * after _bt_first or _bt_next return false.
+ */
+static bool
+_bt_start_prim_scan(IndexScanDesc scan)
+{
+    BTScanOpaque so = (BTScanOpaque) scan->opaque;
+
+    Assert(so->numArrayKeys);
+
+    so->scanBehind = so->oppositeDirCheck = false;  /* reset */
+
+    /*
+     * Array keys are advanced within _bt_checkkeys when the scan reaches the
+     * leaf level (more precisely, they're advanced when the scan reaches the
+     * end of each distinct set of array elements).  This process avoids
+     * repeat access to leaf pages (across multiple primitive index scans) by
+     * advancing the scan's array keys when it allows the primitive index scan
+     * to find nearby matching tuples (or when it eliminates ranges of array
+     * key space that can't possibly be satisfied by any index tuple).
+     *
+     * _bt_checkkeys sets a simple flag variable to schedule another primitive
+     * index scan.  The flag tells us what to do.
+     *
+     * We cannot rely on _bt_first always reaching _bt_checkkeys.  There are
+     * various cases where that won't happen.  For example, if the index is
+     * completely empty, then _bt_first won't call _bt_readpage/_bt_checkkeys.
+     * We also don't expect a call to _bt_checkkeys during searches for a
+     * non-existent value that happens to be lower/higher than any existing
+     * value in the index.
+     *
+     * We don't require special handling for these cases -- we don't need to
+     * be explicitly instructed to _not_ perform another primitive index scan.
+     * It's up to code under the control of _bt_first to always set the flag
+     * when another primitive index scan will be required.
+     *
+     * This works correctly, even with the tricky cases listed above, which
+     * all involve access to leaf pages "near the boundaries of the key space"
+     * (whether it's from a leftmost/rightmost page, or an imaginary empty
+     * leaf root page).  If _bt_checkkeys cannot be reached by a primitive
+     * index scan for one set of array keys, then it also won't be reached for
+     * any later set ("later" in terms of the direction that we scan the index
+     * and advance the arrays).  The array keys won't have advanced in these
+     * cases, but that's the correct behavior (even _bt_advance_array_keys
+     * won't always advance the arrays at the point they become "exhausted").
+     */
+    if (so->needPrimScan)
+    {
+        /*
+         * Flag was set -- must call _bt_first again, which will reset the
+         * scan's needPrimScan flag
+         */
+        return true;
+    }
+
+    /* The top-level index scan ran out of tuples in this scan direction */
+    if (scan->parallel_scan != NULL)
+        _bt_parallel_done(scan);
+
+    return false;
+}
+
 /*
  * _bt_parallel_serialize_arrays() -- Serialize parallel array state.
  *
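Aside: the comment block in _bt_start_prim_scan above describes a one-way handshake -- code reached from _bt_first (ultimately _bt_checkkeys) sets so->needPrimScan, and _bt_start_prim_scan merely consults the flag. The stand-alone toy program below sketches that handshake; it is not PostgreSQL code, and every name in it is a hypothetical stand-in for the corresponding nbtree state:

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins for the scan state; names loosely follow the diff above. */
typedef struct ToyScan
{
    int     nextArrayElem;  /* which set of array keys we're on */
    int     numArrayElems;  /* total sets of array keys */
    bool    needPrimScan;   /* set by the "checkkeys" step */
} ToyScan;

/* Models a primitive scan whose per-tuple check may schedule another one */
static bool
toy_primscan(ToyScan *scan)
{
    printf("primitive scan for array element %d\n", scan->nextArrayElem);
    scan->nextArrayElem++;
    /* schedule another primitive scan unless the arrays are exhausted */
    scan->needPrimScan = (scan->nextArrayElem < scan->numArrayElems);
    return false;           /* pretend no tuple was returned */
}

/* Models _bt_start_prim_scan: consult the flag, nothing else */
static bool
toy_start_prim_scan(ToyScan *scan)
{
    return scan->needPrimScan;  /* true => caller loops back for more */
}

int
main(void)
{
    ToyScan scan = {.nextArrayElem = 0, .numArrayElems = 3,
                    .needPrimScan = false};

    do
    {
        if (toy_primscan(&scan))
            break;
        /* ... otherwise see if we need another primitive index scan */
    } while (toy_start_prim_scan(&scan));

    return 0;
}

The point of the pattern is that the scheduling decision is made where the information exists (the per-tuple check), while the driver loop stays trivial -- which is also why the ScanDirection argument could be dropped from the real function's signature.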
src/backend/access/nbtree/nbtsearch.c
@@ -32,16 +32,6 @@ static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
 static OffsetNumber _bt_binsrch(Relation rel, BTScanInsert key, Buffer buf);
 static int  _bt_binsrch_posting(BTScanInsert key, Page page,
                                 OffsetNumber offnum);
-static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir,
-                         OffsetNumber offnum, bool firstpage);
-static void _bt_saveitem(BTScanOpaque so, int itemIndex,
-                         OffsetNumber offnum, IndexTuple itup);
-static int  _bt_setuppostingitems(BTScanOpaque so, int itemIndex,
-                                  OffsetNumber offnum, const ItemPointerData *heapTid,
-                                  IndexTuple itup);
-static inline void _bt_savepostingitem(BTScanOpaque so, int itemIndex,
-                                       OffsetNumber offnum,
-                                       ItemPointer heapTid, int tupleOffset);
 static inline void _bt_returnitem(IndexScanDesc scan, BTScanOpaque so);
 static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
 static bool _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum,
@@ -1623,517 +1613,6 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
     return true;
 }
 
-/*
- * _bt_readpage() -- Load data from current index page into so->currPos
- *
- * Caller must have pinned and read-locked so->currPos.buf; the buffer's state
- * is not changed here.  Also, currPos.moreLeft and moreRight must be valid;
- * they are updated as appropriate.  All other fields of so->currPos are
- * initialized from scratch here.
- *
- * We scan the current page starting at offnum and moving in the indicated
- * direction.  All items matching the scan keys are loaded into currPos.items.
- * moreLeft or moreRight (as appropriate) is cleared if _bt_checkkeys reports
- * that there can be no more matching tuples in the current scan direction
- * (could just be for the current primitive index scan when scan has arrays).
- *
- * In the case of a parallel scan, caller must have called _bt_parallel_seize
- * prior to calling this function; this function will invoke
- * _bt_parallel_release before returning.
- *
- * Returns true if any matching items found on the page, false if none.
- */
-static bool
-_bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
-             bool firstpage)
-{
-    Relation    rel = scan->indexRelation;
-    BTScanOpaque so = (BTScanOpaque) scan->opaque;
-    Page        page;
-    BTPageOpaque opaque;
-    OffsetNumber minoff;
-    OffsetNumber maxoff;
-    BTReadPageState pstate;
-    bool        arrayKeys;
-    int         itemIndex,
-                indnatts;
-
-    /* save the page/buffer block number, along with its sibling links */
-    page = BufferGetPage(so->currPos.buf);
-    opaque = BTPageGetOpaque(page);
-    so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf);
-    so->currPos.prevPage = opaque->btpo_prev;
-    so->currPos.nextPage = opaque->btpo_next;
-    /* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */
-    so->currPos.dir = dir;
-    so->currPos.nextTupleOffset = 0;
-
-    /* either moreRight or moreLeft should be set now (may be unset later) */
-    Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
-           so->currPos.moreLeft);
-    Assert(!P_IGNORE(opaque));
-    Assert(BTScanPosIsPinned(so->currPos));
-    Assert(!so->needPrimScan);
-
-    if (scan->parallel_scan)
-    {
-        /* allow next/prev page to be read by other worker without delay */
-        if (ScanDirectionIsForward(dir))
-            _bt_parallel_release(scan, so->currPos.nextPage,
-                                 so->currPos.currPage);
-        else
-            _bt_parallel_release(scan, so->currPos.prevPage,
-                                 so->currPos.currPage);
-    }
-
-    PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot);
-
-    /* initialize local variables */
-    indnatts = IndexRelationGetNumberOfAttributes(rel);
-    arrayKeys = so->numArrayKeys != 0;
-    minoff = P_FIRSTDATAKEY(opaque);
-    maxoff = PageGetMaxOffsetNumber(page);
-
-    /* initialize page-level state that we'll pass to _bt_checkkeys */
-    pstate.minoff = minoff;
-    pstate.maxoff = maxoff;
-    pstate.finaltup = NULL;
-    pstate.page = page;
-    pstate.firstpage = firstpage;
-    pstate.forcenonrequired = false;
-    pstate.startikey = 0;
-    pstate.offnum = InvalidOffsetNumber;
-    pstate.skip = InvalidOffsetNumber;
-    pstate.continuescan = true; /* default assumption */
-    pstate.rechecks = 0;
-    pstate.targetdistance = 0;
-    pstate.nskipadvances = 0;
-
-    if (ScanDirectionIsForward(dir))
-    {
-        /* SK_SEARCHARRAY forward scans must provide high key up front */
-        if (arrayKeys)
-        {
-            if (!P_RIGHTMOST(opaque))
-            {
-                ItemId      iid = PageGetItemId(page, P_HIKEY);
-
-                pstate.finaltup = (IndexTuple) PageGetItem(page, iid);
-
-                if (so->scanBehind &&
-                    !_bt_scanbehind_checkkeys(scan, dir, pstate.finaltup))
-                {
-                    /* Schedule another primitive index scan after all */
-                    so->currPos.moreRight = false;
-                    so->needPrimScan = true;
-                    if (scan->parallel_scan)
-                        _bt_parallel_primscan_schedule(scan,
-                                                       so->currPos.currPage);
-                    return false;
-                }
-            }
-
-            so->scanBehind = so->oppositeDirCheck = false;  /* reset */
-        }
-
-        /*
-         * Consider pstate.startikey optimization once the ongoing primitive
-         * index scan has already read at least one page
-         */
-        if (!pstate.firstpage && minoff < maxoff)
-            _bt_set_startikey(scan, &pstate);
-
-        /* load items[] in ascending order */
-        itemIndex = 0;
-
-        offnum = Max(offnum, minoff);
-
-        while (offnum <= maxoff)
-        {
-            ItemId      iid = PageGetItemId(page, offnum);
-            IndexTuple  itup;
-            bool        passes_quals;
-
-            /*
-             * If the scan specifies not to return killed tuples, then we
-             * treat a killed tuple as not passing the qual
-             */
-            if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
-            {
-                offnum = OffsetNumberNext(offnum);
-                continue;
-            }
-
-            itup = (IndexTuple) PageGetItem(page, iid);
-            Assert(!BTreeTupleIsPivot(itup));
-
-            pstate.offnum = offnum;
-            passes_quals = _bt_checkkeys(scan, &pstate, arrayKeys,
-                                         itup, indnatts);
-
-            /*
-             * Check if we need to skip ahead to a later tuple (only possible
-             * when the scan uses array keys)
-             */
-            if (arrayKeys && OffsetNumberIsValid(pstate.skip))
-            {
-                Assert(!passes_quals && pstate.continuescan);
-                Assert(offnum < pstate.skip);
-                Assert(!pstate.forcenonrequired);
-
-                offnum = pstate.skip;
-                pstate.skip = InvalidOffsetNumber;
-                continue;
-            }
-
-            if (passes_quals)
-            {
-                /* tuple passes all scan key conditions */
-                if (!BTreeTupleIsPosting(itup))
-                {
-                    /* Remember it */
-                    _bt_saveitem(so, itemIndex, offnum, itup);
-                    itemIndex++;
-                }
-                else
-                {
-                    int         tupleOffset;
-
-                    /*
-                     * Set up state to return posting list, and remember first
-                     * TID
-                     */
-                    tupleOffset =
-                        _bt_setuppostingitems(so, itemIndex, offnum,
-                                              BTreeTupleGetPostingN(itup, 0),
-                                              itup);
-                    itemIndex++;
-                    /* Remember additional TIDs */
-                    for (int i = 1; i < BTreeTupleGetNPosting(itup); i++)
-                    {
-                        _bt_savepostingitem(so, itemIndex, offnum,
-                                            BTreeTupleGetPostingN(itup, i),
-                                            tupleOffset);
-                        itemIndex++;
-                    }
-                }
-            }
-            /* When !continuescan, there can't be any more matches, so stop */
-            if (!pstate.continuescan)
-                break;
-
-            offnum = OffsetNumberNext(offnum);
-        }
-
-        /*
-         * We don't need to visit page to the right when the high key
-         * indicates that no more matches will be found there.
-         *
-         * Checking the high key like this works out more often than you might
-         * think.  Leaf page splits pick a split point between the two most
-         * dissimilar tuples (this is weighed against the need to evenly share
-         * free space).  Leaf pages with high key attribute values that can
-         * only appear on non-pivot tuples on the right sibling page are
-         * common.
-         */
-        if (pstate.continuescan && !so->scanBehind && !P_RIGHTMOST(opaque))
-        {
-            ItemId      iid = PageGetItemId(page, P_HIKEY);
-            IndexTuple  itup = (IndexTuple) PageGetItem(page, iid);
-            int         truncatt;
-
-            /* Reset arrays, per _bt_set_startikey contract */
-            if (pstate.forcenonrequired)
-                _bt_start_array_keys(scan, dir);
-            pstate.forcenonrequired = false;
-            pstate.startikey = 0;   /* _bt_set_startikey ignores P_HIKEY */
-
-            truncatt = BTreeTupleGetNAtts(itup, rel);
-            _bt_checkkeys(scan, &pstate, arrayKeys, itup, truncatt);
-        }
-
-        if (!pstate.continuescan)
-            so->currPos.moreRight = false;
-
-        Assert(itemIndex <= MaxTIDsPerBTreePage);
-        so->currPos.firstItem = 0;
-        so->currPos.lastItem = itemIndex - 1;
-        so->currPos.itemIndex = 0;
-    }
-    else
-    {
-        /* SK_SEARCHARRAY backward scans must provide final tuple up front */
-        if (arrayKeys)
-        {
-            if (minoff <= maxoff && !P_LEFTMOST(opaque))
-            {
-                ItemId      iid = PageGetItemId(page, minoff);
-
-                pstate.finaltup = (IndexTuple) PageGetItem(page, iid);
-
-                if (so->scanBehind &&
-                    !_bt_scanbehind_checkkeys(scan, dir, pstate.finaltup))
-                {
-                    /* Schedule another primitive index scan after all */
-                    so->currPos.moreLeft = false;
-                    so->needPrimScan = true;
-                    if (scan->parallel_scan)
-                        _bt_parallel_primscan_schedule(scan,
-                                                       so->currPos.currPage);
-                    return false;
-                }
-            }
-
-            so->scanBehind = so->oppositeDirCheck = false;  /* reset */
-        }
-
-        /*
-         * Consider pstate.startikey optimization once the ongoing primitive
-         * index scan has already read at least one page
-         */
-        if (!pstate.firstpage && minoff < maxoff)
-            _bt_set_startikey(scan, &pstate);
-
-        /* load items[] in descending order */
-        itemIndex = MaxTIDsPerBTreePage;
-
-        offnum = Min(offnum, maxoff);
-
-        while (offnum >= minoff)
-        {
-            ItemId      iid = PageGetItemId(page, offnum);
-            IndexTuple  itup;
-            bool        tuple_alive;
-            bool        passes_quals;
-
-            /*
-             * If the scan specifies not to return killed tuples, then we
-             * treat a killed tuple as not passing the qual.  Most of the
-             * time, it's a win to not bother examining the tuple's index
-             * keys, but just skip to the next tuple (previous, actually,
-             * since we're scanning backwards).  However, if this is the first
-             * tuple on the page, we do check the index keys, to prevent
-             * uselessly advancing to the page to the left.  This is similar
-             * to the high key optimization used by forward scans.
-             */
-            if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
-            {
-                if (offnum > minoff)
-                {
-                    offnum = OffsetNumberPrev(offnum);
-                    continue;
-                }
-
-                tuple_alive = false;
-            }
-            else
-                tuple_alive = true;
-
-            itup = (IndexTuple) PageGetItem(page, iid);
-            Assert(!BTreeTupleIsPivot(itup));
-
-            pstate.offnum = offnum;
-            if (arrayKeys && offnum == minoff && pstate.forcenonrequired)
-            {
-                /* Reset arrays, per _bt_set_startikey contract */
-                pstate.forcenonrequired = false;
-                pstate.startikey = 0;
-                _bt_start_array_keys(scan, dir);
-            }
-            passes_quals = _bt_checkkeys(scan, &pstate, arrayKeys,
-                                         itup, indnatts);
-
-            if (arrayKeys && so->scanBehind)
-            {
-                /*
-                 * Done scanning this page, but not done with the current
-                 * primscan.
-                 *
-                 * Note: Forward scans don't check this explicitly, since they
-                 * prefer to reuse pstate.skip for this instead.
-                 */
-                Assert(!passes_quals && pstate.continuescan);
-                Assert(!pstate.forcenonrequired);
-
-                break;
-            }
-
-            /*
-             * Check if we need to skip ahead to a later tuple (only possible
-             * when the scan uses array keys)
-             */
-            if (arrayKeys && OffsetNumberIsValid(pstate.skip))
-            {
-                Assert(!passes_quals && pstate.continuescan);
-                Assert(offnum > pstate.skip);
-                Assert(!pstate.forcenonrequired);
-
-                offnum = pstate.skip;
-                pstate.skip = InvalidOffsetNumber;
-                continue;
-            }
-
-            if (passes_quals && tuple_alive)
-            {
-                /* tuple passes all scan key conditions */
-                if (!BTreeTupleIsPosting(itup))
-                {
-                    /* Remember it */
-                    itemIndex--;
-                    _bt_saveitem(so, itemIndex, offnum, itup);
-                }
-                else
-                {
-                    int         tupleOffset;
-
-                    /*
-                     * Set up state to return posting list, and remember first
-                     * TID.
-                     *
-                     * Note that we deliberately save/return items from
-                     * posting lists in ascending heap TID order for backwards
-                     * scans.  This allows _bt_killitems() to make a
-                     * consistent assumption about the order of items
-                     * associated with the same posting list tuple.
-                     */
-                    itemIndex--;
-                    tupleOffset =
-                        _bt_setuppostingitems(so, itemIndex, offnum,
-                                              BTreeTupleGetPostingN(itup, 0),
-                                              itup);
-                    /* Remember additional TIDs */
-                    for (int i = 1; i < BTreeTupleGetNPosting(itup); i++)
-                    {
-                        itemIndex--;
-                        _bt_savepostingitem(so, itemIndex, offnum,
-                                            BTreeTupleGetPostingN(itup, i),
-                                            tupleOffset);
-                    }
-                }
-            }
-            /* When !continuescan, there can't be any more matches, so stop */
-            if (!pstate.continuescan)
-                break;
-
-            offnum = OffsetNumberPrev(offnum);
-        }
-
-        /*
-         * We don't need to visit page to the left when no more matches will
-         * be found there
-         */
-        if (!pstate.continuescan)
-            so->currPos.moreLeft = false;
-
-        Assert(itemIndex >= 0);
-        so->currPos.firstItem = itemIndex;
-        so->currPos.lastItem = MaxTIDsPerBTreePage - 1;
-        so->currPos.itemIndex = MaxTIDsPerBTreePage - 1;
-    }
-
-    /*
-     * If _bt_set_startikey told us to temporarily treat the scan's keys as
-     * nonrequired (possible only during scans with array keys), there must be
-     * no lasting consequences for the scan's array keys.  The scan's arrays
-     * should now have exactly the same elements as they would have had if the
-     * nonrequired behavior had never been used.  (In general, a scan's arrays
-     * are expected to track its progress through the index's key space.)
-     *
-     * We are required (by _bt_set_startikey) to call _bt_checkkeys against
-     * pstate.finaltup with pstate.forcenonrequired=false to allow the scan's
-     * arrays to recover.  Assert that that step hasn't been missed.
-     */
-    Assert(!pstate.forcenonrequired);
-
-    return (so->currPos.firstItem <= so->currPos.lastItem);
-}
-
-/* Save an index item into so->currPos.items[itemIndex] */
-static void
-_bt_saveitem(BTScanOpaque so, int itemIndex,
-             OffsetNumber offnum, IndexTuple itup)
-{
-    BTScanPosItem *currItem = &so->currPos.items[itemIndex];
-
-    Assert(!BTreeTupleIsPivot(itup) && !BTreeTupleIsPosting(itup));
-
-    currItem->heapTid = itup->t_tid;
-    currItem->indexOffset = offnum;
-    if (so->currTuples)
-    {
-        Size        itupsz = IndexTupleSize(itup);
-
-        currItem->tupleOffset = so->currPos.nextTupleOffset;
-        memcpy(so->currTuples + so->currPos.nextTupleOffset, itup, itupsz);
-        so->currPos.nextTupleOffset += MAXALIGN(itupsz);
-    }
-}
-
-/*
- * Setup state to save TIDs/items from a single posting list tuple.
- *
- * Saves an index item into so->currPos.items[itemIndex] for TID that is
- * returned to scan first.  Second or subsequent TIDs for posting list should
- * be saved by calling _bt_savepostingitem().
- *
- * Returns an offset into tuple storage space that main tuple is stored at if
- * needed.
- */
-static int
-_bt_setuppostingitems(BTScanOpaque so, int itemIndex, OffsetNumber offnum,
-                      const ItemPointerData *heapTid, IndexTuple itup)
-{
-    BTScanPosItem *currItem = &so->currPos.items[itemIndex];
-
-    Assert(BTreeTupleIsPosting(itup));
-
-    currItem->heapTid = *heapTid;
-    currItem->indexOffset = offnum;
-    if (so->currTuples)
-    {
-        /* Save base IndexTuple (truncate posting list) */
-        IndexTuple  base;
-        Size        itupsz = BTreeTupleGetPostingOffset(itup);
-
-        itupsz = MAXALIGN(itupsz);
-        currItem->tupleOffset = so->currPos.nextTupleOffset;
-        base = (IndexTuple) (so->currTuples + so->currPos.nextTupleOffset);
-        memcpy(base, itup, itupsz);
-        /* Defensively reduce work area index tuple header size */
-        base->t_info &= ~INDEX_SIZE_MASK;
-        base->t_info |= itupsz;
-        so->currPos.nextTupleOffset += itupsz;
-
-        return currItem->tupleOffset;
-    }
-
-    return 0;
-}
-
-/*
- * Save an index item into so->currPos.items[itemIndex] for current posting
- * tuple.
- *
- * Assumes that _bt_setuppostingitems() has already been called for current
- * posting list tuple.  Caller passes its return value as tupleOffset.
- */
-static inline void
-_bt_savepostingitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum,
-                    ItemPointer heapTid, int tupleOffset)
-{
-    BTScanPosItem *currItem = &so->currPos.items[itemIndex];
-
-    currItem->heapTid = *heapTid;
-    currItem->indexOffset = offnum;
-
-    /*
-     * Have index-only scans return the same base IndexTuple for every TID
-     * that originates from the same posting list
-     */
-    if (so->currTuples)
-        currItem->tupleOffset = tupleOffset;
-}
-
 /*
  * Return the index item from so->currPos.items[so->currPos.itemIndex] to the
  * index scan by setting the relevant fields in caller's index scan descriptor
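The forward and backward arms of the relocated _bt_readpage fill so->currPos.items[] from opposite ends (itemIndex counts up from 0 for forward scans, down from MaxTIDsPerBTreePage for backward scans), so saved items always sit in ascending index order. A minimal stand-alone sketch of that fill pattern, with toy names in place of the real scan state:

#include <stdio.h>

#define TOY_MAX_ITEMS 8     /* stand-in for MaxTIDsPerBTreePage */

/* Toy model of the items[] fill discipline: forward scans load slots
 * 0..lastItem in ascending order, while backward scans load slots
 * firstItem..TOY_MAX_ITEMS-1 from the top down, so items are always
 * stored (and later returned) in index order. */
int
main(void)
{
    int     items[TOY_MAX_ITEMS];
    int     firstItem,
            lastItem;

    /* forward: ascending, like "itemIndex = 0; ... itemIndex++" */
    int     itemIndex = 0;

    for (int off = 1; off <= 5; off++)
        items[itemIndex++] = off;
    firstItem = 0;
    lastItem = itemIndex - 1;
    printf("forward:  slots %d..%d hold offnums %d..%d\n",
           firstItem, lastItem, items[firstItem], items[lastItem]);

    /* backward: descending, like "itemIndex = MaxTIDsPerBTreePage; --itemIndex" */
    itemIndex = TOY_MAX_ITEMS;
    for (int off = 5; off >= 1; off--)
        items[--itemIndex] = off;
    firstItem = itemIndex;
    lastItem = TOY_MAX_ITEMS - 1;
    printf("backward: slots %d..%d hold offnums %d..%d\n",
           firstItem, lastItem, items[firstItem], items[lastItem]);

    return 0;
}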
(File diff suppressed because it is too large; presumably the corresponding bulk removal from nbtutils.c described in the commit message.)
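One detail of the relocated save functions worth noting: _bt_setuppostingitems copies a posting list's base tuple into the work area once and returns its offset, and _bt_savepostingitem then stamps that same offset on every further TID from the list, so index-only scans hand back one shared tuple copy per posting list. A toy model of that sharing (hypothetical names, not PostgreSQL code):

#include <stdio.h>
#include <string.h>

/* Toy scan-position item: one saved TID plus the offset of the shared
 * base tuple copy it should be returned with. */
typedef struct ToyItem
{
    int     heapTid;        /* stand-in for ItemPointerData */
    int     tupleOffset;    /* offset of shared base tuple copy */
} ToyItem;

int
main(void)
{
    char        workArea[256];
    int         nextTupleOffset = 0;
    ToyItem     items[4];
    int         tids[4] = {101, 102, 103, 104};    /* one posting list */

    /* like _bt_setuppostingitems: copy the base tuple once, keep offset */
    const char *baseTuple = "base-tuple";
    int         tupleOffset = nextTupleOffset;

    memcpy(workArea + tupleOffset, baseTuple, strlen(baseTuple) + 1);
    nextTupleOffset += 16;  /* stand-in for the MAXALIGN'd tuple size */

    /* like _bt_savepostingitem: every TID reuses the same offset */
    for (int i = 0; i < 4; i++)
    {
        items[i].heapTid = tids[i];
        items[i].tupleOffset = tupleOffset;
    }

    for (int i = 0; i < 4; i++)
        printf("TID %d -> tuple \"%s\"\n",
               items[i].heapTid, workArea + items[i].tupleOffset);
    return 0;
}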
src/include/access/nbtree.h
@@ -1096,37 +1096,6 @@ typedef struct BTScanOpaqueData
 
 typedef BTScanOpaqueData *BTScanOpaque;
 
-/*
- * _bt_readpage state used across _bt_checkkeys calls for a page
- */
-typedef struct BTReadPageState
-{
-    /* Input parameters, set by _bt_readpage for _bt_checkkeys */
-    OffsetNumber minoff;        /* Lowest non-pivot tuple's offset */
-    OffsetNumber maxoff;        /* Highest non-pivot tuple's offset */
-    IndexTuple  finaltup;       /* Needed by scans with array keys */
-    Page        page;           /* Page being read */
-    bool        firstpage;      /* page is first for primitive scan? */
-    bool        forcenonrequired;   /* treat all keys as nonrequired? */
-    int         startikey;      /* start comparisons from this scan key */
-
-    /* Per-tuple input parameters, set by _bt_readpage for _bt_checkkeys */
-    OffsetNumber offnum;        /* current tuple's page offset number */
-
-    /* Output parameters, set by _bt_checkkeys for _bt_readpage */
-    OffsetNumber skip;          /* Array keys "look ahead" skip offnum */
-    bool        continuescan;   /* Terminate ongoing (primitive) index scan? */
-
-    /*
-     * Private _bt_checkkeys state used to manage "look ahead" optimization
-     * and primscan scheduling (only used during scans with array keys)
-     */
-    int16       rechecks;
-    int16       targetdistance;
-    int16       nskipadvances;
-
-} BTReadPageState;
-
 /*
  * We use some private sk_flags bits in preprocessed scan keys.  We're allowed
  * to use bits 16-31 (see skey.h).  The uppermost bits are copied from the
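BTReadPageState, removed from the public header above, is a plain input/output parameter block: _bt_readpage fills in the page-level and per-tuple inputs, and the per-tuple check reports back through continuescan and skip. A self-contained toy version of that calling convention (all names hypothetical, not PostgreSQL code):

#include <stdbool.h>
#include <stdio.h>

/* Toy version of the BTReadPageState protocol: the caller fills in the
 * page-level and per-tuple inputs; the per-tuple check fills in the
 * outputs that steer the caller's loop. */
typedef struct ToyReadPageState
{
    /* inputs, set by the page reader */
    int     minoff;
    int     maxoff;
    int     offnum;         /* current tuple's offset */
    /* outputs, set by the per-tuple check */
    int     skip;           /* > 0 => jump ahead to this offset */
    bool    continuescan;   /* false => stop scanning this direction */
} ToyReadPageState;

static bool
toy_checkkeys(ToyReadPageState *pstate)
{
    pstate->skip = 0;       /* no look-ahead skip in this toy */
    pstate->continuescan = (pstate->offnum < pstate->maxoff);
    return (pstate->offnum % 2 == 0);   /* pretend even offsets match */
}

int
main(void)
{
    ToyReadPageState pstate = {.minoff = 1, .maxoff = 6,
                               .skip = 0, .continuescan = true};

    for (int offnum = pstate.minoff; offnum <= pstate.maxoff; offnum++)
    {
        pstate.offnum = offnum;
        if (toy_checkkeys(&pstate))
            printf("offset %d matches\n", offnum);
        if (pstate.skip > 0)
            offnum = pstate.skip - 1;   /* loop increment lands on skip */
        if (!pstate.continuescan)
            break;
    }
    return 0;
}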
@@ -1299,6 +1268,18 @@ extern void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate);
  */
 extern void _bt_preprocess_keys(IndexScanDesc scan);
 
+/*
+ * prototypes for functions in nbtreadpage.c
+ */
+extern bool _bt_readpage(IndexScanDesc scan, ScanDirection dir,
+                         OffsetNumber offnum, bool firstpage);
+extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
+extern int  _bt_binsrch_array_skey(FmgrInfo *orderproc,
+                                   bool cur_elem_trig, ScanDirection dir,
+                                   Datum tupdatum, bool tupnull,
+                                   BTArrayKeyInfo *array, ScanKey cur,
+                                   int32 *set_elem_result);
+
 /*
  * prototypes for functions in nbtsearch.c
  */
@@ -1315,18 +1296,6 @@ extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost);
  */
 extern BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup);
 extern void _bt_freestack(BTStack stack);
-extern bool _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir);
-extern int  _bt_binsrch_array_skey(FmgrInfo *orderproc,
-                                   bool cur_elem_trig, ScanDirection dir,
-                                   Datum tupdatum, bool tupnull,
-                                   BTArrayKeyInfo *array, ScanKey cur,
-                                   int32 *set_elem_result);
-extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
-extern bool _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys,
-                          IndexTuple tuple, int tupnatts);
-extern bool _bt_scanbehind_checkkeys(IndexScanDesc scan, ScanDirection dir,
-                                     IndexTuple finaltup);
-extern void _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate);
 extern void _bt_killitems(IndexScanDesc scan);
 extern BTCycleId _bt_vacuum_cycleid(Relation rel);
 extern BTCycleId _bt_start_vacuum(Relation rel);