Mirror of https://github.com/postgres/postgres.git (synced 2025-07-07 00:36:50 +03:00)
Run pgindent on 9.2 source tree in preparation for first 9.3 commit-fest.
@ -27,7 +27,7 @@
|
||||
/* non-export function prototypes */
|
||||
static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
|
||||
static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
|
||||
GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
|
||||
GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
|
||||
static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
|
||||
GISTSTATE *giststate,
|
||||
IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
|
||||
@ -781,8 +781,8 @@ gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
|
||||
{
|
||||
/*
|
||||
* Page was split while we looked elsewhere. We didn't see the
|
||||
* downlink to the right page when we scanned the parent, so
|
||||
* add it to the queue now.
|
||||
* downlink to the right page when we scanned the parent, so add
|
||||
* it to the queue now.
|
||||
*
|
||||
* Put the right page ahead of the queue, so that we visit it
|
||||
* next. That's important, because if this is the lowest internal
|
||||
@ -829,7 +829,7 @@ gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
|
||||
|
||||
elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
|
||||
RelationGetRelationName(r), child);
|
||||
return NULL; /* keep compiler quiet */
|
||||
return NULL; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1046,7 +1046,7 @@ gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
|
||||
*/
|
||||
static bool
|
||||
gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
|
||||
GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
|
||||
GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
|
||||
{
|
||||
return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
|
||||
InvalidBuffer, InvalidBuffer, false, false);
|
||||
@ -1308,7 +1308,7 @@ initGISTstate(Relation index)
|
||||
giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
|
||||
|
||||
giststate->scanCxt = scanCxt;
|
||||
giststate->tempCxt = scanCxt; /* caller must change this if needed */
|
||||
giststate->tempCxt = scanCxt; /* caller must change this if needed */
|
||||
giststate->tupdesc = index->rd_att;
|
||||
|
||||
for (i = 0; i < index->rd_att->natts; i++)
|
||||
|
@ -48,7 +48,7 @@ typedef enum
|
||||
* before switching to the buffering build
|
||||
* mode */
|
||||
GIST_BUFFERING_ACTIVE /* in buffering build mode */
|
||||
} GistBufferingMode;
|
||||
} GistBufferingMode;
|
||||
|
||||
/* Working state for gistbuild and its callback */
|
||||
typedef struct
|
||||
@ -263,7 +263,7 @@ gistValidateBufferingOption(char *value)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid value for \"buffering\" option"),
|
||||
errdetail("Valid values are \"on\", \"off\", and \"auto\".")));
|
||||
errdetail("Valid values are \"on\", \"off\", and \"auto\".")));
|
||||
}
|
||||
}
|
||||
|
||||
@ -567,7 +567,7 @@ gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
|
||||
BlockNumber childblkno;
|
||||
Buffer buffer;
|
||||
bool result = false;
|
||||
BlockNumber blkno;
|
||||
BlockNumber blkno;
|
||||
int level;
|
||||
OffsetNumber downlinkoffnum = InvalidOffsetNumber;
|
||||
BlockNumber parentblkno = InvalidBlockNumber;
|
||||
@ -623,7 +623,7 @@ gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
|
||||
{
|
||||
gistbufferinginserttuples(buildstate, buffer, level,
|
||||
&newtup, 1, childoffnum,
|
||||
InvalidBlockNumber, InvalidOffsetNumber);
|
||||
InvalidBlockNumber, InvalidOffsetNumber);
|
||||
/* gistbufferinginserttuples() released the buffer */
|
||||
}
|
||||
else
|
||||
@ -716,26 +716,26 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
|
||||
|
||||
/*
|
||||
* All the downlinks on the old root page are now on one of the child
|
||||
* pages. Visit all the new child pages to memorize the parents of
|
||||
* the grandchildren.
|
||||
* pages. Visit all the new child pages to memorize the parents of the
|
||||
* grandchildren.
|
||||
*/
|
||||
if (gfbb->rootlevel > 1)
|
||||
{
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
for (off = FirstOffsetNumber; off <= maxoff; off++)
|
||||
{
|
||||
ItemId iid = PageGetItemId(page, off);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
ItemId iid = PageGetItemId(page, off);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
|
||||
Buffer childbuf = ReadBuffer(buildstate->indexrel, childblkno);
|
||||
Buffer childbuf = ReadBuffer(buildstate->indexrel, childblkno);
|
||||
|
||||
LockBuffer(childbuf, GIST_SHARE);
|
||||
gistMemorizeAllDownlinks(buildstate, childbuf);
|
||||
UnlockReleaseBuffer(childbuf);
|
||||
|
||||
/*
|
||||
* Also remember that the parent of the new child page is
|
||||
* the root block.
|
||||
* Also remember that the parent of the new child page is the
|
||||
* root block.
|
||||
*/
|
||||
gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO);
|
||||
}
|
||||
@ -789,8 +789,8 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
|
||||
* Remember the parent of each new child page in our parent map.
|
||||
* This assumes that the downlinks fit on the parent page. If the
|
||||
* parent page is split, too, when we recurse up to insert the
|
||||
* downlinks, the recursive gistbufferinginserttuples() call
|
||||
* will update the map again.
|
||||
* downlinks, the recursive gistbufferinginserttuples() call will
|
||||
* update the map again.
|
||||
*/
|
||||
if (level > 0)
|
||||
gistMemorizeParent(buildstate,
|
||||
@ -879,8 +879,9 @@ gistBufferingFindCorrectParent(GISTBuildState *buildstate,
|
||||
if (parent == *parentblkno && *parentblkno != InvalidBlockNumber &&
|
||||
*downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff)
|
||||
{
|
||||
ItemId iid = PageGetItemId(page, *downlinkoffnum);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
ItemId iid = PageGetItemId(page, *downlinkoffnum);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
|
||||
if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
|
||||
{
|
||||
/* Still there */
|
||||
@ -889,16 +890,17 @@ gistBufferingFindCorrectParent(GISTBuildState *buildstate,
|
||||
}
|
||||
|
||||
/*
|
||||
* Downlink was not at the offset where it used to be. Scan the page
|
||||
* to find it. During normal gist insertions, it might've moved to another
|
||||
* page, to the right, but during a buffering build, we keep track of
|
||||
* the parent of each page in the lookup table so we should always know
|
||||
* what page it's on.
|
||||
* Downlink was not at the offset where it used to be. Scan the page to
|
||||
* find it. During normal gist insertions, it might've moved to another
|
||||
* page, to the right, but during a buffering build, we keep track of the
|
||||
* parent of each page in the lookup table so we should always know what
|
||||
* page it's on.
|
||||
*/
|
||||
for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
|
||||
{
|
||||
ItemId iid = PageGetItemId(page, off);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
ItemId iid = PageGetItemId(page, off);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
|
||||
if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
|
||||
{
|
||||
/* yes!!, found it */
|
||||
@ -908,7 +910,7 @@ gistBufferingFindCorrectParent(GISTBuildState *buildstate,
|
||||
}
|
||||
|
||||
elog(ERROR, "failed to re-find parent for block %u", childblkno);
|
||||
return InvalidBuffer; /* keep compiler quiet */
|
||||
return InvalidBuffer; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1129,7 +1131,7 @@ gistGetMaxLevel(Relation index)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
BlockNumber childblkno; /* hash key */
|
||||
BlockNumber childblkno; /* hash key */
|
||||
BlockNumber parentblkno;
|
||||
} ParentMapEntry;
|
||||
|
||||
@ -1156,9 +1158,9 @@ gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, BlockNumber pa
|
||||
bool found;
|
||||
|
||||
entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
|
||||
(const void *) &child,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
(const void *) &child,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
entry->parentblkno = parent;
|
||||
}
|
||||
|
||||
@ -1171,16 +1173,17 @@ gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parentbuf)
|
||||
OffsetNumber maxoff;
|
||||
OffsetNumber off;
|
||||
BlockNumber parentblkno = BufferGetBlockNumber(parentbuf);
|
||||
Page page = BufferGetPage(parentbuf);
|
||||
Page page = BufferGetPage(parentbuf);
|
||||
|
||||
Assert(!GistPageIsLeaf(page));
|
||||
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
for (off = FirstOffsetNumber; off <= maxoff; off++)
|
||||
{
|
||||
ItemId iid = PageGetItemId(page, off);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
ItemId iid = PageGetItemId(page, off);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
|
||||
|
||||
gistMemorizeParent(buildstate, childblkno, parentblkno);
|
||||
}
|
||||
}
|
||||
@ -1193,9 +1196,9 @@ gistGetParent(GISTBuildState *buildstate, BlockNumber child)
|
||||
|
||||
/* Find node buffer in hash table */
|
||||
entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
|
||||
(const void *) &child,
|
||||
HASH_FIND,
|
||||
&found);
|
||||
(const void *) &child,
|
||||
HASH_FIND,
|
||||
&found);
|
||||
if (!found)
|
||||
elog(ERROR, "could not find parent of block %d in lookup table", child);
|
||||
|
||||
|
@ -528,7 +528,7 @@ typedef struct
|
||||
bool isnull[INDEX_MAX_KEYS];
|
||||
GISTPageSplitInfo *splitinfo;
|
||||
GISTNodeBuffer *nodeBuffer;
|
||||
} RelocationBufferInfo;
|
||||
} RelocationBufferInfo;
|
||||
|
||||
/*
|
||||
* At page split, distribute tuples from the buffer of the split page to
|
||||
|
@ -244,7 +244,7 @@ typedef struct
|
||||
int index;
|
||||
/* Delta between penalties of entry insertion into different groups */
|
||||
double delta;
|
||||
} CommonEntry;
|
||||
} CommonEntry;
|
||||
|
||||
/*
|
||||
* Context for g_box_consider_split. Contains information about currently
|
||||
@ -267,7 +267,7 @@ typedef struct
|
||||
int dim; /* axis of this split */
|
||||
double range; /* width of general MBR projection to the
|
||||
* selected axis */
|
||||
} ConsiderSplitContext;
|
||||
} ConsiderSplitContext;
|
||||
|
||||
/*
|
||||
* Interval represents projection of box to axis.
|
||||
@ -276,7 +276,7 @@ typedef struct
|
||||
{
|
||||
double lower,
|
||||
upper;
|
||||
} SplitInterval;
|
||||
} SplitInterval;
|
||||
|
||||
/*
|
||||
* Interval comparison function by lower bound of the interval;
|
||||
|
@ -124,7 +124,7 @@ gistbeginscan(PG_FUNCTION_ARGS)
|
||||
so->giststate = giststate;
|
||||
giststate->tempCxt = createTempGistContext();
|
||||
so->queue = NULL;
|
||||
so->queueCxt = giststate->scanCxt; /* see gistrescan */
|
||||
so->queueCxt = giststate->scanCxt; /* see gistrescan */
|
||||
|
||||
/* workspaces with size dependent on numberOfOrderBys: */
|
||||
so->tmpTreeItem = palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys);
|
||||
|
@ -581,8 +581,7 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
|
||||
if (v->spl_equiv == NULL)
|
||||
{
|
||||
/*
|
||||
* simple case: left and right keys for attno column are
|
||||
* equal
|
||||
* simple case: left and right keys for attno column are equal
|
||||
*/
|
||||
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
|
||||
}
|
||||
|
@ -391,7 +391,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf,
|
||||
uint32 ovflbitno;
|
||||
int32 bitmappage,
|
||||
bitmapbit;
|
||||
Bucket bucket PG_USED_FOR_ASSERTS_ONLY;
|
||||
Bucket bucket PG_USED_FOR_ASSERTS_ONLY;
|
||||
|
||||
/* Get information from the doomed page */
|
||||
_hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
|
||||
|
@ -223,9 +223,9 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
|
||||
}
|
||||
|
||||
/*
|
||||
* Be sure to check for interrupts at least once per page. Checks at
|
||||
* higher code levels won't be able to stop a seqscan that encounters
|
||||
* many pages' worth of consecutive dead tuples.
|
||||
* Be sure to check for interrupts at least once per page. Checks at
|
||||
* higher code levels won't be able to stop a seqscan that encounters many
|
||||
* pages' worth of consecutive dead tuples.
|
||||
*/
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
@ -997,8 +997,8 @@ relation_openrv(const RangeVar *relation, LOCKMODE lockmode)
|
||||
*
|
||||
* Same as relation_openrv, but with an additional missing_ok argument
|
||||
* allowing a NULL return rather than an error if the relation is not
|
||||
* found. (Note that some other causes, such as permissions problems,
|
||||
* will still result in an ereport.)
|
||||
* found. (Note that some other causes, such as permissions problems,
|
||||
* will still result in an ereport.)
|
||||
* ----------------
|
||||
*/
|
||||
Relation
|
||||
@ -1105,7 +1105,7 @@ heap_openrv(const RangeVar *relation, LOCKMODE lockmode)
|
||||
* by a RangeVar node
|
||||
*
|
||||
* As above, but optionally return NULL instead of failing for
|
||||
* relation-not-found.
|
||||
* relation-not-found.
|
||||
* ----------------
|
||||
*/
|
||||
Relation
|
||||
@ -1588,10 +1588,10 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
|
||||
|
||||
/*
|
||||
* When first_call is true (and thus, skip is initially false) we'll
|
||||
* return the first tuple we find. But on later passes, heapTuple
|
||||
* return the first tuple we find. But on later passes, heapTuple
|
||||
* will initially be pointing to the tuple we returned last time.
|
||||
* Returning it again would be incorrect (and would loop forever),
|
||||
* so we skip it and return the next match we find.
|
||||
* Returning it again would be incorrect (and would loop forever), so
|
||||
* we skip it and return the next match we find.
|
||||
*/
|
||||
if (!skip)
|
||||
{
|
||||
@ -1651,7 +1651,7 @@ heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot,
|
||||
{
|
||||
bool result;
|
||||
Buffer buffer;
|
||||
HeapTupleData heapTuple;
|
||||
HeapTupleData heapTuple;
|
||||
|
||||
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
|
||||
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||
@ -1885,14 +1885,14 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
|
||||
heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
|
||||
|
||||
/*
|
||||
* We're about to do the actual insert -- but check for conflict first,
|
||||
* to avoid possibly having to roll back work we've just done.
|
||||
* We're about to do the actual insert -- but check for conflict first, to
|
||||
* avoid possibly having to roll back work we've just done.
|
||||
*
|
||||
* For a heap insert, we only need to check for table-level SSI locks.
|
||||
* Our new tuple can't possibly conflict with existing tuple locks, and
|
||||
* heap page locks are only consolidated versions of tuple locks; they do
|
||||
* not lock "gaps" as index page locks do. So we don't need to identify
|
||||
* a buffer before making the call.
|
||||
* For a heap insert, we only need to check for table-level SSI locks. Our
|
||||
* new tuple can't possibly conflict with existing tuple locks, and heap
|
||||
* page locks are only consolidated versions of tuple locks; they do not
|
||||
* lock "gaps" as index page locks do. So we don't need to identify a
|
||||
* buffer before making the call.
|
||||
*/
|
||||
CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
|
||||
|
||||
@ -2123,11 +2123,11 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
||||
* We're about to do the actual inserts -- but check for conflict first,
|
||||
* to avoid possibly having to roll back work we've just done.
|
||||
*
|
||||
* For a heap insert, we only need to check for table-level SSI locks.
|
||||
* Our new tuple can't possibly conflict with existing tuple locks, and
|
||||
* heap page locks are only consolidated versions of tuple locks; they do
|
||||
* not lock "gaps" as index page locks do. So we don't need to identify
|
||||
* a buffer before making the call.
|
||||
* For a heap insert, we only need to check for table-level SSI locks. Our
|
||||
* new tuple can't possibly conflict with existing tuple locks, and heap
|
||||
* page locks are only consolidated versions of tuple locks; they do not
|
||||
* lock "gaps" as index page locks do. So we don't need to identify a
|
||||
* buffer before making the call.
|
||||
*/
|
||||
CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
|
||||
|
||||
@ -2137,12 +2137,11 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
||||
Buffer buffer;
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
bool all_visible_cleared = false;
|
||||
int nthispage;
|
||||
int nthispage;
|
||||
|
||||
/*
|
||||
* Find buffer where at least the next tuple will fit. If the page
|
||||
* is all-visible, this will also pin the requisite visibility map
|
||||
* page.
|
||||
* Find buffer where at least the next tuple will fit. If the page is
|
||||
* all-visible, this will also pin the requisite visibility map page.
|
||||
*/
|
||||
buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
|
||||
InvalidBuffer, options, bistate,
|
||||
@ -2358,7 +2357,7 @@ heap_delete(Relation relation, ItemPointer tid,
|
||||
ItemId lp;
|
||||
HeapTupleData tp;
|
||||
Page page;
|
||||
BlockNumber block;
|
||||
BlockNumber block;
|
||||
Buffer buffer;
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
bool have_tuple_lock = false;
|
||||
@ -2372,10 +2371,10 @@ heap_delete(Relation relation, ItemPointer tid,
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
/*
|
||||
* Before locking the buffer, pin the visibility map page if it appears
|
||||
* to be necessary. Since we haven't got the lock yet, someone else might
|
||||
* be in the middle of changing this, so we'll need to recheck after
|
||||
* we have the lock.
|
||||
* Before locking the buffer, pin the visibility map page if it appears to
|
||||
* be necessary. Since we haven't got the lock yet, someone else might be
|
||||
* in the middle of changing this, so we'll need to recheck after we have
|
||||
* the lock.
|
||||
*/
|
||||
if (PageIsAllVisible(page))
|
||||
visibilitymap_pin(relation, block, &vmbuffer);
|
||||
@ -2717,7 +2716,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
|
||||
HeapTupleData oldtup;
|
||||
HeapTuple heaptup;
|
||||
Page page;
|
||||
BlockNumber block;
|
||||
BlockNumber block;
|
||||
Buffer buffer,
|
||||
newbuf,
|
||||
vmbuffer = InvalidBuffer,
|
||||
@ -2753,10 +2752,10 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
/*
|
||||
* Before locking the buffer, pin the visibility map page if it appears
|
||||
* to be necessary. Since we haven't got the lock yet, someone else might
|
||||
* be in the middle of changing this, so we'll need to recheck after
|
||||
* we have the lock.
|
||||
* Before locking the buffer, pin the visibility map page if it appears to
|
||||
* be necessary. Since we haven't got the lock yet, someone else might be
|
||||
* in the middle of changing this, so we'll need to recheck after we have
|
||||
* the lock.
|
||||
*/
|
||||
if (PageIsAllVisible(page))
|
||||
visibilitymap_pin(relation, block, &vmbuffer);
|
||||
@ -2900,11 +2899,11 @@ l2:
|
||||
|
||||
/*
|
||||
* If we didn't pin the visibility map page and the page has become all
|
||||
* visible while we were busy locking the buffer, or during some subsequent
|
||||
* window during which we had it unlocked, we'll have to unlock and
|
||||
* re-lock, to avoid holding the buffer lock across an I/O. That's a bit
|
||||
* unfortunate, esepecially since we'll now have to recheck whether the
|
||||
* tuple has been locked or updated under us, but hopefully it won't
|
||||
* visible while we were busy locking the buffer, or during some
|
||||
* subsequent window during which we had it unlocked, we'll have to unlock
|
||||
* and re-lock, to avoid holding the buffer lock across an I/O. That's a
|
||||
* bit unfortunate, esepecially since we'll now have to recheck whether
|
||||
* the tuple has been locked or updated under us, but hopefully it won't
|
||||
* happen very often.
|
||||
*/
|
||||
if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
|
||||
@ -3196,11 +3195,11 @@ l2:
|
||||
|
||||
/*
|
||||
* Mark old tuple for invalidation from system caches at next command
|
||||
* boundary, and mark the new tuple for invalidation in case we abort.
|
||||
* We have to do this before releasing the buffer because oldtup is in
|
||||
* the buffer. (heaptup is all in local memory, but it's necessary to
|
||||
* process both tuple versions in one call to inval.c so we can avoid
|
||||
* redundant sinval messages.)
|
||||
* boundary, and mark the new tuple for invalidation in case we abort. We
|
||||
* have to do this before releasing the buffer because oldtup is in the
|
||||
* buffer. (heaptup is all in local memory, but it's necessary to process
|
||||
* both tuple versions in one call to inval.c so we can avoid redundant
|
||||
* sinval messages.)
|
||||
*/
|
||||
CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
|
||||
|
||||
@ -4069,7 +4068,7 @@ heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid)
|
||||
*/
|
||||
bool
|
||||
heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
|
||||
Buffer buf)
|
||||
Buffer buf)
|
||||
{
|
||||
TransactionId xid;
|
||||
|
||||
@ -4368,9 +4367,9 @@ log_heap_freeze(Relation reln, Buffer buffer,
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform XLogInsert for a heap-visible operation. 'block' is the block
|
||||
* Perform XLogInsert for a heap-visible operation. 'block' is the block
|
||||
* being marked all-visible, and vm_buffer is the buffer containing the
|
||||
* corresponding visibility map block. Both should have already been modified
|
||||
* corresponding visibility map block. Both should have already been modified
|
||||
* and dirtied.
|
||||
*/
|
||||
XLogRecPtr
|
||||
@ -4705,7 +4704,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
|
||||
Page page;
|
||||
|
||||
/*
|
||||
* Read the heap page, if it still exists. If the heap file has been
|
||||
* Read the heap page, if it still exists. If the heap file has been
|
||||
* dropped or truncated later in recovery, this might fail. In that case,
|
||||
* there's no point in doing anything further, since the visibility map
|
||||
* will have to be cleared out at the same time.
|
||||
@ -4731,17 +4730,16 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
|
||||
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* We don't bump the LSN of the heap page when setting the visibility
|
||||
* map bit, because that would generate an unworkable volume of
|
||||
* full-page writes. This exposes us to torn page hazards, but since
|
||||
* we're not inspecting the existing page contents in any way, we
|
||||
* don't care.
|
||||
* We don't bump the LSN of the heap page when setting the visibility map
|
||||
* bit, because that would generate an unworkable volume of full-page
|
||||
* writes. This exposes us to torn page hazards, but since we're not
|
||||
* inspecting the existing page contents in any way, we don't care.
|
||||
*
|
||||
* However, all operations that clear the visibility map bit *do* bump
|
||||
* the LSN, and those operations will only be replayed if the XLOG LSN
|
||||
* follows the page LSN. Thus, if the page LSN has advanced past our
|
||||
* XLOG record's LSN, we mustn't mark the page all-visible, because
|
||||
* the subsequent update won't be replayed to clear the flag.
|
||||
* However, all operations that clear the visibility map bit *do* bump the
|
||||
* LSN, and those operations will only be replayed if the XLOG LSN follows
|
||||
* the page LSN. Thus, if the page LSN has advanced past our XLOG
|
||||
* record's LSN, we mustn't mark the page all-visible, because the
|
||||
* subsequent update won't be replayed to clear the flag.
|
||||
*/
|
||||
if (!XLByteLE(lsn, PageGetLSN(page)))
|
||||
{
|
||||
@ -4772,10 +4770,10 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
|
||||
* Don't set the bit if replay has already passed this point.
|
||||
*
|
||||
* It might be safe to do this unconditionally; if replay has past
|
||||
* this point, we'll replay at least as far this time as we did before,
|
||||
* and if this bit needs to be cleared, the record responsible for
|
||||
* doing so should be again replayed, and clear it. For right now,
|
||||
* out of an abundance of conservatism, we use the same test here
|
||||
* this point, we'll replay at least as far this time as we did
|
||||
* before, and if this bit needs to be cleared, the record responsible
|
||||
* for doing so should be again replayed, and clear it. For right
|
||||
* now, out of an abundance of conservatism, we use the same test here
|
||||
* we did for the heap page; if this results in a dropped bit, no real
|
||||
* harm is done; and the next VACUUM will fix it.
|
||||
*/
|
||||
@ -5183,7 +5181,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
|
||||
if (xlrec->all_visible_cleared)
|
||||
{
|
||||
Relation reln = CreateFakeRelcacheEntry(xlrec->target.node);
|
||||
BlockNumber block = ItemPointerGetBlockNumber(&xlrec->target.tid);
|
||||
BlockNumber block = ItemPointerGetBlockNumber(&xlrec->target.tid);
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
|
||||
visibilitymap_pin(reln, block, &vmbuffer);
|
||||
@ -5267,7 +5265,7 @@ newt:;
|
||||
if (xlrec->new_all_visible_cleared)
|
||||
{
|
||||
Relation reln = CreateFakeRelcacheEntry(xlrec->target.node);
|
||||
BlockNumber block = ItemPointerGetBlockNumber(&xlrec->newtid);
|
||||
BlockNumber block = ItemPointerGetBlockNumber(&xlrec->newtid);
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
|
||||
visibilitymap_pin(reln, block, &vmbuffer);
|
||||
@ -5690,7 +5688,7 @@ heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
else
|
||||
appendStringInfo(buf, "multi-insert: ");
|
||||
appendStringInfo(buf, "rel %u/%u/%u; blk %u; %d tuples",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
|
||||
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
|
||||
xlrec->blkno, xlrec->ntuples);
|
||||
}
|
||||
else
|
||||
|
@ -109,8 +109,8 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
|
||||
BlockNumber block1, BlockNumber block2,
|
||||
Buffer *vmbuffer1, Buffer *vmbuffer2)
|
||||
{
|
||||
bool need_to_pin_buffer1;
|
||||
bool need_to_pin_buffer2;
|
||||
bool need_to_pin_buffer1;
|
||||
bool need_to_pin_buffer2;
|
||||
|
||||
Assert(BufferIsValid(buffer1));
|
||||
Assert(buffer2 == InvalidBuffer || buffer1 <= buffer2);
|
||||
@ -145,7 +145,7 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
|
||||
/*
|
||||
* If there are two buffers involved and we pinned just one of them,
|
||||
* it's possible that the second one became all-visible while we were
|
||||
* busy pinning the first one. If it looks like that's a possible
|
||||
* busy pinning the first one. If it looks like that's a possible
|
||||
* scenario, we'll need to make a second pass through this loop.
|
||||
*/
|
||||
if (buffer2 == InvalidBuffer || buffer1 == buffer2
|
||||
@ -302,11 +302,11 @@ RelationGetBufferForTuple(Relation relation, Size len,
|
||||
* block if one was given, taking suitable care with lock ordering and
|
||||
* the possibility they are the same block.
|
||||
*
|
||||
* If the page-level all-visible flag is set, caller will need to clear
|
||||
* both that and the corresponding visibility map bit. However, by the
|
||||
* time we return, we'll have x-locked the buffer, and we don't want to
|
||||
* do any I/O while in that state. So we check the bit here before
|
||||
* taking the lock, and pin the page if it appears necessary.
|
||||
* If the page-level all-visible flag is set, caller will need to
|
||||
* clear both that and the corresponding visibility map bit. However,
|
||||
* by the time we return, we'll have x-locked the buffer, and we don't
|
||||
* want to do any I/O while in that state. So we check the bit here
|
||||
* before taking the lock, and pin the page if it appears necessary.
|
||||
* Checking without the lock creates a risk of getting the wrong
|
||||
* answer, so we'll have to recheck after acquiring the lock.
|
||||
*/
|
||||
@ -347,23 +347,24 @@ RelationGetBufferForTuple(Relation relation, Size len,
|
||||
|
||||
/*
|
||||
* We now have the target page (and the other buffer, if any) pinned
|
||||
* and locked. However, since our initial PageIsAllVisible checks
|
||||
* were performed before acquiring the lock, the results might now
|
||||
* be out of date, either for the selected victim buffer, or for the
|
||||
* other buffer passed by the caller. In that case, we'll need to give
|
||||
* up our locks, go get the pin(s) we failed to get earlier, and
|
||||
* and locked. However, since our initial PageIsAllVisible checks
|
||||
* were performed before acquiring the lock, the results might now be
|
||||
* out of date, either for the selected victim buffer, or for the
|
||||
* other buffer passed by the caller. In that case, we'll need to
|
||||
* give up our locks, go get the pin(s) we failed to get earlier, and
|
||||
* re-lock. That's pretty painful, but hopefully shouldn't happen
|
||||
* often.
|
||||
*
|
||||
* Note that there's a small possibility that we didn't pin the
|
||||
* page above but still have the correct page pinned anyway, either
|
||||
* because we've already made a previous pass through this loop, or
|
||||
* because caller passed us the right page anyway.
|
||||
* Note that there's a small possibility that we didn't pin the page
|
||||
* above but still have the correct page pinned anyway, either because
|
||||
* we've already made a previous pass through this loop, or because
|
||||
* caller passed us the right page anyway.
|
||||
*
|
||||
* Note also that it's possible that by the time we get the pin and
|
||||
* retake the buffer locks, the visibility map bit will have been
|
||||
* cleared by some other backend anyway. In that case, we'll have done
|
||||
* a bit of extra work for no gain, but there's no real harm done.
|
||||
* cleared by some other backend anyway. In that case, we'll have
|
||||
* done a bit of extra work for no gain, but there's no real harm
|
||||
* done.
|
||||
*/
|
||||
if (otherBuffer == InvalidBuffer || buffer <= otherBuffer)
|
||||
GetVisibilityMapPins(relation, buffer, otherBuffer,
|
||||
|
@ -75,7 +75,7 @@ do { \
|
||||
|
||||
static void toast_delete_datum(Relation rel, Datum value);
|
||||
static Datum toast_save_datum(Relation rel, Datum value,
|
||||
struct varlena *oldexternal, int options);
|
||||
struct varlena * oldexternal, int options);
|
||||
static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
|
||||
static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
|
||||
static struct varlena *toast_fetch_datum(struct varlena * attr);
|
||||
@ -1233,7 +1233,7 @@ toast_compress_datum(Datum value)
|
||||
*/
|
||||
static Datum
|
||||
toast_save_datum(Relation rel, Datum value,
|
||||
struct varlena *oldexternal, int options)
|
||||
struct varlena * oldexternal, int options)
|
||||
{
|
||||
Relation toastrel;
|
||||
Relation toastidx;
|
||||
@ -1353,7 +1353,7 @@ toast_save_datum(Relation rel, Datum value,
|
||||
* those versions could easily reference the same toast value.
|
||||
* When we copy the second or later version of such a row,
|
||||
* reusing the OID will mean we select an OID that's already
|
||||
* in the new toast table. Check for that, and if so, just
|
||||
* in the new toast table. Check for that, and if so, just
|
||||
* fall through without writing the data again.
|
||||
*
|
||||
* While annoying and ugly-looking, this is a good thing
|
||||
|
@ -16,7 +16,7 @@
|
||||
* visibilitymap_pin_ok - check whether correct map page is already pinned
|
||||
* visibilitymap_set - set a bit in a previously pinned page
|
||||
* visibilitymap_test - test if a bit is set
|
||||
* visibilitymap_count - count number of bits set in visibility map
|
||||
* visibilitymap_count - count number of bits set in visibility map
|
||||
* visibilitymap_truncate - truncate the visibility map
|
||||
*
|
||||
* NOTES
|
||||
@ -27,7 +27,7 @@
|
||||
* the sense that we make sure that whenever a bit is set, we know the
|
||||
* condition is true, but if a bit is not set, it might or might not be true.
|
||||
*
|
||||
* Clearing a visibility map bit is not separately WAL-logged. The callers
|
||||
* Clearing a visibility map bit is not separately WAL-logged. The callers
|
||||
* must make sure that whenever a bit is cleared, the bit is cleared on WAL
|
||||
* replay of the updating operation as well.
|
||||
*
|
||||
@ -36,9 +36,9 @@
|
||||
* it may still be the case that every tuple on the page is visible to all
|
||||
* transactions; we just don't know that for certain. The difficulty is that
|
||||
* there are two bits which are typically set together: the PD_ALL_VISIBLE bit
|
||||
* on the page itself, and the visibility map bit. If a crash occurs after the
|
||||
* on the page itself, and the visibility map bit. If a crash occurs after the
|
||||
* visibility map page makes it to disk and before the updated heap page makes
|
||||
* it to disk, redo must set the bit on the heap page. Otherwise, the next
|
||||
* it to disk, redo must set the bit on the heap page. Otherwise, the next
|
||||
* insert, update, or delete on the heap page will fail to realize that the
|
||||
* visibility map bit must be cleared, possibly causing index-only scans to
|
||||
* return wrong answers.
|
||||
@ -59,10 +59,10 @@
|
||||
* the buffer lock over any I/O that may be required to read in the visibility
|
||||
* map page. To avoid this, we examine the heap page before locking it;
|
||||
* if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
|
||||
* bit. Then, we lock the buffer. But this creates a race condition: there
|
||||
* bit. Then, we lock the buffer. But this creates a race condition: there
|
||||
* is a possibility that in the time it takes to lock the buffer, the
|
||||
* PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the
|
||||
* buffer, pin the visibility map page, and relock the buffer. This shouldn't
|
||||
* buffer, pin the visibility map page, and relock the buffer. This shouldn't
|
||||
* happen often, because only VACUUM currently sets visibility map bits,
|
||||
* and the race will only occur if VACUUM processes a given page at almost
|
||||
* exactly the same time that someone tries to further modify it.
|
||||
@ -227,9 +227,9 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
|
||||
* visibilitymap_set - set a bit on a previously pinned page
|
||||
*
|
||||
* recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
|
||||
* or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
|
||||
* or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
|
||||
* one provided; in normal running, we generate a new XLOG record and set the
|
||||
* page LSN to that value. cutoff_xid is the largest xmin on the page being
|
||||
* page LSN to that value. cutoff_xid is the largest xmin on the page being
|
||||
* marked all-visible; it is needed for Hot Standby, and can be
|
||||
* InvalidTransactionId if the page contains no tuples.
|
||||
*
|
||||
@ -295,10 +295,10 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
|
||||
* releasing *buf after it's done testing and setting bits.
|
||||
*
|
||||
* NOTE: This function is typically called without a lock on the heap page,
|
||||
* so somebody else could change the bit just after we look at it. In fact,
|
||||
* so somebody else could change the bit just after we look at it. In fact,
|
||||
* since we don't lock the visibility map page either, it's even possible that
|
||||
* someone else could have changed the bit just before we look at it, but yet
|
||||
* we might see the old value. It is the caller's responsibility to deal with
|
||||
* we might see the old value. It is the caller's responsibility to deal with
|
||||
* all concurrency issues!
|
||||
*/
|
||||
bool
|
||||
@ -344,7 +344,7 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
|
||||
}
|
||||
|
||||
/*
|
||||
* visibilitymap_count - count number of bits set in visibility map
|
||||
* visibilitymap_count - count number of bits set in visibility map
|
||||
*
|
||||
* Note: we ignore the possibility of race conditions when the table is being
|
||||
* extended concurrently with the call. New pages added to the table aren't
|
||||
@ -356,16 +356,16 @@ visibilitymap_count(Relation rel)
|
||||
BlockNumber result = 0;
|
||||
BlockNumber mapBlock;
|
||||
|
||||
for (mapBlock = 0; ; mapBlock++)
|
||||
for (mapBlock = 0;; mapBlock++)
|
||||
{
|
||||
Buffer mapBuffer;
|
||||
unsigned char *map;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Read till we fall off the end of the map. We assume that any
|
||||
* extra bytes in the last page are zeroed, so we don't bother
|
||||
* excluding them from the count.
|
||||
* Read till we fall off the end of the map. We assume that any extra
|
||||
* bytes in the last page are zeroed, so we don't bother excluding
|
||||
* them from the count.
|
||||
*/
|
||||
mapBuffer = vm_readbuf(rel, mapBlock, false);
|
||||
if (!BufferIsValid(mapBuffer))
|
||||
@ -496,11 +496,11 @@ vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
|
||||
Buffer buf;
|
||||
|
||||
/*
|
||||
* We might not have opened the relation at the smgr level yet, or we might
|
||||
* have been forced to close it by a sinval message. The code below won't
|
||||
* necessarily notice relation extension immediately when extend = false,
|
||||
* so we rely on sinval messages to ensure that our ideas about the size of
|
||||
* the map aren't too far out of date.
|
||||
* We might not have opened the relation at the smgr level yet, or we
|
||||
* might have been forced to close it by a sinval message. The code below
|
||||
* won't necessarily notice relation extension immediately when extend =
|
||||
* false, so we rely on sinval messages to ensure that our ideas about the
|
||||
* size of the map aren't too far out of date.
|
||||
*/
|
||||
RelationOpenSmgr(rel);
|
||||
|
||||
|
@ -93,7 +93,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
|
||||
else
|
||||
scan->orderByData = NULL;
|
||||
|
||||
scan->xs_want_itup = false; /* may be set later */
|
||||
scan->xs_want_itup = false; /* may be set later */
|
||||
|
||||
/*
|
||||
* During recovery we ignore killed tuples and don't bother to kill them
|
||||
|
@ -435,7 +435,7 @@ index_restrpos(IndexScanDesc scan)
|
||||
ItemPointer
|
||||
index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
|
||||
{
|
||||
FmgrInfo *procedure;
|
||||
FmgrInfo *procedure;
|
||||
bool found;
|
||||
|
||||
SCAN_CHECKS;
|
||||
@ -495,7 +495,7 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
|
||||
HeapTuple
|
||||
index_fetch_heap(IndexScanDesc scan)
|
||||
{
|
||||
ItemPointer tid = &scan->xs_ctup.t_self;
|
||||
ItemPointer tid = &scan->xs_ctup.t_self;
|
||||
bool all_dead = false;
|
||||
bool got_heap_tuple;
|
||||
|
||||
@ -530,8 +530,8 @@ index_fetch_heap(IndexScanDesc scan)
|
||||
if (got_heap_tuple)
|
||||
{
|
||||
/*
|
||||
* Only in a non-MVCC snapshot can more than one member of the
|
||||
* HOT chain be visible.
|
||||
* Only in a non-MVCC snapshot can more than one member of the HOT
|
||||
* chain be visible.
|
||||
*/
|
||||
scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
|
||||
pgstat_count_heap_fetch(scan->indexRelation);
|
||||
@ -544,7 +544,7 @@ index_fetch_heap(IndexScanDesc scan)
|
||||
/*
|
||||
* If we scanned a whole HOT chain and found only dead tuples, tell index
|
||||
* AM to kill its entry for that TID (this will take effect in the next
|
||||
* amgettuple call, in index_getnext_tid). We do not do this when in
|
||||
* amgettuple call, in index_getnext_tid). We do not do this when in
|
||||
* recovery because it may violate MVCC to do so. See comments in
|
||||
* RelationGetIndexScan().
|
||||
*/
|
||||
|
@ -82,7 +82,7 @@ btint2fastcmp(Datum x, Datum y, SortSupport ssup)
|
||||
Datum
|
||||
btint2sortsupport(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
|
||||
ssup->comparator = btint2fastcmp;
|
||||
PG_RETURN_VOID();
|
||||
@ -119,7 +119,7 @@ btint4fastcmp(Datum x, Datum y, SortSupport ssup)
|
||||
Datum
|
||||
btint4sortsupport(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
|
||||
ssup->comparator = btint4fastcmp;
|
||||
PG_RETURN_VOID();
|
||||
@ -156,7 +156,7 @@ btint8fastcmp(Datum x, Datum y, SortSupport ssup)
|
||||
Datum
|
||||
btint8sortsupport(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
|
||||
ssup->comparator = btint8fastcmp;
|
||||
PG_RETURN_VOID();
|
||||
@ -277,7 +277,7 @@ btoidfastcmp(Datum x, Datum y, SortSupport ssup)
|
||||
Datum
|
||||
btoidsortsupport(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
|
||||
ssup->comparator = btoidfastcmp;
|
||||
PG_RETURN_VOID();
|
||||
@ -338,7 +338,7 @@ btnamefastcmp(Datum x, Datum y, SortSupport ssup)
|
||||
Datum
|
||||
btnamesortsupport(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
|
||||
ssup->comparator = btnamefastcmp;
|
||||
PG_RETURN_VOID();
|
||||
|
@ -1362,7 +1362,7 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack)
|
||||
* we're in VACUUM and would not otherwise have an XID. Having already
|
||||
* updated links to the target, ReadNewTransactionId() suffices as an
|
||||
* upper bound. Any scan having retained a now-stale link is advertising
|
||||
* in its PGXACT an xmin less than or equal to the value we read here. It
|
||||
* in its PGXACT an xmin less than or equal to the value we read here. It
|
||||
* will continue to do so, holding back RecentGlobalXmin, for the duration
|
||||
* of that scan.
|
||||
*/
|
||||
|
@ -433,7 +433,7 @@ btbeginscan(PG_FUNCTION_ARGS)
|
||||
|
||||
/*
|
||||
* We don't know yet whether the scan will be index-only, so we do not
|
||||
* allocate the tuple workspace arrays until btrescan. However, we set up
|
||||
* allocate the tuple workspace arrays until btrescan. However, we set up
|
||||
* scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
|
||||
*/
|
||||
so->currTuples = so->markTuples = NULL;
|
||||
@ -478,7 +478,7 @@ btrescan(PG_FUNCTION_ARGS)
|
||||
|
||||
/*
|
||||
* Allocate tuple workspace arrays, if needed for an index-only scan and
|
||||
* not already done in a previous rescan call. To save on palloc
|
||||
* not already done in a previous rescan call. To save on palloc
|
||||
* overhead, both workspaces are allocated as one palloc block; only this
|
||||
* function and btendscan know that.
|
||||
*
|
||||
|
@ -564,11 +564,11 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
ScanKeyEntryInitialize(chosen,
|
||||
(SK_SEARCHNOTNULL | SK_ISNULL |
|
||||
(impliesNN->sk_flags &
|
||||
(SK_BT_DESC | SK_BT_NULLS_FIRST))),
|
||||
(SK_BT_DESC | SK_BT_NULLS_FIRST))),
|
||||
curattr,
|
||||
((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
|
||||
BTGreaterStrategyNumber :
|
||||
BTLessStrategyNumber),
|
||||
((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
|
||||
BTGreaterStrategyNumber :
|
||||
BTLessStrategyNumber),
|
||||
InvalidOid,
|
||||
InvalidOid,
|
||||
InvalidOid,
|
||||
|
@ -37,10 +37,10 @@ typedef struct BTSortArrayContext
|
||||
static Datum _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey,
|
||||
StrategyNumber strat,
|
||||
Datum *elems, int nelems);
|
||||
static int _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
|
||||
static int _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
|
||||
bool reverse,
|
||||
Datum *elems, int nelems);
|
||||
static int _bt_compare_array_elements(const void *a, const void *b, void *arg);
|
||||
static int _bt_compare_array_elements(const void *a, const void *b, void *arg);
|
||||
static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
|
||||
ScanKey leftarg, ScanKey rightarg,
|
||||
bool *result);
|
||||
@ -227,8 +227,8 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
|
||||
}
|
||||
|
||||
/*
|
||||
* Make a scan-lifespan context to hold array-associated data, or reset
|
||||
* it if we already have one from a previous rescan cycle.
|
||||
* Make a scan-lifespan context to hold array-associated data, or reset it
|
||||
* if we already have one from a previous rescan cycle.
|
||||
*/
|
||||
if (so->arrayContext == NULL)
|
||||
so->arrayContext = AllocSetContextCreate(CurrentMemoryContext,
|
||||
@ -269,7 +269,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* First, deconstruct the array into elements. Anything allocated
|
||||
* First, deconstruct the array into elements. Anything allocated
|
||||
* here (including a possibly detoasted array value) is in the
|
||||
* workspace context.
|
||||
*/
|
||||
@ -283,7 +283,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
|
||||
&elem_values, &elem_nulls, &num_elems);
|
||||
|
||||
/*
|
||||
* Compress out any null elements. We can ignore them since we assume
|
||||
* Compress out any null elements. We can ignore them since we assume
|
||||
* all btree operators are strict.
|
||||
*/
|
||||
num_nonnulls = 0;
|
||||
@ -338,7 +338,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
|
||||
* successive primitive indexscans produce data in index order.
|
||||
*/
|
||||
num_elems = _bt_sort_array_elements(scan, cur,
|
||||
(indoption[cur->sk_attno - 1] & INDOPTION_DESC) != 0,
|
||||
(indoption[cur->sk_attno - 1] & INDOPTION_DESC) != 0,
|
||||
elem_values, num_nonnulls);
|
||||
|
||||
/*
|
||||
@ -387,9 +387,10 @@ _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey,
|
||||
/*
|
||||
* Look up the appropriate comparison operator in the opfamily.
|
||||
*
|
||||
* Note: it's possible that this would fail, if the opfamily is incomplete,
|
||||
* but it seems quite unlikely that an opfamily would omit non-cross-type
|
||||
* comparison operators for any datatype that it supports at all.
|
||||
* Note: it's possible that this would fail, if the opfamily is
|
||||
* incomplete, but it seems quite unlikely that an opfamily would omit
|
||||
* non-cross-type comparison operators for any datatype that it supports
|
||||
* at all.
|
||||
*/
|
||||
cmp_op = get_opfamily_member(rel->rd_opfamily[skey->sk_attno - 1],
|
||||
elemtype,
|
||||
@ -455,9 +456,10 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
|
||||
/*
|
||||
* Look up the appropriate comparison function in the opfamily.
|
||||
*
|
||||
* Note: it's possible that this would fail, if the opfamily is incomplete,
|
||||
* but it seems quite unlikely that an opfamily would omit non-cross-type
|
||||
* support functions for any datatype that it supports at all.
|
||||
* Note: it's possible that this would fail, if the opfamily is
|
||||
* incomplete, but it seems quite unlikely that an opfamily would omit
|
||||
* non-cross-type support functions for any datatype that it supports at
|
||||
* all.
|
||||
*/
|
||||
cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1],
|
||||
elemtype,
|
||||
@ -515,7 +517,7 @@ _bt_compare_array_elements(const void *a, const void *b, void *arg)
|
||||
* _bt_start_array_keys() -- Initialize array keys at start of a scan
|
||||
*
|
||||
* Set up the cur_elem counters and fill in the first sk_argument value for
|
||||
* each array scankey. We can't do this until we know the scan direction.
|
||||
* each array scankey. We can't do this until we know the scan direction.
|
||||
*/
|
||||
void
|
||||
_bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
|
||||
@ -609,8 +611,8 @@ _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
|
||||
* so that the index sorts in the desired direction.
|
||||
*
|
||||
* One key purpose of this routine is to discover which scan keys must be
|
||||
* satisfied to continue the scan. It also attempts to eliminate redundant
|
||||
* keys and detect contradictory keys. (If the index opfamily provides
|
||||
* satisfied to continue the scan. It also attempts to eliminate redundant
|
||||
* keys and detect contradictory keys. (If the index opfamily provides
|
||||
* incomplete sets of cross-type operators, we may fail to detect redundant
|
||||
* or contradictory keys, but we can survive that.)
|
||||
*
|
||||
@ -676,7 +678,7 @@ _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
|
||||
* Note: the reason we have to copy the preprocessed scan keys into private
|
||||
* storage is that we are modifying the array based on comparisons of the
|
||||
* key argument values, which could change on a rescan or after moving to
|
||||
* new elements of array keys. Therefore we can't overwrite the source data.
|
||||
* new elements of array keys. Therefore we can't overwrite the source data.
|
||||
*/
|
||||
void
|
||||
_bt_preprocess_keys(IndexScanDesc scan)
|
||||
@ -781,8 +783,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
|
||||
* set qual_ok to false and abandon further processing.
|
||||
*
|
||||
* We also have to deal with the case of "key IS NULL", which is
|
||||
* unsatisfiable in combination with any other index condition.
|
||||
* By the time we get here, that's been classified as an equality
|
||||
* unsatisfiable in combination with any other index condition. By
|
||||
* the time we get here, that's been classified as an equality
|
||||
* check, and we've rejected any combination of it with a regular
|
||||
* equality condition; but not with other types of conditions.
|
||||
*/
|
||||
@ -1421,12 +1423,12 @@ _bt_checkkeys(IndexScanDesc scan,
|
||||
/*
|
||||
* Since NULLs are sorted before non-NULLs, we know we have
|
||||
* reached the lower limit of the range of values for this
|
||||
* index attr. On a backward scan, we can stop if this qual
|
||||
* index attr. On a backward scan, we can stop if this qual
|
||||
* is one of the "must match" subset. We can stop regardless
|
||||
* of whether the qual is > or <, so long as it's required,
|
||||
* because it's not possible for any future tuples to pass.
|
||||
* On a forward scan, however, we must keep going, because we
|
||||
* may have initially positioned to the start of the index.
|
||||
* because it's not possible for any future tuples to pass. On
|
||||
* a forward scan, however, we must keep going, because we may
|
||||
* have initially positioned to the start of the index.
|
||||
*/
|
||||
if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
|
||||
ScanDirectionIsBackward(dir))
|
||||
@ -1437,11 +1439,11 @@ _bt_checkkeys(IndexScanDesc scan,
|
||||
/*
|
||||
* Since NULLs are sorted after non-NULLs, we know we have
|
||||
* reached the upper limit of the range of values for this
|
||||
* index attr. On a forward scan, we can stop if this qual is
|
||||
* one of the "must match" subset. We can stop regardless of
|
||||
* index attr. On a forward scan, we can stop if this qual is
|
||||
* one of the "must match" subset. We can stop regardless of
|
||||
* whether the qual is > or <, so long as it's required,
|
||||
* because it's not possible for any future tuples to pass.
|
||||
* On a backward scan, however, we must keep going, because we
|
||||
* because it's not possible for any future tuples to pass. On
|
||||
* a backward scan, however, we must keep going, because we
|
||||
* may have initially positioned to the end of the index.
|
||||
*/
|
||||
if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
|
||||
@ -1532,12 +1534,12 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
|
||||
/*
|
||||
* Since NULLs are sorted before non-NULLs, we know we have
|
||||
* reached the lower limit of the range of values for this
|
||||
* index attr. On a backward scan, we can stop if this qual
|
||||
* index attr. On a backward scan, we can stop if this qual
|
||||
* is one of the "must match" subset. We can stop regardless
|
||||
* of whether the qual is > or <, so long as it's required,
|
||||
* because it's not possible for any future tuples to pass.
|
||||
* On a forward scan, however, we must keep going, because we
|
||||
* may have initially positioned to the start of the index.
|
||||
* because it's not possible for any future tuples to pass. On
|
||||
* a forward scan, however, we must keep going, because we may
|
||||
* have initially positioned to the start of the index.
|
||||
*/
|
||||
if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
|
||||
ScanDirectionIsBackward(dir))
|
||||
@ -1548,11 +1550,11 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
|
||||
/*
|
||||
* Since NULLs are sorted after non-NULLs, we know we have
|
||||
* reached the upper limit of the range of values for this
|
||||
* index attr. On a forward scan, we can stop if this qual is
|
||||
* one of the "must match" subset. We can stop regardless of
|
||||
* index attr. On a forward scan, we can stop if this qual is
|
||||
* one of the "must match" subset. We can stop regardless of
|
||||
* whether the qual is > or <, so long as it's required,
|
||||
* because it's not possible for any future tuples to pass.
|
||||
* On a backward scan, however, we must keep going, because we
|
||||
* because it's not possible for any future tuples to pass. On
|
||||
* a backward scan, however, we must keep going, because we
|
||||
* may have initially positioned to the end of the index.
|
||||
*/
|
||||
if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
|
||||
|
@ -24,7 +24,7 @@
/*
* SPPageDesc tracks all info about a page we are inserting into. In some
* situations it actually identifies a tuple, or even a specific node within
* an inner tuple. But any of the fields can be invalid. If the buffer
* an inner tuple. But any of the fields can be invalid. If the buffer
* field is valid, it implies we hold pin and exclusive lock on that buffer.
* page pointer should be valid exactly when buffer is.
*/
@ -129,8 +129,8 @@ spgPageIndexMultiDelete(SpGistState *state, Page page,
int firststate, int reststate,
BlockNumber blkno, OffsetNumber offnum)
{
OffsetNumber firstItem;
OffsetNumber *sortednos;
OffsetNumber firstItem;
OffsetNumber *sortednos;
SpGistDeadTuple tuple = NULL;
int i;

@ -155,8 +155,8 @@ spgPageIndexMultiDelete(SpGistState *state, Page page,

for (i = 0; i < nitems; i++)
{
OffsetNumber itemno = sortednos[i];
int tupstate;
OffsetNumber itemno = sortednos[i];
int tupstate;

tupstate = (itemno == firstItem) ? firststate : reststate;
if (tuple == NULL || tuple->tupstate != tupstate)
@ -200,7 +200,7 @@ saveNodeLink(Relation index, SPPageDesc *parent,
*/
static void
addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
{
XLogRecData rdata[4];
spgxlogAddLeaf xlrec;
@ -230,7 +230,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
/* Tuple is not part of a chain */
leafTuple->nextOffset = InvalidOffsetNumber;
current->offnum = SpGistPageAddNewItem(state, current->page,
(Item) leafTuple, leafTuple->size,
(Item) leafTuple, leafTuple->size,
NULL, false);

xlrec.offnumLeaf = current->offnum;
@ -250,9 +250,9 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
else
{
/*
* Tuple must be inserted into existing chain. We mustn't change
* the chain's head address, but we don't need to chase the entire
* chain to put the tuple at the end; we can insert it second.
* Tuple must be inserted into existing chain. We mustn't change the
* chain's head address, but we don't need to chase the entire chain
* to put the tuple at the end; we can insert it second.
*
* Also, it's possible that the "chain" consists only of a DEAD tuple,
* in which case we should replace the DEAD tuple in-place.
@ -261,7 +261,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
OffsetNumber offnum;

head = (SpGistLeafTuple) PageGetItem(current->page,
PageGetItemId(current->page, current->offnum));
PageGetItemId(current->page, current->offnum));
if (head->tupstate == SPGIST_LIVE)
{
leafTuple->nextOffset = head->nextOffset;
@ -274,7 +274,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
* and set new second element
*/
head = (SpGistLeafTuple) PageGetItem(current->page,
PageGetItemId(current->page, current->offnum));
PageGetItemId(current->page, current->offnum));
head->nextOffset = offnum;

xlrec.offnumLeaf = offnum;
@ -483,7 +483,7 @@ moveLeafs(Relation index, SpGistState *state,
for (i = 0; i < nDelete; i++)
{
it = (SpGistLeafTuple) PageGetItem(current->page,
PageGetItemId(current->page, toDelete[i]));
PageGetItemId(current->page, toDelete[i]));
Assert(it->tupstate == SPGIST_LIVE);

/*
@ -516,12 +516,12 @@ moveLeafs(Relation index, SpGistState *state,
leafptr += newLeafTuple->size;

/*
* Now delete the old tuples, leaving a redirection pointer behind for
* the first one, unless we're doing an index build; in which case there
* can't be any concurrent scan so we need not provide a redirect.
* Now delete the old tuples, leaving a redirection pointer behind for the
* first one, unless we're doing an index build; in which case there can't
* be any concurrent scan so we need not provide a redirect.
*/
spgPageIndexMultiDelete(state, current->page, toDelete, nDelete,
state->isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
state->isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
SPGIST_PLACEHOLDER,
nblkno, r);

@ -575,7 +575,7 @@ setRedirectionTuple(SPPageDesc *current, OffsetNumber position,
SpGistDeadTuple dt;

dt = (SpGistDeadTuple) PageGetItem(current->page,
PageGetItemId(current->page, position));
PageGetItemId(current->page, position));
Assert(dt->tupstate == SPGIST_REDIRECT);
Assert(ItemPointerGetBlockNumber(&dt->pointer) == SPGIST_METAPAGE_BLKNO);
ItemPointerSet(&dt->pointer, blkno, offnum);
@ -640,7 +640,7 @@ checkAllTheSame(spgPickSplitIn *in, spgPickSplitOut *out, bool tooBig,
/* The opclass may not use node labels, but if it does, duplicate 'em */
if (out->nodeLabels)
{
Datum theLabel = out->nodeLabels[theNode];
Datum theLabel = out->nodeLabels[theNode];

out->nodeLabels = (Datum *) palloc(sizeof(Datum) * out->nNodes);
for (i = 0; i < out->nNodes; i++)
@ -754,8 +754,8 @@ doPickSplit(Relation index, SpGistState *state,
{
/*
* We are splitting the root (which up to now is also a leaf page).
* Its tuples are not linked, so scan sequentially to get them all.
* We ignore the original value of current->offnum.
* Its tuples are not linked, so scan sequentially to get them all. We
* ignore the original value of current->offnum.
*/
for (i = FirstOffsetNumber; i <= max; i++)
{
@ -773,7 +773,7 @@ doPickSplit(Relation index, SpGistState *state,
/* we will delete the tuple altogether, so count full space */
spaceToDelete += it->size + sizeof(ItemIdData);
}
else /* tuples on root should be live */
else /* tuples on root should be live */
elog(ERROR, "unexpected SPGiST tuple state: %d", it->tupstate);
}
}
@ -820,7 +820,7 @@ doPickSplit(Relation index, SpGistState *state,
* We may not actually insert new tuple because another picksplit may be
* necessary due to too large value, but we will try to allocate enough
* space to include it; and in any case it has to be included in the input
* for the picksplit function. So don't increment nToInsert yet.
* for the picksplit function. So don't increment nToInsert yet.
*/
in.datums[in.nTuples] = SGLTDATUM(newLeafTuple, state);
heapPtrs[in.nTuples] = newLeafTuple->heapPtr;
@ -878,7 +878,7 @@ doPickSplit(Relation index, SpGistState *state,
/*
* Check to see if the picksplit function failed to separate the values,
* ie, it put them all into the same child node. If so, select allTheSame
* mode and create a random split instead. See comments for
* mode and create a random split instead. See comments for
* checkAllTheSame as to why we need to know if the new leaf tuples could
* fit on one page.
*/
@ -924,8 +924,8 @@ doPickSplit(Relation index, SpGistState *state,
innerTuple->allTheSame = allTheSame;

/*
* Update nodes[] array to point into the newly formed innerTuple, so
* that we can adjust their downlinks below.
* Update nodes[] array to point into the newly formed innerTuple, so that
* we can adjust their downlinks below.
*/
SGITITERATE(innerTuple, i, node)
{
@ -944,13 +944,13 @@ doPickSplit(Relation index, SpGistState *state,
}

/*
* To perform the split, we must insert a new inner tuple, which can't
* go on a leaf page; and unless we are splitting the root page, we
* must then update the parent tuple's downlink to point to the inner
* tuple. If there is room, we'll put the new inner tuple on the same
* page as the parent tuple, otherwise we need another non-leaf buffer.
* But if the parent page is the root, we can't add the new inner tuple
* there, because the root page must have only one inner tuple.
* To perform the split, we must insert a new inner tuple, which can't go
* on a leaf page; and unless we are splitting the root page, we must then
* update the parent tuple's downlink to point to the inner tuple. If
* there is room, we'll put the new inner tuple on the same page as the
* parent tuple, otherwise we need another non-leaf buffer. But if the
* parent page is the root, we can't add the new inner tuple there,
* because the root page must have only one inner tuple.
*/
xlrec.initInner = false;
if (parent->buffer != InvalidBuffer &&
@ -965,9 +965,9 @@ doPickSplit(Relation index, SpGistState *state,
{
/* Send tuple to page with next triple parity (see README) */
newInnerBuffer = SpGistGetBuffer(index,
GBUF_INNER_PARITY(parent->blkno + 1) |
GBUF_INNER_PARITY(parent->blkno + 1) |
(isNulls ? GBUF_NULLS : 0),
innerTuple->size + sizeof(ItemIdData),
innerTuple->size + sizeof(ItemIdData),
&xlrec.initInner);
}
else
@ -977,22 +977,22 @@ doPickSplit(Relation index, SpGistState *state,
}

/*
* Because a WAL record can't involve more than four buffers, we can
* only afford to deal with two leaf pages in each picksplit action,
* ie the current page and at most one other.
* Because a WAL record can't involve more than four buffers, we can only
* afford to deal with two leaf pages in each picksplit action, ie the
* current page and at most one other.
*
* The new leaf tuples converted from the existing ones should require
* the same or less space, and therefore should all fit onto one page
* The new leaf tuples converted from the existing ones should require the
* same or less space, and therefore should all fit onto one page
* (although that's not necessarily the current page, since we can't
* delete the old tuples but only replace them with placeholders).
* However, the incoming new tuple might not also fit, in which case
* we might need another picksplit cycle to reduce it some more.
* However, the incoming new tuple might not also fit, in which case we
* might need another picksplit cycle to reduce it some more.
*
* If there's not room to put everything back onto the current page,
* then we decide on a per-node basis which tuples go to the new page.
* (We do it like that because leaf tuple chains can't cross pages,
* so we must place all leaf tuples belonging to the same parent node
* on the same page.)
* If there's not room to put everything back onto the current page, then
* we decide on a per-node basis which tuples go to the new page. (We do
* it like that because leaf tuple chains can't cross pages, so we must
* place all leaf tuples belonging to the same parent node on the same
* page.)
*
* If we are splitting the root page (turning it from a leaf page into an
* inner page), then no leaf tuples can go back to the current page; they
@ -1037,12 +1037,13 @@ doPickSplit(Relation index, SpGistState *state,
int newspace;

newLeafBuffer = SpGistGetBuffer(index,
GBUF_LEAF | (isNulls ? GBUF_NULLS : 0),
GBUF_LEAF | (isNulls ? GBUF_NULLS : 0),
Min(totalLeafSizes,
SPGIST_PAGE_CAPACITY),
&xlrec.initDest);

/*
* Attempt to assign node groups to the two pages. We might fail to
* Attempt to assign node groups to the two pages. We might fail to
* do so, even if totalLeafSizes is less than the available space,
* because we can't split a group across pages.
*/
@ -1054,12 +1055,12 @@ doPickSplit(Relation index, SpGistState *state,
{
if (leafSizes[i] <= curspace)
{
nodePageSelect[i] = 0; /* signifies current page */
nodePageSelect[i] = 0; /* signifies current page */
curspace -= leafSizes[i];
}
else
{
nodePageSelect[i] = 1; /* signifies new leaf page */
nodePageSelect[i] = 1; /* signifies new leaf page */
newspace -= leafSizes[i];
}
}
@ -1075,7 +1076,7 @@ doPickSplit(Relation index, SpGistState *state,
else if (includeNew)
{
/* We must exclude the new leaf tuple from the split */
int nodeOfNewTuple = out.mapTuplesToNodes[in.nTuples - 1];
int nodeOfNewTuple = out.mapTuplesToNodes[in.nTuples - 1];

leafSizes[nodeOfNewTuple] -=
newLeafs[in.nTuples - 1]->size + sizeof(ItemIdData);
@ -1087,12 +1088,12 @@ doPickSplit(Relation index, SpGistState *state,
{
if (leafSizes[i] <= curspace)
{
nodePageSelect[i] = 0; /* signifies current page */
nodePageSelect[i] = 0; /* signifies current page */
curspace -= leafSizes[i];
}
else
{
nodePageSelect[i] = 1; /* signifies new leaf page */
nodePageSelect[i] = 1; /* signifies new leaf page */
newspace -= leafSizes[i];
}
}
@ -1204,7 +1205,7 @@ doPickSplit(Relation index, SpGistState *state,
for (i = 0; i < nToInsert; i++)
{
SpGistLeafTuple it = newLeafs[i];
Buffer leafBuffer;
Buffer leafBuffer;
BlockNumber leafBlock;
OffsetNumber newoffset;

@ -1584,12 +1585,12 @@ spgAddNodeAction(Relation index, SpGistState *state,
xlrec.nodeI = parent->node;

/*
* obtain new buffer with the same parity as current, since it will
* be a child of same parent tuple
* obtain new buffer with the same parity as current, since it will be
* a child of same parent tuple
*/
current->buffer = SpGistGetBuffer(index,
GBUF_INNER_PARITY(current->blkno),
newInnerTuple->size + sizeof(ItemIdData),
newInnerTuple->size + sizeof(ItemIdData),
&xlrec.newPage);
current->blkno = BufferGetBlockNumber(current->buffer);
current->page = BufferGetPage(current->buffer);
@ -1597,15 +1598,15 @@ spgAddNodeAction(Relation index, SpGistState *state,
xlrec.blknoNew = current->blkno;

/*
* Let's just make real sure new current isn't same as old. Right
* now that's impossible, but if SpGistGetBuffer ever got smart enough
* to delete placeholder tuples before checking space, maybe it
* wouldn't be impossible. The case would appear to work except that
* WAL replay would be subtly wrong, so I think a mere assert isn't
* enough here.
* Let's just make real sure new current isn't same as old. Right now
* that's impossible, but if SpGistGetBuffer ever got smart enough to
* delete placeholder tuples before checking space, maybe it wouldn't
* be impossible. The case would appear to work except that WAL
* replay would be subtly wrong, so I think a mere assert isn't enough
* here.
*/
if (xlrec.blknoNew == xlrec.blkno)
elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer");
if (xlrec.blknoNew == xlrec.blkno)
elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer");

/*
* New current and parent buffer will both be modified; but note that
@ -1707,9 +1708,9 @@ spgSplitNodeAction(Relation index, SpGistState *state,
Assert(!SpGistPageStoresNulls(current->page));

/*
* Construct new prefix tuple, containing a single node with the
* specified label. (We'll update the node's downlink to point to the
* new postfix tuple, below.)
* Construct new prefix tuple, containing a single node with the specified
* label. (We'll update the node's downlink to point to the new postfix
* tuple, below.)
*/
node = spgFormNodeTuple(state, out->result.splitTuple.nodeLabel, false);

@ -1888,9 +1889,9 @@ spgdoinsert(Relation index, SpGistState *state,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
(unsigned long) (leafSize - sizeof(ItemIdData)),
(unsigned long) (SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)),
(unsigned long) (SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)),
RelationGetRelationName(index)),
errhint("Values larger than a buffer page cannot be indexed.")));
errhint("Values larger than a buffer page cannot be indexed.")));

/* Initialize "current" to the appropriate root page */
current.blkno = isnull ? SPGIST_NULL_BLKNO : SPGIST_ROOT_BLKNO;
@ -1920,7 +1921,7 @@ spgdoinsert(Relation index, SpGistState *state,
if (current.blkno == InvalidBlockNumber)
{
/*
* Create a leaf page. If leafSize is too large to fit on a page,
* Create a leaf page. If leafSize is too large to fit on a page,
* we won't actually use the page yet, but it simplifies the API
* for doPickSplit to always have a leaf page at hand; so just
* quietly limit our request to a page size.
@ -1968,7 +1969,7 @@ spgdoinsert(Relation index, SpGistState *state,
}
else if ((sizeToSplit =
checkSplitConditions(index, state, &current,
&nToSplit)) < SPGIST_PAGE_CAPACITY / 2 &&
&nToSplit)) < SPGIST_PAGE_CAPACITY / 2 &&
nToSplit < 64 &&
leafTuple->size + sizeof(ItemIdData) + sizeToSplit <= SPGIST_PAGE_CAPACITY)
{
@ -2077,8 +2078,8 @@ spgdoinsert(Relation index, SpGistState *state,
}

/*
* Loop around and attempt to insert the new leafDatum
* at "current" (which might reference an existing child
* Loop around and attempt to insert the new leafDatum at
* "current" (which might reference an existing child
* tuple, or might be invalid to force us to find a new
* page for the tuple).
*
@ -2102,8 +2103,8 @@ spgdoinsert(Relation index, SpGistState *state,
out.result.addNode.nodeLabel);

/*
* Retry insertion into the enlarged node. We assume
* that we'll get a MatchNode result this time.
* Retry insertion into the enlarged node. We assume that
* we'll get a MatchNode result this time.
*/
goto process_inner_tuple;
break;
@ -123,7 +123,7 @@ spgbuild(PG_FUNCTION_ARGS)
buildstate.spgstate.isBuild = true;

buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
"SP-GiST build temporary context",
"SP-GiST build temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
@ -135,12 +135,12 @@ spg_kd_picksplit(PG_FUNCTION_ARGS)

/*
* Note: points that have coordinates exactly equal to coord may get
* classified into either node, depending on where they happen to fall
* in the sorted list. This is okay as long as the inner_consistent
* function descends into both sides for such cases. This is better
* than the alternative of trying to have an exact boundary, because
* it keeps the tree balanced even when we have many instances of the
* same point value. So we should never trigger the allTheSame logic.
* classified into either node, depending on where they happen to fall in
* the sorted list. This is okay as long as the inner_consistent function
* descends into both sides for such cases. This is better than the
* alternative of trying to have an exact boundary, because it keeps the
* tree balanced even when we have many instances of the same point value.
* So we should never trigger the allTheSame logic.
*/
for (i = 0; i < in->nTuples; i++)
{
@ -253,8 +253,8 @@ spg_quad_inner_consistent(PG_FUNCTION_ARGS)
boxQuery = DatumGetBoxP(in->scankeys[i].sk_argument);

if (DatumGetBool(DirectFunctionCall2(box_contain_pt,
PointerGetDatum(boxQuery),
PointerGetDatum(centroid))))
PointerGetDatum(boxQuery),
PointerGetDatum(centroid))))
{
/* centroid is in box, so all quadrants are OK */
}
@ -24,7 +24,7 @@


typedef void (*storeRes_func) (SpGistScanOpaque so, ItemPointer heapPtr,
Datum leafValue, bool isnull, bool recheck);
Datum leafValue, bool isnull, bool recheck);

typedef struct ScanStackEntry
{
@ -88,7 +88,7 @@ resetSpGistScanOpaque(SpGistScanOpaque so)
if (so->want_itup)
{
/* Must pfree IndexTuples to avoid memory leak */
int i;
int i;

for (i = 0; i < so->nPtrs; i++)
pfree(so->indexTups[i]);
@ -102,7 +102,7 @@ resetSpGistScanOpaque(SpGistScanOpaque so)
* Sets searchNulls, searchNonNulls, numberOfKeys, keyData fields of *so.
*
* The point here is to eliminate null-related considerations from what the
* opclass consistent functions need to deal with. We assume all SPGiST-
* opclass consistent functions need to deal with. We assume all SPGiST-
* indexable operators are strict, so any null RHS value makes the scan
* condition unsatisfiable. We also pull out any IS NULL/IS NOT NULL
* conditions; their effect is reflected into searchNulls/searchNonNulls.
@ -177,6 +177,7 @@ spgbeginscan(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0);
int keysz = PG_GETARG_INT32(1);

/* ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); */
IndexScanDesc scan;
SpGistScanOpaque so;
@ -457,7 +458,7 @@ redirect:
MemoryContext oldCtx;

innerTuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, offset));
PageGetItemId(page, offset));

if (innerTuple->tupstate != SPGIST_LIVE)
{
@ -522,7 +523,7 @@ redirect:

for (i = 0; i < out.nNodes; i++)
{
int nodeN = out.nodeNumbers[i];
int nodeN = out.nodeNumbers[i];

Assert(nodeN >= 0 && nodeN < in.nNodes);
if (ItemPointerIsValid(&nodes[nodeN]->t_tid))
@ -598,7 +599,7 @@ storeGettuple(SpGistScanOpaque so, ItemPointer heapPtr,
if (so->want_itup)
{
/*
* Reconstruct desired IndexTuple. We have to copy the datum out of
* Reconstruct desired IndexTuple. We have to copy the datum out of
* the temp context anyway, so we may as well create the tuple here.
*/
so->indexTups[so->nPtrs] = index_form_tuple(so->indexTupDesc,
@ -636,7 +637,7 @@ spggettuple(PG_FUNCTION_ARGS)
if (so->want_itup)
{
/* Must pfree IndexTuples to avoid memory leak */
int i;
int i;

for (i = 0; i < so->nPtrs; i++)
pfree(so->indexTups[i]);
@ -26,7 +26,7 @@
* In the worst case, a inner tuple in a text suffix tree could have as many
* as 256 nodes (one for each possible byte value). Each node can take 16
* bytes on MAXALIGN=8 machines. The inner tuple must fit on an index page
* of size BLCKSZ. Rather than assuming we know the exact amount of overhead
* of size BLCKSZ. Rather than assuming we know the exact amount of overhead
* imposed by page headers, tuple headers, etc, we leave 100 bytes for that
* (the actual overhead should be no more than 56 bytes at this writing, so
* there is slop in this number). The upshot is that the maximum safe prefix
@ -209,9 +209,9 @@ spg_text_choose(PG_FUNCTION_ARGS)
{
/*
* Descend to existing node. (If in->allTheSame, the core code will
* ignore our nodeN specification here, but that's OK. We still
* have to provide the correct levelAdd and restDatum values, and
* those are the same regardless of which node gets chosen by core.)
* ignore our nodeN specification here, but that's OK. We still have
* to provide the correct levelAdd and restDatum values, and those are
* the same regardless of which node gets chosen by core.)
*/
out->resultType = spgMatchNode;
out->result.matchNode.nodeN = i;
@ -227,10 +227,10 @@ spg_text_choose(PG_FUNCTION_ARGS)
else if (in->allTheSame)
{
/*
* Can't use AddNode action, so split the tuple. The upper tuple
* has the same prefix as before and uses an empty node label for
* the lower tuple. The lower tuple has no prefix and the same
* node labels as the original tuple.
* Can't use AddNode action, so split the tuple. The upper tuple has
* the same prefix as before and uses an empty node label for the
* lower tuple. The lower tuple has no prefix and the same node
* labels as the original tuple.
*/
out->resultType = spgSplitTuple;
out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
@ -315,13 +315,13 @@ spg_text_picksplit(PG_FUNCTION_ARGS)
if (commonLen < VARSIZE_ANY_EXHDR(texti))
nodes[i].c = *(uint8 *) (VARDATA_ANY(texti) + commonLen);
else
nodes[i].c = '\0'; /* use \0 if string is all common */
nodes[i].c = '\0'; /* use \0 if string is all common */
nodes[i].i = i;
nodes[i].d = in->datums[i];
}

/*
* Sort by label bytes so that we can group the values into nodes. This
* Sort by label bytes so that we can group the values into nodes. This
* also ensures that the nodes are ordered by label value, allowing the
* use of binary search in searchChar.
*/
@ -371,7 +371,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)

/*
* Reconstruct values represented at this tuple, including parent data,
* prefix of this tuple if any, and the node label if any. in->level
* prefix of this tuple if any, and the node label if any. in->level
* should be the length of the previously reconstructed value, and the
* number of bytes added here is prefixSize or prefixSize + 1.
*
@ -381,7 +381,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
* long-format reconstructed values.
*/
Assert(in->level == 0 ? DatumGetPointer(in->reconstructedValue) == NULL :
VARSIZE_ANY_EXHDR(DatumGetPointer(in->reconstructedValue)) == in->level);
VARSIZE_ANY_EXHDR(DatumGetPointer(in->reconstructedValue)) == in->level);

maxReconstrLen = in->level + 1;
if (in->hasPrefix)
@ -530,7 +530,7 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
}
else
{
text *fullText = palloc(VARHDRSZ + fullLen);
text *fullText = palloc(VARHDRSZ + fullLen);

SET_VARSIZE(fullText, VARHDRSZ + fullLen);
fullValue = VARDATA(fullText);
@ -235,7 +235,7 @@ SpGistUpdateMetaPage(Relation index)
*
* When requesting an inner page, if we get one with the wrong parity,
* we just release the buffer and try again. We will get a different page
* because GetFreeIndexPage will have marked the page used in FSM. The page
* because GetFreeIndexPage will have marked the page used in FSM. The page
* is entered in our local lastUsedPages cache, so there's some hope of
* making use of it later in this session, but otherwise we rely on VACUUM
* to eventually re-enter the page in FSM, making it available for recycling.
@ -245,7 +245,7 @@ SpGistUpdateMetaPage(Relation index)
*
* When we return a buffer to the caller, the page is *not* entered into
* the lastUsedPages cache; we expect the caller will do so after it's taken
* whatever space it will use. This is because after the caller has used up
* whatever space it will use. This is because after the caller has used up
* some space, the page might have less space than whatever was cached already
* so we'd rather not trash the old cache entry.
*/
@ -275,7 +275,7 @@ allocNewBuffer(Relation index, int flags)
else
{
BlockNumber blkno = BufferGetBlockNumber(buffer);
int blkFlags = GBUF_INNER_PARITY(blkno);
int blkFlags = GBUF_INNER_PARITY(blkno);

if ((flags & GBUF_PARITY_MASK) == blkFlags)
{
@ -317,7 +317,7 @@ SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)

/*
* If possible, increase the space request to include relation's
* fillfactor. This ensures that when we add unrelated tuples to a page,
* fillfactor. This ensures that when we add unrelated tuples to a page,
* we try to keep 100-fillfactor% available for adding tuples that are
* related to the ones already on it. But fillfactor mustn't cause an
* error for requests that would otherwise be legal.
@ -664,7 +664,7 @@ spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
errmsg("SPGiST inner tuple size %lu exceeds maximum %lu",
(unsigned long) size,
(unsigned long) (SPGIST_PAGE_CAPACITY - sizeof(ItemIdData))),
errhint("Values larger than a buffer page cannot be indexed.")));
errhint("Values larger than a buffer page cannot be indexed.")));

/*
* Check for overflow of header fields --- probably can't fail if the
@ -801,7 +801,7 @@ SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size,
for (; i <= maxoff; i++)
{
SpGistDeadTuple it = (SpGistDeadTuple) PageGetItem(page,
PageGetItemId(page, i));
PageGetItemId(page, i));

if (it->tupstate == SPGIST_PLACEHOLDER)
{
@ -31,8 +31,8 @@
/* Entry in pending-list of TIDs we need to revisit */
typedef struct spgVacPendingItem
{
ItemPointerData tid; /* redirection target to visit */
bool done; /* have we dealt with this? */
ItemPointerData tid; /* redirection target to visit */
bool done; /* have we dealt with this? */
struct spgVacPendingItem *next; /* list link */
} spgVacPendingItem;

@ -46,10 +46,10 @@ typedef struct spgBulkDeleteState
void *callback_state;

/* Additional working state */
SpGistState spgstate; /* for SPGiST operations that need one */
spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
TransactionId myXmin; /* for detecting newly-added redirects */
TransactionId OldestXmin; /* for deciding a redirect is obsolete */
SpGistState spgstate; /* for SPGiST operations that need one */
spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
TransactionId myXmin; /* for detecting newly-added redirects */
TransactionId OldestXmin; /* for deciding a redirect is obsolete */
BlockNumber lastFilledBlock; /* last non-deletable block */
} spgBulkDeleteState;

@ -213,7 +213,7 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
* Figure out exactly what we have to do. We do this separately from
* actually modifying the page, mainly so that we have a representation
* that can be dumped into WAL and then the replay code can do exactly
* the same thing. The output of this step consists of six arrays
* the same thing. The output of this step consists of six arrays
* describing four kinds of operations, to be performed in this order:
*
* toDead[]: tuple numbers to be replaced with DEAD tuples
@ -276,8 +276,8 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
else if (prevLive == InvalidOffsetNumber)
{
/*
* This is the first live tuple in the chain. It has
* to move to the head position.
* This is the first live tuple in the chain. It has to move
* to the head position.
*/
moveSrc[xlrec.nMove] = j;
moveDest[xlrec.nMove] = i;
@ -289,7 +289,7 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
else
{
/*
* Second or later live tuple. Arrange to re-chain it to the
* Second or later live tuple. Arrange to re-chain it to the
* previous live one, if there was a gap.
*/
if (interveningDeletable)
@ -353,11 +353,11 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
InvalidBlockNumber, InvalidOffsetNumber);

/*
* We implement the move step by swapping the item pointers of the
* source and target tuples, then replacing the newly-source tuples
* with placeholders. This is perhaps unduly friendly with the page
* data representation, but it's fast and doesn't risk page overflow
* when a tuple to be relocated is large.
* We implement the move step by swapping the item pointers of the source
* and target tuples, then replacing the newly-source tuples with
* placeholders. This is perhaps unduly friendly with the page data
* representation, but it's fast and doesn't risk page overflow when a
* tuple to be relocated is large.
*/
for (i = 0; i < xlrec.nMove; i++)
{
@ -518,7 +518,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer,
*/
for (i = max;
i >= FirstOffsetNumber &&
(opaque->nRedirection > 0 || !hasNonPlaceholder);
(opaque->nRedirection > 0 || !hasNonPlaceholder);
i--)
{
SpGistDeadTuple dt;
@ -651,9 +651,9 @@ spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)

/*
* The root pages must never be deleted, nor marked as available in FSM,
* because we don't want them ever returned by a search for a place to
* put a new tuple. Otherwise, check for empty/deletable page, and
* make sure FSM knows about it.
* because we don't want them ever returned by a search for a place to put
* a new tuple. Otherwise, check for empty/deletable page, and make sure
* FSM knows about it.
*/
if (!SpGistBlockIsRoot(blkno))
{
@ -688,7 +688,7 @@ spgprocesspending(spgBulkDeleteState *bds)
Relation index = bds->info->index;
spgVacPendingItem *pitem;
spgVacPendingItem *nitem;
BlockNumber blkno;
BlockNumber blkno;
Buffer buffer;
Page page;

@ -741,11 +741,11 @@ spgprocesspending(spgBulkDeleteState *bds)
else
{
/*
* On an inner page, visit the referenced inner tuple and add
* all its downlinks to the pending list. We might have pending
* items for more than one inner tuple on the same page (in fact
* this is pretty likely given the way space allocation works),
* so get them all while we are here.
* On an inner page, visit the referenced inner tuple and add all
* its downlinks to the pending list. We might have pending items
* for more than one inner tuple on the same page (in fact this is
* pretty likely given the way space allocation works), so get
* them all while we are here.
*/
for (nitem = pitem; nitem != NULL; nitem = nitem->next)
{
@ -774,7 +774,7 @@ spgprocesspending(spgBulkDeleteState *bds)
{
/* transfer attention to redirect point */
spgAddPendingTID(bds,
&((SpGistDeadTuple) innerTuple)->pointer);
&((SpGistDeadTuple) innerTuple)->pointer);
}
else
elog(ERROR, "unexpected SPGiST tuple state: %d",
@ -825,8 +825,8 @@ spgvacuumscan(spgBulkDeleteState *bds)
* physical order (we hope the kernel will cooperate in providing
* read-ahead for speed). It is critical that we visit all leaf pages,
* including ones added after we start the scan, else we might fail to
* delete some deletable tuples. See more extensive comments about
* this in btvacuumscan().
* delete some deletable tuples. See more extensive comments about this
* in btvacuumscan().
*/
blkno = SPGIST_METAPAGE_BLKNO + 1;
for (;;)
@ -40,7 +40,7 @@ fillFakeState(SpGistState *state, spgxlogState stateSrc)
}

/*
* Add a leaf tuple, or replace an existing placeholder tuple. This is used
* Add a leaf tuple, or replace an existing placeholder tuple. This is used
* to replay SpGistPageAddNewItem() operations. If the offset points at an
* existing tuple, it had better be a placeholder tuple.
*/
@ -50,7 +50,7 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
if (offset <= PageGetMaxOffsetNumber(page))
{
SpGistDeadTuple dt = (SpGistDeadTuple) PageGetItem(page,
PageGetItemId(page, offset));
PageGetItemId(page, offset));

if (dt->tupstate != SPGIST_PLACEHOLDER)
elog(ERROR, "SPGiST tuple to be replaced is not a placeholder");
@ -126,7 +126,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)

if (xldata->newPage)
SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));

if (!XLByteLE(lsn, PageGetLSN(page)))
{
@ -143,7 +143,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
SpGistLeafTuple head;

head = (SpGistLeafTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumHeadLeaf));
PageGetItemId(page, xldata->offnumHeadLeaf));
Assert(head->nextOffset == leafTuple->nextOffset);
head->nextOffset = xldata->offnumLeaf;
}
@ -154,7 +154,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
PageIndexTupleDelete(page, xldata->offnumLeaf);
if (PageAddItem(page,
(Item) leafTuple, leafTuple->size,
xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
leafTuple->size);
}
@ -180,7 +180,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
SpGistInnerTuple tuple;

tuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
PageGetItemId(page, xldata->offnumParent));

spgUpdateNodeLink(tuple, xldata->nodeI,
xldata->blknoLeaf, xldata->offnumLeaf);
@ -229,7 +229,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)

if (xldata->newPage)
SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));

if (!XLByteLE(lsn, PageGetLSN(page)))
{
@ -261,7 +261,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
if (!XLByteLE(lsn, PageGetLSN(page)))
{
spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
SPGIST_PLACEHOLDER,
xldata->blknoDst,
toInsert[nInsert - 1]);
@ -286,7 +286,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
SpGistInnerTuple tuple;

tuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
PageGetItemId(page, xldata->offnumParent));

spgUpdateNodeLink(tuple, xldata->nodeI,
xldata->blknoDst, toInsert[nInsert - 1]);
@ -413,7 +413,7 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
}

/*
* Update parent downlink. Since parent could be in either of the
* Update parent downlink. Since parent could be in either of the
* previous two buffers, it's a bit tricky to determine which BKP bit
* applies.
*/
@ -435,7 +435,7 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
SpGistInnerTuple innerTuple;

innerTuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
PageGetItemId(page, xldata->offnumParent));

spgUpdateNodeLink(innerTuple, xldata->nodeI,
xldata->blknoNew, xldata->offnumNew);
@ -504,7 +504,7 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
{
PageIndexTupleDelete(page, xldata->offnumPrefix);
if (PageAddItem(page, (Item) prefixTuple, prefixTuple->size,
xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
prefixTuple->size);

@ -571,7 +571,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
page = (Page) BufferGetPage(srcBuffer);

SpGistInitBuffer(srcBuffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
/* don't update LSN etc till we're done with it */
}
else
@ -587,8 +587,8 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
{
/*
* We have it a bit easier here than in doPickSplit(),
* because we know the inner tuple's location already,
* so we can inject the correct redirection tuple now.
* because we know the inner tuple's location already, so
* we can inject the correct redirection tuple now.
*/
if (!state.isBuild)
spgPageIndexMultiDelete(&state, page,
@ -627,7 +627,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
page = (Page) BufferGetPage(destBuffer);

SpGistInitBuffer(destBuffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
/* don't update LSN etc till we're done with it */
}
else
@ -707,9 +707,9 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
SpGistInnerTuple parent;

parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI,
xldata->blknoInner, xldata->offnumInner);
xldata->blknoInner, xldata->offnumInner);
}

PageSetLSN(page, lsn);
@ -742,9 +742,9 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
SpGistInnerTuple parent;

parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI,
xldata->blknoInner, xldata->offnumInner);
xldata->blknoInner, xldata->offnumInner);

PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
@ -803,7 +803,7 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)

spgPageIndexMultiDelete(&state, page,
toPlaceholder, xldata->nPlaceholder,
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
InvalidBlockNumber,
InvalidOffsetNumber);

@ -821,7 +821,7 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)

spgPageIndexMultiDelete(&state, page,
moveSrc, xldata->nMove,
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
InvalidBlockNumber,
InvalidOffsetNumber);

@ -906,7 +906,7 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
SpGistDeadTuple dt;

dt = (SpGistDeadTuple) PageGetItem(page,
PageGetItemId(page, itemToPlaceholder[i]));
PageGetItemId(page, itemToPlaceholder[i]));
Assert(dt->tupstate == SPGIST_REDIRECT);
dt->tupstate = SPGIST_PLACEHOLDER;
ItemPointerSetInvalid(&dt->pointer);
@ -417,7 +417,7 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
* Testing during the PostgreSQL 9.2 development cycle revealed that on a
* large multi-processor system, it was possible to have more CLOG page
* requests in flight at one time than the numebr of CLOG buffers which existed
* at that time, which was hardcoded to 8. Further testing revealed that
* at that time, which was hardcoded to 8. Further testing revealed that
* performance dropped off with more than 32 CLOG buffers, possibly because
* the linear buffer search algorithm doesn't scale well.
*
@ -903,12 +903,12 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
{
int slotno;
int cur_count;
int bestvalidslot = 0; /* keep compiler quiet */
int bestvalidslot = 0; /* keep compiler quiet */
int best_valid_delta = -1;
int best_valid_page_number = 0; /* keep compiler quiet */
int bestinvalidslot = 0; /* keep compiler quiet */
int best_valid_page_number = 0; /* keep compiler quiet */
int bestinvalidslot = 0; /* keep compiler quiet */
int best_invalid_delta = -1;
int best_invalid_page_number = 0; /* keep compiler quiet */
int best_invalid_page_number = 0; /* keep compiler quiet */

/* See if page already has a buffer assigned */
for (slotno = 0; slotno < shared->num_slots; slotno++)
@ -920,15 +920,15 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)

/*
* If we find any EMPTY slot, just select that one. Else choose a
* victim page to replace. We normally take the least recently used
* victim page to replace. We normally take the least recently used
* valid page, but we will never take the slot containing
* latest_page_number, even if it appears least recently used. We
* latest_page_number, even if it appears least recently used. We
* will select a slot that is already I/O busy only if there is no
* other choice: a read-busy slot will not be least recently used once
* the read finishes, and waiting for an I/O on a write-busy slot is
* inferior to just picking some other slot. Testing shows the slot
* we pick instead will often be clean, allowing us to begin a read
* at once.
* we pick instead will often be clean, allowing us to begin a read at
* once.
*
* Normally the page_lru_count values will all be different and so
* there will be a well-defined LRU page. But since we allow
@ -997,10 +997,10 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)

/*
* If all pages (except possibly the latest one) are I/O busy, we'll
* have to wait for an I/O to complete and then retry. In that unhappy
* case, we choose to wait for the I/O on the least recently used slot,
* on the assumption that it was likely initiated first of all the I/Os
* in progress and may therefore finish first.
* have to wait for an I/O to complete and then retry. In that
* unhappy case, we choose to wait for the I/O on the least recently
* used slot, on the assumption that it was likely initiated first of
* all the I/Os in progress and may therefore finish first.
*/
if (best_valid_delta < 0)
{
@ -1168,20 +1168,20 @@ restart:;

/*
* SlruScanDirectory callback
* This callback reports true if there's any segment prior to the one
* containing the page passed as "data".
* This callback reports true if there's any segment prior to the one
* containing the page passed as "data".
*/
bool
SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
{
int cutoffPage = *(int *) data;
int cutoffPage = *(int *) data;

cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;

if (ctl->PagePrecedes(segpage, cutoffPage))
return true; /* found one; don't iterate any more */
return true; /* found one; don't iterate any more */

return false; /* keep going */
return false; /* keep going */
}

/*
@ -1191,8 +1191,8 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data
static bool
SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
{
char path[MAXPGPATH];
int cutoffPage = *(int *) data;
char path[MAXPGPATH];
int cutoffPage = *(int *) data;

if (ctl->PagePrecedes(segpage, cutoffPage))
{
@ -1202,7 +1202,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
unlink(path);
}

return false; /* keep going */
return false; /* keep going */
}

/*
@ -1212,14 +1212,14 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
bool
SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
{
char path[MAXPGPATH];
char path[MAXPGPATH];

snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename);
ereport(DEBUG2,
(errmsg("removing file \"%s\"", path)));
unlink(path);

return false; /* keep going */
return false; /* keep going */
}

/*
@ -360,8 +360,9 @@ static void
GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts,
TransactionId *children)
{
PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];

/* We need no extra lock since the GXACT isn't valid yet */
if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS)
{
@ -410,7 +411,7 @@ LockGXact(const char *gid, Oid user)
for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
{
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno];
PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno];

/* Ignore not-yet-valid GIDs */
if (!gxact->valid)
@ -523,7 +524,7 @@ TransactionIdIsPrepared(TransactionId xid)
for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
{
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];

if (gxact->valid && pgxact->xid == xid)
{
@ -648,8 +649,8 @@ pg_prepared_xact(PG_FUNCTION_ARGS)
while (status->array != NULL && status->currIdx < status->ngxacts)
{
GlobalTransaction gxact = &status->array[status->currIdx++];
PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
Datum values[5];
bool nulls[5];
HeapTuple tuple;
@ -719,7 +720,7 @@ TwoPhaseGetDummyProc(TransactionId xid)
for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
{
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];

if (pgxact->xid == xid)
{
@ -850,8 +851,8 @@ save_state_data(const void *data, uint32 len)
void
StartPrepare(GlobalTransaction gxact)
{
PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
TransactionId xid = pgxact->xid;
TwoPhaseFileHeader hdr;
TransactionId *children;
@ -1063,9 +1064,9 @@ EndPrepare(GlobalTransaction gxact)
errmsg("could not close two-phase state file: %m")));

/*
* Mark the prepared transaction as valid. As soon as xact.c marks MyPgXact
* as not running our XID (which it will do immediately after this
* function returns), others can commit/rollback the xact.
* Mark the prepared transaction as valid. As soon as xact.c marks
* MyPgXact as not running our XID (which it will do immediately after
* this function returns), others can commit/rollback the xact.
*
* NB: a side effect of this is to make a dummy ProcArray entry for the
* prepared XID. This must happen before we clear the XID from MyPgXact,
@ -1551,7 +1552,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
{
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];

if (gxact->valid &&
XLByteLE(gxact->prepare_lsn, redo_horizon))
@ -1707,7 +1708,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
* XID, and they may force us to advance nextXid.
*
* We don't expect anyone else to modify nextXid, hence we don't
* need to hold a lock while examining it. We still acquire the
* need to hold a lock while examining it. We still acquire the
* lock to modify it, though.
*/
subxids = (TransactionId *)
@ -174,8 +174,8 @@ GetNewTransactionId(bool isSubXact)
* latestCompletedXid is present in the ProcArray, which is essential for
* correct OldestXmin tracking; see src/backend/access/transam/README.
*
* XXX by storing xid into MyPgXact without acquiring ProcArrayLock, we are
* relying on fetch/store of an xid to be atomic, else other backends
* XXX by storing xid into MyPgXact without acquiring ProcArrayLock, we
* are relying on fetch/store of an xid to be atomic, else other backends
* might see a partially-set xid here. But holding both locks at once
* would be a nasty concurrency hit. So for now, assume atomicity.
*
@ -1019,6 +1019,7 @@ RecordTransactionCommit(void)
|
||||
XLogRecData rdata[4];
|
||||
int lastrdata = 0;
|
||||
xl_xact_commit xlrec;
|
||||
|
||||
/*
|
||||
* Set flags required for recovery processing of commits.
|
||||
*/
|
||||
@ -1073,7 +1074,8 @@ RecordTransactionCommit(void)
|
||||
{
|
||||
XLogRecData rdata[2];
|
||||
int lastrdata = 0;
|
||||
xl_xact_commit_compact xlrec;
|
||||
xl_xact_commit_compact xlrec;
|
||||
|
||||
xlrec.xact_time = xactStopTimestamp;
|
||||
xlrec.nsubxacts = nchildren;
|
||||
rdata[0].data = (char *) (&xlrec);
|
||||
@ -2102,7 +2104,7 @@ PrepareTransaction(void)
if (XactHasExportedSnapshots())
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot PREPARE a transaction that has exported snapshots")));
errmsg("cannot PREPARE a transaction that has exported snapshots")));
/* Prevent cancel/die interrupt while cleaning up */
HOLD_INTERRUPTS();
@ -2602,10 +2604,10 @@ CommitTransactionCommand(void)
break;
/*
* We were issued a RELEASE command, so we end the
* current subtransaction and return to the parent transaction.
* The parent might be ended too, so repeat till we find an
* INPROGRESS transaction or subtransaction.
* We were issued a RELEASE command, so we end the current
* subtransaction and return to the parent transaction. The parent
* might be ended too, so repeat till we find an INPROGRESS
* transaction or subtransaction.
*/
case TBLOCK_SUBRELEASE:
do
@ -2623,9 +2625,9 @@ CommitTransactionCommand(void)
* hierarchy and perform final commit. We do this by rolling up
* any subtransactions into their parent, which leads to O(N^2)
* operations with respect to resource owners - this isn't that
* bad until we approach a thousands of savepoints but is necessary
* for correctness should after triggers create new resource
* owners.
* bad until we approach a thousands of savepoints but is
* necessary for correctness should after triggers create new
* resource owners.
*/
case TBLOCK_SUBCOMMIT:
do
@ -4551,11 +4553,11 @@ xactGetCommittedChildren(TransactionId **ptr)
*/
static void
xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
TransactionId *sub_xids, int nsubxacts,
SharedInvalidationMessage *inval_msgs, int nmsgs,
RelFileNode *xnodes, int nrels,
Oid dbId, Oid tsId,
uint32 xinfo)
TransactionId *sub_xids, int nsubxacts,
SharedInvalidationMessage *inval_msgs, int nmsgs,
RelFileNode *xnodes, int nrels,
Oid dbId, Oid tsId,
uint32 xinfo)
{
TransactionId max_xid;
int i;
@ -4659,12 +4661,13 @@ xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
XLogFlush(lsn);
}
/*
* Utility function to call xact_redo_commit_internal after breaking down xlrec
*/
static void
xact_redo_commit(xl_xact_commit *xlrec,
TransactionId xid, XLogRecPtr lsn)
TransactionId xid, XLogRecPtr lsn)
{
TransactionId *subxacts;
SharedInvalidationMessage *inval_msgs;
@ -4675,11 +4678,11 @@ xact_redo_commit(xl_xact_commit *xlrec,
inval_msgs = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
xact_redo_commit_internal(xid, lsn, subxacts, xlrec->nsubxacts,
inval_msgs, xlrec->nmsgs,
xlrec->xnodes, xlrec->nrels,
xlrec->dbId,
xlrec->tsId,
xlrec->xinfo);
inval_msgs, xlrec->nmsgs,
xlrec->xnodes, xlrec->nrels,
xlrec->dbId,
xlrec->tsId,
xlrec->xinfo);
}
/*
@ -4687,14 +4690,14 @@ xact_redo_commit(xl_xact_commit *xlrec,
*/
static void
xact_redo_commit_compact(xl_xact_commit_compact *xlrec,
TransactionId xid, XLogRecPtr lsn)
TransactionId xid, XLogRecPtr lsn)
{
xact_redo_commit_internal(xid, lsn, xlrec->subxacts, xlrec->nsubxacts,
NULL, 0, /* inval msgs */
NULL, 0, /* relfilenodes */
InvalidOid, /* dbId */
InvalidOid, /* tsId */
0); /* xinfo */
NULL, 0, /* inval msgs */
NULL, 0, /* relfilenodes */
InvalidOid, /* dbId */
InvalidOid, /* tsId */
0); /* xinfo */
}
/*
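Editor's aside, not part of the commit: the hunks above show the "two record shapes, one internal worker" pattern, where xact_redo_commit_compact passes NULL/0/InvalidOid for the fields its record lacks. A minimal sketch of that dispatch pattern, with struct and function names invented for the example, is below.

/* Sketch only: a full and a compact record funneled through one worker. */
#include <stdio.h>

typedef struct
{
	long		commit_time;
	int			nsubxacts;
	int			ndeleted_files;	/* present only in the full form */
} full_commit_rec;

typedef struct
{
	long		commit_time;
	int			nsubxacts;
} compact_commit_rec;

static void
redo_commit_internal(long commit_time, int nsubxacts, int ndeleted_files)
{
	/* one code path handles both shapes; the compact caller passes defaults */
	printf("redo commit: time=%ld subxacts=%d deleted=%d\n",
		   commit_time, nsubxacts, ndeleted_files);
}

static void
redo_commit_full(const full_commit_rec *rec)
{
	redo_commit_internal(rec->commit_time, rec->nsubxacts, rec->ndeleted_files);
}

static void
redo_commit_compact(const compact_commit_rec *rec)
{
	/* fields the compact record does not carry are passed as zero/default */
	redo_commit_internal(rec->commit_time, rec->nsubxacts, 0);
}

int
main(void)
{
	full_commit_rec f = {100, 2, 1};
	compact_commit_rec c = {200, 0};

	redo_commit_full(&f);
	redo_commit_compact(&c);
	return 0;
}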
@ -344,10 +344,10 @@ typedef struct XLogCtlInsert
/*
* fullPageWrites is the master copy used by all backends to determine
* whether to write full-page to WAL, instead of using process-local
* one. This is required because, when full_page_writes is changed
* by SIGHUP, we must WAL-log it before it actually affects
* WAL-logging by backends. Checkpointer sets at startup or after SIGHUP.
* whether to write full-page to WAL, instead of using process-local one.
* This is required because, when full_page_writes is changed by SIGHUP,
* we must WAL-log it before it actually affects WAL-logging by backends.
* Checkpointer sets at startup or after SIGHUP.
*/
bool fullPageWrites;
@ -455,8 +455,11 @@ typedef struct XLogCtlData
XLogRecPtr recoveryLastRecPtr;
/* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
TimestampTz recoveryLastXTime;
/* timestamp of when we started replaying the current chunk of WAL data,
* only relevant for replication or archive recovery */
/*
* timestamp of when we started replaying the current chunk of WAL data,
* only relevant for replication or archive recovery
*/
TimestampTz currentChunkStartTime;
/* end of the last record restored from the archive */
XLogRecPtr restoreLastRecPtr;
@ -580,7 +583,7 @@ static bool updateMinRecoveryPoint = true;
* to replay all the WAL, so reachedConsistency is never set. During archive
* recovery, the database is consistent once minRecoveryPoint is reached.
*/
bool reachedConsistency = false;
bool reachedConsistency = false;
static bool InRedo = false;
@ -750,8 +753,8 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
* insert lock, but it seems better to avoid doing CRC calculations while
* holding the lock.
*
* We add entries for backup blocks to the chain, so that they don't
* need any special treatment in the critical section where the chunks are
* We add entries for backup blocks to the chain, so that they don't need
* any special treatment in the critical section where the chunks are
* copied into the WAL buffers. Those entries have to be unlinked from the
* chain if we have to loop back here.
*/
@ -896,10 +899,10 @@ begin:;
/*
* Calculate CRC of the data, including all the backup blocks
*
* Note that the record header isn't added into the CRC initially since
* we don't know the prev-link yet. Thus, the CRC will represent the CRC
* of the whole record in the order: rdata, then backup blocks, then
* record header.
* Note that the record header isn't added into the CRC initially since we
* don't know the prev-link yet. Thus, the CRC will represent the CRC of
* the whole record in the order: rdata, then backup blocks, then record
* header.
*/
INIT_CRC32(rdata_crc);
for (rdt = rdata; rdt != NULL; rdt = rdt->next)
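Editor's aside, not part of the commit: the comment above describes accumulating the CRC over the pieces of a record in a fixed order, with the header folded in last because the prev-link is not yet known. A self-contained sketch of that incremental-CRC idea follows; it uses the standard reflected CRC-32 polynomial and invented data, not PostgreSQL's INIT_CRC32/COMP_CRC32 macros.

/* Sketch only: incremental CRC over payload, then backup block, then header. */
#include <stdint.h>
#include <stdio.h>

static uint32_t
crc32_update(uint32_t crc, const void *data, size_t len)
{
	const unsigned char *p = data;

	crc = ~crc;
	while (len--)
	{
		crc ^= *p++;
		for (int bit = 0; bit < 8; bit++)
			crc = (crc >> 1) ^ (0xEDB88320u & (-(crc & 1)));
	}
	return ~crc;
}

int
main(void)
{
	const char	payload[] = "row data";
	const char	backup_block[] = "full page image";
	struct
	{
		uint32_t	prev_link;
		uint32_t	total_len;
	}			header = {0xDEADBEEFu, sizeof(payload) + sizeof(backup_block)};
	uint32_t	crc = 0;

	/* same order as described above: rdata, then backup blocks, then header */
	crc = crc32_update(crc, payload, sizeof(payload));
	crc = crc32_update(crc, backup_block, sizeof(backup_block));
	crc = crc32_update(crc, &header, sizeof(header));
	printf("record crc = %08x\n", crc);
	return 0;
}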
@ -948,10 +951,10 @@ begin:;
}
/*
* Also check to see if fullPageWrites or forcePageWrites was just turned on;
* if we weren't already doing full-page writes then go back and recompute.
* (If it was just turned off, we could recompute the record without full pages,
* but we choose not to bother.)
* Also check to see if fullPageWrites or forcePageWrites was just turned
* on; if we weren't already doing full-page writes then go back and
* recompute. (If it was just turned off, we could recompute the record
* without full pages, but we choose not to bother.)
*/
if ((Insert->fullPageWrites || Insert->forcePageWrites) && !doPageWrites)
{
@ -1575,15 +1578,15 @@ AdvanceXLInsertBuffer(bool new_segment)
* WAL records beginning in this page have removable backup blocks. This
* allows the WAL archiver to know whether it is safe to compress archived
* WAL data by transforming full-block records into the non-full-block
* format. It is sufficient to record this at the page level because we
* format. It is sufficient to record this at the page level because we
* force a page switch (in fact a segment switch) when starting a backup,
* so the flag will be off before any records can be written during the
* backup. At the end of a backup, the last page will be marked as all
* backup. At the end of a backup, the last page will be marked as all
* unsafe when perhaps only part is unsafe, but at worst the archiver
* would miss the opportunity to compress a few records.
*/
if (!Insert->forcePageWrites)
NewPage->xlp_info |= XLP_BKP_REMOVABLE;
NewPage ->xlp_info |= XLP_BKP_REMOVABLE;
/*
* If first page of an XLOG segment file, make it a long header.
@ -1827,11 +1830,11 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
Write->lastSegSwitchTime = (pg_time_t) time(NULL);
/*
* Request a checkpoint if we've consumed too
* much xlog since the last one. For speed, we first check
* using the local copy of RedoRecPtr, which might be out of
* date; if it looks like a checkpoint is needed, forcibly
* update RedoRecPtr and recheck.
* Request a checkpoint if we've consumed too much xlog since
* the last one. For speed, we first check using the local
* copy of RedoRecPtr, which might be out of date; if it looks
* like a checkpoint is needed, forcibly update RedoRecPtr and
* recheck.
*/
if (IsUnderPostmaster &&
XLogCheckpointNeeded(openLogId, openLogSeg))
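Editor's aside, not part of the commit: the comment above describes a cheap check against a possibly stale local copy, followed by a refresh and recheck only when the cheap check fires. A minimal sketch of that idiom, with invented names and a C11 atomic standing in for the shared copy of RedoRecPtr, is below.

/* Sketch only: cheap check on a stale cache, then refresh and recheck. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CHECKPOINT_SEGMENTS 16

static _Atomic uint64_t shared_redo_ptr;	/* authoritative copy */
static uint64_t local_redo_ptr;				/* possibly out-of-date cache */

static bool
checkpoint_needed(uint64_t current_ptr, uint64_t redo_ptr)
{
	return (current_ptr - redo_ptr) >= (uint64_t) CHECKPOINT_SEGMENTS;
}

static bool
maybe_request_checkpoint(uint64_t current_ptr)
{
	/* fast path: consult the local copy without touching shared state */
	if (!checkpoint_needed(current_ptr, local_redo_ptr))
		return false;

	/* looks needed: refresh from the authoritative copy and recheck */
	local_redo_ptr = atomic_load(&shared_redo_ptr);
	return checkpoint_needed(current_ptr, local_redo_ptr);
}

int
main(void)
{
	atomic_store(&shared_redo_ptr, 20);	/* a checkpoint already advanced it */
	local_redo_ptr = 0;					/* our cache has not seen that yet */

	/* the stale cache says "checkpoint", the recheck says "no" */
	printf("request checkpoint? %s\n",
		   maybe_request_checkpoint(30) ? "yes" : "no");
	return 0;
}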
@ -1931,7 +1934,7 @@ XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
/*
* If the WALWriter is sleeping, we should kick it to make it come out of
* low-power mode. Otherwise, determine whether there's a full page of
* low-power mode. Otherwise, determine whether there's a full page of
* WAL available to write.
*/
if (!sleeping)
@ -1945,9 +1948,9 @@ XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
}
/*
* Nudge the WALWriter: it has a full page of WAL to write, or we want
* it to come out of low-power mode so that this async commit will reach
* disk within the expected amount of time.
* Nudge the WALWriter: it has a full page of WAL to write, or we want it
* to come out of low-power mode so that this async commit will reach disk
* within the expected amount of time.
*/
if (ProcGlobal->walwriterLatch)
SetLatch(ProcGlobal->walwriterLatch);
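Editor's aside, not part of the commit: the hunk above nudges a sleeping background process via a latch. A minimal sketch of that wake-up pattern follows, using a pthread mutex/condvar pair as an invented stand-in for the SetLatch call and PostgreSQL's latch machinery (build with -lpthread).

/* Sketch only: wake a sleeping "walwriter" thread out of low-power wait. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

typedef struct
{
	pthread_mutex_t lock;
	pthread_cond_t	cond;
	bool			is_set;
} latch_t;

static latch_t walwriter_latch = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false
};

static void
set_latch(latch_t *latch)
{
	pthread_mutex_lock(&latch->lock);
	latch->is_set = true;
	pthread_cond_signal(&latch->cond);	/* kick the sleeper awake */
	pthread_mutex_unlock(&latch->lock);
}

static void *
walwriter_main(void *arg)
{
	(void) arg;
	pthread_mutex_lock(&walwriter_latch.lock);
	while (!walwriter_latch.is_set)
		pthread_cond_wait(&walwriter_latch.cond, &walwriter_latch.lock);
	walwriter_latch.is_set = false;		/* reset for the next wait */
	pthread_mutex_unlock(&walwriter_latch.lock);
	printf("walwriter woke up and would flush WAL now\n");
	return NULL;
}

int
main(void)
{
	pthread_t	tid;

	pthread_create(&tid, NULL, walwriter_main, NULL);
	sleep(1);					/* pretend an async commit queued some WAL */
	set_latch(&walwriter_latch);
	pthread_join(tid, NULL);
	return 0;
}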
@ -2076,8 +2079,8 @@ XLogFlush(XLogRecPtr record)
WriteRqstPtr = record;
/*
* Now wait until we get the write lock, or someone else does the
* flush for us.
* Now wait until we get the write lock, or someone else does the flush
* for us.
*/
for (;;)
{
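Editor's aside, not part of the commit: the loop above may find that another backend has already flushed past the requested location, in which case there is nothing left to do. A minimal sketch of that "piggy-back on someone else's flush" check, with an atomic standing in for the shared flushed-up-to pointer and all names invented, is below.

/* Sketch only: skip the flush when another process already covered our LSN. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t flushed_up_to;	/* highest LSN known to be on disk */

static bool
flush_to(uint64_t target_lsn)
{
	/* cheap check first: another backend may already have flushed past us */
	if (atomic_load(&flushed_up_to) >= target_lsn)
		return false;			/* nothing to do, we rode on their flush */

	/* otherwise we would queue for the write lock and flush ourselves;
	 * simulate that here by simply advancing the shared pointer */
	atomic_store(&flushed_up_to, target_lsn);
	return true;
}

int
main(void)
{
	atomic_store(&flushed_up_to, 500);
	printf("flush to 400: did work? %s\n", flush_to(400) ? "yes" : "no");
	printf("flush to 600: did work? %s\n", flush_to(600) ? "yes" : "no");
	return 0;
}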
@ -2182,7 +2185,7 @@ XLogFlush(XLogRecPtr record)
* block, and flush through the latest one of those. Thus, if async commits
* are not being used, we will flush complete blocks only. We can guarantee
* that async commits reach disk after at most three cycles; normally only
* one or two. (When flushing complete blocks, we allow XLogWrite to write
* one or two. (When flushing complete blocks, we allow XLogWrite to write
* "flexibly", meaning it can stop at the end of the buffer ring; this makes a
* difference only with very high load or long wal_writer_delay, but imposes
* one extra cycle for the worst case for async commits.)
@ -2273,7 +2276,8 @@ XLogBackgroundFlush(void)
/*
* If we wrote something then we have something to send to standbys also,
* otherwise the replication delay become around 7s with just async commit.
* otherwise the replication delay become around 7s with just async
* commit.
*/
if (wrote_something)
WalSndWakeup();
@ -2776,17 +2780,17 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
}
/*
* If the segment was fetched from archival storage, replace
* the existing xlog segment (if any) with the archival version.
* If the segment was fetched from archival storage, replace the existing
* xlog segment (if any) with the archival version.
*/
if (source == XLOG_FROM_ARCHIVE)
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
XLogRecPtr endptr;
char xlogfpath[MAXPGPATH];
bool reload = false;
struct stat statbuf;
XLogRecPtr endptr;
char xlogfpath[MAXPGPATH];
bool reload = false;
struct stat statbuf;
XLogFilePath(xlogfpath, tli, log, seg);
if (stat(xlogfpath, &statbuf) == 0)
@ -2801,9 +2805,9 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
if (rename(path, xlogfpath) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
path, xlogfpath)));
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
path, xlogfpath)));
/*
* If the existing segment was replaced, since walsenders might have
@ -3812,7 +3816,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
RecPtr = &tmpRecPtr;
/*
* RecPtr is pointing to end+1 of the previous WAL record. We must
* RecPtr is pointing to end+1 of the previous WAL record. We must
* advance it if necessary to where the next record starts. First,
* align to next page if no more records can fit on the current page.
*/
@ -5389,10 +5393,10 @@ readRecoveryCommandFile(void)
}
if (rtli)
ereport(DEBUG2,
(errmsg_internal("recovery_target_timeline = %u", rtli)));
(errmsg_internal("recovery_target_timeline = %u", rtli)));
else
ereport(DEBUG2,
(errmsg_internal("recovery_target_timeline = latest")));
(errmsg_internal("recovery_target_timeline = latest")));
}
else if (strcmp(item->name, "recovery_target_xid") == 0)
{
@ -5404,7 +5408,7 @@ readRecoveryCommandFile(void)
item->value)));
ereport(DEBUG2,
(errmsg_internal("recovery_target_xid = %u",
recoveryTargetXid)));
recoveryTargetXid)));
recoveryTarget = RECOVERY_TARGET_XID;
}
else if (strcmp(item->name, "recovery_target_time") == 0)
@ -5428,7 +5432,7 @@ readRecoveryCommandFile(void)
Int32GetDatum(-1)));
ereport(DEBUG2,
(errmsg_internal("recovery_target_time = '%s'",
timestamptz_to_str(recoveryTargetTime))));
timestamptz_to_str(recoveryTargetTime))));
}
else if (strcmp(item->name, "recovery_target_name") == 0)
{
@ -5576,13 +5580,13 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
}
/*
* If we are establishing a new timeline, we have to copy data from
* the last WAL segment of the old timeline to create a starting WAL
* segment for the new timeline.
* If we are establishing a new timeline, we have to copy data from the
* last WAL segment of the old timeline to create a starting WAL segment
* for the new timeline.
*
* Notify the archiver that the last WAL segment of the old timeline
* is ready to copy to archival storage. Otherwise, it is not archived
* for a while.
* Notify the archiver that the last WAL segment of the old timeline is
* ready to copy to archival storage. Otherwise, it is not archived for a
* while.
*/
if (endTLI != ThisTimeLineID)
{
@ -5604,8 +5608,8 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
XLogArchiveCleanup(xlogpath);
/*
* Since there might be a partial WAL segment named RECOVERYXLOG,
* get rid of it.
* Since there might be a partial WAL segment named RECOVERYXLOG, get rid
* of it.
*/
snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYXLOG");
unlink(recoveryPath); /* ignore any error */
@ -6323,11 +6327,11 @@ StartupXLOG(void)
/*
* Set backupStartPoint if we're starting recovery from a base backup.
*
* Set backupEndPoint and use minRecoveryPoint as the backup end location
* if we're starting recovery from a base backup which was taken from
* the standby. In this case, the database system status in pg_control must
* indicate DB_IN_ARCHIVE_RECOVERY. If not, which means that backup
* is corrupted, so we cancel recovery.
* Set backupEndPoint and use minRecoveryPoint as the backup end
* location if we're starting recovery from a base backup which was
* taken from the standby. In this case, the database system status in
* pg_control must indicate DB_IN_ARCHIVE_RECOVERY. If not, which
* means that backup is corrupted, so we cancel recovery.
*/
if (haveBackupLabel)
{
@ -6340,7 +6344,7 @@ StartupXLOG(void)
ereport(FATAL,
(errmsg("backup_label contains inconsistent data with control file"),
errhint("This means that the backup is corrupted and you will "
"have to use another backup for recovery.")));
"have to use another backup for recovery.")));
ControlFile->backupEndPoint = ControlFile->minRecoveryPoint;
}
}
@ -6383,15 +6387,15 @@ StartupXLOG(void)
/*
* We're in recovery, so unlogged relations may be trashed and must be
* reset. This should be done BEFORE allowing Hot Standby connections,
* so that read-only backends don't try to read whatever garbage is
* left over from before.
* reset. This should be done BEFORE allowing Hot Standby
* connections, so that read-only backends don't try to read whatever
* garbage is left over from before.
*/
ResetUnloggedRelations(UNLOGGED_RELATION_CLEANUP);
/*
* Likewise, delete any saved transaction snapshot files that got
* left behind by crashed backends.
* Likewise, delete any saved transaction snapshot files that got left
* behind by crashed backends.
*/
DeleteAllExportedSnapshotFiles();
@ -6489,10 +6493,11 @@ StartupXLOG(void)
/*
* Let postmaster know we've started redo now, so that it can launch
* checkpointer to perform restartpoints. We don't bother during crash
* recovery as restartpoints can only be performed during archive
* recovery. And we'd like to keep crash recovery simple, to avoid
* introducing bugs that could affect you when recovering after crash.
* checkpointer to perform restartpoints. We don't bother during
* crash recovery as restartpoints can only be performed during
* archive recovery. And we'd like to keep crash recovery simple, to
* avoid introducing bugs that could affect you when recovering after
* crash.
*
* After this point, we can no longer assume that we're the only
* process in addition to postmaster! Also, fsync requests are
@ -6649,8 +6654,8 @@ StartupXLOG(void)
{
/*
* We have reached the end of base backup, the point where
* the minimum recovery point in pg_control indicates.
* The data on disk is now consistent. Reset backupStartPoint
* the minimum recovery point in pg_control indicates. The
* data on disk is now consistent. Reset backupStartPoint
* and backupEndPoint.
*/
elog(DEBUG1, "end of backup reached");
@ -6863,9 +6868,9 @@ StartupXLOG(void)
oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
/*
* Update full_page_writes in shared memory and write an
* XLOG_FPW_CHANGE record before resource manager writes cleanup
* WAL records or checkpoint record is written.
* Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
* record before resource manager writes cleanup WAL records or checkpoint
* record is written.
*/
Insert->fullPageWrites = lastFullPageWrites;
LocalSetXLogInsertAllowed();
@ -6954,8 +6959,8 @@ StartupXLOG(void)
LWLockRelease(ProcArrayLock);
/*
* Start up the commit log and subtrans, if not already done for
* hot standby.
* Start up the commit log and subtrans, if not already done for hot
* standby.
*/
if (standbyState == STANDBY_DISABLED)
{
@ -7705,9 +7710,9 @@ CreateCheckPoint(int flags)
checkPoint.time = (pg_time_t) time(NULL);
/*
* For Hot Standby, derive the oldestActiveXid before we fix the redo pointer.
* This allows us to begin accumulating changes to assemble our starting
* snapshot of locks and transactions.
* For Hot Standby, derive the oldestActiveXid before we fix the redo
* pointer. This allows us to begin accumulating changes to assemble our
* starting snapshot of locks and transactions.
*/
if (!shutdown && XLogStandbyInfoActive())
checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
@ -8062,7 +8067,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
volatile XLogCtlData *xlogctl = XLogCtl;
/*
* Is it safe to restartpoint? We must ask each of the resource managers
* Is it safe to restartpoint? We must ask each of the resource managers
* whether they have any partial state information that might prevent a
* correct restart from this point. If so, we skip this opportunity, but
* return at the next checkpoint record for another try.
@ -8082,10 +8087,11 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
}
/*
* Also refrain from creating a restartpoint if we have seen any references
* to non-existent pages. Restarting recovery from the restartpoint would
* not see the references, so we would lose the cross-check that the pages
* belonged to a relation that was dropped later.
* Also refrain from creating a restartpoint if we have seen any
* references to non-existent pages. Restarting recovery from the
* restartpoint would not see the references, so we would lose the
* cross-check that the pages belonged to a relation that was dropped
* later.
*/
if (XLogHaveInvalidPages())
{
@ -8098,8 +8104,8 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
}
/*
* Copy the checkpoint record to shared memory, so that checkpointer
* can work out the next time it wants to perform a restartpoint.
* Copy the checkpoint record to shared memory, so that checkpointer can
* work out the next time it wants to perform a restartpoint.
*/
SpinLockAcquire(&xlogctl->info_lck);
XLogCtl->lastCheckPointRecPtr = ReadRecPtr;
@ -8493,8 +8499,8 @@ UpdateFullPageWrites(void)
* Do nothing if full_page_writes has not been changed.
*
* It's safe to check the shared full_page_writes without the lock,
* because we assume that there is no concurrently running process
* which can update it.
* because we assume that there is no concurrently running process which
* can update it.
*/
if (fullPageWrites == Insert->fullPageWrites)
return;
@ -8505,8 +8511,8 @@ UpdateFullPageWrites(void)
* It's always safe to take full page images, even when not strictly
* required, but not the other round. So if we're setting full_page_writes
* to true, first set it true and then write the WAL record. If we're
* setting it to false, first write the WAL record and then set the
* global flag.
* setting it to false, first write the WAL record and then set the global
* flag.
*/
if (fullPageWrites)
{
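Editor's aside, not part of the commit: the comment above pins down an ordering rule for changing full_page_writes: when turning it on, flip the flag first and log afterwards; when turning it off, log first and flip afterwards. A minimal sketch of that rule follows, where write_fpw_change_record and shared_full_page_writes are invented stand-ins for the XLOG_FPW_CHANGE record and Insert->fullPageWrites.

/* Sketch only: ordering of flag update vs. WAL record for full_page_writes. */
#include <stdbool.h>
#include <stdio.h>

static bool shared_full_page_writes = false;

static void
write_fpw_change_record(bool new_value)
{
	printf("WAL: XLOG_FPW_CHANGE -> %s\n", new_value ? "on" : "off");
}

static void
update_full_page_writes(bool new_value)
{
	if (new_value == shared_full_page_writes)
		return;					/* nothing changed */

	if (new_value)
	{
		/* turning it on: extra full-page images are harmless, so flip the
		 * flag first, then log the change */
		shared_full_page_writes = true;
		write_fpw_change_record(true);
	}
	else
	{
		/* turning it off: keep taking full-page images until the change is
		 * safely in WAL, so log first, then flip the flag */
		write_fpw_change_record(false);
		shared_full_page_writes = false;
	}
}

int
main(void)
{
	update_full_page_writes(true);
	update_full_page_writes(false);
	return 0;
}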
@ -8516,12 +8522,12 @@ UpdateFullPageWrites(void)
}
/*
* Write an XLOG_FPW_CHANGE record. This allows us to keep
* track of full_page_writes during archive recovery, if required.
* Write an XLOG_FPW_CHANGE record. This allows us to keep track of
* full_page_writes during archive recovery, if required.
*/
if (XLogStandbyInfoActive() && !RecoveryInProgress())
{
XLogRecData rdata;
XLogRecData rdata;
rdata.data = (char *) (&fullPageWrites);
rdata.len = sizeof(bool);
@ -8561,7 +8567,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
/*
* We used to try to take the maximum of ShmemVariableCache->nextOid
* and the recorded nextOid, but that fails if the OID counter wraps
* around. Since no OID allocation should be happening during replay
* around. Since no OID allocation should be happening during replay
* anyway, better to just believe the record exactly. We still take
* OidGenLock while setting the variable, just in case.
*/
@ -8597,7 +8603,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
!XLogRecPtrIsInvalid(ControlFile->backupStartPoint) &&
XLogRecPtrIsInvalid(ControlFile->backupEndPoint))
ereport(PANIC,
(errmsg("online backup was canceled, recovery cannot continue")));
(errmsg("online backup was canceled, recovery cannot continue")));
/*
* If we see a shutdown checkpoint, we know that nothing was running
@ -8797,9 +8803,9 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
/*
* Update the LSN of the last replayed XLOG_FPW_CHANGE record
* so that do_pg_start_backup() and do_pg_stop_backup() can check
* whether full_page_writes has been disabled during online backup.
* Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
* do_pg_start_backup() and do_pg_stop_backup() can check whether
* full_page_writes has been disabled during online backup.
*/
if (!fpw)
{
@ -8825,7 +8831,7 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
CheckPoint *checkpoint = (CheckPoint *) rec;
appendStringInfo(buf, "checkpoint: redo %X/%X; "
"tli %u; fpw %s; xid %u/%u; oid %u; multi %u; offset %u; "
"tli %u; fpw %s; xid %u/%u; oid %u; multi %u; offset %u; "
"oldest xid %u in DB %u; oldest running xid %u; %s",
checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
checkpoint->ThisTimeLineID,
@ -9115,8 +9121,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
errhint("WAL control functions cannot be executed during recovery.")));
/*
* During recovery, we don't need to check WAL level. Because, if WAL level
* is not sufficient, it's impossible to get here during recovery.
* During recovery, we don't need to check WAL level. Because, if WAL
* level is not sufficient, it's impossible to get here during recovery.
*/
if (!backup_started_in_recovery && !XLogIsNeeded())
ereport(ERROR,
@ -9179,7 +9185,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
* old timeline IDs. That would otherwise happen if you called
* pg_start_backup() right after restoring from a PITR archive: the
* first WAL segment containing the startup checkpoint has pages in
* the beginning with the old timeline ID. That can cause trouble at
* the beginning with the old timeline ID. That can cause trouble at
* recovery: we won't have a history file covering the old timeline if
* pg_xlog directory was not included in the base backup and the WAL
* archive was cleared too before starting the backup.
@ -9202,17 +9208,18 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
bool checkpointfpw;
/*
* Force a CHECKPOINT. Aside from being necessary to prevent torn
* Force a CHECKPOINT. Aside from being necessary to prevent torn
* page problems, this guarantees that two successive backup runs
* will have different checkpoint positions and hence different
* history file names, even if nothing happened in between.
*
* During recovery, establish a restartpoint if possible. We use the last
* restartpoint as the backup starting checkpoint. This means that two
* successive backup runs can have same checkpoint positions.
* During recovery, establish a restartpoint if possible. We use
* the last restartpoint as the backup starting checkpoint. This
* means that two successive backup runs can have same checkpoint
* positions.
*
* Since the fact that we are executing do_pg_start_backup() during
* recovery means that checkpointer is running, we can use
* Since the fact that we are executing do_pg_start_backup()
* during recovery means that checkpointer is running, we can use
* RequestCheckpoint() to establish a restartpoint.
*
* We use CHECKPOINT_IMMEDIATE only if requested by user (via
@ -9237,12 +9244,12 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
XLogRecPtr recptr;
XLogRecPtr recptr;
/*
* Check to see if all WAL replayed during online backup (i.e.,
* since last restartpoint used as backup starting checkpoint)
* contain full-page writes.
* Check to see if all WAL replayed during online backup
* (i.e., since last restartpoint used as backup starting
* checkpoint) contain full-page writes.
*/
SpinLockAcquire(&xlogctl->info_lck);
recptr = xlogctl->lastFpwDisableRecPtr;
@ -9250,20 +9257,20 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
if (!checkpointfpw || XLByteLE(startpoint, recptr))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("WAL generated with full_page_writes=off was replayed "
"since last restartpoint"),
errhint("This means that the backup being taken on standby "
"is corrupt and should not be used. "
"Enable full_page_writes and run CHECKPOINT on the master, "
"and then try an online backup again.")));
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("WAL generated with full_page_writes=off was replayed "
"since last restartpoint"),
errhint("This means that the backup being taken on standby "
"is corrupt and should not be used. "
"Enable full_page_writes and run CHECKPOINT on the master, "
"and then try an online backup again.")));
/*
* During recovery, since we don't use the end-of-backup WAL
* record and don't write the backup history file, the starting WAL
* location doesn't need to be unique. This means that two base
* backups started at the same time might use the same checkpoint
* as starting locations.
* record and don't write the backup history file, the
* starting WAL location doesn't need to be unique. This means
* that two base backups started at the same time might use
* the same checkpoint as starting locations.
*/
gotUniqueStartpoint = true;
}
@ -9443,8 +9450,8 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive)
errhint("WAL control functions cannot be executed during recovery.")));
/*
* During recovery, we don't need to check WAL level. Because, if WAL level
* is not sufficient, it's impossible to get here during recovery.
* During recovery, we don't need to check WAL level. Because, if WAL
* level is not sufficient, it's impossible to get here during recovery.
*/
if (!backup_started_in_recovery && !XLogIsNeeded())
ereport(ERROR,
@ -9537,9 +9544,9 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive)
remaining = strchr(labelfile, '\n') + 1; /* %n is not portable enough */
/*
* Parse the BACKUP FROM line. If we are taking an online backup from
* the standby, we confirm that the standby has not been promoted
* during the backup.
* Parse the BACKUP FROM line. If we are taking an online backup from the
* standby, we confirm that the standby has not been promoted during the
* backup.
*/
ptr = strstr(remaining, "BACKUP FROM:");
if (!ptr || sscanf(ptr, "BACKUP FROM: %19s\n", backupfrom) != 1)
@ -9555,30 +9562,30 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive)
"Try taking another online backup.")));
/*
* During recovery, we don't write an end-of-backup record. We assume
* that pg_control was backed up last and its minimum recovery
* point can be available as the backup end location. Since we don't
* have an end-of-backup record, we use the pg_control value to check
* whether we've reached the end of backup when starting recovery from
* this backup. We have no way of checking if pg_control wasn't backed
* up last however.
* During recovery, we don't write an end-of-backup record. We assume that
* pg_control was backed up last and its minimum recovery point can be
* available as the backup end location. Since we don't have an
* end-of-backup record, we use the pg_control value to check whether
* we've reached the end of backup when starting recovery from this
* backup. We have no way of checking if pg_control wasn't backed up last
* however.
*
* We don't force a switch to new WAL file and wait for all the required
* files to be archived. This is okay if we use the backup to start
* the standby. But, if it's for an archive recovery, to ensure all the
* required files are available, a user should wait for them to be archived,
* or include them into the backup.
* files to be archived. This is okay if we use the backup to start the
* standby. But, if it's for an archive recovery, to ensure all the
* required files are available, a user should wait for them to be
* archived, or include them into the backup.
*
* We return the current minimum recovery point as the backup end
* location. Note that it's would be bigger than the exact backup end
* location if the minimum recovery point is updated since the backup
* of pg_control. This is harmless for current uses.
* location if the minimum recovery point is updated since the backup of
* pg_control. This is harmless for current uses.
*
* XXX currently a backup history file is for informational and debug
* purposes only. It's not essential for an online backup. Furthermore,
* even if it's created, it will not be archived during recovery because
* an archiver is not invoked. So it doesn't seem worthwhile to write
* a backup history file during recovery.
* an archiver is not invoked. So it doesn't seem worthwhile to write a
* backup history file during recovery.
*/
if (backup_started_in_recovery)
{
@ -9597,12 +9604,12 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive)
if (XLByteLE(startpoint, recptr))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("WAL generated with full_page_writes=off was replayed "
"during online backup"),
errhint("This means that the backup being taken on standby "
"is corrupt and should not be used. "
"Enable full_page_writes and run CHECKPOINT on the master, "
"and then try an online backup again.")));
errmsg("WAL generated with full_page_writes=off was replayed "
"during online backup"),
errhint("This means that the backup being taken on standby "
"is corrupt and should not be used. "
"Enable full_page_writes and run CHECKPOINT on the master, "
"and then try an online backup again.")));
LWLockAcquire(ControlFileLock, LW_SHARED);
@ -9905,10 +9912,11 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
ereport(FATAL,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
/*
* BACKUP METHOD and BACKUP FROM lines are new in 9.2. We can't
* restore from an older backup anyway, but since the information on it
* is not strictly required, don't error out if it's missing for some reason.
* BACKUP METHOD and BACKUP FROM lines are new in 9.2. We can't restore
* from an older backup anyway, but since the information on it is not
* strictly required, don't error out if it's missing for some reason.
*/
if (fscanf(lfp, "BACKUP METHOD: %19s\n", backuptype) == 1)
{
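Editor's aside, not part of the commit: the hunks above parse optional "BACKUP METHOD" and "BACKUP FROM" lines and deliberately tolerate their absence. A minimal sketch of that tolerant parsing follows; it works on an in-memory string with invented contents rather than on a real backup_label file.

/* Sketch only: optional backup_label-style lines parsed with strstr/sscanf. */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *label =
		"START WAL LOCATION: 0/2000028 (file 000000010000000000000002)\n"
		"BACKUP METHOD: streamed\n"
		"BACKUP FROM: standby\n";
	char		method[20] = "unknown";
	char		from[20] = "unknown";
	const char *ptr;

	/* both lines are optional: keep the defaults if either is absent */
	ptr = strstr(label, "BACKUP METHOD:");
	if (ptr != NULL)
		sscanf(ptr, "BACKUP METHOD: %19s", method);

	ptr = strstr(label, "BACKUP FROM:");
	if (ptr != NULL)
		sscanf(ptr, "BACKUP FROM: %19s", from);

	printf("method=%s from=%s\n", method, from);
	return 0;
}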
@ -10050,8 +10058,8 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
if (readFile >= 0 && !XLByteInSeg(*RecPtr, readId, readSeg))
{
/*
* Request a restartpoint if we've replayed too much
* xlog since the last one.
* Request a restartpoint if we've replayed too much xlog since the
* last one.
*/
if (StandbyMode && bgwriterLaunched)
{
@ -80,10 +80,10 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
/*
* Once recovery has reached a consistent state, the invalid-page table
* should be empty and remain so. If a reference to an invalid page is
* found after consistency is reached, PANIC immediately. This might
* seem aggressive, but it's better than letting the invalid reference
* linger in the hash table until the end of recovery and PANIC there,
* which might come only much later if this is a standby server.
* found after consistency is reached, PANIC immediately. This might seem
* aggressive, but it's better than letting the invalid reference linger
* in the hash table until the end of recovery and PANIC there, which
* might come only much later if this is a standby server.
*/
if (reachedConsistency)
{