
pgindent run for 9.4

This includes removing tabs after periods in C comments, which was
applied to back branches, so this change should not affect backpatching.
Bruce Momjian
2014-05-06 12:12:18 -04:00
parent fb85cd4320
commit 0a78320057
854 changed files with 7848 additions and 7368 deletions


@@ -21,7 +21,7 @@
* tuptoaster.c.
*
* This change will break any code that assumes it needn't detoast values
* that have been put into a tuple but never sent to disk. Hopefully there
* that have been put into a tuple but never sent to disk. Hopefully there
* are few such places.
*
* Varlenas still have alignment 'i' (or 'd') in pg_type/pg_attribute, since
@@ -387,7 +387,7 @@ nocachegetattr(HeapTuple tuple,
/*
* Otherwise, check for non-fixed-length attrs up to and including
* target. If there aren't any, it's safe to cheaply initialize the
* target. If there aren't any, it's safe to cheaply initialize the
* cached offsets for these attrs.
*/
if (HeapTupleHasVarWidth(tuple))
@@ -454,7 +454,7 @@ nocachegetattr(HeapTuple tuple,
*
* Note - This loop is a little tricky. For each non-null attribute,
* we have to first account for alignment padding before the attr,
* then advance over the attr based on its length. Nulls have no
* then advance over the attr based on its length. Nulls have no
* storage and no alignment padding either. We can use/set
* attcacheoff until we reach either a null or a var-width attribute.
*/
@@ -549,7 +549,7 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
/*
* cmin and cmax are now both aliases for the same field, which
* can in fact also be a combo command id. XXX perhaps we should
* can in fact also be a combo command id. XXX perhaps we should
* return the "real" cmin or cmax if possible, that is if we are
* inside the originating transaction?
*/
@@ -709,7 +709,7 @@ heap_form_tuple(TupleDesc tupleDescriptor,
len += data_len;
/*
* Allocate and zero the space needed. Note that the tuple body and
* Allocate and zero the space needed. Note that the tuple body and
* HeapTupleData management structure are allocated in one chunk.
*/
tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + len);


@@ -71,7 +71,7 @@ index_form_tuple(TupleDesc tupleDescriptor,
/*
* If value is stored EXTERNAL, must fetch it so we are not depending
* on outside storage. This should be improved someday.
* on outside storage. This should be improved someday.
*/
if (VARATT_IS_EXTERNAL(DatumGetPointer(values[i])))
{
@@ -280,7 +280,7 @@ nocache_index_getattr(IndexTuple tup,
/*
* Otherwise, check for non-fixed-length attrs up to and including
* target. If there aren't any, it's safe to cheaply initialize the
* target. If there aren't any, it's safe to cheaply initialize the
* cached offsets for these attrs.
*/
if (IndexTupleHasVarwidths(tup))
@@ -347,7 +347,7 @@ nocache_index_getattr(IndexTuple tup,
*
* Note - This loop is a little tricky. For each non-null attribute,
* we have to first account for alignment padding before the attr,
* then advance over the attr based on its length. Nulls have no
* then advance over the attr based on its length. Nulls have no
* storage and no alignment padding either. We can use/set
* attcacheoff until we reach either a null or a var-width attribute.
*/


@@ -182,7 +182,7 @@ printtup_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
* or some similar function; it does not contain a full set of fields.
* The targetlist will be NIL when executing a utility function that does
* not have a plan. If the targetlist isn't NIL then it is a Query node's
* targetlist; it is up to us to ignore resjunk columns in it. The formats[]
* targetlist; it is up to us to ignore resjunk columns in it. The formats[]
* array pointer might be NULL (if we are doing Describe on a prepared stmt);
* send zeroes for the format codes in that case.
*/


@@ -540,7 +540,7 @@ add_real_reloption(bits32 kinds, char *name, char *desc, double default_val,
* Add a new string reloption
*
* "validator" is an optional function pointer that can be used to test the
* validity of the values. It must elog(ERROR) when the argument string is
* validity of the values. It must elog(ERROR) when the argument string is
* not acceptable for the variable. Note that the default value must pass
* the validation.
*/
@@ -868,7 +868,7 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, Oid amoptions)
* is returned.
*
* Note: values of type int, bool and real are allocated as part of the
* returned array. Values of type string are allocated separately and must
* returned array. Values of type string are allocated separately and must
* be freed by the caller.
*/
relopt_value *
@@ -1205,7 +1205,7 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
{"check_option", RELOPT_TYPE_STRING,
offsetof(StdRdOptions, check_option_offset)},
{"user_catalog_table", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, user_catalog_table)}
offsetof(StdRdOptions, user_catalog_table)}
};
options = parseRelOptions(reloptions, validate, kind, &numoptions);


@@ -5,7 +5,7 @@
*
* These functions provide conversion between rowtypes that are logically
* equivalent but might have columns in a different order or different sets
* of dropped columns. There is some overlap of functionality with the
* of dropped columns. There is some overlap of functionality with the
* executor's "junkfilter" routines, but these functions work on bare
* HeapTuples rather than TupleTableSlots.
*


@@ -581,7 +581,7 @@ TupleDescInitEntryCollation(TupleDesc desc,
* Given a relation schema (list of ColumnDef nodes), build a TupleDesc.
*
* Note: the default assumption is no OIDs; caller may modify the returned
* TupleDesc if it wants OIDs. Also, tdtypeid will need to be filled in
* TupleDesc if it wants OIDs. Also, tdtypeid will need to be filled in
* later on.
*/
TupleDesc


@@ -197,7 +197,7 @@ ginarrayconsistent(PG_FUNCTION_ARGS)
/*
* Must have all elements in check[] true; no discrimination
* against nulls here. This is because array_contain_compare and
* against nulls here. This is because array_contain_compare and
* array_eq handle nulls differently ...
*/
res = true;
@@ -279,9 +279,10 @@ ginarraytriconsistent(PG_FUNCTION_ARGS)
res = GIN_MAYBE;
break;
case GinEqualStrategy:
/*
* Must have all elements in check[] true; no discrimination
* against nulls here. This is because array_contain_compare and
* against nulls here. This is because array_contain_compare and
* array_eq handle nulls differently ...
*/
res = GIN_MAYBE;


@@ -251,6 +251,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
Assert(blkno != btree->rootBlkno);
ptr->blkno = blkno;
ptr->buffer = buffer;
/*
* parent may be wrong, but if so, the ginFinishSplit call will
* recurse to call ginFindParents again to fix it.
@@ -328,7 +329,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
GinPlaceToPageRC rc;
uint16 xlflags = 0;
Page childpage = NULL;
Page newlpage = NULL, newrpage = NULL;
Page newlpage = NULL,
newrpage = NULL;
if (GinPageIsData(page))
xlflags |= GIN_INSERT_ISDATA;
@@ -346,8 +348,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/*
* Try to put the incoming tuple on the page. placeToPage will decide
* if the page needs to be split.
* Try to put the incoming tuple on the page. placeToPage will decide if
* the page needs to be split.
*/
rc = btree->placeToPage(btree, stack->buffer, stack,
insertdata, updateblkno,
@@ -371,7 +373,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
XLogRecPtr recptr;
XLogRecData rdata[3];
ginxlogInsert xlrec;
BlockIdData childblknos[2];
BlockIdData childblknos[2];
xlrec.node = btree->index->rd_node;
xlrec.blkno = BufferGetBlockNumber(stack->buffer);
@@ -449,7 +451,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
data.flags = xlflags;
if (childbuf != InvalidBuffer)
{
Page childpage = BufferGetPage(childbuf);
Page childpage = BufferGetPage(childbuf);
GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
data.leftChildBlkno = BufferGetBlockNumber(childbuf);
@@ -505,8 +508,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/*
* Construct a new root page containing downlinks to the new left
* and right pages. (do this in a temporary copy first rather
* than overwriting the original page directly, so that we can still
* and right pages. (do this in a temporary copy first rather than
* overwriting the original page directly, so that we can still
* abort gracefully if this fails.)
*/
newrootpg = PageGetTempPage(newrpage);
@@ -604,7 +607,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
else
{
elog(ERROR, "unknown return code from GIN placeToPage method: %d", rc);
return false; /* keep compiler quiet */
return false; /* keep compiler quiet */
}
}
@@ -627,8 +630,8 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
bool first = true;
/*
* freestack == false when we encounter an incompletely split page during a
* scan, while freestack == true is used in the normal scenario that a
* freestack == false when we encounter an incompletely split page during
* a scan, while freestack == true is used in the normal scenario that a
* split is finished right after the initial insert.
*/
if (!freestack)
@@ -650,8 +653,8 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
* then continue with the current one.
*
* Note: we have to finish *all* incomplete splits we encounter, even
* if we have to move right. Otherwise we might choose as the target
* a page that has no downlink in the parent, and splitting it further
* if we have to move right. Otherwise we might choose as the target a
* page that has no downlink in the parent, and splitting it further
* would fail.
*/
if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))


@@ -187,7 +187,7 @@ ginInsertBAEntry(BuildAccumulator *accum,
* Since the entries are being inserted into a balanced binary tree, you
* might think that the order of insertion wouldn't be critical, but it turns
* out that inserting the entries in sorted order results in a lot of
* rebalancing operations and is slow. To prevent this, we attempt to insert
* rebalancing operations and is slow. To prevent this, we attempt to insert
* the nodes in an order that will produce a nearly-balanced tree if the input
* is in fact sorted.
*

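The ginbulk.c comment above describes choosing an insertion order that keeps a binary tree nearly balanced even when the keys arrive sorted. As a minimal standalone sketch of that idea (middle-first insertion over a sorted array; the helper names insert_balanced and print_key are invented for this illustration and are not from the GIN code):

#include <stdio.h>

/*
 * Sketch only: emit a sorted array middle-first, so that inserting the
 * keys into a plain (non-self-balancing) binary tree in this order
 * yields a nearly balanced tree with few rebalancing-style pathologies.
 */
static void
insert_balanced(const int *sorted, int lo, int hi, void (*insert) (int key))
{
	int			mid;

	if (lo > hi)
		return;
	mid = lo + (hi - lo) / 2;
	insert(sorted[mid]);		/* middle element becomes the subtree root */
	insert_balanced(sorted, lo, mid - 1, insert);
	insert_balanced(sorted, mid + 1, hi, insert);
}

static void
print_key(int key)
{
	printf("%d ", key);
}

int
main(void)
{
	int			keys[] = {1, 2, 3, 4, 5, 6, 7};

	insert_balanced(keys, 0, 6, print_key);
	printf("\n");				/* prints: 4 2 1 3 6 5 7 */
	return 0;
}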

@@ -49,8 +49,8 @@ typedef struct
dlist_head segments; /* a list of leafSegmentInfos */
/*
* The following fields represent how the segments are split across
* pages, if a page split is required. Filled in by leafRepackItems.
* The following fields represent how the segments are split across pages,
* if a page split is required. Filled in by leafRepackItems.
*/
dlist_node *lastleft; /* last segment on left page */
int lsize; /* total size on left page */
@@ -61,7 +61,7 @@ typedef struct
typedef struct
{
dlist_node node; /* linked list pointers */
dlist_node node; /* linked list pointers */
/*-------------
* 'action' indicates the status of this in-memory segment, compared to
@@ -83,9 +83,9 @@ typedef struct
int nmodifieditems;
/*
* The following fields represent the items in this segment. If 'items'
* is not NULL, it contains a palloc'd array of the itemsin this segment.
* If 'seg' is not NULL, it contains the items in an already-compressed
* The following fields represent the items in this segment. If 'items' is
* not NULL, it contains a palloc'd array of the itemsin this segment. If
* 'seg' is not NULL, it contains the items in an already-compressed
* format. It can point to an on-disk page (!modified), or a palloc'd
* segment in memory. If both are set, they must represent the same items.
*/
@@ -386,7 +386,7 @@ GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
if (offset != maxoff + 1)
memmove(ptr + sizeof(PostingItem),
ptr,
(maxoff - offset + 1) * sizeof(PostingItem));
(maxoff - offset + 1) *sizeof(PostingItem));
}
memcpy(ptr, data, sizeof(PostingItem));
@@ -436,8 +436,8 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
int maxitems = items->nitem - items->curitem;
Page page = BufferGetPage(buf);
int i;
ItemPointerData rbound;
ItemPointerData lbound;
ItemPointerData rbound;
ItemPointerData lbound;
bool needsplit;
bool append;
int segsize;
@@ -451,7 +451,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
Assert(GinPageIsData(page));
rbound = *GinDataPageGetRightBound(page);
rbound = *GinDataPageGetRightBound(page);
/*
* Count how many of the new items belong to this page.
@@ -464,8 +464,8 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
{
/*
* This needs to go to some other location in the tree. (The
* caller should've chosen the insert location so that at least
* the first item goes here.)
* caller should've chosen the insert location so that at
* least the first item goes here.)
*/
Assert(i > 0);
break;
@@ -553,7 +553,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
/* Add the new items to the segments */
if (!addItemsToLeaf(leaf, newItems, maxitems))
{
/* all items were duplicates, we have nothing to do */
/* all items were duplicates, we have nothing to do */
items->curitem += maxitems;
MemoryContextSwitchTo(oldCxt);
@@ -680,7 +680,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
Assert(GinPageRightMost(page) ||
ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
GinDataPageGetRightBound(*newrpage)) < 0);
GinDataPageGetRightBound(*newrpage)) < 0);
if (append)
elog(DEBUG2, "appended %d items to block %u; split %d/%d (%d to go)",
@@ -769,16 +769,16 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
* We don't try to re-encode the segments here, even though some of them
* might be really small now that we've removed some items from them. It
* seems like a waste of effort, as there isn't really any benefit from
* larger segments per se; larger segments only help to pack more items
* in the same space. We might as well delay doing that until the next
* larger segments per se; larger segments only help to pack more items in
* the same space. We might as well delay doing that until the next
* insertion, which will need to re-encode at least part of the page
* anyway.
*
* Also note if the page was in uncompressed, pre-9.4 format before, it
* is now represented as one huge segment that contains all the items.
* It might make sense to split that, to speed up random access, but we
* don't bother. You'll have to REINDEX anyway if you want the full gain
* of the new tighter index format.
* Also note if the page was in uncompressed, pre-9.4 format before, it is
* now represented as one huge segment that contains all the items. It
* might make sense to split that, to speed up random access, but we don't
* bother. You'll have to REINDEX anyway if you want the full gain of the
* new tighter index format.
*/
if (removedsomething)
{
@@ -795,6 +795,7 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
{
leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
iter.cur);
if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
modified = true;
if (modified && seginfo->action != GIN_SEGMENT_DELETE)
@@ -862,10 +863,11 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
}
walbufbegin = palloc(
sizeof(ginxlogRecompressDataLeaf) +
BLCKSZ + /* max size needed to hold the segment data */
nmodified * 2 + /* (segno + action) per action */
sizeof(XLogRecData));
sizeof(ginxlogRecompressDataLeaf) +
BLCKSZ + /* max size needed to hold the segment
* data */
nmodified * 2 + /* (segno + action) per action */
sizeof(XLogRecData));
walbufend = walbufbegin;
recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
@@ -965,9 +967,9 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
int segsize;
/*
* If the page was in pre-9.4 format before, convert the header, and
* force all segments to be copied to the page whether they were modified
* or not.
* If the page was in pre-9.4 format before, convert the header, and force
* all segments to be copied to the page whether they were modified or
* not.
*/
if (!GinPageIsCompressed(page))
{
@@ -1022,6 +1024,7 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
dlist_node *node;
dlist_node *firstright;
leafSegmentInfo *seginfo;
/* these must be static so they can be returned to caller */
static ginxlogSplitDataLeaf split_xlog;
static XLogRecData rdata[3];
@@ -1121,6 +1124,7 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
Page page = BufferGetPage(buf);
OffsetNumber off = stack->off;
PostingItem *pitem;
/* these must be static so they can be returned to caller */
static XLogRecData rdata;
static ginxlogInsertDataInternal data;
@@ -1198,7 +1202,7 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
int nrightitems;
Size pageSize = PageGetPageSize(oldpage);
ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
ItemPointer bound;
ItemPointer bound;
Page lpage;
Page rpage;
OffsetNumber separator;
@@ -1216,8 +1220,8 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
*prdata = rdata;
/*
* First construct a new list of PostingItems, which includes all the
* old items, and the new item.
* First construct a new list of PostingItems, which includes all the old
* items, and the new item.
*/
memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
(off - 1) * sizeof(PostingItem));
@@ -1402,8 +1406,8 @@ addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
leafSegmentInfo *newseg;
/*
* If the page is completely empty, just construct one new segment to
* hold all the new items.
* If the page is completely empty, just construct one new segment to hold
* all the new items.
*/
if (dlist_is_empty(&leaf->segments))
{
@@ -1418,9 +1422,9 @@ addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
dlist_foreach(iter, &leaf->segments)
{
leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur);
leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur);
int nthis;
ItemPointer tmpitems;
ItemPointer tmpitems;
int ntmpitems;
/*
@@ -1434,7 +1438,7 @@ addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
ItemPointerData next_first;
next = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node,
dlist_next_node(&leaf->segments, iter.cur));
dlist_next_node(&leaf->segments, iter.cur));
if (next->items)
next_first = next->items[0];
else
@@ -1556,27 +1560,27 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
if (seginfo->seg == NULL)
{
if (seginfo->nitems > GinPostingListSegmentMaxSize)
npacked = 0; /* no chance that it would fit. */
npacked = 0; /* no chance that it would fit. */
else
{
seginfo->seg = ginCompressPostingList(seginfo->items,
seginfo->nitems,
GinPostingListSegmentMaxSize,
GinPostingListSegmentMaxSize,
&npacked);
}
if (npacked != seginfo->nitems)
{
/*
* Too large. Compress again to the target size, and create
* a new segment to represent the remaining items. The new
* segment is inserted after this one, so it will be
* processed in the next iteration of this loop.
* Too large. Compress again to the target size, and
* create a new segment to represent the remaining items.
* The new segment is inserted after this one, so it will
* be processed in the next iteration of this loop.
*/
if (seginfo->seg)
pfree(seginfo->seg);
seginfo->seg = ginCompressPostingList(seginfo->items,
seginfo->nitems,
GinPostingListSegmentTargetSize,
GinPostingListSegmentTargetSize,
&npacked);
if (seginfo->action != GIN_SEGMENT_INSERT)
seginfo->action = GIN_SEGMENT_REPLACE;
@@ -1596,7 +1600,7 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
*/
if (SizeOfGinPostingList(seginfo->seg) < GinPostingListSegmentMinSize && next_node)
{
int nmerged;
int nmerged;
nextseg = dlist_container(leafSegmentInfo, node, next_node);
@@ -1741,8 +1745,8 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;
/*
* Write as many of the items to the root page as fit. In segments
* of max GinPostingListSegmentMaxSize bytes each.
* Write as many of the items to the root page as fit. In segments of max
* GinPostingListSegmentMaxSize bytes each.
*/
nrootitems = 0;
rootsize = 0;


@@ -135,7 +135,8 @@ GinFormTuple(GinState *ginstate,
*/
if (data)
{
char *ptr = GinGetPosting(itup);
char *ptr = GinGetPosting(itup);
memcpy(ptr, data, dataSize);
}
@@ -162,7 +163,7 @@ ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup,
{
Pointer ptr = GinGetPosting(itup);
int nipd = GinGetNPosting(itup);
ItemPointer ipd;
ItemPointer ipd;
int ndecoded;
if (GinItupIsCompressed(itup))
@@ -192,7 +193,7 @@ ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup,
* Form a non-leaf entry tuple by copying the key data from the given tuple,
* which can be either a leaf or non-leaf entry tuple.
*
* Any posting list in the source tuple is not copied. The specified child
* Any posting list in the source tuple is not copied. The specified child
* block number is inserted into t_tid.
*/
static IndexTuple


@@ -440,7 +440,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
* Create temporary index tuples for a single indexable item (one index column
* for the heap tuple specified by ht_ctid), and append them to the array
* in *collector. They will subsequently be written out using
* ginHeapTupleFastInsert. Note that to guarantee consistent state, all
* ginHeapTupleFastInsert. Note that to guarantee consistent state, all
* temp tuples for a given heap tuple must be written in one call to
* ginHeapTupleFastInsert.
*/
@@ -707,7 +707,7 @@ processPendingPage(BuildAccumulator *accum, KeyArray *ka,
*
* This can be called concurrently by multiple backends, so it must cope.
* On first glance it looks completely not concurrent-safe and not crash-safe
* either. The reason it's okay is that multiple insertion of the same entry
* either. The reason it's okay is that multiple insertion of the same entry
* is detected and treated as a no-op by gininsert.c. If we crash after
* posting entries to the main index and before removing them from the
* pending list, it's okay because when we redo the posting later on, nothing
@@ -761,7 +761,7 @@ ginInsertCleanup(GinState *ginstate,
LockBuffer(metabuffer, GIN_UNLOCK);
/*
* Initialize. All temporary space will be in opCtx
* Initialize. All temporary space will be in opCtx
*/
opCtx = AllocSetContextCreate(CurrentMemoryContext,
"GIN insert cleanup temporary context",
@@ -855,7 +855,7 @@ ginInsertCleanup(GinState *ginstate,
/*
* While we left the page unlocked, more stuff might have gotten
* added to it. If so, process those entries immediately. There
* added to it. If so, process those entries immediately. There
* shouldn't be very many, so we don't worry about the fact that
* we're doing this with exclusive lock. Insertion algorithm
* guarantees that inserted row(s) will not continue on next page.


@@ -85,7 +85,8 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
page = BufferGetPage(buffer);
if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
{
int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
scanEntry->predictNumberResult += n;
}
@@ -100,7 +101,7 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
/*
* Collects TIDs into scanEntry->matchBitmap for all heap tuples that
* match the search entry. This supports three different match modes:
* match the search entry. This supports three different match modes:
*
* 1. Partial-match support: scan from current point until the
* comparePartialFn says we're done.
@@ -196,7 +197,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
/*
* In ALL mode, we are not interested in null items, so we can
* stop if we get to a null-item placeholder (which will be the
* last entry for a given attnum). We do want to include NULL_KEY
* last entry for a given attnum). We do want to include NULL_KEY
* and EMPTY_ITEM entries, though.
*/
if (icategory == GIN_CAT_NULL_ITEM)
@@ -407,7 +408,7 @@ restartScanEntry:
else if (GinGetNPosting(itup) > 0)
{
entry->list = ginReadTuple(ginstate, entry->attnum, itup,
&entry->nlist);
&entry->nlist);
entry->predictNumberResult = entry->nlist;
entry->isFinished = FALSE;
@@ -463,11 +464,11 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
* considerably, if the frequent term can be put in the additional set.
*
* There can be many legal ways to divide them entries into these two
* sets. A conservative division is to just put everything in the
* required set, but the more you can put in the additional set, the more
* you can skip during the scan. To maximize skipping, we try to put as
* many frequent items as possible into additional, and less frequent
* ones into required. To do that, sort the entries by frequency
* sets. A conservative division is to just put everything in the required
* set, but the more you can put in the additional set, the more you can
* skip during the scan. To maximize skipping, we try to put as many
* frequent items as possible into additional, and less frequent ones into
* required. To do that, sort the entries by frequency
* (predictNumberResult), and put entries into the required set in that
* order, until the consistent function says that none of the remaining
* entries can form a match, without any items from the required set. The
@@ -635,8 +636,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
if (stepright)
{
/*
* We've processed all the entries on this page. If it was the last
* page in the tree, we're done.
* We've processed all the entries on this page. If it was the
* last page in the tree, we're done.
*/
if (GinPageRightMost(page))
{
@@ -647,8 +648,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
}
/*
* Step to next page, following the right link. then find the first
* ItemPointer greater than advancePast.
* Step to next page, following the right link. then find the
* first ItemPointer greater than advancePast.
*/
entry->buffer = ginStepRight(entry->buffer,
ginstate->index,
@@ -658,7 +659,7 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
stepright = true;
if (GinPageGetOpaque(page)->flags & GIN_DELETED)
continue; /* page was deleted by concurrent vacuum */
continue; /* page was deleted by concurrent vacuum */
/*
* The first item > advancePast might not be on this page, but
@@ -781,6 +782,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
gotitem = true;
break;
}
/*
* Not a lossy page. Skip over any offsets <= advancePast, and
* return that.
@@ -788,8 +790,9 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
if (entry->matchResult->blockno == advancePastBlk)
{
/*
* First, do a quick check against the last offset on the page.
* If that's > advancePast, so are all the other offsets.
* First, do a quick check against the last offset on the
* page. If that's > advancePast, so are all the other
* offsets.
*/
if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
{
@@ -890,8 +893,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
/*
* We might have already tested this item; if so, no need to repeat work.
* (Note: the ">" case can happen, if advancePast is exact but we previously
* had to set curItem to a lossy-page pointer.)
* (Note: the ">" case can happen, if advancePast is exact but we
* previously had to set curItem to a lossy-page pointer.)
*/
if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
return;
@@ -942,8 +945,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
/*
* Ok, we now know that there are no matches < minItem.
*
* If minItem is lossy, it means that there were no exact items on
* the page among requiredEntries, because lossy pointers sort after exact
* If minItem is lossy, it means that there were no exact items on the
* page among requiredEntries, because lossy pointers sort after exact
* items. However, there might be exact items for the same page among
* additionalEntries, so we mustn't advance past them.
*/
@@ -1085,6 +1088,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
if (entry->isFinished)
key->entryRes[i] = GIN_FALSE;
#if 0
/*
* This case can't currently happen, because we loaded all the entries
* for this item earlier.
@@ -1119,6 +1123,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
break;
default:
/*
* the 'default' case shouldn't happen, but if the consistent
* function returns something bogus, this is the safe result
@@ -1129,11 +1134,10 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
}
/*
* We have a tuple, and we know if it matches or not. If it's a
* non-match, we could continue to find the next matching tuple, but
* let's break out and give scanGetItem a chance to advance the other
* keys. They might be able to skip past to a much higher TID, allowing
* us to save work.
* We have a tuple, and we know if it matches or not. If it's a non-match,
* we could continue to find the next matching tuple, but let's break out
* and give scanGetItem a chance to advance the other keys. They might be
* able to skip past to a much higher TID, allowing us to save work.
*/
/* clean up after consistentFn calls */
@@ -1165,14 +1169,14 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
* matching item.
*
* This logic works only if a keyGetItem stream can never contain both
* exact and lossy pointers for the same page. Else we could have a
* exact and lossy pointers for the same page. Else we could have a
* case like
*
* stream 1 stream 2
* ... ...
* ... ...
* 42/6 42/7
* 50/1 42/0xffff
* ... ...
* ... ...
*
* We would conclude that 42/6 is not a match and advance stream 1,
* thus never detecting the match to the lossy pointer in stream 2.
@@ -1205,12 +1209,11 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
}
/*
* It's a match. We can conclude that nothing < matches, so
* the other key streams can skip to this item.
* It's a match. We can conclude that nothing < matches, so the
* other key streams can skip to this item.
*
* Beware of lossy pointers, though; from a lossy pointer, we
* can only conclude that nothing smaller than this *block*
* matches.
* Beware of lossy pointers, though; from a lossy pointer, we can
* only conclude that nothing smaller than this *block* matches.
*/
if (ItemPointerIsLossyPage(&key->curItem))
{
@@ -1229,8 +1232,8 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
}
/*
* If this is the first key, remember this location as a
* potential match, and proceed to check the rest of the keys.
* If this is the first key, remember this location as a potential
* match, and proceed to check the rest of the keys.
*
* Otherwise, check if this is the same item that we checked the
* previous keys for (or a lossy pointer for the same page). If
@@ -1247,7 +1250,7 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
if (ItemPointerIsLossyPage(&key->curItem) ||
ItemPointerIsLossyPage(item))
{
Assert (GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
match = (GinItemPointerGetBlockNumber(&key->curItem) ==
GinItemPointerGetBlockNumber(item));
}
@@ -1264,8 +1267,8 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
/*
* Now *item contains the first ItemPointer after previous result that
* satisfied all the keys for that exact TID, or a lossy reference
* to the same page.
* satisfied all the keys for that exact TID, or a lossy reference to the
* same page.
*
* We must return recheck = true if any of the keys are marked recheck.
*/
@@ -1776,10 +1779,10 @@ gingetbitmap(PG_FUNCTION_ARGS)
/*
* First, scan the pending list and collect any matching entries into the
* bitmap. After we scan a pending item, some other backend could post it
* bitmap. After we scan a pending item, some other backend could post it
* into the main index, and so we might visit it a second time during the
* main scan. This is okay because we'll just re-set the same bit in the
* bitmap. (The possibility of duplicate visits is a major reason why GIN
* bitmap. (The possibility of duplicate visits is a major reason why GIN
* can't support the amgettuple API, however.) Note that it would not do
* to scan the main index before the pending list, since concurrent
* cleanup could then make us miss entries entirely.


@@ -40,7 +40,7 @@ typedef struct
* Adds array of item pointers to tuple's posting list, or
* creates posting tree and tuple pointing to tree in case
* of not enough space. Max size of tuple is defined in
* GinFormTuple(). Returns a new, modified index tuple.
* GinFormTuple(). Returns a new, modified index tuple.
* items[] must be in sorted order with no duplicates.
*/
static IndexTuple


@@ -47,7 +47,7 @@
* Maximum number of MAYBE inputs that shimTriConsistentFn will try to
* resolve by calling all combinations.
*/
#define MAX_MAYBE_ENTRIES 4
#define MAX_MAYBE_ENTRIES 4
/*
* Dummy consistent functions for an EVERYTHING key. Just claim it matches.
@@ -95,14 +95,14 @@ static GinTernaryValue
directTriConsistentFn(GinScanKey key)
{
return DatumGetGinTernaryValue(FunctionCall7Coll(
key->triConsistentFmgrInfo,
key->collation,
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query,
UInt32GetDatum(key->nuserentries),
PointerGetDatum(key->extra_data),
PointerGetDatum(key->queryValues),
key->triConsistentFmgrInfo,
key->collation,
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query,
UInt32GetDatum(key->nuserentries),
PointerGetDatum(key->extra_data),
PointerGetDatum(key->queryValues),
PointerGetDatum(key->queryCategories)));
}
@@ -115,15 +115,16 @@ static bool
shimBoolConsistentFn(GinScanKey key)
{
GinTernaryValue result;
result = DatumGetGinTernaryValue(FunctionCall7Coll(
key->triConsistentFmgrInfo,
key->collation,
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query,
UInt32GetDatum(key->nuserentries),
PointerGetDatum(key->extra_data),
PointerGetDatum(key->queryValues),
key->triConsistentFmgrInfo,
key->collation,
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query,
UInt32GetDatum(key->nuserentries),
PointerGetDatum(key->extra_data),
PointerGetDatum(key->queryValues),
PointerGetDatum(key->queryCategories)));
if (result == GIN_MAYBE)
{
@@ -240,8 +241,8 @@ ginInitConsistentFunction(GinState *ginstate, GinScanKey key)
key->boolConsistentFn = shimBoolConsistentFn;
if (OidIsValid(ginstate->triConsistentFn[key->attnum - 1].fn_oid))
key->triConsistentFn = directTriConsistentFn;
key->triConsistentFn = directTriConsistentFn;
else
key->triConsistentFn = shimTriConsistentFn;
key->triConsistentFn = shimTriConsistentFn;
}
}

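The ginlogic.c hunks above reindent the code that installs either a native tri-state consistent function or a shim over the boolean one. As a minimal sketch of the shim's idea (a MAYBE result is treated as a match that must be rechecked against the heap); TriValue and tri_to_bool are names invented for this illustration, not the GIN API:

#include <stdbool.h>
#include <stdio.h>

typedef enum
{
	TRI_FALSE,
	TRI_TRUE,
	TRI_MAYBE
} TriValue;

/* Collapse a ternary consistency verdict into the boolean interface. */
static bool
tri_to_bool(TriValue v, bool *recheck)
{
	if (v == TRI_MAYBE)
	{
		*recheck = true;		/* can't decide from the index alone */
		return true;
	}
	*recheck = false;
	return (v == TRI_TRUE);
}

int
main(void)
{
	bool		recheck = false;
	bool		match = tri_to_bool(TRI_MAYBE, &recheck);

	printf("match=%d recheck=%d\n", match, recheck);
	return 0;
}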

@@ -126,9 +126,9 @@ encode_varbyte(uint64 val, unsigned char **ptr)
static uint64
decode_varbyte(unsigned char **ptr)
{
uint64 val;
uint64 val;
unsigned char *p = *ptr;
uint64 c;
uint64 c;
c = *(p++);
val = c & 0x7F;
@@ -210,7 +210,7 @@ ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
uint64 val = itemptr_to_uint64(&ipd[totalpacked]);
uint64 delta = val - prev;
Assert (val > prev);
Assert(val > prev);
if (endptr - ptr >= 6)
encode_varbyte(delta, &ptr);
@@ -225,7 +225,7 @@ ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
encode_varbyte(delta, &p);
if (p - buf > (endptr - ptr))
break; /* output is full */
break; /* output is full */
memcpy(ptr, buf, p - buf);
ptr += (p - buf);
@@ -286,7 +286,7 @@ ginPostingListDecode(GinPostingList *plist, int *ndecoded)
ItemPointer
ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
{
ItemPointer result;
ItemPointer result;
int nallocated;
uint64 val;
char *endseg = ((char *) segment) + len;
@@ -349,7 +349,7 @@ ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len,
TIDBitmap *tbm)
{
int ndecoded;
ItemPointer items;
ItemPointer items;
items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded);
tbm_add_tuples(tbm, items, ndecoded, false);
@@ -374,8 +374,8 @@ ginMergeItemPointers(ItemPointerData *a, uint32 na,
dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
/*
* If the argument arrays don't overlap, we can just append them to
* each other.
* If the argument arrays don't overlap, we can just append them to each
* other.
*/
if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
{

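The ginpostinglist.c hunks above touch the varbyte coding of item-pointer deltas. As a minimal standalone sketch of the scheme the surrounding comments describe (7 payload bits per byte, high bit set on every byte except the last); the bodies below are written for this page as an illustration and are not the encode_varbyte/decode_varbyte code from the source:

#include <stdint.h>
#include <stdio.h>

static void
encode_varbyte(uint64_t val, unsigned char **ptr)
{
	unsigned char *p = *ptr;

	while (val > 0x7F)
	{
		*(p++) = 0x80 | (val & 0x7F);	/* 7 data bits, continuation bit set */
		val >>= 7;
	}
	*(p++) = (unsigned char) val;	/* last byte: continuation bit clear */
	*ptr = p;
}

static uint64_t
decode_varbyte(unsigned char **ptr)
{
	unsigned char *p = *ptr;
	uint64_t	val;
	uint64_t	c;
	int			shift = 7;

	c = *(p++);
	val = c & 0x7F;
	while (c & 0x80)
	{
		c = *(p++);
		val |= (c & 0x7F) << shift;
		shift += 7;
	}
	*ptr = p;
	return val;
}

int
main(void)
{
	unsigned char buf[16];
	unsigned char *w = buf;
	unsigned char *r = buf;

	/* Posting lists store deltas between ascending TIDs, not absolute values. */
	encode_varbyte(300, &w);
	printf("decoded %llu from %d byte(s)\n",
		   (unsigned long long) decode_varbyte(&r), (int) (w - buf));
	return 0;
}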

@@ -389,7 +389,7 @@ ginNewScanKey(IndexScanDesc scan)
/*
* If the index is version 0, it may be missing null and placeholder
* entries, which would render searches for nulls and full-index scans
* unreliable. Throw an error if so.
* unreliable. Throw an error if so.
*/
if (hasNullQuery && !so->isVoidRes)
{


@@ -67,6 +67,7 @@ initGinState(GinState *state, Relation index)
fmgr_info_copy(&(state->extractQueryFn[i]),
index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
CurrentMemoryContext);
/*
* Check opclass capability to do tri-state or binary logic consistent
* check.
@@ -74,14 +75,14 @@ initGinState(GinState *state, Relation index)
if (index_getprocid(index, i + 1, GIN_TRICONSISTENT_PROC) != InvalidOid)
{
fmgr_info_copy(&(state->triConsistentFn[i]),
index_getprocinfo(index, i + 1, GIN_TRICONSISTENT_PROC),
index_getprocinfo(index, i + 1, GIN_TRICONSISTENT_PROC),
CurrentMemoryContext);
}
if (index_getprocid(index, i + 1, GIN_CONSISTENT_PROC) != InvalidOid)
{
fmgr_info_copy(&(state->consistentFn[i]),
index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
CurrentMemoryContext);
}
@@ -458,7 +459,7 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
* If there's more than one key, sort and unique-ify.
*
* XXX Using qsort here is notationally painful, and the overhead is
* pretty bad too. For small numbers of keys it'd likely be better to use
* pretty bad too. For small numbers of keys it'd likely be better to use
* a simple insertion sort.
*/
if (*nentries > 1)


@@ -47,7 +47,7 @@ ginVacuumItemPointers(GinVacuumState *gvs, ItemPointerData *items,
{
int i,
remaining = 0;
ItemPointer tmpitems = NULL;
ItemPointer tmpitems = NULL;
/*
* Iterate over TIDs array
@@ -208,8 +208,8 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
}
/*
* if we have root and there are empty pages in tree, then we don't release
* lock to go further processing and guarantee that tree is unused
* if we have root and there are empty pages in tree, then we don't
* release lock to go further processing and guarantee that tree is unused
*/
if (!(isRoot && hasVoidPage))
{
@@ -236,7 +236,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
Buffer pBuffer;
Page page,
parentPage;
BlockNumber rightlink;
BlockNumber rightlink;
/*
* Lock the pages in the same order as an insertion would, to avoid
@@ -302,11 +302,11 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
data.rightLink = GinPageGetOpaque(page)->rightlink;
/*
* We can't pass buffer_std = TRUE, because we didn't set pd_lower
* on pre-9.4 versions. The page might've been binary-upgraded from
* an older version, and hence not have pd_lower set correctly.
* Ditto for the left page, but removing the item from the parent
* updated its pd_lower, so we know that's OK at this point.
* We can't pass buffer_std = TRUE, because we didn't set pd_lower on
* pre-9.4 versions. The page might've been binary-upgraded from an
* older version, and hence not have pd_lower set correctly. Ditto for
* the left page, but removing the item from the parent updated its
* pd_lower, so we know that's OK at this point.
*/
rdata[0].buffer = dBuffer;
rdata[0].buffer_std = FALSE;
@@ -538,7 +538,8 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
}
/*
* if we already created a temporary page, make changes in place
* if we already created a temporary page, make changes in
* place
*/
if (tmppage == origpage)
{


@@ -133,7 +133,7 @@ ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rda
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), offset, false, false) == InvalidOffsetNumber)
{
RelFileNode node;
ForkNumber forknum;
ForkNumber forknum;
BlockNumber blknum;
BufferGetTag(buffer, &node, &forknum, &blknum);
@@ -341,8 +341,8 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
/*
* First clear incomplete-split flag on child page if this finishes
* a split.
* First clear incomplete-split flag on child page if this finishes a
* split.
*/
if (!isLeaf)
{
@@ -472,8 +472,8 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
/*
* First clear incomplete-split flag on child page if this finishes
* a split
* First clear incomplete-split flag on child page if this finishes a
* split
*/
if (!isLeaf)
{
@@ -522,7 +522,7 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
if (isRoot)
{
BlockNumber rootBlkno = data->rrlink;
BlockNumber rootBlkno = data->rrlink;
Buffer rootBuf = XLogReadBuffer(data->node, rootBlkno, true);
Page rootPage = BufferGetPage(rootBuf);
@@ -711,9 +711,9 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
Buffer buffer;
/*
* Restore the metapage. This is essentially the same as a full-page image,
* so restore the metapage unconditionally without looking at the LSN, to
* avoid torn page hazards.
* Restore the metapage. This is essentially the same as a full-page
* image, so restore the metapage unconditionally without looking at the
* LSN, to avoid torn page hazards.
*/
metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
if (!BufferIsValid(metabuffer))
@@ -877,7 +877,7 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
/*
* In normal operation, shiftList() takes exclusive lock on all the
* pages-to-be-deleted simultaneously. During replay, however, it should
* pages-to-be-deleted simultaneously. During replay, however, it should
* be all right to lock them one at a time. This is dependent on the fact
* that we are deleting pages from the head of the list, and that readers
* share-lock the next page before releasing the one they are on. So we


@@ -1382,7 +1382,7 @@ initGISTstate(Relation index)
/*
* If the index column has a specified collation, we should honor that
* while doing comparisons. However, we may have a collatable storage
* type for a noncollatable indexed data type. If there's no index
* type for a noncollatable indexed data type. If there's no index
* collation then specify default collation in case the support
* functions need collation. This is harmless if the support
* functions don't care about collation, so we just do it


@@ -31,7 +31,7 @@
*
* On success return for a heap tuple, *recheck_p is set to indicate
* whether recheck is needed. We recheck if any of the consistent() functions
* request it. recheck is not interesting when examining a non-leaf entry,
* request it. recheck is not interesting when examining a non-leaf entry,
* since we must visit the lower index page if there's any doubt.
*
* If we are doing an ordered scan, so->distances[] is filled with distance
@@ -62,7 +62,7 @@ gistindex_keytest(IndexScanDesc scan,
/*
* If it's a leftover invalid tuple from pre-9.1, treat it as a match with
* minimum possible distances. This means we'll always follow it to the
* minimum possible distances. This means we'll always follow it to the
* referenced page.
*/
if (GistTupleIsInvalid(tuple))
@@ -224,7 +224,7 @@ gistindex_keytest(IndexScanDesc scan,
* ntids: if not NULL, gistgetbitmap's output tuple counter
*
* If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap
* tuples should be reported directly into the bitmap. If they are NULL,
* tuples should be reported directly into the bitmap. If they are NULL,
* we're doing a plain or ordered indexscan. For a plain indexscan, heap
* tuple TIDs are returned into so->pageData[]. For an ordered indexscan,
* heap tuple TIDs are pushed into individual search queue items.


@@ -56,7 +56,7 @@ GISTSearchTreeItemCombiner(RBNode *existing, const RBNode *newrb, void *arg)
/*
* If new item is heap tuple, it goes to front of chain; otherwise insert
* it before the first index-page item, so that index pages are visited in
* LIFO order, ensuring depth-first search of index pages. See comments
* LIFO order, ensuring depth-first search of index pages. See comments
* in gist_private.h.
*/
if (GISTSearchItemIsHeap(*newitem))


@@ -71,7 +71,7 @@ gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
* Recompute unions of left- and right-side subkeys after a page split,
* ignoring any tuples that are marked in spl->spl_dontcare[].
*
* Note: we always recompute union keys for all index columns. In some cases
* Note: we always recompute union keys for all index columns. In some cases
* this might represent duplicate work for the leftmost column(s), but it's
* not safe to assume that "zero penalty to move a tuple" means "the union
* key doesn't change at all". Penalty functions aren't 100% accurate.
@@ -160,7 +160,7 @@ findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
/*
* Remove tuples that are marked don't-cares from the tuple index array a[]
* of length *len. This is applied separately to the spl_left and spl_right
* of length *len. This is applied separately to the spl_left and spl_right
* arrays.
*/
static void
@@ -193,7 +193,7 @@ removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
/*
* Place a single don't-care tuple into either the left or right side of the
* split, according to which has least penalty for merging the tuple into
* the previously-computed union keys. We need consider only columns starting
* the previously-computed union keys. We need consider only columns starting
* at attno.
*/
static void
@@ -291,7 +291,7 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
/*
* There is only one previously defined union, so we just choose swap
* or not by lowest penalty for that side. We can only get here if a
* or not by lowest penalty for that side. We can only get here if a
* secondary split happened to have all NULLs in its column in the
* tuples that the outer recursion level had assigned to one side.
* (Note that the null checks in gistSplitByKey don't prevent the
@@ -427,7 +427,7 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
sv->spl_rdatum = v->spl_rattr[attno];
/*
* Let the opclass-specific PickSplit method do its thing. Note that at
* Let the opclass-specific PickSplit method do its thing. Note that at
* this point we know there are no null keys in the entryvec.
*/
FunctionCall2Coll(&giststate->picksplitFn[attno],


@@ -414,7 +414,7 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
* some inserts to go to other equally-good subtrees.
*
* keep_current_best is -1 if we haven't yet had to make a random choice
* whether to keep the current best tuple. If we have done so, and
* whether to keep the current best tuple. If we have done so, and
* decided to keep it, keep_current_best is 1; if we've decided to
* replace, keep_current_best is 0. (This state will be reset to -1 as
* soon as we've made the replacement, but sometimes we make the choice in
@@ -456,7 +456,7 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
{
/*
* New best penalty for column. Tentatively select this tuple
* as the target, and record the best penalty. Then reset the
* as the target, and record the best penalty. Then reset the
* next column's penalty to "unknown" (and indirectly, the
* same for all the ones to its right). This will force us to
* adopt this tuple's penalty values as the best for all the
@@ -475,7 +475,7 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
{
/*
* The current tuple is exactly as good for this column as the
* best tuple seen so far. The next iteration of this loop
* best tuple seen so far. The next iteration of this loop
* will compare the next column.
*/
}
@@ -681,7 +681,7 @@ gistcheckpage(Relation rel, Buffer buf)
/*
* ReadBuffer verifies that every newly-read page passes
* PageHeaderIsValid, which means it either contains a reasonably sane
* page header or is all-zero. We have to defend against the all-zero
* page header or is all-zero. We have to defend against the all-zero
* case, however.
*/
if (PageIsNew(page))


@@ -49,7 +49,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
stats->estimated_count = info->estimated_count;
/*
* XXX the above is wrong if index is partial. Would it be OK to just
* XXX the above is wrong if index is partial. Would it be OK to just
* return NULL, or is there work we must do below?
*/
}


@@ -38,7 +38,7 @@ static MemoryContext opCtx; /* working memory for operations */
* follow-right flag, because that change is not included in the full-page
* image. To be sure that the intermediate state with the wrong flag value is
* not visible to concurrent Hot Standby queries, this function handles
* restoring the full-page image as well as updating the flag. (Note that
* restoring the full-page image as well as updating the flag. (Note that
* we never need to do anything else to the child page in the current WAL
* action.)
*/
@@ -89,7 +89,7 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
/*
* We need to acquire and hold lock on target page while updating the left
* child page. If we have a full-page image of target page, getting the
* child page. If we have a full-page image of target page, getting the
* lock is a side-effect of restoring that image. Note that even if the
* target page no longer exists, we'll still attempt to replay the change
* on the child page.
@@ -387,6 +387,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
for (ptr = dist; ptr; ptr = ptr->next)
npage++;
/*
* the caller should've checked this already, but doesn't hurt to check
* again.


@@ -78,7 +78,7 @@ hashbuild(PG_FUNCTION_ARGS)
* (assuming their hash codes are pretty random) there will be no locality
* of access to the index, and if the index is bigger than available RAM
* then we'll thrash horribly. To prevent that scenario, we can sort the
* tuples by (expected) bucket number. However, such a sort is useless
* tuples by (expected) bucket number. However, such a sort is useless
* overhead when the index does fit in RAM. We choose to sort if the
* initial index size exceeds NBuffers.
*
@@ -248,7 +248,7 @@ hashgettuple(PG_FUNCTION_ARGS)
/*
* An insertion into the current index page could have happened while
* we didn't have read lock on it. Re-find our position by looking
* for the TID we previously returned. (Because we hold share lock on
* for the TID we previously returned. (Because we hold share lock on
* the bucket, no deletions or splits could have occurred; therefore
* we can expect that the TID still exists in the current index page,
* at an offset >= where we were.)
@@ -524,7 +524,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
/*
* Read the metapage to fetch original bucket and tuple counts. Also, we
* keep a copy of the last-seen metapage so that we can use its
* hashm_spares[] values to compute bucket page addresses. This is a bit
* hashm_spares[] values to compute bucket page addresses. This is a bit
* hokey but perfectly safe, since the interesting entries in the spares
* array cannot change under us; and it beats rereading the metapage for
* each bucket.
@@ -655,7 +655,7 @@ loop_top:
{
/*
* Otherwise, our count is untrustworthy since we may have
* double-scanned tuples in split buckets. Proceed by dead-reckoning.
* double-scanned tuples in split buckets. Proceed by dead-reckoning.
* (Note: we still return estimated_count = false, because using this
* count is better than not updating reltuples at all.)
*/


@@ -11,7 +11,7 @@
* src/backend/access/hash/hashfunc.c
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
* These functions are stored in pg_amproc. For each operator class
* defined for hash indexes, they compute the hash value of the argument.
*
* Additional hash functions appear in /utils/adt/ files for various
@@ -158,7 +158,7 @@ hashtext(PG_FUNCTION_ARGS)
/*
* Note: this is currently identical in behavior to hashvarlena, but keep
* it as a separate function in case we someday want to do something
* different in non-C locales. (See also hashbpchar, if so.)
* different in non-C locales. (See also hashbpchar, if so.)
*/
result = hash_any((unsigned char *) VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key));
@@ -236,7 +236,7 @@ hashvarlena(PG_FUNCTION_ARGS)
*
* This allows some parallelism. Read-after-writes are good at doubling
* the number of bits affected, so the goal of mixing pulls in the opposite
* direction from the goal of parallelism. I did what I could. Rotates
* direction from the goal of parallelism. I did what I could. Rotates
* seem to cost as much as shifts on every machine I could lay my hands on,
* and rotates are much kinder to the top and bottom bits, so I used rotates.
*----------
@@ -270,7 +270,7 @@ hashvarlena(PG_FUNCTION_ARGS)
* substantial performance increase since final() does not need to
* do well in reverse, but is does need to affect all output bits.
* mix(), on the other hand, does not need to affect all output
* bits (affecting 32 bits is enough). The original hash function had
* bits (affecting 32 bits is enough). The original hash function had
* a single mixing operation that had to satisfy both sets of requirements
* and was slower as a result.
*----------
@@ -291,7 +291,7 @@ hashvarlena(PG_FUNCTION_ARGS)
* k : the key (the unaligned variable-length array of bytes)
* len : the length of the key, counting by bytes
*
* Returns a uint32 value. Every bit of the key affects every bit of
* Returns a uint32 value. Every bit of the key affects every bit of
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
* About 6*len+35 instructions. The best hash table sizes are powers
* of 2. There is no need to do mod a prime (mod is sooo slow!).


@@ -89,7 +89,7 @@ _hash_doinsert(Relation rel, IndexTuple itup)
/*
* If the previous iteration of this loop locked what is still the
* correct target bucket, we are done. Otherwise, drop any old lock
* correct target bucket, we are done. Otherwise, drop any old lock
* and lock what now appears to be the correct bucket.
*/
if (retry)


@@ -80,7 +80,7 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
*
* Add an overflow page to the bucket whose last page is pointed to by 'buf'.
*
* On entry, the caller must hold a pin but no lock on 'buf'. The pin is
* On entry, the caller must hold a pin but no lock on 'buf'. The pin is
* dropped before exiting (we assume the caller is not interested in 'buf'
* anymore). The returned overflow page will be pinned and write-locked;
* it is guaranteed to be empty.
@@ -89,12 +89,12 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
* That buffer is returned in the same state.
*
* The caller must hold at least share lock on the bucket, to ensure that
* no one else tries to compact the bucket meanwhile. This guarantees that
* no one else tries to compact the bucket meanwhile. This guarantees that
* 'buf' won't stop being part of the bucket while it's unlocked.
*
* NB: since this could be executed concurrently by multiple processes,
* one should not assume that the returned overflow page will be the
* immediate successor of the originally passed 'buf'. Additional overflow
* immediate successor of the originally passed 'buf'. Additional overflow
* pages might have been added to the bucket chain in between.
*/
Buffer
@@ -157,7 +157,7 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
/*
* _hash_getovflpage()
*
* Find an available overflow page and return it. The returned buffer
* Find an available overflow page and return it. The returned buffer
* is pinned and write-locked, and has had _hash_pageinit() applied,
* but it is caller's responsibility to fill the special space.
*
@@ -253,7 +253,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
* We create the new bitmap page with all pages marked "in use".
* Actually two pages in the new bitmap's range will exist
* immediately: the bitmap page itself, and the following page which
* is the one we return to the caller. Both of these are correctly
* is the one we return to the caller. Both of these are correctly
* marked "in use". Subsequent pages do not exist yet, but it is
* convenient to pre-mark them as "in use" too.
*/
@@ -284,7 +284,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
metap->hashm_spares[splitnum]++;
/*
* Adjust hashm_firstfree to avoid redundant searches. But don't risk
* Adjust hashm_firstfree to avoid redundant searches. But don't risk
* changing it if someone moved it while we were searching bitmap pages.
*/
if (metap->hashm_firstfree == orig_firstfree)
@@ -313,7 +313,7 @@ found:
blkno = bitno_to_blkno(metap, bit);
/*
* Adjust hashm_firstfree to avoid redundant searches. But don't risk
* Adjust hashm_firstfree to avoid redundant searches. But don't risk
* changing it if someone moved it while we were searching bitmap pages.
*/
if (metap->hashm_firstfree == orig_firstfree)
@@ -494,7 +494,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf,
/*
* _hash_initbitmap()
*
* Initialize a new bitmap page. The metapage has a write-lock upon
* Initialize a new bitmap page. The metapage has a write-lock upon
* entering the function, and must be written by caller after return.
*
* 'blkno' is the block number of the new bitmap page.
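The hunks above describe creating a new bitmap page with every bit already set, so that overflow pages which do not exist yet read as "in use". A toy sketch of that initialization, with an invented name rather than the real _hash_initbitmap internals:

#include <stdint.h>
#include <string.h>

/* Pre-mark every overflow page tracked by a fresh bitmap as "in use";
 * individual bits are cleared later as overflow pages are freed. */
static void
init_overflow_bitmap(uint32_t *bitmap_words, size_t nwords)
{
    memset(bitmap_words, 0xFF, nwords * sizeof(uint32_t));
}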

View File

@@ -49,7 +49,7 @@ static void _hash_splitbucket(Relation rel, Buffer metabuf,
* of the locking rules). However, we can skip taking lmgr locks when the
* index is local to the current backend (ie, either temp or new in the
* current transaction). No one else can see it, so there's no reason to
* take locks. We still take buffer-level locks, but not lmgr locks.
* take locks. We still take buffer-level locks, but not lmgr locks.
*/
#define USELOCKING(rel) (!RELATION_IS_LOCAL(rel))
@@ -136,7 +136,7 @@ _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
*
* This must be used only to fetch pages that are known to be before
* the index's filesystem EOF, but are to be filled from scratch.
* _hash_pageinit() is applied automatically. Otherwise it has
* _hash_pageinit() is applied automatically. Otherwise it has
* effects similar to _hash_getbuf() with access = HASH_WRITE.
*
* When this routine returns, a write lock is set on the
@@ -344,7 +344,7 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
/*
* Determine the target fill factor (in tuples per bucket) for this index.
* The idea is to make the fill factor correspond to pages about as full
* as the user-settable fillfactor parameter says. We can compute it
* as the user-settable fillfactor parameter says. We can compute it
* exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
*/
data_width = sizeof(uint32);
@@ -377,7 +377,7 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
/*
* We initialize the metapage, the first N bucket pages, and the first
* bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
* calls to occur. This ensures that the smgr level has the right idea of
* calls to occur. This ensures that the smgr level has the right idea of
* the physical index length.
*/
metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
@@ -545,7 +545,7 @@ _hash_expandtable(Relation rel, Buffer metabuf)
/*
* Determine which bucket is to be split, and attempt to lock the old
* bucket. If we can't get the lock, give up.
* bucket. If we can't get the lock, give up.
*
* The lock protects us against other backends, but not against our own
* backend. Must check for active scans separately.
@@ -603,7 +603,7 @@ _hash_expandtable(Relation rel, Buffer metabuf)
}
/*
* Okay to proceed with split. Update the metapage bucket mapping info.
* Okay to proceed with split. Update the metapage bucket mapping info.
*
* Since we are scribbling on the metapage data right in the shared
* buffer, any failure in this next little bit leaves us with a big
@@ -641,7 +641,7 @@ _hash_expandtable(Relation rel, Buffer metabuf)
* Copy bucket mapping info now; this saves re-accessing the meta page
* inside _hash_splitbucket's inner loop. Note that once we drop the
* split lock, other splits could begin, so these values might be out of
* date before _hash_splitbucket finishes. That's okay, since all it
* date before _hash_splitbucket finishes. That's okay, since all it
* needs is to tell which of these two buckets to map hashkeys into.
*/
maxbucket = metap->hashm_maxbucket;
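The copied maxbucket/highmask/lowmask values are exactly what is needed to route a hash key to one of the two buckets. A simplified sketch of that mapping rule in plain C types, following the behavior documented for _hash_hashkey2bucket rather than quoting it:

#include <stdint.h>

/* Mask with the larger mask first; if that lands beyond the highest
 * bucket created so far, fall back to the smaller mask. */
static uint32_t
hashkey_to_bucket(uint32_t hashkey, uint32_t maxbucket,
                  uint32_t highmask, uint32_t lowmask)
{
    uint32_t bucket = hashkey & highmask;

    if (bucket > maxbucket)
        bucket = bucket & lowmask;
    return bucket;
}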
@@ -876,7 +876,7 @@ _hash_splitbucket(Relation rel,
/*
* We're at the end of the old bucket chain, so we're done partitioning
* the tuples. Before quitting, call _hash_squeezebucket to ensure the
* the tuples. Before quitting, call _hash_squeezebucket to ensure the
* tuples remaining in the old bucket (including the overflow pages) are
* packed as tightly as possible. The new bucket is already tight.
*/

View File

@@ -210,7 +210,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
/*
* If the previous iteration of this loop locked what is still the
* correct target bucket, we are done. Otherwise, drop any old lock
* correct target bucket, we are done. Otherwise, drop any old lock
* and lock what now appears to be the correct bucket.
*/
if (retry)
@@ -269,7 +269,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
* _hash_step() -- step to the next valid item in a scan in the bucket.
*
* If no valid record exists in the requested direction, return
* false. Else, return true and set the hashso_curpos for the
* false. Else, return true and set the hashso_curpos for the
* scan to the right thing.
*
* 'bufP' points to the current buffer, which is pinned and read-locked.

View File

@@ -8,7 +8,7 @@
* thrashing. We use tuplesort.c to sort the given index tuples into order.
*
* Note: if the number of rows in the table has been underestimated,
* bucket splits may occur during the index build. In that case we'd
* bucket splits may occur during the index build. In that case we'd
* be inserting into two or more buckets for each possible masked-off
* hash code value. That's no big problem though, since we'll still have
* plenty of locality of access.
@@ -52,7 +52,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
hspool->index = index;
/*
* Determine the bitmask for hash code values. Since there are currently
* Determine the bitmask for hash code values. Since there are currently
* num_buckets buckets in the index, the appropriate mask can be computed
* as follows.
*
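The comment's "as follows" is cut off by the hunk, but the idea is a mask covering ceil(log2(num_buckets)) bits, so that sorting tuples by masked hash code groups them by destination bucket. A toy version of that computation (invented name, not the _h_spoolinit code itself):

#include <stdint.h>

static uint32_t
bucket_sort_mask(uint32_t num_buckets)
{
    uint32_t bits = 0;

    /* smallest power of 2 >= num_buckets, minus 1, gives the mask */
    while (((uint32_t) 1 << bits) < num_buckets)
        bits++;
    return ((uint32_t) 1 << bits) - 1;
}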

View File

@@ -160,7 +160,7 @@ _hash_checkpage(Relation rel, Buffer buf, int flags)
/*
* ReadBuffer verifies that every newly-read page passes
* PageHeaderIsValid, which means it either contains a reasonably sane
* page header or is all-zero. We have to defend against the all-zero
* page header or is all-zero. We have to defend against the all-zero
* case, however.
*/
if (PageIsNew(page))
@@ -280,7 +280,7 @@ _hash_form_tuple(Relation index, Datum *values, bool *isnull)
*
* Returns the offset of the first index entry having hashkey >= hash_value,
* or the page's max offset plus one if hash_value is greater than all
* existing hash keys in the page. This is the appropriate place to start
* existing hash keys in the page. This is the appropriate place to start
* a search, or to insert a new item.
*/
OffsetNumber
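The contract above is a classic lower-bound binary search: return the first position whose key is >= the probe value, or one past the end if there is none. A self-contained toy equivalent over a plain array (not the page-offset version the index actually uses):

#include <stddef.h>
#include <stdint.h>

static size_t
lower_bound_u32(const uint32_t *elems, size_t nelems, uint32_t key)
{
    size_t lo = 0, hi = nelems;

    while (lo < hi)
    {
        size_t mid = lo + (hi - lo) / 2;

        if (elems[mid] < key)
            lo = mid + 1;       /* first candidate must be after mid */
        else
            hi = mid;           /* mid itself is still a candidate */
    }
    return lo;                  /* == nelems when key > all elements */
}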

View File

@@ -88,11 +88,11 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
HeapTuple newtup, HeapTuple old_key_tup,
bool all_visible_cleared, bool new_all_visible_cleared);
static void HeapSatisfiesHOTandKeyUpdate(Relation relation,
Bitmapset *hot_attrs,
Bitmapset *key_attrs, Bitmapset *id_attrs,
bool *satisfies_hot, bool *satisfies_key,
bool *satisfies_id,
HeapTuple oldtup, HeapTuple newtup);
Bitmapset *hot_attrs,
Bitmapset *key_attrs, Bitmapset *id_attrs,
bool *satisfies_hot, bool *satisfies_key,
bool *satisfies_id,
HeapTuple oldtup, HeapTuple newtup);
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask,
uint16 old_infomask2, TransactionId add_to_xmax,
LockTupleMode mode, bool is_update,
@@ -113,7 +113,7 @@ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status
XLTW_Oper oper, int *remaining);
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup);
static HeapTuple ExtractReplicaIdentity(Relation rel, HeapTuple tup, bool key_modified,
bool *copy);
bool *copy);
/*
@@ -213,7 +213,7 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
* while the scan is in progress will be invisible to my snapshot anyway.
* (That is not true when using a non-MVCC snapshot. However, we couldn't
* guarantee to return tuples added after scan start anyway, since they
* might go into pages we already scanned. To guarantee consistent
* might go into pages we already scanned. To guarantee consistent
* results for a non-MVCC snapshot, the caller must hold some higher-level
* lock that ensures the interesting tuple(s) won't change.)
*/
@@ -221,7 +221,7 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
/*
* If the table is large relative to NBuffers, use a bulk-read access
* strategy and enable synchronized scanning (see syncscan.c). Although
* strategy and enable synchronized scanning (see syncscan.c). Although
* the thresholds for these features could be different, we make them the
* same so that there are only two behaviors to tune rather than four.
* (However, some callers need to be able to disable one or both of these
@@ -325,7 +325,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
}
/*
* Be sure to check for interrupts at least once per page. Checks at
* Be sure to check for interrupts at least once per page. Checks at
* higher code levels won't be able to stop a seqscan that encounters many
* pages' worth of consecutive dead tuples.
*/
@@ -349,7 +349,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
/*
* We must hold share lock on the buffer content while examining tuple
* visibility. Afterwards, however, the tuples we have found to be
* visibility. Afterwards, however, the tuples we have found to be
* visible are guaranteed good as long as we hold the buffer pin.
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
@@ -1126,7 +1126,7 @@ relation_openrv(const RangeVar *relation, LOCKMODE lockmode)
*
* Same as relation_openrv, but with an additional missing_ok argument
* allowing a NULL return rather than an error if the relation is not
* found. (Note that some other causes, such as permissions problems,
* found. (Note that some other causes, such as permissions problems,
* will still result in an ereport.)
* ----------------
*/
@@ -1740,7 +1740,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
/*
* When first_call is true (and thus, skip is initially false) we'll
* return the first tuple we find. But on later passes, heapTuple
* return the first tuple we find. But on later passes, heapTuple
* will initially be pointing to the tuple we returned last time.
* Returning it again would be incorrect (and would loop forever), so
* we skip it and return the next match we find.
@@ -1834,7 +1834,7 @@ heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot,
* possibly uncommitted version.
*
* *tid is both an input and an output parameter: it is updated to
* show the latest version of the row. Note that it will not be changed
* show the latest version of the row. Note that it will not be changed
* if no version of the row passes the snapshot test.
*/
void
@@ -1955,7 +1955,7 @@ heap_get_latest_tid(Relation relation,
*
* This is called after we have waited for the XMAX transaction to terminate.
* If the transaction aborted, we guarantee the XMAX_INVALID hint bit will
* be set on exit. If the transaction committed, we set the XMAX_COMMITTED
* be set on exit. If the transaction committed, we set the XMAX_COMMITTED
* hint bit if possible --- but beware that that may not yet be possible,
* if the transaction committed asynchronously.
*
@@ -2042,7 +2042,7 @@ FreeBulkInsertState(BulkInsertState bistate)
* The return value is the OID assigned to the tuple (either here or by the
* caller), or InvalidOid if no OID. The header fields of *tup are updated
* to match the stored tuple; in particular tup->t_self receives the actual
* TID where the tuple was stored. But note that any toasting of fields
* TID where the tuple was stored. But note that any toasting of fields
* within the tuple data is NOT reflected into *tup.
*/
Oid
@@ -2071,7 +2071,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
* For a heap insert, we only need to check for table-level SSI locks. Our
* new tuple can't possibly conflict with existing tuple locks, and heap
* page locks are only consolidated versions of tuple locks; they do not
* lock "gaps" as index page locks do. So we don't need to identify a
* lock "gaps" as index page locks do. So we don't need to identify a
* buffer before making the call.
*/
CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
@@ -2123,8 +2123,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
bool need_tuple_data;
/*
* For logical decoding, we need the tuple even if we're doing a
* full page write, so make sure to log it separately. (XXX We could
* For logical decoding, we need the tuple even if we're doing a full
* page write, so make sure to log it separately. (XXX We could
* alternatively store a pointer into the FPW).
*
* Also, if this is a catalog, we need to transmit combocids to
@@ -2165,9 +2165,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
rdata[2].next = NULL;
/*
* Make a separate rdata entry for the tuple's buffer if we're
* doing logical decoding, so that an eventual FPW doesn't
* remove the tuple's data.
* Make a separate rdata entry for the tuple's buffer if we're doing
* logical decoding, so that an eventual FPW doesn't remove the
* tuple's data.
*/
if (need_tuple_data)
{
@@ -2248,7 +2248,7 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
/*
* If the object id of this tuple has already been assigned, trust the
* caller. There are a couple of ways this can happen. At initial db
* caller. There are a couple of ways this can happen. At initial db
* creation, the backend program sets oids for tuples. When we define
* an index, we set the oid. Finally, in the future, we may allow
* users to set their own object ids in order to support a persistent
@@ -2342,7 +2342,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
* For a heap insert, we only need to check for table-level SSI locks. Our
* new tuple can't possibly conflict with existing tuple locks, and heap
* page locks are only consolidated versions of tuple locks; they do not
* lock "gaps" as index page locks do. So we don't need to identify a
* lock "gaps" as index page locks do. So we don't need to identify a
* buffer before making the call.
*/
CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
@@ -2356,7 +2356,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
int nthispage;
/*
* Find buffer where at least the next tuple will fit. If the page is
* Find buffer where at least the next tuple will fit. If the page is
* all-visible, this will also pin the requisite visibility map page.
*/
buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
@@ -2487,9 +2487,9 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
rdata[1].next = NULL;
/*
* Make a separate rdata entry for the tuple's buffer if
* we're doing logical decoding, so that an eventual FPW
* doesn't remove the tuple's data.
* Make a separate rdata entry for the tuple's buffer if we're
* doing logical decoding, so that an eventual FPW doesn't remove
* the tuple's data.
*/
if (need_tuple_data)
{
@@ -2597,8 +2597,8 @@ compute_infobits(uint16 infomask, uint16 infomask2)
static inline bool
xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
{
const uint16 interesting =
HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY | HEAP_LOCK_MASK;
const uint16 interesting =
HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY | HEAP_LOCK_MASK;
if ((new_infomask & interesting) != (old_infomask & interesting))
return true;
@@ -2650,7 +2650,7 @@ heap_delete(Relation relation, ItemPointer tid,
bool have_tuple_lock = false;
bool iscombo;
bool all_visible_cleared = false;
HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
bool old_key_copied = false;
Assert(ItemPointerIsValid(tid));
@@ -2751,10 +2751,10 @@ l1:
/*
* You might think the multixact is necessarily done here, but not
* so: it could have surviving members, namely our own xact or
* other subxacts of this backend. It is legal for us to delete
* other subxacts of this backend. It is legal for us to delete
* the tuple in either case, however (the latter case is
* essentially a situation of upgrading our former shared lock to
* exclusive). We don't bother changing the on-disk hint bits
* exclusive). We don't bother changing the on-disk hint bits
* since we are about to overwrite the xmax altogether.
*/
}
@@ -2836,7 +2836,7 @@ l1:
* If this is the first possibly-multixact-able operation in the current
* transaction, set my per-backend OldestMemberMXactId setting. We can be
* certain that the transaction will never become a member of any older
* MultiXactIds than that. (We have to do this even if we end up just
* MultiXactIds than that. (We have to do this even if we end up just
* using our own TransactionId below, since some other backend could
* incorporate our XID into a MultiXact immediately afterwards.)
*/
@@ -2852,7 +2852,7 @@ l1:
/*
* If this transaction commits, the tuple will become DEAD sooner or
* later. Set flag that this page is a candidate for pruning once our xid
* falls below the OldestXmin horizon. If the transaction finally aborts,
* falls below the OldestXmin horizon. If the transaction finally aborts,
* the subsequent page pruning will be a no-op and the hint will be
* cleared.
*/
@@ -2919,7 +2919,7 @@ l1:
xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
rdata[1].next = &(rdata[2]);
rdata[2].data = (char*)&xlhdr;
rdata[2].data = (char *) &xlhdr;
rdata[2].len = SizeOfHeapHeader;
rdata[2].buffer = InvalidBuffer;
rdata[2].next = NULL;
@@ -2994,7 +2994,7 @@ l1:
*
* This routine may be used to delete a tuple when concurrent updates of
* the target tuple are not expected (for example, because we have a lock
* on the relation associated with the tuple). Any failure is reported
* on the relation associated with the tuple). Any failure is reported
* via ereport().
*/
void
@@ -3110,7 +3110,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
/*
* Fetch the list of attributes to be checked for HOT update. This is
* wasted effort if we fail to update or have to put the new tuple on a
* different page. But we must compute the list before obtaining buffer
* different page. But we must compute the list before obtaining buffer
* lock --- in the worst case, if we are doing an update on one of the
* relevant system catalogs, we could deadlock if we try to fetch the list
* later. In any case, the relcache caches the data so this is usually
@@ -3122,7 +3122,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
hot_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_ALL);
key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
id_attrs = RelationGetIndexAttrBitmap(relation,
INDEX_ATTR_BITMAP_IDENTITY_KEY);
INDEX_ATTR_BITMAP_IDENTITY_KEY);
block = ItemPointerGetBlockNumber(otid);
buffer = ReadBuffer(relation, block);
@@ -3193,7 +3193,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
* If this is the first possibly-multixact-able operation in the
* current transaction, set my per-backend OldestMemberMXactId
* setting. We can be certain that the transaction will never become a
* member of any older MultiXactIds than that. (We have to do this
* member of any older MultiXactIds than that. (We have to do this
* even if we end up just using our own TransactionId below, since
* some other backend could incorporate our XID into a MultiXact
* immediately afterwards.)
@@ -3238,7 +3238,7 @@ l2:
/*
* XXX note that we don't consider the "no wait" case here. This
* isn't a problem currently because no caller uses that case, but it
* should be fixed if such a caller is introduced. It wasn't a
* should be fixed if such a caller is introduced. It wasn't a
* problem previously because this code would always wait, but now
* that some tuple locks do not conflict with one of the lock modes we
* use, it is possible that this case is interesting to handle
@@ -3276,7 +3276,7 @@ l2:
* it as locker, unless it is gone completely.
*
* If it's not a multi, we need to check for sleeping conditions
* before actually going to sleep. If the update doesn't conflict
* before actually going to sleep. If the update doesn't conflict
* with the locks, we just continue without sleeping (but making sure
* it is preserved).
*/
@@ -3302,10 +3302,10 @@ l2:
goto l2;
/*
* Note that the multixact may not be done by now. It could have
* Note that the multixact may not be done by now. It could have
* surviving members; our own xact or other subxacts of this
* backend, and also any other concurrent transaction that locked
* the tuple with KeyShare if we only got TupleLockUpdate. If
* the tuple with KeyShare if we only got TupleLockUpdate. If
* this is the case, we have to be careful to mark the updated
* tuple with the surviving members in Xmax.
*
@@ -3512,7 +3512,7 @@ l2:
* If the toaster needs to be activated, OR if the new tuple will not fit
* on the same page as the old, then we need to release the content lock
* (but not the pin!) on the old tuple's buffer while we are off doing
* TOAST and/or table-file-extension work. We must mark the old tuple to
* TOAST and/or table-file-extension work. We must mark the old tuple to
* show that it's already being updated, else other processes may try to
* update it themselves.
*
@@ -3578,7 +3578,7 @@ l2:
* there's more free now than before.
*
* What's more, if we need to get a new page, we will need to acquire
* buffer locks on both old and new pages. To avoid deadlock against
* buffer locks on both old and new pages. To avoid deadlock against
* some other backend trying to get the same two locks in the other
* order, we must be consistent about the order we get the locks in.
* We use the rule "lock the lower-numbered page of the relation
@@ -3638,7 +3638,7 @@ l2:
/*
* At this point newbuf and buffer are both pinned and locked, and newbuf
* has enough space for the new tuple. If they are the same buffer, only
* has enough space for the new tuple. If they are the same buffer, only
* one pin is held.
*/
@@ -3646,7 +3646,7 @@ l2:
{
/*
* Since the new tuple is going into the same page, we might be able
* to do a HOT update. Check if any of the index columns have been
* to do a HOT update. Check if any of the index columns have been
* changed. If not, then HOT update is possible.
*/
if (satisfies_hot)
@@ -3672,13 +3672,13 @@ l2:
/*
* If this transaction commits, the old tuple will become DEAD sooner or
* later. Set flag that this page is a candidate for pruning once our xid
* falls below the OldestXmin horizon. If the transaction finally aborts,
* falls below the OldestXmin horizon. If the transaction finally aborts,
* the subsequent page pruning will be a no-op and the hint will be
* cleared.
*
* XXX Should we set hint on newbuf as well? If the transaction aborts,
* there would be a prunable tuple in the newbuf; but for now we choose
* not to optimize for aborts. Note that heap_xlog_update must be kept in
* not to optimize for aborts. Note that heap_xlog_update must be kept in
* sync if this decision changes.
*/
PageSetPrunable(page, xid);
@@ -3775,7 +3775,7 @@ l2:
* Mark old tuple for invalidation from system caches at next command
* boundary, and mark the new tuple for invalidation in case we abort. We
* have to do this before releasing the buffer because oldtup is in the
* buffer. (heaptup is all in local memory, but it's necessary to process
* buffer. (heaptup is all in local memory, but it's necessary to process
* both tuple versions in one call to inval.c so we can avoid redundant
* sinval messages.)
*/
@@ -3853,7 +3853,7 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
/*
* Extract the corresponding values. XXX this is pretty inefficient if
* there are many indexed columns. Should HeapSatisfiesHOTandKeyUpdate do
* there are many indexed columns. Should HeapSatisfiesHOTandKeyUpdate do
* a single heap_deform_tuple call on each tuple, instead? But that
* doesn't work for system columns ...
*/
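For orientation, the extraction and comparison this comment talks about boil down to fetching each column with heap_getattr and comparing raw datums with datumIsEqual; a condensed sketch covering user columns only (helper name invented, not the exact heap_tuple_attr_equals code):

static bool
attr_binary_equal(HeapTuple tup1, HeapTuple tup2, int attrnum,
                  TupleDesc tupdesc)
{
    bool    isnull1,
            isnull2;
    Datum   value1 = heap_getattr(tup1, attrnum, tupdesc, &isnull1);
    Datum   value2 = heap_getattr(tup2, attrnum, tupdesc, &isnull2);
    Form_pg_attribute att = tupdesc->attrs[attrnum - 1];

    if (isnull1 || isnull2)
        return isnull1 && isnull2;      /* equal only if both are NULL */
    return datumIsEqual(value1, value2, att->attbyval, att->attlen);
}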
@@ -3876,7 +3876,7 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
/*
* We do simple binary comparison of the two datums. This may be overly
* strict because there can be multiple binary representations for the
* same logical value. But we should be OK as long as there are no false
* same logical value. But we should be OK as long as there are no false
* positives. Using a type-specific equality operator is messy because
* there could be multiple notions of equality in different operator
* classes; furthermore, we cannot safely invoke user-defined functions
@@ -3951,8 +3951,7 @@ HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs,
/*
* Since the HOT attributes are a superset of the key attributes and
* the key attributes are a superset of the id attributes, this logic
* is guaranteed to identify the next column that needs to be
* checked.
* is guaranteed to identify the next column that needs to be checked.
*/
if (hot_result && next_hot_attnum > FirstLowInvalidHeapAttributeNumber)
check_now = next_hot_attnum;
@@ -3981,12 +3980,11 @@ HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs,
}
/*
* Advance the next attribute numbers for the sets that contain
* the attribute we just checked. As we work our way through the
* columns, the next_attnum values will rise; but when each set
* becomes empty, bms_first_member() will return -1 and the attribute
* number will end up with a value less than
* FirstLowInvalidHeapAttributeNumber.
* Advance the next attribute numbers for the sets that contain the
* attribute we just checked. As we work our way through the columns,
* the next_attnum values will rise; but when each set becomes empty,
* bms_first_member() will return -1 and the attribute number will end
* up with a value less than FirstLowInvalidHeapAttributeNumber.
*/
if (hot_result && check_now == next_hot_attnum)
{
@@ -4015,7 +4013,7 @@ HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs,
*
* This routine may be used to update a tuple when concurrent updates of
* the target tuple are not expected (for example, because we have a lock
* on the relation associated with the tuple). Any failure is reported
* on the relation associated with the tuple). Any failure is reported
* via ereport().
*/
void
@@ -4057,7 +4055,7 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
static MultiXactStatus
get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
{
int retval;
int retval;
if (is_update)
retval = tupleLockExtraInfo[mode].updstatus;
@@ -4239,15 +4237,15 @@ l3:
* However, if there are updates, we need to walk the update chain
* to mark future versions of the row as locked, too. That way,
* if somebody deletes that future version, we're protected
* against the key going away. This locking of future versions
* against the key going away. This locking of future versions
* could block momentarily, if a concurrent transaction is
* deleting a key; or it could return a value to the effect that
* the transaction deleting the key has already committed. So we
* the transaction deleting the key has already committed. So we
* do this before re-locking the buffer; otherwise this would be
* prone to deadlocks.
*
* Note that the TID we're locking was grabbed before we unlocked
* the buffer. For it to change while we're not looking, the
* the buffer. For it to change while we're not looking, the
* other properties we're testing for below after re-locking the
* buffer would also change, in which case we would restart this
* loop above.
@@ -4472,7 +4470,7 @@ l3:
* Of course, the multixact might not be done here: if we're
* requesting a light lock mode, other transactions with light
* locks could still be alive, as well as locks owned by our
* own xact or other subxacts of this backend. We need to
* own xact or other subxacts of this backend. We need to
* preserve the surviving MultiXact members. Note that it
* isn't absolutely necessary in the latter case, but doing so
* is simpler.
@@ -4516,7 +4514,7 @@ l3:
/*
* xwait is done, but if xwait had just locked the tuple then
* some other xact could update this tuple before we get to
* this point. Check for xmax change, and start over if so.
* this point. Check for xmax change, and start over if so.
*/
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
!TransactionIdEquals(
@@ -4525,7 +4523,7 @@ l3:
goto l3;
/*
* Otherwise check if it committed or aborted. Note we cannot
* Otherwise check if it committed or aborted. Note we cannot
* be here if the tuple was only locked by somebody who didn't
* conflict with us; that should have been handled above. So
* that transaction must necessarily be gone by now.
@@ -4605,7 +4603,7 @@ failed:
* If this is the first possibly-multixact-able operation in the current
* transaction, set my per-backend OldestMemberMXactId setting. We can be
* certain that the transaction will never become a member of any older
* MultiXactIds than that. (We have to do this even if we end up just
* MultiXactIds than that. (We have to do this even if we end up just
* using our own TransactionId below, since some other backend could
* incorporate our XID into a MultiXact immediately afterwards.)
*/
@@ -4641,7 +4639,7 @@ failed:
HeapTupleHeaderSetXmax(tuple->t_data, xid);
/*
* Make sure there is no forward chain link in t_ctid. Note that in the
* Make sure there is no forward chain link in t_ctid. Note that in the
* cases where the tuple has been updated, we must not overwrite t_ctid,
* because it was set by the updater. Moreover, if the tuple has been
* updated, we need to follow the update chain to lock the new versions of
@@ -4653,8 +4651,8 @@ failed:
MarkBufferDirty(*buffer);
/*
* XLOG stuff. You might think that we don't need an XLOG record because
* there is no state change worth restoring after a crash. You would be
* XLOG stuff. You might think that we don't need an XLOG record because
* there is no state change worth restoring after a crash. You would be
* wrong however: we have just written either a TransactionId or a
* MultiXactId that may never have been seen on disk before, and we need
* to make sure that there are XLOG entries covering those ID numbers.
@@ -4818,7 +4816,7 @@ l5:
* If the XMAX is already a MultiXactId, then we need to expand it to
* include add_to_xmax; but if all the members were lockers and are
* all gone, we can do away with the IS_MULTI bit and just set
* add_to_xmax as the only locker/updater. If all lockers are gone
* add_to_xmax as the only locker/updater. If all lockers are gone
* and we have an updater that aborted, we can also do without a
* multi.
*
@@ -4881,7 +4879,7 @@ l5:
*/
MultiXactStatus new_status;
MultiXactStatus old_status;
LockTupleMode old_mode;
LockTupleMode old_mode;
if (HEAP_XMAX_IS_LOCKED_ONLY(old_infomask))
{
@@ -4900,8 +4898,8 @@ l5:
{
/*
* LOCK_ONLY can be present alone only when a page has been
* upgraded by pg_upgrade. But in that case,
* TransactionIdIsInProgress() should have returned false. We
* upgraded by pg_upgrade. But in that case,
* TransactionIdIsInProgress() should have returned false. We
* assume it's no longer locked in this case.
*/
elog(WARNING, "LOCK_ONLY found for Xid in progress %u", xmax);
@@ -4929,12 +4927,13 @@ l5:
if (xmax == add_to_xmax)
{
/*
* Note that it's not possible for the original tuple to be updated:
* we wouldn't be here because the tuple would have been invisible and
* we wouldn't try to update it. As a subtlety, this code can also
* run when traversing an update chain to lock future versions of a
* tuple. But we wouldn't be here either, because the add_to_xmax
* would be different from the original updater.
* Note that it's not possible for the original tuple to be
* updated: we wouldn't be here because the tuple would have been
* invisible and we wouldn't try to update it. As a subtlety,
* this code can also run when traversing an update chain to lock
* future versions of a tuple. But we wouldn't be here either,
* because the add_to_xmax would be different from the original
* updater.
*/
Assert(HEAP_XMAX_IS_LOCKED_ONLY(old_infomask));
@@ -5013,7 +5012,7 @@ static HTSU_Result
test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
LockTupleMode mode, bool *needwait)
{
MultiXactStatus wantedstatus;
MultiXactStatus wantedstatus;
*needwait = false;
wantedstatus = get_mxact_status_for_lock(mode, false);
@@ -5026,18 +5025,18 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
if (TransactionIdIsCurrentTransactionId(xid))
{
/*
* Updated by our own transaction? Just return failure. This shouldn't
* normally happen.
* Updated by our own transaction? Just return failure. This
* shouldn't normally happen.
*/
return HeapTupleSelfUpdated;
}
else if (TransactionIdIsInProgress(xid))
{
/*
* If the locking transaction is running, what we do depends on whether
* the lock modes conflict: if they do, then we must wait for it to
* finish; otherwise we can fall through to lock this tuple version
* without waiting.
* If the locking transaction is running, what we do depends on
* whether the lock modes conflict: if they do, then we must wait for
* it to finish; otherwise we can fall through to lock this tuple
* version without waiting.
*/
if (DoLockModesConflict(LOCKMODE_from_mxstatus(status),
LOCKMODE_from_mxstatus(wantedstatus)))
@@ -5046,8 +5045,8 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
}
/*
* If we set needwait above, then this value doesn't matter; otherwise,
* this value signals to caller that it's okay to proceed.
* If we set needwait above, then this value doesn't matter;
* otherwise, this value signals to caller that it's okay to proceed.
*/
return HeapTupleMayBeUpdated;
}
@@ -5059,7 +5058,7 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
* The other transaction committed. If it was only a locker, then the
* lock is completely gone now and we can return success; but if it
* was an update, then what we do depends on whether the two lock
* modes conflict. If they conflict, then we must report error to
* modes conflict. If they conflict, then we must report error to
* caller. But if they don't, we can fall through to allow the current
* transaction to lock the tuple.
*
@@ -5133,8 +5132,8 @@ l4:
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
/*
* Check the tuple XMIN against prior XMAX, if any. If we reached
* the end of the chain, we're done, so return success.
* Check the tuple XMIN against prior XMAX, if any. If we reached the
* end of the chain, we're done, so return success.
*/
if (TransactionIdIsValid(priorXmax) &&
!TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data),
@@ -5162,14 +5161,14 @@ l4:
rawxmax = HeapTupleHeaderGetRawXmax(mytup.t_data);
if (old_infomask & HEAP_XMAX_IS_MULTI)
{
int nmembers;
int i;
int nmembers;
int i;
MultiXactMember *members;
nmembers = GetMultiXactIdMembers(rawxmax, &members, false);
for (i = 0; i < nmembers; i++)
{
HTSU_Result res;
HTSU_Result res;
res = test_lockmode_for_conflict(members[i].status,
members[i].xid,
@@ -5196,7 +5195,7 @@ l4:
}
else
{
HTSU_Result res;
HTSU_Result res;
MultiXactStatus status;
/*
@@ -5219,9 +5218,9 @@ l4:
else
{
/*
* LOCK_ONLY present alone (a pg_upgraded tuple
* marked as share-locked in the old cluster) shouldn't
* be seen in the middle of an update chain.
* LOCK_ONLY present alone (a pg_upgraded tuple marked
* as share-locked in the old cluster) shouldn't be
* seen in the middle of an update chain.
*/
elog(ERROR, "invalid lock status in tuple");
}
@@ -5323,7 +5322,7 @@ l4:
* The initial tuple is assumed to be already locked.
*
* This function doesn't check visibility, it just inconditionally marks the
* tuple(s) as locked. If any tuple in the updated chain is being deleted
* tuple(s) as locked. If any tuple in the updated chain is being deleted
* concurrently (or updated with the key being modified), sleep until the
* transaction doing it is finished.
*
@@ -5347,7 +5346,7 @@ heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid,
* If this is the first possibly-multixact-able operation in the
* current transaction, set my per-backend OldestMemberMXactId
* setting. We can be certain that the transaction will never become a
* member of any older MultiXactIds than that. (We have to do this
* member of any older MultiXactIds than that. (We have to do this
* even if we end up just using our own TransactionId below, since
* some other backend could incorporate our XID into a MultiXact
* immediately afterwards.)
@@ -5366,7 +5365,7 @@ heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid,
* heap_inplace_update - update a tuple "in place" (ie, overwrite it)
*
* Overwriting violates both MVCC and transactional safety, so the uses
* of this function in Postgres are extremely limited. Nonetheless we
* of this function in Postgres are extremely limited. Nonetheless we
* find some places to use it.
*
* The tuple cannot change size, and therefore it's reasonable to assume
@@ -5608,7 +5607,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
*/
if (ISUPDATE_from_mxstatus(members[i].status))
{
TransactionId xid = members[i].xid;
TransactionId xid = members[i].xid;
/*
* It's an update; should we keep it? If the transaction is known
@@ -5728,7 +5727,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
* heap_prepare_freeze_tuple
*
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
* are older than the specified cutoff XID and cutoff MultiXactId. If so,
* are older than the specified cutoff XID and cutoff MultiXactId. If so,
* setup enough state (in the *frz output argument) to later execute and
* WAL-log what we would need to do, and return TRUE. Return FALSE if nothing
* is to be changed.
@@ -5801,11 +5800,11 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
else if (flags & FRM_RETURN_IS_XID)
{
/*
* NB -- some of these transformations are only valid because
* we know the return Xid is a tuple updater (i.e. not merely a
* NB -- some of these transformations are only valid because we
* know the return Xid is a tuple updater (i.e. not merely a
* locker.) Also note that the only reason we don't explicitely
* worry about HEAP_KEYS_UPDATED is because it lives in t_infomask2
* rather than t_infomask.
* worry about HEAP_KEYS_UPDATED is because it lives in
* t_infomask2 rather than t_infomask.
*/
frz->t_infomask &= ~HEAP_XMAX_BITS;
frz->xmax = newxmax;
@@ -5815,8 +5814,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
}
else if (flags & FRM_RETURN_IS_MULTI)
{
uint16 newbits;
uint16 newbits2;
uint16 newbits;
uint16 newbits2;
/*
* We can't use GetMultiXactIdHintBits directly on the new multi
@@ -5851,7 +5850,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
/*
* The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
* LOCKED. Normalize to INVALID just to be sure no one gets confused.
* LOCKED. Normalize to INVALID just to be sure no one gets confused.
* Also get rid of the HEAP_KEYS_UPDATED bit.
*/
frz->t_infomask &= ~HEAP_XMAX_BITS;
@@ -6111,7 +6110,7 @@ HeapTupleGetUpdateXid(HeapTupleHeader tuple)
* used to optimize multixact access in case it's a lock-only multi); 'nowait'
* indicates whether to use conditional lock acquisition, to allow callers to
* fail if lock is unavailable. 'rel', 'ctid' and 'oper' are used to set up
* context information for error messages. 'remaining', if not NULL, receives
* context information for error messages. 'remaining', if not NULL, receives
* the number of members that are still running, including any (non-aborted)
* subtransactions of our own transaction.
*
@@ -6173,7 +6172,7 @@ Do_MultiXactIdWait(MultiXactId multi, MultiXactStatus status,
* return failure, if asked to avoid waiting.)
*
* Note that we don't set up an error context callback ourselves,
* but instead we pass the info down to XactLockTableWait. This
* but instead we pass the info down to XactLockTableWait. This
* might seem a bit wasteful because the context is set up and
* tore down for each member of the multixact, but in reality it
* should be barely noticeable, and it avoids duplicate code.
@@ -6242,7 +6241,7 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
* heap_tuple_needs_freeze
*
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
* are older than the specified cutoff XID or MultiXactId. If so, return TRUE.
* are older than the specified cutoff XID or MultiXactId. If so, return TRUE.
*
* It doesn't matter whether the tuple is alive or dead, we are checking
* to see if a tuple needs to be removed or frozen to avoid wraparound.
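The core of such an age test is just TransactionIdPrecedes against the cutoff, restricted to normal XIDs; a minimal sketch using the transam.h helpers (the multixact handling and xvac cases are omitted):

static bool
xid_older_than_cutoff(TransactionId xid, TransactionId cutoff_xid)
{
    return TransactionIdIsNormal(xid) &&
        TransactionIdPrecedes(xid, cutoff_xid);
}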
@@ -6366,7 +6365,7 @@ heap_restrpos(HeapScanDesc scan)
else
{
/*
* If we reached end of scan, rs_inited will now be false. We must
* If we reached end of scan, rs_inited will now be false. We must
* reset it to true to keep heapgettup from doing the wrong thing.
*/
scan->rs_inited = true;
@@ -6548,7 +6547,7 @@ log_heap_clean(Relation reln, Buffer buffer,
}
/*
* Perform XLogInsert for a heap-freeze operation. Caller must have already
* Perform XLogInsert for a heap-freeze operation. Caller must have already
* modified the buffer and marked it dirty.
*/
XLogRecPtr
@@ -6593,7 +6592,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
/*
* Perform XLogInsert for a heap-visible operation. 'block' is the block
* being marked all-visible, and vm_buffer is the buffer containing the
* corresponding visibility map block. Both should have already been modified
* corresponding visibility map block. Both should have already been modified
* and dirtied.
*
* If checksums are enabled, we also add the heap_buffer to the chain to
@@ -6642,7 +6641,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
}
/*
* Perform XLogInsert for a heap-update operation. Caller must already
* Perform XLogInsert for a heap-update operation. Caller must already
* have modified the buffer(s) and marked them dirty.
*/
static XLogRecPtr
@@ -6674,10 +6673,10 @@ log_heap_update(Relation reln, Buffer oldbuf,
info = XLOG_HEAP_UPDATE;
/*
* If the old and new tuple are on the same page, we only need to log
* the parts of the new tuple that were changed. That saves on the amount
* of WAL we need to write. Currently, we just count any unchanged bytes
* in the beginning and end of the tuple. That's quick to check, and
* If the old and new tuple are on the same page, we only need to log the
* parts of the new tuple that were changed. That saves on the amount of
* WAL we need to write. Currently, we just count any unchanged bytes in
* the beginning and end of the tuple. That's quick to check, and
* perfectly covers the common case that only one field is updated.
*
* We could do this even if the old and new tuple are on different pages,
@@ -6688,10 +6687,10 @@ log_heap_update(Relation reln, Buffer oldbuf,
* updates tend to create the new tuple version on the same page, there
* isn't much to be gained by doing this across pages anyway.
*
* Skip this if we're taking a full-page image of the new page, as we don't
* include the new tuple in the WAL record in that case. Also disable if
* wal_level='logical', as logical decoding needs to be able to read the
* new tuple in whole from the WAL record alone.
* Skip this if we're taking a full-page image of the new page, as we
* don't include the new tuple in the WAL record in that case. Also
* disable if wal_level='logical', as logical decoding needs to be able to
* read the new tuple in whole from the WAL record alone.
*/
if (oldbuf == newbuf && !need_tuple_data &&
!XLogCheckBufferNeedsBackup(newbuf))
@@ -6707,6 +6706,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
if (newp[prefixlen] != oldp[prefixlen])
break;
}
/*
* Storing the length of the prefix takes 2 bytes, so we need to save
* at least 3 bytes or there's no point.
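A self-contained toy version of the prefix/suffix trimming being discussed (invented name; as the comment notes, the real code also refuses to store a prefix unless it saves at least 3 bytes, since its length takes 2 bytes to record):

#include <stddef.h>

static void
common_affixes(const char *oldp, size_t oldlen,
               const char *newp, size_t newlen,
               size_t *prefixlen, size_t *suffixlen)
{
    size_t  maxcommon = (oldlen < newlen) ? oldlen : newlen;
    size_t  pre = 0,
            suf = 0;

    while (pre < maxcommon && oldp[pre] == newp[pre])
        pre++;
    while (suf < maxcommon - pre &&
           oldp[oldlen - 1 - suf] == newp[newlen - 1 - suf])
        suf++;
    *prefixlen = pre;           /* bytes shared at the start */
    *suffixlen = suf;           /* bytes shared at the end   */
}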
@@ -6793,8 +6793,8 @@ log_heap_update(Relation reln, Buffer oldbuf,
xlhdr.header.t_infomask2 = newtup->t_data->t_infomask2;
xlhdr.header.t_infomask = newtup->t_data->t_infomask;
xlhdr.header.t_hoff = newtup->t_data->t_hoff;
Assert(offsetof(HeapTupleHeaderData, t_bits) + prefixlen + suffixlen <= newtup->t_len);
xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) - prefixlen - suffixlen;
Assert(offsetof(HeapTupleHeaderData, t_bits) +prefixlen + suffixlen <= newtup->t_len);
xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -prefixlen - suffixlen;
/*
* As with insert records, we need not store this rdata segment if we
@@ -6816,7 +6816,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
if (prefixlen == 0)
{
rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) - suffixlen;
rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -suffixlen;
rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
rdata[nr].buffer_std = true;
rdata[nr].next = NULL;
@@ -6829,7 +6829,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
* two separate rdata entries.
*/
/* bitmap [+ padding] [+ oid] */
if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) > 0)
if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) >0)
{
rdata[nr - 1].next = &(rdata[nr]);
rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
@@ -6853,13 +6853,13 @@ log_heap_update(Relation reln, Buffer oldbuf,
/*
* Separate storage for the FPW buffer reference of the new page in the
* wal_level >= logical case.
*/
*/
if (need_tuple_data)
{
rdata[nr - 1].next = &(rdata[nr]);
rdata[nr].data = NULL,
rdata[nr].len = 0;
rdata[nr].len = 0;
rdata[nr].buffer = newbufref;
rdata[nr].buffer_std = true;
rdata[nr].next = NULL;
@@ -6992,8 +6992,8 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
/*
* The page may be uninitialized. If so, we can't set the LSN because
* that would corrupt the page.
* The page may be uninitialized. If so, we can't set the LSN because that
* would corrupt the page.
*/
if (!PageIsNew(page))
{
@@ -7173,14 +7173,14 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool *
*/
for (natt = 0; natt < idx_desc->natts; natt++)
{
int attno = idx_rel->rd_index->indkey.values[natt];
int attno = idx_rel->rd_index->indkey.values[natt];
if (attno < 0)
{
/*
* The OID column can appear in an index definition, but that's
* OK, becuse we always copy the OID if present (see below).
* Other system columns may not.
* OK, becuse we always copy the OID if present (see below). Other
* system columns may not.
*/
if (attno == ObjectIdAttributeNumber)
continue;
@@ -7210,7 +7210,8 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool *
*/
if (HeapTupleHasExternal(key_tuple))
{
HeapTuple oldtup = key_tuple;
HeapTuple oldtup = key_tuple;
key_tuple = toast_flatten_tuple(oldtup, RelationGetDescr(relation));
heap_freetuple(oldtup);
}
@@ -7963,7 +7964,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
/*
* In normal operation, it is important to lock the two pages in
* page-number order, to avoid possible deadlocks against other update
* operations going the other way. However, during WAL replay there can
* operations going the other way. However, during WAL replay there can
* be no other update happening, so we don't need to worry about that. But
* we *do* need to worry that we don't expose an inconsistent state to Hot
* Standby queries --- so the original page can't be unlocked before we've
@@ -8169,7 +8170,7 @@ newsame:;
if (suffixlen > 0)
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
newlen = offsetof(HeapTupleHeaderData, t_bits) + xlhdr.t_len + prefixlen + suffixlen;
newlen = offsetof(HeapTupleHeaderData, t_bits) +xlhdr.t_len + prefixlen + suffixlen;
htup->t_infomask2 = xlhdr.header.t_infomask2;
htup->t_infomask = xlhdr.header.t_infomask;
htup->t_hoff = xlhdr.header.t_hoff;
@@ -8444,6 +8445,7 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record)
heap_xlog_lock_updated(lsn, record);
break;
case XLOG_HEAP2_NEW_CID:
/*
* Nothing to do on a real replay, only used during logical
* decoding.

View File

@@ -146,7 +146,7 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
/*
* If there are two buffers involved and we pinned just one of them,
* it's possible that the second one became all-visible while we were
* busy pinning the first one. If it looks like that's a possible
* busy pinning the first one. If it looks like that's a possible
* scenario, we'll need to make a second pass through this loop.
*/
if (buffer2 == InvalidBuffer || buffer1 == buffer2
@@ -177,7 +177,7 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
* NOTE: it is unlikely, but not quite impossible, for otherBuffer to be the
* same buffer we select for insertion of the new tuple (this could only
* happen if space is freed in that page after heap_update finds there's not
* enough there). In that case, the page will be pinned and locked only once.
* enough there). In that case, the page will be pinned and locked only once.
*
* For the vmbuffer and vmbuffer_other arguments, we avoid deadlock by
* locking them only after locking the corresponding heap page, and taking
@@ -198,7 +198,7 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
* for additional constraints needed for safe usage of this behavior.)
*
* The caller can also provide a BulkInsertState object to optimize many
* insertions into the same relation. This keeps a pin on the current
* insertions into the same relation. This keeps a pin on the current
* insertion target page (to save pin/unpin cycles) and also passes a
* BULKWRITE buffer selection strategy object to the buffer manager.
* Passing NULL for bistate selects the default behavior.
@@ -251,7 +251,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
/*
* We first try to put the tuple on the same page we last inserted a tuple
* on, as cached in the BulkInsertState or relcache entry. If that
* on, as cached in the BulkInsertState or relcache entry. If that
* doesn't work, we ask the Free Space Map to locate a suitable page.
* Since the FSM's info might be out of date, we have to be prepared to
* loop around and retry multiple times. (To insure this isn't an infinite
@@ -283,7 +283,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
/*
* If the FSM knows nothing of the rel, try the last page before we
* give up and extend. This avoids one-tuple-per-page syndrome during
* give up and extend. This avoids one-tuple-per-page syndrome during
* bootstrapping or in a recently-started system.
*/
if (targetBlock == InvalidBlockNumber)
@@ -305,7 +305,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
* If the page-level all-visible flag is set, caller will need to
* clear both that and the corresponding visibility map bit. However,
* by the time we return, we'll have x-locked the buffer, and we don't
* want to do any I/O while in that state. So we check the bit here
* want to do any I/O while in that state. So we check the bit here
* before taking the lock, and pin the page if it appears necessary.
* Checking without the lock creates a risk of getting the wrong
* answer, so we'll have to recheck after acquiring the lock.
@@ -347,7 +347,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
/*
* We now have the target page (and the other buffer, if any) pinned
* and locked. However, since our initial PageIsAllVisible checks
* and locked. However, since our initial PageIsAllVisible checks
* were performed before acquiring the lock, the results might now be
* out of date, either for the selected victim buffer, or for the
* other buffer passed by the caller. In that case, we'll need to
@@ -390,7 +390,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
/*
* Not enough space, so we must give up our page locks and pin (if
* any) and prepare to look elsewhere. We don't care which order we
* any) and prepare to look elsewhere. We don't care which order we
* unlock the two buffers in, so this can be slightly simpler than the
* code above.
*/
@@ -432,7 +432,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
/*
* XXX This does an lseek - rather expensive - but at the moment it is the
* only way to accurately determine how many blocks are in a relation. Is
* only way to accurately determine how many blocks are in a relation. Is
* it worth keeping an accurate file length in shared memory someplace,
* rather than relying on the kernel to do it for us?
*/
@@ -452,7 +452,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
/*
* Release the file-extension lock; it's now OK for someone else to extend
* the relation some more. Note that we cannot release this lock before
* the relation some more. Note that we cannot release this lock before
* we have buffer lock on the new page, or we risk a race condition
* against vacuumlazy.c --- see comments therein.
*/
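The ordering this comment insists on looks roughly like the following, using the lmgr/bufmgr calls by name; this is a sketch under that assumption, with error handling and the BulkInsertState path omitted:

LockRelationForExtension(relation, ExclusiveLock);
buffer = ReadBuffer(relation, P_NEW);               /* adds a new block  */
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);          /* lock it first ... */
UnlockRelationForExtension(relation, ExclusiveLock);    /* ... then let others extend */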

View File

@@ -117,7 +117,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
* Checking free space here is questionable since we aren't holding any
* lock on the buffer; in the worst case we could get a bogus answer. It's
* unlikely to be *seriously* wrong, though, since reading either pd_lower
* or pd_upper is probably atomic. Avoiding taking a lock seems more
* or pd_upper is probably atomic. Avoiding taking a lock seems more
* important than sometimes getting a wrong answer in what is after all
* just a heuristic estimate.
*/
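Concretely, the heuristic amounts to a lock-free peek at the page's free space against the fillfactor target; roughly as below (a fragment sketch, assuming the bufpage.h/rel.h macros and omitting the exact clamping the real code applies):

Size    minfree = RelationGetTargetPageFreeSpace(relation,
                                                 HEAP_DEFAULT_FILLFACTOR);

if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
{
    /* looks worth it: opportunistically take the cleanup lock and prune */
}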
@@ -332,8 +332,8 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
* OldestXmin is the cutoff XID used to identify dead tuples.
*
* We don't actually change the page here, except perhaps for hint-bit updates
* caused by HeapTupleSatisfiesVacuum. We just add entries to the arrays in
* prstate showing the changes to be made. Items to be redirected are added
* caused by HeapTupleSatisfiesVacuum. We just add entries to the arrays in
* prstate showing the changes to be made. Items to be redirected are added
* to the redirected[] array (two entries per redirection); items to be set to
* LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
* state are added to nowunused[].
@@ -384,7 +384,7 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
* We need this primarily to handle aborted HOT updates, that is,
* XMIN_INVALID heap-only tuples. Those might not be linked to by
* any chain, since the parent tuple might be re-updated before
* any pruning occurs. So we have to be able to reap them
* any pruning occurs. So we have to be able to reap them
* separately from chain-pruning. (Note that
* HeapTupleHeaderIsHotUpdated will never return true for an
* XMIN_INVALID tuple, so this code will work even when there were
@@ -496,9 +496,10 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
/*
* This tuple may soon become DEAD. Update the hint field
* so that the page is reconsidered for pruning in future.
* This tuple may soon become DEAD. Update the hint field so
* that the page is reconsidered for pruning in future.
*/
heap_prune_record_prunable(prstate,
HeapTupleHeaderGetUpdateXid(htup));
@@ -574,7 +575,7 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
/*
* If the root entry had been a normal tuple, we are deleting it, so
* count it in the result. But changing a redirect (even to DEAD
* count it in the result. But changing a redirect (even to DEAD
* state) doesn't count.
*/
if (ItemIdIsNormal(rootlp))
@@ -663,7 +664,7 @@ heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
* buffer, and is inside a critical section.
*
* This is split out because it is also used by heap_xlog_clean()
* to replay the WAL record when needed after a crash. Note that the
* to replay the WAL record when needed after a crash. Note that the
* arguments are identical to those of log_heap_clean().
*/
void

View File

@@ -10,7 +10,7 @@
*
* The caller is responsible for creating the new heap, all catalog
* changes, supplying the tuples to be written to the new heap, and
* rebuilding indexes. The caller must hold AccessExclusiveLock on the
* rebuilding indexes. The caller must hold AccessExclusiveLock on the
* target table, because we assume no one else is writing into it.
*
* To use the facility:
@@ -43,7 +43,7 @@
* to substitute the correct ctid instead.
*
* For each ctid reference from A -> B, we might encounter either A first
* or B first. (Note that a tuple in the middle of a chain is both A and B
* or B first. (Note that a tuple in the middle of a chain is both A and B
* of different pairs.)
*
* If we encounter A first, we'll store the tuple in the unresolved_tups
@@ -58,11 +58,11 @@
* and can write A immediately with the correct ctid.
*
* Entries in the hash tables can be removed as soon as the later tuple
* is encountered. That helps to keep the memory usage down. At the end,
* is encountered. That helps to keep the memory usage down. At the end,
* both tables are usually empty; we should have encountered both A and B
* of each pair. However, it's possible for A to be RECENTLY_DEAD and B
* entirely DEAD according to HeapTupleSatisfiesVacuum, because the test
* for deadness using OldestXmin is not exact. In such a case we might
* for deadness using OldestXmin is not exact. In such a case we might
* encounter B first, and skip it, and find A later. Then A would be added
* to unresolved_tups, and stay there until end of the rewrite. Since
* this case is very unusual, we don't worry about the memory usage.
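A minimal standalone model of the two-table scheme described above, with illustrative names rather than the real hash tables: when B is seen first its old and new TIDs go into a map, and when A is seen first it is parked until B's new location is known. Removal of matched entries is omitted for brevity.

#include <stdio.h>
#include <stdbool.h>

typedef struct Tid
{
	int			block;
	int			off;
} Tid;

#define MAX_ENTRIES 16

/* B tuples seen before their A: old TID -> new TID (cf. old_new_tid_map) */
static Tid	map_old[MAX_ENTRIES], map_new[MAX_ENTRIES];
static int	nmap = 0;

/* A tuples whose B hasn't been seen yet (cf. unresolved_tups) */
static Tid	pending_key[MAX_ENTRIES], pending_self[MAX_ENTRIES];
static int	npending = 0;

static bool
tid_eq(Tid a, Tid b)
{
	return a.block == b.block && a.off == b.off;
}

static void
write_tuple(Tid self, Tid ctid)
{
	printf("write (%d,%d) with t_ctid -> (%d,%d)\n",
		   self.block, self.off, ctid.block, ctid.off);
}

/* Saw the older tuple A, whose t_ctid points at old_ctid (B's old TID). */
static void
saw_a(Tid self, Tid old_ctid)
{
	int			i;

	for (i = 0; i < nmap; i++)
	{
		if (tid_eq(map_old[i], old_ctid))
		{
			write_tuple(self, map_new[i]);	/* B already placed: resolve now */
			return;
		}
	}
	pending_key[npending] = old_ctid;	/* park A until B shows up */
	pending_self[npending] = self;
	npending++;
}

/* Saw the newer tuple B, which has just been written at new_tid. */
static void
saw_b(Tid old_tid, Tid new_tid)
{
	int			i;

	map_old[nmap] = old_tid;
	map_new[nmap] = new_tid;
	nmap++;

	for (i = 0; i < npending; i++)
	{
		if (tid_eq(pending_key[i], old_tid))
			write_tuple(pending_self[i], new_tid);	/* release the parked A */
	}
}

int
main(void)
{
	Tid			a = {1, 1}, b_old = {1, 2}, b_new = {7, 1};

	saw_a(a, b_old);			/* A first: parked */
	saw_b(b_old, b_new);		/* B arrives: A written with corrected ctid */
	return 0;
}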
@@ -78,7 +78,7 @@
* of CLUSTERing on an unchanging key column, we'll see all the versions
* of a given tuple together anyway, and so the peak memory usage is only
* proportional to the number of RECENTLY_DEAD versions of a single row, not
* in the whole table. Note that if we do fail halfway through a CLUSTER,
* in the whole table. Note that if we do fail halfway through a CLUSTER,
* the old table is still valid, so failure is not catastrophic.
*
* We can't use the normal heap_insert function to insert into the new
@@ -143,13 +143,13 @@ typedef struct RewriteStateData
BlockNumber rs_blockno; /* block where page will go */
bool rs_buffer_valid; /* T if any tuples in buffer */
bool rs_use_wal; /* must we WAL-log inserts? */
bool rs_logical_rewrite; /* do we need to do logical rewriting */
bool rs_logical_rewrite; /* do we need to do logical rewriting */
TransactionId rs_oldest_xmin; /* oldest xmin used by caller to
* determine tuple visibility */
TransactionId rs_freeze_xid;/* Xid that will be used as freeze cutoff
* point */
TransactionId rs_logical_xmin; /* Xid that will be used as cutoff
* point for logical rewrites */
TransactionId rs_logical_xmin; /* Xid that will be used as cutoff
* point for logical rewrites */
MultiXactId rs_cutoff_multi;/* MultiXactId that will be used as cutoff
* point for multixacts */
MemoryContext rs_cxt; /* for hash tables and entries and tuples in
@@ -158,7 +158,7 @@ typedef struct RewriteStateData
HTAB *rs_unresolved_tups; /* unmatched A tuples */
HTAB *rs_old_new_tid_map; /* unmatched B tuples */
HTAB *rs_logical_mappings; /* logical remapping files */
uint32 rs_num_rewrite_mappings; /* # in memory mappings */
uint32 rs_num_rewrite_mappings; /* # in memory mappings */
} RewriteStateData;
/*
@@ -199,12 +199,12 @@ typedef OldToNewMappingData *OldToNewMapping;
*/
typedef struct RewriteMappingFile
{
TransactionId xid; /* xid that might need to see the row */
int vfd; /* fd of mappings file */
off_t off; /* how far have we written yet */
uint32 num_mappings; /* number of in-memory mappings */
dlist_head mappings; /* list of in-memory mappings */
char path[MAXPGPATH]; /* path, for error messages */
TransactionId xid; /* xid that might need to see the row */
int vfd; /* fd of mappings file */
off_t off; /* how far have we written yet */
uint32 num_mappings; /* number of in-memory mappings */
dlist_head mappings; /* list of in-memory mappings */
char path[MAXPGPATH]; /* path, for error messages */
} RewriteMappingFile;
/*
@@ -213,8 +213,8 @@ typedef struct RewriteMappingFile
*/
typedef struct RewriteMappingDataEntry
{
LogicalRewriteMappingData map; /* map between old and new location of
* the tuple */
LogicalRewriteMappingData map; /* map between old and new location of
* the tuple */
dlist_node node;
} RewriteMappingDataEntry;
@@ -346,7 +346,7 @@ end_heap_rewrite(RewriteState state)
}
/*
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
* to ensure that the toast table gets fsync'd too.
*
* It's obvious that we must do this when not WAL-logging. It's less
@@ -617,7 +617,7 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
}
/*
* Insert a tuple to the new relation. This has to track heap_insert
* Insert a tuple to the new relation. This has to track heap_insert
* and its subsidiary functions!
*
* t_self of the tuple is set to the new TID of the tuple. If t_ctid of the
@@ -866,13 +866,13 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
hash_seq_init(&seq_status, state->rs_logical_mappings);
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
{
XLogRecData rdata[2];
char *waldata;
char *waldata_start;
XLogRecData rdata[2];
char *waldata;
char *waldata_start;
xl_heap_rewrite_mapping xlrec;
Oid dboid;
uint32 len;
int written;
Oid dboid;
uint32 len;
int written;
/* this file hasn't got any new mappings */
if (src->num_mappings == 0)
@@ -962,14 +962,14 @@ logical_end_heap_rewrite(RewriteState state)
return;
/* writeout remaining in-memory entries */
if (state->rs_num_rewrite_mappings > 0 )
if (state->rs_num_rewrite_mappings > 0)
logical_heap_rewrite_flush_mappings(state);
/* Iterate over all mappings we have written and fsync the files. */
hash_seq_init(&seq_status, state->rs_logical_mappings);
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
{
if(FileSync(src->vfd) != 0)
if (FileSync(src->vfd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", src->path)));
@@ -985,10 +985,10 @@ static void
logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
LogicalRewriteMappingData *map)
{
RewriteMappingFile *src;
RewriteMappingDataEntry *pmap;
Oid relid;
bool found;
RewriteMappingFile *src;
RewriteMappingDataEntry *pmap;
Oid relid;
bool found;
relid = RelationGetRelid(state->rs_old_rel);
@@ -1027,7 +1027,7 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
if (src->vfd < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not create file \"%s\": %m", path)));
errmsg("could not create file \"%s\": %m", path)));
}
pmap = MemoryContextAlloc(state->rs_cxt,
@@ -1041,7 +1041,7 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
* Write out buffer every time we've too many in-memory entries across all
* mapping files.
*/
if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */)
if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */ )
logical_heap_rewrite_flush_mappings(state);
}
@@ -1054,11 +1054,11 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,
HeapTuple new_tuple)
{
ItemPointerData new_tid = new_tuple->t_self;
TransactionId cutoff = state->rs_logical_xmin;
TransactionId xmin;
TransactionId xmax;
bool do_log_xmin = false;
bool do_log_xmax = false;
TransactionId cutoff = state->rs_logical_xmin;
TransactionId xmin;
TransactionId xmax;
bool do_log_xmin = false;
bool do_log_xmax = false;
LogicalRewriteMappingData map;
/* no logical rewrite in progress, we don't need to log anything */
@@ -1147,7 +1147,8 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
if (fd < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not create file \"%s\": %m", path)));
errmsg("could not create file \"%s\": %m", path)));
/*
* Truncate all data that's not guaranteed to have been safely fsynced (by
* previous record or by the last checkpoint).
@@ -1174,6 +1175,7 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", path)));
/*
* Now fsync all previously written data. We could improve things and only
* do this for the last write to a file, but the required bookkeeping
@@ -1222,13 +1224,14 @@ CheckPointLogicalRewriteHeap(void)
mappings_dir = AllocateDir("pg_llog/mappings");
while ((mapping_de = ReadDir(mappings_dir, "pg_llog/mappings")) != NULL)
{
struct stat statbuf;
struct stat statbuf;
Oid dboid;
Oid relid;
XLogRecPtr lsn;
TransactionId rewrite_xid;
TransactionId create_xid;
uint32 hi, lo;
uint32 hi,
lo;
if (strcmp(mapping_de->d_name, ".") == 0 ||
strcmp(mapping_de->d_name, "..") == 0)
@@ -1244,7 +1247,7 @@ CheckPointLogicalRewriteHeap(void)
if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
&dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6)
elog(ERROR,"could not parse filename \"%s\"", mapping_de->d_name);
elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
lsn = ((uint64) hi) << 32 | lo;
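The mapping file name carries the LSN as two 32-bit halves, recombined with the shift-and-or above. A tiny self-checking sketch of that round trip, using an invented file-name format rather than the real LOGICAL_REWRITE_FORMAT:

#include <assert.h>
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t	lsn = UINT64_C(0x00000001A2B3C4D5);
	unsigned int hi = (unsigned int) (lsn >> 32);
	unsigned int lo = (unsigned int) (lsn & 0xFFFFFFFFu);
	unsigned int hi2,
				lo2;
	char		name[64];

	/* encode the two halves into a file name, then parse them back */
	snprintf(name, sizeof(name), "map-%08X-%08X", hi, lo);
	if (sscanf(name, "map-%X-%X", &hi2, &lo2) != 2)
		return 1;

	assert((((uint64_t) hi2) << 32 | lo2) == lsn);
	printf("%s -> %016llX\n", name, (unsigned long long) lsn);
	return 0;
}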
@@ -1258,7 +1261,7 @@ CheckPointLogicalRewriteHeap(void)
}
else
{
int fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
int fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
/*
* The file cannot vanish due to concurrency since this function
@@ -1269,6 +1272,7 @@ CheckPointLogicalRewriteHeap(void)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", path)));
/*
* We could try to avoid fsyncing files that either haven't
* changed or have only been created since the checkpoint's start,

View File

@@ -4,7 +4,7 @@
* heap scan synchronization support
*
* When multiple backends run a sequential scan on the same table, we try
* to keep them synchronized to reduce the overall I/O needed. The goal is
* to keep them synchronized to reduce the overall I/O needed. The goal is
* to read each page into shared buffer cache only once, and let all backends
* that take part in the shared scan process the page before it falls out of
* the cache.
@@ -26,7 +26,7 @@
* don't want such queries to slow down others.
*
* There can realistically only be a few large sequential scans on different
* tables in progress at any time. Therefore we just keep the scan positions
* tables in progress at any time. Therefore we just keep the scan positions
* in a small LRU list which we scan every time we need to look up or update a
* scan position. The whole mechanism is only applied for tables exceeding
* a threshold size (but that is not the concern of this module).
@@ -243,7 +243,7 @@ ss_search(RelFileNode relfilenode, BlockNumber location, bool set)
* relation, or 0 if no valid location is found.
*
* We expect the caller has just done RelationGetNumberOfBlocks(), and
* so that number is passed in rather than computing it again. The result
* so that number is passed in rather than computing it again. The result
* is guaranteed less than relnblocks (assuming that's > 0).
*/
BlockNumber
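A minimal sketch of such a table of scan positions: a small most-recently-used array keyed by relation, refreshed when a scan reports progress and consulted when a new scan joins. The names (ss_report, ss_get) are illustrative.

#include <stdio.h>

#define N_SLOTS 4				/* only a few large scans run at a time */

typedef struct ScanLoc
{
	int			relid;			/* stand-in for RelFileNode */
	long		blockno;		/* last block reported for that relation */
} ScanLoc;

static ScanLoc slots[N_SLOTS];	/* slot 0 is most recently used */
static int	nused = 0;

/* Find (or create) the entry for relid, move it to the front, return it. */
static ScanLoc *
ss_touch(int relid)
{
	int			i,
				found = -1;
	ScanLoc		tmp;

	for (i = 0; i < nused; i++)
		if (slots[i].relid == relid)
			found = i;

	if (found < 0)
	{
		/* not present: take a new slot, or recycle the least recently used */
		found = (nused < N_SLOTS) ? nused++ : N_SLOTS - 1;
		slots[found].relid = relid;
		slots[found].blockno = 0;
	}

	tmp = slots[found];
	for (i = found; i > 0; i--)
		slots[i] = slots[i - 1];
	slots[0] = tmp;				/* move to front */
	return &slots[0];
}

/* A scan reports the block it just finished reading. */
static void
ss_report(int relid, long blockno)
{
	ss_touch(relid)->blockno = blockno;
}

/* A new scan asks where to start so it can join the pack. */
static long
ss_get(int relid, long relnblocks)
{
	return ss_touch(relid)->blockno % relnblocks;
}

int
main(void)
{
	ss_report(42, 1234);
	printf("start scan of rel 42 at block %ld\n", ss_get(42, 10000));
	return 0;
}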

View File

@@ -53,11 +53,11 @@ static struct varlena *toast_fetch_datum(struct varlena * attr);
static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
int32 sliceoffset, int32 length);
static int toast_open_indexes(Relation toastrel,
LOCKMODE lock,
Relation **toastidxs,
int *num_indexes);
LOCKMODE lock,
Relation **toastidxs,
int *num_indexes);
static void toast_close_indexes(Relation *toastidxs, int num_indexes,
LOCKMODE lock);
LOCKMODE lock);
/* ----------
@@ -91,8 +91,9 @@ heap_tuple_fetch_attr(struct varlena * attr)
* to persist a Datum for unusually long time, like in a HOLD cursor.
*/
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
attr = (struct varlena *)redirect.pointer;
attr = (struct varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
@@ -147,8 +148,9 @@ heap_tuple_untoast_attr(struct varlena * attr)
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
attr = (struct varlena *)redirect.pointer;
attr = (struct varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
@@ -217,6 +219,7 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
/* nested indirect Datums aren't allowed */
@@ -299,6 +302,7 @@ toast_raw_datum_size(Datum value)
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/* nested indirect Datums aren't allowed */
@@ -354,6 +358,7 @@ toast_datum_size(Datum value)
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/* nested indirect Datums aren't allowed */
@@ -597,7 +602,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
* We took care of UPDATE above, so any external value we find
* still in the tuple must be someone else's we cannot reuse.
* Fetch it back (without decompression, unless we are forcing
* PLAIN storage). If necessary, we'll push it out as a new
* PLAIN storage). If necessary, we'll push it out as a new
* external value below.
*/
if (VARATT_IS_EXTERNAL(new_value))
@@ -740,7 +745,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
/*
* Second we look for attributes of attstorage 'x' or 'e' that are still
* inline. But skip this if there's no toast table to push them to.
* inline. But skip this if there's no toast table to push them to.
*/
while (heap_compute_data_size(tupleDesc,
toast_values, toast_isnull) > maxDataLen &&
@@ -850,7 +855,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
}
/*
* Finally we store attributes of type 'm' externally. At this point we
* Finally we store attributes of type 'm' externally. At this point we
* increase the target tuple size, so that 'm' attributes aren't stored
* externally unless really necessary.
*/
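A minimal sketch of the size-driven loop those passes share: while the inline data is still too large, push the largest remaining attribute out of line. The per-attribute storage kinds (x/e/m), compression, and the growing size target are omitted, and the numbers are illustrative.

#include <stdio.h>
#include <stdbool.h>

#define NATTRS 4
#define MAX_INLINE 100			/* stand-in for the target tuple size */

int
main(void)
{
	int			size[NATTRS] = {80, 60, 30, 10};	/* inline attribute sizes */
	bool		external[NATTRS] = {false, false, false, false};
	int			i,
				total = 0,
				biggest;

	for (i = 0; i < NATTRS; i++)
		total += size[i];

	/*
	 * While the tuple is still too big, push the largest still-inline
	 * attribute out of line; only a small pointer stays behind.
	 */
	while (total > MAX_INLINE)
	{
		biggest = -1;
		for (i = 0; i < NATTRS; i++)
			if (!external[i] && (biggest < 0 || size[i] > size[biggest]))
				biggest = i;
		if (biggest < 0)
			break;				/* nothing left to move out */

		total -= size[biggest] - 18;	/* 18: illustrative pointer size */
		external[biggest] = true;
	}

	printf("inline size now %d\n", total);
	return 0;
}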
@@ -1438,7 +1443,7 @@ toast_save_datum(Relation rel, Datum value,
* those versions could easily reference the same toast value.
* When we copy the second or later version of such a row,
* reusing the OID will mean we select an OID that's already
* in the new toast table. Check for that, and if so, just
* in the new toast table. Check for that, and if so, just
* fall through without writing the data again.
*
* While annoying and ugly-looking, this is a good thing
@@ -1467,7 +1472,7 @@ toast_save_datum(Relation rel, Datum value,
{
toast_pointer.va_valueid =
GetNewOidWithIndex(toastrel,
RelationGetRelid(toastidxs[validIndex]),
RelationGetRelid(toastidxs[validIndex]),
(AttrNumber) 1);
} while (toastid_valueid_exists(rel->rd_toastoid,
toast_pointer.va_valueid));
@@ -1488,7 +1493,7 @@ toast_save_datum(Relation rel, Datum value,
*/
while (data_todo > 0)
{
int i;
int i;
/*
* Calculate the size of this chunk
@@ -1506,7 +1511,7 @@ toast_save_datum(Relation rel, Datum value,
heap_insert(toastrel, toasttup, mycid, options, NULL);
/*
* Create the index entry. We cheat a little here by not using
* Create the index entry. We cheat a little here by not using
* FormIndexDatum: this relies on the knowledge that the index columns
* are the same as the initial columns of the table for all the
* indexes.
@@ -1656,8 +1661,8 @@ toastrel_valueid_exists(Relation toastrel, Oid valueid)
* Is there any such chunk?
*/
toastscan = systable_beginscan(toastrel,
RelationGetRelid(toastidxs[validIndex]),
true, SnapshotToast, 1, &toastkey);
RelationGetRelid(toastidxs[validIndex]),
true, SnapshotToast, 1, &toastkey);
if (systable_getnext(toastscan) != NULL)
result = true;
@@ -2126,7 +2131,8 @@ toast_open_indexes(Relation toastrel,
/* Fetch the first valid index in list */
for (i = 0; i < *num_indexes; i++)
{
Relation toastidx = (*toastidxs)[i];
Relation toastidx = (*toastidxs)[i];
if (toastidx->rd_index->indisvalid)
{
res = i;
@@ -2136,14 +2142,14 @@ toast_open_indexes(Relation toastrel,
}
/*
* Free index list, not necessary anymore as relations are opened
* and a valid index has been found.
* Free index list, not necessary anymore as relations are opened and a
* valid index has been found.
*/
list_free(indexlist);
/*
* The toast relation should have one valid index, so something is
* going wrong if there is nothing.
* The toast relation should have one valid index, so something is going
* wrong if there is nothing.
*/
if (!found)
elog(ERROR, "no valid index found for toast relation with Oid %d",
@@ -2161,7 +2167,7 @@ toast_open_indexes(Relation toastrel,
static void
toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
{
int i;
int i;
/* Close relations and clean up things */
for (i = 0; i < num_indexes; i++)

View File

@@ -27,7 +27,7 @@
* the sense that we make sure that whenever a bit is set, we know the
* condition is true, but if a bit is not set, it might or might not be true.
*
* Clearing a visibility map bit is not separately WAL-logged. The callers
* Clearing a visibility map bit is not separately WAL-logged. The callers
* must make sure that whenever a bit is cleared, the bit is cleared on WAL
* replay of the updating operation as well.
*
@@ -36,9 +36,9 @@
* it may still be the case that every tuple on the page is visible to all
* transactions; we just don't know that for certain. The difficulty is that
* there are two bits which are typically set together: the PD_ALL_VISIBLE bit
* on the page itself, and the visibility map bit. If a crash occurs after the
* on the page itself, and the visibility map bit. If a crash occurs after the
* visibility map page makes it to disk and before the updated heap page makes
* it to disk, redo must set the bit on the heap page. Otherwise, the next
* it to disk, redo must set the bit on the heap page. Otherwise, the next
* insert, update, or delete on the heap page will fail to realize that the
* visibility map bit must be cleared, possibly causing index-only scans to
* return wrong answers.
@@ -59,10 +59,10 @@
* the buffer lock over any I/O that may be required to read in the visibility
* map page. To avoid this, we examine the heap page before locking it;
* if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
* bit. Then, we lock the buffer. But this creates a race condition: there
* bit. Then, we lock the buffer. But this creates a race condition: there
* is a possibility that in the time it takes to lock the buffer, the
* PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the
* buffer, pin the visibility map page, and relock the buffer. This shouldn't
* buffer, pin the visibility map page, and relock the buffer. This shouldn't
* happen often, because only VACUUM currently sets visibility map bits,
* and the race will only occur if VACUUM processes a given page at almost
* exactly the same time that someone tries to further modify it.
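A single-process model of the pin-before-lock dance described above, with illustrative stand-ins for the buffer lock and the visibility map pin:

#include <stdbool.h>
#include <stdio.h>

static bool page_all_visible = true;	/* stands in for PD_ALL_VISIBLE */
static bool vm_page_pinned = false;		/* stands in for a pinned map page */
static bool buffer_locked = false;

static void pin_vm(void)     { vm_page_pinned = true; }	/* may require I/O */
static void lock_buf(void)   { buffer_locked = true; }
static void unlock_buf(void) { buffer_locked = false; }

int
main(void)
{
	/*
	 * 1. Peek at the heap page without the lock: if it claims to be
	 * all-visible, pin the visibility map page first, because pinning may
	 * do I/O and I/O must not happen while the buffer lock is held.
	 */
	if (page_all_visible)
		pin_vm();

	/* 2. Now lock the heap buffer. */
	lock_buf();

	/*
	 * 3. Re-check: the flag may have been set between the peek and the
	 * lock.  If so, drop the lock, pin the map page, and relock; the
	 * window is small, so this is rare.
	 */
	if (page_all_visible && !vm_page_pinned)
	{
		unlock_buf();
		pin_vm();
		lock_buf();
	}

	printf("locked=%d, vm pinned=%d\n", buffer_locked, vm_page_pinned);
	return 0;
}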
@@ -227,9 +227,9 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
* visibilitymap_set - set a bit on a previously pinned page
*
* recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
* or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
* or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
* one provided; in normal running, we generate a new XLOG record and set the
* page LSN to that value. cutoff_xid is the largest xmin on the page being
* page LSN to that value. cutoff_xid is the largest xmin on the page being
* marked all-visible; it is needed for Hot Standby, and can be
* InvalidTransactionId if the page contains no tuples.
*
@@ -320,10 +320,10 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
* releasing *buf after it's done testing and setting bits.
*
* NOTE: This function is typically called without a lock on the heap page,
* so somebody else could change the bit just after we look at it. In fact,
* so somebody else could change the bit just after we look at it. In fact,
* since we don't lock the visibility map page either, it's even possible that
* someone else could have changed the bit just before we look at it, but yet
* we might see the old value. It is the caller's responsibility to deal with
* we might see the old value. It is the caller's responsibility to deal with
* all concurrency issues!
*/
bool
@@ -526,7 +526,7 @@ vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
/*
* We might not have opened the relation at the smgr level yet, or we
* might have been forced to close it by a sinval message. The code below
* might have been forced to close it by a sinval message. The code below
* won't necessarily notice relation extension immediately when extend =
* false, so we rely on sinval messages to ensure that our ideas about the
* size of the map aren't too far out of date.

View File

@@ -45,7 +45,7 @@
*
* At the end of a scan, the AM's endscan routine undoes the locking,
* but does *not* call IndexScanEnd --- the higher-level index_endscan
* routine does that. (We can't do it in the AM because index_endscan
* routine does that. (We can't do it in the AM because index_endscan
* still needs to touch the IndexScanDesc after calling the AM.)
*
* Because of this, the AM does not have a choice whether to call
@@ -79,7 +79,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
scan->heapRelation = NULL; /* may be set later */
scan->indexRelation = indexRelation;
scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
scan->numberOfKeys = nkeys;
scan->numberOfOrderBys = norderbys;
@@ -188,7 +188,7 @@ BuildIndexValueDescription(Relation indexRelation,
* at rd_opcintype not the index tupdesc.
*
* Note: this is a bit shaky for opclasses that have pseudotype
* input types such as ANYARRAY or RECORD. Currently, the
* input types such as ANYARRAY or RECORD. Currently, the
* typoutput functions associated with the pseudotypes will work
* okay, but we might have to try harder in future.
*/
@@ -269,7 +269,7 @@ systable_beginscan(Relation heapRelation,
if (snapshot == NULL)
{
Oid relid = RelationGetRelid(heapRelation);
Oid relid = RelationGetRelid(heapRelation);
snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
sysscan->snapshot = snapshot;
@@ -442,7 +442,7 @@ systable_endscan(SysScanDesc sysscan)
* index order. Also, for largely historical reasons, the index to use
* is opened and locked by the caller, not here.
*
* Currently we do not support non-index-based scans here. (In principle
* Currently we do not support non-index-based scans here. (In principle
* we could do a heapscan and sort, but the uses are in places that
* probably don't need to still work with corrupted catalog indexes.)
* For the moment, therefore, these functions are merely the thinnest of
@@ -475,7 +475,7 @@ systable_beginscan_ordered(Relation heapRelation,
if (snapshot == NULL)
{
Oid relid = RelationGetRelid(heapRelation);
Oid relid = RelationGetRelid(heapRelation);
snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
sysscan->snapshot = snapshot;

View File

@@ -84,7 +84,7 @@
*
* Note: the ReindexIsProcessingIndex() check in RELATION_CHECKS is there
* to check that we don't try to scan or do retail insertions into an index
* that is currently being rebuilt or pending rebuild. This helps to catch
* that is currently being rebuilt or pending rebuild. This helps to catch
* things that don't work when reindexing system catalogs. The assertion
* doesn't prevent the actual rebuild because we don't use RELATION_CHECKS
* when calling the index AM's ambuild routine, and there is no reason for
@@ -149,7 +149,7 @@ static IndexScanDesc index_beginscan_internal(Relation indexRelation,
* index_open - open an index relation by relation OID
*
* If lockmode is not "NoLock", the specified kind of lock is
* obtained on the index. (Generally, NoLock should only be
* obtained on the index. (Generally, NoLock should only be
* used if the caller knows it has some appropriate lock on the
* index already.)
*
@@ -414,7 +414,7 @@ index_markpos(IndexScanDesc scan)
* returnable tuple in each HOT chain, and so restoring the prior state at the
* granularity of the index AM is sufficient. Since the only current user
* of mark/restore functionality is nodeMergejoin.c, this effectively means
* that merge-join plans only work for MVCC snapshots. This could be fixed
* that merge-join plans only work for MVCC snapshots. This could be fixed
* if necessary, but for now it seems unimportant.
* ----------------
*/
@@ -553,7 +553,7 @@ index_fetch_heap(IndexScanDesc scan)
/*
* If we scanned a whole HOT chain and found only dead tuples, tell index
* AM to kill its entry for that TID (this will take effect in the next
* amgettuple call, in index_getnext_tid). We do not do this when in
* amgettuple call, in index_getnext_tid). We do not do this when in
* recovery because it may violate MVCC to do so. See comments in
* RelationGetIndexScan().
*/
@@ -590,7 +590,7 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
{
/*
* We are resuming scan of a HOT chain after having returned an
* earlier member. Must still hold pin on current heap page.
* earlier member. Must still hold pin on current heap page.
*/
Assert(BufferIsValid(scan->xs_cbuf));
Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) ==
@@ -760,7 +760,7 @@ index_can_return(Relation indexRelation)
* particular indexed attribute are those with both types equal to
* the index opclass' opcintype (note that this is subtly different
* from the indexed attribute's own type: it may be a binary-compatible
* type instead). Only the default functions are stored in relcache
* type instead). Only the default functions are stored in relcache
* entries --- access methods can use the syscache to look up non-default
* functions.
*
@@ -794,7 +794,7 @@ index_getprocid(Relation irel,
* index_getprocinfo
*
* This routine allows index AMs to keep fmgr lookup info for
* support procs in the relcache. As above, only the "default"
* support procs in the relcache. As above, only the "default"
* functions for any particular indexed attribute are cached.
*
* Note: the return value points into cached data that will be lost during

View File

@@ -25,7 +25,7 @@
* Although any negative int32 (except INT_MIN) is acceptable for reporting
* "<", and any positive int32 is acceptable for reporting ">", routines
* that work on 32-bit or wider datatypes can't just return "a - b".
* That could overflow and give the wrong answer. Also, one must not
* That could overflow and give the wrong answer. Also, one must not
* return INT_MIN to report "<", since some callers will negate the result.
*
* NOTE: it is critical that the comparison function impose a total order
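A comparator written to that rule for int-sized values: report the ordering by comparison, never by subtraction, so overflow and INT_MIN cannot escape.

#include <limits.h>
#include <stdio.h>

/* Safe three-way comparison for 32-bit values: never subtracts. */
static int
int32_cmp(int a, int b)
{
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}

int
main(void)
{
	/* a - b would overflow here and could report the wrong sign */
	printf("%d\n", int32_cmp(INT_MAX, -1));		/* 1 */
	printf("%d\n", int32_cmp(INT_MIN, 1));		/* -1, and never INT_MIN */
	return 0;
}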

View File

@@ -90,7 +90,7 @@ static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
* By here, itup is filled in, including the TID.
*
* If checkUnique is UNIQUE_CHECK_NO or UNIQUE_CHECK_PARTIAL, this
* will allow duplicates. Otherwise (UNIQUE_CHECK_YES or
* will allow duplicates. Otherwise (UNIQUE_CHECK_YES or
* UNIQUE_CHECK_EXISTING) it will throw error for a duplicate.
* For UNIQUE_CHECK_EXISTING we merely run the duplicate check, and
* don't actually insert.
@@ -129,7 +129,7 @@ top:
* If the page was split between the time that we surrendered our read
* lock and acquired our write lock, then this page may no longer be the
* right place for the key we want to insert. In this case, we need to
* move right in the tree. See Lehman and Yao for an excruciatingly
* move right in the tree. See Lehman and Yao for an excruciatingly
* precise description.
*/
buf = _bt_moveright(rel, buf, natts, itup_scankey, false,
@@ -211,7 +211,7 @@ top:
* is the first tuple on the next page.
*
* Returns InvalidTransactionId if there is no conflict, else an xact ID
* we must wait for to see if it commits a conflicting tuple. If an actual
* we must wait for to see if it commits a conflicting tuple. If an actual
* conflict is detected, no return --- just ereport().
*
* However, if checkUnique == UNIQUE_CHECK_PARTIAL, we always return
@@ -293,7 +293,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
/*
* If we are doing a recheck, we expect to find the tuple we
* are rechecking. It's not a duplicate, but we have to keep
* are rechecking. It's not a duplicate, but we have to keep
* scanning.
*/
if (checkUnique == UNIQUE_CHECK_EXISTING &&
@@ -482,7 +482,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
* If the new key is equal to one or more existing keys, we can
* legitimately place it anywhere in the series of equal keys --- in fact,
* if the new key is equal to the page's "high key" we can place it on
* the next page. If it is equal to the high key, and there's not room
* the next page. If it is equal to the high key, and there's not room
* to insert the new tuple on the current page without splitting, then
* we can move right hoping to find more free space and avoid a split.
* (We should not move right indefinitely, however, since that leads to
@@ -494,7 +494,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
* removing any LP_DEAD tuples.
*
* On entry, *buf and *offsetptr point to the first legal position
* where the new tuple could be inserted. The caller should hold an
* where the new tuple could be inserted. The caller should hold an
* exclusive lock on *buf. *offsetptr can also be set to
* InvalidOffsetNumber, in which case the function will search for the
* right location within the page if needed. On exit, they point to the
@@ -564,7 +564,7 @@ _bt_findinsertloc(Relation rel,
* on every insert. We implement "get tired" as a random choice,
* since stopping after scanning a fixed number of pages wouldn't work
* well (we'd never reach the right-hand side of previously split
* pages). Currently the probability of moving right is set at 0.99,
* pages). Currently the probability of moving right is set at 0.99,
* which may seem too high to change the behavior much, but it does an
* excellent job of preventing O(N^2) behavior with many equal keys.
*----------
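A minimal sketch of the "get tired" walk, with illustrative stand-ins for the page checks: keep moving right while the key could equally well go on the next page, but stop at random so one insert never scans an unbounded run of full pages.

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Illustrative stand-ins: the pages near the split point are full. */
static bool
page_has_room(int pageno)
{
	return pageno >= 5;
}

static bool
new_key_equals_high_key(int pageno)
{
	(void) pageno;
	return true;				/* a long run of equal keys */
}

int
main(void)
{
	int			pageno = 0;

	srand(12345);

	/*
	 * Move right looking for free space among equal keys, but "get tired"
	 * with probability 0.01 per step so the walk stays short even when
	 * many consecutive pages are full.
	 */
	while (!page_has_room(pageno) && new_key_equals_high_key(pageno))
	{
		if (rand() % 100 >= 99)
			break;				/* tired: split the current page instead */
		pageno++;
	}

	printf("insert lands on page %d\n", pageno);
	return 0;
}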
@@ -574,7 +574,7 @@ _bt_findinsertloc(Relation rel,
while (PageGetFreeSpace(page) < itemsz)
{
Buffer rbuf;
BlockNumber rblkno;
BlockNumber rblkno;
/*
* before considering moving right, see if we can obtain enough space
@@ -620,10 +620,10 @@ _bt_findinsertloc(Relation rel,
lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
/*
* If this page was incompletely split, finish the split now.
* We do this while holding a lock on the left sibling, which
* is not good because finishing the split could be a fairly
* lengthy operation. But this should happen very seldom.
* If this page was incompletely split, finish the split now. We
* do this while holding a lock on the left sibling, which is not
* good because finishing the split could be a fairly lengthy
* operation. But this should happen very seldom.
*/
if (P_INCOMPLETE_SPLIT(lpageop))
{
@@ -681,7 +681,7 @@ _bt_findinsertloc(Relation rel,
* + updates the metapage if a true root or fast root is split.
*
* On entry, we must have the correct buffer in which to do the
* insertion, and the buffer must be pinned and write-locked. On return,
* insertion, and the buffer must be pinned and write-locked. On return,
* we will have dropped both the pin and the lock on the buffer.
*
* When inserting to a non-leaf page, 'cbuf' is the left-sibling of the
@@ -978,7 +978,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
* origpage is the original page to be split. leftpage is a temporary
* buffer that receives the left-sibling data, which will be copied back
* into origpage on success. rightpage is the new page that receives the
* right-sibling data. If we fail before reaching the critical section,
* right-sibling data. If we fail before reaching the critical section,
* origpage hasn't been modified and leftpage is only workspace. In
* principle we shouldn't need to worry about rightpage either, because it
* hasn't been linked into the btree page structure; but to avoid leaving
@@ -1196,7 +1196,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
* page. If you're confused, imagine that page A splits to A B and
* then again, yielding A C B, while vacuum is in progress. Tuples
* originally in A could now be in either B or C, hence vacuum must
* examine both pages. But if D, our right sibling, has a different
* examine both pages. But if D, our right sibling, has a different
* cycleid then it could not contain any tuples that were in A when
* the vacuum started.
*/
@@ -1330,11 +1330,10 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
lastrdata++;
/*
* Although we don't need to WAL-log anything on the left page,
* we still need XLogInsert to consider storing a full-page image
* of the left page, so make an empty entry referencing that
* buffer. This also ensures that the left page is always backup
* block 1.
* Although we don't need to WAL-log anything on the left page, we
* still need XLogInsert to consider storing a full-page image of
* the left page, so make an empty entry referencing that buffer.
* This also ensures that the left page is always backup block 1.
*/
lastrdata->data = NULL;
lastrdata->len = 0;
@@ -1448,7 +1447,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
*
* We return the index of the first existing tuple that should go on the
* righthand page, plus a boolean indicating whether the new tuple goes on
* the left or right page. The bool is necessary to disambiguate the case
* the left or right page. The bool is necessary to disambiguate the case
* where firstright == newitemoff.
*/
static OffsetNumber
@@ -1684,7 +1683,7 @@ _bt_checksplitloc(FindSplitData *state,
*
* On entry, buf and rbuf are the left and right split pages, which we
* still hold write locks on per the L&Y algorithm. We release the
* write locks once we have write lock on the parent page. (Any sooner,
* write locks once we have write lock on the parent page. (Any sooner,
* and it'd be possible for some other process to try to split or delete
* one of these pages, and get confused because it cannot find the downlink.)
*
@@ -1705,7 +1704,7 @@ _bt_insert_parent(Relation rel,
* Here we have to do something Lehman and Yao don't talk about: deal with
* a root split and construction of a new root. If our stack is empty
* then we have just split a node on what had been the root level when we
* descended the tree. If it was still the root then we perform a
* descended the tree. If it was still the root then we perform a
* new-root construction. If it *wasn't* the root anymore, search to find
* the next higher level that someone constructed meanwhile, and find the
* right place to insert as for the normal case.
@@ -1917,7 +1916,7 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
/*
* These loops will check every item on the page --- but in an
* order that's attuned to the probability of where it actually
* is. Scan to the right first, then to the left.
* is. Scan to the right first, then to the left.
*/
for (offnum = start;
offnum <= maxoff;

View File

@@ -12,7 +12,7 @@
* src/backend/access/nbtree/nbtpage.c
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
* Postgres btree pages look like ordinary relation pages. The opaque
* data at high addresses includes pointers to left and right siblings
* and flag data describing page state. The first page in a btree, page
* zero, is special -- it stores meta-information describing the tree.
@@ -36,7 +36,7 @@ static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
BTStack stack, Buffer *topparent, OffsetNumber *topoff,
BlockNumber *target, BlockNumber *rightsib);
static void _bt_log_reuse_page(Relation rel, BlockNumber blkno,
static void _bt_log_reuse_page(Relation rel, BlockNumber blkno,
TransactionId latestRemovedXid);
/*
@@ -62,7 +62,7 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
metaopaque->btpo_flags = BTP_META;
/*
* Set pd_lower just past the end of the metadata. This is not essential
* Set pd_lower just past the end of the metadata. This is not essential
* but it makes the page look compressible to xlog.c.
*/
((PageHeader) page)->pd_lower =
@@ -80,7 +80,7 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
*
* The access type parameter (BT_READ or BT_WRITE) controls whether
* a new root page will be created or not. If access = BT_READ,
* and no root page exists, we just return InvalidBuffer. For
* and no root page exists, we just return InvalidBuffer. For
* BT_WRITE, we try to create the root page if it doesn't exist.
* NOTE that the returned root page will have only a read lock set
* on it even if access = BT_WRITE!
@@ -197,7 +197,7 @@ _bt_getroot(Relation rel, int access)
/*
* Metadata initialized by someone else. In order to guarantee no
* deadlocks, we have to release the metadata page and start all
* over again. (Is that really true? But it's hardly worth trying
* over again. (Is that really true? But it's hardly worth trying
* to optimize this case.)
*/
_bt_relbuf(rel, metabuf);
@@ -254,7 +254,7 @@ _bt_getroot(Relation rel, int access)
END_CRIT_SECTION();
/*
* swap root write lock for read lock. There is no danger of anyone
* swap root write lock for read lock. There is no danger of anyone
* else accessing the new root page while it's unlocked, since no one
* else knows where it is yet.
*/
@@ -322,7 +322,7 @@ _bt_getroot(Relation rel, int access)
* By the time we acquire lock on the root page, it might have been split and
* not be the true root anymore. This is okay for the present uses of this
* routine; we only really need to be able to move up at least one tree level
* from whatever non-root page we were at. If we ever do need to lock the
* from whatever non-root page we were at. If we ever do need to lock the
* one true root page, we could loop here, re-reading the metapage on each
* failure. (Note that it wouldn't do to hold the lock on the metapage while
* moving to the root --- that'd deadlock against any concurrent root split.)
@@ -497,7 +497,7 @@ _bt_checkpage(Relation rel, Buffer buf)
/*
* ReadBuffer verifies that every newly-read page passes
* PageHeaderIsValid, which means it either contains a reasonably sane
* page header or is all-zero. We have to defend against the all-zero
* page header or is all-zero. We have to defend against the all-zero
* case, however.
*/
if (PageIsNew(page))
@@ -564,7 +564,7 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedX
/*
* _bt_getbuf() -- Get a buffer by block number for read or write.
*
* blkno == P_NEW means to get an unallocated index page. The page
* blkno == P_NEW means to get an unallocated index page. The page
* will be initialized before returning it.
*
* When this routine returns, the appropriate lock is set on the
@@ -595,7 +595,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
* First see if the FSM knows of any free pages.
*
* We can't trust the FSM's report unreservedly; we have to check that
* the page is still free. (For example, an already-free page could
* the page is still free. (For example, an already-free page could
* have been re-used between the time the last VACUUM scanned it and
* the time the VACUUM made its FSM updates.)
*
@@ -774,7 +774,7 @@ _bt_page_recyclable(Page page)
/*
* Delete item(s) from a btree page during VACUUM.
*
* This must only be used for deleting leaf items. Deleting an item on a
* This must only be used for deleting leaf items. Deleting an item on a
* non-leaf page has to be done as part of an atomic action that includes
* deleting the page it points to.
*
@@ -842,7 +842,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
/*
* The target-offsets array is not in the buffer, but pretend that it
* is. When XLogInsert stores the whole buffer, the offsets array
* is. When XLogInsert stores the whole buffer, the offsets array
* need not be stored too.
*/
if (nitems > 0)
@@ -1049,11 +1049,12 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
lbuf = _bt_getbuf(rel, leftsib, BT_READ);
lpage = BufferGetPage(lbuf);
lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
/*
* If the left sibling was concurrently split, so that its
* next-pointer doesn't point to the current page anymore,
* the split that created the current page must be completed.
* (We don't allow splitting an incompletely split page again
* next-pointer doesn't point to the current page anymore, the
* split that created the current page must be completed. (We
* don't allow splitting an incompletely split page again
* until the previous split has been completed)
*/
if (lopaque->btpo_next == parent &&
@@ -1066,7 +1067,7 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
}
return _bt_lock_branch_parent(rel, parent, stack->bts_parent,
topparent, topoff, target, rightsib);
topparent, topoff, target, rightsib);
}
else
{
@@ -1112,6 +1113,7 @@ _bt_pagedel(Relation rel, Buffer buf)
bool rightsib_empty;
Page page;
BTPageOpaque opaque;
/*
* "stack" is a search stack leading (approximately) to the target page.
* It is initially NULL, but when iterating, we keep it to avoid
@@ -1140,24 +1142,24 @@ _bt_pagedel(Relation rel, Buffer buf)
* was never supposed to leave half-dead pages in the tree, it was
* just a transient state, but it was nevertheless possible in
* error scenarios. We don't know how to deal with them here. They
* are harmless as far as searches are considered, but inserts into
* the deleted keyspace could add out-of-order downlinks in the
* upper levels. Log a notice, hopefully the admin will notice and
* reindex.
* are harmless as far as searches are considered, but inserts
* into the deleted keyspace could add out-of-order downlinks in
* the upper levels. Log a notice, hopefully the admin will notice
* and reindex.
*/
if (P_ISHALFDEAD(opaque))
ereport(LOG,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("index \"%s\" contains a half-dead internal page",
RelationGetRelationName(rel)),
errmsg("index \"%s\" contains a half-dead internal page",
RelationGetRelationName(rel)),
errhint("This can be caused by an interrupted VACUUM in version 9.3 or older, before upgrade. Please REINDEX it.")));

_bt_relbuf(rel, buf);
return ndeleted;
}
/*
* We can never delete rightmost pages nor root pages. While at
* it, check that page is not already deleted and is empty.
* We can never delete rightmost pages nor root pages. While at it,
* check that page is not already deleted and is empty.
*
* To keep the algorithm simple, we also never delete an incompletely
* split page (they should be rare enough that this doesn't make any
@@ -1167,10 +1169,10 @@ _bt_pagedel(Relation rel, Buffer buf)
* left half of an incomplete split, but ensuring that it's not the
* right half is more complicated. For that, we have to check that
* the left sibling doesn't have its INCOMPLETE_SPLIT flag set. On
* the first iteration, we temporarily release the lock on the
* current page, and check the left sibling and also construct a
* search stack to. On subsequent iterations, we know we stepped right
* from a page that passed these tests, so it's OK.
* the first iteration, we temporarily release the lock on the current
* page, and check the left sibling and also construct a search stack
* to. On subsequent iterations, we know we stepped right from a page
* that passed these tests, so it's OK.
*/
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
@@ -1184,9 +1186,9 @@ _bt_pagedel(Relation rel, Buffer buf)
}
/*
* First, remove downlink pointing to the page (or a parent of the page,
* if we are going to delete a taller branch), and mark the page as
* half-dead.
* First, remove downlink pointing to the page (or a parent of the
* page, if we are going to delete a taller branch), and mark the page
* as half-dead.
*/
if (!P_ISHALFDEAD(opaque))
{
@@ -1205,7 +1207,7 @@ _bt_pagedel(Relation rel, Buffer buf)
ItemId itemid;
IndexTuple targetkey;
Buffer lbuf;
BlockNumber leftsib;
BlockNumber leftsib;
itemid = PageGetItemId(page, P_HIKEY);
targetkey = CopyIndexTuple((IndexTuple) PageGetItem(page, itemid));
@@ -1219,9 +1221,9 @@ _bt_pagedel(Relation rel, Buffer buf)
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
/*
* Fetch the left sibling, to check that it's not marked
* with INCOMPLETE_SPLIT flag. That would mean that the
* page to-be-deleted doesn't have a downlink, and the page
* Fetch the left sibling, to check that it's not marked with
* INCOMPLETE_SPLIT flag. That would mean that the page
* to-be-deleted doesn't have a downlink, and the page
* deletion algorithm isn't prepared to handle that.
*/
if (!P_LEFTMOST(opaque))
@@ -1267,7 +1269,7 @@ _bt_pagedel(Relation rel, Buffer buf)
/*
* Then unlink it from its siblings. Each call to
*_bt_unlink_halfdead_page unlinks the topmost page from the branch,
* _bt_unlink_halfdead_page unlinks the topmost page from the branch,
* making it shallower. Iterate until the leaf page is gone.
*/
rightsib_empty = false;
@@ -1291,8 +1293,8 @@ _bt_pagedel(Relation rel, Buffer buf)
* is that it was the rightmost child of the parent. Now that we
* removed the downlink for this page, the right sibling might now be
* the only child of the parent, and could be removed. It would be
* picked up by the next vacuum anyway, but might as well try to remove
* it now, so loop back to process the right sibling.
* picked up by the next vacuum anyway, but might as well try to
* remove it now, so loop back to process the right sibling.
*/
if (!rightsib_empty)
break;
@@ -1310,9 +1312,9 @@ _bt_pagedel(Relation rel, Buffer buf)
static bool
_bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
{
BlockNumber leafblkno;
BlockNumber leafblkno;
BlockNumber leafrightsib;
BlockNumber target;
BlockNumber target;
BlockNumber rightsib;
ItemId itemid;
Page page;
@@ -1351,7 +1353,7 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
/*
* Check that the parent-page index items we're about to delete/overwrite
* contain what we expect. This can fail if the index has become corrupt
* contain what we expect. This can fail if the index has become corrupt
* for some reason. We want to throw any error before entering the
* critical section --- otherwise it'd be a PANIC.
*
@@ -1490,9 +1492,9 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
BlockNumber leafleftsib;
BlockNumber leafrightsib;
BlockNumber target;
BlockNumber leftsib;
BlockNumber rightsib;
BlockNumber target;
BlockNumber leftsib;
BlockNumber rightsib;
Buffer lbuf = InvalidBuffer;
Buffer buf;
Buffer rbuf;
@@ -1506,7 +1508,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
int targetlevel;
ItemPointer leafhikey;
BlockNumber nextchild;
BlockNumber topblkno;
BlockNumber topblkno;
page = BufferGetPage(leafbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1596,7 +1598,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
lbuf = InvalidBuffer;
/*
* Next write-lock the target page itself. It should be okay to take just
* Next write-lock the target page itself. It should be okay to take just
* a write lock not a superexclusive lock, since no scans would stop on an
* empty page.
*/
@@ -1605,9 +1607,9 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/*
* Check page is still empty etc, else abandon deletion. This is just
* for paranoia's sake; a half-dead page cannot resurrect because there
* can be only one vacuum process running at a time.
* Check page is still empty etc, else abandon deletion. This is just for
* paranoia's sake; a half-dead page cannot resurrect because there can be
* only one vacuum process running at a time.
*/
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque))
{
@@ -1733,7 +1735,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
* we're in VACUUM and would not otherwise have an XID. Having already
* updated links to the target, ReadNewTransactionId() suffices as an
* upper bound. Any scan having retained a now-stale link is advertising
* in its PGXACT an xmin less than or equal to the value we read here. It
* in its PGXACT an xmin less than or equal to the value we read here. It
* will continue to do so, holding back RecentGlobalXmin, for the duration
* of that scan.
*/

View File

@@ -208,7 +208,7 @@ btbuildempty(PG_FUNCTION_ARGS)
metapage = (Page) palloc(BLCKSZ);
_bt_initmetapage(metapage, P_NONE, 0);
/* Write the page. If archiving/streaming, XLOG it. */
/* Write the page. If archiving/streaming, XLOG it. */
PageSetChecksumInplace(metapage, BTREE_METAPAGE);
smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
(char *) metapage, true);
@@ -427,7 +427,7 @@ btbeginscan(PG_FUNCTION_ARGS)
/*
* We don't know yet whether the scan will be index-only, so we do not
* allocate the tuple workspace arrays until btrescan. However, we set up
* allocate the tuple workspace arrays until btrescan. However, we set up
* scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
*/
so->currTuples = so->markTuples = NULL;
@@ -472,7 +472,7 @@ btrescan(PG_FUNCTION_ARGS)
/*
* Allocate tuple workspace arrays, if needed for an index-only scan and
* not already done in a previous rescan call. To save on palloc
* not already done in a previous rescan call. To save on palloc
* overhead, both workspaces are allocated as one palloc block; only this
* function and btendscan know that.
*
@@ -952,7 +952,7 @@ restart:
vstate->lastBlockLocked = blkno;
/*
* Check whether we need to recurse back to earlier pages. What we
* Check whether we need to recurse back to earlier pages. What we
* are concerned about is a page split that happened since we started
* the vacuum scan. If the split moved some tuples to a lower page
* then we might have missed 'em. If so, set up for tail recursion.

View File

@@ -50,7 +50,7 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
*
* NOTE that the returned buffer is read-locked regardless of the access
* parameter. However, access = BT_WRITE will allow an empty root page
* to be created and returned. When access = BT_READ, an empty index
* to be created and returned. When access = BT_READ, an empty index
* will result in *bufP being set to InvalidBuffer. Also, in BT_WRITE mode,
* any incomplete splits encountered during the search will be finished.
*/
@@ -271,7 +271,7 @@ _bt_moveright(Relation rel,
* (or leaf keys > given scankey when nextkey is true).
*
* This procedure is not responsible for walking right, it just examines
* the given page. _bt_binsrch() has no lock or refcount side effects
* the given page. _bt_binsrch() has no lock or refcount side effects
* on the buffer.
*/
OffsetNumber
@@ -403,7 +403,7 @@ _bt_compare(Relation rel,
/*
* The scan key is set up with the attribute number associated with each
* term in the key. It is important that, if the index is multi-key, the
* scan contain the first k key attributes, and that they be in order. If
* scan contain the first k key attributes, and that they be in order. If
* you think about how multi-key ordering works, you'll understand why
* this is.
*
@@ -442,7 +442,7 @@ _bt_compare(Relation rel,
/*
* The sk_func needs to be passed the index value as left arg and
* the sk_argument as right arg (they might be of different
* types). Since it is convenient for callers to think of
* types). Since it is convenient for callers to think of
* _bt_compare as comparing the scankey to the index item, we have
* to flip the sign of the comparison result. (Unless it's a DESC
* column, in which case we *don't* flip the sign.)
@@ -471,7 +471,7 @@ _bt_compare(Relation rel,
* _bt_first() -- Find the first item in a scan.
*
* We need to be clever about the direction of scan, the search
* conditions, and the tree ordering. We find the first item (or,
* conditions, and the tree ordering. We find the first item (or,
* if backwards scan, the last item) in the tree that satisfies the
* qualifications in the scan key. On success exit, the page containing
* the current index tuple is pinned but not locked, and data about
@@ -527,7 +527,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* We want to identify the keys that can be used as starting boundaries;
* these are =, >, or >= keys for a forward scan or =, <, <= keys for
* a backwards scan. We can use keys for multiple attributes so long as
* the prior attributes had only =, >= (resp. =, <=) keys. Once we accept
* the prior attributes had only =, >= (resp. =, <=) keys. Once we accept
* a > or < boundary or find an attribute with no boundary (which can be
* thought of as the same as "> -infinity"), we can't use keys for any
* attributes to its right, because it would break our simplistic notion
@@ -742,7 +742,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* even if the row comparison is of ">" or "<" type, because the
* condition applied to all but the last row member is effectively
* ">=" or "<=", and so the extra keys don't break the positioning
* scheme. But, by the same token, if we aren't able to use all
* scheme. But, by the same token, if we aren't able to use all
* the row members, then the part of the row comparison that we
* did use has to be treated as just a ">=" or "<=" condition, and
* so we'd better adjust strat_total accordingly.
@@ -861,7 +861,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
/*
* Find first item >= scankey, then back up one to arrive at last
* item < scankey. (Note: this positioning strategy is only used
* item < scankey. (Note: this positioning strategy is only used
* for a backward scan, so that is always the correct starting
* position.)
*/
@@ -910,7 +910,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
case BTGreaterEqualStrategyNumber:
/*
* Find first item >= scankey. (This is only used for forward
* Find first item >= scankey. (This is only used for forward
* scans.)
*/
nextkey = false;
@@ -988,7 +988,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
*
* The actually desired starting point is either this item or the prior
* one, or in the end-of-page case it's the first item on the next page or
* the last item on this page. Adjust the starting offset if needed. (If
* the last item on this page. Adjust the starting offset if needed. (If
* this results in an offset before the first item or after the last one,
* _bt_readpage will report no items found, and then we'll step to the
* next page as needed.)
@@ -1304,7 +1304,7 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
* than the walk-right case because of the possibility that the page
* to our left splits while we are in flight to it, plus the
* possibility that the page we were on gets deleted after we leave
* it. See nbtree/README for details.
* it. See nbtree/README for details.
*/
for (;;)
{
@@ -1399,7 +1399,7 @@ _bt_walk_left(Relation rel, Buffer buf)
* anymore, not that its left sibling got split more than four times.
*
* Note that it is correct to test P_ISDELETED not P_IGNORE here,
* because half-dead pages are still in the sibling chain. Caller
* because half-dead pages are still in the sibling chain. Caller
* must reject half-dead pages if wanted.
*/
tries = 0;
@@ -1425,7 +1425,7 @@ _bt_walk_left(Relation rel, Buffer buf)
if (P_ISDELETED(opaque))
{
/*
* It was deleted. Move right to first nondeleted page (there
* It was deleted. Move right to first nondeleted page (there
* must be one); that is the page that has acquired the deleted
* one's keyspace, so stepping left from it will take us where we
* want to be.
@@ -1469,7 +1469,7 @@ _bt_walk_left(Relation rel, Buffer buf)
* _bt_get_endpoint() -- Find the first or last page on a given tree level
*
* If the index is empty, we will return InvalidBuffer; any other failure
* condition causes ereport(). We will not return a dead page.
* condition causes ereport(). We will not return a dead page.
*
* The returned buffer is pinned and read-locked.
*/

View File

@@ -7,7 +7,7 @@
*
* We use tuplesort.c to sort the given index tuples into order.
* Then we scan the index tuples in order and build the btree pages
* for each level. We load source tuples into leaf-level pages.
* for each level. We load source tuples into leaf-level pages.
* Whenever we fill a page at one level, we add a link to it to its
* parent level (starting a new parent level if necessary). When
* done, we write out each final page on each level, adding it to
@@ -42,11 +42,11 @@
*
* Since the index will never be used unless it is completely built,
* from a crash-recovery point of view there is no need to WAL-log the
* steps of the build. After completing the index build, we can just sync
* steps of the build. After completing the index build, we can just sync
* the whole file to disk using smgrimmedsync() before exiting this module.
* This can be seen to be sufficient for crash recovery by considering that
* it's effectively equivalent to what would happen if a CHECKPOINT occurred
* just after the index build. However, it is clearly not sufficient if the
* just after the index build. However, it is clearly not sufficient if the
* DBA is using the WAL log for PITR or replication purposes, since another
* machine would not be able to reconstruct the index from WAL. Therefore,
* we log the completed index pages to WAL if and only if WAL archiving is
@@ -89,7 +89,7 @@ struct BTSpool
};
/*
* Status record for a btree page being built. We have one of these
* Status record for a btree page being built. We have one of these
* for each active tree level.
*
* The reason we need to store a copy of the minimum key is that we'll
@@ -160,7 +160,7 @@ _bt_spoolinit(Relation heap, Relation index, bool isunique, bool isdead)
* We size the sort area as maintenance_work_mem rather than work_mem to
* speed index creation. This should be OK since a single backend can't
* run multiple index creations in parallel. Note that creation of a
* unique index actually requires two BTSpool objects. We expect that the
* unique index actually requires two BTSpool objects. We expect that the
* second one (for dead tuples) won't get very full, so we give it only
* work_mem.
*/
@@ -298,7 +298,7 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
PageSetChecksumInplace(page, blkno);
/*
* Now write the page. There's no need for smgr to schedule an fsync for
* Now write the page. There's no need for smgr to schedule an fsync for
* this write; we'll do it ourselves before ending the build.
*/
if (blkno == wstate->btws_pages_written)
@@ -423,14 +423,14 @@ _bt_sortaddtup(Page page,
* A leaf page being built looks like:
*
* +----------------+---------------------------------+
* | PageHeaderData | linp0 linp1 linp2 ... |
* | PageHeaderData | linp0 linp1 linp2 ... |
* +-----------+----+---------------------------------+
* | ... linpN | |
* +-----------+--------------------------------------+
* | ^ last |
* | |
* +-------------+------------------------------------+
* | | itemN ... |
* | | itemN ... |
* +-------------+------------------+-----------------+
* | ... item3 item2 item1 | "special space" |
* +--------------------------------+-----------------+
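
A rough model of the layout sketched in that diagram (standard PostgreSQL page layout, simplified): line pointers grow forward from the header, item data grows backward from the special space, and the usable gap between the two is the page's free space. Field names echo the real PageHeaderData but this is only an illustration, not the server's PageGetFreeSpace().

#include <stdint.h>

typedef struct ToyPageHeader
{
	uint16_t	pd_lower;		/* end of the line-pointer (linp) array */
	uint16_t	pd_upper;		/* start of item data */
	uint16_t	pd_special;		/* start of "special space" */
} ToyPageHeader;

static int
toy_free_space(const ToyPageHeader *ph)
{
	/* the hole between the linp array and the last-added item */
	return (int) ph->pd_upper - (int) ph->pd_lower;
}
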
@@ -492,9 +492,9 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
RelationGetRelationName(wstate->index))));
/*
* Check to see if page is "full". It's definitely full if the item won't
* Check to see if page is "full". It's definitely full if the item won't
* fit. Otherwise, compare to the target freespace derived from the
* fillfactor. However, we must put at least two items on each page, so
* fillfactor. However, we must put at least two items on each page, so
* disregard fillfactor if we don't have that many.
*/
if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY))
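
The fillfactor target used in that test can be sketched as simple arithmetic: with fillfactor f percent, roughly (100 - f)% of the page is reserved, and the page is treated as "full" once its remaining free space drops below that reserve. The real value comes from the relation's storage options; this shows only the calculation.

#define TOY_BLCKSZ	8192

static int
toy_target_free_space(int fillfactor)
{
	/* e.g. fillfactor 90 keeps about 10% of the page free */
	return TOY_BLCKSZ * (100 - fillfactor) / 100;
}
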
@@ -567,7 +567,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
}
/*
* Write out the old page. We never need to touch it again, so we can
* Write out the old page. We never need to touch it again, so we can
* free the opage workspace too.
*/
_bt_blwritepage(wstate, opage, oblkno);
@@ -804,7 +804,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
/*
* If the index is WAL-logged, we must fsync it down to disk before it's
* safe to commit the transaction. (For a non-WAL-logged index we don't
* safe to commit the transaction. (For a non-WAL-logged index we don't
* care since the index will be uninteresting after a crash anyway.)
*
* It's obvious that we must do this when not WAL-logging the build. It's

View File

@@ -107,7 +107,7 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
* comparison data ultimately used must match the key datatypes.
*
* The result cannot be used with _bt_compare(), unless comparison
* data is first stored into the key entries. Currently this
* data is first stored into the key entries. Currently this
* routine is only called by nbtsort.c and tuplesort.c, which have
* their own comparison routines.
*/
@@ -269,7 +269,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
continue;
/*
* First, deconstruct the array into elements. Anything allocated
* First, deconstruct the array into elements. Anything allocated
* here (including a possibly detoasted array value) is in the
* workspace context.
*/
@@ -283,7 +283,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
&elem_values, &elem_nulls, &num_elems);
/*
* Compress out any null elements. We can ignore them since we assume
* Compress out any null elements. We can ignore them since we assume
* all btree operators are strict.
*/
num_nonnulls = 0;
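
A minimal sketch of the null-compression step described here, using plain ints in place of the Datum arrays the real code handles: since strict operators can never return true for a NULL element, NULLs are simply squeezed out before the values are sorted and deduplicated.

static int
toy_compress_out_nulls(int *values, const char *nulls, int num_elems)
{
	int			num_nonnulls = 0;

	for (int i = 0; i < num_elems; i++)
	{
		if (!nulls[i])
			values[num_nonnulls++] = values[i];
	}
	return num_nonnulls;		/* new element count */
}
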
@@ -517,7 +517,7 @@ _bt_compare_array_elements(const void *a, const void *b, void *arg)
* _bt_start_array_keys() -- Initialize array keys at start of a scan
*
* Set up the cur_elem counters and fill in the first sk_argument value for
* each array scankey. We can't do this until we know the scan direction.
* each array scankey. We can't do this until we know the scan direction.
*/
void
_bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
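
A one-line sketch of the direction dependence noted above (toy types, not the real ScanKey machinery): a forward scan starts each array key at its smallest element, a backward scan at its largest, and that element becomes the key's initial argument.

#include <stdbool.h>

static int
toy_start_elem(int num_elems, bool forward)
{
	return forward ? 0 : num_elems - 1;
}
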
@@ -670,8 +670,8 @@ _bt_restore_array_keys(IndexScanDesc scan)
* so that the index sorts in the desired direction.
*
* One key purpose of this routine is to discover which scan keys must be
* satisfied to continue the scan. It also attempts to eliminate redundant
* keys and detect contradictory keys. (If the index opfamily provides
* satisfied to continue the scan. It also attempts to eliminate redundant
* keys and detect contradictory keys. (If the index opfamily provides
* incomplete sets of cross-type operators, we may fail to detect redundant
* or contradictory keys, but we can survive that.)
*
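
A toy model of the redundant/contradictory key elimination described here, on a single int column: keep only the strictest lower and upper bound, and report an impossible qual when they cross. The real code compares ScanKeys with opfamily operators and also handles equality and cross-type cases.

#include <stdbool.h>

typedef struct ToyBounds
{
	int			lower;			/* qual is "x > lower" */
	int			upper;			/* qual is "x < upper" */
} ToyBounds;

static void
toy_add_lower(ToyBounds *b, int bound)
{
	/* x > 4 AND x > 10 reduces to x > 10: keep only the stricter key */
	if (bound > b->lower)
		b->lower = bound;
}

static bool
toy_bounds_satisfiable(const ToyBounds *b)
{
	/* x > 10 AND x < 5 can be proven empty, so the scan returns nothing */
	return b->lower < b->upper;
}
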
@@ -702,7 +702,7 @@ _bt_restore_array_keys(IndexScanDesc scan)
* that's the only one returned. (So, we return either a single = key,
* or one or two boundary-condition keys for each attr.) However, if we
* cannot compare two keys for lack of a suitable cross-type operator,
* we cannot eliminate either. If there are two such keys of the same
* we cannot eliminate either. If there are two such keys of the same
* operator strategy, the second one is just pushed into the output array
* without further processing here. We may also emit both >/>= or both
* </<= keys if we can't compare them. The logic about required keys still
@@ -737,7 +737,7 @@ _bt_restore_array_keys(IndexScanDesc scan)
* Note: the reason we have to copy the preprocessed scan keys into private
* storage is that we are modifying the array based on comparisons of the
* key argument values, which could change on a rescan or after moving to
* new elements of array keys. Therefore we can't overwrite the source data.
* new elements of array keys. Therefore we can't overwrite the source data.
*/
void
_bt_preprocess_keys(IndexScanDesc scan)
@@ -919,7 +919,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
/*
* Emit the cleaned-up keys into the outkeys[] array, and then
* mark them if they are required. They are required (possibly
* mark them if they are required. They are required (possibly
* only in one direction) if all attrs before this one had "=".
*/
for (j = BTMaxStrategyNumber; --j >= 0;)
@@ -1017,7 +1017,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
* and amoplefttype/amoprighttype equal to the two argument datatypes.
*
* If the opfamily doesn't supply a complete set of cross-type operators we
* may not be able to make the comparison. If we can make the comparison
* may not be able to make the comparison. If we can make the comparison
* we store the operator result in *result and return TRUE. We return FALSE
* if the comparison could not be made.
*
@@ -1043,7 +1043,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
StrategyNumber strat;
/*
* First, deal with cases where one or both args are NULL. This should
* First, deal with cases where one or both args are NULL. This should
* only happen when the scankeys represent IS NULL/NOT NULL conditions.
*/
if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ISNULL)
@@ -1183,7 +1183,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
*
* Lastly, for ordinary scankeys (not IS NULL/NOT NULL), we check for a
* NULL comparison value. Since all btree operators are assumed strict,
* a NULL means that the qual cannot be satisfied. We return TRUE if the
* a NULL means that the qual cannot be satisfied. We return TRUE if the
* comparison value isn't NULL, or FALSE if the scan should be abandoned.
*
* This function is applied to the *input* scankey structure; therefore
@@ -1212,7 +1212,7 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
* --- we can treat IS NULL as an equality operator for purposes of search
* strategy.
*
* Likewise, "x IS NOT NULL" is supported. We treat that as either "less
* Likewise, "x IS NOT NULL" is supported. We treat that as either "less
* than NULL" in a NULLS LAST index, or "greater than NULL" in a NULLS
* FIRST index.
*
@@ -1284,7 +1284,7 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
* Mark a scankey as "required to continue the scan".
*
* Depending on the operator type, the key may be required for both scan
* directions or just one. Also, if the key is a row comparison header,
* directions or just one. Also, if the key is a row comparison header,
* we have to mark the appropriate subsidiary ScanKeys as required. In
* such cases, the first subsidiary key is required, but subsequent ones
* are required only as long as they correspond to successive index columns
@@ -1296,7 +1296,7 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
* scribbling on a data structure belonging to the index AM's caller, not on
* our private copy. This should be OK because the marking will not change
* from scan to scan within a query, and so we'd just re-mark the same way
* anyway on a rescan. Something to keep an eye on though.
* anyway on a rescan. Something to keep an eye on though.
*/
static void
_bt_mark_scankey_required(ScanKey skey)
@@ -1482,7 +1482,7 @@ _bt_checkkeys(IndexScanDesc scan,
/*
* Since NULLs are sorted before non-NULLs, we know we have
* reached the lower limit of the range of values for this
* index attr. On a backward scan, we can stop if this qual
* index attr. On a backward scan, we can stop if this qual
* is one of the "must match" subset. We can stop regardless
* of whether the qual is > or <, so long as it's required,
* because it's not possible for any future tuples to pass. On
@@ -1498,8 +1498,8 @@ _bt_checkkeys(IndexScanDesc scan,
/*
* Since NULLs are sorted after non-NULLs, we know we have
* reached the upper limit of the range of values for this
* index attr. On a forward scan, we can stop if this qual is
* one of the "must match" subset. We can stop regardless of
* index attr. On a forward scan, we can stop if this qual is
* one of the "must match" subset. We can stop regardless of
* whether the qual is > or <, so long as it's required,
* because it's not possible for any future tuples to pass. On
* a backward scan, however, we must keep going, because we
@@ -1593,7 +1593,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
/*
* Since NULLs are sorted before non-NULLs, we know we have
* reached the lower limit of the range of values for this
* index attr. On a backward scan, we can stop if this qual
* index attr. On a backward scan, we can stop if this qual
* is one of the "must match" subset. We can stop regardless
* of whether the qual is > or <, so long as it's required,
* because it's not possible for any future tuples to pass. On
@@ -1609,8 +1609,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
/*
* Since NULLs are sorted after non-NULLs, we know we have
* reached the upper limit of the range of values for this
* index attr. On a forward scan, we can stop if this qual is
* one of the "must match" subset. We can stop regardless of
* index attr. On a forward scan, we can stop if this qual is
* one of the "must match" subset. We can stop regardless of
* whether the qual is > or <, so long as it's required,
* because it's not possible for any future tuples to pass. On
* a backward scan, however, we must keep going, because we
@@ -1631,7 +1631,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
{
/*
* Unlike the simple-scankey case, this isn't a disallowed case.
* But it can never match. If all the earlier row comparison
* But it can never match. If all the earlier row comparison
* columns are required for the scan direction, we can stop the
* scan, because there can't be another tuple that will succeed.
*/
@@ -1696,7 +1696,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
/*
* Tuple fails this qual. If it's a required qual for the current
* scan direction, then we can conclude no further tuples will pass,
* either. Note we have to look at the deciding column, not
* either. Note we have to look at the deciding column, not
* necessarily the first or last column of the row condition.
*/
if ((subkey->sk_flags & SK_BT_REQFWD) &&
@@ -1722,7 +1722,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
* is sufficient for setting LP_DEAD status (which is only a hint).
*
* We match items by heap TID before assuming they are the right ones to
* delete. We cope with cases where items have moved right due to insertions.
* delete. We cope with cases where items have moved right due to insertions.
* If an item has moved off the current page due to a split, we'll fail to
* find it and do nothing (this is not an error case --- we assume the item
* will eventually get marked in a future indexscan). Note that because we
@@ -1806,8 +1806,8 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
/*
* The following routines manage a shared-memory area in which we track
* assignment of "vacuum cycle IDs" to currently-active btree vacuuming
* operations. There is a single counter which increments each time we
* start a vacuum to assign it a cycle ID. Since multiple vacuums could
* operations. There is a single counter which increments each time we
* start a vacuum to assign it a cycle ID. Since multiple vacuums could
* be active concurrently, we have to track the cycle ID for each active
* vacuum; this requires at most MaxBackends entries (usually far fewer).
* We assume at most one vacuum can be active for a given index.
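
The shared-memory area described here has roughly the following shape (names are illustrative, not the exact server definitions): a wrapping cycle counter plus one slot per active vacuum recording which index it is working on.

#include <stdint.h>

typedef struct ToyVacInfo
{
	uint32_t	reloid;			/* index being vacuumed */
	uint16_t	cycleid;		/* cycle ID assigned to that vacuum */
} ToyVacInfo;

typedef struct ToyVacShared
{
	uint16_t	cycle_ctr;		/* incremented for each new vacuum */
	int			num_vacuums;	/* active entries in vacuums[] */
	int			max_vacuums;	/* allocated length, about MaxBackends */
	ToyVacInfo	vacuums[1];		/* variable-length array */
} ToyVacShared;
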

View File

@@ -40,9 +40,9 @@ _bt_restore_page(Page page, char *from, int len)
int nitems;
/*
* To get the items back in the original order, we add them to the page
* in reverse. To figure out where one tuple ends and another begins,
* we have to scan them in forward order first.
* To get the items back in the original order, we add them to the page in
* reverse. To figure out where one tuple ends and another begins, we
* have to scan them in forward order first.
*/
i = 0;
while (from < end)
@@ -97,7 +97,7 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
pageop->btpo_flags = BTP_META;
/*
* Set pd_lower just past the end of the metadata. This is not essential
* Set pd_lower just past the end of the metadata. This is not essential
* but it makes the page look compressible to xlog.c.
*/
((PageHeader) metapg)->pd_lower =
@@ -118,7 +118,7 @@ static void
_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
RelFileNode rnode, BlockNumber cblock)
{
Buffer buf;
Buffer buf;
buf = XLogReadBuffer(rnode, cblock, false);
if (BufferIsValid(buf))
@@ -128,6 +128,7 @@ _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
if (lsn > PageGetLSN(page))
{
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
Assert((pageop->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0);
pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
@@ -153,6 +154,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
datapos = (char *) xlrec + SizeOfBtreeInsert;
datalen = record->xl_len - SizeOfBtreeInsert;
/*
* if this insert finishes a split at lower level, extract the block
* number of the (left) child.
@@ -172,10 +174,10 @@ btree_xlog_insert(bool isleaf, bool ismeta,
}
/*
* Insertion to an internal page finishes an incomplete split at the
* child level. Clear the incomplete-split flag in the child. Note:
* during normal operation, the child and parent pages are locked at the
* same time, so that clearing the flag and inserting the downlink appear
* Insertion to an internal page finishes an incomplete split at the child
* level. Clear the incomplete-split flag in the child. Note: during
* normal operation, the child and parent pages are locked at the same
* time, so that clearing the flag and inserting the downlink appear
* atomic to other backends. We don't bother with that during replay,
* because readers don't care about the incomplete-split flag and there
* cannot be updates happening.
@@ -279,9 +281,10 @@ btree_xlog_split(bool onleft, bool isroot,
datapos += left_hikeysz;
datalen -= left_hikeysz;
}
/*
* If this insertion finishes an incomplete split, get the block number
* of the child.
* If this insertion finishes an incomplete split, get the block number of
* the child.
*/
if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(1)))
{
@@ -439,7 +442,7 @@ btree_xlog_split(bool onleft, bool isroot,
* the backup block containing right sibling is 2 or 3, depending
* whether this was a leaf or internal page.
*/
int rnext_index = isleaf ? 2 : 3;
int rnext_index = isleaf ? 2 : 3;
if (record->xl_info & XLR_BKP_BLOCK(rnext_index))
(void) RestoreBackupBlock(lsn, record, rnext_index, false, false);
@@ -620,7 +623,7 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
/*
* In what follows, we have to examine the previous state of the index
* page, as well as the heap page(s) it points to. This is only valid if
* page, as well as the heap page(s) it points to. This is only valid if
* WAL replay has reached a consistent database state; which means that
* the preceding check is not just an optimization, but is *necessary*. We
* won't have let in any user sessions before we reach consistency.
@@ -629,9 +632,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
elog(PANIC, "btree_xlog_delete_get_latestRemovedXid: cannot operate with inconsistent data");
/*
* Get index page. If the DB is consistent, this should not fail, nor
* Get index page. If the DB is consistent, this should not fail, nor
* should any of the heap page fetches below. If one does, we return
* InvalidTransactionId to cancel all HS transactions. That's probably
* InvalidTransactionId to cancel all HS transactions. That's probably
* overkill, but it's safe, and certainly better than panicking here.
*/
ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
@@ -716,9 +719,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
/*
* If all heap tuples were LP_DEAD then we will be returning
* InvalidTransactionId here, which avoids conflicts. This matches
* existing logic which assumes that LP_DEAD tuples must already be
* older than the latestRemovedXid on the cleanup record that
* set them as LP_DEAD, hence must already have generated a conflict.
* existing logic which assumes that LP_DEAD tuples must already be older
* than the latestRemovedXid on the cleanup record that set them as
* LP_DEAD, hence must already have generated a conflict.
*/
return latestRemovedXid;
}
@@ -735,7 +738,7 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
* If we have any conflict processing to do, it must happen before we
* update the page.
*
* Btree delete records can conflict with standby queries. You might
* Btree delete records can conflict with standby queries. You might
* think that vacuum records would conflict as well, but we've handled
* that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
* cleaned by the vacuum of the heap and so we can resolve any conflicts
@@ -828,7 +831,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
ItemId itemid;
IndexTuple itup;
OffsetNumber nextoffset;
BlockNumber rightsib;
BlockNumber rightsib;
poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));

View File

@@ -54,7 +54,7 @@ desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
walbuf += nitems * sizeof(ItemPointerData);
}
switch(a_action)
switch (a_action)
{
case GIN_SEGMENT_ADDITEMS:
appendStringInfo(buf, " %d (add %d items)", a_segno, nitems);
@@ -94,13 +94,13 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
case XLOG_GIN_INSERT:
{
ginxlogInsert *xlrec = (ginxlogInsert *) rec;
char *payload = rec + sizeof(ginxlogInsert);
char *payload = rec + sizeof(ginxlogInsert);
appendStringInfoString(buf, "Insert item, ");
desc_node(buf, xlrec->node, xlrec->blkno);
appendStringInfo(buf, " isdata: %c isleaf: %c",
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
if (!(xlrec->flags & GIN_INSERT_ISLEAF))
{
BlockNumber leftChildBlkno;
@@ -115,11 +115,11 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
}
if (!(xlrec->flags & GIN_INSERT_ISDATA))
appendStringInfo(buf, " isdelete: %c",
(((ginxlogInsertEntry *) payload)->isDelete) ? 'T' : 'F');
(((ginxlogInsertEntry *) payload)->isDelete) ? 'T' : 'F');
else if (xlrec->flags & GIN_INSERT_ISLEAF)
{
ginxlogRecompressDataLeaf *insertData =
(ginxlogRecompressDataLeaf *) payload;
(ginxlogRecompressDataLeaf *) payload;
if (xl_info & XLR_BKP_BLOCK(0))
appendStringInfo(buf, " (full page image)");
@@ -129,10 +129,11 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
else
{
ginxlogInsertDataInternal *insertData = (ginxlogInsertDataInternal *) payload;
appendStringInfo(buf, " pitem: %u-%u/%u",
PostingItemGetBlockNumber(&insertData->newitem),
ItemPointerGetBlockNumber(&insertData->newitem.key),
ItemPointerGetOffsetNumber(&insertData->newitem.key));
PostingItemGetBlockNumber(&insertData->newitem),
ItemPointerGetBlockNumber(&insertData->newitem.key),
ItemPointerGetOffsetNumber(&insertData->newitem.key));
}
}
break;
@@ -144,8 +145,8 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
appendStringInfo(buf, " isdata: %c isleaf: %c",
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
}
break;
case XLOG_GIN_VACUUM_PAGE:
@@ -155,6 +156,7 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
{
ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) rec;
appendStringInfoString(buf, "Vacuum data leaf page, ");
desc_node(buf, xlrec->node, xlrec->blkno);
if (xl_info & XLR_BKP_BLOCK(0))

View File

@@ -140,7 +140,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec;
appendStringInfo(buf, "unlink_page: rel %u/%u/%u; ",
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
appendStringInfo(buf, "dead %u; left %u; right %u; btpo_xact %u; ",
xlrec->deadblk, xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact);
appendStringInfo(buf, "leaf %u; leafleft %u; leafright %u; topparent %u",

View File

@@ -25,7 +25,7 @@
/*
* SPPageDesc tracks all info about a page we are inserting into. In some
* situations it actually identifies a tuple, or even a specific node within
* an inner tuple. But any of the fields can be invalid. If the buffer
* an inner tuple. But any of the fields can be invalid. If the buffer
* field is valid, it implies we hold pin and exclusive lock on that buffer.
* page pointer should be valid exactly when buffer is.
*/
@@ -249,7 +249,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
else
{
/*
* Tuple must be inserted into existing chain. We mustn't change the
* Tuple must be inserted into existing chain. We mustn't change the
* chain's head address, but we don't need to chase the entire chain
* to put the tuple at the end; we can insert it second.
*
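
A toy singly linked chain illustrating the trick described above: the head's address must stay fixed, but the new tuple does not have to go at the end of the chain either; it can simply be linked in right after the head.

typedef struct ToyLeaf
{
	struct ToyLeaf *next;
	int			value;
} ToyLeaf;

static void
toy_insert_second(ToyLeaf *head, ToyLeaf *newleaf)
{
	/* splice the new tuple in without touching the chain's head address */
	newleaf->next = head->next;
	head->next = newleaf;
}
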
@@ -814,7 +814,7 @@ doPickSplit(Relation index, SpGistState *state,
* We may not actually insert new tuple because another picksplit may be
* necessary due to too large value, but we will try to allocate enough
* space to include it; and in any case it has to be included in the input
* for the picksplit function. So don't increment nToInsert yet.
* for the picksplit function. So don't increment nToInsert yet.
*/
in.datums[in.nTuples] = SGLTDATUM(newLeafTuple, state);
heapPtrs[in.nTuples] = newLeafTuple->heapPtr;
@@ -872,7 +872,7 @@ doPickSplit(Relation index, SpGistState *state,
/*
* Check to see if the picksplit function failed to separate the values,
* ie, it put them all into the same child node. If so, select allTheSame
* mode and create a random split instead. See comments for
* mode and create a random split instead. See comments for
* checkAllTheSame as to why we need to know if the new leaf tuples could
* fit on one page.
*/
@@ -1037,7 +1037,7 @@ doPickSplit(Relation index, SpGistState *state,
&xlrec.initDest);
/*
* Attempt to assign node groups to the two pages. We might fail to
* Attempt to assign node groups to the two pages. We might fail to
* do so, even if totalLeafSizes is less than the available space,
* because we can't split a group across pages.
*/
@@ -1917,7 +1917,7 @@ spgdoinsert(Relation index, SpGistState *state,
if (current.blkno == InvalidBlockNumber)
{
/*
* Create a leaf page. If leafSize is too large to fit on a page,
* Create a leaf page. If leafSize is too large to fit on a page,
* we won't actually use the page yet, but it simplifies the API
* for doPickSplit to always have a leaf page at hand; so just
* quietly limit our request to a page size.
@@ -2120,7 +2120,7 @@ spgdoinsert(Relation index, SpGistState *state,
out.result.addNode.nodeLabel);
/*
* Retry insertion into the enlarged node. We assume that
* Retry insertion into the enlarged node. We assume that
* we'll get a MatchNode result this time.
*/
goto process_inner_tuple;

View File

@@ -163,7 +163,7 @@ spgbuildempty(PG_FUNCTION_ARGS)
page = (Page) palloc(BLCKSZ);
SpGistInitMetapage(page);
/* Write the page. If archiving/streaming, XLOG it. */
/* Write the page. If archiving/streaming, XLOG it. */
PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
(char *) page, true);
@@ -232,7 +232,7 @@ spginsert(PG_FUNCTION_ARGS)
/*
* We might have to repeat spgdoinsert() multiple times, if conflicts
* occur with concurrent insertions. If so, reset the insertCtx each time
* to avoid cumulative memory consumption. That means we also have to
* to avoid cumulative memory consumption. That means we also have to
* redo initSpGistState(), but it's cheap enough not to matter.
*/
while (!spgdoinsert(index, &spgstate, ht_ctid, *values, *isnull))

View File

@@ -103,7 +103,7 @@ resetSpGistScanOpaque(SpGistScanOpaque so)
* Sets searchNulls, searchNonNulls, numberOfKeys, keyData fields of *so.
*
* The point here is to eliminate null-related considerations from what the
* opclass consistent functions need to deal with. We assume all SPGiST-
* opclass consistent functions need to deal with. We assume all SPGiST-
* indexable operators are strict, so any null RHS value makes the scan
* condition unsatisfiable. We also pull out any IS NULL/IS NOT NULL
* conditions; their effect is reflected into searchNulls/searchNonNulls.
@@ -600,7 +600,7 @@ storeGettuple(SpGistScanOpaque so, ItemPointer heapPtr,
if (so->want_itup)
{
/*
* Reconstruct desired IndexTuple. We have to copy the datum out of
* Reconstruct desired IndexTuple. We have to copy the datum out of
* the temp context anyway, so we may as well create the tuple here.
*/
so->indexTups[so->nPtrs] = index_form_tuple(so->indexTupDesc,

View File

@@ -26,11 +26,11 @@
* In the worst case, an inner tuple in a text radix tree could have as many
* as 256 nodes (one for each possible byte value). Each node can take 16
* bytes on MAXALIGN=8 machines. The inner tuple must fit on an index page
* of size BLCKSZ. Rather than assuming we know the exact amount of overhead
* of size BLCKSZ. Rather than assuming we know the exact amount of overhead
* imposed by page headers, tuple headers, etc, we leave 100 bytes for that
* (the actual overhead should be no more than 56 bytes at this writing, so
* there is slop in this number). So we can safely create prefixes up to
* BLCKSZ - 256 * 16 - 100 bytes long. Unfortunately, because 256 * 16 is
* BLCKSZ - 256 * 16 - 100 bytes long. Unfortunately, because 256 * 16 is
* already 4K, there is no safe prefix length when BLCKSZ is less than 8K;
* it is always possible to get "SPGiST inner tuple size exceeds maximum"
* if there are too many distinct next-byte values at a given place in the
@@ -327,7 +327,7 @@ spg_text_picksplit(PG_FUNCTION_ARGS)
}
/*
* Sort by label bytes so that we can group the values into nodes. This
* Sort by label bytes so that we can group the values into nodes. This
* also ensures that the nodes are ordered by label value, allowing the
* use of binary search in searchChar.
*/
@@ -377,7 +377,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
/*
* Reconstruct values represented at this tuple, including parent data,
* prefix of this tuple if any, and the node label if any. in->level
* prefix of this tuple if any, and the node label if any. in->level
* should be the length of the previously reconstructed value, and the
* number of bytes added here is prefixSize or prefixSize + 1.
*

View File

@@ -235,7 +235,7 @@ SpGistUpdateMetaPage(Relation index)
*
* When requesting an inner page, if we get one with the wrong parity,
* we just release the buffer and try again. We will get a different page
* because GetFreeIndexPage will have marked the page used in FSM. The page
* because GetFreeIndexPage will have marked the page used in FSM. The page
* is entered in our local lastUsedPages cache, so there's some hope of
* making use of it later in this session, but otherwise we rely on VACUUM
* to eventually re-enter the page in FSM, making it available for recycling.
@@ -245,7 +245,7 @@ SpGistUpdateMetaPage(Relation index)
*
* When we return a buffer to the caller, the page is *not* entered into
* the lastUsedPages cache; we expect the caller will do so after it's taken
* whatever space it will use. This is because after the caller has used up
* whatever space it will use. This is because after the caller has used up
* some space, the page might have less space than whatever was cached already
* so we'd rather not trash the old cache entry.
*/
@@ -317,7 +317,7 @@ SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
/*
* If possible, increase the space request to include relation's
* fillfactor. This ensures that when we add unrelated tuples to a page,
* fillfactor. This ensures that when we add unrelated tuples to a page,
* we try to keep 100-fillfactor% available for adding tuples that are
* related to the ones already on it. But fillfactor mustn't cause an
* error for requests that would otherwise be legal.

View File

@@ -211,7 +211,7 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
* Figure out exactly what we have to do. We do this separately from
* actually modifying the page, mainly so that we have a representation
* that can be dumped into WAL and then the replay code can do exactly
* the same thing. The output of this step consists of six arrays
* the same thing. The output of this step consists of six arrays
* describing four kinds of operations, to be performed in this order:
*
* toDead[]: tuple numbers to be replaced with DEAD tuples
@@ -287,7 +287,7 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
else
{
/*
* Second or later live tuple. Arrange to re-chain it to the
* Second or later live tuple. Arrange to re-chain it to the
* previous live one, if there was a gap.
*/
if (interveningDeletable)

View File

@@ -41,7 +41,7 @@ fillFakeState(SpGistState *state, spgxlogState stateSrc)
}
/*
* Add a leaf tuple, or replace an existing placeholder tuple. This is used
* Add a leaf tuple, or replace an existing placeholder tuple. This is used
* to replay SpGistPageAddNewItem() operations. If the offset points at an
* existing tuple, it had better be a placeholder tuple.
*/
@@ -462,7 +462,7 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
}
/*
* Update parent downlink. Since parent could be in either of the
* Update parent downlink. Since parent could be in either of the
* previous two buffers, it's a bit tricky to determine which BKP bit
* applies.
*/
@@ -799,7 +799,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
bbi++;
/*
* Now we can release the leaf-page locks. It's okay to do this before
* Now we can release the leaf-page locks. It's okay to do this before
* updating the parent downlink.
*/
if (BufferIsValid(srcBuffer))

View File

@@ -11,15 +11,15 @@
* log can be broken into relatively small, independent segments.
*
* XLOG interactions: this module generates an XLOG record whenever a new
* CLOG page is initialized to zeroes. Other writes of CLOG come from
* CLOG page is initialized to zeroes. Other writes of CLOG come from
* recording of transaction commit or abort in xact.c, which generates its
* own XLOG records for these events and will re-perform the status update
* on redo; so we need make no additional XLOG entry here. For synchronous
* on redo; so we need make no additional XLOG entry here. For synchronous
* transaction commits, the XLOG is guaranteed flushed through the XLOG commit
* record before we are called to log a commit, so the WAL rule "write xlog
* before data" is satisfied automatically. However, for async commits we
* must track the latest LSN affecting each CLOG page, so that we can flush
* XLOG that far and satisfy the WAL rule. We don't have to worry about this
* XLOG that far and satisfy the WAL rule. We don't have to worry about this
* for aborts (whether sync or async), since the post-crash assumption would
* be that such transactions failed anyway.
*
@@ -105,7 +105,7 @@ static void set_status_by_pages(int nsubxids, TransactionId *subxids,
* in the tree of xid. In various cases nsubxids may be zero.
*
* lsn must be the WAL location of the commit record when recording an async
* commit. For a synchronous commit it can be InvalidXLogRecPtr, since the
* commit. For a synchronous commit it can be InvalidXLogRecPtr, since the
* caller guarantees the commit record is already flushed in that case. It
* should be InvalidXLogRecPtr for abort cases, too.
*
@@ -417,7 +417,7 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
* Testing during the PostgreSQL 9.2 development cycle revealed that on a
* large multi-processor system, it was possible to have more CLOG page
* requests in flight at one time than the number of CLOG buffers which existed
* at that time, which was hardcoded to 8. Further testing revealed that
* at that time, which was hardcoded to 8. Further testing revealed that
* performance dropped off with more than 32 CLOG buffers, possibly because
* the linear buffer search algorithm doesn't scale well.
*
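
A minimal sketch of the kind of clamped sizing rule this paragraph motivates: scale the CLOG buffer count with shared_buffers, but never below a small floor and never past the point where the linear buffer search stops paying off. The exact rule lives in clog.c; this only shows the shape of the clamp.

static int
toy_clog_buffers(int nbuffers)
{
	int			n = nbuffers / 512;

	if (n < 4)
		n = 4;					/* small floor for tiny shared_buffers */
	if (n > 32)
		n = 32;					/* cap where linear search degrades */
	return n;
}
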

View File

@@ -5,7 +5,7 @@
*
* The pg_multixact manager is a pg_clog-like manager that stores an array of
* MultiXactMember for each MultiXactId. It is a fundamental part of the
* shared-row-lock implementation. Each MultiXactMember is comprised of a
* shared-row-lock implementation. Each MultiXactMember is comprised of a
* TransactionId and a set of flag bits. The name is a bit historical:
* originally, a MultiXactId consisted of more than one TransactionId (except
* in rare corner cases), hence "multi". Nowadays, however, it's perfectly
@@ -18,7 +18,7 @@
*
* We use two SLRU areas, one for storing the offsets at which the data
* starts for each MultiXactId in the other one. This trick allows us to
* store variable length arrays of TransactionIds. (We could alternatively
* store variable length arrays of TransactionIds. (We could alternatively
* use one area containing counts and TransactionIds, with valid MultiXactId
* values pointing at slots containing counts; but that way seems less robust
* since it would get completely confused if someone inquired about a bogus
@@ -38,7 +38,7 @@
*
* Like clog.c, and unlike subtrans.c, we have to preserve state across
* crashes and ensure that MXID and offset numbering increases monotonically
* across a crash. We do this in the same way as it's done for transaction
* across a crash. We do this in the same way as it's done for transaction
* IDs: the WAL record is guaranteed to contain evidence of every MXID we
* could need to worry about, and we just make sure that at the end of
* replay, the next-MXID and next-offset counters are at least as large as
@@ -50,7 +50,7 @@
* The minimum value in each database is stored in pg_database, and the
* global minimum is part of pg_control. Any vacuum that is able to
* advance its database's minimum value also computes a new global minimum,
* and uses this value to truncate older segments. When new multixactid
* and uses this value to truncate older segments. When new multixactid
* values are to be created, care is taken that the counter does not
* fall within the wraparound horizon considering the global minimum value.
*
@@ -85,13 +85,13 @@
/*
* Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
* Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
* used everywhere else in Postgres.
*
* Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
* MultiXact page numbering also wraps around at
* 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
* 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need
* 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need
* take no explicit notice of that fact in this module, except when comparing
* segment and page numbers in TruncateMultiXact (see
* MultiXactOffsetPagePrecedes).
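
The page/entry arithmetic implied above, spelled out with assumed constants (the server derives them from BLCKSZ and the real type sizes): an offsets page holds BLCKSZ / sizeof(MultiXactOffset) four-byte entries, and both the page number and the entry within the page are simple divisions that wrap together with the 32-bit counter.

#include <stdint.h>

#define TOY_BLCKSZ				8192
#define TOY_OFFSETS_PER_PAGE	(TOY_BLCKSZ / sizeof(uint32_t))		/* 2048 */

static uint32_t
toy_offset_page(uint32_t multi)
{
	return (uint32_t) (multi / TOY_OFFSETS_PER_PAGE);
}

static uint32_t
toy_offset_entry(uint32_t multi)
{
	return (uint32_t) (multi % TOY_OFFSETS_PER_PAGE);
}
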
@@ -110,7 +110,7 @@
* additional flag bits for each TransactionId. To do this without getting
* into alignment issues, we store four bytes of flags, and then the
* corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
* are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
* are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
* per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
* performance) trumps space efficiency here.
*
@@ -161,7 +161,7 @@ static SlruCtlData MultiXactMemberCtlData;
#define MultiXactMemberCtl (&MultiXactMemberCtlData)
/*
* MultiXact state shared across all backends. All this state is protected
* MultiXact state shared across all backends. All this state is protected
* by MultiXactGenLock. (We also use MultiXactOffsetControlLock and
* MultiXactMemberControlLock to guard accesses to the two sets of SLRU
* buffers. For concurrency's sake, we avoid holding more than one of these
@@ -179,7 +179,7 @@ typedef struct MultiXactStateData
MultiXactId lastTruncationPoint;
/*
* oldest multixact that is still on disk. Anything older than this
* oldest multixact that is still on disk. Anything older than this
* should not be consulted.
*/
MultiXactId oldestMultiXactId;
@@ -269,8 +269,8 @@ typedef struct mXactCacheEnt
} mXactCacheEnt;
#define MAX_CACHE_ENTRIES 256
static dlist_head MXactCache = DLIST_STATIC_INIT(MXactCache);
static int MXactCacheMembers = 0;
static dlist_head MXactCache = DLIST_STATIC_INIT(MXactCache);
static int MXactCacheMembers = 0;
static MemoryContext MXactContext = NULL;
#ifdef MULTIXACT_DEBUG
@@ -528,7 +528,7 @@ MultiXactIdIsRunning(MultiXactId multi)
/*
* This could be made faster by having another entry point in procarray.c,
* walking the PGPROC array only once for all the members. But in most
* walking the PGPROC array only once for all the members. But in most
* cases nmembers should be small enough that it doesn't much matter.
*/
for (i = 0; i < nmembers; i++)
@@ -579,9 +579,9 @@ MultiXactIdSetOldestMember(void)
* back. Which would be wrong.
*
* Note that a shared lock is sufficient, because it's enough to stop
* someone from advancing nextMXact; and nobody else could be trying to
* write to our OldestMember entry, only reading (and we assume storing
* it is atomic.)
* someone from advancing nextMXact; and nobody else could be trying
* to write to our OldestMember entry, only reading (and we assume
* storing it is atomic.)
*/
LWLockAcquire(MultiXactGenLock, LW_SHARED);
@@ -615,7 +615,7 @@ MultiXactIdSetOldestMember(void)
* The value to set is the oldest of nextMXact and all the valid per-backend
* OldestMemberMXactId[] entries. Because of the locking we do, we can be
* certain that no subsequent call to MultiXactIdSetOldestMember can set
* an OldestMemberMXactId[] entry older than what we compute here. Therefore
* an OldestMemberMXactId[] entry older than what we compute here. Therefore
* there is no live transaction, now or later, that can be a member of any
* MultiXactId older than the OldestVisibleMXactId we compute here.
*/
@@ -751,7 +751,7 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
* heap_lock_tuple() to have put it there, and heap_lock_tuple() generates
* an XLOG record that must follow ours. The normal LSN interlock between
* the data page and that XLOG record will ensure that our XLOG record
* reaches disk first. If the SLRU members/offsets data reaches disk
* reaches disk first. If the SLRU members/offsets data reaches disk
* sooner than the XLOG record, we do not care because we'll overwrite it
* with zeroes unless the XLOG record is there too; see notes at top of
* this file.
@@ -882,7 +882,7 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
* GetNewMultiXactId
* Get the next MultiXactId.
*
* Also, reserve the needed amount of space in the "members" area. The
* Also, reserve the needed amount of space in the "members" area. The
* starting offset of the reserved space is returned in *offset.
*
* This may generate XLOG records for expansion of the offsets and/or members
@@ -916,7 +916,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
/*----------
* Check to see if it's safe to assign another MultiXactId. This protects
* against catastrophic data loss due to multixact wraparound. The basic
* against catastrophic data loss due to multixact wraparound. The basic
* rules are:
*
* If we're past multiVacLimit, start trying to force autovacuum cycles.
@@ -930,7 +930,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
{
/*
* For safety's sake, we release MultiXactGenLock while sending
* signals, warnings, etc. This is not so much because we care about
* signals, warnings, etc. This is not so much because we care about
* preserving concurrency in this situation, as to avoid any
* possibility of deadlock while doing get_database_name(). First,
* copy all the shared values we'll need in this path.
@@ -981,8 +981,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
(errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
"database \"%s\" must be vacuumed before %u more MultiXactIds are used",
multiWrapLimit - result,
oldest_datname,
multiWrapLimit - result),
oldest_datname,
multiWrapLimit - result),
errhint("Execute a database-wide VACUUM in that database.\n"
"You might also need to commit or roll back old prepared transactions.")));
else
@@ -990,8 +990,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
(errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
"database with OID %u must be vacuumed before %u more MultiXactIds are used",
multiWrapLimit - result,
oldest_datoid,
multiWrapLimit - result),
oldest_datoid,
multiWrapLimit - result),
errhint("Execute a database-wide VACUUM in that database.\n"
"You might also need to commit or roll back old prepared transactions.")));
}
@@ -1036,7 +1036,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* until after file extension has succeeded!
*
* We don't care about MultiXactId wraparound here; it will be handled by
* the next iteration. But note that nextMXact may be InvalidMultiXactId
* the next iteration. But note that nextMXact may be InvalidMultiXactId
* or the first value on a segment-beginning page after this routine
* exits, so anyone else looking at the variable must be prepared to deal
* with either case. Similarly, nextOffset may be zero, but we won't use
@@ -1114,16 +1114,16 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
* need to allow an empty set to be returned regardless, if the caller is
* willing to accept it; the caller is expected to check that it's an
* allowed condition (such as ensuring that the infomask bits set on the
* tuple are consistent with the pg_upgrade scenario). If the caller is
* tuple are consistent with the pg_upgrade scenario). If the caller is
* expecting this to be called only on recently created multis, then we
* raise an error.
*
* Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is
* seen, it implies undetected ID wraparound has occurred. This raises a
* seen, it implies undetected ID wraparound has occurred. This raises a
* hard error.
*
* Shared lock is enough here since we aren't modifying any global state.
* Acquire it just long enough to grab the current counter values. We may
* Acquire it just long enough to grab the current counter values. We may
* need both nextMXact and nextOffset; see below.
*/
LWLockAcquire(MultiXactGenLock, LW_SHARED);
@@ -1151,12 +1151,12 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
/*
* Find out the offset at which we need to start reading MultiXactMembers
* and the number of members in the multixact. We determine the latter as
* and the number of members in the multixact. We determine the latter as
* the difference between this multixact's starting offset and the next
* one's. However, there are some corner cases to worry about:
*
* 1. This multixact may be the latest one created, in which case there is
* no next one to look at. In this case the nextOffset value we just
* no next one to look at. In this case the nextOffset value we just
* saved is the correct endpoint.
*
* 2. The next multixact may still be in process of being filled in: that
@@ -1167,11 +1167,11 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
* (because we are careful to pre-zero offset pages). Because
* GetNewMultiXactId will never return zero as the starting offset for a
* multixact, when we read zero as the next multixact's offset, we know we
* have this case. We sleep for a bit and try again.
* have this case. We sleep for a bit and try again.
*
* 3. Because GetNewMultiXactId increments offset zero to offset one to
* handle case #2, there is an ambiguity near the point of offset
* wraparound. If we see next multixact's offset is one, is that our
* wraparound. If we see next multixact's offset is one, is that our
* multixact's actual endpoint, or did it end at zero with a subsequent
* increment? We handle this using the knowledge that if the zero'th
* member slot wasn't filled, it'll contain zero, and zero isn't a valid
@@ -1297,8 +1297,8 @@ retry:
/*
* MultiXactHasRunningRemoteMembers
* Does the given multixact have still-live members from
* transactions other than our own?
* Does the given multixact have still-live members from
* transactions other than our own?
*/
bool
MultiXactHasRunningRemoteMembers(MultiXactId multi)
@@ -1694,7 +1694,7 @@ multixact_twophase_postabort(TransactionId xid, uint16 info,
/*
* Initialization of shared memory for MultiXact. We use two SLRU areas,
* thus double memory. Also, reserve space for the shared MultiXactState
* thus double memory. Also, reserve space for the shared MultiXactState
* struct and the per-backend MultiXactId arrays (two of those, too).
*/
Size
@@ -1754,7 +1754,7 @@ MultiXactShmemInit(void)
/*
* This func must be called ONCE on system install. It creates the initial
* MultiXact segments. (The MultiXacts directories are assumed to have been
* MultiXact segments. (The MultiXacts directories are assumed to have been
* created by initdb, and MultiXactShmemInit must have been called already.)
*/
void
@@ -1849,7 +1849,7 @@ MaybeExtendOffsetSlru(void)
if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
{
int slotno;
int slotno;
/*
* Fortunately for us, SimpleLruWritePage is already prepared to deal
@@ -1925,7 +1925,7 @@ TrimMultiXact(void)
MultiXactOffsetCtl->shared->latest_page_number = pageno;
/*
* Zero out the remainder of the current offsets page. See notes in
* Zero out the remainder of the current offsets page. See notes in
* StartupCLOG() for motivation.
*/
entryno = MultiXactIdToOffsetEntry(multi);
@@ -1955,7 +1955,7 @@ TrimMultiXact(void)
MultiXactMemberCtl->shared->latest_page_number = pageno;
/*
* Zero out the remainder of the current members page. See notes in
* Zero out the remainder of the current members page. See notes in
* TrimCLOG() for motivation.
*/
flagsoff = MXOffsetToFlagsOffset(offset);
@@ -2097,7 +2097,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
/*
* We'll start complaining loudly when we get within 10M multis of the
* stop point. This is kind of arbitrary, but if you let your gas gauge
* stop point. This is kind of arbitrary, but if you let your gas gauge
* get down to 1% of full, would you be looking for the next gas station?
* We need to be fairly liberal about this number because there are lots
* of scenarios where most transactions are done by automatic clients that
@@ -2172,8 +2172,8 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
(errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
"database \"%s\" must be vacuumed before %u more MultiXactIds are used",
multiWrapLimit - curMulti,
oldest_datname,
multiWrapLimit - curMulti),
oldest_datname,
multiWrapLimit - curMulti),
errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
"You might also need to commit or roll back old prepared transactions.")));
else
@@ -2181,8 +2181,8 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
(errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
"database with OID %u must be vacuumed before %u more MultiXactIds are used",
multiWrapLimit - curMulti,
oldest_datoid,
multiWrapLimit - curMulti),
oldest_datoid,
multiWrapLimit - curMulti),
errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
"You might also need to commit or roll back old prepared transactions.")));
}
@@ -2375,16 +2375,16 @@ GetOldestMultiXactId(void)
/*
* SlruScanDirectory callback.
* This callback deletes segments that are outside the range determined by
* the given page numbers.
* This callback deletes segments that are outside the range determined by
* the given page numbers.
*
* Both range endpoints are exclusive (that is, segments containing any of
* those pages are kept.)
*/
typedef struct MembersLiveRange
{
int rangeStart;
int rangeEnd;
int rangeStart;
int rangeEnd;
} MembersLiveRange;
static bool
@@ -2392,15 +2392,15 @@ SlruScanDirCbRemoveMembers(SlruCtl ctl, char *filename, int segpage,
void *data)
{
MembersLiveRange *range = (MembersLiveRange *) data;
MultiXactOffset nextOffset;
MultiXactOffset nextOffset;
if ((segpage == range->rangeStart) ||
(segpage == range->rangeEnd))
return false; /* easy case out */
return false; /* easy case out */
/*
* To ensure that no segment is spuriously removed, we must keep track
* of new segments added since the start of the directory scan; to do this,
* To ensure that no segment is spuriously removed, we must keep track of
* new segments added since the start of the directory scan; to do this,
* we update our end-of-range point as we run.
*
* As an optimization, we can skip looking at shared memory if we know for
@@ -2473,10 +2473,10 @@ void
TruncateMultiXact(MultiXactId oldestMXact)
{
MultiXactOffset oldestOffset;
MultiXactOffset nextOffset;
MultiXactOffset nextOffset;
mxtruncinfo trunc;
MultiXactId earliest;
MembersLiveRange range;
MembersLiveRange range;
/*
* Note we can't just plow ahead with the truncation; it's possible that

View File

@@ -15,7 +15,7 @@
*
* We use a control LWLock to protect the shared data structures, plus
* per-buffer LWLocks that synchronize I/O for each buffer. The control lock
* must be held to examine or modify any shared state. A process that is
* must be held to examine or modify any shared state. A process that is
* reading in or writing out a page buffer does not hold the control lock,
* only the per-buffer lock for the buffer it is working on.
*
@@ -34,7 +34,7 @@
* could have happened while we didn't have the lock).
*
* As with the regular buffer manager, it is possible for another process
* to re-dirty a page that is currently being written out. This is handled
* to re-dirty a page that is currently being written out. This is handled
* by re-setting the page's page_dirty flag.
*
*
@@ -96,7 +96,7 @@ typedef struct SlruFlushData *SlruFlush;
* page_lru_count entries to be "reset" to lower values than they should have,
* in case a process is delayed while it executes this macro. With care in
* SlruSelectLRUPage(), this does little harm, and in any case the absolute
* worst possible consequence is a nonoptimal choice of page to evict. The
* worst possible consequence is a nonoptimal choice of page to evict. The
* gain from allowing concurrent reads of SLRU pages seems worth it.
*/
#define SlruRecentlyUsed(shared, slotno) \
@@ -481,7 +481,7 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
*
* NOTE: only one write attempt is made here. Hence, it is possible that
* the page is still dirty at exit (if someone else re-dirtied it during
* the write). However, we *do* attempt a fresh write even if the page
* the write). However, we *do* attempt a fresh write even if the page
* is already being written; this is for checkpoints.
*
* Control lock must be held at entry, and will be held at exit.
@@ -634,7 +634,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
* In a crash-and-restart situation, it's possible for us to receive
* commands to set the commit status of transactions whose bits are in
* already-truncated segments of the commit log (see notes in
* SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
* SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
* where the file doesn't exist, and return zeroes instead.
*/
fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
@@ -964,9 +964,9 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
/*
* If we find any EMPTY slot, just select that one. Else choose a
* victim page to replace. We normally take the least recently used
* victim page to replace. We normally take the least recently used
* valid page, but we will never take the slot containing
* latest_page_number, even if it appears least recently used. We
* latest_page_number, even if it appears least recently used. We
* will select a slot that is already I/O busy only if there is no
* other choice: a read-busy slot will not be least recently used once
* the read finishes, and waiting for an I/O on a write-busy slot is
@@ -1041,7 +1041,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
/*
* If all pages (except possibly the latest one) are I/O busy, we'll
* have to wait for an I/O to complete and then retry. In that
* have to wait for an I/O to complete and then retry. In that
* unhappy case, we choose to wait for the I/O on the least recently
* used slot, on the assumption that it was likely initiated first of
* all the I/Os in progress and may therefore finish first.
@@ -1193,7 +1193,7 @@ restart:;
/*
* Hmm, we have (or may have) I/O operations acting on the page, so
* we've got to wait for them to finish and then start again. This is
* the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
* the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
* wouldn't it be OK to just discard it without writing it? For now,
* keep the logic the same as it was.)
*/
@@ -1293,7 +1293,7 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
cldir = AllocateDir(ctl->Dir);
while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
{
size_t len;
size_t len;
len = strlen(clde->d_name);

View File

@@ -5,7 +5,7 @@
*
* The pg_subtrans manager is a pg_clog-like manager that stores the parent
* transaction Id for each transaction. It is a fundamental part of the
* nested transactions implementation. A main transaction has a parent
* nested transactions implementation. A main transaction has a parent
* of InvalidTransactionId, and each subtransaction has its immediate parent.
* The tree can easily be walked from child to parent, but not in the
* opposite direction.
@@ -191,7 +191,7 @@ SUBTRANSShmemInit(void)
* must have been called already.)
*
* Note: it's not really necessary to create the initial segment now,
* since slru.c would create it on first write anyway. But we may as well
* since slru.c would create it on first write anyway. But we may as well
* do it to be sure the directory is set up correctly.
*/
void
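The parent links described in the header comment above can only be walked upward. A minimal sketch of that walk, with a hypothetical lookup standing in for the real pg_subtrans read (cf. SubTransGetTopmostTransaction); all names here are illustrative:

    #include <stdint.h>

    typedef uint32_t TransactionIdSketch;
    #define INVALID_XID_SKETCH ((TransactionIdSketch) 0)

    /* Hypothetical stand-in for SubTransGetParent(): returns the parent XID,
     * or 0 when the given XID is already a top-level transaction. */
    extern TransactionIdSketch sketch_get_parent(TransactionIdSketch xid);

    /* Walk child -> parent until the top-level transaction is reached. */
    static TransactionIdSketch
    sketch_topmost_xid(TransactionIdSketch xid)
    {
        TransactionIdSketch parent;

        while ((parent = sketch_get_parent(xid)) != INVALID_XID_SKETCH)
            xid = parent;
        return xid;
    }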

View File

@@ -66,7 +66,7 @@ restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
* Try to read a timeline's history file.
*
* If successful, return the list of component TLIs (the given TLI followed by
* its ancestor TLIs). If we can't find the history file, assume that the
* its ancestor TLIs). If we can't find the history file, assume that the
* timeline has no parents, and return a list of just the specified timeline
* ID.
*/
@@ -150,7 +150,7 @@ readTimeLineHistory(TimeLineID targetTLI)
if (nfields != 3)
ereport(FATAL,
(errmsg("syntax error in history file: %s", fline),
errhint("Expected a transaction log switchpoint location.")));
errhint("Expected a transaction log switchpoint location.")));
if (result && tli <= lasttli)
ereport(FATAL,
@@ -281,7 +281,7 @@ findNewestTimeLine(TimeLineID startTLI)
* reason: human-readable explanation of why the timeline was switched
*
* Currently this is only used at the end recovery, and so there are no locking
* considerations. But we should be just as tense as XLogFileInit to avoid
* considerations. But we should be just as tense as XLogFileInit to avoid
* emplacing a bogus file.
*/
void
@@ -418,7 +418,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
/*
* Prefer link() to rename() here just to be really sure that we don't
* overwrite an existing file. However, there shouldn't be one, so
* overwrite an existing file. However, there shouldn't be one, so
* rename() is an acceptable substitute except for the truly paranoid.
*/
#if HAVE_WORKING_LINK

View File

@@ -145,7 +145,7 @@ TransactionIdDidCommit(TransactionId transactionId)
* be a window just after database startup where we do not have complete
* knowledge in pg_subtrans of the transactions after TransactionXmin.
* StartupSUBTRANS() has ensured that any missing information will be
* zeroed. Since this case should not happen under normal conditions, it
* zeroed. Since this case should not happen under normal conditions, it
* seems reasonable to emit a WARNING for it.
*/
if (xidstatus == TRANSACTION_STATUS_SUB_COMMITTED)
@@ -301,7 +301,7 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2)
{
/*
* If either ID is a permanent XID then we can just do unsigned
* comparison. If both are normal, do a modulo-2^32 comparison.
* comparison. If both are normal, do a modulo-2^32 comparison.
*/
int32 diff;
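The modulo-2^32 comparison mentioned in the comment reduces to one signed subtraction. A minimal sketch of that step, ignoring the permanent-XID special case handled just before it (function name is illustrative):

    #include <stdint.h>
    #include <stdbool.h>

    /* "id1 precedes id2" iff the 32-bit difference, interpreted as signed,
     * is negative; this stays correct across XID wraparound as long as the
     * two XIDs are less than 2^31 transactions apart. */
    static bool
    xid_precedes_sketch(uint32_t id1, uint32_t id2)
    {
        int32_t     diff = (int32_t) (id1 - id2);

        return diff < 0;
    }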

View File

@@ -443,7 +443,7 @@ LockGXact(const char *gid, Oid user)
/*
* Note: it probably would be possible to allow committing from
* another database; but at the moment NOTIFY is known not to work and
* there may be some other issues as well. Hence disallow until
* there may be some other issues as well. Hence disallow until
* someone gets motivated to make it work.
*/
if (MyDatabaseId != proc->databaseId)
@@ -1031,7 +1031,7 @@ EndPrepare(GlobalTransaction gxact)
* out the correct state file CRC, we have an inconsistency: the xact is
* prepared according to WAL but not according to our on-disk state. We
* use a critical section to force a PANIC if we are unable to complete
* the write --- then, WAL replay should repair the inconsistency. The
* the write --- then, WAL replay should repair the inconsistency. The
* odds of a PANIC actually occurring should be very tiny given that we
* were able to write the bogus CRC above.
*
@@ -1069,7 +1069,7 @@ EndPrepare(GlobalTransaction gxact)
errmsg("could not close two-phase state file: %m")));
/*
* Mark the prepared transaction as valid. As soon as xact.c marks
* Mark the prepared transaction as valid. As soon as xact.c marks
* MyPgXact as not running our XID (which it will do immediately after
* this function returns), others can commit/rollback the xact.
*
@@ -1336,7 +1336,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
/*
* In case we fail while running the callbacks, mark the gxact invalid so
* no one else will try to commit/rollback, and so it can be recycled
* properly later. It is still locked by our XID so it won't go away yet.
* properly later. It is still locked by our XID so it won't go away yet.
*
* (We assume it's safe to do this without taking TwoPhaseStateLock.)
*/
@@ -1540,7 +1540,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
*
* This approach creates a race condition: someone else could delete a
* GXACT between the time we release TwoPhaseStateLock and the time we try
* to open its state file. We handle this by special-casing ENOENT
* to open its state file. We handle this by special-casing ENOENT
* failures: if we see that, we verify that the GXACT is no longer valid,
* and if so ignore the failure.
*/
@@ -1621,7 +1621,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
*
* We throw away any prepared xacts with main XID beyond nextXid --- if any
* are present, it suggests that the DBA has done a PITR recovery to an
* earlier point in time without cleaning out pg_twophase. We dare not
* earlier point in time without cleaning out pg_twophase. We dare not
* try to recover such prepared xacts since they likely depend on database
* state that doesn't exist now.
*
@@ -1713,7 +1713,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
* XID, and they may force us to advance nextXid.
*
* We don't expect anyone else to modify nextXid, hence we don't
* need to hold a lock while examining it. We still acquire the
* need to hold a lock while examining it. We still acquire the
* lock to modify it, though.
*/
subxids = (TransactionId *)

View File

@@ -39,7 +39,7 @@ VariableCache ShmemVariableCache = NULL;
*
* Note: when this is called, we are actually already inside a valid
* transaction, since XIDs are now not allocated until the transaction
* does something. So it is safe to do a database lookup if we want to
* does something. So it is safe to do a database lookup if we want to
* issue a warning about XID wrap.
*/
TransactionId
@@ -165,20 +165,20 @@ GetNewTransactionId(bool isSubXact)
/*
* Now advance the nextXid counter. This must not happen until after we
* have successfully completed ExtendCLOG() --- if that routine fails, we
* want the next incoming transaction to try it again. We cannot assign
* want the next incoming transaction to try it again. We cannot assign
* more XIDs until there is CLOG space for them.
*/
TransactionIdAdvance(ShmemVariableCache->nextXid);
/*
* We must store the new XID into the shared ProcArray before releasing
* XidGenLock. This ensures that every active XID older than
* XidGenLock. This ensures that every active XID older than
* latestCompletedXid is present in the ProcArray, which is essential for
* correct OldestXmin tracking; see src/backend/access/transam/README.
*
* XXX by storing xid into MyPgXact without acquiring ProcArrayLock, we
* are relying on fetch/store of an xid to be atomic, else other backends
* might see a partially-set xid here. But holding both locks at once
* might see a partially-set xid here. But holding both locks at once
* would be a nasty concurrency hit. So for now, assume atomicity.
*
* Note that readers of PGXACT xid fields should be careful to fetch the
@@ -289,7 +289,7 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
/*
* We'll start complaining loudly when we get within 10M transactions of
* the stop point. This is kind of arbitrary, but if you let your gas
* the stop point. This is kind of arbitrary, but if you let your gas
* gauge get down to 1% of full, would you be looking for the next gas
* station? We need to be fairly liberal about this number because there
* are lots of scenarios where most transactions are done by automatic
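A rough sketch of the limit arithmetic the comment is describing, under the assumption that the wrap limit sits 2^31 XIDs past the oldest frozen XID and that the stop point is 1M XIDs short of that; the real code also steps over the reserved XIDs below FirstNormalTransactionId:

    #include <stdint.h>

    static void
    sketch_xid_limits(uint32_t oldest_frozen_xid,
                      uint32_t *warn_limit, uint32_t *stop_limit)
    {
        uint32_t    wrap_limit = oldest_frozen_xid + ((uint32_t) 1 << 31);

        *stop_limit = wrap_limit - 1000000;     /* refuse new XIDs here   */
        *warn_limit = *stop_limit - 10000000;   /* start complaining here */
    }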
@@ -390,7 +390,7 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
* We primarily check whether oldestXidDB is valid. The cases we have in
* mind are that that database was dropped, or the field was reset to zero
* by pg_resetxlog. In either case we should force recalculation of the
* wrap limit. Also do it if oldestXid is old enough to be forcing
* wrap limit. Also do it if oldestXid is old enough to be forcing
* autovacuums or other actions; this ensures we update our state as soon
* as possible once extra overhead is being incurred.
*/

View File

@@ -270,7 +270,7 @@ static void CallSubXactCallbacks(SubXactEvent event,
SubTransactionId parentSubid);
static void CleanupTransaction(void);
static void CheckTransactionChain(bool isTopLevel, bool throwError,
const char *stmtType);
const char *stmtType);
static void CommitTransaction(void);
static TransactionId RecordTransactionAbort(bool isSubXact);
static void StartTransaction(void);
@@ -450,7 +450,7 @@ AssignTransactionId(TransactionState s)
{
bool isSubXact = (s->parent != NULL);
ResourceOwner currentOwner;
bool log_unknown_top = false;
bool log_unknown_top = false;
/* Assert that caller didn't screw up */
Assert(!TransactionIdIsValid(s->transactionId));
@@ -487,8 +487,8 @@ AssignTransactionId(TransactionState s)
/*
* When wal_level=logical, guarantee that a subtransaction's xid can only
* be seen in the WAL stream if its toplevel xid has been logged
* before. If necessary we log a xact_assignment record with fewer than
* be seen in the WAL stream if its toplevel xid has been logged before.
* If necessary we log a xact_assignment record with fewer than
* PGPROC_MAX_CACHED_SUBXIDS. Note that it is fine if didLogXid isn't set
* for a transaction even though it appears in a WAL record, we just might
* superfluously log something. That can happen when an xid is included
@@ -637,7 +637,7 @@ SubTransactionIsActive(SubTransactionId subxid)
*
* "used" must be TRUE if the caller intends to use the command ID to mark
* inserted/updated/deleted tuples. FALSE means the ID is being fetched
* for read-only purposes (ie, as a snapshot validity cutoff). See
* for read-only purposes (ie, as a snapshot validity cutoff). See
* CommandCounterIncrement() for discussion.
*/
CommandId
@@ -724,7 +724,7 @@ TransactionIdIsCurrentTransactionId(TransactionId xid)
/*
* We always say that BootstrapTransactionId is "not my transaction ID"
* even when it is (ie, during bootstrap). Along with the fact that
* even when it is (ie, during bootstrap). Along with the fact that
* transam.c always treats BootstrapTransactionId as already committed,
* this causes the tqual.c routines to see all tuples as committed, which
* is what we need during bootstrap. (Bootstrap mode only inserts tuples,
@@ -866,7 +866,7 @@ AtStart_Memory(void)
/*
* If this is the first time through, create a private context for
* AbortTransaction to work in. By reserving some space now, we can
* insulate AbortTransaction from out-of-memory scenarios. Like
* insulate AbortTransaction from out-of-memory scenarios. Like
* ErrorContext, we set it up with slow growth rate and a nonzero minimum
* size, so that space will be reserved immediately.
*/
@@ -969,7 +969,7 @@ AtSubStart_ResourceOwner(void)
Assert(s->parent != NULL);
/*
* Create a resource owner for the subtransaction. We make it a child of
* Create a resource owner for the subtransaction. We make it a child of
* the immediate parent's resource owner.
*/
s->curTransactionOwner =
@@ -989,7 +989,7 @@ AtSubStart_ResourceOwner(void)
* RecordTransactionCommit
*
* Returns latest XID among xact and its children, or InvalidTransactionId
* if the xact has no XID. (We compute that here just because it's easier.)
* if the xact has no XID. (We compute that here just because it's easier.)
*/
static TransactionId
RecordTransactionCommit(void)
@@ -1034,7 +1034,7 @@ RecordTransactionCommit(void)
/*
* If we didn't create XLOG entries, we're done here; otherwise we
* should flush those entries the same as a commit record. (An
* should flush those entries the same as a commit record. (An
* example of a possible record that wouldn't cause an XID to be
* assigned is a sequence advance record due to nextval() --- we want
* to flush that to disk before reporting commit.)
@@ -1051,7 +1051,7 @@ RecordTransactionCommit(void)
BufmgrCommit();
/*
* Mark ourselves as within our "commit critical section". This
* Mark ourselves as within our "commit critical section". This
* forces any concurrent checkpoint to wait until we've updated
* pg_clog. Without this, it is possible for the checkpoint to set
* REDO after the XLOG record but fail to flush the pg_clog update to
@@ -1059,7 +1059,7 @@ RecordTransactionCommit(void)
* crashes a little later.
*
* Note: we could, but don't bother to, set this flag in
* RecordTransactionAbort. That's because loss of a transaction abort
* RecordTransactionAbort. That's because loss of a transaction abort
* is noncritical; the presumption would be that it aborted, anyway.
*
* It's safe to change the delayChkpt flag of our own backend without
@@ -1168,15 +1168,15 @@ RecordTransactionCommit(void)
/*
* Check if we want to commit asynchronously. We can allow the XLOG flush
* to happen asynchronously if synchronous_commit=off, or if the current
* transaction has not performed any WAL-logged operation. The latter
* transaction has not performed any WAL-logged operation. The latter
* case can arise if the current transaction wrote only to temporary
* and/or unlogged tables. In case of a crash, the loss of such a
* and/or unlogged tables. In case of a crash, the loss of such a
* transaction will be irrelevant since temp tables will be lost anyway,
* and unlogged tables will be truncated. (Given the foregoing, you might
* think that it would be unnecessary to emit the XLOG record at all in
* this case, but we don't currently try to do that. It would certainly
* cause problems at least in Hot Standby mode, where the
* KnownAssignedXids machinery requires tracking every XID assignment. It
* KnownAssignedXids machinery requires tracking every XID assignment. It
* might be OK to skip it only when wal_level < hot_standby, but for now
* we don't.)
*
@@ -1423,7 +1423,7 @@ AtSubCommit_childXids(void)
* RecordTransactionAbort
*
* Returns latest XID among xact and its children, or InvalidTransactionId
* if the xact has no XID. (We compute that here just because it's easier.)
* if the xact has no XID. (We compute that here just because it's easier.)
*/
static TransactionId
RecordTransactionAbort(bool isSubXact)
@@ -1440,7 +1440,7 @@ RecordTransactionAbort(bool isSubXact)
/*
* If we haven't been assigned an XID, nobody will care whether we aborted
* or not. Hence, we're done in that case. It does not matter if we have
* or not. Hence, we're done in that case. It does not matter if we have
* rels to delete (note that this routine is not responsible for actually
* deleting 'em). We cannot have any child XIDs, either.
*/
@@ -1456,7 +1456,7 @@ RecordTransactionAbort(bool isSubXact)
* We have a valid XID, so we should write an ABORT record for it.
*
* We do not flush XLOG to disk here, since the default assumption after a
* crash would be that we aborted, anyway. For the same reason, we don't
* crash would be that we aborted, anyway. For the same reason, we don't
* need to worry about interlocking against checkpoint start.
*/
@@ -1624,7 +1624,7 @@ AtSubAbort_childXids(void)
/*
* We keep the child-XID arrays in TopTransactionContext (see
* AtSubCommit_childXids). This means we'd better free the array
* AtSubCommit_childXids). This means we'd better free the array
* explicitly at abort to avoid leakage.
*/
if (s->childXids != NULL)
@@ -1802,7 +1802,7 @@ StartTransaction(void)
VirtualXactLockTableInsert(vxid);
/*
* Advertise it in the proc array. We assume assignment of
* Advertise it in the proc array. We assume assignment of
* LocalTransactionID is atomic, and the backendId should be set already.
*/
Assert(MyProc->backendId == vxid.backendId);
@@ -1899,7 +1899,7 @@ CommitTransaction(void)
/*
* The remaining actions cannot call any user-defined code, so it's safe
* to start shutting down within-transaction services. But note that most
* to start shutting down within-transaction services. But note that most
* of this stuff could still throw an error, which would switch us into
* the transaction-abort path.
*/
@@ -2104,7 +2104,7 @@ PrepareTransaction(void)
/*
* The remaining actions cannot call any user-defined code, so it's safe
* to start shutting down within-transaction services. But note that most
* to start shutting down within-transaction services. But note that most
* of this stuff could still throw an error, which would switch us into
* the transaction-abort path.
*/
@@ -2224,7 +2224,7 @@ PrepareTransaction(void)
XactLastRecEnd = 0;
/*
* Let others know about no transaction in progress by me. This has to be
* Let others know about no transaction in progress by me. This has to be
* done *after* the prepared transaction has been marked valid, else
* someone may think it is unlocked and recyclable.
*/
@@ -2233,7 +2233,7 @@ PrepareTransaction(void)
/*
* This is all post-transaction cleanup. Note that if an error is raised
* here, it's too late to abort the transaction. This should be just
* noncritical resource releasing. See notes in CommitTransaction.
* noncritical resource releasing. See notes in CommitTransaction.
*/
CallXactCallbacks(XACT_EVENT_PREPARE);
@@ -2411,7 +2411,7 @@ AbortTransaction(void)
ProcArrayEndTransaction(MyProc, latestXid);
/*
* Post-abort cleanup. See notes in CommitTransaction() concerning
* Post-abort cleanup. See notes in CommitTransaction() concerning
* ordering. We can skip all of it if the transaction failed before
* creating a resource owner.
*/
@@ -2646,7 +2646,7 @@ CommitTransactionCommand(void)
/*
* Here we were in a perfectly good transaction block but the user
* told us to ROLLBACK anyway. We have to abort the transaction
* told us to ROLLBACK anyway. We have to abort the transaction
* and then clean up.
*/
case TBLOCK_ABORT_PENDING:
@@ -2666,7 +2666,7 @@ CommitTransactionCommand(void)
/*
* We were just issued a SAVEPOINT inside a transaction block.
* Start a subtransaction. (DefineSavepoint already did
* Start a subtransaction. (DefineSavepoint already did
* PushTransaction, so as to have someplace to put the SUBBEGIN
* state.)
*/
@@ -2870,7 +2870,7 @@ AbortCurrentTransaction(void)
break;
/*
* Here, we failed while trying to COMMIT. Clean up the
* Here, we failed while trying to COMMIT. Clean up the
* transaction and return to idle state (we do not want to stay in
* the transaction).
*/
@@ -2932,7 +2932,7 @@ AbortCurrentTransaction(void)
/*
* If we failed while trying to create a subtransaction, clean up
* the broken subtransaction and abort the parent. The same
* the broken subtransaction and abort the parent. The same
* applies if we get a failure while ending a subtransaction.
*/
case TBLOCK_SUBBEGIN:
@@ -3485,7 +3485,7 @@ UserAbortTransactionBlock(void)
break;
/*
* We are inside a subtransaction. Mark everything up to top
* We are inside a subtransaction. Mark everything up to top
* level as exitable.
*/
case TBLOCK_SUBINPROGRESS:
@@ -3619,7 +3619,7 @@ ReleaseSavepoint(List *options)
break;
/*
* We are in a non-aborted subtransaction. This is the only valid
* We are in a non-aborted subtransaction. This is the only valid
* case.
*/
case TBLOCK_SUBINPROGRESS:
@@ -3676,7 +3676,7 @@ ReleaseSavepoint(List *options)
/*
* Mark "commit pending" all subtransactions up to the target
* subtransaction. The actual commits will happen when control gets to
* subtransaction. The actual commits will happen when control gets to
* CommitTransactionCommand.
*/
xact = CurrentTransactionState;
@@ -3775,7 +3775,7 @@ RollbackToSavepoint(List *options)
/*
* Mark "abort pending" all subtransactions up to the target
* subtransaction. The actual aborts will happen when control gets to
* subtransaction. The actual aborts will happen when control gets to
* CommitTransactionCommand.
*/
xact = CurrentTransactionState;
@@ -4182,7 +4182,7 @@ CommitSubTransaction(void)
CommandCounterIncrement();
/*
* Prior to 8.4 we marked subcommit in clog at this point. We now only
* Prior to 8.4 we marked subcommit in clog at this point. We now only
* perform that step, if required, as part of the atomic update of the
* whole transaction tree at top level commit or abort.
*/
@@ -4641,7 +4641,7 @@ TransStateAsString(TransState state)
/*
* xactGetCommittedChildren
*
* Gets the list of committed children of the current transaction. The return
* Gets the list of committed children of the current transaction. The return
* value is the number of child transactions. *ptr is set to point to an
* array of TransactionIds. The array is allocated in TopTransactionContext;
* the caller should *not* pfree() it (this is a change from pre-8.4 code!).

View File

@@ -101,7 +101,7 @@ bool XLOG_DEBUG = false;
* future XLOG segment as long as there aren't already XLOGfileslop future
* segments; else we'll delete it. This could be made a separate GUC
* variable, but at present I think it's sufficient to hardwire it as
* 2*CheckPointSegments+1. Under normal conditions, a checkpoint will free
* 2*CheckPointSegments+1. Under normal conditions, a checkpoint will free
* no more than 2*CheckPointSegments log segments, and we want to recycle all
* of them; the +1 allows boundary cases to happen without wasting a
* delete/create-segment cycle.
@@ -190,7 +190,7 @@ static bool LocalHotStandbyActive = false;
* 0: unconditionally not allowed to insert XLOG
* -1: must check RecoveryInProgress(); disallow until it is false
* Most processes start with -1 and transition to 1 after seeing that recovery
* is not in progress. But we can also force the value for special cases.
* is not in progress. But we can also force the value for special cases.
* The coding in XLogInsertAllowed() depends on the first two of these states
* being numerically the same as bool true and false.
*/
@@ -223,7 +223,7 @@ static bool recoveryPauseAtTarget = true;
static TransactionId recoveryTargetXid;
static TimestampTz recoveryTargetTime;
static char *recoveryTargetName;
static int min_recovery_apply_delay = 0;
static int min_recovery_apply_delay = 0;
static TimestampTz recoveryDelayUntilTime;
/* options taken from recovery.conf for XLOG streaming */
@@ -261,7 +261,7 @@ static bool recoveryStopAfter;
*
* expectedTLEs: a list of TimeLineHistoryEntries for recoveryTargetTLI and the timelines of
* its known parents, newest first (so recoveryTargetTLI is always the
* first list member). Only these TLIs are expected to be seen in the WAL
* first list member). Only these TLIs are expected to be seen in the WAL
* segments we read, and indeed only these TLIs will be considered as
* candidate WAL files to open at all.
*
@@ -290,7 +290,7 @@ XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr;
/*
* RedoRecPtr is this backend's local copy of the REDO record pointer
* (which is almost but not quite the same as a pointer to the most recent
* CHECKPOINT record). We update this from the shared-memory copy,
* CHECKPOINT record). We update this from the shared-memory copy,
* XLogCtl->Insert.RedoRecPtr, whenever we can safely do so (ie, when we
* hold an insertion lock). See XLogInsert for details. We are also allowed
* to update from XLogCtl->RedoRecPtr if we hold the info_lck;
@@ -418,11 +418,11 @@ typedef struct XLogCtlInsert
slock_t insertpos_lck; /* protects CurrBytePos and PrevBytePos */
/*
* CurrBytePos is the end of reserved WAL. The next record will be inserted
* at that position. PrevBytePos is the start position of the previously
* inserted (or rather, reserved) record - it is copied to the prev-link
* of the next record. These are stored as "usable byte positions" rather
* than XLogRecPtrs (see XLogBytePosToRecPtr()).
* CurrBytePos is the end of reserved WAL. The next record will be
* inserted at that position. PrevBytePos is the start position of the
* previously inserted (or rather, reserved) record - it is copied to the
* prev-link of the next record. These are stored as "usable byte
* positions" rather than XLogRecPtrs (see XLogBytePosToRecPtr()).
*/
uint64 CurrBytePos;
uint64 PrevBytePos;
@@ -464,7 +464,7 @@ typedef struct XLogCtlInsert
/*
* WAL insertion locks.
*/
WALInsertLockPadded *WALInsertLocks;
WALInsertLockPadded *WALInsertLocks;
LWLockTranche WALInsertLockTranche;
int WALInsertLockTrancheId;
} XLogCtlInsert;
@@ -504,10 +504,11 @@ typedef struct XLogCtlData
* Latest initialized page in the cache (last byte position + 1).
*
* To change the identity of a buffer (and InitializedUpTo), you need to
* hold WALBufMappingLock. To change the identity of a buffer that's still
* dirty, the old page needs to be written out first, and for that you
* need WALWriteLock, and you need to ensure that there are no in-progress
* insertions to the page by calling WaitXLogInsertionsToFinish().
* hold WALBufMappingLock. To change the identity of a buffer that's
* still dirty, the old page needs to be written out first, and for that
* you need WALWriteLock, and you need to ensure that there are no
* in-progress insertions to the page by calling
* WaitXLogInsertionsToFinish().
*/
XLogRecPtr InitializedUpTo;
@@ -799,8 +800,8 @@ static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch,
XLogRecData *rdata,
XLogRecPtr StartPos, XLogRecPtr EndPos);
XLogRecData *rdata,
XLogRecPtr StartPos, XLogRecPtr EndPos);
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos,
XLogRecPtr *EndPos, XLogRecPtr *PrevPtr);
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos,
@@ -860,6 +861,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
if (rechdr == NULL)
{
static char rechdrbuf[SizeOfXLogRecord + MAXIMUM_ALIGNOF];
rechdr = (XLogRecord *) MAXALIGN(&rechdrbuf);
MemSet(rechdr, 0, SizeOfXLogRecord);
}
@@ -1075,12 +1077,12 @@ begin:;
* record to the shared WAL buffer cache is a two-step process:
*
* 1. Reserve the right amount of space from the WAL. The current head of
* reserved space is kept in Insert->CurrBytePos, and is protected by
* insertpos_lck.
* reserved space is kept in Insert->CurrBytePos, and is protected by
* insertpos_lck.
*
* 2. Copy the record to the reserved WAL space. This involves finding the
* correct WAL buffer containing the reserved space, and copying the
* record in place. This can be done concurrently in multiple processes.
* correct WAL buffer containing the reserved space, and copying the
* record in place. This can be done concurrently in multiple processes.
*
* To keep track of which insertions are still in-progress, each concurrent
* inserter acquires an insertion lock. In addition to just indicating that
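A minimal sketch of step 1 as described above, with the spinlock calls elided to comments; the struct and function names are stand-ins for the real XLogCtlInsert fields and ReserveXLogInsertLocation, which work on "usable" byte positions rather than raw pointers:

    #include <stdint.h>

    typedef struct InsertPosSketch
    {
        uint64_t    CurrBytePos;    /* end of reserved WAL                     */
        uint64_t    PrevBytePos;    /* start of the previously reserved record */
    } InsertPosSketch;

    /* Step 1: reserve 'size' bytes under the (elided) insertpos_lck spinlock.
     * Step 2, copying the record into the WAL buffers, then proceeds without
     * that lock and can run concurrently in many backends. */
    static uint64_t
    reserve_wal_space_sketch(InsertPosSketch *ins, int size, uint64_t *prevpos)
    {
        /* SpinLockAcquire(&insertpos_lck); */
        uint64_t    startpos = ins->CurrBytePos;

        *prevpos = ins->PrevBytePos;            /* this record's prev-link */
        ins->PrevBytePos = startpos;
        ins->CurrBytePos = startpos + (uint64_t) size;
        /* SpinLockRelease(&insertpos_lck); */

        return startpos;
    }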
@@ -1232,6 +1234,7 @@ begin:;
{
TRACE_POSTGRESQL_XLOG_SWITCH();
XLogFlush(EndPos);
/*
* Even though we reserved the rest of the segment for us, which is
* reflected in EndPos, we return a pointer to just the end of the
@@ -1272,7 +1275,7 @@ begin:;
rdt_lastnormal->next = NULL;
initStringInfo(&recordbuf);
for (;rdata != NULL; rdata = rdata->next)
for (; rdata != NULL; rdata = rdata->next)
appendBinaryStringInfo(&recordbuf, rdata->data, rdata->len);
appendStringInfoString(&buf, " - ");
@@ -1514,8 +1517,8 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
/*
* If this was an xlog-switch, it's not enough to write the switch record,
* we also have to consume all the remaining space in the WAL segment.
* We have already reserved it for us, but we still need to make sure it's
* we also have to consume all the remaining space in the WAL segment. We
* have already reserved it for us, but we still need to make sure it's
* allocated and zeroed in the WAL buffers so that when the caller (or
* someone else) does XLogWrite(), it can really write out all the zeros.
*/
@@ -1556,14 +1559,14 @@ WALInsertLockAcquire(void)
/*
* It doesn't matter which of the WAL insertion locks we acquire, so try
* the one we used last time. If the system isn't particularly busy,
* it's a good bet that it's still available, and it's good to have some
* the one we used last time. If the system isn't particularly busy, it's
* a good bet that it's still available, and it's good to have some
* affinity to a particular lock so that you don't unnecessarily bounce
* cache lines between processes when there's no contention.
*
* If this is the first time through in this backend, pick a lock
* (semi-)randomly. This allows the locks to be used evenly if you have
* a lot of very short connections.
* (semi-)randomly. This allows the locks to be used evenly if you have a
* lot of very short connections.
*/
static int lockToTry = -1;
@@ -1583,10 +1586,10 @@ WALInsertLockAcquire(void)
/*
* If we couldn't get the lock immediately, try another lock next
* time. On a system with more insertion locks than concurrent
* inserters, this causes all the inserters to eventually migrate
* to a lock that no-one else is using. On a system with more
* inserters than locks, it still helps to distribute the inserters
* evenly across the locks.
* inserters, this causes all the inserters to eventually migrate to a
* lock that no-one else is using. On a system with more inserters
* than locks, it still helps to distribute the inserters evenly
* across the locks.
*/
lockToTry = (lockToTry + 1) % num_xloginsert_locks;
}
@@ -1604,8 +1607,8 @@ WALInsertLockAcquireExclusive(void)
/*
* When holding all the locks, we only update the last lock's insertingAt
* indicator. The others are set to 0xFFFFFFFFFFFFFFFF, which is higher
* than any real XLogRecPtr value, to make sure that no-one blocks
* waiting on those.
* than any real XLogRecPtr value, to make sure that no-one blocks waiting
* on those.
*/
for (i = 0; i < num_xloginsert_locks - 1; i++)
{
@@ -1655,7 +1658,7 @@ WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
* WALInsertLockAcquireExclusive.
*/
LWLockUpdateVar(&WALInsertLocks[num_xloginsert_locks - 1].l.lock,
&WALInsertLocks[num_xloginsert_locks - 1].l.insertingAt,
&WALInsertLocks[num_xloginsert_locks - 1].l.insertingAt,
insertingAt);
}
else
@@ -1716,15 +1719,16 @@ WaitXLogInsertionsToFinish(XLogRecPtr upto)
* Loop through all the locks, sleeping on any in-progress insert older
* than 'upto'.
*
* finishedUpto is our return value, indicating the point upto which
* all the WAL insertions have been finished. Initialize it to the head
* of reserved WAL, and as we iterate through the insertion locks, back it
* finishedUpto is our return value, indicating the point upto which all
* the WAL insertions have been finished. Initialize it to the head of
* reserved WAL, and as we iterate through the insertion locks, back it
* out for any insertion that's still in progress.
*/
finishedUpto = reservedUpto;
for (i = 0; i < num_xloginsert_locks; i++)
{
XLogRecPtr insertingat = InvalidXLogRecPtr;
XLogRecPtr insertingat = InvalidXLogRecPtr;
do
{
/*
@@ -1797,9 +1801,9 @@ GetXLogBuffer(XLogRecPtr ptr)
}
/*
* The XLog buffer cache is organized so that a page is always loaded
* to a particular buffer. That way we can easily calculate the buffer
* a given page must be loaded into, from the XLogRecPtr alone.
* The XLog buffer cache is organized so that a page is always loaded to a
* particular buffer. That way we can easily calculate the buffer a given
* page must be loaded into, from the XLogRecPtr alone.
*/
idx = XLogRecPtrToBufIdx(ptr);
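The fixed page-to-buffer mapping the reworded comment describes can be sketched as simple modular arithmetic; the block size and the function name are assumptions for the sketch, not the real configuration:

    #include <stdint.h>

    #define SKETCH_BLCKSZ   8192    /* assumed WAL block size */

    /* A page at WAL position 'recptr' always maps to the same buffer slot,
     * so the slot index is a pure function of the position alone. */
    static int
    xlog_buf_idx_sketch(uint64_t recptr, int n_wal_buffers)
    {
        return (int) ((recptr / SKETCH_BLCKSZ) % (uint64_t) n_wal_buffers);
    }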
@@ -1827,8 +1831,8 @@ GetXLogBuffer(XLogRecPtr ptr)
if (expectedEndPtr != endptr)
{
/*
* Let others know that we're finished inserting the record up
* to the page boundary.
* Let others know that we're finished inserting the record up to the
* page boundary.
*/
WALInsertLockUpdateInsertingAt(expectedEndPtr - XLOG_BLCKSZ);
@@ -1837,7 +1841,7 @@ GetXLogBuffer(XLogRecPtr ptr)
if (expectedEndPtr != endptr)
elog(PANIC, "could not find WAL buffer for %X/%X",
(uint32) (ptr >> 32) , (uint32) ptr);
(uint32) (ptr >> 32), (uint32) ptr);
}
else
{
@@ -1974,8 +1978,8 @@ XLogRecPtrToBytePos(XLogRecPtr ptr)
else
{
result = fullsegs * UsableBytesInSegment +
(XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
(fullpages - 1) * UsableBytesInPage; /* full pages */
(XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
(fullpages - 1) * UsableBytesInPage; /* full pages */
if (offset > 0)
{
Assert(offset >= SizeOfXLogShortPHD);
@@ -2170,8 +2174,8 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
}
/*
* Now the next buffer slot is free and we can set it up to be the next
* output page.
* Now the next buffer slot is free and we can set it up to be the
* next output page.
*/
NewPageBeginPtr = XLogCtl->InitializedUpTo;
NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
@@ -2194,7 +2198,8 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
/* NewPage->xlp_info = 0; */ /* done by memset */
NewPage ->xlp_tli = ThisTimeLineID;
NewPage ->xlp_pageaddr = NewPageBeginPtr;
/* NewPage->xlp_rem_len = 0; */ /* done by memset */
/* NewPage->xlp_rem_len = 0; */ /* done by memset */
/*
* If online backup is not in progress, mark the header to indicate
@@ -2202,12 +2207,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
* blocks. This allows the WAL archiver to know whether it is safe to
* compress archived WAL data by transforming full-block records into
* the non-full-block format. It is sufficient to record this at the
* page level because we force a page switch (in fact a segment switch)
* when starting a backup, so the flag will be off before any records
* can be written during the backup. At the end of a backup, the last
* page will be marked as all unsafe when perhaps only part is unsafe,
* but at worst the archiver would miss the opportunity to compress a
* few records.
* page level because we force a page switch (in fact a segment
* switch) when starting a backup, so the flag will be off before any
* records can be written during the backup. At the end of a backup,
* the last page will be marked as all unsafe when perhaps only part
* is unsafe, but at worst the archiver would miss the opportunity to
* compress a few records.
*/
if (!Insert->forcePageWrites)
NewPage ->xlp_info |= XLP_BKP_REMOVABLE;
@@ -2329,7 +2334,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
* if we're passed a bogus WriteRqst.Write that is past the end of the
* last page that's been initialized by AdvanceXLInsertBuffer.
*/
XLogRecPtr EndPtr = XLogCtl->xlblocks[curridx];
XLogRecPtr EndPtr = XLogCtl->xlblocks[curridx];
if (LogwrtResult.Write >= EndPtr)
elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
(uint32) (LogwrtResult.Write >> 32),
@@ -2413,7 +2419,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
do
{
errno = 0;
written = write(openLogFile, from, nleft);
written = write(openLogFile, from, nleft);
if (written <= 0)
{
if (errno == EINTR)
@@ -2422,7 +2428,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
(errcode_for_file_access(),
errmsg("could not write to log file %s "
"at offset %u, length %zu: %m",
XLogFileNameP(ThisTimeLineID, openLogSegNo),
XLogFileNameP(ThisTimeLineID, openLogSegNo),
openLogOff, nbytes)));
}
nleft -= written;
@@ -2500,7 +2506,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
{
/*
* Could get here without iterating above loop, in which case we might
* have no open file or the wrong one. However, we do not need to
* have no open file or the wrong one. However, we do not need to
* fsync more than one file.
*/
if (sync_method != SYNC_METHOD_OPEN &&
@@ -2569,7 +2575,7 @@ XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
/*
* If the WALWriter is sleeping, we should kick it to make it come out of
* low-power mode. Otherwise, determine whether there's a full page of
* low-power mode. Otherwise, determine whether there's a full page of
* WAL available to write.
*/
if (!sleeping)
@@ -2616,7 +2622,8 @@ XLogGetReplicationSlotMinimumLSN(void)
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
XLogRecPtr retval;
XLogRecPtr retval;
SpinLockAcquire(&xlogctl->info_lck);
retval = xlogctl->replicationSlotMinLSN;
SpinLockRelease(&xlogctl->info_lck);
@@ -2883,9 +2890,9 @@ XLogFlush(XLogRecPtr record)
* We normally flush only completed blocks; but if there is nothing to do on
* that basis, we check for unflushed async commits in the current incomplete
* block, and flush through the latest one of those. Thus, if async commits
* are not being used, we will flush complete blocks only. We can guarantee
* are not being used, we will flush complete blocks only. We can guarantee
* that async commits reach disk after at most three cycles; normally only
* one or two. (When flushing complete blocks, we allow XLogWrite to write
* one or two. (When flushing complete blocks, we allow XLogWrite to write
* "flexibly", meaning it can stop at the end of the buffer ring; this makes a
* difference only with very high load or long wal_writer_delay, but imposes
* one extra cycle for the worst case for async commits.)
@@ -3060,7 +3067,7 @@ XLogNeedsFlush(XLogRecPtr record)
* log, seg: identify segment to be created/opened.
*
* *use_existent: if TRUE, OK to use a pre-existing file (else, any
* pre-existing file will be deleted). On return, TRUE if a pre-existing
* pre-existing file will be deleted). On return, TRUE if a pre-existing
* file was used.
*
* use_lock: if TRUE, acquire ControlFileLock while moving file into
@@ -3127,11 +3134,11 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
errmsg("could not create file \"%s\": %m", tmppath)));
/*
* Zero-fill the file. We have to do this the hard way to ensure that all
* Zero-fill the file. We have to do this the hard way to ensure that all
* the file space has really been allocated --- on platforms that allow
* "holes" in files, just seeking to the end doesn't allocate intermediate
* space. This way, we know that we have all the space and (after the
* fsync below) that all the indirect blocks are down on disk. Therefore,
* fsync below) that all the indirect blocks are down on disk. Therefore,
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
* log file.
*
@@ -3223,7 +3230,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
* a different timeline)
*
* Currently this is only used during recovery, and so there are no locking
* considerations. But we should be just as tense as XLogFileInit to avoid
* considerations. But we should be just as tense as XLogFileInit to avoid
* emplacing a bogus file.
*/
static void
@@ -3434,7 +3441,7 @@ XLogFileOpen(XLogSegNo segno)
if (fd < 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not open transaction log file \"%s\": %m", path)));
errmsg("could not open transaction log file \"%s\": %m", path)));
return fd;
}
@@ -3541,13 +3548,13 @@ XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source)
* the timelines listed in expectedTLEs.
*
* We expect curFileTLI on entry to be the TLI of the preceding file in
* sequence, or 0 if there was no predecessor. We do not allow curFileTLI
* sequence, or 0 if there was no predecessor. We do not allow curFileTLI
* to go backwards; this prevents us from picking up the wrong file when a
* parent timeline extends to higher segment numbers than the child we
* want to read.
*
* If we haven't read the timeline history file yet, read it now, so that
* we know which TLIs to scan. We don't save the list in expectedTLEs,
* we know which TLIs to scan. We don't save the list in expectedTLEs,
* however, unless we actually find a valid segment. That way if there is
* neither a timeline history file nor a WAL segment in the archive, and
* streaming replication is set up, we'll read the timeline history file
@@ -3611,7 +3618,7 @@ XLogFileClose(void)
/*
* WAL segment files will not be re-read in normal operation, so we advise
* the OS to release any cached pages. But do not do so if WAL archiving
* the OS to release any cached pages. But do not do so if WAL archiving
* or streaming is active, because archiver and walsender process could
* use the cache to read the WAL segment.
*/
@@ -3777,7 +3784,7 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr)
{
/*
* We ignore the timeline part of the XLOG segment identifiers in
* deciding whether a segment is still needed. This ensures that we
* deciding whether a segment is still needed. This ensures that we
* won't prematurely remove a segment from a parent timeline. We could
* probably be a little more proactive about removing segments of
* non-parent timelines, but that would be a whole lot more
@@ -3828,6 +3835,7 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr)
xlde->d_name)));
#ifdef WIN32
/*
* On Windows, if another process (e.g another backend)
* holds the file open in FILE_SHARE_DELETE mode, unlink
@@ -4310,7 +4318,7 @@ rescanLatestTimeLine(void)
* I/O routines for pg_control
*
* *ControlFile is a buffer in shared memory that holds an image of the
* contents of pg_control. WriteControlFile() initializes pg_control
* contents of pg_control. WriteControlFile() initializes pg_control
* given a preloaded buffer, ReadControlFile() loads the buffer from
* the pg_control file (during postmaster or standalone-backend startup),
* and UpdateControlFile() rewrites pg_control after we modify xlog state.
@@ -4715,7 +4723,7 @@ check_wal_buffers(int *newval, void **extra, GucSource source)
{
/*
* If we haven't yet changed the boot_val default of -1, just let it
* be. We'll fix it when XLOGShmemSize is called.
* be. We'll fix it when XLOGShmemSize is called.
*/
if (XLOGbuffers == -1)
return true;
@@ -4815,7 +4823,7 @@ XLOGShmemInit(void)
/* WAL insertion locks. Ensure they're aligned to the full padded size */
allocptr += sizeof(WALInsertLockPadded) -
((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
((uintptr_t) allocptr) %sizeof(WALInsertLockPadded);
WALInsertLocks = XLogCtl->Insert.WALInsertLocks =
(WALInsertLockPadded *) allocptr;
allocptr += sizeof(WALInsertLockPadded) * num_xloginsert_locks;
@@ -4836,8 +4844,8 @@ XLOGShmemInit(void)
/*
* Align the start of the page buffers to a full xlog block size boundary.
* This simplifies some calculations in XLOG insertion. It is also required
* for O_DIRECT.
* This simplifies some calculations in XLOG insertion. It is also
* required for O_DIRECT.
*/
allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
XLogCtl->pages = allocptr;
@@ -5233,7 +5241,7 @@ readRecoveryCommandFile(void)
const char *hintmsg;
if (!parse_int(item->value, &min_recovery_apply_delay, GUC_UNIT_MS,
&hintmsg))
&hintmsg))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("parameter \"%s\" requires a temporal value", "min_recovery_apply_delay"),
@@ -5271,7 +5279,7 @@ readRecoveryCommandFile(void)
/*
* If user specified recovery_target_timeline, validate it or compute the
* "latest" value. We can't do this until after we've gotten the restore
* "latest" value. We can't do this until after we've gotten the restore
* command and set InArchiveRecovery, because we need to fetch timeline
* history files from the archive.
*/
@@ -5464,8 +5472,8 @@ recoveryStopsBefore(XLogRecord *record)
*
* when testing for an xid, we MUST test for equality only, since
* transactions are numbered in the order they start, not the order
* they complete. A higher numbered xid will complete before you
* about 50% of the time...
* they complete. A higher numbered xid will complete before you about
* 50% of the time...
*/
stopsHere = (record->xl_xid == recoveryTargetXid);
}
@@ -5525,8 +5533,8 @@ recoveryStopsAfter(XLogRecord *record)
record_info = record->xl_info & ~XLR_INFO_MASK;
/*
* There can be many restore points that share the same name; we stop
* at the first one.
* There can be many restore points that share the same name; we stop at
* the first one.
*/
if (recoveryTarget == RECOVERY_TARGET_NAME &&
record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
@@ -5543,9 +5551,9 @@ recoveryStopsAfter(XLogRecord *record)
strlcpy(recoveryStopName, recordRestorePointData->rp_name, MAXFNAMELEN);
ereport(LOG,
(errmsg("recovery stopping at restore point \"%s\", time %s",
recoveryStopName,
timestamptz_to_str(recoveryStopTime))));
(errmsg("recovery stopping at restore point \"%s\", time %s",
recoveryStopName,
timestamptz_to_str(recoveryStopTime))));
return true;
}
}
@@ -5688,10 +5696,10 @@ recoveryApplyDelay(XLogRecord *record)
/*
* Is it a COMMIT record?
*
* We deliberately choose not to delay aborts since they have no effect
* on MVCC. We already allow replay of records that don't have a
* timestamp, so there is already opportunity for issues caused by early
* conflicts on standbys.
* We deliberately choose not to delay aborts since they have no effect on
* MVCC. We already allow replay of records that don't have a timestamp,
* so there is already opportunity for issues caused by early conflicts on
* standbys.
*/
record_info = record->xl_info & ~XLR_INFO_MASK;
if (!(record->xl_rmid == RM_XACT_ID &&
@@ -5711,7 +5719,7 @@ recoveryApplyDelay(XLogRecord *record)
*/
TimestampDifference(GetCurrentTimestamp(), recoveryDelayUntilTime,
&secs, &microsecs);
if (secs <= 0 && microsecs <=0)
if (secs <= 0 && microsecs <= 0)
return false;
while (true)
@@ -5731,15 +5739,15 @@ recoveryApplyDelay(XLogRecord *record)
TimestampDifference(GetCurrentTimestamp(), recoveryDelayUntilTime,
&secs, &microsecs);
if (secs <= 0 && microsecs <=0)
if (secs <= 0 && microsecs <= 0)
break;
elog(DEBUG2, "recovery apply delay %ld seconds, %d milliseconds",
secs, microsecs / 1000);
secs, microsecs / 1000);
WaitLatch(&XLogCtl->recoveryWakeupLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
secs * 1000L + microsecs / 1000);
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
secs * 1000L + microsecs / 1000);
}
return true;
}
@@ -5978,7 +5986,7 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
* Clear out any old relcache cache files. This is *necessary* if we do
* Clear out any old relcache cache files. This is *necessary* if we do
* any WAL replay, since that would probably result in the cache files
* being out of sync with database reality. In theory we could leave them
* in place if the database had been cleanly shut down, but it seems
@@ -6050,7 +6058,7 @@ StartupXLOG(void)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory"),
errdetail("Failed while allocating an XLog reading processor.")));
errdetail("Failed while allocating an XLog reading processor.")));
xlogreader->system_identifier = ControlFile->system_identifier;
if (read_backup_label(&checkPointLoc, &backupEndRequired,
@@ -6261,9 +6269,9 @@ StartupXLOG(void)
StartupReorderBuffer();
/*
* Startup MultiXact. We need to do this early for two reasons: one
* is that we might try to access multixacts when we do tuple freezing,
* and the other is we need its state initialized because we attempt
* Startup MultiXact. We need to do this early for two reasons: one is
* that we might try to access multixacts when we do tuple freezing, and
* the other is we need its state initialized because we attempt
* truncation during restartpoints.
*/
StartupMultiXact();
@@ -6517,9 +6525,9 @@ StartupXLOG(void)
}
/*
* Initialize shared variables for tracking progress of WAL replay,
* as if we had just replayed the record before the REDO location
* (or the checkpoint record itself, if it's a shutdown checkpoint).
* Initialize shared variables for tracking progress of WAL replay, as
* if we had just replayed the record before the REDO location (or the
* checkpoint record itself, if it's a shutdown checkpoint).
*/
SpinLockAcquire(&xlogctl->info_lck);
if (checkPoint.redo < RecPtr)
@@ -6646,17 +6654,17 @@ StartupXLOG(void)
}
/*
* If we've been asked to lag the master, wait on
* latch until enough time has passed.
* If we've been asked to lag the master, wait on latch until
* enough time has passed.
*/
if (recoveryApplyDelay(record))
{
/*
* We test for paused recovery again here. If
* user sets delayed apply, it may be because
* they expect to pause recovery in case of
* problems, so we must test again here otherwise
* pausing during the delay-wait wouldn't work.
* We test for paused recovery again here. If user sets
* delayed apply, it may be because they expect to pause
* recovery in case of problems, so we must test again
* here otherwise pausing during the delay-wait wouldn't
* work.
*/
if (xlogctl->recoveryPause)
recoveryPausesHere();
@@ -6893,8 +6901,8 @@ StartupXLOG(void)
/*
* Consider whether we need to assign a new timeline ID.
*
* If we are doing an archive recovery, we always assign a new ID. This
* handles a couple of issues. If we stopped short of the end of WAL
* If we are doing an archive recovery, we always assign a new ID. This
* handles a couple of issues. If we stopped short of the end of WAL
* during recovery, then we are clearly generating a new timeline and must
* assign it a unique new ID. Even if we ran to the end, modifying the
* current last segment is problematic because it may result in trying to
@@ -6969,7 +6977,7 @@ StartupXLOG(void)
/*
* Tricky point here: readBuf contains the *last* block that the LastRec
* record spans, not the one it starts in. The last block is indeed the
* record spans, not the one it starts in. The last block is indeed the
* one we want to use.
*/
if (EndOfLog % XLOG_BLCKSZ != 0)
@@ -6996,9 +7004,9 @@ StartupXLOG(void)
else
{
/*
* There is no partial block to copy. Just set InitializedUpTo,
* and let the first attempt to insert a log record to initialize
* the next buffer.
* There is no partial block to copy. Just set InitializedUpTo, and
* let the first attempt to insert a log record to initialize the next
* buffer.
*/
XLogCtl->InitializedUpTo = EndOfLog;
}
@@ -7162,7 +7170,7 @@ StartupXLOG(void)
XLogReportParameters();
/*
* All done. Allow backends to write WAL. (Although the bool flag is
* All done. Allow backends to write WAL. (Although the bool flag is
* probably atomic in itself, we use the info_lck here to ensure that
* there are no race conditions concerning visibility of other recent
* updates to shared memory.)
@@ -7200,7 +7208,7 @@ StartupXLOG(void)
static void
CheckRecoveryConsistency(void)
{
XLogRecPtr lastReplayedEndRecPtr;
XLogRecPtr lastReplayedEndRecPtr;
/*
* During crash recovery, we don't reach a consistent state until we've
@@ -7322,7 +7330,7 @@ RecoveryInProgress(void)
/*
* Initialize TimeLineID and RedoRecPtr when we discover that recovery
* is finished. InitPostgres() relies upon this behaviour to ensure
* that InitXLOGAccess() is called at backend startup. (If you change
* that InitXLOGAccess() is called at backend startup. (If you change
* this, see also LocalSetXLogInsertAllowed.)
*/
if (!LocalRecoveryInProgress)
@@ -7335,6 +7343,7 @@ RecoveryInProgress(void)
pg_memory_barrier();
InitXLOGAccess();
}
/*
* Note: We don't need a memory barrier when we're still in recovery.
* We might exit recovery immediately after return, so the caller
@@ -7594,7 +7603,7 @@ GetRedoRecPtr(void)
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
XLogRecPtr ptr;
XLogRecPtr ptr;
/*
* The possibly not up-to-date copy in XlogCtl is enough. Even if we
@@ -7983,7 +7992,7 @@ CreateCheckPoint(int flags)
/*
* If this isn't a shutdown or forced checkpoint, and we have not inserted
* any XLOG records since the start of the last checkpoint, skip the
* checkpoint. The idea here is to avoid inserting duplicate checkpoints
* checkpoint. The idea here is to avoid inserting duplicate checkpoints
* when the system is idle. That wastes log space, and more importantly it
* exposes us to possible loss of both current and previous checkpoint
* records if the machine crashes just as we're writing the update.
@@ -8120,7 +8129,7 @@ CreateCheckPoint(int flags)
* performing those groups of actions.
*
* One example is end of transaction, so we must wait for any transactions
* that are currently in commit critical sections. If an xact inserted
* that are currently in commit critical sections. If an xact inserted
* its commit record into XLOG just before the REDO point, then a crash
* restart from the REDO point would not replay that record, which means
* that our flushing had better include the xact's update of pg_clog. So
@@ -8131,9 +8140,8 @@ CreateCheckPoint(int flags)
* fuzzy: it is possible that we will wait for xacts we didn't really need
* to wait for. But the delay should be short and it seems better to make
* checkpoint take a bit longer than to hold off insertions longer than
* necessary.
* (In fact, the whole reason we have this issue is that xact.c does
* commit record XLOG insertion and clog update as two separate steps
* necessary. (In fact, the whole reason we have this issue is that xact.c
* does commit record XLOG insertion and clog update as two separate steps
* protected by different locks, but again that seems best on grounds of
* minimizing lock contention.)
*
@@ -8280,9 +8288,9 @@ CreateCheckPoint(int flags)
/*
* Truncate pg_subtrans if possible. We can throw away all data before
* the oldest XMIN of any running transaction. No future transaction will
* the oldest XMIN of any running transaction. No future transaction will
* attempt to reference any pg_subtrans entry older than that (see Asserts
* in subtrans.c). During recovery, though, we mustn't do this because
* in subtrans.c). During recovery, though, we mustn't do this because
* StartupSUBTRANS hasn't been called yet.
*/
if (!RecoveryInProgress())
@@ -8600,11 +8608,11 @@ CreateRestartPoint(int flags)
_logSegNo--;
/*
* Try to recycle segments on a useful timeline. If we've been promoted
* since the beginning of this restartpoint, use the new timeline
* chosen at end of recovery (RecoveryInProgress() sets ThisTimeLineID
* in that case). If we're still in recovery, use the timeline we're
* currently replaying.
* Try to recycle segments on a useful timeline. If we've been
* promoted since the beginning of this restartpoint, use the new
* timeline chosen at end of recovery (RecoveryInProgress() sets
* ThisTimeLineID in that case). If we're still in recovery, use the
* timeline we're currently replaying.
*
* There is no guarantee that the WAL segments will be useful on the
* current timeline; if recovery proceeds to a new timeline right
@@ -8636,9 +8644,9 @@ CreateRestartPoint(int flags)
/*
* Truncate pg_subtrans if possible. We can throw away all data before
* the oldest XMIN of any running transaction. No future transaction will
* the oldest XMIN of any running transaction. No future transaction will
* attempt to reference any pg_subtrans entry older than that (see Asserts
* in subtrans.c). When hot standby is disabled, though, we mustn't do
* in subtrans.c). When hot standby is disabled, though, we mustn't do
* this because StartupSUBTRANS hasn't been called yet.
*/
if (EnableHotStandby)
@@ -8697,7 +8705,7 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
/* then check whether slots limit removal further */
if (max_replication_slots > 0 && keep != InvalidXLogRecPtr)
{
XLogRecPtr slotSegNo;
XLogRecPtr slotSegNo;
XLByteToSeg(keep, slotSegNo);
@@ -8730,7 +8738,7 @@ XLogPutNextOid(Oid nextOid)
* We need not flush the NEXTOID record immediately, because any of the
* just-allocated OIDs could only reach disk as part of a tuple insert or
* update that would have its own XLOG record that must follow the NEXTOID
* record. Therefore, the standard buffer LSN interlock applied to those
* record. Therefore, the standard buffer LSN interlock applied to those
* records will ensure no such OID reaches disk before the NEXTOID record
* does.
*
@@ -8859,8 +8867,9 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
* lsn updates. We assume pd_lower/upper cannot be changed without an
* exclusive lock, so the contents bkp are not racy.
*
* With buffer_std set to false, XLogCheckBuffer() sets hole_length and
* hole_offset to 0; so the following code is safe for either case.
* With buffer_std set to false, XLogCheckBuffer() sets hole_length
* and hole_offset to 0; so the following code is safe for either
* case.
*/
memcpy(copied_buffer, origdata, bkpb.hole_offset);
memcpy(copied_buffer + bkpb.hole_offset,
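The hunk is cut off mid-call by the diff, so here is a self-contained sketch of the same idea: copy a page image while leaving out the free-space "hole" between pd_lower and pd_upper. The struct and names are simplified stand-ins, not the real BkpBlock layout:

#include <string.h>
#include <stdint.h>

#define TOY_PAGE_SIZE 8192

typedef struct ToyPageImage
{
    uint16_t hole_offset;              /* start of the unused gap */
    uint16_t hole_length;              /* size of the unused gap */
    char     data[TOY_PAGE_SIZE];      /* page contents with the hole removed */
} ToyPageImage;

static void
toy_copy_page_skipping_hole(ToyPageImage *dst, const char *page,
                            uint16_t hole_offset, uint16_t hole_length)
{
    dst->hole_offset = hole_offset;
    dst->hole_length = hole_length;

    /* Bytes before the hole ... */
    memcpy(dst->data, page, hole_offset);
    /* ... and bytes after it, leaving the hole out entirely. */
    memcpy(dst->data + hole_offset,
           page + hole_offset + hole_length,
           TOY_PAGE_SIZE - (hole_offset + hole_length));
}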
@@ -9072,7 +9081,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
/*
* We used to try to take the maximum of ShmemVariableCache->nextOid
* and the recorded nextOid, but that fails if the OID counter wraps
* around. Since no OID allocation should be happening during replay
* around. Since no OID allocation should be happening during replay
* anyway, better to just believe the record exactly. We still take
* OidGenLock while setting the variable, just in case.
*/
@@ -9262,10 +9271,10 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
BkpBlock bkpb;
/*
* Full-page image (FPI) records contain a backup block stored "inline"
* in the normal data since the locking when writing hint records isn't
* sufficient to use the normal backup block mechanism, which assumes
* exclusive lock on the buffer supplied.
* Full-page image (FPI) records contain a backup block stored
* "inline" in the normal data since the locking when writing hint
* records isn't sufficient to use the normal backup block mechanism,
* which assumes exclusive lock on the buffer supplied.
*
* Since the only change in these backup block are hint bits, there
* are no recovery conflicts generated.
@@ -9415,7 +9424,7 @@ get_sync_bit(int method)
/*
* Optimize writes by bypassing kernel cache with O_DIRECT when using
* O_SYNC/O_FSYNC and O_DSYNC. But only if archiving and streaming are
* O_SYNC/O_FSYNC and O_DSYNC. But only if archiving and streaming are
* disabled, otherwise the archive command or walsender process will read
* the WAL soon after writing it, which is guaranteed to cause a physical
* read if we bypassed the kernel cache. We also skip the
@@ -9619,7 +9628,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
* during an on-line backup even if not doing so at other times, because
* it's quite possible for the backup dump to obtain a "torn" (partially
* written) copy of a database page if it reads the page concurrently with
* our write to the same page. This can be fixed as long as the first
* our write to the same page. This can be fixed as long as the first
* write to the page in the WAL sequence is a full-page write. Hence, we
* turn on forcePageWrites and then force a CHECKPOINT, to ensure there
* are no dirty pages in shared memory that might get dumped while the
@@ -9663,7 +9672,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
* old timeline IDs. That would otherwise happen if you called
* pg_start_backup() right after restoring from a PITR archive: the
* first WAL segment containing the startup checkpoint has pages in
* the beginning with the old timeline ID. That can cause trouble at
* the beginning with the old timeline ID. That can cause trouble at
* recovery: we won't have a history file covering the old timeline if
* pg_xlog directory was not included in the base backup and the WAL
* archive was cleared too before starting the backup.
@@ -9686,7 +9695,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
bool checkpointfpw;
/*
* Force a CHECKPOINT. Aside from being necessary to prevent torn
* Force a CHECKPOINT. Aside from being necessary to prevent torn
* page problems, this guarantees that two successive backup runs
* will have different checkpoint positions and hence different
* history file names, even if nothing happened in between.
@@ -10339,7 +10348,7 @@ GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
*
* If we see a backup_label during recovery, we assume that we are recovering
* from a backup dump file, and we therefore roll forward from the checkpoint
* identified by the label file, NOT what pg_control says. This avoids the
* identified by the label file, NOT what pg_control says. This avoids the
* problem that pg_control might have been archived one or more checkpoints
* later than the start of the dump, and so if we rely on it as the start
* point, we will fail to restore a consistent database state.
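A hedged sketch of that decision, reduced to its skeleton. The file name backup_label is real; the parsing below is a toy, not the real label reader:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

typedef uint64_t ToyLSN;

static bool
toy_read_backup_label(ToyLSN *checkpoint_loc)
{
    FILE *fp = fopen("backup_label", "r");
    unsigned long long parsed = 0;

    if (fp == NULL)
        return false;                  /* no label file: use pg_control */

    /* The real file carries "CHECKPOINT LOCATION" lines; pretending it is a
     * single number keeps this sketch short. */
    if (fscanf(fp, "%llu", &parsed) == 1)
        *checkpoint_loc = (ToyLSN) parsed;
    fclose(fp);
    return true;
}

static ToyLSN
toy_choose_start_checkpoint(ToyLSN pg_control_checkpoint)
{
    ToyLSN from_label = 0;

    if (toy_read_backup_label(&from_label))
        return from_label;             /* trust the label, not pg_control */
    return pg_control_checkpoint;
}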
@@ -10686,7 +10695,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* Standby mode is implemented by a state machine:
*
* 1. Read from either archive or pg_xlog (XLOG_FROM_ARCHIVE), or just
* pg_xlog (XLOG_FROM_XLOG)
* pg_xlog (XLOG_FROM_XLOG)
* 2. Check trigger file
* 3. Read from primary server via walreceiver (XLOG_FROM_STREAM)
* 4. Rescan timelines
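The four numbered steps above form a loop. One illustrative, deliberately simplified way to picture it is as a small state machine, with invented enum names rather than the real XLOG_FROM_* values:

typedef enum ToyWalSource
{
    TOY_FROM_ARCHIVE,   /* 1. restore from archive, falling back to pg_xlog */
    TOY_FROM_PG_XLOG,   /*    or pg_xlog only, when no archive is configured */
    TOY_FROM_STREAM     /* 3. streaming from the primary via walreceiver */
} ToyWalSource;

/* Decide where to look next after the current source ran dry.  Step 2
 * (checking the trigger file) and step 4 (rescanning timelines) happen on
 * the transitions; this toy only hints at them in comments. */
static ToyWalSource
toy_next_source(ToyWalSource cur)
{
    switch (cur)
    {
        case TOY_FROM_ARCHIVE:
        case TOY_FROM_PG_XLOG:
            /* nothing found: check the trigger file, then try streaming */
            return TOY_FROM_STREAM;
        case TOY_FROM_STREAM:
        default:
            /* streaming failed: rescan timelines and start over */
            return TOY_FROM_ARCHIVE;
    }
}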
@@ -10887,8 +10896,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* file from pg_xlog.
*/
readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2,
currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY :
currentSource);
currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY :
currentSource);
if (readFile >= 0)
return true; /* success! */
@@ -10945,11 +10954,11 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
if (havedata)
{
/*
* Great, streamed far enough. Open the file if it's
* Great, streamed far enough. Open the file if it's
* not open already. Also read the timeline history
* file if we haven't initialized timeline history
* yet; it should be streamed over and present in
* pg_xlog by now. Use XLOG_FROM_STREAM so that
* pg_xlog by now. Use XLOG_FROM_STREAM so that
* source info is set correctly and XLogReceiptTime
* isn't changed.
*/
@@ -11014,7 +11023,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
HandleStartupProcInterrupts();
}
return false; /* not reached */
return false; /* not reached */
}
/*
@@ -11022,9 +11031,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* in the current WAL page, previously read by XLogPageRead().
*
* 'emode' is the error mode that would be used to report a file-not-found
* or legitimate end-of-WAL situation. Generally, we use it as-is, but if
* or legitimate end-of-WAL situation. Generally, we use it as-is, but if
* we're retrying the exact same record that we've tried previously, only
* complain the first time to keep the noise down. However, we only do when
* complain the first time to keep the noise down. However, we only do when
* reading from pg_xlog, because we don't expect any invalid records in archive
* or in records streamed from master. Files in the archive should be complete,
* and we should never hit the end of WAL because we stop and wait for more WAL

View File

@@ -300,8 +300,8 @@ RestoreArchivedFile(char *path, const char *xlogfname,
signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
ereport(signaled ? FATAL : DEBUG2,
(errmsg("could not restore file \"%s\" from archive: %s",
xlogfname, wait_result_to_str(rc))));
(errmsg("could not restore file \"%s\" from archive: %s",
xlogfname, wait_result_to_str(rc))));
not_available:
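The "signaled" test in this hunk encodes a useful convention: a restore command that died from a signal, or that exited with a status above 125 (the shell's "could not run the command" range), is treated as a hard failure. A standalone sketch, assuming only POSIX system() and <sys/wait.h>:

#include <stdlib.h>
#include <stdbool.h>
#include <sys/wait.h>

static bool
toy_restore_failed_hard(const char *command)
{
    int rc = system(command);

    if (rc == -1)
        return true;                   /* could not spawn a shell at all */

    /* Death by signal, or an exit code in the shell's reserved range. */
    return WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
}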

View File

@@ -429,7 +429,7 @@ pg_is_in_recovery(PG_FUNCTION_ARGS)
Datum
pg_xlog_location_diff(PG_FUNCTION_ARGS)
{
Datum result;
Datum result;
result = DirectFunctionCall2(pg_lsn_mi,
PG_GETARG_DATUM(0),

View File

@@ -199,7 +199,7 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
randAccess = true;
/*
* RecPtr is pointing to end+1 of the previous WAL record. If we're
* RecPtr is pointing to end+1 of the previous WAL record. If we're
* at a page boundary, no more records can fit on the current page. We
* must skip over the page header, but we can't do that until we've
* read in the page, since the header size is variable.
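A toy illustration of the boundary case this comment describes, with made-up constants standing in for the real page/segment sizes and header lengths:

#include <stdint.h>

#define TOY_PAGE_SIZE      8192
#define TOY_SEG_SIZE       (16 * 1024 * 1024)
#define TOY_SHORT_PAGE_HDR 24          /* stand-in for the short page header */
#define TOY_LONG_PAGE_HDR  40          /* stand-in for the long page header */

typedef uint64_t ToyLSN;

static ToyLSN
toy_skip_page_header(ToyLSN recptr)
{
    if (recptr % TOY_PAGE_SIZE != 0)
        return recptr;                           /* not at a page boundary */

    if (recptr % TOY_SEG_SIZE == 0)
        return recptr + TOY_LONG_PAGE_HDR;       /* first page of a segment */

    return recptr + TOY_SHORT_PAGE_HDR;
}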
@@ -277,7 +277,7 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
/*
* If the whole record header is on this page, validate it immediately.
* Otherwise do just a basic sanity check on xl_tot_len, and validate the
* rest of the header after reading it from the next page. The xl_tot_len
* rest of the header after reading it from the next page. The xl_tot_len
* check is necessary here to ensure that we enter the "Need to reassemble
* record" code path below; otherwise we might fail to apply
* ValidXLogRecordHeader at all.
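Sketching the split this comment describes (all names and limits below are invented; the real checks live in ValidXLogRecordHeader and XLogReadRecord):

#include <stdint.h>
#include <stdbool.h>

#define TOY_PAGE_SIZE  8192
#define TOY_REC_HDR    32                    /* stand-in for the record header size */
#define TOY_MAX_RECLEN (64 * 1024 * 1024)    /* arbitrary sanity cap */

typedef struct ToyRecHdr
{
    uint32_t xl_tot_len;     /* first header field: total record length */
    /* ... remaining header fields omitted in this toy ... */
} ToyRecHdr;

/* Returns true if reading may continue; *deferred is set when full header
 * validation has to wait until the rest of the header arrives from the
 * next page. */
static bool
toy_check_record_header(const ToyRecHdr *hdr, uint32_t offset_in_page,
                        bool *deferred)
{
    /* The length check is always possible, and keeps us on the
     * "need to reassemble record" path when the header is split. */
    if (hdr->xl_tot_len < TOY_REC_HDR || hdr->xl_tot_len > TOY_MAX_RECLEN)
        return false;

    *deferred = (offset_in_page + TOY_REC_HDR > TOY_PAGE_SIZE);
    return true;
}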
@@ -572,7 +572,7 @@ err:
* Validate an XLOG record header.
*
* This is just a convenience subroutine to avoid duplicated code in
* XLogReadRecord. It's not intended for use from anywhere else.
* XLogReadRecord. It's not intended for use from anywhere else.
*/
static bool
ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
@@ -661,7 +661,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
* data to read in) until we've checked the CRCs.
*
* We assume all of the record (that is, xl_tot_len bytes) has been read
* into memory at *record. Also, ValidXLogRecordHeader() has accepted the
* into memory at *record. Also, ValidXLogRecordHeader() has accepted the
* record's header, which means in particular that xl_tot_len is at least
* SizeOfXlogRecord, so it is safe to fetch xl_len.
*/