mirror of
https://github.com/postgres/postgres.git
synced 2025-09-02 04:21:28 +03:00
Revert "Skip WAL for new relfilenodes, under wal_level=minimal."
This reverts commit cb2fd7eac2. Per
numerous buildfarm members, it was incompatible with parallel query, and
a test case assumed LP64. Back-patch to 9.5 (all supported versions).
Discussion: https://postgr.es/m/20200321224920.GB1763544@rfd.leadboat.com
This commit is contained in:
@@ -66,7 +66,7 @@
|
||||
#define BUF_WRITTEN 0x01
|
||||
#define BUF_REUSABLE 0x02
|
||||
|
||||
#define RELS_BSEARCH_THRESHOLD 20
|
||||
#define DROP_RELS_BSEARCH_THRESHOLD 20
|
||||
|
||||
typedef struct PrivateRefCountEntry
|
||||
{
|
||||
@@ -105,19 +105,6 @@ typedef struct CkptTsStatus
|
||||
int index;
|
||||
} CkptTsStatus;
|
||||
|
||||
/*
|
||||
* Type for array used to sort SMgrRelations
|
||||
*
|
||||
* FlushRelationsAllBuffers shares the same comparator function with
|
||||
* DropRelFileNodesAllBuffers. Pointer to this struct and RelFileNode must be
|
||||
* compatible.
|
||||
*/
|
||||
typedef struct SMgrSortArray
|
||||
{
|
||||
/*
 * Sort/search key.  This must be the first member: the array is sorted and
 * searched with the same comparator that DropRelFileNodesAllBuffers uses,
 * so a pointer to this struct has to be usable as a pointer to a
 * RelFileNode.
 */
RelFileNode rnode;
|
||||
/* The relation this entry was built from; handed to FlushBuffer(). */
SMgrRelation srel;
|
||||
} SMgrSortArray;
|
||||
|
||||
/* GUC variables */
|
||||
bool zero_damaged_pages = false;
|
||||
int bgwriter_lru_maxpages = 100;
|
||||
@@ -3003,7 +2990,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
|
||||
* an exactly determined value, as it depends on many factors (CPU and RAM
|
||||
* speeds, amount of shared buffers etc.).
|
||||
*/
|
||||
use_bsearch = n > RELS_BSEARCH_THRESHOLD;
|
||||
use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
|
||||
|
||||
/* sort the list of rnodes if necessary */
|
||||
if (use_bsearch)
|
||||
@@ -3253,104 +3240,6 @@ FlushRelationBuffers(Relation rel)
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
* FlushRelationsAllBuffers
|
||||
*
|
||||
* This function flushes out of the buffer pool all the pages of all
|
||||
* forks of the specified smgr relations. It's equivalent to calling
|
||||
* FlushRelationBuffers once per fork per relation. The relations are
|
||||
* assumed not to use local buffers.
|
||||
* --------------------------------------------------------------------
|
||||
*/
|
||||
void
|
||||
FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
|
||||
{
|
||||
int i;
|
||||
SMgrSortArray *srels;
|
||||
bool use_bsearch;
|
||||
|
||||
if (nrels == 0)
|
||||
return;
|
||||
|
||||
/* fill-in array for qsort */
|
||||
srels = palloc(sizeof(SMgrSortArray) * nrels);
|
||||
|
||||
for (i = 0; i < nrels; i++)
|
||||
{
|
||||
Assert(!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode));
|
||||
|
||||
srels[i].rnode = smgrs[i]->smgr_rnode.node;
|
||||
srels[i].srel = smgrs[i];
|
||||
}
|
||||
|
||||
/*
|
||||
* Save the bsearch overhead for low number of relations to sync. See
|
||||
* DropRelFileNodesAllBuffers for details.
|
||||
*/
|
||||
use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
|
||||
|
||||
/* sort the list of SMgrRelations if necessary */
|
||||
if (use_bsearch)
|
||||
pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator);
|
||||
|
||||
/* Make sure we can handle the pin inside the loop */
|
||||
ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
|
||||
|
||||
for (i = 0; i < NBuffers; i++)
|
||||
{
|
||||
SMgrSortArray *srelent = NULL;
|
||||
BufferDesc *bufHdr = GetBufferDescriptor(i);
|
||||
uint32 buf_state;
|
||||
|
||||
/*
|
||||
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
|
||||
* and saves some cycles.
|
||||
*/
|
||||
|
||||
if (!use_bsearch)
|
||||
{
|
||||
int j;
|
||||
|
||||
for (j = 0; j < nrels; j++)
|
||||
{
|
||||
if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode))
|
||||
{
|
||||
srelent = &srels[j];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
srelent = bsearch((const void *) &(bufHdr->tag.rnode),
|
||||
srels, nrels, sizeof(SMgrSortArray),
|
||||
rnode_comparator);
|
||||
}
|
||||
|
||||
/* buffer doesn't belong to any of the given relfilenodes; skip it */
|
||||
if (srelent == NULL)
|
||||
continue;
|
||||
|
||||
ReservePrivateRefCountEntry();
|
||||
|
||||
buf_state = LockBufHdr(bufHdr);
|
||||
if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) &&
|
||||
(buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
|
||||
{
|
||||
PinBuffer_Locked(bufHdr);
|
||||
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
|
||||
FlushBuffer(bufHdr, srelent->srel);
|
||||
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
|
||||
UnpinBuffer(bufHdr, true);
|
||||
}
|
||||
else
|
||||
UnlockBufHdr(bufHdr, buf_state);
|
||||
}
|
||||
|
||||
pfree(srels);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
* FlushDatabaseBuffers
|
||||
*
|
||||
@@ -3552,15 +3441,13 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
|
||||
(pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
|
||||
{
|
||||
/*
|
||||
* If we must not write WAL, due to a relfilenode-specific
|
||||
* condition or being in recovery, don't dirty the page. We can
|
||||
* set the hint, just not dirty the page as a result so the hint
|
||||
* is lost when we evict the page or shutdown.
|
||||
* If we're in recovery we cannot dirty a page because of a hint.
|
||||
* We can set the hint, just not dirty the page as a result so the
|
||||
* hint is lost when we evict the page or shutdown.
|
||||
*
|
||||
* See src/backend/storage/page/README for longer discussion.
|
||||
*/
|
||||
if (RecoveryInProgress() ||
|
||||
RelFileNodeSkippingWAL(bufHdr->tag.rnode))
|
||||
if (RecoveryInProgress())
|
||||
return;
|
||||
|
||||
/*
|
||||
|
@@ -614,18 +614,6 @@ LockHeldByMe(const LOCKTAG *locktag, LOCKMODE lockmode)
|
||||
return (locallock && locallock->nLocks > 0);
|
||||
}
|
||||
|
||||
#ifdef USE_ASSERT_CHECKING
|
||||
/*
|
||||
* GetLockMethodLocalHash -- return the hash of local locks, for modules that
|
||||
* evaluate assertions based on all locks held.
|
||||
*/
|
||||
HTAB *
|
||||
GetLockMethodLocalHash(void)
|
||||
{
|
||||
return LockMethodLocalHash;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* LockHasWaiters -- look up 'locktag' and check if releasing this
|
||||
* lock would wake up other processes waiting for it.
|
||||
|
@@ -248,10 +248,11 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
* During replay, we would delete the file and then recreate it, which is fine
|
||||
* if the contents of the file were repopulated by subsequent WAL entries.
|
||||
* But if we didn't WAL-log insertions, but instead relied on fsyncing the
|
||||
* file after populating it (as we do at wal_level=minimal), the contents of
|
||||
* the file would be lost forever. By leaving the empty file until after the
|
||||
* next checkpoint, we prevent reassignment of the relfilenode number until
|
||||
* it's safe, because relfilenode assignment skips over any existing file.
|
||||
* file after populating it (as for instance CLUSTER and CREATE INDEX do),
|
||||
* the contents of the file would be lost forever. By leaving the empty file
|
||||
* until after the next checkpoint, we prevent reassignment of the relfilenode
|
||||
* number until it's safe, because relfilenode assignment skips over any
|
||||
* existing file.
|
||||
*
|
||||
* We do not need to go through this dance for temp relations, though, because
|
||||
* we never make WAL entries for temp rels, and so a temp rel poses no threat
|
||||
@@ -876,18 +877,12 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||
* mdimmedsync() -- Immediately sync a relation to stable storage.
|
||||
*
|
||||
* Note that only writes already issued are synced; this routine knows
|
||||
* nothing of dirty buffers that may exist inside the buffer manager. We
|
||||
* sync active and inactive segments; smgrDoPendingSyncs() relies on this.
|
||||
* Consider a relation skipping WAL. Suppose a checkpoint syncs blocks of
|
||||
* some segment, then mdtruncate() renders that segment inactive. If we
|
||||
* crash before the next checkpoint syncs the newly-inactive segment, that
|
||||
* segment may survive recovery, reintroducing unwanted data into the table.
|
||||
* nothing of dirty buffers that may exist inside the buffer manager.
|
||||
*/
|
||||
void
|
||||
mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
int segno;
|
||||
int min_inactive_seg;
|
||||
|
||||
/*
|
||||
* NOTE: mdnblocks makes sure we have opened all active segments, so that
|
||||
@@ -895,16 +890,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||
*/
|
||||
mdnblocks(reln, forknum);
|
||||
|
||||
min_inactive_seg = segno = reln->md_num_open_segs[forknum];
|
||||
|
||||
/*
|
||||
* Temporarily open inactive segments, then close them after sync. There
|
||||
* may be some inactive segments left opened after fsync() error, but that
|
||||
* is harmless. We don't bother to clean them up and take a risk of
|
||||
* further trouble. The next mdclose() will soon close them.
|
||||
*/
|
||||
while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
|
||||
segno++;
|
||||
segno = reln->md_num_open_segs[forknum];
|
||||
|
||||
while (segno > 0)
|
||||
{
|
||||
@@ -915,14 +901,6 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not fsync file \"%s\": %m",
|
||||
FilePathName(v->mdfd_vfd))));
|
||||
|
||||
/* Close inactive segments immediately */
|
||||
if (segno > min_inactive_seg)
|
||||
{
|
||||
FileClose(v->mdfd_vfd);
|
||||
_fdvec_resize(reln, forknum, segno - 1);
|
||||
}
|
||||
|
||||
segno--;
|
||||
}
|
||||
}
|
||||
|
@@ -388,41 +388,6 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
|
||||
smgrsw[which].smgr_unlink(rnode, InvalidForkNumber, isRedo);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrdosyncall() -- Immediately sync all forks of all given relations
|
||||
*
|
||||
* All forks of all given relations are synced out to the store.
|
||||
*
|
||||
* This is equivalent to FlushRelationBuffers() for each smgr relation,
|
||||
* then calling smgrimmedsync() for all forks of each relation, but it's
|
||||
* significantly quicker so should be preferred when possible.
|
||||
*/
|
||||
void
|
||||
smgrdosyncall(SMgrRelation *rels, int nrels)
|
||||
{
|
||||
int i = 0;
|
||||
ForkNumber forknum;
|
||||
|
||||
if (nrels == 0)
|
||||
return;
|
||||
|
||||
FlushRelationsAllBuffers(rels, nrels);
|
||||
|
||||
/*
|
||||
* Sync the physical file(s).
|
||||
*/
|
||||
for (i = 0; i < nrels; i++)
|
||||
{
|
||||
int which = rels[i]->smgr_which;
|
||||
|
||||
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
|
||||
{
|
||||
if (smgrsw[which].smgr_exists(rels[i], forknum))
|
||||
smgrsw[which].smgr_immedsync(rels[i], forknum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrdounlinkall() -- Immediately unlink all forks of all given relations
|
||||
*
|
||||
|
Reference in New Issue
Block a user