
Revert "Skip WAL for new relfilenodes, under wal_level=minimal."

This reverts commit cb2fd7eac2.  Per
numerous buildfarm members, it was incompatible with parallel query, and
a test case assumed LP64.  Back-patch to 9.5 (all supported versions).

Discussion: https://postgr.es/m/20200321224920.GB1763544@rfd.leadboat.com
Author: Noah Misch
Date: 2020-03-22 09:24:09 -07:00
parent d0587f52b3
commit de9396326e
51 changed files with 362 additions and 1438 deletions
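For context, "LP64" in the message above is the data model in which long and pointers are both 64 bits wide. A purely illustrative sketch, not the actual test case from the reverted commit, of the kind of assumption that breaks on 32-bit or LLP64 (64-bit Windows) builds:

    #include <assert.h>

    int
    main(void)
    {
        /* Holds only on LP64 platforms; sizeof(long) is 4 on ILP32 and LLP64. */
        assert(sizeof(long) == 8 && sizeof(void *) == 8);
        return 0;
    }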

src/backend/storage/buffer/bufmgr.c

@@ -66,7 +66,7 @@
#define BUF_WRITTEN 0x01
#define BUF_REUSABLE 0x02
- #define RELS_BSEARCH_THRESHOLD 20
+ #define DROP_RELS_BSEARCH_THRESHOLD 20
typedef struct PrivateRefCountEntry
{
@@ -105,19 +105,6 @@ typedef struct CkptTsStatus
int index;
} CkptTsStatus;
- /*
- * Type for array used to sort SMgrRelations
- *
- * FlushRelationsAllBuffers shares the same comparator function with
- * DropRelFileNodesAllBuffers. Pointer to this struct and RelFileNode must be
- * compatible.
- */
- typedef struct SMgrSortArray
- {
- RelFileNode rnode; /* This must be the first member */
- SMgrRelation srel;
- } SMgrSortArray;
/* GUC variables */
bool zero_damaged_pages = false;
int bgwriter_lru_maxpages = 100;
@@ -3003,7 +2990,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
* an exactly determined value, as it depends on many factors (CPU and RAM
* speeds, amount of shared buffers etc.).
*/
- use_bsearch = n > RELS_BSEARCH_THRESHOLD;
+ use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
/* sort the list of rnodes if necessary */
if (use_bsearch)
@@ -3253,104 +3240,6 @@ FlushRelationBuffers(Relation rel)
}
}
- /* ---------------------------------------------------------------------
- * FlushRelationsAllBuffers
- *
- * This function flushes out of the buffer pool all the pages of all
- * forks of the specified smgr relations. It's equivalent to calling
- * FlushRelationBuffers once per fork per relation. The relations are
- * assumed not to use local buffers.
- * --------------------------------------------------------------------
- */
- void
- FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
- {
- int i;
- SMgrSortArray *srels;
- bool use_bsearch;
- if (nrels == 0)
- return;
- /* fill-in array for qsort */
- srels = palloc(sizeof(SMgrSortArray) * nrels);
- for (i = 0; i < nrels; i++)
- {
- Assert(!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode));
- srels[i].rnode = smgrs[i]->smgr_rnode.node;
- srels[i].srel = smgrs[i];
- }
- /*
- * Save the bsearch overhead for low number of relations to sync. See
- * DropRelFileNodesAllBuffers for details.
- */
- use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
- /* sort the list of SMgrRelations if necessary */
- if (use_bsearch)
- pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator);
- /* Make sure we can handle the pin inside the loop */
- ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
- for (i = 0; i < NBuffers; i++)
- {
- SMgrSortArray *srelent = NULL;
- BufferDesc *bufHdr = GetBufferDescriptor(i);
- uint32 buf_state;
- /*
- * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
- * and saves some cycles.
- */
- if (!use_bsearch)
- {
- int j;
- for (j = 0; j < nrels; j++)
- {
- if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode))
- {
- srelent = &srels[j];
- break;
- }
- }
- }
- else
- {
- srelent = bsearch((const void *) &(bufHdr->tag.rnode),
- srels, nrels, sizeof(SMgrSortArray),
- rnode_comparator);
- }
- /* buffer doesn't belong to any of the given relfilenodes; skip it */
- if (srelent == NULL)
- continue;
- ReservePrivateRefCountEntry();
- buf_state = LockBufHdr(bufHdr);
- if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) &&
- (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
- {
- PinBuffer_Locked(bufHdr);
- LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, srelent->srel);
- LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
- UnpinBuffer(bufHdr, true);
- }
- else
- UnlockBufHdr(bufHdr, buf_state);
- }
- pfree(srels);
- }
/* ---------------------------------------------------------------------
* FlushDatabaseBuffers
*
@@ -3552,15 +3441,13 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
(pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
{
/*
- * If we must not write WAL, due to a relfilenode-specific
- * condition or being in recovery, don't dirty the page. We can
- * set the hint, just not dirty the page as a result so the hint
- * is lost when we evict the page or shutdown.
+ * If we're in recovery we cannot dirty a page because of a hint.
+ * We can set the hint, just not dirty the page as a result so the
+ * hint is lost when we evict the page or shutdown.
*
* See src/backend/storage/page/README for longer discussion.
*/
- if (RecoveryInProgress() ||
- RelFileNodeSkippingWAL(bufHdr->tag.rnode))
+ if (RecoveryInProgress())
return;
/*

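The SMgrSortArray comment in the hunk above relies on a standard C idiom: because the key (RelFileNode) is the first member of the struct, a pointer to the struct is also a valid pointer to the key, so one comparator serves both qsort over the struct array and bsearch probes with a bare key; the linear scan is kept for small arrays to avoid the sorting overhead. A minimal standalone sketch of that pattern, using made-up stand-in types rather than PostgreSQL's definitions:

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct Key { int db; int rel; } Key;   /* stand-in for RelFileNode */

    typedef struct Entry
    {
        Key   key;       /* must be the first member */
        void *payload;   /* stand-in for the SMgrRelation pointer */
    } Entry;

    /* Comparator written against Key; also valid for Entry because the key
     * sits at offset 0. */
    static int
    key_cmp(const void *a, const void *b)
    {
        const Key *ka = (const Key *) a;
        const Key *kb = (const Key *) b;

        if (ka->db != kb->db)
            return (ka->db < kb->db) ? -1 : 1;
        if (ka->rel != kb->rel)
            return (ka->rel < kb->rel) ? -1 : 1;
        return 0;
    }

    int
    main(void)
    {
        Entry  entries[] = { {{2, 20}, NULL}, {{1, 10}, NULL}, {{1, 5}, NULL} };
        Key    probe = {1, 10};
        Entry *hit;

        /* Sort the wrapper array with the key comparator... */
        qsort(entries, 3, sizeof(Entry), key_cmp);

        /* ...then probe it with a bare key, as the removed code probed
         * srels with a buffer tag's RelFileNode. */
        hit = bsearch(&probe, entries, 3, sizeof(Entry), key_cmp);
        printf("probe %s\n", hit ? "found" : "not found");
        return 0;
    }
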
src/backend/storage/lmgr/lock.c

@@ -614,18 +614,6 @@ LockHeldByMe(const LOCKTAG *locktag, LOCKMODE lockmode)
return (locallock && locallock->nLocks > 0);
}
- #ifdef USE_ASSERT_CHECKING
- /*
- * GetLockMethodLocalHash -- return the hash of local locks, for modules that
- * evaluate assertions based on all locks held.
- */
- HTAB *
- GetLockMethodLocalHash(void)
- {
- return LockMethodLocalHash;
- }
- #endif
/*
* LockHasWaiters -- look up 'locktag' and check if releasing this
* lock would wake up other processes waiting for it.

src/backend/storage/smgr/md.c

@@ -248,10 +248,11 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
* During replay, we would delete the file and then recreate it, which is fine
* if the contents of the file were repopulated by subsequent WAL entries.
* But if we didn't WAL-log insertions, but instead relied on fsyncing the
- file after populating it (as we do at wal_level=minimal), the contents of
- the file would be lost forever. By leaving the empty file until after the
- next checkpoint, we prevent reassignment of the relfilenode number until
- it's safe, because relfilenode assignment skips over any existing file.
+ file after populating it (as for instance CLUSTER and CREATE INDEX do),
+ the contents of the file would be lost forever. By leaving the empty file
+ until after the next checkpoint, we prevent reassignment of the relfilenode
+ number until it's safe, because relfilenode assignment skips over any
+ existing file.
*
* We do not need to go through this dance for temp relations, though, because
* we never make WAL entries for temp rels, and so a temp rel poses no threat
@@ -876,18 +877,12 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
* mdimmedsync() -- Immediately sync a relation to stable storage.
*
* Note that only writes already issued are synced; this routine knows
- nothing of dirty buffers that may exist inside the buffer manager. We
- sync active and inactive segments; smgrDoPendingSyncs() relies on this.
- Consider a relation skipping WAL. Suppose a checkpoint syncs blocks of
- some segment, then mdtruncate() renders that segment inactive. If we
- crash before the next checkpoint syncs the newly-inactive segment, that
- segment may survive recovery, reintroducing unwanted data into the table.
+ nothing of dirty buffers that may exist inside the buffer manager.
*/
void
mdimmedsync(SMgrRelation reln, ForkNumber forknum)
{
int segno;
- int min_inactive_seg;
/*
* NOTE: mdnblocks makes sure we have opened all active segments, so that
@@ -895,16 +890,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
*/
mdnblocks(reln, forknum);
- min_inactive_seg = segno = reln->md_num_open_segs[forknum];
- /*
- * Temporarily open inactive segments, then close them after sync. There
- * may be some inactive segments left opened after fsync() error, but that
- * is harmless. We don't bother to clean them up and take a risk of
- * further trouble. The next mdclose() will soon close them.
- */
- while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
- segno++;
+ segno = reln->md_num_open_segs[forknum];
while (segno > 0)
{
@@ -915,14 +901,6 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
FilePathName(v->mdfd_vfd))));
- /* Close inactive segments immediately */
- if (segno > min_inactive_seg)
- {
- FileClose(v->mdfd_vfd);
- _fdvec_resize(reln, forknum, segno - 1);
- }
segno--;
}
}

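The removed mdimmedsync() code opens segment files beyond those currently open so that inactive segments are fsync'd too, then closes the temporarily opened ones right away. A rough standalone sketch of that idea with plain POSIX calls and a hypothetical "base", "base.1", "base.2", ... naming scheme (not md.c's actual segment handling):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Fsync every existing segment of a relation-like file set.  Returns 0 on
     * success, -1 on fsync failure (caller reports the error). */
    static int
    sync_all_segments(const char *base)
    {
        for (int segno = 0;; segno++)
        {
            char path[1024];
            int  fd;

            if (segno == 0)
                snprintf(path, sizeof(path), "%s", base);
            else
                snprintf(path, sizeof(path), "%s.%d", base, segno);

            fd = open(path, O_RDWR);
            if (fd < 0)
                return 0;        /* no further segment: done */

            if (fsync(fd) < 0)
            {
                close(fd);
                return -1;
            }
            close(fd);           /* the "inactive" segment is closed immediately */
        }
    }

    int
    main(void)
    {
        return sync_all_segments("relfile") == 0 ? 0 : 1;
    }
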
src/backend/storage/smgr/smgr.c

@@ -388,41 +388,6 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
smgrsw[which].smgr_unlink(rnode, InvalidForkNumber, isRedo);
}
- /*
- * smgrdosyncall() -- Immediately sync all forks of all given relations
- *
- * All forks of all given relations are synced out to the store.
- *
- * This is equivalent to FlushRelationBuffers() for each smgr relation,
- * then calling smgrimmedsync() for all forks of each relation, but it's
- * significantly quicker so should be preferred when possible.
- */
- void
- smgrdosyncall(SMgrRelation *rels, int nrels)
- {
- int i = 0;
- ForkNumber forknum;
- if (nrels == 0)
- return;
- FlushRelationsAllBuffers(rels, nrels);
- /*
- * Sync the physical file(s).
- */
- for (i = 0; i < nrels; i++)
- {
- int which = rels[i]->smgr_which;
- for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
- {
- if (smgrsw[which].smgr_exists(rels[i], forknum))
- smgrsw[which].smgr_immedsync(rels[i], forknum);
- }
- }
- }
/*
* smgrdounlinkall() -- Immediately unlink all forks of all given relations
*