mirror of
https://github.com/postgres/postgres.git
synced 2025-05-05 09:19:17 +03:00
Don't ERROR on PreallocXlogFiles() race condition.
Before a restartpoint finishes PreallocXlogFiles(), a startup process KeepFileRestoredFromArchive() call can unlink the preallocated segment. If a CHECKPOINT sql command had elicited the restartpoint experiencing the race condition, that sql command failed. Moreover, the restartpoint omitted its log_checkpoints message and some inessential resource reclamation. Prevent the ERROR by skipping open() of the segment. Since these consequences are so minor, no back-patch. Discussion: https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.com
This commit is contained in:
parent
421484f79c
commit
2b3e4672f7
@ -2424,7 +2424,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
|
||||
bool ispartialpage;
|
||||
bool last_iteration;
|
||||
bool finishing_seg;
|
||||
bool added;
|
||||
int curridx;
|
||||
int npages;
|
||||
int startidx;
|
||||
@ -2490,7 +2489,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
|
||||
wal_segment_size);
|
||||
|
||||
/* create/use new log file */
|
||||
openLogFile = XLogFileInit(openLogSegNo, &added);
|
||||
openLogFile = XLogFileInit(openLogSegNo);
|
||||
ReserveExternalFD();
|
||||
}
|
||||
|
||||
@ -3255,23 +3254,21 @@ XLogNeedsFlush(XLogRecPtr record)
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a new XLOG file segment, or open a pre-existing one.
|
||||
* Try to make a given XLOG file segment exist.
|
||||
*
|
||||
* logsegno: identify segment to be created/opened.
|
||||
* logsegno: identify segment.
|
||||
*
|
||||
* *added: on return, true if this call raised the number of extant segments.
|
||||
*
|
||||
* Returns FD of opened file.
|
||||
* path: on return, this char[MAXPGPATH] has the path to the logsegno file.
|
||||
*
|
||||
* Note: errors here are ERROR not PANIC because we might or might not be
|
||||
* inside a critical section (eg, during checkpoint there is no reason to
|
||||
* take down the system on failure). They will promote to PANIC if we are
|
||||
* in a critical section.
|
||||
* Returns -1 or FD of opened file. A -1 here is not an error; a caller
|
||||
* wanting an open segment should attempt to open "path", which usually will
|
||||
* succeed. (This is weird, but it's efficient for the callers.)
|
||||
*/
|
||||
int
|
||||
XLogFileInit(XLogSegNo logsegno, bool *added)
|
||||
static int
|
||||
XLogFileInitInternal(XLogSegNo logsegno, bool *added, char *path)
|
||||
{
|
||||
char path[MAXPGPATH];
|
||||
char tmppath[MAXPGPATH];
|
||||
PGAlignedXLogBlock zbuffer;
|
||||
XLogSegNo installed_segno;
|
||||
@ -3424,26 +3421,53 @@ XLogFileInit(XLogSegNo logsegno, bool *added)
|
||||
*/
|
||||
max_segno = logsegno + CheckPointSegments;
|
||||
if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno))
|
||||
{
|
||||
*added = true;
|
||||
elog(DEBUG2, "done creating and filling new WAL file");
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* No need for any more future segments, or InstallXLogFileSegment()
|
||||
* failed to rename the file into place. If the rename failed, opening
|
||||
* the file below will fail.
|
||||
* failed to rename the file into place. If the rename failed, a
|
||||
* caller opening the file may fail.
|
||||
*/
|
||||
unlink(tmppath);
|
||||
elog(DEBUG2, "abandoned new WAL file");
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a new XLOG file segment, or open a pre-existing one.
|
||||
*
|
||||
* logsegno: identify segment to be created/opened.
|
||||
*
|
||||
* Returns FD of opened file.
|
||||
*
|
||||
* Note: errors here are ERROR not PANIC because we might or might not be
|
||||
* inside a critical section (eg, during checkpoint there is no reason to
|
||||
* take down the system on failure). They will promote to PANIC if we are
|
||||
* in a critical section.
|
||||
*/
|
||||
int
|
||||
XLogFileInit(XLogSegNo logsegno)
|
||||
{
|
||||
bool ignore_added;
|
||||
char path[MAXPGPATH];
|
||||
int fd;
|
||||
|
||||
fd = XLogFileInitInternal(logsegno, &ignore_added, path);
|
||||
if (fd >= 0)
|
||||
return fd;
|
||||
|
||||
/* Now open original target segment (might not be file I just made) */
|
||||
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method));
|
||||
if (fd < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open file \"%s\": %m", path)));
|
||||
|
||||
elog(DEBUG2, "done creating and filling new WAL file");
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
@ -3903,6 +3927,15 @@ XLogFileClose(void)
|
||||
* High-volume systems will be OK once they've built up a sufficient set of
|
||||
* recycled log segments, but the startup transient is likely to include
|
||||
* a lot of segment creations by foreground processes, which is not so good.
|
||||
*
|
||||
* XLogFileInitInternal() can ereport(ERROR). All known causes indicate big
|
||||
* trouble; for example, a full filesystem is one cause. The checkpoint WAL
|
||||
* and/or ControlFile updates already completed. If a RequestCheckpoint()
|
||||
* initiated the present checkpoint and an ERROR ends this function, the
|
||||
* command that called RequestCheckpoint() fails. That's not ideal, but it's
|
||||
* not worth contorting more functions to use caller-specified elevel values.
|
||||
* (With or without RequestCheckpoint(), an ERROR forestalls some inessential
|
||||
* reporting and resource reclamation.)
|
||||
*/
|
||||
static void
|
||||
PreallocXlogFiles(XLogRecPtr endptr)
|
||||
@ -3910,6 +3943,7 @@ PreallocXlogFiles(XLogRecPtr endptr)
|
||||
XLogSegNo _logSegNo;
|
||||
int lf;
|
||||
bool added;
|
||||
char path[MAXPGPATH];
|
||||
uint64 offset;
|
||||
|
||||
XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
|
||||
@ -3917,8 +3951,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
|
||||
if (offset >= (uint32) (0.75 * wal_segment_size))
|
||||
{
|
||||
_logSegNo++;
|
||||
lf = XLogFileInit(_logSegNo, &added);
|
||||
close(lf);
|
||||
lf = XLogFileInitInternal(_logSegNo, &added, path);
|
||||
if (lf >= 0)
|
||||
close(lf);
|
||||
if (added)
|
||||
CheckpointStats.ckpt_segs_added++;
|
||||
}
|
||||
@ -5214,7 +5249,6 @@ BootStrapXLOG(void)
|
||||
XLogLongPageHeader longpage;
|
||||
XLogRecord *record;
|
||||
char *recptr;
|
||||
bool added;
|
||||
uint64 sysidentifier;
|
||||
struct timeval tv;
|
||||
pg_crc32c crc;
|
||||
@ -5311,7 +5345,7 @@ BootStrapXLOG(void)
|
||||
record->xl_crc = crc;
|
||||
|
||||
/* Create first XLOG segment file */
|
||||
openLogFile = XLogFileInit(1, &added);
|
||||
openLogFile = XLogFileInit(1);
|
||||
|
||||
/*
|
||||
* We needn't bother with Reserve/ReleaseExternalFD here, since we'll
|
||||
@ -5617,10 +5651,9 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
|
||||
* The switch happened at a segment boundary, so just create the next
|
||||
* segment on the new timeline.
|
||||
*/
|
||||
bool added;
|
||||
int fd;
|
||||
|
||||
fd = XLogFileInit(startLogSegNo, &added);
|
||||
fd = XLogFileInit(startLogSegNo);
|
||||
|
||||
if (close(fd) != 0)
|
||||
{
|
||||
|
@ -885,8 +885,6 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
|
||||
|
||||
if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, wal_segment_size))
|
||||
{
|
||||
bool added;
|
||||
|
||||
/*
|
||||
* fsync() and close current file before we switch to next one. We
|
||||
* would otherwise have to reopen this file to fsync it later
|
||||
@ -923,7 +921,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
|
||||
|
||||
/* Create/use new log file */
|
||||
XLByteToSeg(recptr, recvSegNo, wal_segment_size);
|
||||
recvFile = XLogFileInit(recvSegNo, &added);
|
||||
recvFile = XLogFileInit(recvSegNo);
|
||||
recvFileTLI = ThisTimeLineID;
|
||||
}
|
||||
|
||||
|
@ -296,7 +296,7 @@ extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
|
||||
extern void XLogFlush(XLogRecPtr RecPtr);
|
||||
extern bool XLogBackgroundFlush(void);
|
||||
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
|
||||
extern int XLogFileInit(XLogSegNo segno, bool *added);
|
||||
extern int XLogFileInit(XLogSegNo segno);
|
||||
extern int XLogFileOpen(XLogSegNo segno);
|
||||
|
||||
extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
|
||||
|
Loading…
x
Reference in New Issue
Block a user