mirror of
https://github.com/postgres/postgres.git
synced 2025-12-01 12:18:01 +03:00
The file handling functions from fd.c were called with a diverse mix of notations for the file permissions when they were opening new files. Almost all files created by the server should have the same permissions set. So change the API so that e.g. OpenTransientFile() automatically uses the standard permissions set, and OpenTransientFilePerm() is a new function that takes an explicit permissions set for the few cases where it is needed. This also saves an unnecessary argument for call sites that are just opening an existing file. While we're reviewing these APIs, get rid of the FileName typedef and use the standard const char * for the file name and mode_t for the file mode. This makes these functions match other file handling functions and removes an unnecessary layer of mysteriousness. We can also get rid of a few casts that way. Author: David Steele <david@pgmasters.net>
582 lines
14 KiB
C
582 lines
14 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* timeline.c
|
|
* Functions for reading and writing timeline history files.
|
|
*
|
|
* A timeline history file lists the timeline changes of the timeline, in
|
|
* a simple text format. They are archived along with the WAL segments.
|
|
*
|
|
* The files are named like "<tli>.history". For example, if the database
|
|
* starts up and switches to timeline 5, the timeline history file would be
|
|
* called "00000005.history".
|
|
*
|
|
* Each line in the file represents a timeline switch:
|
|
*
|
|
* <parentTLI> <switchpoint> <reason>
|
|
*
|
|
* parentTLI ID of the parent timeline
|
|
* switchpoint XLogRecPtr of the WAL location where the switch happened
|
|
* reason human-readable explanation of why the timeline was changed
|
|
*
|
|
* The fields are separated by tabs. Lines beginning with # are comments, and
|
|
* are ignored. Empty lines are also ignored.
|
|
*
|
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* src/backend/access/transam/timeline.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
|
|
#include "access/timeline.h"
|
|
#include "access/xlog.h"
|
|
#include "access/xlog_internal.h"
|
|
#include "access/xlogdefs.h"
|
|
#include "pgstat.h"
|
|
#include "storage/fd.h"
|
|
|
|
/*
|
|
* Copies all timeline history files with id's between 'begin' and 'end'
|
|
* from archive to pg_wal.
|
|
*/
|
|
void
|
|
restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
|
|
{
|
|
char path[MAXPGPATH];
|
|
char histfname[MAXFNAMELEN];
|
|
TimeLineID tli;
|
|
|
|
for (tli = begin; tli < end; tli++)
|
|
{
|
|
if (tli == 1)
|
|
continue;
|
|
|
|
TLHistoryFileName(histfname, tli);
|
|
if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
|
|
KeepFileRestoredFromArchive(path, histfname);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Try to read a timeline's history file.
|
|
*
|
|
* If successful, return the list of component TLIs (the given TLI followed by
|
|
* its ancestor TLIs). If we can't find the history file, assume that the
|
|
* timeline has no parents, and return a list of just the specified timeline
|
|
* ID.
|
|
*/
|
|
List *
|
|
readTimeLineHistory(TimeLineID targetTLI)
|
|
{
|
|
List *result;
|
|
char path[MAXPGPATH];
|
|
char histfname[MAXFNAMELEN];
|
|
char fline[MAXPGPATH];
|
|
FILE *fd;
|
|
TimeLineHistoryEntry *entry;
|
|
TimeLineID lasttli = 0;
|
|
XLogRecPtr prevend;
|
|
bool fromArchive = false;
|
|
|
|
/* Timeline 1 does not have a history file, so no need to check */
|
|
if (targetTLI == 1)
|
|
{
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
entry->tli = targetTLI;
|
|
entry->begin = entry->end = InvalidXLogRecPtr;
|
|
return list_make1(entry);
|
|
}
|
|
|
|
if (ArchiveRecoveryRequested)
|
|
{
|
|
TLHistoryFileName(histfname, targetTLI);
|
|
fromArchive =
|
|
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
|
|
}
|
|
else
|
|
TLHistoryFilePath(path, targetTLI);
|
|
|
|
fd = AllocateFile(path, "r");
|
|
if (fd == NULL)
|
|
{
|
|
if (errno != ENOENT)
|
|
ereport(FATAL,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open file \"%s\": %m", path)));
|
|
/* Not there, so assume no parents */
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
entry->tli = targetTLI;
|
|
entry->begin = entry->end = InvalidXLogRecPtr;
|
|
return list_make1(entry);
|
|
}
|
|
|
|
result = NIL;
|
|
|
|
/*
|
|
* Parse the file...
|
|
*/
|
|
prevend = InvalidXLogRecPtr;
|
|
while (fgets(fline, sizeof(fline), fd) != NULL)
|
|
{
|
|
/* skip leading whitespace and check for # comment */
|
|
char *ptr;
|
|
TimeLineID tli;
|
|
uint32 switchpoint_hi;
|
|
uint32 switchpoint_lo;
|
|
int nfields;
|
|
|
|
for (ptr = fline; *ptr; ptr++)
|
|
{
|
|
if (!isspace((unsigned char) *ptr))
|
|
break;
|
|
}
|
|
if (*ptr == '\0' || *ptr == '#')
|
|
continue;
|
|
|
|
nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
|
|
|
|
if (nfields < 1)
|
|
{
|
|
/* expect a numeric timeline ID as first field of line */
|
|
ereport(FATAL,
|
|
(errmsg("syntax error in history file: %s", fline),
|
|
errhint("Expected a numeric timeline ID.")));
|
|
}
|
|
if (nfields != 3)
|
|
ereport(FATAL,
|
|
(errmsg("syntax error in history file: %s", fline),
|
|
errhint("Expected a write-ahead log switchpoint location.")));
|
|
|
|
if (result && tli <= lasttli)
|
|
ereport(FATAL,
|
|
(errmsg("invalid data in history file: %s", fline),
|
|
errhint("Timeline IDs must be in increasing sequence.")));
|
|
|
|
lasttli = tli;
|
|
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
entry->tli = tli;
|
|
entry->begin = prevend;
|
|
entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
|
|
prevend = entry->end;
|
|
|
|
/* Build list with newest item first */
|
|
result = lcons(entry, result);
|
|
|
|
/* we ignore the remainder of each line */
|
|
}
|
|
|
|
FreeFile(fd);
|
|
|
|
if (result && targetTLI <= lasttli)
|
|
ereport(FATAL,
|
|
(errmsg("invalid data in history file \"%s\"", path),
|
|
errhint("Timeline IDs must be less than child timeline's ID.")));
|
|
|
|
/*
|
|
* Create one more entry for the "tip" of the timeline, which has no entry
|
|
* in the history file.
|
|
*/
|
|
entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
|
|
entry->tli = targetTLI;
|
|
entry->begin = prevend;
|
|
entry->end = InvalidXLogRecPtr;
|
|
|
|
result = lcons(entry, result);
|
|
|
|
/*
|
|
* If the history file was fetched from archive, save it in pg_wal for
|
|
* future reference.
|
|
*/
|
|
if (fromArchive)
|
|
KeepFileRestoredFromArchive(path, histfname);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Probe whether a timeline history file exists for the given timeline ID
|
|
*/
|
|
bool
|
|
existsTimeLineHistory(TimeLineID probeTLI)
|
|
{
|
|
char path[MAXPGPATH];
|
|
char histfname[MAXFNAMELEN];
|
|
FILE *fd;
|
|
|
|
/* Timeline 1 does not have a history file, so no need to check */
|
|
if (probeTLI == 1)
|
|
return false;
|
|
|
|
if (ArchiveRecoveryRequested)
|
|
{
|
|
TLHistoryFileName(histfname, probeTLI);
|
|
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
|
|
}
|
|
else
|
|
TLHistoryFilePath(path, probeTLI);
|
|
|
|
fd = AllocateFile(path, "r");
|
|
if (fd != NULL)
|
|
{
|
|
FreeFile(fd);
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
if (errno != ENOENT)
|
|
ereport(FATAL,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open file \"%s\": %m", path)));
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Find the newest existing timeline, assuming that startTLI exists.
|
|
*
|
|
* Note: while this is somewhat heuristic, it does positively guarantee
|
|
* that (result + 1) is not a known timeline, and therefore it should
|
|
* be safe to assign that ID to a new timeline.
|
|
*/
|
|
TimeLineID
|
|
findNewestTimeLine(TimeLineID startTLI)
|
|
{
|
|
TimeLineID newestTLI;
|
|
TimeLineID probeTLI;
|
|
|
|
/*
|
|
* The algorithm is just to probe for the existence of timeline history
|
|
* files. XXX is it useful to allow gaps in the sequence?
|
|
*/
|
|
newestTLI = startTLI;
|
|
|
|
for (probeTLI = startTLI + 1;; probeTLI++)
|
|
{
|
|
if (existsTimeLineHistory(probeTLI))
|
|
{
|
|
newestTLI = probeTLI; /* probeTLI exists */
|
|
}
|
|
else
|
|
{
|
|
/* doesn't exist, assume we're done */
|
|
break;
|
|
}
|
|
}
|
|
|
|
return newestTLI;
|
|
}
|
|
|
|
/*
|
|
* Create a new timeline history file.
|
|
*
|
|
* newTLI: ID of the new timeline
|
|
* parentTLI: ID of its immediate parent
|
|
* switchpoint: WAL location where the system switched to the new timeline
|
|
* reason: human-readable explanation of why the timeline was switched
|
|
*
|
|
* Currently this is only used at the end recovery, and so there are no locking
|
|
* considerations. But we should be just as tense as XLogFileInit to avoid
|
|
* emplacing a bogus file.
|
|
*/
|
|
void
|
|
writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
|
|
XLogRecPtr switchpoint, char *reason)
|
|
{
|
|
char path[MAXPGPATH];
|
|
char tmppath[MAXPGPATH];
|
|
char histfname[MAXFNAMELEN];
|
|
char buffer[BLCKSZ];
|
|
int srcfd;
|
|
int fd;
|
|
int nbytes;
|
|
|
|
Assert(newTLI > parentTLI); /* else bad selection of newTLI */
|
|
|
|
/*
|
|
* Write into a temp file name.
|
|
*/
|
|
snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
|
|
|
|
unlink(tmppath);
|
|
|
|
/* do not use get_sync_bit() here --- want to fsync only at end of fill */
|
|
fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
|
|
if (fd < 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not create file \"%s\": %m", tmppath)));
|
|
|
|
/*
|
|
* If a history file exists for the parent, copy it verbatim
|
|
*/
|
|
if (ArchiveRecoveryRequested)
|
|
{
|
|
TLHistoryFileName(histfname, parentTLI);
|
|
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
|
|
}
|
|
else
|
|
TLHistoryFilePath(path, parentTLI);
|
|
|
|
srcfd = OpenTransientFile(path, O_RDONLY);
|
|
if (srcfd < 0)
|
|
{
|
|
if (errno != ENOENT)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open file \"%s\": %m", path)));
|
|
/* Not there, so assume parent has no parents */
|
|
}
|
|
else
|
|
{
|
|
for (;;)
|
|
{
|
|
errno = 0;
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
|
|
nbytes = (int) read(srcfd, buffer, sizeof(buffer));
|
|
pgstat_report_wait_end();
|
|
if (nbytes < 0 || errno != 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not read file \"%s\": %m", path)));
|
|
if (nbytes == 0)
|
|
break;
|
|
errno = 0;
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
|
|
if ((int) write(fd, buffer, nbytes) != nbytes)
|
|
{
|
|
int save_errno = errno;
|
|
|
|
/*
|
|
* If we fail to make the file, delete it to release disk
|
|
* space
|
|
*/
|
|
unlink(tmppath);
|
|
|
|
/*
|
|
* if write didn't set errno, assume problem is no disk space
|
|
*/
|
|
errno = save_errno ? save_errno : ENOSPC;
|
|
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not write to file \"%s\": %m", tmppath)));
|
|
}
|
|
pgstat_report_wait_end();
|
|
}
|
|
CloseTransientFile(srcfd);
|
|
}
|
|
|
|
/*
|
|
* Append one line with the details of this timeline split.
|
|
*
|
|
* If we did have a parent file, insert an extra newline just in case the
|
|
* parent file failed to end with one.
|
|
*/
|
|
snprintf(buffer, sizeof(buffer),
|
|
"%s%u\t%X/%X\t%s\n",
|
|
(srcfd < 0) ? "" : "\n",
|
|
parentTLI,
|
|
(uint32) (switchpoint >> 32), (uint32) (switchpoint),
|
|
reason);
|
|
|
|
nbytes = strlen(buffer);
|
|
errno = 0;
|
|
if ((int) write(fd, buffer, nbytes) != nbytes)
|
|
{
|
|
int save_errno = errno;
|
|
|
|
/*
|
|
* If we fail to make the file, delete it to release disk space
|
|
*/
|
|
unlink(tmppath);
|
|
/* if write didn't set errno, assume problem is no disk space */
|
|
errno = save_errno ? save_errno : ENOSPC;
|
|
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not write to file \"%s\": %m", tmppath)));
|
|
}
|
|
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
|
|
if (pg_fsync(fd) != 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not fsync file \"%s\": %m", tmppath)));
|
|
pgstat_report_wait_end();
|
|
|
|
if (CloseTransientFile(fd))
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not close file \"%s\": %m", tmppath)));
|
|
|
|
|
|
/*
|
|
* Now move the completed history file into place with its final name.
|
|
*/
|
|
TLHistoryFilePath(path, newTLI);
|
|
|
|
/*
|
|
* Perform the rename using link if available, paranoidly trying to avoid
|
|
* overwriting an existing file (there shouldn't be one).
|
|
*/
|
|
durable_link_or_rename(tmppath, path, ERROR);
|
|
|
|
/* The history file can be archived immediately. */
|
|
if (XLogArchivingActive())
|
|
{
|
|
TLHistoryFileName(histfname, newTLI);
|
|
XLogArchiveNotify(histfname);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Writes a history file for given timeline and contents.
|
|
*
|
|
* Currently this is only used in the walreceiver process, and so there are
|
|
* no locking considerations. But we should be just as tense as XLogFileInit
|
|
* to avoid emplacing a bogus file.
|
|
*/
|
|
void
|
|
writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
|
|
{
|
|
char path[MAXPGPATH];
|
|
char tmppath[MAXPGPATH];
|
|
int fd;
|
|
|
|
/*
|
|
* Write into a temp file name.
|
|
*/
|
|
snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
|
|
|
|
unlink(tmppath);
|
|
|
|
/* do not use get_sync_bit() here --- want to fsync only at end of fill */
|
|
fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
|
|
if (fd < 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not create file \"%s\": %m", tmppath)));
|
|
|
|
errno = 0;
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
|
|
if ((int) write(fd, content, size) != size)
|
|
{
|
|
int save_errno = errno;
|
|
|
|
/*
|
|
* If we fail to make the file, delete it to release disk space
|
|
*/
|
|
unlink(tmppath);
|
|
/* if write didn't set errno, assume problem is no disk space */
|
|
errno = save_errno ? save_errno : ENOSPC;
|
|
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not write to file \"%s\": %m", tmppath)));
|
|
}
|
|
pgstat_report_wait_end();
|
|
|
|
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
|
|
if (pg_fsync(fd) != 0)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not fsync file \"%s\": %m", tmppath)));
|
|
pgstat_report_wait_end();
|
|
|
|
if (CloseTransientFile(fd))
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not close file \"%s\": %m", tmppath)));
|
|
|
|
|
|
/*
|
|
* Now move the completed history file into place with its final name.
|
|
*/
|
|
TLHistoryFilePath(path, tli);
|
|
|
|
/*
|
|
* Perform the rename using link if available, paranoidly trying to avoid
|
|
* overwriting an existing file (there shouldn't be one).
|
|
*/
|
|
durable_link_or_rename(tmppath, path, ERROR);
|
|
}
|
|
|
|
/*
|
|
* Returns true if 'expectedTLEs' contains a timeline with id 'tli'
|
|
*/
|
|
bool
|
|
tliInHistory(TimeLineID tli, List *expectedTLEs)
|
|
{
|
|
ListCell *cell;
|
|
|
|
foreach(cell, expectedTLEs)
|
|
{
|
|
if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Returns the ID of the timeline in use at a particular point in time, in
|
|
* the given timeline history.
|
|
*/
|
|
TimeLineID
|
|
tliOfPointInHistory(XLogRecPtr ptr, List *history)
|
|
{
|
|
ListCell *cell;
|
|
|
|
foreach(cell, history)
|
|
{
|
|
TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
|
|
|
|
if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
|
|
(XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
|
|
{
|
|
/* found it */
|
|
return tle->tli;
|
|
}
|
|
}
|
|
|
|
/* shouldn't happen. */
|
|
elog(ERROR, "timeline history was not contiguous");
|
|
return 0; /* keep compiler quiet */
|
|
}
|
|
|
|
/*
|
|
* Returns the point in history where we branched off the given timeline,
|
|
* and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
|
|
* the timeline is current, ie. we have not branched off from it, and throws
|
|
* an error if the timeline is not part of this server's history.
|
|
*/
|
|
XLogRecPtr
|
|
tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
|
|
{
|
|
ListCell *cell;
|
|
|
|
if (nextTLI)
|
|
*nextTLI = 0;
|
|
foreach(cell, history)
|
|
{
|
|
TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
|
|
|
|
if (tle->tli == tli)
|
|
return tle->end;
|
|
if (nextTLI)
|
|
*nextTLI = tle->tli;
|
|
}
|
|
|
|
ereport(ERROR,
|
|
(errmsg("requested timeline %u is not in this server's history",
|
|
tli)));
|
|
return InvalidXLogRecPtr; /* keep compiler quiet */
|
|
}
|