mirror of
https://github.com/postgres/postgres.git
synced 2025-08-05 07:41:25 +03:00
Improve handling of corrupted two-phase state files at recovery
When a corrupted two-phase state file is found by WAL replay, be it for
crash recovery or archive recovery, then the file is simply skipped and
a WARNING is logged to the user, causing the transaction to be silently
lost. Facing an on-disk WAL file which is corrupted is as likely to
happen as what is stored in WAL records, but WAL records are already
able to fail hard if there is a CRC mismatch. On-disk two-phase state
files, on the contrary, are simply ignored if corrupted. Note that when
restoring the initial two-phase data state at recovery, files newer than
the horizon XID are discarded hence no files present in pg_twophase/
should be torned and have been made durable by a previous checkpoint, so
recovery should never see any corrupted two-phase state file by design.
The situation got better since 978b2f6
which has added two-phase state
information directly in WAL instead of using on-disk files, so the risk
is limited to two-phase transactions which live across at least one
checkpoint for long periods. Backups having legit two-phase state files
on-disk could also lose silently transactions when restored if things
get corrupted.
This behavior exists since two-phase commit has been introduced, no
back-patch is done for now per the lack of complaints about this
problem.
Author: Michael Paquier
Discussion: https://postgr.es/m/20180709050309.GM1467@paquier.xyz
This commit is contained in:
@@ -1207,10 +1207,12 @@ RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
|
|||||||
* Read and validate the state file for xid.
|
* Read and validate the state file for xid.
|
||||||
*
|
*
|
||||||
* If it looks OK (has a valid magic number and CRC), return the palloc'd
|
* If it looks OK (has a valid magic number and CRC), return the palloc'd
|
||||||
* contents of the file. Otherwise return NULL.
|
* contents of the file, issuing an error when finding corrupted data. If
|
||||||
|
* missing_ok is true, which indicates that missing files can be safely
|
||||||
|
* ignored, then return NULL. This state can be reached when doing recovery.
|
||||||
*/
|
*/
|
||||||
static char *
|
static char *
|
||||||
ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
|
ReadTwoPhaseFile(TransactionId xid, bool missing_ok)
|
||||||
{
|
{
|
||||||
char path[MAXPGPATH];
|
char path[MAXPGPATH];
|
||||||
char *buf;
|
char *buf;
|
||||||
@@ -1227,11 +1229,12 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
|
|||||||
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
|
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
|
||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
{
|
{
|
||||||
if (give_warnings)
|
if (missing_ok && errno == ENOENT)
|
||||||
ereport(WARNING,
|
return NULL;
|
||||||
(errcode_for_file_access(),
|
|
||||||
errmsg("could not open file \"%s\": %m", path)));
|
ereport(ERROR,
|
||||||
return NULL;
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not open file \"%s\": %m", path)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1241,35 +1244,27 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
|
|||||||
* even on a valid file.
|
* even on a valid file.
|
||||||
*/
|
*/
|
||||||
if (fstat(fd, &stat))
|
if (fstat(fd, &stat))
|
||||||
{
|
ereport(ERROR,
|
||||||
int save_errno = errno;
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not stat file \"%s\": %m", path)));
|
||||||
CloseTransientFile(fd);
|
|
||||||
if (give_warnings)
|
|
||||||
{
|
|
||||||
errno = save_errno;
|
|
||||||
ereport(WARNING,
|
|
||||||
(errcode_for_file_access(),
|
|
||||||
errmsg("could not stat file \"%s\": %m", path)));
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
|
if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
|
||||||
MAXALIGN(sizeof(TwoPhaseRecordOnDisk)) +
|
MAXALIGN(sizeof(TwoPhaseRecordOnDisk)) +
|
||||||
sizeof(pg_crc32c)) ||
|
sizeof(pg_crc32c)) ||
|
||||||
stat.st_size > MaxAllocSize)
|
stat.st_size > MaxAllocSize)
|
||||||
{
|
ereport(ERROR,
|
||||||
CloseTransientFile(fd);
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
return NULL;
|
errmsg_plural("incorrect size of file \"%s\": %zu byte",
|
||||||
}
|
"incorrect size of file \"%s\": %zu bytes",
|
||||||
|
(Size) stat.st_size, path,
|
||||||
|
(Size) stat.st_size)));
|
||||||
|
|
||||||
crc_offset = stat.st_size - sizeof(pg_crc32c);
|
crc_offset = stat.st_size - sizeof(pg_crc32c);
|
||||||
if (crc_offset != MAXALIGN(crc_offset))
|
if (crc_offset != MAXALIGN(crc_offset))
|
||||||
{
|
ereport(ERROR,
|
||||||
CloseTransientFile(fd);
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
return NULL;
|
errmsg("incorrect alignment of CRC offset for file \"%s\"",
|
||||||
}
|
path)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* OK, slurp in the file.
|
* OK, slurp in the file.
|
||||||
@@ -1280,37 +1275,31 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
|
|||||||
r = read(fd, buf, stat.st_size);
|
r = read(fd, buf, stat.st_size);
|
||||||
if (r != stat.st_size)
|
if (r != stat.st_size)
|
||||||
{
|
{
|
||||||
int save_errno = errno;
|
if (r < 0)
|
||||||
|
ereport(ERROR,
|
||||||
pgstat_report_wait_end();
|
(errcode_for_file_access(),
|
||||||
CloseTransientFile(fd);
|
errmsg("could not read file \"%s\": %m", path)));
|
||||||
if (give_warnings)
|
else
|
||||||
{
|
ereport(ERROR,
|
||||||
if (r < 0)
|
(errmsg("could not read file \"%s\": read %d of %zu",
|
||||||
{
|
path, r, (Size) stat.st_size)));
|
||||||
errno = save_errno;
|
|
||||||
ereport(WARNING,
|
|
||||||
(errcode_for_file_access(),
|
|
||||||
errmsg("could not read file \"%s\": %m", path)));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
ereport(WARNING,
|
|
||||||
(errmsg("could not read file \"%s\": read %d of %zu",
|
|
||||||
path, r, (Size) stat.st_size)));
|
|
||||||
}
|
|
||||||
pfree(buf);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pgstat_report_wait_end();
|
pgstat_report_wait_end();
|
||||||
CloseTransientFile(fd);
|
CloseTransientFile(fd);
|
||||||
|
|
||||||
hdr = (TwoPhaseFileHeader *) buf;
|
hdr = (TwoPhaseFileHeader *) buf;
|
||||||
if (hdr->magic != TWOPHASE_MAGIC || hdr->total_len != stat.st_size)
|
if (hdr->magic != TWOPHASE_MAGIC)
|
||||||
{
|
ereport(ERROR,
|
||||||
pfree(buf);
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
return NULL;
|
errmsg("invalid magic number stored in file \"%s\"",
|
||||||
}
|
path)));
|
||||||
|
|
||||||
|
if (hdr->total_len != stat.st_size)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
|
errmsg("invalid size stored in file \"%s\"",
|
||||||
|
path)));
|
||||||
|
|
||||||
INIT_CRC32C(calc_crc);
|
INIT_CRC32C(calc_crc);
|
||||||
COMP_CRC32C(calc_crc, buf, crc_offset);
|
COMP_CRC32C(calc_crc, buf, crc_offset);
|
||||||
@@ -1319,10 +1308,10 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
|
|||||||
file_crc = *((pg_crc32c *) (buf + crc_offset));
|
file_crc = *((pg_crc32c *) (buf + crc_offset));
|
||||||
|
|
||||||
if (!EQ_CRC32C(calc_crc, file_crc))
|
if (!EQ_CRC32C(calc_crc, file_crc))
|
||||||
{
|
ereport(ERROR,
|
||||||
pfree(buf);
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
return NULL;
|
errmsg("calculated CRC checksum does not match value stored in file \"%s\"",
|
||||||
}
|
path)));
|
||||||
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
@@ -1431,7 +1420,7 @@ StandbyTransactionIdIsPrepared(TransactionId xid)
|
|||||||
return false; /* nothing to do */
|
return false; /* nothing to do */
|
||||||
|
|
||||||
/* Read and validate file */
|
/* Read and validate file */
|
||||||
buf = ReadTwoPhaseFile(xid, false);
|
buf = ReadTwoPhaseFile(xid, true);
|
||||||
if (buf == NULL)
|
if (buf == NULL)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@@ -1479,7 +1468,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
|
|||||||
* to disk if for some reason they have lived for a long time.
|
* to disk if for some reason they have lived for a long time.
|
||||||
*/
|
*/
|
||||||
if (gxact->ondisk)
|
if (gxact->ondisk)
|
||||||
buf = ReadTwoPhaseFile(xid, true);
|
buf = ReadTwoPhaseFile(xid, false);
|
||||||
else
|
else
|
||||||
XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
|
XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
|
||||||
|
|
||||||
@@ -1874,6 +1863,10 @@ restoreTwoPhaseData(void)
|
|||||||
* write a WAL entry, and so there might be no evidence in WAL of those
|
* write a WAL entry, and so there might be no evidence in WAL of those
|
||||||
* subxact XIDs.
|
* subxact XIDs.
|
||||||
*
|
*
|
||||||
|
* On corrupted two-phase files, fail immediately. Keeping around broken
|
||||||
|
* entries and let replay continue causes harm on the system, and a new
|
||||||
|
* backup should be rolled in.
|
||||||
|
*
|
||||||
* Our other responsibility is to determine and return the oldest valid XID
|
* Our other responsibility is to determine and return the oldest valid XID
|
||||||
* among the prepared xacts (if none, return ShmemVariableCache->nextXid).
|
* among the prepared xacts (if none, return ShmemVariableCache->nextXid).
|
||||||
* This is needed to synchronize pg_subtrans startup properly.
|
* This is needed to synchronize pg_subtrans startup properly.
|
||||||
@@ -2164,15 +2157,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
|
|||||||
if (fromdisk)
|
if (fromdisk)
|
||||||
{
|
{
|
||||||
/* Read and validate file */
|
/* Read and validate file */
|
||||||
buf = ReadTwoPhaseFile(xid, true);
|
buf = ReadTwoPhaseFile(xid, false);
|
||||||
if (buf == NULL)
|
|
||||||
{
|
|
||||||
ereport(WARNING,
|
|
||||||
(errmsg("removing corrupt two-phase state file for transaction %u",
|
|
||||||
xid)));
|
|
||||||
RemoveTwoPhaseFile(xid, true);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -2185,21 +2170,15 @@ ProcessTwoPhaseBuffer(TransactionId xid,
|
|||||||
if (!TransactionIdEquals(hdr->xid, xid))
|
if (!TransactionIdEquals(hdr->xid, xid))
|
||||||
{
|
{
|
||||||
if (fromdisk)
|
if (fromdisk)
|
||||||
{
|
ereport(ERROR,
|
||||||
ereport(WARNING,
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
(errmsg("removing corrupt two-phase state file for transaction %u",
|
errmsg("corrupted two-phase state file for transaction \"%u\"",
|
||||||
xid)));
|
xid)));
|
||||||
RemoveTwoPhaseFile(xid, true);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
ereport(ERROR,
|
||||||
ereport(WARNING,
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
(errmsg("removing corrupt two-phase state from memory for transaction %u",
|
errmsg("corrupted two-phase state in memory for transaction \"%u\"",
|
||||||
xid)));
|
xid)));
|
||||||
PrepareRedoRemove(xid, true);
|
|
||||||
}
|
|
||||||
pfree(buf);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Reference in New Issue
Block a user