mirror of
https://github.com/postgres/postgres.git
synced 2025-06-30 21:42:05 +03:00
Fix recovery of 2PC transaction during crash recovery
A crash in the middle of a checkpoint with some two-phase state data
already flushed to disk by this checkpoint could cause a follow-up crash
recovery to recover twice the same transaction, once from what has been
found in pg_twophase/ at the beginning of recovery and a second time
when replaying its corresponding record.
This would lead to FATAL failures in the startup process during
recovery, where the same transaction would have a state recovered twice
instead of once:
LOG: recovering prepared transaction 731 from shared memory
LOG: recovering prepared transaction 731 from shared memory
FATAL: lock ExclusiveLock on object 731/0/0 is already held
This issue is fixed by skipping the addition of any 2PC state coming
from a record whose equivalent 2PC state file has already been loaded in
TwoPhaseState at the beginning of recovery by restoreTwoPhaseData(),
which is OK as long as the system has not reached a consistent state.
The timing to get a messed up recovery processing is very racy, and
would very unlikely happen. The thread that has reported the issue has
demonstrated the bug using injection points to force a PANIC in the
middle of a checkpoint.
Issue introduced in 728bd99
, so backpatch all the way down.
Reported-by: "suyu.cmj" <mengjuan.cmj@alibaba-inc.com>
Author: "suyu.cmj" <mengjuan.cmj@alibaba-inc.com>
Author: Michael Paquier
Discussion: https://postgr.es/m/109e6994-b971-48cb-84f6-829646f18b4c.mengjuan.cmj@alibaba-inc.com
Backpatch-through: 11
This commit is contained in:
@ -86,6 +86,7 @@
|
|||||||
#include "access/xlog.h"
|
#include "access/xlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "access/xlogreader.h"
|
#include "access/xlogreader.h"
|
||||||
|
#include "access/xlogrecovery.h"
|
||||||
#include "access/xlogutils.h"
|
#include "access/xlogutils.h"
|
||||||
#include "catalog/pg_type.h"
|
#include "catalog/pg_type.h"
|
||||||
#include "catalog/storage.h"
|
#include "catalog/storage.h"
|
||||||
@ -2479,6 +2480,38 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
|
|||||||
* that it got added in the redo phase
|
* that it got added in the redo phase
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In the event of a crash while a checkpoint was running, it may be
|
||||||
|
* possible that some two-phase data found its way to disk while its
|
||||||
|
* corresponding record needs to be replayed in the follow-up recovery.
|
||||||
|
* As the 2PC data was on disk, it has already been restored at the
|
||||||
|
* beginning of recovery with restoreTwoPhaseData(), so skip this record
|
||||||
|
* to avoid duplicates in TwoPhaseState. If a consistent state has been
|
||||||
|
* reached, the record is added to TwoPhaseState and it should have no
|
||||||
|
* corresponding file in pg_twophase.
|
||||||
|
*/
|
||||||
|
if (!XLogRecPtrIsInvalid(start_lsn))
|
||||||
|
{
|
||||||
|
char path[MAXPGPATH];
|
||||||
|
|
||||||
|
TwoPhaseFilePath(path, hdr->xid);
|
||||||
|
|
||||||
|
if (access(path, F_OK) == 0)
|
||||||
|
{
|
||||||
|
ereport(reachedConsistency ? ERROR : WARNING,
|
||||||
|
(errmsg("could not recover two-phase state file for transaction %u",
|
||||||
|
hdr->xid),
|
||||||
|
errdetail("Two-phase state file has been found in WAL record %X/%X, but this transaction has already been restored from disk.",
|
||||||
|
LSN_FORMAT_ARGS(start_lsn))));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (errno != ENOENT)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not access file \"%s\": %m", path)));
|
||||||
|
}
|
||||||
|
|
||||||
/* Get a free gxact from the freelist */
|
/* Get a free gxact from the freelist */
|
||||||
if (TwoPhaseState->freeGXacts == NULL)
|
if (TwoPhaseState->freeGXacts == NULL)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
|
Reference in New Issue
Block a user