mirror of
https://github.com/postgres/postgres.git
synced 2025-06-29 10:41:53 +03:00
During recovery, if we reach consistent state and still have entries in the
invalid-page hash table, PANIC immediately. Immediate PANIC is much better than waiting for end-of-recovery, which is what we did before, because the end-of-recovery might not come until months later if this is a standby server. Also refrain from creating a restartpoint if there are invalid-page entries in the hash table. Restarting recovery from such a restartpoint would not see the invalid references, and wouldn't be able to cross-check them when consistency is reached. That wouldn't matter when things are going smoothly, but the more sanity checks you have the better. Fujii Masao
This commit is contained in:
@ -562,7 +562,7 @@ static TimeLineID lastPageTLI = 0;
|
|||||||
static XLogRecPtr minRecoveryPoint; /* local copy of
|
static XLogRecPtr minRecoveryPoint; /* local copy of
|
||||||
* ControlFile->minRecoveryPoint */
|
* ControlFile->minRecoveryPoint */
|
||||||
static bool updateMinRecoveryPoint = true;
|
static bool updateMinRecoveryPoint = true;
|
||||||
static bool reachedMinRecoveryPoint = false;
|
bool reachedMinRecoveryPoint = false;
|
||||||
|
|
||||||
static bool InRedo = false;
|
static bool InRedo = false;
|
||||||
|
|
||||||
@ -6758,12 +6758,6 @@ StartupXLOG(void)
|
|||||||
/* Disallow XLogInsert again */
|
/* Disallow XLogInsert again */
|
||||||
LocalXLogInsertAllowed = -1;
|
LocalXLogInsertAllowed = -1;
|
||||||
|
|
||||||
/*
|
|
||||||
* Check to see if the XLOG sequence contained any unresolved
|
|
||||||
* references to uninitialized pages.
|
|
||||||
*/
|
|
||||||
XLogCheckInvalidPages();
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Perform a checkpoint to update all our recovery activity to disk.
|
* Perform a checkpoint to update all our recovery activity to disk.
|
||||||
*
|
*
|
||||||
@ -6906,6 +6900,12 @@ CheckRecoveryConsistency(void)
|
|||||||
XLByteLE(minRecoveryPoint, EndRecPtr) &&
|
XLByteLE(minRecoveryPoint, EndRecPtr) &&
|
||||||
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
|
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Check to see if the XLOG sequence contained any unresolved
|
||||||
|
* references to uninitialized pages.
|
||||||
|
*/
|
||||||
|
XLogCheckInvalidPages();
|
||||||
|
|
||||||
reachedMinRecoveryPoint = true;
|
reachedMinRecoveryPoint = true;
|
||||||
ereport(LOG,
|
ereport(LOG,
|
||||||
(errmsg("consistent recovery state reached at %X/%X",
|
(errmsg("consistent recovery state reached at %X/%X",
|
||||||
@ -7907,7 +7907,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
|
|||||||
volatile XLogCtlData *xlogctl = XLogCtl;
|
volatile XLogCtlData *xlogctl = XLogCtl;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Is it safe to checkpoint? We must ask each of the resource managers
|
* Is it safe to restartpoint? We must ask each of the resource managers
|
||||||
* whether they have any partial state information that might prevent a
|
* whether they have any partial state information that might prevent a
|
||||||
* correct restart from this point. If so, we skip this opportunity, but
|
* correct restart from this point. If so, we skip this opportunity, but
|
||||||
* return at the next checkpoint record for another try.
|
* return at the next checkpoint record for another try.
|
||||||
@ -7926,6 +7926,22 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Also refrain from creating a restartpoint if we have seen any references
|
||||||
|
* to non-existent pages. Restarting recovery from the restartpoint would
|
||||||
|
* not see the references, so we would lose the cross-check that the pages
|
||||||
|
* belonged to a relation that was dropped later.
|
||||||
|
*/
|
||||||
|
if (XLogHaveInvalidPages())
|
||||||
|
{
|
||||||
|
elog(trace_recovery(DEBUG2),
|
||||||
|
"could not record restart point at %X/%X because there "
|
||||||
|
"are unresolved references to invalid pages",
|
||||||
|
checkPoint->redo.xlogid,
|
||||||
|
checkPoint->redo.xrecoff);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copy the checkpoint record to shared memory, so that checkpointer
|
* Copy the checkpoint record to shared memory, so that checkpointer
|
||||||
* can work out the next time it wants to perform a restartpoint.
|
* can work out the next time it wants to perform a restartpoint.
|
||||||
|
@ -52,6 +52,22 @@ typedef struct xl_invalid_page
|
|||||||
static HTAB *invalid_page_tab = NULL;
|
static HTAB *invalid_page_tab = NULL;
|
||||||
|
|
||||||
|
|
||||||
|
/* Report a reference to an invalid page */
|
||||||
|
static void
|
||||||
|
report_invalid_page(int elevel, RelFileNode node, ForkNumber forkno,
|
||||||
|
BlockNumber blkno, bool present)
|
||||||
|
{
|
||||||
|
char *path = relpathperm(node, forkno);
|
||||||
|
|
||||||
|
if (present)
|
||||||
|
elog(elevel, "page %u of relation %s is uninitialized",
|
||||||
|
blkno, path);
|
||||||
|
else
|
||||||
|
elog(elevel, "page %u of relation %s does not exist",
|
||||||
|
blkno, path);
|
||||||
|
pfree(path);
|
||||||
|
}
|
||||||
|
|
||||||
/* Log a reference to an invalid page */
|
/* Log a reference to an invalid page */
|
||||||
static void
|
static void
|
||||||
log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
|
log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
|
||||||
@ -61,23 +77,27 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
|
|||||||
xl_invalid_page *hentry;
|
xl_invalid_page *hentry;
|
||||||
bool found;
|
bool found;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Once recovery has reached a consistent state, the invalid-page table
|
||||||
|
* should be empty and remain so. If a reference to an invalid page is
|
||||||
|
* found after consistency is reached, PANIC immediately. This might
|
||||||
|
* seem aggressive, but it's better than letting the invalid reference
|
||||||
|
* linger in the hash table until the end of recovery and PANIC there,
|
||||||
|
* which might come only much later if this is a standby server.
|
||||||
|
*/
|
||||||
|
if (reachedMinRecoveryPoint)
|
||||||
|
{
|
||||||
|
report_invalid_page(WARNING, node, forkno, blkno, present);
|
||||||
|
elog(PANIC, "WAL contains references to invalid pages");
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Log references to invalid pages at DEBUG1 level. This allows some
|
* Log references to invalid pages at DEBUG1 level. This allows some
|
||||||
* tracing of the cause (note the elog context mechanism will tell us
|
* tracing of the cause (note the elog context mechanism will tell us
|
||||||
* something about the XLOG record that generated the reference).
|
* something about the XLOG record that generated the reference).
|
||||||
*/
|
*/
|
||||||
if (log_min_messages <= DEBUG1 || client_min_messages <= DEBUG1)
|
if (log_min_messages <= DEBUG1 || client_min_messages <= DEBUG1)
|
||||||
{
|
report_invalid_page(DEBUG1, node, forkno, blkno, present);
|
||||||
char *path = relpathperm(node, forkno);
|
|
||||||
|
|
||||||
if (present)
|
|
||||||
elog(DEBUG1, "page %u of relation %s is uninitialized",
|
|
||||||
blkno, path);
|
|
||||||
else
|
|
||||||
elog(DEBUG1, "page %u of relation %s does not exist",
|
|
||||||
blkno, path);
|
|
||||||
pfree(path);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (invalid_page_tab == NULL)
|
if (invalid_page_tab == NULL)
|
||||||
{
|
{
|
||||||
@ -181,6 +201,16 @@ forget_invalid_pages_db(Oid dbid)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Are there any unresolved references to invalid pages? */
|
||||||
|
bool
|
||||||
|
XLogHaveInvalidPages(void)
|
||||||
|
{
|
||||||
|
if (invalid_page_tab != NULL &&
|
||||||
|
hash_get_num_entries(invalid_page_tab) > 0)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/* Complain about any remaining invalid-page entries */
|
/* Complain about any remaining invalid-page entries */
|
||||||
void
|
void
|
||||||
XLogCheckInvalidPages(void)
|
XLogCheckInvalidPages(void)
|
||||||
@ -200,15 +230,8 @@ XLogCheckInvalidPages(void)
|
|||||||
*/
|
*/
|
||||||
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
|
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
|
||||||
{
|
{
|
||||||
char *path = relpathperm(hentry->key.node, hentry->key.forkno);
|
report_invalid_page(WARNING, hentry->key.node, hentry->key.forkno,
|
||||||
|
hentry->key.blkno, hentry->present);
|
||||||
if (hentry->present)
|
|
||||||
elog(WARNING, "page %u of relation %s was uninitialized",
|
|
||||||
hentry->key.blkno, path);
|
|
||||||
else
|
|
||||||
elog(WARNING, "page %u of relation %s did not exist",
|
|
||||||
hentry->key.blkno, path);
|
|
||||||
pfree(path);
|
|
||||||
foundone = true;
|
foundone = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -190,6 +190,8 @@ typedef enum
|
|||||||
|
|
||||||
extern XLogRecPtr XactLastRecEnd;
|
extern XLogRecPtr XactLastRecEnd;
|
||||||
|
|
||||||
|
extern bool reachedMinRecoveryPoint;
|
||||||
|
|
||||||
/* these variables are GUC parameters related to XLOG */
|
/* these variables are GUC parameters related to XLOG */
|
||||||
extern int CheckPointSegments;
|
extern int CheckPointSegments;
|
||||||
extern int wal_keep_segments;
|
extern int wal_keep_segments;
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
|
|
||||||
|
|
||||||
|
extern bool XLogHaveInvalidPages(void);
|
||||||
extern void XLogCheckInvalidPages(void);
|
extern void XLogCheckInvalidPages(void);
|
||||||
|
|
||||||
extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum);
|
extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum);
|
||||||
|
Reference in New Issue
Block a user