1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-30 21:42:05 +03:00

Remove secondary checkpoint

Previously server reserved WAL for last two checkpoints,
which used too much disk space for small servers.

Bumps PG_CONTROL_VERSION

Author: Simon Riggs <simon@2ndQuadrant.com>
Reviewed-by: Michael Paquier <michael.paquier@gmail.com>
This commit is contained in:
Simon Riggs
2017-11-07 12:56:30 -05:00
parent 98267ee83e
commit 4b0d28de06
8 changed files with 61 additions and 125 deletions

View File

@ -2221,13 +2221,18 @@ CalculateCheckpointSegments(void)
* Calculate the distance at which to trigger a checkpoint, to avoid
* exceeding max_wal_size_mb. This is based on two assumptions:
*
* a) we keep WAL for two checkpoint cycles, back to the "prev" checkpoint.
* a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
* WAL for two checkpoint cycles to allow us to recover from the
* secondary checkpoint if the first checkpoint failed, though we
* only did this on the master anyway, not on standby. Keeping just
* one checkpoint simplifies processing and reduces disk space in
* many smaller databases.)
* b) during checkpoint, we consume checkpoint_completion_target *
* number of segments consumed between checkpoints.
*-------
*/
target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
(2.0 + CheckPointCompletionTarget);
(1.0 + CheckPointCompletionTarget);
/* round down */
CheckPointSegments = (int) target;
@ -2279,23 +2284,8 @@ XLOGfileslop(XLogRecPtr PriorRedoPtr)
* To estimate where the next checkpoint will finish, assume that the
* system runs steadily consuming CheckPointDistanceEstimate bytes between
* every checkpoint.
*
* The reason this calculation is done from the prior checkpoint, not the
* one that just finished, is that this behaves better if some checkpoint
* cycles are abnormally short, like if you perform a manual checkpoint
* right after a timed one. The manual checkpoint will make almost a full
* cycle's worth of WAL segments available for recycling, because the
* segments from the prior's prior, fully-sized checkpoint cycle are no
* longer needed. However, the next checkpoint will make only few segments
* available for recycling, the ones generated between the timed
* checkpoint and the manual one right after that. If at the manual
* checkpoint we only retained enough segments to get us to the next timed
* one, and removed the rest, then at the next checkpoint we would not
* have enough segments around for recycling, to get us to the checkpoint
* after that. Basing the calculations on the distance from the prior redo
* pointer largely fixes that problem.
*/
distance = (2.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate;
distance = (1.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate;
/* add 10% for good measure. */
distance *= 1.10;
@ -6593,30 +6583,17 @@ StartupXLOG(void)
(errmsg("checkpoint record is at %X/%X",
(uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
}
else if (StandbyMode)
else
{
/*
* The last valid checkpoint record required for a streaming
* recovery exists in neither standby nor the primary.
* We used to attempt to go back to a secondary checkpoint
* record here, but only when not in standby_mode. We now
* just fail if we can't read the last checkpoint because
* this allows us to simplify processing around checkpoints.
*/
ereport(PANIC,
(errmsg("could not locate a valid checkpoint record")));
}
else
{
checkPointLoc = ControlFile->prevCheckPoint;
record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, true);
if (record != NULL)
{
ereport(LOG,
(errmsg("using previous checkpoint record at %X/%X",
(uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
InRecovery = true; /* force recovery even if SHUTDOWNED */
}
else
ereport(PANIC,
(errmsg("could not locate a valid checkpoint record")));
}
memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
}
@ -6845,7 +6822,6 @@ StartupXLOG(void)
recoveryTargetTLI)));
ControlFile->state = DB_IN_CRASH_RECOVERY;
}
ControlFile->prevCheckPoint = ControlFile->checkPoint;
ControlFile->checkPoint = checkPointLoc;
ControlFile->checkPointCopy = checkPoint;
if (InArchiveRecovery)
@ -7619,12 +7595,11 @@ StartupXLOG(void)
{
if (fast_promote)
{
checkPointLoc = ControlFile->prevCheckPoint;
checkPointLoc = ControlFile->checkPoint;
/*
* Confirm the last checkpoint is available for us to recover
* from if we fail. Note that we don't check for the secondary
* checkpoint since that isn't available in most base backups.
* from if we fail.
*/
record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, false);
if (record != NULL)
@ -8090,7 +8065,7 @@ LocalSetXLogInsertAllowed(void)
* Subroutine to try to fetch and validate a prior checkpoint record.
*
* whichChkpt identifies the checkpoint (merely for reporting purposes).
* 1 for "primary", 2 for "secondary", 0 for "other" (backup_label)
* 1 for "primary", 0 for "other" (backup_label)
*/
static XLogRecord *
ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
@ -8110,10 +8085,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
ereport(LOG,
(errmsg("invalid primary checkpoint link in control file")));
break;
case 2:
ereport(LOG,
(errmsg("invalid secondary checkpoint link in control file")));
break;
default:
ereport(LOG,
(errmsg("invalid checkpoint link in backup_label file")));
@ -8135,10 +8106,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
ereport(LOG,
(errmsg("invalid primary checkpoint record")));
break;
case 2:
ereport(LOG,
(errmsg("invalid secondary checkpoint record")));
break;
default:
ereport(LOG,
(errmsg("invalid checkpoint record")));
@ -8154,10 +8121,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
ereport(LOG,
(errmsg("invalid resource manager ID in primary checkpoint record")));
break;
case 2:
ereport(LOG,
(errmsg("invalid resource manager ID in secondary checkpoint record")));
break;
default:
ereport(LOG,
(errmsg("invalid resource manager ID in checkpoint record")));
@ -8175,10 +8138,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
ereport(LOG,
(errmsg("invalid xl_info in primary checkpoint record")));
break;
case 2:
ereport(LOG,
(errmsg("invalid xl_info in secondary checkpoint record")));
break;
default:
ereport(LOG,
(errmsg("invalid xl_info in checkpoint record")));
@ -8194,10 +8153,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
ereport(LOG,
(errmsg("invalid length of primary checkpoint record")));
break;
case 2:
ereport(LOG,
(errmsg("invalid length of secondary checkpoint record")));
break;
default:
ereport(LOG,
(errmsg("invalid length of checkpoint record")));
@ -8933,8 +8888,7 @@ CreateCheckPoint(int flags)
(errmsg("concurrent write-ahead log activity while database system is shutting down")));
/*
* Remember the prior checkpoint's redo pointer, used later to determine
* the point where the log can be truncated.
* Remember the prior checkpoint's redo ptr for UpdateCheckPointDistanceEstimate()
*/
PriorRedoPtr = ControlFile->checkPointCopy.redo;
@ -8944,7 +8898,6 @@ CreateCheckPoint(int flags)
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
if (shutdown)
ControlFile->state = DB_SHUTDOWNED;
ControlFile->prevCheckPoint = ControlFile->checkPoint;
ControlFile->checkPoint = ProcLastRecPtr;
ControlFile->checkPointCopy = checkPoint;
ControlFile->time = (pg_time_t) time(NULL);
@ -8982,8 +8935,7 @@ CreateCheckPoint(int flags)
smgrpostckpt();
/*
* Delete old log files (those no longer needed even for previous
* checkpoint or the standbys in XLOG streaming).
* Delete old log files and recycle them
*/
if (PriorRedoPtr != InvalidXLogRecPtr)
{
@ -8992,7 +8944,8 @@ CreateCheckPoint(int flags)
/* Update the average distance between checkpoints. */
UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
XLByteToSeg(PriorRedoPtr, _logSegNo, wal_segment_size);
/* Trim from the last checkpoint, not the last - 1 */
XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
KeepLogSeg(recptr, &_logSegNo);
_logSegNo--;
RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, recptr);
@ -9258,8 +9211,7 @@ CreateRestartPoint(int flags)
CheckPointGuts(lastCheckPoint.redo, flags);
/*
* Remember the prior checkpoint's redo pointer, used later to determine
* the point at which we can truncate the log.
* Remember the prior checkpoint's redo ptr for UpdateCheckPointDistanceEstimate()
*/
PriorRedoPtr = ControlFile->checkPointCopy.redo;
@ -9273,7 +9225,6 @@ CreateRestartPoint(int flags)
if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY &&
ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
{
ControlFile->prevCheckPoint = ControlFile->checkPoint;
ControlFile->checkPoint = lastCheckPointRecPtr;
ControlFile->checkPointCopy = lastCheckPoint;
ControlFile->time = (pg_time_t) time(NULL);