mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Skip checkpoints, archiving on idle systems.
Some background activity (like checkpoints, archive timeout, standby snapshots) is not supposed to happen on an idle system. Unfortunately so far it was not easy to determine when a system is idle, which defeated some of the attempts to avoid redundant activity on an idle system. To make that easier, allow marking individual WAL insertions as not being "important". By checking whether any important activity happened since the last time an activity was performed, it is now easy to check whether some action needs to be repeated. Use the new facility for checkpoints, archive timeout and standby snapshots. The lack of such a facility causes some issues in older releases, but in my opinion the consequences (superfluous checkpoints / archived segments) aren't grave enough to warrant backpatching. Author: Michael Paquier, editorialized by Andres Freund Reviewed-By: Andres Freund, David Steele, Amit Kapila, Kyotaro HORIGUCHI Bug: #13685 Discussion: https://www.postgresql.org/message-id/20151016203031.3019.72930@wrigleys.postgresql.org https://www.postgresql.org/message-id/CAB7nPqQcPqxEM3S735Bd2RzApNqSNJVietAC=6kfkYv_45dKwA@mail.gmail.com Backpatch: -
This commit is contained in:
		| @@ -2852,12 +2852,10 @@ include_dir 'conf.d' | |||||||
|         parameter is greater than zero, the server will switch to a new |         parameter is greater than zero, the server will switch to a new | ||||||
|         segment file whenever this many seconds have elapsed since the last |         segment file whenever this many seconds have elapsed since the last | ||||||
|         segment file switch, and there has been any database activity, |         segment file switch, and there has been any database activity, | ||||||
|         including a single checkpoint.  (Increasing |         including a single checkpoint (checkpoints are skipped if there is | ||||||
|         <varname>checkpoint_timeout</> will reduce unnecessary |         no database activity).  Note that archived files that are closed | ||||||
|         checkpoints on an idle system.) |         early due to a forced switch are still the same length as completely | ||||||
|         Note that archived files that are closed early |         full files.  Therefore, it is unwise to use a very short | ||||||
|         due to a forced switch are still the same length as completely full |  | ||||||
|         files.  Therefore, it is unwise to use a very short |  | ||||||
|         <varname>archive_timeout</> — it will bloat your archive |         <varname>archive_timeout</> — it will bloat your archive | ||||||
|         storage.  <varname>archive_timeout</> settings of a minute or so are |         storage.  <varname>archive_timeout</> settings of a minute or so are | ||||||
|         usually reasonable.  You should consider using streaming replication, |         usually reasonable.  You should consider using streaming replication, | ||||||
|   | |||||||
| @@ -2507,7 +2507,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, | |||||||
| 							heaptup->t_len - SizeofHeapTupleHeader); | 							heaptup->t_len - SizeofHeapTupleHeader); | ||||||
|  |  | ||||||
| 		/* filtering by origin on a row level is much more efficient */ | 		/* filtering by origin on a row level is much more efficient */ | ||||||
| 		XLogIncludeOrigin(); | 		XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); | ||||||
|  |  | ||||||
| 		recptr = XLogInsert(RM_HEAP_ID, info); | 		recptr = XLogInsert(RM_HEAP_ID, info); | ||||||
|  |  | ||||||
| @@ -2846,7 +2846,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, | |||||||
| 			XLogRegisterBufData(0, tupledata, totaldatalen); | 			XLogRegisterBufData(0, tupledata, totaldatalen); | ||||||
|  |  | ||||||
| 			/* filtering by origin on a row level is much more efficient */ | 			/* filtering by origin on a row level is much more efficient */ | ||||||
| 			XLogIncludeOrigin(); | 			XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); | ||||||
|  |  | ||||||
| 			recptr = XLogInsert(RM_HEAP2_ID, info); | 			recptr = XLogInsert(RM_HEAP2_ID, info); | ||||||
|  |  | ||||||
| @@ -3308,7 +3308,7 @@ l1: | |||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		/* filtering by origin on a row level is much more efficient */ | 		/* filtering by origin on a row level is much more efficient */ | ||||||
| 		XLogIncludeOrigin(); | 		XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); | ||||||
|  |  | ||||||
| 		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); | 		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); | ||||||
|  |  | ||||||
| @@ -6035,7 +6035,7 @@ heap_finish_speculative(Relation relation, HeapTuple tuple) | |||||||
| 		XLogBeginInsert(); | 		XLogBeginInsert(); | ||||||
|  |  | ||||||
| 		/* We want the same filtering on this as on a plain insert */ | 		/* We want the same filtering on this as on a plain insert */ | ||||||
| 		XLogIncludeOrigin(); | 		XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); | ||||||
|  |  | ||||||
| 		XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm); | 		XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm); | ||||||
| 		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); | 		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); | ||||||
| @@ -7703,7 +7703,7 @@ log_heap_update(Relation reln, Buffer oldbuf, | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* filtering by origin on a row level is much more efficient */ | 	/* filtering by origin on a row level is much more efficient */ | ||||||
| 	XLogIncludeOrigin(); | 	XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); | ||||||
|  |  | ||||||
| 	recptr = XLogInsert(RM_HEAP_ID, info); | 	recptr = XLogInsert(RM_HEAP_ID, info); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5234,7 +5234,7 @@ XactLogCommitRecord(TimestampTz commit_time, | |||||||
| 		XLogRegisterData((char *) (&xl_origin), sizeof(xl_xact_origin)); | 		XLogRegisterData((char *) (&xl_origin), sizeof(xl_xact_origin)); | ||||||
|  |  | ||||||
| 	/* we allow filtering by xacts */ | 	/* we allow filtering by xacts */ | ||||||
| 	XLogIncludeOrigin(); | 	XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); | ||||||
|  |  | ||||||
| 	return XLogInsert(RM_XACT_ID, info); | 	return XLogInsert(RM_XACT_ID, info); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -442,11 +442,21 @@ typedef struct XLogwrtResult | |||||||
|  * the WAL record is just copied to the page and the lock is released. But |  * the WAL record is just copied to the page and the lock is released. But | ||||||
|  * to avoid the deadlock-scenario explained above, the indicator is always |  * to avoid the deadlock-scenario explained above, the indicator is always | ||||||
|  * updated before sleeping while holding an insertion lock. |  * updated before sleeping while holding an insertion lock. | ||||||
|  |  * | ||||||
|  |  * lastImportantAt contains the LSN of the last important WAL record inserted | ||||||
|  |  * using a given lock. This value is used to detect if there has been | ||||||
|  |  * important WAL activity since the last time some action, like a checkpoint, | ||||||
|  |  * was performed - allowing to not repeat the action if not. The LSN is | ||||||
|  |  * updated for all insertions, unless the XLOG_MARK_UNIMPORTANT flag was | ||||||
|  |  * set. lastImportantAt is never cleared, only overwritten by the LSN of newer | ||||||
|  |  * records.  Tracking the WAL activity directly in WALInsertLock has the | ||||||
|  |  * advantage of not needing any additional locks to update the value. | ||||||
|  */ |  */ | ||||||
| typedef struct | typedef struct | ||||||
| { | { | ||||||
| 	LWLock		lock; | 	LWLock		lock; | ||||||
| 	XLogRecPtr	insertingAt; | 	XLogRecPtr	insertingAt; | ||||||
|  | 	XLogRecPtr	lastImportantAt; | ||||||
| } WALInsertLock; | } WALInsertLock; | ||||||
|  |  | ||||||
| /* | /* | ||||||
| @@ -541,8 +551,9 @@ typedef struct XLogCtlData | |||||||
| 	XLogRecPtr	unloggedLSN; | 	XLogRecPtr	unloggedLSN; | ||||||
| 	slock_t		ulsn_lck; | 	slock_t		ulsn_lck; | ||||||
|  |  | ||||||
| 	/* Time of last xlog segment switch. Protected by WALWriteLock. */ | 	/* Time and LSN of last xlog segment switch. Protected by WALWriteLock. */ | ||||||
| 	pg_time_t	lastSegSwitchTime; | 	pg_time_t	lastSegSwitchTime; | ||||||
|  | 	XLogRecPtr	lastSegSwitchLSN; | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Protected by info_lck and WALWriteLock (you must hold either lock to | 	 * Protected by info_lck and WALWriteLock (you must hold either lock to | ||||||
| @@ -884,6 +895,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); | |||||||
|  * which pages need a full-page image, and retry.  If fpw_lsn is invalid, the |  * which pages need a full-page image, and retry.  If fpw_lsn is invalid, the | ||||||
|  * record is always inserted. |  * record is always inserted. | ||||||
|  * |  * | ||||||
|  |  * 'flags' gives more in-depth control on the record being inserted. See | ||||||
|  |  * XLogSetRecordFlags() for details. | ||||||
|  |  * | ||||||
|  * The first XLogRecData in the chain must be for the record header, and its |  * The first XLogRecData in the chain must be for the record header, and its | ||||||
|  * data must be MAXALIGNed.  XLogInsertRecord fills in the xl_prev and |  * data must be MAXALIGNed.  XLogInsertRecord fills in the xl_prev and | ||||||
|  * xl_crc fields in the header, the rest of the header must already be filled |  * xl_crc fields in the header, the rest of the header must already be filled | ||||||
| @@ -896,7 +910,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); | |||||||
|  * WAL rule "write the log before the data".) |  * WAL rule "write the log before the data".) | ||||||
|  */ |  */ | ||||||
| XLogRecPtr | XLogRecPtr | ||||||
| XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) | XLogInsertRecord(XLogRecData *rdata, | ||||||
|  | 				 XLogRecPtr fpw_lsn, | ||||||
|  | 				 uint8 flags) | ||||||
| { | { | ||||||
| 	XLogCtlInsert *Insert = &XLogCtl->Insert; | 	XLogCtlInsert *Insert = &XLogCtl->Insert; | ||||||
| 	pg_crc32c	rdata_crc; | 	pg_crc32c	rdata_crc; | ||||||
| @@ -1013,6 +1029,18 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) | |||||||
| 		 */ | 		 */ | ||||||
| 		CopyXLogRecordToWAL(rechdr->xl_tot_len, isLogSwitch, rdata, | 		CopyXLogRecordToWAL(rechdr->xl_tot_len, isLogSwitch, rdata, | ||||||
| 							StartPos, EndPos); | 							StartPos, EndPos); | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Unless record is flagged as not important, update LSN of last | ||||||
|  | 		 * important record in the current slot. When holding all locks, just | ||||||
|  | 		 * update the first one. | ||||||
|  | 		 */ | ||||||
|  | 		if ((flags & XLOG_MARK_UNIMPORTANT) == 0) | ||||||
|  | 		{ | ||||||
|  | 			int lockno = holdingAllLocks ? 0 : MyLockNo; | ||||||
|  |  | ||||||
|  | 			WALInsertLocks[lockno].l.lastImportantAt = StartPos; | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 	else | 	else | ||||||
| 	{ | 	{ | ||||||
| @@ -2332,6 +2360,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) | |||||||
| 					XLogArchiveNotifySeg(openLogSegNo); | 					XLogArchiveNotifySeg(openLogSegNo); | ||||||
|  |  | ||||||
| 				XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); | 				XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); | ||||||
|  | 				XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush; | ||||||
|  |  | ||||||
| 				/* | 				/* | ||||||
| 				 * Request a checkpoint if we've consumed too much xlog since | 				 * Request a checkpoint if we've consumed too much xlog since | ||||||
| @@ -4715,6 +4744,7 @@ XLOGShmemInit(void) | |||||||
| 	{ | 	{ | ||||||
| 		LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT); | 		LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT); | ||||||
| 		WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr; | 		WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr; | ||||||
|  | 		WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| @@ -7431,8 +7461,9 @@ StartupXLOG(void) | |||||||
| 	 */ | 	 */ | ||||||
| 	InRecovery = false; | 	InRecovery = false; | ||||||
|  |  | ||||||
| 	/* start the archive_timeout timer running */ | 	/* start the archive_timeout timer and LSN running */ | ||||||
| 	XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); | 	XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); | ||||||
|  | 	XLogCtl->lastSegSwitchLSN = EndOfLog; | ||||||
|  |  | ||||||
| 	/* also initialize latestCompletedXid, to nextXid - 1 */ | 	/* also initialize latestCompletedXid, to nextXid - 1 */ | ||||||
| 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); | 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); | ||||||
| @@ -7994,16 +8025,51 @@ GetFlushRecPtr(void) | |||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Get the time of the last xlog segment switch |  * GetLastImportantRecPtr -- Returns the LSN of the last important record | ||||||
|  |  * inserted. All records not explicitly marked as unimportant are considered | ||||||
|  |  * important. | ||||||
|  |  * | ||||||
|  |  * The LSN is determined by computing the maximum of | ||||||
|  |  * WALInsertLocks[i].lastImportantAt. | ||||||
|  |  */ | ||||||
|  | XLogRecPtr | ||||||
|  | GetLastImportantRecPtr(void) | ||||||
|  | { | ||||||
|  | 	XLogRecPtr	res = InvalidXLogRecPtr; | ||||||
|  | 	int			i; | ||||||
|  |  | ||||||
|  | 	for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++) | ||||||
|  | 	{ | ||||||
|  | 		XLogRecPtr	last_important; | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Need to take a lock to prevent torn reads of the LSN, which are | ||||||
|  | 		 * possible on some of the supported platforms. WAL insert locks only | ||||||
|  | 		 * support exclusive mode, so we have to use that. | ||||||
|  | 		 */ | ||||||
|  | 		LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE); | ||||||
|  | 		last_important = WALInsertLocks[i].l.lastImportantAt; | ||||||
|  | 		LWLockRelease(&WALInsertLocks[i].l.lock); | ||||||
|  |  | ||||||
|  | 		if (res < last_important) | ||||||
|  | 			res = last_important; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return res; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Get the time and LSN of the last xlog segment switch | ||||||
|  */ |  */ | ||||||
| pg_time_t | pg_time_t | ||||||
| GetLastSegSwitchTime(void) | GetLastSegSwitchData(XLogRecPtr *lastSwitchLSN) | ||||||
| { | { | ||||||
| 	pg_time_t	result; | 	pg_time_t	result; | ||||||
|  |  | ||||||
| 	/* Need WALWriteLock, but shared lock is sufficient */ | 	/* Need WALWriteLock, but shared lock is sufficient */ | ||||||
| 	LWLockAcquire(WALWriteLock, LW_SHARED); | 	LWLockAcquire(WALWriteLock, LW_SHARED); | ||||||
| 	result = XLogCtl->lastSegSwitchTime; | 	result = XLogCtl->lastSegSwitchTime; | ||||||
|  | 	*lastSwitchLSN = XLogCtl->lastSegSwitchLSN; | ||||||
| 	LWLockRelease(WALWriteLock); | 	LWLockRelease(WALWriteLock); | ||||||
|  |  | ||||||
| 	return result; | 	return result; | ||||||
| @@ -8065,7 +8131,7 @@ ShutdownXLOG(int code, Datum arg) | |||||||
| 		 * record will go to the next XLOG file and won't be archived (yet). | 		 * record will go to the next XLOG file and won't be archived (yet). | ||||||
| 		 */ | 		 */ | ||||||
| 		if (XLogArchivingActive() && XLogArchiveCommandSet()) | 		if (XLogArchivingActive() && XLogArchiveCommandSet()) | ||||||
| 			RequestXLogSwitch(); | 			RequestXLogSwitch(false); | ||||||
|  |  | ||||||
| 		CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); | 		CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); | ||||||
| 	} | 	} | ||||||
| @@ -8253,7 +8319,7 @@ CreateCheckPoint(int flags) | |||||||
| 	uint32		freespace; | 	uint32		freespace; | ||||||
| 	XLogRecPtr	PriorRedoPtr; | 	XLogRecPtr	PriorRedoPtr; | ||||||
| 	XLogRecPtr	curInsert; | 	XLogRecPtr	curInsert; | ||||||
| 	XLogRecPtr	prevPtr; | 	XLogRecPtr	last_important_lsn; | ||||||
| 	VirtualTransactionId *vxids; | 	VirtualTransactionId *vxids; | ||||||
| 	int			nvxids; | 	int			nvxids; | ||||||
|  |  | ||||||
| @@ -8333,39 +8399,34 @@ CreateCheckPoint(int flags) | |||||||
| 	else | 	else | ||||||
| 		checkPoint.oldestActiveXid = InvalidTransactionId; | 		checkPoint.oldestActiveXid = InvalidTransactionId; | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Get location of last important record before acquiring insert locks (as | ||||||
|  | 	 * GetLastImportantRecPtr() also locks WAL locks). | ||||||
|  | 	 */ | ||||||
|  | 	last_important_lsn = GetLastImportantRecPtr(); | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * We must block concurrent insertions while examining insert state to | 	 * We must block concurrent insertions while examining insert state to | ||||||
| 	 * determine the checkpoint REDO pointer. | 	 * determine the checkpoint REDO pointer. | ||||||
| 	 */ | 	 */ | ||||||
| 	WALInsertLockAcquireExclusive(); | 	WALInsertLockAcquireExclusive(); | ||||||
| 	curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos); | 	curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos); | ||||||
| 	prevPtr = XLogBytePosToRecPtr(Insert->PrevBytePos); |  | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * If this isn't a shutdown or forced checkpoint, and we have not inserted | 	 * If this isn't a shutdown or forced checkpoint, and if there has been no | ||||||
| 	 * any XLOG records since the start of the last checkpoint, skip the | 	 * WAL activity requiring a checkpoint, skip it.  The idea here is to | ||||||
| 	 * checkpoint.  The idea here is to avoid inserting duplicate checkpoints | 	 * avoid inserting duplicate checkpoints when the system is idle. | ||||||
| 	 * when the system is idle. That wastes log space, and more importantly it |  | ||||||
| 	 * exposes us to possible loss of both current and previous checkpoint |  | ||||||
| 	 * records if the machine crashes just as we're writing the update. |  | ||||||
| 	 * (Perhaps it'd make even more sense to checkpoint only when the previous |  | ||||||
| 	 * checkpoint record is in a different xlog page?) |  | ||||||
| 	 * |  | ||||||
| 	 * If the previous checkpoint crossed a WAL segment, however, we create |  | ||||||
| 	 * the checkpoint anyway, to have the latest checkpoint fully contained in |  | ||||||
| 	 * the new segment. This is for a little bit of extra robustness: it's |  | ||||||
| 	 * better if you don't need to keep two WAL segments around to recover the |  | ||||||
| 	 * checkpoint. |  | ||||||
| 	 */ | 	 */ | ||||||
| 	if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | | 	if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | | ||||||
| 				  CHECKPOINT_FORCE)) == 0) | 				  CHECKPOINT_FORCE)) == 0) | ||||||
| 	{ | 	{ | ||||||
| 		if (prevPtr == ControlFile->checkPointCopy.redo && | 		if (last_important_lsn == ControlFile->checkPoint) | ||||||
| 			prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE) |  | ||||||
| 		{ | 		{ | ||||||
| 			WALInsertLockRelease(); | 			WALInsertLockRelease(); | ||||||
| 			LWLockRelease(CheckpointLock); | 			LWLockRelease(CheckpointLock); | ||||||
| 			END_CRIT_SECTION(); | 			END_CRIT_SECTION(); | ||||||
|  | 			ereport(DEBUG1, | ||||||
|  | 					(errmsg("checkpoint skipped due to an idle system"))); | ||||||
| 			return; | 			return; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| @@ -9122,12 +9183,15 @@ XLogPutNextOid(Oid nextOid) | |||||||
|  * write a switch record because we are already at segment start. |  * write a switch record because we are already at segment start. | ||||||
|  */ |  */ | ||||||
| XLogRecPtr | XLogRecPtr | ||||||
| RequestXLogSwitch(void) | RequestXLogSwitch(bool mark_unimportant) | ||||||
| { | { | ||||||
| 	XLogRecPtr	RecPtr; | 	XLogRecPtr	RecPtr; | ||||||
|  |  | ||||||
| 	/* XLOG SWITCH has no data */ | 	/* XLOG SWITCH has no data */ | ||||||
| 	XLogBeginInsert(); | 	XLogBeginInsert(); | ||||||
|  |  | ||||||
|  | 	if (mark_unimportant) | ||||||
|  | 		XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT); | ||||||
| 	RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH); | 	RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH); | ||||||
|  |  | ||||||
| 	return RecPtr; | 	return RecPtr; | ||||||
| @@ -9997,7 +10061,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, | |||||||
| 		 * recovery case described above. | 		 * recovery case described above. | ||||||
| 		 */ | 		 */ | ||||||
| 		if (!backup_started_in_recovery) | 		if (!backup_started_in_recovery) | ||||||
| 			RequestXLogSwitch(); | 			RequestXLogSwitch(false); | ||||||
|  |  | ||||||
| 		do | 		do | ||||||
| 		{ | 		{ | ||||||
| @@ -10582,7 +10646,7 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) | |||||||
| 	 * Force a switch to a new xlog segment file, so that the backup is valid | 	 * Force a switch to a new xlog segment file, so that the backup is valid | ||||||
| 	 * as soon as archiver moves out the current segment file. | 	 * as soon as archiver moves out the current segment file. | ||||||
| 	 */ | 	 */ | ||||||
| 	RequestXLogSwitch(); | 	RequestXLogSwitch(false); | ||||||
|  |  | ||||||
| 	XLByteToPrevSeg(stoppoint, _logSegNo); | 	XLByteToPrevSeg(stoppoint, _logSegNo); | ||||||
| 	XLogFileName(stopxlogfilename, ThisTimeLineID, _logSegNo); | 	XLogFileName(stopxlogfilename, ThisTimeLineID, _logSegNo); | ||||||
|   | |||||||
| @@ -293,7 +293,7 @@ pg_switch_xlog(PG_FUNCTION_ARGS) | |||||||
| 				 errmsg("recovery is in progress"), | 				 errmsg("recovery is in progress"), | ||||||
| 				 errhint("WAL control functions cannot be executed during recovery."))); | 				 errhint("WAL control functions cannot be executed during recovery."))); | ||||||
|  |  | ||||||
| 	switchpoint = RequestXLogSwitch(); | 	switchpoint = RequestXLogSwitch(false); | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * As a convenience, return the WAL location of the switch record | 	 * As a convenience, return the WAL location of the switch record | ||||||
|   | |||||||
| @@ -73,8 +73,8 @@ static XLogRecData *mainrdata_head; | |||||||
| static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head; | static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head; | ||||||
| static uint32 mainrdata_len;	/* total # of bytes in chain */ | static uint32 mainrdata_len;	/* total # of bytes in chain */ | ||||||
|  |  | ||||||
| /* Should the in-progress insertion log the origin? */ | /* flags for the in-progress insertion */ | ||||||
| static bool include_origin = false; | static uint8 curinsert_flags = 0; | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * These are used to hold the record header while constructing a record. |  * These are used to hold the record header while constructing a record. | ||||||
| @@ -201,7 +201,7 @@ XLogResetInsertion(void) | |||||||
| 	max_registered_block_id = 0; | 	max_registered_block_id = 0; | ||||||
| 	mainrdata_len = 0; | 	mainrdata_len = 0; | ||||||
| 	mainrdata_last = (XLogRecData *) &mainrdata_head; | 	mainrdata_last = (XLogRecData *) &mainrdata_head; | ||||||
| 	include_origin = false; | 	curinsert_flags = 0; | ||||||
| 	begininsert_called = false; | 	begininsert_called = false; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -384,13 +384,20 @@ XLogRegisterBufData(uint8 block_id, char *data, int len) | |||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Should this record include the replication origin if one is set up? |  * Set insert status flags for the upcoming WAL record. | ||||||
|  |  * | ||||||
|  |  * The flags that can be used here are: | ||||||
|  |  * - XLOG_INCLUDE_ORIGIN, to determine if the replication origin should be | ||||||
|  |  *   included in the record. | ||||||
|  |  * - XLOG_MARK_UNIMPORTANT, to signal that the record is not important for | ||||||
|  |  *   durability, which allows to avoid triggering WAL archiving and other | ||||||
|  |  *   background activity. | ||||||
|  */ |  */ | ||||||
| void | void | ||||||
| XLogIncludeOrigin(void) | XLogSetRecordFlags(uint8 flags) | ||||||
| { | { | ||||||
| 	Assert(begininsert_called); | 	Assert(begininsert_called); | ||||||
| 	include_origin = true; | 	curinsert_flags = flags; | ||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
| @@ -450,7 +457,7 @@ XLogInsert(RmgrId rmid, uint8 info) | |||||||
| 		rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites, | 		rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites, | ||||||
| 								 &fpw_lsn); | 								 &fpw_lsn); | ||||||
|  |  | ||||||
| 		EndPos = XLogInsertRecord(rdt, fpw_lsn); | 		EndPos = XLogInsertRecord(rdt, fpw_lsn, curinsert_flags); | ||||||
| 	} while (EndPos == InvalidXLogRecPtr); | 	} while (EndPos == InvalidXLogRecPtr); | ||||||
|  |  | ||||||
| 	XLogResetInsertion(); | 	XLogResetInsertion(); | ||||||
| @@ -701,7 +708,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* followed by the record's origin, if any */ | 	/* followed by the record's origin, if any */ | ||||||
| 	if (include_origin && replorigin_session_origin != InvalidRepOriginId) | 	if ((curinsert_flags & XLOG_INCLUDE_ORIGIN) && | ||||||
|  | 		replorigin_session_origin != InvalidRepOriginId) | ||||||
| 	{ | 	{ | ||||||
| 		*(scratch++) = XLR_BLOCK_ID_ORIGIN; | 		*(scratch++) = XLR_BLOCK_ID_ORIGIN; | ||||||
| 		memcpy(scratch, &replorigin_session_origin, sizeof(replorigin_session_origin)); | 		memcpy(scratch, &replorigin_session_origin, sizeof(replorigin_session_origin)); | ||||||
|   | |||||||
| @@ -310,7 +310,7 @@ BackgroundWriterMain(void) | |||||||
| 		 * check whether there has been any WAL inserted since the last time | 		 * check whether there has been any WAL inserted since the last time | ||||||
| 		 * we've logged a running xacts. | 		 * we've logged a running xacts. | ||||||
| 		 * | 		 * | ||||||
| 		 * We do this logging in the bgwriter as its the only process that is | 		 * We do this logging in the bgwriter as it is the only process that is | ||||||
| 		 * run regularly and returns to its mainloop all the time. E.g. | 		 * run regularly and returns to its mainloop all the time. E.g. | ||||||
| 		 * Checkpointer, when active, is barely ever in its mainloop and thus | 		 * Checkpointer, when active, is barely ever in its mainloop and thus | ||||||
| 		 * makes it hard to log regularly. | 		 * makes it hard to log regularly. | ||||||
| @@ -324,11 +324,11 @@ BackgroundWriterMain(void) | |||||||
| 												  LOG_SNAPSHOT_INTERVAL_MS); | 												  LOG_SNAPSHOT_INTERVAL_MS); | ||||||
|  |  | ||||||
| 			/* | 			/* | ||||||
| 			 * only log if enough time has passed and some xlog record has | 			 * Only log if enough time has passed and interesting records have | ||||||
| 			 * been inserted. | 			 * been inserted since the last snapshot. | ||||||
| 			 */ | 			 */ | ||||||
| 			if (now >= timeout && | 			if (now >= timeout && | ||||||
| 				last_snapshot_lsn != GetXLogInsertRecPtr()) | 				last_snapshot_lsn < GetLastImportantRecPtr()) | ||||||
| 			{ | 			{ | ||||||
| 				last_snapshot_lsn = LogStandbySnapshot(); | 				last_snapshot_lsn = LogStandbySnapshot(); | ||||||
| 				last_snapshot_ts = now; | 				last_snapshot_ts = now; | ||||||
|   | |||||||
| @@ -573,15 +573,21 @@ CheckpointerMain(void) | |||||||
| /* | /* | ||||||
|  * CheckArchiveTimeout -- check for archive_timeout and switch xlog files |  * CheckArchiveTimeout -- check for archive_timeout and switch xlog files | ||||||
|  * |  * | ||||||
|  * This will switch to a new WAL file and force an archive file write |  * This will switch to a new WAL file and force an archive file write if | ||||||
|  * if any activity is recorded in the current WAL file, including just |  * meaningful activity is recorded in the current WAL file. This includes most | ||||||
|  * a single checkpoint record. |  * writes, including just a single checkpoint record, but excludes WAL records | ||||||
|  |  * that were inserted with the XLOG_MARK_UNIMPORTANT flag being set (like | ||||||
|  |  * snapshots of running transactions).  Such records, depending on | ||||||
|  |  * configuration, occur on regular intervals and don't contain important | ||||||
|  |  * information.  This avoids generating archives with a few unimportant | ||||||
|  |  * records. | ||||||
|  */ |  */ | ||||||
| static void | static void | ||||||
| CheckArchiveTimeout(void) | CheckArchiveTimeout(void) | ||||||
| { | { | ||||||
| 	pg_time_t	now; | 	pg_time_t	now; | ||||||
| 	pg_time_t	last_time; | 	pg_time_t	last_time; | ||||||
|  | 	XLogRecPtr	last_switch_lsn; | ||||||
|  |  | ||||||
| 	if (XLogArchiveTimeout <= 0 || RecoveryInProgress()) | 	if (XLogArchiveTimeout <= 0 || RecoveryInProgress()) | ||||||
| 		return; | 		return; | ||||||
| @@ -596,17 +602,23 @@ CheckArchiveTimeout(void) | |||||||
| 	 * Update local state ... note that last_xlog_switch_time is the last time | 	 * Update local state ... note that last_xlog_switch_time is the last time | ||||||
| 	 * a switch was performed *or requested*. | 	 * a switch was performed *or requested*. | ||||||
| 	 */ | 	 */ | ||||||
| 	last_time = GetLastSegSwitchTime(); | 	last_time = GetLastSegSwitchData(&last_switch_lsn); | ||||||
|  |  | ||||||
| 	last_xlog_switch_time = Max(last_xlog_switch_time, last_time); | 	last_xlog_switch_time = Max(last_xlog_switch_time, last_time); | ||||||
|  |  | ||||||
| 	/* Now we can do the real check */ | 	/* Now we can do the real checks */ | ||||||
| 	if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout) | 	if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout) | ||||||
|  | 	{ | ||||||
|  | 		/* | ||||||
|  | 		 * Switch segment only when "important" WAL has been logged since the | ||||||
|  | 		 * last segment switch. | ||||||
|  | 		 */ | ||||||
|  | 		if (GetLastImportantRecPtr() > last_switch_lsn) | ||||||
| 		{ | 		{ | ||||||
| 			XLogRecPtr	switchpoint; | 			XLogRecPtr	switchpoint; | ||||||
|  |  | ||||||
| 		/* OK, it's time to switch */ | 			/* mark switch as unimportant, avoids triggering checkpoints */ | ||||||
| 		switchpoint = RequestXLogSwitch(); | 			switchpoint = RequestXLogSwitch(true); | ||||||
|  |  | ||||||
| 			/* | 			/* | ||||||
| 			 * If the returned pointer points exactly to a segment boundary, | 			 * If the returned pointer points exactly to a segment boundary, | ||||||
| @@ -616,6 +628,7 @@ CheckArchiveTimeout(void) | |||||||
| 				ereport(DEBUG1, | 				ereport(DEBUG1, | ||||||
| 						(errmsg("transaction log switch forced (archive_timeout=%d)", | 						(errmsg("transaction log switch forced (archive_timeout=%d)", | ||||||
| 								XLogArchiveTimeout))); | 								XLogArchiveTimeout))); | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| 		 * Update state in any case, so we don't retry constantly when the | 		 * Update state in any case, so we don't retry constantly when the | ||||||
|   | |||||||
| @@ -73,7 +73,7 @@ LogLogicalMessage(const char *prefix, const char *message, size_t size, | |||||||
| 	XLogRegisterData((char *) message, size); | 	XLogRegisterData((char *) message, size); | ||||||
|  |  | ||||||
| 	/* allow origin filtering */ | 	/* allow origin filtering */ | ||||||
| 	XLogIncludeOrigin(); | 	XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); | ||||||
|  |  | ||||||
| 	return XLogInsert(RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE); | 	return XLogInsert(RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -961,10 +961,11 @@ LogStandbySnapshot(void) | |||||||
| /* | /* | ||||||
|  * Record an enhanced snapshot of running transactions into WAL. |  * Record an enhanced snapshot of running transactions into WAL. | ||||||
|  * |  * | ||||||
|  * The definitions of RunningTransactionsData and xl_xact_running_xacts |  * The definitions of RunningTransactionsData and xl_xact_running_xacts are | ||||||
|  * are similar. We keep them separate because xl_xact_running_xacts |  * similar. We keep them separate because xl_xact_running_xacts is a | ||||||
|  * is a contiguous chunk of memory and never exists fully until it is |  * contiguous chunk of memory and never exists fully until it is assembled in | ||||||
|  * assembled in WAL. |  * WAL. The inserted records are marked as not being important for durability, | ||||||
|  |  * to avoid triggering superfluous checkpoint / archiving activity. | ||||||
|  */ |  */ | ||||||
| static XLogRecPtr | static XLogRecPtr | ||||||
| LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) | LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) | ||||||
| @@ -981,6 +982,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) | |||||||
|  |  | ||||||
| 	/* Header */ | 	/* Header */ | ||||||
| 	XLogBeginInsert(); | 	XLogBeginInsert(); | ||||||
|  | 	XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT); | ||||||
| 	XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts); | 	XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts); | ||||||
|  |  | ||||||
| 	/* array of TransactionIds */ | 	/* array of TransactionIds */ | ||||||
| @@ -1035,6 +1037,7 @@ LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks) | |||||||
| 	XLogBeginInsert(); | 	XLogBeginInsert(); | ||||||
| 	XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks)); | 	XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks)); | ||||||
| 	XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock)); | 	XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock)); | ||||||
|  | 	XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT); | ||||||
|  |  | ||||||
| 	(void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK); | 	(void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -184,6 +184,13 @@ extern bool XLOG_DEBUG; | |||||||
| #define CHECKPOINT_CAUSE_XLOG	0x0040	/* XLOG consumption */ | #define CHECKPOINT_CAUSE_XLOG	0x0040	/* XLOG consumption */ | ||||||
| #define CHECKPOINT_CAUSE_TIME	0x0080	/* Elapsed time */ | #define CHECKPOINT_CAUSE_TIME	0x0080	/* Elapsed time */ | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Flag bits for the record being inserted, set using XLogSetRecordFlags(). | ||||||
|  |  */ | ||||||
|  | #define XLOG_INCLUDE_ORIGIN		0x01	/* include the replication origin */ | ||||||
|  | #define XLOG_MARK_UNIMPORTANT	0x02	/* record not important for durability */ | ||||||
|  |  | ||||||
|  |  | ||||||
| /* Checkpoint statistics */ | /* Checkpoint statistics */ | ||||||
| typedef struct CheckpointStatsData | typedef struct CheckpointStatsData | ||||||
| { | { | ||||||
| @@ -211,7 +218,9 @@ extern CheckpointStatsData CheckpointStats; | |||||||
|  |  | ||||||
| struct XLogRecData; | struct XLogRecData; | ||||||
|  |  | ||||||
| extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn); | extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, | ||||||
|  | 								   XLogRecPtr fpw_lsn, | ||||||
|  | 								   uint8 flags); | ||||||
| extern void XLogFlush(XLogRecPtr RecPtr); | extern void XLogFlush(XLogRecPtr RecPtr); | ||||||
| extern bool XLogBackgroundFlush(void); | extern bool XLogBackgroundFlush(void); | ||||||
| extern bool XLogNeedsFlush(XLogRecPtr RecPtr); | extern bool XLogNeedsFlush(XLogRecPtr RecPtr); | ||||||
| @@ -262,6 +271,7 @@ extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p) | |||||||
| extern XLogRecPtr GetRedoRecPtr(void); | extern XLogRecPtr GetRedoRecPtr(void); | ||||||
| extern XLogRecPtr GetInsertRecPtr(void); | extern XLogRecPtr GetInsertRecPtr(void); | ||||||
| extern XLogRecPtr GetFlushRecPtr(void); | extern XLogRecPtr GetFlushRecPtr(void); | ||||||
|  | extern XLogRecPtr GetLastImportantRecPtr(void); | ||||||
| extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); | extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); | ||||||
| extern void RemovePromoteSignalFiles(void); | extern void RemovePromoteSignalFiles(void); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -283,8 +283,8 @@ extern const RmgrData RmgrTable[]; | |||||||
| /* | /* | ||||||
|  * Exported to support xlog switching from checkpointer |  * Exported to support xlog switching from checkpointer | ||||||
|  */ |  */ | ||||||
| extern pg_time_t GetLastSegSwitchTime(void); | extern pg_time_t GetLastSegSwitchData(XLogRecPtr *lastSwitchLSN); | ||||||
| extern XLogRecPtr RequestXLogSwitch(void); | extern XLogRecPtr RequestXLogSwitch(bool mark_uninmportant); | ||||||
|  |  | ||||||
| extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli); | extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -40,7 +40,7 @@ | |||||||
|  |  | ||||||
| /* prototypes for public functions in xloginsert.c: */ | /* prototypes for public functions in xloginsert.c: */ | ||||||
| extern void XLogBeginInsert(void); | extern void XLogBeginInsert(void); | ||||||
| extern void XLogIncludeOrigin(void); | extern void XLogSetRecordFlags(uint8 flags); | ||||||
| extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info); | extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info); | ||||||
| extern void XLogEnsureRecordSpace(int nbuffers, int ndatas); | extern void XLogEnsureRecordSpace(int nbuffers, int ndatas); | ||||||
| extern void XLogRegisterData(char *data, int len); | extern void XLogRegisterData(char *data, int len); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user