mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-29 22:49:41 +03:00 
			
		
		
		
	Fix an assertion failure related to an exclusive backup.
Previously multiple sessions could execute pg_start_backup() and
pg_stop_backup() to start and stop an exclusive backup at the same time.
This could trigger the assertion failure of
"FailedAssertion("!(XLogCtl->Insert.exclusiveBackup)".
This happend because, even while pg_start_backup() was starting
an exclusive backup, other session could run pg_stop_backup()
concurrently and mark the backup as not-in-progress unconditionally.
This patch introduces ExclusiveBackupState indicating the state of
an exclusive backup. This state is used to ensure that there is only
one session running pg_start_backup() or pg_stop_backup() at
the same time, to avoid the assertion failure.
Back-patch to all supported versions.
Author: Michael Paquier
Reviewed-By: Kyotaro Horiguchi and me
Reported-By: Andreas Seltenreich
Discussion: <87mvktojme.fsf@credativ.de>
			
			
This commit is contained in:
		| @@ -417,6 +417,29 @@ typedef union WALInsertLockPadded | |||||||
| 	char		pad[PG_CACHE_LINE_SIZE]; | 	char		pad[PG_CACHE_LINE_SIZE]; | ||||||
| } WALInsertLockPadded; | } WALInsertLockPadded; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * State of an exclusive backup, necessary to control concurrent activities | ||||||
|  |  * across sessions when working on exclusive backups. | ||||||
|  |  * | ||||||
|  |  * EXCLUSIVE_BACKUP_NONE means that there is no exclusive backup actually | ||||||
|  |  * running, to be more precise pg_start_backup() is not being executed for | ||||||
|  |  * an exclusive backup and there is no exclusive backup in progress. | ||||||
|  |  * EXCLUSIVE_BACKUP_STARTING means that pg_start_backup() is starting an | ||||||
|  |  * exclusive backup. | ||||||
|  |  * EXCLUSIVE_BACKUP_IN_PROGRESS means that pg_start_backup() has finished | ||||||
|  |  * running and an exclusive backup is in progress. pg_stop_backup() is | ||||||
|  |  * needed to finish it. | ||||||
|  |  * EXCLUSIVE_BACKUP_STOPPING means that pg_stop_backup() is stopping an | ||||||
|  |  * exclusive backup. | ||||||
|  |  */ | ||||||
|  | typedef enum ExclusiveBackupState | ||||||
|  | { | ||||||
|  | 	EXCLUSIVE_BACKUP_NONE = 0, | ||||||
|  | 	EXCLUSIVE_BACKUP_STARTING, | ||||||
|  | 	EXCLUSIVE_BACKUP_IN_PROGRESS, | ||||||
|  | 	EXCLUSIVE_BACKUP_STOPPING | ||||||
|  | } ExclusiveBackupState; | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Shared state data for XLogInsert. |  * Shared state data for XLogInsert. | ||||||
|  */ |  */ | ||||||
| @@ -458,13 +481,15 @@ typedef struct XLogCtlInsert | |||||||
| 	bool		fullPageWrites; | 	bool		fullPageWrites; | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * exclusiveBackup is true if a backup started with pg_start_backup() is | 	 * exclusiveBackupState indicates the state of an exclusive backup | ||||||
| 	 * in progress, and nonExclusiveBackups is a counter indicating the number | 	 * (see comments of ExclusiveBackupState for more details). | ||||||
| 	 * of streaming base backups currently in progress. forcePageWrites is set | 	 * nonExclusiveBackups is a counter indicating the number of streaming | ||||||
| 	 * to true when either of these is non-zero. lastBackupStart is the latest | 	 * base backups currently in progress. forcePageWrites is set to true | ||||||
| 	 * checkpoint redo location used as a starting point for an online backup. | 	 * when either of these is non-zero. lastBackupStart is the latest | ||||||
|  | 	 * checkpoint redo location used as a starting point for an online | ||||||
|  | 	 * backup. | ||||||
| 	 */ | 	 */ | ||||||
| 	bool		exclusiveBackup; | 	ExclusiveBackupState exclusiveBackupState; | ||||||
| 	int			nonExclusiveBackups; | 	int			nonExclusiveBackups; | ||||||
| 	XLogRecPtr	lastBackupStart; | 	XLogRecPtr	lastBackupStart; | ||||||
|  |  | ||||||
| @@ -806,6 +831,7 @@ static bool CheckForStandbyTrigger(void); | |||||||
| static void xlog_outrec(StringInfo buf, XLogRecord *record); | static void xlog_outrec(StringInfo buf, XLogRecord *record); | ||||||
| #endif | #endif | ||||||
| static void pg_start_backup_callback(int code, Datum arg); | static void pg_start_backup_callback(int code, Datum arg); | ||||||
|  | static void pg_stop_backup_callback(int code, Datum arg); | ||||||
| static bool read_backup_label(XLogRecPtr *checkPointLoc, | static bool read_backup_label(XLogRecPtr *checkPointLoc, | ||||||
| 				  bool *backupEndRequired, bool *backupFromStandby); | 				  bool *backupEndRequired, bool *backupFromStandby); | ||||||
| static void rm_redo_error_callback(void *arg); | static void rm_redo_error_callback(void *arg); | ||||||
| @@ -9880,7 +9906,12 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, | |||||||
| 	WALInsertLockAcquireExclusive(); | 	WALInsertLockAcquireExclusive(); | ||||||
| 	if (exclusive) | 	if (exclusive) | ||||||
| 	{ | 	{ | ||||||
| 		if (XLogCtl->Insert.exclusiveBackup) | 		/* | ||||||
|  | 		 * At first, mark that we're now starting an exclusive backup, | ||||||
|  | 		 * to ensure that there are no other sessions currently running | ||||||
|  | 		 * pg_start_backup() or pg_stop_backup(). | ||||||
|  | 		 */ | ||||||
|  | 		if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_NONE) | ||||||
| 		{ | 		{ | ||||||
| 			WALInsertLockRelease(); | 			WALInsertLockRelease(); | ||||||
| 			ereport(ERROR, | 			ereport(ERROR, | ||||||
| @@ -9888,7 +9919,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, | |||||||
| 					 errmsg("a backup is already in progress"), | 					 errmsg("a backup is already in progress"), | ||||||
| 					 errhint("Run pg_stop_backup() and try again."))); | 					 errhint("Run pg_stop_backup() and try again."))); | ||||||
| 		} | 		} | ||||||
| 		XLogCtl->Insert.exclusiveBackup = true; | 		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STARTING; | ||||||
| 	} | 	} | ||||||
| 	else | 	else | ||||||
| 		XLogCtl->Insert.nonExclusiveBackups++; | 		XLogCtl->Insert.nonExclusiveBackups++; | ||||||
| @@ -10048,7 +10079,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, | |||||||
| 		{ | 		{ | ||||||
| 			/* | 			/* | ||||||
| 			 * Check for existing backup label --- implies a backup is already | 			 * Check for existing backup label --- implies a backup is already | ||||||
| 			 * running.  (XXX given that we checked exclusiveBackup above, | 			 * running.  (XXX given that we checked exclusiveBackupState above, | ||||||
| 			 * maybe it would be OK to just unlink any such label file?) | 			 * maybe it would be OK to just unlink any such label file?) | ||||||
| 			 */ | 			 */ | ||||||
| 			if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0) | 			if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0) | ||||||
| @@ -10089,6 +10120,16 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, | |||||||
| 	} | 	} | ||||||
| 	PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive)); | 	PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive)); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Mark that start phase has correctly finished for an exclusive backup. | ||||||
|  | 	 */ | ||||||
|  | 	if (exclusive) | ||||||
|  | 	{ | ||||||
|  | 		WALInsertLockAcquireExclusive(); | ||||||
|  | 		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS; | ||||||
|  | 		WALInsertLockRelease(); | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * We're done.  As a convenience, return the starting WAL location. | 	 * We're done.  As a convenience, return the starting WAL location. | ||||||
| 	 */ | 	 */ | ||||||
| @@ -10107,8 +10148,8 @@ pg_start_backup_callback(int code, Datum arg) | |||||||
| 	WALInsertLockAcquireExclusive(); | 	WALInsertLockAcquireExclusive(); | ||||||
| 	if (exclusive) | 	if (exclusive) | ||||||
| 	{ | 	{ | ||||||
| 		Assert(XLogCtl->Insert.exclusiveBackup); | 		Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STARTING); | ||||||
| 		XLogCtl->Insert.exclusiveBackup = false; | 		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE; | ||||||
| 	} | 	} | ||||||
| 	else | 	else | ||||||
| 	{ | 	{ | ||||||
| @@ -10116,7 +10157,7 @@ pg_start_backup_callback(int code, Datum arg) | |||||||
| 		XLogCtl->Insert.nonExclusiveBackups--; | 		XLogCtl->Insert.nonExclusiveBackups--; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if (!XLogCtl->Insert.exclusiveBackup && | 	if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE && | ||||||
| 		XLogCtl->Insert.nonExclusiveBackups == 0) | 		XLogCtl->Insert.nonExclusiveBackups == 0) | ||||||
| 	{ | 	{ | ||||||
| 		XLogCtl->Insert.forcePageWrites = false; | 		XLogCtl->Insert.forcePageWrites = false; | ||||||
| @@ -10124,6 +10165,24 @@ pg_start_backup_callback(int code, Datum arg) | |||||||
| 	WALInsertLockRelease(); | 	WALInsertLockRelease(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Error cleanup callback for pg_stop_backup | ||||||
|  |  */ | ||||||
|  | static void | ||||||
|  | pg_stop_backup_callback(int code, Datum arg) | ||||||
|  | { | ||||||
|  | 	bool		exclusive = DatumGetBool(arg); | ||||||
|  |  | ||||||
|  | 	/* Update backup status on failure */ | ||||||
|  | 	WALInsertLockAcquireExclusive(); | ||||||
|  | 	if (exclusive) | ||||||
|  | 	{ | ||||||
|  | 		Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STOPPING); | ||||||
|  | 		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS; | ||||||
|  | 	} | ||||||
|  | 	WALInsertLockRelease(); | ||||||
|  | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * do_pg_stop_backup is the workhorse of the user-visible pg_stop_backup() |  * do_pg_stop_backup is the workhorse of the user-visible pg_stop_backup() | ||||||
|  * function. |  * function. | ||||||
| @@ -10187,12 +10246,86 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) | |||||||
| 			  errmsg("WAL level not sufficient for making an online backup"), | 			  errmsg("WAL level not sufficient for making an online backup"), | ||||||
| 				 errhint("wal_level must be set to \"archive\", \"hot_standby\", or \"logical\" at server start."))); | 				 errhint("wal_level must be set to \"archive\", \"hot_standby\", or \"logical\" at server start."))); | ||||||
|  |  | ||||||
|  | 	if (exclusive) | ||||||
|  | 	{ | ||||||
|  | 		/* | ||||||
|  | 		 * At first, mark that we're now stopping an exclusive backup, | ||||||
|  | 		 * to ensure that there are no other sessions currently running | ||||||
|  | 		 * pg_start_backup() or pg_stop_backup(). | ||||||
|  | 		 */ | ||||||
|  | 		WALInsertLockAcquireExclusive(); | ||||||
|  | 		if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_IN_PROGRESS) | ||||||
|  | 		{ | ||||||
|  | 			WALInsertLockRelease(); | ||||||
|  | 			ereport(ERROR, | ||||||
|  | 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), | ||||||
|  | 					 errmsg("exclusive backup not in progress"))); | ||||||
|  | 		} | ||||||
|  | 		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STOPPING; | ||||||
|  | 		WALInsertLockRelease(); | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Remove backup_label. In case of failure, the state for an exclusive | ||||||
|  | 		 * backup is switched back to in-progress. | ||||||
|  | 		 */ | ||||||
|  | 		PG_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive)); | ||||||
|  | 		{ | ||||||
|  | 			/* | ||||||
|  | 			 * Read the existing label file into memory. | ||||||
|  | 			 */ | ||||||
|  | 			struct stat statbuf; | ||||||
|  | 			int			r; | ||||||
|  |  | ||||||
|  | 			if (stat(BACKUP_LABEL_FILE, &statbuf)) | ||||||
|  | 			{ | ||||||
|  | 				/* should not happen per the upper checks */ | ||||||
|  | 				if (errno != ENOENT) | ||||||
|  | 					ereport(ERROR, | ||||||
|  | 							(errcode_for_file_access(), | ||||||
|  | 							 errmsg("could not stat file \"%s\": %m", | ||||||
|  | 									BACKUP_LABEL_FILE))); | ||||||
|  | 				ereport(ERROR, | ||||||
|  | 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), | ||||||
|  | 						 errmsg("a backup is not in progress"))); | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			lfp = AllocateFile(BACKUP_LABEL_FILE, "r"); | ||||||
|  | 			if (!lfp) | ||||||
|  | 			{ | ||||||
|  | 				ereport(ERROR, | ||||||
|  | 						(errcode_for_file_access(), | ||||||
|  | 						 errmsg("could not read file \"%s\": %m", | ||||||
|  | 								BACKUP_LABEL_FILE))); | ||||||
|  | 			} | ||||||
|  | 			labelfile = palloc(statbuf.st_size + 1); | ||||||
|  | 			r = fread(labelfile, statbuf.st_size, 1, lfp); | ||||||
|  | 			labelfile[statbuf.st_size] = '\0'; | ||||||
|  |  | ||||||
|  | 			/* | ||||||
|  | 			 * Close and remove the backup label file | ||||||
|  | 			 */ | ||||||
|  | 			if (r != 1 || ferror(lfp) || FreeFile(lfp)) | ||||||
|  | 				ereport(ERROR, | ||||||
|  | 						(errcode_for_file_access(), | ||||||
|  | 						 errmsg("could not read file \"%s\": %m", | ||||||
|  | 								BACKUP_LABEL_FILE))); | ||||||
|  | 			if (unlink(BACKUP_LABEL_FILE) != 0) | ||||||
|  | 				ereport(ERROR, | ||||||
|  | 						(errcode_for_file_access(), | ||||||
|  | 						 errmsg("could not remove file \"%s\": %m", | ||||||
|  | 								BACKUP_LABEL_FILE))); | ||||||
|  | 		} | ||||||
|  | 		PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive)); | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * OK to update backup counters and forcePageWrites | 	 * OK to update backup counters and forcePageWrites | ||||||
| 	 */ | 	 */ | ||||||
| 	WALInsertLockAcquireExclusive(); | 	WALInsertLockAcquireExclusive(); | ||||||
| 	if (exclusive) | 	if (exclusive) | ||||||
| 		XLogCtl->Insert.exclusiveBackup = false; | 	{ | ||||||
|  | 		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE; | ||||||
|  | 	} | ||||||
| 	else | 	else | ||||||
| 	{ | 	{ | ||||||
| 		/* | 		/* | ||||||
| @@ -10205,60 +10338,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) | |||||||
| 		XLogCtl->Insert.nonExclusiveBackups--; | 		XLogCtl->Insert.nonExclusiveBackups--; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if (!XLogCtl->Insert.exclusiveBackup && | 	if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE && | ||||||
| 		XLogCtl->Insert.nonExclusiveBackups == 0) | 		XLogCtl->Insert.nonExclusiveBackups == 0) | ||||||
| 	{ | 	{ | ||||||
| 		XLogCtl->Insert.forcePageWrites = false; | 		XLogCtl->Insert.forcePageWrites = false; | ||||||
| 	} | 	} | ||||||
| 	WALInsertLockRelease(); | 	WALInsertLockRelease(); | ||||||
|  |  | ||||||
| 	if (exclusive) |  | ||||||
| 	{ |  | ||||||
| 		/* |  | ||||||
| 		 * Read the existing label file into memory. |  | ||||||
| 		 */ |  | ||||||
| 		struct stat statbuf; |  | ||||||
| 		int			r; |  | ||||||
|  |  | ||||||
| 		if (stat(BACKUP_LABEL_FILE, &statbuf)) |  | ||||||
| 		{ |  | ||||||
| 			if (errno != ENOENT) |  | ||||||
| 				ereport(ERROR, |  | ||||||
| 						(errcode_for_file_access(), |  | ||||||
| 						 errmsg("could not stat file \"%s\": %m", |  | ||||||
| 								BACKUP_LABEL_FILE))); |  | ||||||
| 			ereport(ERROR, |  | ||||||
| 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |  | ||||||
| 					 errmsg("a backup is not in progress"))); |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		lfp = AllocateFile(BACKUP_LABEL_FILE, "r"); |  | ||||||
| 		if (!lfp) |  | ||||||
| 		{ |  | ||||||
| 			ereport(ERROR, |  | ||||||
| 					(errcode_for_file_access(), |  | ||||||
| 					 errmsg("could not read file \"%s\": %m", |  | ||||||
| 							BACKUP_LABEL_FILE))); |  | ||||||
| 		} |  | ||||||
| 		labelfile = palloc(statbuf.st_size + 1); |  | ||||||
| 		r = fread(labelfile, statbuf.st_size, 1, lfp); |  | ||||||
| 		labelfile[statbuf.st_size] = '\0'; |  | ||||||
|  |  | ||||||
| 		/* |  | ||||||
| 		 * Close and remove the backup label file |  | ||||||
| 		 */ |  | ||||||
| 		if (r != 1 || ferror(lfp) || FreeFile(lfp)) |  | ||||||
| 			ereport(ERROR, |  | ||||||
| 					(errcode_for_file_access(), |  | ||||||
| 					 errmsg("could not read file \"%s\": %m", |  | ||||||
| 							BACKUP_LABEL_FILE))); |  | ||||||
| 		if (unlink(BACKUP_LABEL_FILE) != 0) |  | ||||||
| 			ereport(ERROR, |  | ||||||
| 					(errcode_for_file_access(), |  | ||||||
| 					 errmsg("could not remove file \"%s\": %m", |  | ||||||
| 							BACKUP_LABEL_FILE))); |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Read and parse the START WAL LOCATION line (this code is pretty crude, | 	 * Read and parse the START WAL LOCATION line (this code is pretty crude, | ||||||
| 	 * but we are not expecting any variability in the file format). | 	 * but we are not expecting any variability in the file format). | ||||||
| @@ -10499,7 +10585,7 @@ do_pg_abort_backup(void) | |||||||
| 	Assert(XLogCtl->Insert.nonExclusiveBackups > 0); | 	Assert(XLogCtl->Insert.nonExclusiveBackups > 0); | ||||||
| 	XLogCtl->Insert.nonExclusiveBackups--; | 	XLogCtl->Insert.nonExclusiveBackups--; | ||||||
|  |  | ||||||
| 	if (!XLogCtl->Insert.exclusiveBackup && | 	if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE && | ||||||
| 		XLogCtl->Insert.nonExclusiveBackups == 0) | 		XLogCtl->Insert.nonExclusiveBackups == 0) | ||||||
| 	{ | 	{ | ||||||
| 		XLogCtl->Insert.forcePageWrites = false; | 		XLogCtl->Insert.forcePageWrites = false; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user