mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Make logical WAL sender report streaming state appropriately
WAL senders sending logically-decoded data fail to properly report the "streaming" state when starting up. Hence, as long as one extra record is not replayed, such WAL senders remain in the "catchup" state, which is inconsistent with their physical counterparts. This can easily be reproduced by, for example, using pg_recvlogical and restarting the upstream server.

The TAP tests have been slightly modified to detect the failure, and strengthened so that future tests also make sure that a node is in streaming state when waiting for its catchup.

Backpatch down to 9.4, where this code was introduced.

Reported-by: Sawada Masahiko
Author: Simon Riggs, Sawada Masahiko
Reviewed-by: Petr Jelinek, Michael Paquier, Vaishnavi Prabakaran
Discussion: https://postgr.es/m/CAD21AoB2ZbCCqOx=bgKMcLrAvs1V0ZMqzs7wBTuDySezTGtMZA@mail.gmail.com
This commit is contained in:
		| @@ -2169,7 +2169,7 @@ WalSndLoop(WalSndSendDataCallback send_data) | |||||||
| 			if (MyWalSnd->state == WALSNDSTATE_CATCHUP) | 			if (MyWalSnd->state == WALSNDSTATE_CATCHUP) | ||||||
| 			{ | 			{ | ||||||
| 				ereport(DEBUG1, | 				ereport(DEBUG1, | ||||||
| 						(errmsg("standby \"%s\" has now caught up with primary", | 						(errmsg("\"%s\" has now caught up with upstream server", | ||||||
| 								application_name))); | 								application_name))); | ||||||
| 				WalSndSetState(WALSNDSTATE_STREAMING); | 				WalSndSetState(WALSNDSTATE_STREAMING); | ||||||
| 			} | 			} | ||||||
| @@ -2758,10 +2758,10 @@ XLogSendLogical(void) | |||||||
| 	char	   *errm; | 	char	   *errm; | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Don't know whether we've caught up yet. We'll set it to true in | 	 * Don't know whether we've caught up yet. We'll set WalSndCaughtUp to | ||||||
| 	 * WalSndWaitForWal, if we're actually waiting. We also set to true if | 	 * true in WalSndWaitForWal, if we're actually waiting. We also set to | ||||||
| 	 * XLogReadRecord() had to stop reading but WalSndWaitForWal didn't wait - | 	 * true if XLogReadRecord() had to stop reading but WalSndWaitForWal | ||||||
| 	 * i.e. when we're shutting down. | 	 * didn't wait - i.e. when we're shutting down. | ||||||
| 	 */ | 	 */ | ||||||
| 	WalSndCaughtUp = false; | 	WalSndCaughtUp = false; | ||||||
|  |  | ||||||
| @@ -2774,6 +2774,9 @@ XLogSendLogical(void) | |||||||
|  |  | ||||||
| 	if (record != NULL) | 	if (record != NULL) | ||||||
| 	{ | 	{ | ||||||
|  | 		/* XXX: Note that logical decoding cannot be used while in recovery */ | ||||||
|  | 		XLogRecPtr	flushPtr = GetFlushRecPtr(); | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| 		 * Note the lack of any call to LagTrackerWrite() which is handled by | 		 * Note the lack of any call to LagTrackerWrite() which is handled by | ||||||
| 		 * WalSndUpdateProgress which is called by output plugin through | 		 * WalSndUpdateProgress which is called by output plugin through | ||||||
| @@ -2782,6 +2785,13 @@ XLogSendLogical(void) | |||||||
| 		LogicalDecodingProcessRecord(logical_decoding_ctx, logical_decoding_ctx->reader); | 		LogicalDecodingProcessRecord(logical_decoding_ctx, logical_decoding_ctx->reader); | ||||||
|  |  | ||||||
| 		sentPtr = logical_decoding_ctx->reader->EndRecPtr; | 		sentPtr = logical_decoding_ctx->reader->EndRecPtr; | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * If we have sent a record that is at or beyond the flushed point, we | ||||||
|  | 		 * have caught up. | ||||||
|  | 		 */ | ||||||
|  | 		if (sentPtr >= flushPtr) | ||||||
|  | 			WalSndCaughtUp = true; | ||||||
| 	} | 	} | ||||||
| 	else | 	else | ||||||
| 	{ | 	{ | ||||||
|   | |||||||
| @@ -1535,7 +1535,8 @@ also works for logical subscriptions) | |||||||
| until its replication location in pg_stat_replication equals or passes the | until its replication location in pg_stat_replication equals or passes the | ||||||
| upstream's WAL insert point at the time this function is called. By default | upstream's WAL insert point at the time this function is called. By default | ||||||
| the replay_lsn is waited for, but 'mode' may be specified to wait for any of | the replay_lsn is waited for, but 'mode' may be specified to wait for any of | ||||||
| sent|write|flush|replay. | sent|write|flush|replay. The connection catching up must be in a streaming | ||||||
|  | state. | ||||||
|  |  | ||||||
| If there is no active replication connection from this peer, waits until | If there is no active replication connection from this peer, waits until | ||||||
| poll_query_until timeout. | poll_query_until timeout. | ||||||
| @@ -1580,7 +1581,7 @@ sub wait_for_catchup | |||||||
| 	  . $lsn_expr . " on " | 	  . $lsn_expr . " on " | ||||||
| 	  . $self->name . "\n"; | 	  . $self->name . "\n"; | ||||||
| 	my $query = | 	my $query = | ||||||
| 	  qq[SELECT $lsn_expr <= ${mode}_lsn FROM pg_catalog.pg_stat_replication WHERE application_name = '$standby_name';]; | 	  qq[SELECT $lsn_expr <= ${mode}_lsn AND state = 'streaming' FROM pg_catalog.pg_stat_replication WHERE application_name = '$standby_name';]; | ||||||
| 	$self->poll_query_until('postgres', $query) | 	$self->poll_query_until('postgres', $query) | ||||||
| 	  or croak "timed out waiting for catchup"; | 	  or croak "timed out waiting for catchup"; | ||||||
| 	print "done\n"; | 	print "done\n"; | ||||||
|   | |||||||
| @@ -188,6 +188,11 @@ $node_publisher->safe_psql('postgres', | |||||||
| 	"INSERT INTO tab_ins SELECT generate_series(1001,1100)"); | 	"INSERT INTO tab_ins SELECT generate_series(1001,1100)"); | ||||||
| $node_publisher->safe_psql('postgres', "DELETE FROM tab_rep"); | $node_publisher->safe_psql('postgres', "DELETE FROM tab_rep"); | ||||||
|  |  | ||||||
|  | # Restart the publisher and check the state of the subscriber which | ||||||
|  | # should be in a streaming state after catching up. | ||||||
|  | $node_publisher->stop('fast'); | ||||||
|  | $node_publisher->start; | ||||||
|  |  | ||||||
| $node_publisher->wait_for_catchup($appname); | $node_publisher->wait_for_catchup($appname); | ||||||
|  |  | ||||||
| $result = $node_subscriber->safe_psql('postgres', | $result = $node_subscriber->safe_psql('postgres', | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user