Add -c/--restore-target-wal to pg_rewind

pg_rewind needs to copy from the source cluster to the target cluster the set of relation blocks changed from the last checkpoint before the point where WAL forked up to the end of WAL on the target. Building this list of relation blocks requires a range of WAL segments that may not be present anymore in the target's pg_wal, causing pg_rewind to fail. It is possible to work around this issue by manually copying the WAL segments needed, but this may lead to some extra and actually useless work. This commit introduces a new option allowing pg_rewind to use a restore_command while doing the rewind by grabbing the parameter value of restore_command from the target cluster configuration. This allows the rewind operation to be more reliable, as only the WAL segments needed by the rewind are restored from the archives. In order to be able to do that, a new routine is added to src/common/ to allow frontend tools to restore files from archives using an already-built restore command. This version is simpler than the backend equivalent as there is no need to handle the non-recovery case. Author: Alexey Kondratov Reviewed-by: Andrey Borodin, Andres Freund, Alvaro Herrera, Alexander Korotkov, Michael Paquier Discussion: https://postgr.es/m/a3acff50-5a0d-9a2c-b3b2-ee36168955c1@postgrespro.ru
2025-07-30 11:03:19 +03:00 · 2020-04-01 10:57:03 +09:00
parent 92d31085e9
commit a7e8ece41c
12 changed files with 360 additions and 27 deletions
--- a/src/bin/pg_rewind/t/RewindTest.pm
+++ b/src/bin/pg_rewind/t/RewindTest.pm
@ -38,6 +38,7 @@ use File::Copy;
 use File::Path qw(rmtree);
 use IPC::Run qw(run);
 use PostgresNode;
+use RecursiveCopy;
 use TestLib;
 use Test::More;

@ -227,10 +228,26 @@ sub run_pg_rewind
 	# Append the rewind-specific role to the connection string.
 	$standby_connstr = "$standby_connstr user=rewind_user";

-	# Stop the master and be ready to perform the rewind.  The cluster
-	# needs recovery to finish once, and pg_rewind makes sure that it
-	# happens automatically.
-	$node_master->stop('immediate');
+	if ($test_mode eq 'archive')
+	{
+		# pg_rewind is tested with --restore-target-wal by moving all
+		# WAL files to a secondary location.  Note that this leads to
+		# a failure in ensureCleanShutdown(), forcing the use of
+		# --no-ensure-shutdown in this mode as the initial set of WAL
+		# files needed to ensure a clean restart is gone.  This could
+		# be improved by keeping around only a minimum set of WAL
+		# segments but that would just make the test more costly,
+		# without improving the coverage.  Hence, instead, stop the
+		# primary gracefully here.
+		$node_master->stop;
+	}
+	else
+	{
+		# Stop the master and be ready to perform the rewind.  The cluster
+		# needs recovery to finish once, and pg_rewind makes sure that it
+		# happens automatically.
+		$node_master->stop('immediate');
+	}

 	# At this point, the rewind processing is ready to run.
 	# We now have a very simple scenario with a few diverged WAL records.
@ -284,6 +301,51 @@ sub run_pg_rewind
 		$node_standby->safe_psql('postgres',
 			"ALTER ROLE rewind_user WITH REPLICATION;");
 	}
+	elsif ($test_mode eq "archive")
+	{
+
+		# Do rewind using a local pgdata as source and specified
+		# directory with target WAL archive.  The old master has
+		# to be stopped at this point.
+
+		# Remove the existing archive directory and move all WAL
+		# segments from the old master to the archives.  These
+		# will be used by pg_rewind.
+		rmtree($node_master->archive_dir);
+		RecursiveCopy::copypath($node_master->data_dir . "/pg_wal",
+			$node_master->archive_dir);
+
+		# Fast way to remove entire directory content
+		rmtree($node_master->data_dir . "/pg_wal");
+		mkdir($node_master->data_dir . "/pg_wal");
+
+		# Make sure that directories have the right permissions as
+		# this is required by a follow-up check on permissions, and
+		# better safe than sorry.
+		chmod(0700, $node_master->archive_dir);
+		chmod(0700, $node_master->data_dir . "/pg_wal");
+
+		# Add appropriate restore_command to the target cluster
+		$node_master->enable_restoring($node_master, 0);
+
+		# Stop the new master and be ready to perform the rewind.
+		$node_standby->stop;
+
+		# Note the use of --no-ensure-shutdown here.  WAL files are
+		# gone in this mode and the primary has been stopped
+		# gracefully already.
+		command_ok(
+			[
+				'pg_rewind',
+				"--debug",
+				"--source-pgdata=$standby_pgdata",
+				"--target-pgdata=$master_pgdata",
+				"--no-sync",
+				"--no-ensure-shutdown",
+				"--restore-target-wal"
+			],
+			'pg_rewind archive');
+	}
 	else
 	{