Verify roundtrip dump/restore of regression database

Add a test to pg_upgrade's test suite that verifies that dump-restore-dump of regression database produces equivalent output to dumping it directly. This was already being tested by running pg_upgrade itself, but non-binary-upgrade mode was not being covered. The regression database has accrued, over time, a sufficient collection of interesting objects to ensure good coverage, but there hasn't been a concerted effort to be completely exhaustive, so it is likely still possible to have more. This'd belong more naturally in the pg_dump test suite, but we chose to put it in src/bin/pg_upgrade/t/002_pg_upgrade.pl because we need a run of the regression tests which is already done here, so this has less total test runtime impact. Also, experiments have shown that using parallel dump/restore is slightly faster, so we use --format=directory -j2. This test has already reported pg_dump bugs, as fixed in fd41ba93e4, 74563f6b90, d611f8b158, 4694aedf63. Author: Ashutosh Bapat <ashutosh.bapat.oss@gmail.com> Reviewed-by: Michael Paquier <michael@paquier.xyz> Reviewed-by: Daniel Gustafsson <daniel@yesql.se> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org> Discussion: https://www.postgresql.org/message-id/CAExHW5uF5V=Cjecx3_Z=7xfh4rg2Wf61PT+hfquzjBqouRzQJQ@mail.gmail.com
2025-06-11 20:28:21 +03:00 · 2025-04-01 18:50:40 +02:00
parent 764d501d24
commit 172259afb5
4 changed files with 254 additions and 3 deletions
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@ -11,12 +11,15 @@ use File::Path     qw(rmtree);

 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
+use PostgreSQL::Test::AdjustDump;
 use PostgreSQL::Test::AdjustUpgrade;
 use Test::More;

 # Can be changed to test the other modes.
 my $mode = $ENV{PG_TEST_PG_UPGRADE_MODE} || '--copy';

+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+
 # Generate a database with a name made of a range of ASCII characters.
 sub generate_db
 {
@ -35,8 +38,8 @@ sub generate_db
 		"created database with ASCII characters from $from_char to $to_char");
 }

-# Filter the contents of a dump before its use in a content comparison.
-# This returns the path to the filtered dump.
+# Filter the contents of a dump before its use in a content comparison for
+# upgrade testing. This returns the path to the filtered dump.
 sub filter_dump
 {
 	my ($is_old, $old_version, $dump_file) = @_;
@ -60,6 +63,41 @@ sub filter_dump
 	return $dump_file_filtered;
 }

+# Dump database db from the given node in plain format and adjust it for
+# comparing dumps from the original and the restored database.
+#
+# file_prefix is used to create unique names for all dump files so that they
+# remain available for debugging in case the test fails.
+#
+# adjust_child_columns is passed to adjust_regress_dumpfile() which actually
+# adjusts the dump output.
+#
+# The name of the file containing the adjusted dump is returned.
+sub get_dump_for_comparison
+{
+	my ($node, $db, $file_prefix, $adjust_child_columns) = @_;
+
+	my $dumpfile = $tempdir . '/' . $file_prefix . '.sql';
+	my $dump_adjusted = "${dumpfile}_adjusted";
+
+	open(my $dh, '>', $dump_adjusted)
+	  || die "could not open $dump_adjusted for writing $!";
+
+	# Don't dump statistics, because there are still some bugs.
+	$node->run_log(
+		[
+			'pg_dump', '--no-sync', '--no-statistics',
+			'-d' => $node->connstr($db),
+			'-f' => $dumpfile
+		]);
+
+	print $dh adjust_regress_dumpfile(slurp_file($dumpfile),
+		$adjust_child_columns);
+	close($dh);
+
+	return $dump_adjusted;
+}
+
 # The test of pg_upgrade requires two clusters, an old one and a new one
 # that gets upgraded.  Before running the upgrade, a logical dump of the
 # old cluster is taken, and a second logical dump of the new one is taken
@ -80,7 +118,6 @@ if (   (defined($ENV{olddump}) && !defined($ENV{oldinstall}))
 }

 # Paths to the dumps taken during the tests.
-my $tempdir = PostgreSQL::Test::Utils::tempdir;
 my $dump1_file = "$tempdir/dump1.sql";
 my $dump2_file = "$tempdir/dump2.sql";

@ -264,6 +301,60 @@ else
 	is($rc, 0, 'regression tests pass');
 }

+# Test that dump/restore of the regression database roundtrips cleanly.  This
+# doesn't work well when the nodes are different versions, so skip it in that
+# case.  Note that this isn't a pg_upgrade test, but it's convenient to do it
+# here because we've gone to the trouble of creating the regression database.
+#
+# Do this while the old cluster is running before it is shut down by the
+# upgrade test.
+SKIP:
+{
+	my $dstnode = PostgreSQL::Test::Cluster->new('dst_node');
+
+	skip "different Postgres versions"
+	  if ($oldnode->pg_version != $dstnode->pg_version);
+	skip "source node not using default install"
+	  if (defined $oldnode->install_path);
+
+	# Dump the original database for comparison later.
+	my $src_dump =
+	  get_dump_for_comparison($oldnode, 'regression', 'src_dump', 1);
+
+	# Setup destination database cluster
+	$dstnode->init(%node_params);
+	# Stabilize stats for comparison.
+	$dstnode->append_conf('postgresql.conf', 'autovacuum = off');
+	$dstnode->start;
+
+	my $dump_file = "$tempdir/regression.dump";
+
+	# Use --create in dump and restore commands so that the restored
+	# database has the same configurable variable settings as the original
+	# database, and hence the dumps taken from both databases do not
+	# differ because of locale changes.  Additionally, this provides test
+	# coverage for the --create option.
+	#
+	# Use directory format so that we can use parallel dump/restore.
+	$oldnode->command_ok(
+		[
+			'pg_dump', '-Fd', '-j2', '--no-sync',
+			'-d' => $oldnode->connstr('regression'),
+			'--create', '-f' => $dump_file
+		],
+		'pg_dump on source instance');
+
+	$dstnode->command_ok(
+		[ 'pg_restore', '--create', '-j2', '-d' => 'postgres', $dump_file ],
+		'pg_restore to destination instance');
+
+	my $dst_dump =
+	  get_dump_for_comparison($dstnode, 'regression', 'dest_dump', 0);
+
+	compare_files($src_dump, $dst_dump,
+		'dump outputs from original and restored regression databases match');
+}
+
 # Initialize a new node for the upgrade.
 my $newnode = PostgreSQL::Test::Cluster->new('new_node');

--- a/src/test/perl/Makefile
+++ b/src/test/perl/Makefile
@ -25,6 +25,7 @@ install: all installdirs
 	$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/Kerberos.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Kerberos.pm'
 	$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/Cluster.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Cluster.pm'
 	$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/BackgroundPsql.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/BackgroundPsql.pm'
+	$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/AdjustDump.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustDump.pm'
 	$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/AdjustUpgrade.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustUpgrade.pm'
 	$(INSTALL_DATA) $(srcdir)/PostgreSQL/Version.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Version.pm'

@ -35,6 +36,7 @@ uninstall:
 	rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Kerberos.pm'
 	rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Cluster.pm'
 	rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/BackgroundPsql.pm'
+	rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustDump.pm'
 	rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustUpgrade.pm'
 	rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Version.pm'

--- a/src/test/perl/PostgreSQL/Test/AdjustDump.pm
+++ b/src/test/perl/PostgreSQL/Test/AdjustDump.pm
@ -0,0 +1,157 @@
+# Copyright (c) 2024-2025, PostgreSQL Global Development Group
+
+=pod
+
+=head1 NAME
+
+PostgreSQL::Test::AdjustDump - helper module for dump/restore tests
+
+=head1 SYNOPSIS
+
+  use PostgreSQL::Test::AdjustDump;
+
+  # Adjust contents of dump output file so that dump output from original
+  # regression database and that from the restored regression database match
+  $dump = adjust_regress_dumpfile($dump, $adjust_child_columns);
+
+=head1 DESCRIPTION
+
+C<PostgreSQL::Test::AdjustDump> encapsulates various hacks needed to
+compare the results of dump/restore tests.
+
+=cut
+
+package PostgreSQL::Test::AdjustDump;
+
+use strict;
+use warnings FATAL => 'all';
+
+use Exporter 'import';
+use Test::More;
+
+our @EXPORT = qw(
+  adjust_regress_dumpfile
+);
+
+=pod
+
+=head1 ROUTINES
+
+=over
+
+=item $dump = adjust_regress_dumpfile($dump, $adjust_child_columns)
+
+Edit the contents of a dump output file, taken from the source regression
+database, to remove the known differences from a dump taken after restoring
+the same database.
+
+Arguments:
+
+=over
+
+=item C<dump>: Contents of dump file
+
+=item C<adjust_child_columns>: 1 indicates that the given dump file requires
+adjusting columns in the child tables; usually when the dump is from the
+original database. 0 indicates no such adjustment is needed; usually when the
+dump is from the restored database.
+
+=back
+
+Returns the adjusted dump text.
+
+Adjustments Applied:
+
+=over
+
+=item Column reordering on child table creation
+
+This rearranges the column declarations in the C<CREATE TABLE... INHERITS>
+statements in the dump file from original database so that they match those
+from the restored database.
+
+Only executed if C<adjust_child_columns> is true.
+
+Context: some regression tests purposefully create child tables in such a way
+that the order of their inherited columns differs from the column order of
+their respective parents.  In the restored database, however, the order of
+their inherited columns is the same as that of their respective parents. Thus
+the column orders of these child tables in the original database and those in
+the restored database differ, causing differences in the dump outputs. See
+C<MergeAttributes()> and C<dumpTableSchema()> for details.
+
+=item Removal of problematic C<COPY> statements
+
+Remove COPY statements for abnormal child tables.
+
+Context: This difference is caused by columns that are added to parent
+tables that already have children; because recreating the child tables puts
+the columns from the parent ahead of columns declared locally in the children,
+these extra columns are in an earlier position compared to the original
+database.  Reordering columns on the entire C<COPY> data is impractical, so we
+just remove them.
+
+=item Newline adjustment
+
+Windows-style newlines are changed to Unix-style.  Empty lines are trimmed.
+
+=back
+
+=cut
+
+sub adjust_regress_dumpfile
+{
+	my ($dump, $adjust_child_columns) = @_;
+
+	# Convert Windows-style (CRLF) newlines to Unix-style (LF).
+	$dump =~ s/\r\n/\n/g;
+
+	# Reorder columns in the CREATE TABLE ... INHERITS statements, if requested.
+	if ($adjust_child_columns)
+	{
+		$dump =~ s/(^CREATE\sTABLE\sgenerated_stored_tests\.gtestxx_4\s\()
+		(\n\s+b\sinteger),
+		(\n\s+a\sinteger\sNOT\sNULL)/$1$3,$2/mgx;
+
+		$dump =~ s/(^CREATE\sTABLE\sgenerated_virtual_tests\.gtestxx_4\s\()
+		(\n\s+b\sinteger),
+		(\n\s+a\sinteger\sNOT\sNULL)/$1$3,$2/mgx;
+
+		$dump =~ s/(^CREATE\sTABLE\spublic\.test_type_diff2_c1\s\()
+		(\n\s+int_four\sbigint),
+		(\n\s+int_eight\sbigint),
+		(\n\s+int_two\ssmallint)/$1$4,$2,$3/mgx;
+
+		$dump =~ s/(^CREATE\sTABLE\spublic\.test_type_diff2_c2\s\()
+		(\n\s+int_eight\sbigint),
+		(\n\s+int_two\ssmallint),
+		(\n\s+int_four\sbigint)/$1$3,$4,$2/mgx;
+	}
+
+	# Remove the COPY statements (and their data) of tables whose column order differs.
+	for my $table (
+		'public\.b_star', 'public\.c_star',
+		'public\.cc2', 'public\.d_star',
+		'public\.e_star', 'public\.f_star',
+		'public\.renamecolumnanother', 'public\.renamecolumnchild',
+		'public\.test_type_diff2_c1', 'public\.test_type_diff2_c2',
+		'public\.test_type_diff_c')
+	{
+		# This multiline pattern matches one whole COPY per table, from the
+		# statement through its data up to the first terminating "\." line.
+		$dump =~ s/^COPY $table \(.+?^\\\.$//sm;
+	}
+
+	# Suppress blank lines, as some places in pg_dump emit more or fewer.
+	$dump =~ s/\n\n+/\n/g;
+
+	return $dump;
+}
+
+=pod
+
+=back
+
+=cut
+
+1;
--- a/src/test/perl/meson.build
+++ b/src/test/perl/meson.build
@ -13,5 +13,6 @@ install_data(
  'PostgreSQL/Test/Kerberos.pm',
  'PostgreSQL/Test/Cluster.pm',
  'PostgreSQL/Test/BackgroundPsql.pm',
+  'PostgreSQL/Test/AdjustDump.pm',
  'PostgreSQL/Test/AdjustUpgrade.pm',
  install_dir: dir_pgxs / 'src/test/perl/PostgreSQL/Test')