1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-11 20:28:21 +03:00

Verify roundtrip dump/restore of regression database

Add a test to pg_upgrade's test suite that verifies that
dump-restore-dump of regression database produces equivalent output to
dumping it directly.  This was already being tested by running
pg_upgrade itself, but non-binary-upgrade mode was not being covered.

The regression database has accrued, over time, a sufficient collection
of interesting objects to ensure good coverage, but there hasn't been a
concerted effort to be completely exhaustive, so it is likely still
possible to have more.

This'd belong more naturally in the pg_dump test suite, but we chose to
put it in src/bin/pg_upgrade/t/002_pg_upgrade.pl because we need a run
of the regression tests which is already done here, so this has less
total test runtime impact.  Also, experiments have shown that using
parallel dump/restore is slightly faster, so we use --format=directory -j2.

This test has already reported pg_dump bugs, as fixed in fd41ba93e4,
74563f6b90, d611f8b158, 4694aedf63.

Author: Ashutosh Bapat <ashutosh.bapat.oss@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Daniel Gustafsson <daniel@yesql.se>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Discussion: https://www.postgresql.org/message-id/CAExHW5uF5V=Cjecx3_Z=7xfh4rg2Wf61PT+hfquzjBqouRzQJQ@mail.gmail.com
This commit is contained in:
Álvaro Herrera
2025-04-01 18:50:40 +02:00
parent 764d501d24
commit 172259afb5
4 changed files with 254 additions and 3 deletions

View File

@ -11,12 +11,15 @@ use File::Path qw(rmtree);
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use PostgreSQL::Test::AdjustDump;
use PostgreSQL::Test::AdjustUpgrade;
use Test::More;
# Can be changed to test the other modes.
my $mode = $ENV{PG_TEST_PG_UPGRADE_MODE} || '--copy';
my $tempdir = PostgreSQL::Test::Utils::tempdir;
# Generate a database with a name made of a range of ASCII characters.
sub generate_db
{
@ -35,8 +38,8 @@ sub generate_db
"created database with ASCII characters from $from_char to $to_char");
}
# Filter the contents of a dump before its use in a content comparison.
# This returns the path to the filtered dump.
# Filter the contents of a dump before its use in a content comparison for
# upgrade testing. This returns the path to the filtered dump.
sub filter_dump
{
my ($is_old, $old_version, $dump_file) = @_;
@ -60,6 +63,41 @@ sub filter_dump
return $dump_file_filtered;
}
# Dump database db from the given node in plain format and adjust it for
# comparing dumps from the original and the restored database.
#
# file_prefix is used to create unique names for all dump files so that they
# remain available for debugging in case the test fails.
#
# adjust_child_columns is passed to adjust_regress_dumpfile() which actually
# adjusts the dump output.
#
# The name of the file containing the adjusted dump is returned.  Dies if the
# adjusted file cannot be opened, written or closed.
sub get_dump_for_comparison
{
	my ($node, $db, $file_prefix, $adjust_child_columns) = @_;

	# Both the raw and the adjusted dump live in the test's temporary
	# directory, named after the caller-supplied prefix.
	my $dumpfile = $tempdir . '/' . $file_prefix . '.sql';
	my $dump_adjusted = "${dumpfile}_adjusted";

	open(my $dh, '>', $dump_adjusted)
	  || die "could not open $dump_adjusted for writing $!";

	# Don't dump statistics, because there are still some bugs.
	$node->run_log(
		[
			'pg_dump', '--no-sync', '--no-statistics',
			'-d' => $node->connstr($db),
			'-f' => $dumpfile
		]);

	# Write errors on a buffered handle may only show up at close time, so
	# check both print and close; otherwise a truncated adjusted dump would
	# show up later as a confusing comparison failure.
	print {$dh} adjust_regress_dumpfile(slurp_file($dumpfile),
		$adjust_child_columns)
	  or die "could not write to $dump_adjusted: $!";
	close($dh)
	  or die "could not close $dump_adjusted: $!";

	return $dump_adjusted;
}
# The test of pg_upgrade requires two clusters, an old one and a new one
# that gets upgraded. Before running the upgrade, a logical dump of the
# old cluster is taken, and a second logical dump of the new one is taken
@ -80,7 +118,6 @@ if ( (defined($ENV{olddump}) && !defined($ENV{oldinstall}))
}
# Paths to the dumps taken during the tests.
my $tempdir = PostgreSQL::Test::Utils::tempdir;
my $dump1_file = "$tempdir/dump1.sql";
my $dump2_file = "$tempdir/dump2.sql";
@ -264,6 +301,60 @@ else
is($rc, 0, 'regression tests pass');
}
# Test that dump/restore of the regression database roundtrips cleanly. This
# doesn't work well when the nodes are different versions, so skip it in that
# case. Note that this isn't a pg_restore test, but it's convenient to do it
# here because we've gone to the trouble of creating the regression database.
#
# Do this while the old cluster is running before it is shut down by the
# upgrade test.
SKIP:
{
# Create the destination node object first (it is only initialized further
# down) so that its version can be compared with the old node's before any
# real work is done.
my $dstnode = PostgreSQL::Test::Cluster->new('dst_node');
skip "different Postgres versions"
if ($oldnode->pg_version != $dstnode->pg_version);
# A defined install_path means the old node is not running from the default
# installation; skip in that case as well.
skip "source node not using default install"
if (defined $oldnode->install_path);
# Dump the original database for comparison later.  The final argument (1)
# requests the child-table column adjustments, which are needed for the dump
# of the original database.
my $src_dump =
get_dump_for_comparison($oldnode, 'regression', 'src_dump', 1);
# Setup destination database cluster
$dstnode->init(%node_params);
# Stabilize stats for comparison.
$dstnode->append_conf('postgresql.conf', 'autovacuum = off');
$dstnode->start;
# Directory-format archive produced by pg_dump and consumed by pg_restore.
my $dump_file = "$tempdir/regression.dump";
# Use --create in dump and restore commands so that the restored
# database has the same configurable variable settings as the original
# database, and the dumps taken from both databases therefore do not
# differ because of locale changes. Additionally this provides test
# coverage for --create option.
#
# Use directory format so that we can use parallel dump/restore.
$oldnode->command_ok(
[
'pg_dump', '-Fd', '-j2', '--no-sync',
'-d' => $oldnode->connstr('regression'),
'--create', '-f' => $dump_file
],
'pg_dump on source instance');
# With --create, pg_restore connects to 'postgres' and recreates the dumped
# database from the archive.
$dstnode->command_ok(
[ 'pg_restore', '--create', '-j2', '-d' => 'postgres', $dump_file ],
'pg_restore to destination instance');
# Dump the restored database; no child-table column adjustment (0) is
# requested for it.
my $dst_dump =
get_dump_for_comparison($dstnode, 'regression', 'dest_dump', 0);
# Both arguments are paths to the adjusted plain-format dumps.
compare_files($src_dump, $dst_dump,
'dump outputs from original and restored regression databases match');
}
# Initialize a new node for the upgrade.
my $newnode = PostgreSQL::Test::Cluster->new('new_node');

View File

@ -25,6 +25,7 @@ install: all installdirs
$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/Kerberos.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Kerberos.pm'
$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/Cluster.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Cluster.pm'
$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/BackgroundPsql.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/BackgroundPsql.pm'
$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/AdjustDump.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustDump.pm'
$(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/AdjustUpgrade.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustUpgrade.pm'
$(INSTALL_DATA) $(srcdir)/PostgreSQL/Version.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Version.pm'
@ -35,6 +36,7 @@ uninstall:
rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Kerberos.pm'
rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Cluster.pm'
rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/BackgroundPsql.pm'
rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustDump.pm'
rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustUpgrade.pm'
rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Version.pm'

View File

@ -0,0 +1,157 @@
# Copyright (c) 2024-2025, PostgreSQL Global Development Group
=pod
=head1 NAME
PostgreSQL::Test::AdjustDump - helper module for dump/restore tests
=head1 SYNOPSIS
use PostgreSQL::Test::AdjustDump;
# Adjust contents of dump output file so that dump output from original
# regression database and that from the restored regression database match
$dump = adjust_regress_dumpfile($dump, $adjust_child_columns);
=head1 DESCRIPTION
C<PostgreSQL::Test::AdjustDump> encapsulates various hacks needed to
compare the results of dump/restore tests.
=cut
package PostgreSQL::Test::AdjustDump;
use strict;
use warnings FATAL => 'all';
use Exporter 'import';
use Test::More;
our @EXPORT = qw(
adjust_regress_dumpfile
);
=pod
=head1 ROUTINES
=over
=item $dump = adjust_regress_dumpfile($dump, $adjust_child_columns)
Edit a dump output file, taken from the source regression database,
to remove the known differences to a dump taken after restoring the
same database.
Arguments:
=over
=item C<dump>: Contents of dump file
=item C<adjust_child_columns>: 1 indicates that the given dump file requires
adjusting columns in the child tables; usually when the dump is from original
database. 0 indicates no such adjustment is needed; usually when the dump is
from restored database.
=back
Returns the adjusted dump text.
Adjustments Applied:
=over
=item Column reordering on child table creation
This rearranges the column declarations in the C<CREATE TABLE... INHERITS>
statements in the dump file from original database so that they match those
from the restored database.
Only executed if C<adjust_child_columns> is true.
Context: some regression tests purposefully create child tables in such a way
that the order of their inherited columns differs from the column order of their
respective parents. In the restored database, however, the order of their
inherited columns is the same as that of their respective parents. Thus the column
order of these child tables in the original database differs from that in the
restored database, causing differences in the dump outputs. See
C<MergeAttributes()> and C<dumpTableSchema()> for details.
=item Removal of problematic C<COPY> statements
Remove COPY statements to abnormal children tables.
Context: This difference is caused because of columns that are added to parent
tables that already have children; because recreating the children tables puts
the columns from the parent ahead of columns declared locally in children,
these extra columns are in earlier position compared to the original database.
Reordering columns on the entire C<COPY> data is impractical, so we just remove
them.
=item Newline adjustment
Windows-style newlines are changed to Unix-style. Empty lines are trimmed.
=back
=cut
sub adjust_regress_dumpfile
{
	my ($dump, $adjust_child_columns) = @_;

	# Normalize Windows line endings to Unix ones.
	$dump =~ s/\r\n/\n/g;

	# Rewrite the column lists of the known CREATE TABLE ... INHERITS
	# statements so that they match what a restored database dumps.
	if ($adjust_child_columns)
	{
		# gtestxx_4 has the same layout in both generated-column schemas:
		# swap "b" and "a".
		for my $schema ('generated_stored_tests', 'generated_virtual_tests')
		{
			$dump =~ s/(^CREATE\sTABLE\s${schema}\.gtestxx_4\s\()
				(\n\s+b\sinteger),
				(\n\s+a\sinteger\sNOT\sNULL)/$1$3,$2/mgx;
		}

		# test_type_diff2_c1: move int_two to the front.
		$dump =~ s/(^CREATE\sTABLE\spublic\.test_type_diff2_c1\s\()
			(\n\s+int_four\sbigint),
			(\n\s+int_eight\sbigint),
			(\n\s+int_two\ssmallint)/$1$4,$2,$3/mgx;

		# test_type_diff2_c2: move int_eight to the end.
		$dump =~ s/(^CREATE\sTABLE\spublic\.test_type_diff2_c2\s\()
			(\n\s+int_eight\sbigint),
			(\n\s+int_two\ssmallint),
			(\n\s+int_four\sbigint)/$1$3,$4,$2/mgx;
	}

	# Tables (all in schema "public") whose COPY column order differs
	# between the original and the restored database.
	my @reordered_tables = (
		'b_star', 'c_star', 'cc2', 'd_star', 'e_star', 'f_star',
		'renamecolumnanother', 'renamecolumnchild',
		'test_type_diff2_c1', 'test_type_diff2_c2',
		'test_type_diff_c');

	foreach my $relname (@reordered_tables)
	{
		# Drop the first COPY for this table, from the "COPY" keyword up to
		# and including the terminating "\." line.
		$dump =~ s/^COPY public\.$relname \(.+?^\\\.$//sm;
	}

	# Collapse runs of blank lines, since pg_dump is not consistent about
	# how many it emits.
	$dump =~ s/\n\n+/\n/g;

	return $dump;
}
=pod
=back
=cut
1;

View File

@ -13,5 +13,6 @@ install_data(
'PostgreSQL/Test/Kerberos.pm',
'PostgreSQL/Test/Cluster.pm',
'PostgreSQL/Test/BackgroundPsql.pm',
'PostgreSQL/Test/AdjustDump.pm',
'PostgreSQL/Test/AdjustUpgrade.pm',
install_dir: dir_pgxs / 'src/test/perl/PostgreSQL/Test')