mirror of
https://github.com/postgres/postgres.git
synced 2025-04-29 13:56:47 +03:00
This is a follow-up to commit 7dac61402 which removed a set of unused modules from the TAP test. The Config references in the pg_ctl and pg_rewind tests were removed in commit 1c6d46293. Fcntl ':mode' and File::stat in the pg_ctl test were added in c37b3d08c which was probably a leftover from an earlier version of the patch, as the function using these was added to another module in that commit. The Config reference in the ldap test was added in ee56c3b21 which in turn use $^O instead of interrogating Config. Author: Dagfinn Ilmari Mannsåker <ilmari@ilmari.org> Discussion: https://postgr.es/m/87lewyqk45.fsf@wibble.ilmari.org
385 lines
11 KiB
Perl
385 lines
11 KiB
Perl
|
|
# Copyright (c) 2021-2022, PostgreSQL Global Development Group
|
|
|
|
package RewindTest;
|
|
|
|
# Test driver for pg_rewind. Each test consists of a cycle where a new cluster
|
|
# is first created with initdb, and a streaming replication standby is set up
|
|
# to follow the primary. Then the primary is shut down and the standby is
|
|
# promoted, and finally pg_rewind is used to rewind the old primary, using the
|
|
# standby as the source.
|
|
#
|
|
# To run a test, the test script (in t/ subdirectory) calls the functions
|
|
# in this module. These functions should be called in this sequence:
|
|
#
|
|
# 1. setup_cluster - creates a PostgreSQL cluster that runs as the primary
|
|
#
|
|
# 2. start_primary - starts the primary server
|
|
#
|
|
# 3. create_standby - runs pg_basebackup to initialize a standby server, and
|
|
# sets it up to follow the primary.
|
|
#
|
|
# 4. promote_standby - runs "pg_ctl promote" to promote the standby server.
|
|
# The old primary keeps running.
|
|
#
|
|
# 5. run_pg_rewind - stops the old primary (if it's still running) and runs
|
|
# pg_rewind to synchronize it with the now-promoted standby server.
|
|
#
|
|
# 6. clean_rewind_test - stops both servers used in the test, if they're
|
|
# still running.
|
|
#
|
|
# The test script can use the helper functions primary_psql and standby_psql
|
|
# to run psql against the primary and standby servers, respectively.
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use Carp;
|
|
use Exporter 'import';
|
|
use File::Copy;
|
|
use File::Path qw(rmtree);
|
|
use IPC::Run qw(run);
|
|
use PostgreSQL::Test::Cluster;
|
|
use PostgreSQL::Test::RecursiveCopy;
|
|
use PostgreSQL::Test::Utils;
|
|
use Test::More;
|
|
|
|
our @EXPORT = qw(
|
|
$node_primary
|
|
$node_standby
|
|
|
|
primary_psql
|
|
standby_psql
|
|
check_query
|
|
|
|
setup_cluster
|
|
start_primary
|
|
create_standby
|
|
promote_standby
|
|
run_pg_rewind
|
|
clean_rewind_test
|
|
);
|
|
|
|
# Our nodes.
|
|
our $node_primary;
|
|
our $node_standby;
|
|
|
|
sub primary_psql
|
|
{
|
|
my $cmd = shift;
|
|
my $dbname = shift || 'postgres';
|
|
|
|
system_or_bail 'psql', '-q', '--no-psqlrc', '-d',
|
|
$node_primary->connstr($dbname), '-c', "$cmd";
|
|
return;
|
|
}
|
|
|
|
sub standby_psql
|
|
{
|
|
my $cmd = shift;
|
|
my $dbname = shift || 'postgres';
|
|
|
|
system_or_bail 'psql', '-q', '--no-psqlrc', '-d',
|
|
$node_standby->connstr($dbname), '-c', "$cmd";
|
|
return;
|
|
}
|
|
|
|
# Run a query against the primary, and check that the output matches what's
|
|
# expected
|
|
sub check_query
|
|
{
|
|
local $Test::Builder::Level = $Test::Builder::Level + 1;
|
|
|
|
my ($query, $expected_stdout, $test_name) = @_;
|
|
my ($stdout, $stderr);
|
|
|
|
# we want just the output, no formatting
|
|
my $result = run [
|
|
'psql', '-q', '-A', '-t', '--no-psqlrc', '-d',
|
|
$node_primary->connstr('postgres'),
|
|
'-c', $query
|
|
],
|
|
'>', \$stdout, '2>', \$stderr;
|
|
|
|
is($result, 1, "$test_name: psql exit code");
|
|
is($stderr, '', "$test_name: psql no stderr");
|
|
is($stdout, $expected_stdout, "$test_name: query result matches");
|
|
|
|
return;
|
|
}
|
|
|
|
sub setup_cluster
|
|
{
|
|
my $extra_name = shift; # Used to differentiate clusters
|
|
my $extra = shift; # Extra params for initdb
|
|
|
|
# Initialize primary, data checksums are mandatory
|
|
$node_primary =
|
|
PostgreSQL::Test::Cluster->new('primary' . ($extra_name ? "_${extra_name}" : ''));
|
|
|
|
# Set up pg_hba.conf and pg_ident.conf for the role running
|
|
# pg_rewind. This role is used for all the tests, and has
|
|
# minimal permissions enough to rewind from an online source.
|
|
$node_primary->init(
|
|
allows_streaming => 1,
|
|
extra => $extra,
|
|
auth_extra => [ '--create-role', 'rewind_user' ]);
|
|
|
|
# Set wal_keep_size to prevent WAL segment recycling after enforced
|
|
# checkpoints in the tests.
|
|
$node_primary->append_conf(
|
|
'postgresql.conf', qq(
|
|
wal_keep_size = 320MB
|
|
));
|
|
return;
|
|
}
|
|
|
|
sub start_primary
|
|
{
|
|
$node_primary->start;
|
|
|
|
# Create custom role which is used to run pg_rewind, and adjust its
|
|
# permissions to the minimum necessary.
|
|
$node_primary->safe_psql(
|
|
'postgres', "
|
|
CREATE ROLE rewind_user LOGIN;
|
|
GRANT EXECUTE ON function pg_catalog.pg_ls_dir(text, boolean, boolean)
|
|
TO rewind_user;
|
|
GRANT EXECUTE ON function pg_catalog.pg_stat_file(text, boolean)
|
|
TO rewind_user;
|
|
GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text)
|
|
TO rewind_user;
|
|
GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, boolean)
|
|
TO rewind_user;");
|
|
|
|
#### Now run the test-specific parts to initialize the primary before setting
|
|
# up standby
|
|
|
|
return;
|
|
}
|
|
|
|
sub create_standby
|
|
{
|
|
my $extra_name = shift;
|
|
|
|
$node_standby =
|
|
PostgreSQL::Test::Cluster->new('standby' . ($extra_name ? "_${extra_name}" : ''));
|
|
$node_primary->backup('my_backup');
|
|
$node_standby->init_from_backup($node_primary, 'my_backup');
|
|
my $connstr_primary = $node_primary->connstr();
|
|
|
|
$node_standby->append_conf(
|
|
"postgresql.conf", qq(
|
|
primary_conninfo='$connstr_primary'
|
|
));
|
|
|
|
$node_standby->set_standby_mode();
|
|
|
|
# Start standby
|
|
$node_standby->start;
|
|
|
|
# The standby may have WAL to apply before it matches the primary. That
|
|
# is fine, because no test examines the standby before promotion.
|
|
|
|
return;
|
|
}
|
|
|
|
sub promote_standby
|
|
{
|
|
#### Now run the test-specific parts to run after standby has been started
|
|
# up standby
|
|
|
|
# Wait for the standby to receive and write all WAL.
|
|
$node_primary->wait_for_catchup($node_standby, 'write');
|
|
|
|
# Now promote standby and insert some new data on primary, this will put
|
|
# the primary out-of-sync with the standby.
|
|
$node_standby->promote;
|
|
|
|
# Force a checkpoint after the promotion. pg_rewind looks at the control
|
|
# file to determine what timeline the server is on, and that isn't updated
|
|
# immediately at promotion, but only at the next checkpoint. When running
|
|
# pg_rewind in remote mode, it's possible that we complete the test steps
|
|
# after promotion so quickly that when pg_rewind runs, the standby has not
|
|
# performed a checkpoint after promotion yet.
|
|
standby_psql("checkpoint");
|
|
|
|
return;
|
|
}
|
|
|
|
sub run_pg_rewind
|
|
{
|
|
my $test_mode = shift;
|
|
my $primary_pgdata = $node_primary->data_dir;
|
|
my $standby_pgdata = $node_standby->data_dir;
|
|
my $standby_connstr = $node_standby->connstr('postgres');
|
|
my $tmp_folder = PostgreSQL::Test::Utils::tempdir;
|
|
|
|
# Append the rewind-specific role to the connection string.
|
|
$standby_connstr = "$standby_connstr user=rewind_user";
|
|
|
|
if ($test_mode eq 'archive')
|
|
{
|
|
# pg_rewind is tested with --restore-target-wal by moving all
|
|
# WAL files to a secondary location. Note that this leads to
|
|
# a failure in ensureCleanShutdown(), forcing to the use of
|
|
# --no-ensure-shutdown in this mode as the initial set of WAL
|
|
# files needed to ensure a clean restart is gone. This could
|
|
# be improved by keeping around only a minimum set of WAL
|
|
# segments but that would just make the test more costly,
|
|
# without improving the coverage. Hence, instead, stop
|
|
# gracefully the primary here.
|
|
$node_primary->stop;
|
|
}
|
|
else
|
|
{
|
|
# Stop the primary and be ready to perform the rewind. The cluster
|
|
# needs recovery to finish once, and pg_rewind makes sure that it
|
|
# happens automatically.
|
|
$node_primary->stop('immediate');
|
|
}
|
|
|
|
# At this point, the rewind processing is ready to run.
|
|
# We now have a very simple scenario with a few diverged WAL record.
|
|
# The real testing begins really now with a bifurcation of the possible
|
|
# scenarios that pg_rewind supports.
|
|
|
|
# Keep a temporary postgresql.conf for primary node or it would be
|
|
# overwritten during the rewind.
|
|
copy(
|
|
"$primary_pgdata/postgresql.conf",
|
|
"$tmp_folder/primary-postgresql.conf.tmp");
|
|
|
|
# Now run pg_rewind
|
|
if ($test_mode eq "local")
|
|
{
|
|
|
|
# Do rewind using a local pgdata as source
|
|
# Stop the primary and be ready to perform the rewind
|
|
$node_standby->stop;
|
|
command_ok(
|
|
[
|
|
'pg_rewind',
|
|
"--debug",
|
|
"--source-pgdata=$standby_pgdata",
|
|
"--target-pgdata=$primary_pgdata",
|
|
"--no-sync"
|
|
],
|
|
'pg_rewind local');
|
|
}
|
|
elsif ($test_mode eq "remote")
|
|
{
|
|
# Do rewind using a remote connection as source, generating
|
|
# recovery configuration automatically.
|
|
command_ok(
|
|
[
|
|
'pg_rewind', "--debug",
|
|
"--source-server", $standby_connstr,
|
|
"--target-pgdata=$primary_pgdata", "--no-sync",
|
|
"--write-recovery-conf"
|
|
],
|
|
'pg_rewind remote');
|
|
|
|
# Check that standby.signal is here as recovery configuration
|
|
# was requested.
|
|
ok( -e "$primary_pgdata/standby.signal",
|
|
'standby.signal created after pg_rewind');
|
|
|
|
# Now, when pg_rewind apparently succeeded with minimal permissions,
|
|
# add REPLICATION privilege. So we could test that new standby
|
|
# is able to connect to the new primary with generated config.
|
|
$node_standby->safe_psql('postgres',
|
|
"ALTER ROLE rewind_user WITH REPLICATION;");
|
|
}
|
|
elsif ($test_mode eq "archive")
|
|
{
|
|
|
|
# Do rewind using a local pgdata as source and specified
|
|
# directory with target WAL archive. The old primary has
|
|
# to be stopped at this point.
|
|
|
|
# Remove the existing archive directory and move all WAL
|
|
# segments from the old primary to the archives. These
|
|
# will be used by pg_rewind.
|
|
rmtree($node_primary->archive_dir);
|
|
PostgreSQL::Test::RecursiveCopy::copypath($node_primary->data_dir . "/pg_wal",
|
|
$node_primary->archive_dir);
|
|
|
|
# Fast way to remove entire directory content
|
|
rmtree($node_primary->data_dir . "/pg_wal");
|
|
mkdir($node_primary->data_dir . "/pg_wal");
|
|
|
|
# Make sure that directories have the right umask as this is
|
|
# required by a follow-up check on permissions, and better
|
|
# safe than sorry.
|
|
chmod(0700, $node_primary->archive_dir);
|
|
chmod(0700, $node_primary->data_dir . "/pg_wal");
|
|
|
|
# Add appropriate restore_command to the target cluster
|
|
$node_primary->enable_restoring($node_primary, 0);
|
|
|
|
# Stop the new primary and be ready to perform the rewind.
|
|
$node_standby->stop;
|
|
|
|
# Note the use of --no-ensure-shutdown here. WAL files are
|
|
# gone in this mode and the primary has been stopped
|
|
# gracefully already.
|
|
command_ok(
|
|
[
|
|
'pg_rewind',
|
|
"--debug",
|
|
"--source-pgdata=$standby_pgdata",
|
|
"--target-pgdata=$primary_pgdata",
|
|
"--no-sync",
|
|
"--no-ensure-shutdown",
|
|
"--restore-target-wal"
|
|
],
|
|
'pg_rewind archive');
|
|
}
|
|
else
|
|
{
|
|
|
|
# Cannot come here normally
|
|
croak("Incorrect test mode specified");
|
|
}
|
|
|
|
# Now move back postgresql.conf with old settings
|
|
move(
|
|
"$tmp_folder/primary-postgresql.conf.tmp",
|
|
"$primary_pgdata/postgresql.conf");
|
|
|
|
chmod(
|
|
$node_primary->group_access() ? 0640 : 0600,
|
|
"$primary_pgdata/postgresql.conf")
|
|
or BAIL_OUT(
|
|
"unable to set permissions for $primary_pgdata/postgresql.conf");
|
|
|
|
# Plug-in rewound node to the now-promoted standby node
|
|
if ($test_mode ne "remote")
|
|
{
|
|
my $port_standby = $node_standby->port;
|
|
$node_primary->append_conf(
|
|
'postgresql.conf', qq(
|
|
primary_conninfo='port=$port_standby'));
|
|
|
|
$node_primary->set_standby_mode();
|
|
}
|
|
|
|
# Restart the primary to check that rewind went correctly
|
|
$node_primary->start;
|
|
|
|
#### Now run the test-specific parts to check the result
|
|
|
|
return;
|
|
}
|
|
|
|
# Clean up after the test. Stop both servers, if they're still running.
|
|
sub clean_rewind_test
|
|
{
|
|
$node_primary->teardown_node if defined $node_primary;
|
|
$node_standby->teardown_node if defined $node_standby;
|
|
return;
|
|
}
|
|
|
|
1;
|