mirror of
https://github.com/postgres/postgres.git
synced 2025-04-24 10:47:04 +03:00
Make the pg_rewind regression tests more robust on slow systems.
There were a couple of hard-coded sleeps in the tests: to wait for standby to catch up with master, and to wait for promotion with "pg_ctl promote" to complete. Instead of a fixed, hard-coded sleep, poll the server with a query once a second. This isn't ideal either, and I wish we had a better solution for real-world applications too, but this should fix the immediate problem. Patch by Michael Paquier, with some editing by me.
This commit is contained in:
parent
cef939c347
commit
54a16df010
@ -125,6 +125,37 @@ sub check_query
|
||||
}
|
||||
}
|
||||
|
||||
# Run a query once a second, until it returns 't' (i.e. SQL boolean true).
|
||||
sub poll_query_until
|
||||
{
|
||||
my ($query, $connstr) = @_;
|
||||
|
||||
my $max_attempts = 30;
|
||||
my $attempts = 0;
|
||||
my ($stdout, $stderr);
|
||||
|
||||
while ($attempts < $max_attempts)
|
||||
{
|
||||
my $cmd = ['psql', '-At', '-c', "$query", '-d', "$connstr" ];
|
||||
my $result = run $cmd, '>', \$stdout, '2>', \$stderr;
|
||||
|
||||
chomp($stdout);
|
||||
if ($stdout eq "t")
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
# Wait a second before retrying.
|
||||
sleep 1;
|
||||
$attempts++;
|
||||
}
|
||||
|
||||
# The query result didn't change in 30 seconds. Give up. Print the stderr
|
||||
# from the last attempt, hopefully that's useful for debugging.
|
||||
diag $stderr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub append_to_file
|
||||
{
|
||||
my($filename, $str) = @_;
|
||||
@ -185,7 +216,7 @@ sub create_standby
|
||||
# Base backup is taken with xlog files included
|
||||
system_or_bail("pg_basebackup -D $test_standby_datadir -p $port_master -x >>$log_path 2>&1");
|
||||
append_to_file("$test_standby_datadir/recovery.conf", qq(
|
||||
primary_conninfo='$connstr_master'
|
||||
primary_conninfo='$connstr_master application_name=rewind_standby'
|
||||
standby_mode=on
|
||||
recovery_target_timeline='latest'
|
||||
));
|
||||
@ -193,8 +224,11 @@ recovery_target_timeline='latest'
|
||||
# Start standby
|
||||
system_or_bail("pg_ctl -w -D $test_standby_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_standby\" start >>$log_path 2>&1");
|
||||
|
||||
# sleep a bit to make sure the standby has caught up.
|
||||
sleep 1;
|
||||
# Wait until the standby has caught up with the primary, by polling
|
||||
# pg_stat_replication.
|
||||
my $caughtup_query = "SELECT pg_current_xlog_location() = replay_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
|
||||
poll_query_until($caughtup_query, $connstr_master)
|
||||
or die "Timed out while waiting for standby to catch up";
|
||||
}
|
||||
|
||||
sub promote_standby
|
||||
@ -203,9 +237,11 @@ sub promote_standby
|
||||
# up standby
|
||||
|
||||
# Now promote slave and insert some new data on master, this will put
|
||||
# the master out-of-sync with the standby.
|
||||
# the master out-of-sync with the standby. Wait until the standby is
|
||||
# out of recovery mode, and is ready to accept read-write connections.
|
||||
system_or_bail("pg_ctl -w -D $test_standby_datadir promote >>$log_path 2>&1");
|
||||
sleep 2;
|
||||
poll_query_until("SELECT NOT pg_is_in_recovery()", $connstr_standby)
|
||||
or die "Timed out while waiting for promotion of standby";
|
||||
}
|
||||
|
||||
sub run_pg_rewind
|
||||
|
Loading…
x
Reference in New Issue
Block a user