mirror of
https://github.com/postgres/postgres.git
synced 2025-04-25 21:42:33 +03:00
Stabilize 035_standby_logical_decoding.pl.
Some tests try to invalidate logical slots on the standby server by running VACUUM on the primary. The problem is that xl_running_xacts was getting generated and replayed before the VACUUM command, leading to the advancement of the active slot's catalog_xmin. Due to this, active slots were not getting invalidated, leading to test failures. We fix it by skipping the generation of xl_running_xacts for the required tests with the help of injection points. As the required interface for injection points was not present in back branches, we fixed the failing tests in them by disallowing the slot to become active for the required cases (where rows_removed conflict could be generated). Author: Hayato Kuroda <kuroda.hayato@fujitsu.com> Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Backpatch-through: 16, where it was introduced Discussion: https://postgr.es/m/Z6oQXc8LmiTLfwLA@ip-10-97-1-34.eu-west-3.compute.internal
This commit is contained in:
parent
b8b1e87b70
commit
17a165d60f
@ -205,9 +205,6 @@ sub reactive_slots_change_hfs_and_wait_for_xmins
|
||||
|
||||
change_hot_standby_feedback_and_wait_for_xmins($hsf, $invalidated);
|
||||
|
||||
$handle =
|
||||
make_slot_active($node_standby, $slot_prefix, 1, \$stdout, \$stderr);
|
||||
|
||||
# reset stat: easier to check for confl_active_logicalslot in pg_stat_database_conflicts
|
||||
$node_standby->psql('testdb', q[select pg_stat_reset();]);
|
||||
}
|
||||
@ -215,7 +212,7 @@ sub reactive_slots_change_hfs_and_wait_for_xmins
|
||||
# Check invalidation in the logfile and in pg_stat_database_conflicts
|
||||
sub check_for_invalidation
|
||||
{
|
||||
my ($slot_prefix, $log_start, $test_name) = @_;
|
||||
my ($slot_prefix, $log_start, $test_name, $checks_active_slot) = @_;
|
||||
|
||||
my $active_slot = $slot_prefix . 'activeslot';
|
||||
my $inactive_slot = $slot_prefix . 'inactiveslot';
|
||||
@ -231,13 +228,17 @@ sub check_for_invalidation
|
||||
$log_start),
|
||||
"activeslot slot invalidation is logged $test_name");
|
||||
|
||||
# Verify that pg_stat_database_conflicts.confl_active_logicalslot has been updated
|
||||
if ($checks_active_slot)
|
||||
{
|
||||
# Verify that pg_stat_database_conflicts.confl_active_logicalslot has
|
||||
# been updated
|
||||
ok( $node_standby->poll_query_until(
|
||||
'postgres',
|
||||
"select (confl_active_logicalslot = 1) from pg_stat_database_conflicts where datname = 'testdb'",
|
||||
't'),
|
||||
'confl_active_logicalslot updated'
|
||||
) or die "Timed out waiting confl_active_logicalslot to be updated";
|
||||
}
|
||||
}
|
||||
|
||||
# Launch $sql query, wait for a new snapshot that has a newer horizon and
|
||||
@ -250,7 +251,11 @@ sub check_for_invalidation
|
||||
# seeing a xl_running_xacts that would advance an active replication slot's
|
||||
# catalog_xmin. Advancing the active replication slot's catalog_xmin
|
||||
# would break some tests that expect the active slot to conflict with
|
||||
# the catalog xmin horizon.
|
||||
# the catalog xmin horizon. Even with the above precaution, there is a risk
|
||||
# of xl_running_xacts record being logged and replayed before the VACUUM
|
||||
# command, leading to the test failure. So, we ensured that replication slots
|
||||
# are not activated for tests that can invalidate slots due to 'rows_removed'
|
||||
# conflict reason.
|
||||
sub wait_until_vacuum_can_remove
|
||||
{
|
||||
my ($vac_option, $sql, $to_vac) = @_;
|
||||
@ -532,11 +537,8 @@ $node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub");
|
||||
$node_subscriber->stop;
|
||||
|
||||
##################################################
|
||||
# Recovery conflict: Invalidate conflicting slots, including in-use slots
|
||||
# Recovery conflict: Invalidate conflicting slots
|
||||
# Scenario 1: hot_standby_feedback off and vacuum FULL
|
||||
#
|
||||
# In passing, ensure that replication slot stats are not removed when the
|
||||
# active slot is invalidated.
|
||||
##################################################
|
||||
|
||||
# One way to produce recovery conflict is to create/drop a relation and
|
||||
@ -550,10 +552,6 @@ reactive_slots_change_hfs_and_wait_for_xmins('behaves_ok_', 'vacuum_full_',
|
||||
$node_primary->safe_psql('testdb',
|
||||
qq[INSERT INTO decoding_test(x,y) SELECT 100,'100';]);
|
||||
|
||||
$node_standby->poll_query_until('testdb',
|
||||
qq[SELECT total_txns > 0 FROM pg_stat_replication_slots WHERE slot_name = 'vacuum_full_activeslot']
|
||||
) or die "replication slot stats of vacuum_full_activeslot not updated";
|
||||
|
||||
# This should trigger the conflict
|
||||
wait_until_vacuum_can_remove(
|
||||
'full', 'CREATE TABLE conflict_test(x integer, y text);
|
||||
@ -562,19 +560,11 @@ wait_until_vacuum_can_remove(
|
||||
$node_primary->wait_for_replay_catchup($node_standby);
|
||||
|
||||
# Check invalidation in the logfile and in pg_stat_database_conflicts
|
||||
check_for_invalidation('vacuum_full_', 1, 'with vacuum FULL on pg_class');
|
||||
check_for_invalidation('vacuum_full_', 1, 'with vacuum FULL on pg_class', 0);
|
||||
|
||||
# Verify reason for conflict is 'rows_removed' in pg_replication_slots
|
||||
check_slots_conflict_reason('vacuum_full_', 'rows_removed');
|
||||
|
||||
# Ensure that replication slot stats are not removed after invalidation.
|
||||
is( $node_standby->safe_psql(
|
||||
'testdb',
|
||||
qq[SELECT total_txns > 0 FROM pg_stat_replication_slots WHERE slot_name = 'vacuum_full_activeslot']
|
||||
),
|
||||
't',
|
||||
'replication slot stats not removed after invalidation');
|
||||
|
||||
$handle =
|
||||
make_slot_active($node_standby, 'vacuum_full_', 0, \$stdout, \$stderr);
|
||||
|
||||
@ -639,7 +629,7 @@ ok(!-f "$standby_walfile",
|
||||
"invalidated logical slots do not lead to retaining WAL");
|
||||
|
||||
##################################################
|
||||
# Recovery conflict: Invalidate conflicting slots, including in-use slots
|
||||
# Recovery conflict: Invalidate conflicting slots
|
||||
# Scenario 2: conflict due to row removal with hot_standby_feedback off.
|
||||
##################################################
|
||||
|
||||
@ -660,7 +650,7 @@ wait_until_vacuum_can_remove(
|
||||
$node_primary->wait_for_replay_catchup($node_standby);
|
||||
|
||||
# Check invalidation in the logfile and in pg_stat_database_conflicts
|
||||
check_for_invalidation('row_removal_', $logstart, 'with vacuum on pg_class');
|
||||
check_for_invalidation('row_removal_', $logstart, 'with vacuum on pg_class', 0);
|
||||
|
||||
# Verify reason for conflict is 'rows_removed' in pg_replication_slots
|
||||
check_slots_conflict_reason('row_removal_', 'rows_removed');
|
||||
@ -696,7 +686,7 @@ $node_primary->wait_for_replay_catchup($node_standby);
|
||||
|
||||
# Check invalidation in the logfile and in pg_stat_database_conflicts
|
||||
check_for_invalidation('shared_row_removal_', $logstart,
|
||||
'with vacuum on pg_authid');
|
||||
'with vacuum on pg_authid', 0);
|
||||
|
||||
# Verify reason for conflict is 'rows_removed' in pg_replication_slots
|
||||
check_slots_conflict_reason('shared_row_removal_', 'rows_removed');
|
||||
@ -720,6 +710,10 @@ $logstart = -s $node_standby->logfile;
|
||||
reactive_slots_change_hfs_and_wait_for_xmins('shared_row_removal_',
|
||||
'no_conflict_', 0, 1);
|
||||
|
||||
# As this scenario is not expected to produce any conflict, so activate the slot.
|
||||
# See comments atop wait_until_vacuum_can_remove().
|
||||
make_slot_active($node_standby, 'no_conflict_', 1, \$stdout, \$stderr);
|
||||
|
||||
# This should not trigger a conflict
|
||||
wait_until_vacuum_can_remove(
|
||||
'', 'CREATE TABLE conflict_test(x integer, y text);
|
||||
@ -763,7 +757,7 @@ change_hot_standby_feedback_and_wait_for_xmins(1, 0);
|
||||
$node_standby->restart;
|
||||
|
||||
##################################################
|
||||
# Recovery conflict: Invalidate conflicting slots, including in-use slots
|
||||
# Recovery conflict: Invalidate conflicting slots
|
||||
# Scenario 5: conflict due to on-access pruning.
|
||||
##################################################
|
||||
|
||||
@ -788,7 +782,7 @@ $node_primary->safe_psql('testdb', qq[UPDATE prun SET s = 'E';]);
|
||||
$node_primary->wait_for_replay_catchup($node_standby);
|
||||
|
||||
# Check invalidation in the logfile and in pg_stat_database_conflicts
|
||||
check_for_invalidation('pruning_', $logstart, 'with on-access pruning');
|
||||
check_for_invalidation('pruning_', $logstart, 'with on-access pruning', 0);
|
||||
|
||||
# Verify reason for conflict is 'rows_removed' in pg_replication_slots
|
||||
check_slots_conflict_reason('pruning_', 'rows_removed');
|
||||
@ -832,7 +826,7 @@ $node_primary->restart;
|
||||
$node_primary->wait_for_replay_catchup($node_standby);
|
||||
|
||||
# Check invalidation in the logfile and in pg_stat_database_conflicts
|
||||
check_for_invalidation('wal_level_', $logstart, 'due to wal_level');
|
||||
check_for_invalidation('wal_level_', $logstart, 'due to wal_level', 1);
|
||||
|
||||
# Verify reason for conflict is 'wal_level_insufficient' in pg_replication_slots
|
||||
check_slots_conflict_reason('wal_level_', 'wal_level_insufficient');
|
||||
|
Loading…
x
Reference in New Issue
Block a user