	Test that vacuum removes tuples older than OldestXmin
If vacuum fails to prune a tuple killed before OldestXmin, it will decide to
freeze its xmax and later error out in pre-freeze checks.

Add a test reproducing this scenario to the recovery suite which creates a
table on a primary, updates the table to generate dead tuples for vacuum, and
then, during the vacuum, uses a replica to force GlobalVisState->maybe_needed
on the primary to move backwards and precede the value of OldestXmin set at
the beginning of vacuuming the table.

This test is coverage for a case fixed in 83c39a1f7f. The test was originally
committed to master in aa607980ae but later reverted in efcbb76efe due to test
instability.

The test requires multiple index passes. In Postgres 17+, vacuum uses a TID
store for the dead TIDs that is very space efficient. With the old minimum
maintenance_work_mem of 1 MB, it required a large number of dead rows to
generate enough dead TIDs to force multiple index vacuuming passes. Once the
source code changes were made to allow a minimum maintenance_work_mem value of
64kB, the test could be made much faster and more stable.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAAKRu_ZJBkidusDut6i%3DbDCiXzJEp93GC1%2BNFaZt4eqanYF3Kw%40mail.gmail.com
Backpatch-through: 17
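For context, the failure described above surfaces as a hard error from VACUUM's pre-freeze checks rather than as silently retained tuples. As a rough, hypothetical sketch only (reusing the $node_primary, $test_db and $table1 names defined in the test below, and eliding the replica and cursor choreography that actually provokes the race), a check for the unpatched behaviour could look something like this; the committed test instead asserts that the VACUUM completes and that an index vacuuming pass really happened:

	# Hypothetical sketch, not part of the commit: on a server without the fix,
	# the VACUUM in this scenario fails its pre-freeze checks.
	my ($ret, $stdout, $stderr) =
	  $node_primary->psql($test_db, "VACUUM (FREEZE) $table1;");
	like($stderr, qr/cannot freeze committed xmax/,
		'unpatched server errors out in heap_pre_freeze_checks()');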
		@@ -54,7 +54,8 @@ tests += {
       't/043_no_contrecord_switch.pl',
       't/045_archive_restartpoint.pl',
       't/046_checkpoint_logical_slot.pl',
-      't/047_checkpoint_physical_slot.pl'
+      't/047_checkpoint_physical_slot.pl',
+      't/048_vacuum_horizon_floor.pl'
     ],
   },
 }
src/test/recovery/t/048_vacuum_horizon_floor.pl (new file, 278 lines)
@@ -0,0 +1,278 @@
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use Test::More;

# Test that vacuum prunes away all dead tuples killed before OldestXmin
#
# This test creates a table on a primary, updates the table to generate dead
# tuples for vacuum, and then, during the vacuum, uses the replica to force
# GlobalVisState->maybe_needed on the primary to move backwards and precede
# the value of OldestXmin set at the beginning of vacuuming the table.

# Set up nodes
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
$node_primary->init(allows_streaming => 'physical');

# io_combine_limit is set to 1 to avoid pinning more than one buffer at a time
# to ensure test determinism.
$node_primary->append_conf(
	'postgresql.conf', qq[
hot_standby_feedback = on
autovacuum = off
log_min_messages = INFO
maintenance_work_mem = 64
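# (value is in kB, so 64 means 64kB -- the minimum mentioned in the commit message)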
io_combine_limit = 1
]);
$node_primary->start;

my $node_replica = PostgreSQL::Test::Cluster->new('standby');

$node_primary->backup('my_backup');
$node_replica->init_from_backup($node_primary, 'my_backup',
	has_streaming => 1);

$node_replica->start;

my $test_db = "test_db";
$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db");

# Save the original connection info for later use
my $orig_conninfo = $node_primary->connstr();

my $table1 = "vac_horizon_floor_table";

# Long-running Primary Session A
my $psql_primaryA =
  $node_primary->background_psql($test_db, on_error_stop => 1);

# Long-running Primary Session B
my $psql_primaryB =
  $node_primary->background_psql($test_db, on_error_stop => 1);

# Our test relies on two rounds of index vacuuming for reasons elaborated
# later. To trigger two rounds of index vacuuming, we must fill up the
# TIDStore with dead items partway through a vacuum of the table. The number
# of rows is just enough to ensure we exceed maintenance_work_mem on all
# supported platforms, while keeping test runtime as short as we can.
my $nrows = 2000;

# Because vacuum's first pass, pruning, is where we use the GlobalVisState to
# check tuple visibility, GlobalVisState->maybe_needed must move backwards
# during pruning before checking the visibility for a tuple which would have
# been considered HEAPTUPLE_DEAD prior to maybe_needed moving backwards but
# HEAPTUPLE_RECENTLY_DEAD compared to the new, older value of maybe_needed.
#
# We must not only force the horizon on the primary to move backwards but also
# force the vacuuming backend's GlobalVisState to be updated. GlobalVisState
# is forced to update during index vacuuming.
#
# _bt_pendingfsm_finalize() calls GetOldestNonRemovableTransactionId() at the
# end of a round of index vacuuming, updating the backend's GlobalVisState
# and, in our case, moving maybe_needed backwards.
#
# Then vacuum's first (pruning) pass will continue and pruning will find our
# later inserted and updated tuple HEAPTUPLE_RECENTLY_DEAD when compared to
# maybe_needed but HEAPTUPLE_DEAD when compared to OldestXmin.
#
# Thus, we must force at least two rounds of index vacuuming to ensure that
# some tuple visibility checks will happen after a round of index vacuuming.
# To accomplish this, we set maintenance_work_mem to its minimum value and
# insert and delete enough rows that we force at least one round of index
# vacuuming before getting to a dead tuple which was killed after the standby
# is disconnected.
$node_primary->safe_psql($test_db, qq[
	CREATE TABLE ${table1}(col1 int)
		WITH (autovacuum_enabled=false, fillfactor=10);
	INSERT INTO $table1 VALUES(7);
	INSERT INTO $table1 SELECT generate_series(1, $nrows) % 3;
	CREATE INDEX on ${table1}(col1);
	DELETE FROM $table1 WHERE col1 = 0;
	INSERT INTO $table1 VALUES(7);
]);

# We will later move the primary forward while the standby is disconnected.
# For now, however, there is no reason not to wait for the standby to catch
# up.
my $primary_lsn = $node_primary->lsn('flush');
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);

# Test that the WAL receiver is up and running.
$node_replica->poll_query_until($test_db, qq[
	SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't');

# Set primary_conninfo to something invalid on the replica and reload the
# config. Once the config is reloaded, the startup process will force the WAL
# receiver to restart and it will be unable to reconnect because of the
# invalid connection information.
$node_replica->safe_psql($test_db, qq[
		ALTER SYSTEM SET primary_conninfo = '';
		SELECT pg_reload_conf();
	]);

# Wait until the WAL receiver has shut down and been unable to start up again.
$node_replica->poll_query_until($test_db, qq[
	SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f');

# Now insert and update a tuple which will be visible to the vacuum on the
# primary but which will have xmax newer than the oldest xmin on the standby
# that was recently disconnected.
my $res = $psql_primaryA->query_safe(
	qq[
		INSERT INTO $table1 VALUES (99);
		UPDATE $table1 SET col1 = 100 WHERE col1 = 99;
		SELECT 'after_update';
        ]
	);

# Make sure the UPDATE finished
like($res, qr/^after_update$/m, "UPDATE occurred on primary session A");

# Open a cursor on the primary whose pin will keep VACUUM from getting a
# cleanup lock on the first page of the relation. We want VACUUM to be able to
# start, calculate initial values for OldestXmin and GlobalVisState and then
# be unable to proceed with pruning our dead tuples. This will allow us to
# reconnect the standby and push the horizon back before we start actual
# pruning and vacuuming.
my $primary_cursor1 = "vac_horizon_floor_cursor1";

# The first value inserted into the table was a 7, so FETCH FORWARD should
# return a 7. That's how we know the cursor has a pin.
# Disable index scans so the cursor pins heap pages and not index pages.
$res = $psql_primaryB->query_safe(
	qq[
	BEGIN;
	SET enable_bitmapscan = off;
	SET enable_indexscan = off;
	SET enable_indexonlyscan = off;
	DECLARE $primary_cursor1 CURSOR FOR SELECT * FROM $table1 WHERE col1 = 7;
	FETCH $primary_cursor1;
	]
	);

is($res, 7, qq[Cursor query returned $res. Expected value 7.]);

# Get the PID of the session which will run the VACUUM FREEZE so that we can
# use it to filter pg_stat_activity later.
my $vacuum_pid = $psql_primaryA->query_safe("SELECT pg_backend_pid();");

# Now start a VACUUM FREEZE on the primary. It will call vacuum_get_cutoffs()
# and establish values of OldestXmin and GlobalVisState which are newer than
# all of our dead tuples. Then it will be unable to get a cleanup lock to
# start pruning, so it will hang.
#
# We use VACUUM FREEZE because it will wait for a cleanup lock instead of
# skipping the page pinned by the cursor. Note that this works because the
# target tuple's xmax precedes OldestXmin which ensures that
# lazy_scan_noprune() will return false and we will wait for the cleanup lock.
#
# Disable any prefetching, parallelism, or other concurrent I/O by vacuum. The
# pages of the heap must be processed in order by a single worker to ensure
# test stability (PARALLEL 0 shouldn't be necessary but guards against the
# possibility of parallel heap vacuuming).
$psql_primaryA->{stdin} .= qq[
		SET maintenance_io_concurrency = 0;
		VACUUM (VERBOSE, FREEZE, PARALLEL 0) $table1;
		\\echo VACUUM
        ];

# Make sure the VACUUM command makes it to the server.
$psql_primaryA->{run}->pump_nb();

# Make sure that the VACUUM has already called vacuum_get_cutoffs() and is
# just waiting on the lock to start vacuuming. We don't want the standby to
# re-establish a connection to the primary and push the horizon back until
# we've saved initial values in GlobalVisState and calculated OldestXmin.
$node_primary->poll_query_until($test_db,
	qq[
	SELECT count(*) >= 1 FROM pg_stat_activity
		WHERE pid = $vacuum_pid
		AND wait_event = 'BufferPin';
	],
	't');

# Ensure the WAL receiver is still not active on the replica.
$node_replica->poll_query_until($test_db, qq[
	SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f');

# Allow the WAL receiver connection to re-establish.
$node_replica->safe_psql(
	$test_db, qq[
		ALTER SYSTEM SET primary_conninfo = '$orig_conninfo';
		SELECT pg_reload_conf();
	]);

# Ensure the new WAL receiver has connected.
$node_replica->poll_query_until($test_db, qq[
	SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't');

# Once the WAL sender is shown on the primary, the replica should have
# connected with the primary and pushed the horizon backward. Primary Session
# A won't see that until the VACUUM FREEZE proceeds and does its first round
# of index vacuuming.
$node_primary->poll_query_until($test_db, qq[
	SELECT EXISTS (SELECT * FROM pg_stat_replication);] , 't');

# Move the cursor forward to the next 7. We inserted the 7 much later, so
# advancing the cursor should allow vacuum to proceed vacuuming most pages of
# the relation. Because we set maintenance_work_mem sufficiently low, we
# expect that a round of index vacuuming has happened and that the vacuum is
# now waiting for the cursor to release its pin on the last page of the
# relation.
$res = $psql_primaryB->query_safe("FETCH $primary_cursor1");
is($res, 7,
	qq[Cursor query returned $res from second fetch. Expected value 7.]);

# Prevent the test from incorrectly passing by confirming that we did indeed
# do a pass of index vacuuming.
$node_primary->poll_query_until($test_db, qq[
	SELECT index_vacuum_count > 0
	FROM pg_stat_progress_vacuum
	WHERE datname='$test_db' AND relid::regclass = '$table1'::regclass;
	] , 't');

# Commit the transaction with the open cursor so that the VACUUM can finish.
$psql_primaryB->query_until(
		qr/^commit$/m,
		qq[
			COMMIT;
			\\echo commit
        ]
	);

# VACUUM proceeds with pruning and does a visibility check on each tuple. In
# older versions of Postgres, pruning found our final dead tuple
# non-removable (HEAPTUPLE_RECENTLY_DEAD) since its xmax is after the new
# value of maybe_needed. Then heap_prepare_freeze_tuple() would decide the
# tuple xmax should be frozen because it precedes OldestXmin. Vacuum would
# then error out in heap_pre_freeze_checks() with "cannot freeze committed
# xmax". This was fixed by changing pruning to find all
# HEAPTUPLE_RECENTLY_DEAD tuples with xmaxes preceding OldestXmin
# HEAPTUPLE_DEAD and removing them.

# With the fix, VACUUM should finish successfully, incrementing the table
# vacuum_count.
$node_primary->poll_query_until($test_db,
	qq[
	SELECT vacuum_count > 0
	FROM pg_stat_all_tables WHERE relname = '${table1}';
	]
	, 't');

$primary_lsn = $node_primary->lsn('flush');

# Make sure something causes us to flush
$node_primary->safe_psql($test_db, "INSERT INTO $table1 VALUES (1);");

# Nothing on the replica should cause a recovery conflict, so this should
# finish successfully.
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);

## Shut down psqls
$psql_primaryA->quit;
$psql_primaryB->quit;

$node_replica->stop();
$node_primary->stop();

done_testing();
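As a usage note: assuming a make-based build configured with --enable-tap-tests, this test can be run on its own via the standard PROVE_TESTS mechanism, for example:

	make -C src/test/recovery check PROVE_TESTS='t/048_vacuum_horizon_floor.pl'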