mirror of
https://github.com/postgres/postgres.git
synced 2025-06-26 12:21:12 +03:00
Test that vacuum removes tuples older than OldestXmin
If vacuum fails to prune a tuple killed before OldestXmin, it will decide to freeze its xmax and later error out in pre-freeze checks. Add a test reproducing this scenario to the recovery suite which creates a table on a primary, updates the table to generate dead tuples for vacuum, and then, during the vacuum, uses a replica to force GlobalVisState->maybe_needed on the primary to move backwards and precede the value of OldestXmin set at the beginning of vacuuming the table. This test is coverage for a case fixed in83c39a1f7f
. The test was originally committed to master inaa607980ae
but later reverted inefcbb76efe
due to test instability. The test requires multiple index passes. In Postgres 17+, vacuum uses a TID store for the dead TIDs that is very space efficient. With the old minimum maintenance_work_mem of 1 MB, it required a large number of dead rows to generate enough dead TIDs to force multiple index vacuuming passes. Once the source code changes were made to allow a minimum maintenance_work_mem value of 64kB, the test could be made much faster and more stable. Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-by: John Naylor <johncnaylorls@gmail.com> Reviewed-by: Peter Geoghegan <pg@bowt.ie> Discussion: https://postgr.es/m/CAAKRu_ZJBkidusDut6i%3DbDCiXzJEp93GC1%2BNFaZt4eqanYF3Kw%40mail.gmail.com Backpatch-through: 17
This commit is contained in:
@ -54,7 +54,8 @@ tests += {
|
||||
't/043_no_contrecord_switch.pl',
|
||||
't/045_archive_restartpoint.pl',
|
||||
't/046_checkpoint_logical_slot.pl',
|
||||
't/047_checkpoint_physical_slot.pl'
|
||||
't/047_checkpoint_physical_slot.pl',
|
||||
't/048_vacuum_horizon_floor.pl'
|
||||
],
|
||||
},
|
||||
}
|
||||
|
278
src/test/recovery/t/048_vacuum_horizon_floor.pl
Normal file
278
src/test/recovery/t/048_vacuum_horizon_floor.pl
Normal file
@ -0,0 +1,278 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use Test::More;
|
||||
|
||||
# Test that vacuum prunes away all dead tuples killed before OldestXmin
|
||||
#
|
||||
# This test creates a table on a primary, updates the table to generate dead
|
||||
# tuples for vacuum, and then, during the vacuum, uses the replica to force
|
||||
# GlobalVisState->maybe_needed on the primary to move backwards and precede
|
||||
# the value of OldestXmin set at the beginning of vacuuming the table.
|
||||
|
||||
# Set up nodes
|
||||
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
|
||||
$node_primary->init(allows_streaming => 'physical');
|
||||
|
||||
# io_combine_limit is set to 1 to avoid pinning more than one buffer at a time
|
||||
# to ensure test determinism.
|
||||
$node_primary->append_conf(
|
||||
'postgresql.conf', qq[
|
||||
hot_standby_feedback = on
|
||||
autovacuum = off
|
||||
log_min_messages = INFO
|
||||
maintenance_work_mem = 64
|
||||
io_combine_limit = 1
|
||||
]);
|
||||
$node_primary->start;
|
||||
|
||||
my $node_replica = PostgreSQL::Test::Cluster->new('standby');
|
||||
|
||||
$node_primary->backup('my_backup');
|
||||
$node_replica->init_from_backup($node_primary, 'my_backup',
|
||||
has_streaming => 1);
|
||||
|
||||
$node_replica->start;
|
||||
|
||||
my $test_db = "test_db";
|
||||
$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db");
|
||||
|
||||
# Save the original connection info for later use
|
||||
my $orig_conninfo = $node_primary->connstr();
|
||||
|
||||
my $table1 = "vac_horizon_floor_table";
|
||||
|
||||
# Long-running Primary Session A
|
||||
my $psql_primaryA =
|
||||
$node_primary->background_psql($test_db, on_error_stop => 1);
|
||||
|
||||
# Long-running Primary Session B
|
||||
my $psql_primaryB =
|
||||
$node_primary->background_psql($test_db, on_error_stop => 1);
|
||||
|
||||
# Our test relies on two rounds of index vacuuming for reasons elaborated
|
||||
# later. To trigger two rounds of index vacuuming, we must fill up the
|
||||
# TIDStore with dead items partway through a vacuum of the table. The number
|
||||
# of rows is just enough to ensure we exceed maintenance_work_mem on all
|
||||
# supported platforms, while keeping test runtime as short as we can.
|
||||
my $nrows = 2000;
|
||||
|
||||
# Because vacuum's first pass, pruning, is where we use the GlobalVisState to
|
||||
# check tuple visibility, GlobalVisState->maybe_needed must move backwards
|
||||
# during pruning before checking the visibility for a tuple which would have
|
||||
# been considered HEAPTUPLE_DEAD prior to maybe_needed moving backwards but
|
||||
# HEAPTUPLE_RECENTLY_DEAD compared to the new, older value of maybe_needed.
|
||||
#
|
||||
# We must not only force the horizon on the primary to move backwards but also
|
||||
# force the vacuuming backend's GlobalVisState to be updated. GlobalVisState
|
||||
# is forced to update during index vacuuming.
|
||||
#
|
||||
# _bt_pendingfsm_finalize() calls GetOldestNonRemovableTransactionId() at the
|
||||
# end of a round of index vacuuming, updating the backend's GlobalVisState
|
||||
# and, in our case, moving maybe_needed backwards.
|
||||
#
|
||||
# Then vacuum's first (pruning) pass will continue and pruning will find our
|
||||
# later inserted and updated tuple HEAPTUPLE_RECENTLY_DEAD when compared to
|
||||
# maybe_needed but HEAPTUPLE_DEAD when compared to OldestXmin.
|
||||
#
|
||||
# Thus, we must force at least two rounds of index vacuuming to ensure that
|
||||
# some tuple visibility checks will happen after a round of index vacuuming.
|
||||
# To accomplish this, we set maintenance_work_mem to its minimum value and
|
||||
# insert and delete enough rows that we force at least one round of index
|
||||
# vacuuming before getting to a dead tuple which was killed after the standby
|
||||
# is disconnected.
|
||||
$node_primary->safe_psql($test_db, qq[
|
||||
CREATE TABLE ${table1}(col1 int)
|
||||
WITH (autovacuum_enabled=false, fillfactor=10);
|
||||
INSERT INTO $table1 VALUES(7);
|
||||
INSERT INTO $table1 SELECT generate_series(1, $nrows) % 3;
|
||||
CREATE INDEX on ${table1}(col1);
|
||||
DELETE FROM $table1 WHERE col1 = 0;
|
||||
INSERT INTO $table1 VALUES(7);
|
||||
]);
|
||||
|
||||
# We will later move the primary forward while the standby is disconnected.
|
||||
# For now, however, there is no reason not to wait for the standby to catch
|
||||
# up.
|
||||
my $primary_lsn = $node_primary->lsn('flush');
|
||||
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);
|
||||
|
||||
# Test that the WAL receiver is up and running.
|
||||
$node_replica->poll_query_until($test_db, qq[
|
||||
SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't');
|
||||
|
||||
# Set primary_conninfo to something invalid on the replica and reload the
|
||||
# config. Once the config is reloaded, the startup process will force the WAL
|
||||
# receiver to restart and it will be unable to reconnect because of the
|
||||
# invalid connection information.
|
||||
$node_replica->safe_psql($test_db, qq[
|
||||
ALTER SYSTEM SET primary_conninfo = '';
|
||||
SELECT pg_reload_conf();
|
||||
]);
|
||||
|
||||
# Wait until the WAL receiver has shut down and been unable to start up again.
|
||||
$node_replica->poll_query_until($test_db, qq[
|
||||
SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f');
|
||||
|
||||
# Now insert and update a tuple which will be visible to the vacuum on the
|
||||
# primary but which will have xmax newer than the oldest xmin on the standby
|
||||
# that was recently disconnected.
|
||||
my $res = $psql_primaryA->query_safe(
|
||||
qq[
|
||||
INSERT INTO $table1 VALUES (99);
|
||||
UPDATE $table1 SET col1 = 100 WHERE col1 = 99;
|
||||
SELECT 'after_update';
|
||||
]
|
||||
);
|
||||
|
||||
# Make sure the UPDATE finished
|
||||
like($res, qr/^after_update$/m, "UPDATE occurred on primary session A");
|
||||
|
||||
# Open a cursor on the primary whose pin will keep VACUUM from getting a
|
||||
# cleanup lock on the first page of the relation. We want VACUUM to be able to
|
||||
# start, calculate initial values for OldestXmin and GlobalVisState and then
|
||||
# be unable to proceed with pruning our dead tuples. This will allow us to
|
||||
# reconnect the standby and push the horizon back before we start actual
|
||||
# pruning and vacuuming.
|
||||
my $primary_cursor1 = "vac_horizon_floor_cursor1";
|
||||
|
||||
# The first value inserted into the table was a 7, so FETCH FORWARD should
|
||||
# return a 7. That's how we know the cursor has a pin.
|
||||
# Disable index scans so the cursor pins heap pages and not index pages.
|
||||
$res = $psql_primaryB->query_safe(
|
||||
qq[
|
||||
BEGIN;
|
||||
SET enable_bitmapscan = off;
|
||||
SET enable_indexscan = off;
|
||||
SET enable_indexonlyscan = off;
|
||||
DECLARE $primary_cursor1 CURSOR FOR SELECT * FROM $table1 WHERE col1 = 7;
|
||||
FETCH $primary_cursor1;
|
||||
]
|
||||
);
|
||||
|
||||
is($res, 7, qq[Cursor query returned $res. Expected value 7.]);
|
||||
|
||||
# Get the PID of the session which will run the VACUUM FREEZE so that we can
|
||||
# use it to filter pg_stat_activity later.
|
||||
my $vacuum_pid = $psql_primaryA->query_safe("SELECT pg_backend_pid();");
|
||||
|
||||
# Now start a VACUUM FREEZE on the primary. It will call vacuum_get_cutoffs()
|
||||
# and establish values of OldestXmin and GlobalVisState which are newer than
|
||||
# all of our dead tuples. Then it will be unable to get a cleanup lock to
|
||||
# start pruning, so it will hang.
|
||||
#
|
||||
# We use VACUUM FREEZE because it will wait for a cleanup lock instead of
|
||||
# skipping the page pinned by the cursor. Note that works because the target
|
||||
# tuple's xmax precedes OldestXmin which ensures that lazy_scan_noprune() will
|
||||
# return false and we will wait for the cleanup lock.
|
||||
#
|
||||
# Disable any prefetching, parallelism, or other concurrent I/O by vacuum. The
|
||||
# pages of the heap must be processed in order by a single worker to ensure
|
||||
# test stability (PARALLEL 0 shouldn't be necessary but guards against the
|
||||
# possibility of parallel heap vacuuming).
|
||||
$psql_primaryA->{stdin} .= qq[
|
||||
SET maintenance_io_concurrency = 0;
|
||||
VACUUM (VERBOSE, FREEZE, PARALLEL 0) $table1;
|
||||
\\echo VACUUM
|
||||
];
|
||||
|
||||
# Make sure the VACUUM command makes it to the server.
|
||||
$psql_primaryA->{run}->pump_nb();
|
||||
|
||||
# Make sure that the VACUUM has already called vacuum_get_cutoffs() and is
|
||||
# just waiting on the lock to start vacuuming. We don't want the standby to
|
||||
# re-establish a connection to the primary and push the horizon back until
|
||||
# we've saved initial values in GlobalVisState and calculated OldestXmin.
|
||||
$node_primary->poll_query_until($test_db,
|
||||
qq[
|
||||
SELECT count(*) >= 1 FROM pg_stat_activity
|
||||
WHERE pid = $vacuum_pid
|
||||
AND wait_event = 'BufferPin';
|
||||
],
|
||||
't');
|
||||
|
||||
# Ensure the WAL receiver is still not active on the replica.
|
||||
$node_replica->poll_query_until($test_db, qq[
|
||||
SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f');
|
||||
|
||||
# Allow the WAL receiver connection to re-establish.
|
||||
$node_replica->safe_psql(
|
||||
$test_db, qq[
|
||||
ALTER SYSTEM SET primary_conninfo = '$orig_conninfo';
|
||||
SELECT pg_reload_conf();
|
||||
]);
|
||||
|
||||
# Ensure the new WAL receiver has connected.
|
||||
$node_replica->poll_query_until($test_db, qq[
|
||||
SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't');
|
||||
|
||||
# Once the WAL sender is shown on the primary, the replica should have
|
||||
# connected with the primary and pushed the horizon backward. Primary Session
|
||||
# A won't see that until the VACUUM FREEZE proceeds and does its first round
|
||||
# of index vacuuming.
|
||||
$node_primary->poll_query_until($test_db, qq[
|
||||
SELECT EXISTS (SELECT * FROM pg_stat_replication);] , 't');
|
||||
|
||||
# Move the cursor forward to the next 7. We inserted the 7 much later, so
|
||||
# advancing the cursor should allow vacuum to proceed vacuuming most pages of
|
||||
# the relation. Because we set maintanence_work_mem sufficiently low, we
|
||||
# expect that a round of index vacuuming has happened and that the vacuum is
|
||||
# now waiting for the cursor to release its pin on the last page of the
|
||||
# relation.
|
||||
$res = $psql_primaryB->query_safe("FETCH $primary_cursor1");
|
||||
is($res, 7,
|
||||
qq[Cursor query returned $res from second fetch. Expected value 7.]);
|
||||
|
||||
# Prevent the test from incorrectly passing by confirming that we did indeed
|
||||
# do a pass of index vacuuming.
|
||||
$node_primary->poll_query_until($test_db, qq[
|
||||
SELECT index_vacuum_count > 0
|
||||
FROM pg_stat_progress_vacuum
|
||||
WHERE datname='$test_db' AND relid::regclass = '$table1'::regclass;
|
||||
] , 't');
|
||||
|
||||
# Commit the transaction with the open cursor so that the VACUUM can finish.
|
||||
$psql_primaryB->query_until(
|
||||
qr/^commit$/m,
|
||||
qq[
|
||||
COMMIT;
|
||||
\\echo commit
|
||||
]
|
||||
);
|
||||
|
||||
# VACUUM proceeds with pruning and does a visibility check on each tuple. In
|
||||
# older versions of Postgres, pruning found our final dead tuple
|
||||
# non-removable (HEAPTUPLE_RECENTLY_DEAD) since its xmax is after the new
|
||||
# value of maybe_needed. Then heap_prepare_freeze_tuple() would decide the
|
||||
# tuple xmax should be frozen because it precedes OldestXmin. Vacuum would
|
||||
# then error out in heap_pre_freeze_checks() with "cannot freeze committed
|
||||
# xmax". This was fixed by changing pruning to find all
|
||||
# HEAPTUPLE_RECENTLY_DEAD tuples with xmaxes preceding OldestXmin
|
||||
# HEAPTUPLE_DEAD and removing them.
|
||||
|
||||
# With the fix, VACUUM should finish successfully, incrementing the table
|
||||
# vacuum_count.
|
||||
$node_primary->poll_query_until($test_db,
|
||||
qq[
|
||||
SELECT vacuum_count > 0
|
||||
FROM pg_stat_all_tables WHERE relname = '${table1}';
|
||||
]
|
||||
, 't');
|
||||
|
||||
$primary_lsn = $node_primary->lsn('flush');
|
||||
|
||||
# Make sure something causes us to flush
|
||||
$node_primary->safe_psql($test_db, "INSERT INTO $table1 VALUES (1);");
|
||||
|
||||
# Nothing on the replica should cause a recovery conflict, so this should
|
||||
# finish successfully.
|
||||
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);
|
||||
|
||||
## Shut down psqls
|
||||
$psql_primaryA->quit;
|
||||
$psql_primaryB->quit;
|
||||
|
||||
$node_replica->stop();
|
||||
$node_primary->stop();
|
||||
|
||||
done_testing();
|
Reference in New Issue
Block a user