mirror of https://github.com/postgres/postgres.git
Test that vacuum removes tuples older than OldestXmin
If vacuum fails to prune a tuple killed before OldestXmin, it will decide to freeze its xmax and later error out in pre-freeze checks.

Add a test reproducing this scenario to the recovery suite which creates a table on a primary, updates the table to generate dead tuples for vacuum, and then, during the vacuum, uses a replica to force GlobalVisState->maybe_needed on the primary to move backwards and precede the value of OldestXmin set at the beginning of vacuuming the table.

This test is coverage for a case fixed in 83c39a1f7f. The test was originally committed to master in aa607980ae but later reverted in efcbb76efe due to test instability.

The test requires multiple index passes. In Postgres 17+, vacuum uses a TID store for the dead TIDs that is very space efficient. With the old minimum maintenance_work_mem of 1 MB, it required a large number of dead rows to generate enough dead TIDs to force multiple index vacuuming passes. Once the source code changes were made to allow a minimum maintenance_work_mem value of 64kB, the test could be made much faster and more stable.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAAKRu_ZJBkidusDut6i%3DbDCiXzJEp93GC1%2BNFaZt4eqanYF3Kw%40mail.gmail.com
Backpatch-through: 17
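The maintenance_work_mem point above can be sketched in a few lines of SQL. This is an illustrative session only, not part of the commit: the table name vac_horizon_demo is made up here, and it assumes a server that already carries the change allowing the 64kB floor. With such a small dead-TID budget and a low fillfactor spreading rows across many pages, a few hundred deleted rows are expected to overflow the TID store and force more than one index vacuuming pass:

    -- illustrative only: force multiple index vacuuming passes cheaply
    SET maintenance_work_mem = '64kB';   -- rejected on servers whose minimum is still 1 MB
    CREATE TABLE vac_horizon_demo(col1 int)
        WITH (autovacuum_enabled = false, fillfactor = 10);
    INSERT INTO vac_horizon_demo SELECT generate_series(1, 2000) % 3;
    CREATE INDEX ON vac_horizon_demo(col1);
    DELETE FROM vac_horizon_demo WHERE col1 = 0;
    -- the VERBOSE summary line ("index scans: N") reports how many passes ran;
    -- with these settings more than one pass is expected
    VACUUM (VERBOSE) vac_horizon_demo;

The test below builds on this by pinning a page with a cursor and reconnecting a standby mid-vacuum, so that the extra index vacuuming pass refreshes the backend's GlobalVisState after the horizon has moved backwards.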
src/test/recovery/meson.build
@@ -54,7 +54,8 @@ tests += {
       't/043_no_contrecord_switch.pl',
       't/044_invalidate_inactive_slots.pl',
       't/045_archive_restartpoint.pl',
-      't/047_checkpoint_physical_slot.pl'
+      't/047_checkpoint_physical_slot.pl',
+      't/048_vacuum_horizon_floor.pl'
     ],
   },
 }
src/test/recovery/t/048_vacuum_horizon_floor.pl (new file, 278 lines)
@@ -0,0 +1,278 @@
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use Test::More;

# Test that vacuum prunes away all dead tuples killed before OldestXmin
#
# This test creates a table on a primary, updates the table to generate dead
# tuples for vacuum, and then, during the vacuum, uses the replica to force
# GlobalVisState->maybe_needed on the primary to move backwards and precede
# the value of OldestXmin set at the beginning of vacuuming the table.

# Set up nodes
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
$node_primary->init(allows_streaming => 'physical');

# io_combine_limit is set to 1 to avoid pinning more than one buffer at a time
# to ensure test determinism.
$node_primary->append_conf(
    'postgresql.conf', qq[
hot_standby_feedback = on
autovacuum = off
log_min_messages = INFO
maintenance_work_mem = 64
io_combine_limit = 1
]);
$node_primary->start;

my $node_replica = PostgreSQL::Test::Cluster->new('standby');

$node_primary->backup('my_backup');
$node_replica->init_from_backup($node_primary, 'my_backup',
    has_streaming => 1);

$node_replica->start;

my $test_db = "test_db";
$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db");

# Save the original connection info for later use
my $orig_conninfo = $node_primary->connstr();

my $table1 = "vac_horizon_floor_table";

# Long-running Primary Session A
my $psql_primaryA =
  $node_primary->background_psql($test_db, on_error_stop => 1);

# Long-running Primary Session B
my $psql_primaryB =
  $node_primary->background_psql($test_db, on_error_stop => 1);

# Our test relies on two rounds of index vacuuming for reasons elaborated
# later. To trigger two rounds of index vacuuming, we must fill up the
# TIDStore with dead items partway through a vacuum of the table. The number
# of rows is just enough to ensure we exceed maintenance_work_mem on all
# supported platforms, while keeping test runtime as short as we can.
my $nrows = 2000;

# Because vacuum's first pass, pruning, is where we use the GlobalVisState to
# check tuple visibility, GlobalVisState->maybe_needed must move backwards
# during pruning before checking the visibility for a tuple which would have
# been considered HEAPTUPLE_DEAD prior to maybe_needed moving backwards but
# HEAPTUPLE_RECENTLY_DEAD compared to the new, older value of maybe_needed.
#
# We must not only force the horizon on the primary to move backwards but also
# force the vacuuming backend's GlobalVisState to be updated. GlobalVisState
# is forced to update during index vacuuming.
#
# _bt_pendingfsm_finalize() calls GetOldestNonRemovableTransactionId() at the
# end of a round of index vacuuming, updating the backend's GlobalVisState
# and, in our case, moving maybe_needed backwards.
#
# Then vacuum's first (pruning) pass will continue and pruning will find our
# later inserted and updated tuple HEAPTUPLE_RECENTLY_DEAD when compared to
# maybe_needed but HEAPTUPLE_DEAD when compared to OldestXmin.
#
# Thus, we must force at least two rounds of index vacuuming to ensure that
# some tuple visibility checks will happen after a round of index vacuuming.
# To accomplish this, we set maintenance_work_mem to its minimum value and
# insert and delete enough rows that we force at least one round of index
# vacuuming before getting to a dead tuple which was killed after the standby
# is disconnected.
$node_primary->safe_psql($test_db, qq[
    CREATE TABLE ${table1}(col1 int)
        WITH (autovacuum_enabled=false, fillfactor=10);
    INSERT INTO $table1 VALUES(7);
    INSERT INTO $table1 SELECT generate_series(1, $nrows) % 3;
    CREATE INDEX on ${table1}(col1);
    DELETE FROM $table1 WHERE col1 = 0;
    INSERT INTO $table1 VALUES(7);
]);

# We will later move the primary forward while the standby is disconnected.
# For now, however, there is no reason not to wait for the standby to catch
# up.
my $primary_lsn = $node_primary->lsn('flush');
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);

# Test that the WAL receiver is up and running.
$node_replica->poll_query_until($test_db, qq[
    SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't');

# Set primary_conninfo to something invalid on the replica and reload the
# config. Once the config is reloaded, the startup process will force the WAL
# receiver to restart and it will be unable to reconnect because of the
# invalid connection information.
$node_replica->safe_psql($test_db, qq[
    ALTER SYSTEM SET primary_conninfo = '';
    SELECT pg_reload_conf();
]);

# Wait until the WAL receiver has shut down and been unable to start up again.
$node_replica->poll_query_until($test_db, qq[
    SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f');

# Now insert and update a tuple which will be visible to the vacuum on the
# primary but which will have xmax newer than the oldest xmin on the standby
# that was recently disconnected.
my $res = $psql_primaryA->query_safe(
    qq[
    INSERT INTO $table1 VALUES (99);
    UPDATE $table1 SET col1 = 100 WHERE col1 = 99;
    SELECT 'after_update';
]
);

# Make sure the UPDATE finished
like($res, qr/^after_update$/m, "UPDATE occurred on primary session A");

# Open a cursor on the primary whose pin will keep VACUUM from getting a
# cleanup lock on the first page of the relation. We want VACUUM to be able to
# start, calculate initial values for OldestXmin and GlobalVisState and then
# be unable to proceed with pruning our dead tuples. This will allow us to
# reconnect the standby and push the horizon back before we start actual
# pruning and vacuuming.
my $primary_cursor1 = "vac_horizon_floor_cursor1";

# The first value inserted into the table was a 7, so FETCH FORWARD should
# return a 7. That's how we know the cursor has a pin.
# Disable index scans so the cursor pins heap pages and not index pages.
$res = $psql_primaryB->query_safe(
    qq[
    BEGIN;
    SET enable_bitmapscan = off;
    SET enable_indexscan = off;
    SET enable_indexonlyscan = off;
    DECLARE $primary_cursor1 CURSOR FOR SELECT * FROM $table1 WHERE col1 = 7;
    FETCH $primary_cursor1;
]
);

is($res, 7, qq[Cursor query returned $res. Expected value 7.]);

# Get the PID of the session which will run the VACUUM FREEZE so that we can
# use it to filter pg_stat_activity later.
my $vacuum_pid = $psql_primaryA->query_safe("SELECT pg_backend_pid();");

# Now start a VACUUM FREEZE on the primary. It will call vacuum_get_cutoffs()
# and establish values of OldestXmin and GlobalVisState which are newer than
# all of our dead tuples. Then it will be unable to get a cleanup lock to
# start pruning, so it will hang.
#
# We use VACUUM FREEZE because it will wait for a cleanup lock instead of
# skipping the page pinned by the cursor. Note that this works because the
# target tuple's xmax precedes OldestXmin, which ensures that
# lazy_scan_noprune() will return false and we will wait for the cleanup lock.
#
# Disable any prefetching, parallelism, or other concurrent I/O by vacuum. The
# pages of the heap must be processed in order by a single worker to ensure
# test stability (PARALLEL 0 shouldn't be necessary but guards against the
# possibility of parallel heap vacuuming).
$psql_primaryA->{stdin} .= qq[
    SET maintenance_io_concurrency = 0;
    VACUUM (VERBOSE, FREEZE, PARALLEL 0) $table1;
    \\echo VACUUM
    ];

# Make sure the VACUUM command makes it to the server.
$psql_primaryA->{run}->pump_nb();

# Make sure that the VACUUM has already called vacuum_get_cutoffs() and is
# just waiting on the lock to start vacuuming. We don't want the standby to
# re-establish a connection to the primary and push the horizon back until
# we've saved initial values in GlobalVisState and calculated OldestXmin.
$node_primary->poll_query_until($test_db,
    qq[
    SELECT count(*) >= 1 FROM pg_stat_activity
        WHERE pid = $vacuum_pid
        AND wait_event = 'BufferPin';
    ],
    't');

# Ensure the WAL receiver is still not active on the replica.
$node_replica->poll_query_until($test_db, qq[
    SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f');

# Allow the WAL receiver connection to re-establish.
$node_replica->safe_psql(
    $test_db, qq[
    ALTER SYSTEM SET primary_conninfo = '$orig_conninfo';
    SELECT pg_reload_conf();
]);

# Ensure the new WAL receiver has connected.
$node_replica->poll_query_until($test_db, qq[
    SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't');

# Once the WAL sender is shown on the primary, the replica should have
# connected with the primary and pushed the horizon backward. Primary Session
# A won't see that until the VACUUM FREEZE proceeds and does its first round
# of index vacuuming.
$node_primary->poll_query_until($test_db, qq[
    SELECT EXISTS (SELECT * FROM pg_stat_replication);] , 't');

# Move the cursor forward to the next 7. We inserted the 7 much later, so
# advancing the cursor should allow vacuum to proceed vacuuming most pages of
# the relation. Because we set maintenance_work_mem sufficiently low, we
# expect that a round of index vacuuming has happened and that the vacuum is
# now waiting for the cursor to release its pin on the last page of the
# relation.
$res = $psql_primaryB->query_safe("FETCH $primary_cursor1");
is($res, 7,
    qq[Cursor query returned $res from second fetch. Expected value 7.]);

# Prevent the test from incorrectly passing by confirming that we did indeed
# do a pass of index vacuuming.
$node_primary->poll_query_until($test_db, qq[
    SELECT index_vacuum_count > 0
    FROM pg_stat_progress_vacuum
    WHERE datname='$test_db' AND relid::regclass = '$table1'::regclass;
    ] , 't');

# Commit the transaction with the open cursor so that the VACUUM can finish.
$psql_primaryB->query_until(
    qr/^commit$/m,
    qq[
        COMMIT;
        \\echo commit
    ]
);

# VACUUM proceeds with pruning and does a visibility check on each tuple. In
# older versions of Postgres, pruning found our final dead tuple
# non-removable (HEAPTUPLE_RECENTLY_DEAD) since its xmax is after the new
# value of maybe_needed. Then heap_prepare_freeze_tuple() would decide the
# tuple xmax should be frozen because it precedes OldestXmin. Vacuum would
# then error out in heap_pre_freeze_checks() with "cannot freeze committed
# xmax". This was fixed by changing pruning to treat all
# HEAPTUPLE_RECENTLY_DEAD tuples whose xmaxes precede OldestXmin as
# HEAPTUPLE_DEAD and to remove them.

# With the fix, VACUUM should finish successfully, incrementing the table
# vacuum_count.
$node_primary->poll_query_until($test_db,
    qq[
    SELECT vacuum_count > 0
    FROM pg_stat_all_tables WHERE relname = '${table1}';
    ]
    , 't');

$primary_lsn = $node_primary->lsn('flush');

# Make sure something causes us to flush
$node_primary->safe_psql($test_db, "INSERT INTO $table1 VALUES (1);");

# Nothing on the replica should cause a recovery conflict, so this should
# finish successfully.
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);

## Shut down psqls
$psql_primaryA->quit;
$psql_primaryB->quit;

$node_replica->stop();
$node_primary->stop();

done_testing();