mirror of
https://github.com/postgres/postgres.git
synced 2025-04-20 00:42:27 +03:00
Fix invalidation of local pgstats references for entry reinitialization
818119afccd3 has introduced the "generation" concept in pgstats entries, incremented a counter when a pgstats entry is reinitialized, but it did not count on the fact that backends still holding local references to such entries need to be refreshed if the cache age is outdated. The previous logic only updated local references when an entry was dropped, but it needs also to consider entries that are reinitialized. This matters for replication slot stats (as well as custom pgstats kinds in 18~), where concurrent drops and creates of a slot could cause incorrect stats to be locally referenced. This would lead to an assertion failure at shutdown when writing out the stats file, as the backend holding an outdated local reference would not be able to drop during its shutdown sequence the stats entry that should be dropped, as the last process holding a reference to the stats entry. The checkpointer was then complaining about such an entry late in the shutdown sequence, after the shutdown checkpoint is finished with the control file updated, causing the stats file to not be generated. In non-assert builds, the entry would just be skipped with the stats file written. Note that only logical replication slots use statistics. A test case based on TAP is added to test_decoding, where a persistent connection peeking at a slot's data is kept with concurrent drops and creates of the same slot. This is based on the isolation test case that Anton has sent. As it requires a node shutdown with a check to make sure that the stats file is written with this specific sequence of events, TAP is used instead. Reported-by: Anton A. Melnikov Reviewed-by: Bertrand Drouvot Discussion: https://postgr.es/m/56bf8ff9-dd8c-47b2-872a-748ede82af99@postgrespro.ru Backpatch-through: 15
This commit is contained in:
parent
c1d6506acc
commit
ae77bcc3aa
@ -118,4 +118,64 @@ $node->safe_psql('postgres',
|
||||
# shutdown
|
||||
$node->stop;
|
||||
|
||||
# Test replication slot stats persistence in a single session. The slot
|
||||
# is dropped and created concurrently of a session peeking at its data
|
||||
# repeatedly, hence holding in its local cache a reference to the stats.
|
||||
$node->start;
|
||||
|
||||
my $slot_name_restart = 'regression_slot5';
|
||||
$node->safe_psql('postgres',
|
||||
"SELECT pg_create_logical_replication_slot('$slot_name_restart', 'test_decoding');"
|
||||
);
|
||||
|
||||
# Look at slot data, with a persistent connection.
|
||||
my $bpgsql = $node->background_psql('postgres', on_error_stop => 1);
|
||||
|
||||
# Launch query and look at slot data, incrementing the refcount of the
|
||||
# stats entry.
|
||||
$bpgsql->query_safe(
|
||||
"SELECT pg_logical_slot_peek_binary_changes('$slot_name_restart', NULL, NULL)"
|
||||
);
|
||||
|
||||
# Drop the slot entry. The stats entry is not dropped yet as the previous
|
||||
# session still holds a reference to it.
|
||||
$node->safe_psql('postgres',
|
||||
"SELECT pg_drop_replication_slot('$slot_name_restart')");
|
||||
|
||||
# Create again the same slot. The stats entry is reinitialized, not marked
|
||||
# as dropped anymore.
|
||||
$node->safe_psql('postgres',
|
||||
"SELECT pg_create_logical_replication_slot('$slot_name_restart', 'test_decoding');"
|
||||
);
|
||||
|
||||
# Look again at the slot data. The local stats reference should be refreshed
|
||||
# to the reinitialized entry.
|
||||
$bpgsql->query_safe(
|
||||
"SELECT pg_logical_slot_peek_binary_changes('$slot_name_restart', NULL, NULL)"
|
||||
);
|
||||
# Drop again the slot, the entry is not dropped yet as the previous session
|
||||
# still has a refcount on it.
|
||||
$node->safe_psql('postgres',
|
||||
"SELECT pg_drop_replication_slot('$slot_name_restart')");
|
||||
|
||||
# Shutdown the node, which should happen cleanly with the stats file written
|
||||
# to disk. Note that the background session created previously needs to be
|
||||
# hold *while* the node is shutting down to check that it drops the stats
|
||||
# entry of the slot before writing the stats file.
|
||||
$node->stop;
|
||||
|
||||
# Make sure that the node is correctly shut down. Checking the control file
|
||||
# is not enough, as the node may detect that something is incorrect after the
|
||||
# control file has been updated and the shutdown checkpoint is finished, so
|
||||
# also check that the stats file has been written out.
|
||||
command_like(
|
||||
[ 'pg_controldata', $node->data_dir ],
|
||||
qr/Database cluster state:\s+shut down\n/,
|
||||
'node shut down ok');
|
||||
|
||||
my $stats_file = "$datadir/pg_stat/pgstat.stat";
|
||||
ok(-f "$stats_file", "stats file must exist after shutdown");
|
||||
|
||||
$bpgsql->quit;
|
||||
|
||||
done_testing();
|
||||
|
@ -702,7 +702,8 @@ pgstat_gc_entry_refs(void)
|
||||
Assert(curage != 0);
|
||||
|
||||
/*
|
||||
* Some entries have been dropped. Invalidate cache pointer to them.
|
||||
* Some entries have been dropped or reinitialized. Invalidate cache
|
||||
* pointer to them.
|
||||
*/
|
||||
pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
|
||||
while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
|
||||
@ -712,7 +713,13 @@ pgstat_gc_entry_refs(void)
|
||||
Assert(!entry_ref->shared_stats ||
|
||||
entry_ref->shared_stats->magic == 0xdeadbeef);
|
||||
|
||||
if (!entry_ref->shared_entry->dropped)
|
||||
/*
|
||||
* "generation" checks for the case of entries being reinitialized,
|
||||
* and "dropped" for the case where these are.. dropped.
|
||||
*/
|
||||
if (!entry_ref->shared_entry->dropped &&
|
||||
pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
|
||||
entry_ref->generation)
|
||||
continue;
|
||||
|
||||
/* cannot gc shared ref that has pending data */
|
||||
|
Loading…
x
Reference in New Issue
Block a user