From b0afdcad21fde1470e6502a376bfaf0e10d384fa Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 30 Jan 2020 11:14:02 +0900 Subject: [PATCH] Fix slot data persistency when advancing physical replication slots Advancing a physical replication slot with pg_replication_slot_advance() did not mark the slot as dirty if any advancing was done, preventing the follow-up checkpoint to flush the slot data to disk. This caused the advancing to be lost even on clean restarts. This does not happen for logical slots as any advancing marked the slot as dirty. Per discussion, the original feature has been implemented so as in the event of a crash the slot may move backwards to a past LSN. This property is kept and more documentation is added about that. This commit adds some new TAP tests to check the persistency of physical and logical slots after advancing across clean restarts. Author: Alexey Kondratov, Michael Paquier Reviewed-by: Andres Freund, Kyotaro Horiguchi, Craig Ringer Discussion: https://postgr.es/m/059cc53a-8b14-653a-a24d-5f867503b0ee@postgrespro.ru Backpatch-through: 11 --- doc/src/sgml/func.sgml | 7 ++++-- src/backend/replication/slotfuncs.c | 23 +++++++++--------- src/test/recovery/t/001_stream_rep.pl | 27 ++++++++++++++++++++- src/test/recovery/t/006_logical_decoding.pl | 26 +++++++++++++++++++- 4 files changed, 67 insertions(+), 16 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 895b4b7b1b6..ceda48e0fc3 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -20470,8 +20470,11 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); Advances the current confirmed position of a replication slot named slot_name. The slot will not be moved backwards, - and it will not be moved beyond the current insert location. Returns - name of the slot and real position to which it was advanced to. + and it will not be moved beyond the current insert location. 
Returns + the name of the slot and the real position to which it was advanced. + The information of the updated slot is written out at the follow-up + checkpoint if any advancing is done. In the event of a crash, the + slot may return to an earlier position. diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 7c896946118..e4c4401b686 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -370,6 +370,14 @@ pg_physical_replication_slot_advance(XLogRecPtr moveto) MyReplicationSlot->data.restart_lsn = moveto; SpinLockRelease(&MyReplicationSlot->mutex); retlsn = moveto; + + /* + * Dirty the slot so that it is written out at the next checkpoint. + * Note that the LSN position advanced may still be lost in the + * event of a crash, but this makes the data consistent after a + * clean shutdown. + */ + ReplicationSlotMarkDirty(); } return retlsn; @@ -467,9 +475,9 @@ pg_logical_replication_slot_advance(XLogRecPtr moveto) * keep track of their progress, so we should make more of an * effort to save it for them. * - * Dirty the slot so it's written out at the next checkpoint. - * We'll still lose its position on crash, as documented, but it's - * better than always losing the position even on clean restart. + * Dirty the slot so it is written out at the next checkpoint. + * The LSN position advanced to may still be lost on a crash + * but this makes the data consistent after a clean shutdown. */ ReplicationSlotMarkDirty(); } @@ -566,15 +574,6 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS) values[0] = NameGetDatum(&MyReplicationSlot->data.name); nulls[0] = false; - /* Update the on disk state when lsn was updated. */ - if (XLogRecPtrIsInvalid(endlsn)) - { - ReplicationSlotMarkDirty(); - ReplicationSlotsComputeRequiredXmin(false); - ReplicationSlotsComputeRequiredLSN(); - ReplicationSlotSave(); - } - ReplicationSlotRelease(); /* Return the reached position. 
*/ diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl index 3c743d7d7cc..d09ebe65a39 100644 --- a/src/test/recovery/t/001_stream_rep.pl +++ b/src/test/recovery/t/001_stream_rep.pl @@ -3,7 +3,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 32; +use Test::More tests => 34; # Initialize master node my $node_master = get_new_node('master'); @@ -344,3 +344,28 @@ is($catalog_xmin, '', is($xmin, '', 'xmin of cascaded slot null with hs feedback reset'); is($catalog_xmin, '', 'catalog xmin of cascaded slot still null with hs_feedback reset'); + +# Test physical slot advancing and its durability. Create a new slot on +# the primary, not used by any of the standbys. This reserves WAL at creation. +my $phys_slot = 'phys_slot'; +$node_master->safe_psql('postgres', + "SELECT pg_create_physical_replication_slot('$phys_slot', true);"); +$node_master->psql('postgres', " + CREATE TABLE tab_phys_slot (a int); + INSERT INTO tab_phys_slot VALUES (generate_series(1,10));"); +my $current_lsn = $node_master->safe_psql('postgres', + "SELECT pg_current_wal_lsn();"); +chomp($current_lsn); +my $psql_rc = $node_master->psql('postgres', + "SELECT pg_replication_slot_advance('$phys_slot', '$current_lsn'::pg_lsn);"); +is($psql_rc, '0', 'slot advancing with physical slot'); +my $phys_restart_lsn_pre = $node_master->safe_psql('postgres', + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';"); +chomp($phys_restart_lsn_pre); +# Slot advance should persist across clean restarts. 
+$node_master->restart; +my $phys_restart_lsn_post = $node_master->safe_psql('postgres', "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';"); +chomp($phys_restart_lsn_post); +ok(($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0, + "physical slot advance persists across restarts"); diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl index c23cc4dda76..721d54fc4e6 100644 --- a/src/test/recovery/t/006_logical_decoding.pl +++ b/src/test/recovery/t/006_logical_decoding.pl @@ -7,7 +7,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 10; +use Test::More tests => 12; use Config; # Initialize master node @@ -135,5 +135,29 @@ is($node_master->psql('postgres', 'DROP DATABASE otherdb'), is($node_master->slot('otherdb_slot')->{'slot_name'}, undef, 'logical slot was actually dropped with DB'); +# Test logical slot advancing and its durability. +my $logical_slot = 'logical_slot'; +$node_master->safe_psql('postgres', "SELECT pg_create_logical_replication_slot('$logical_slot', 'test_decoding', false);"); +$node_master->psql('postgres', " + CREATE TABLE tab_logical_slot (a int); + INSERT INTO tab_logical_slot VALUES (generate_series(1,10));"); +my $current_lsn = $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); +chomp($current_lsn); +my $psql_rc = $node_master->psql('postgres', "SELECT pg_replication_slot_advance('$logical_slot', '$current_lsn'::pg_lsn);"); +is($psql_rc, '0', 'slot advancing with logical slot'); +my $logical_restart_lsn_pre = $node_master->safe_psql('postgres', "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';"); +chomp($logical_restart_lsn_pre); +# Slot advance should persist across clean restarts. 
+$node_master->restart; +my $logical_restart_lsn_post = $node_master->safe_psql('postgres', + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';"); +chomp($logical_restart_lsn_post); +ok(($logical_restart_lsn_pre cmp $logical_restart_lsn_post) == 0, + "logical slot advance persists across restarts"); + # done with the node $node_master->stop;