mirror of
https://github.com/postgres/postgres.git
synced 2025-06-13 07:41:39 +03:00
Fix data loss in logical replication.
Data loss can happen when DDLs like ALTER PUBLICATION ... ADD TABLE ... or ALTER TYPE ... that don't take a strong lock on the table happen concurrently with DMLs on the tables involved in the DDL. This happens because logical decoding doesn't distribute invalidations to concurrent transactions, and those transactions use stale cache data to decode the changes. The problem is made worse because we keep using the stale cache even after those in-progress transactions have finished, and so skip changes that are required to be sent to the client. This commit fixes the issue by distributing invalidation messages from catalog-modifying transactions to all concurrent in-progress transactions. This allows the necessary rebuild of the catalog cache when decoding new changes after concurrent DDL. We observed a performance regression primarily during frequent execution of *publication DDL* statements that modify the published tables. The regression is minor or nearly nonexistent for DDLs that do not affect the published tables or that occur infrequently, making this a worthwhile cost to resolve a longstanding data loss issue. An alternative approach considered was to take a strong lock on each affected table during publication modification. However, this would only address issues related to publication DDLs (but not ALTER TYPE ...) and would require locking every relation in the database for publications created as FOR ALL TABLES, which is impractical. The bug exists in all supported branches, but we are backpatching only as far back as 14. The fix for 13 requires somewhat bigger changes than this fix, so the fix for that branch is still under discussion.
Reported-by: hubert depesz lubaczewski <depesz@depesz.com> Reported-by: Tomas Vondra <tomas.vondra@enterprisedb.com> Author: Shlok Kyal <shlok.kyal.oss@gmail.com> Author: Hayato Kuroda <kuroda.hayato@fujitsu.com> Reviewed-by: Zhijie Hou <houzj.fnst@fujitsu.com> Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Tested-by: Benoit Lobréau <benoit.lobreau@dalibo.com> Backpatch-through: 14 Discussion: https://postgr.es/m/de52b282-1166-1180-45a2-8d8917ca74c6@enterprisedb.com Discussion: https://postgr.es/m/CAD21AoAenVqiMjpN-PvGHL1N9DWnHSq673bfgr6phmBUzx=kLQ@mail.gmail.com
This commit is contained in:
@ -9,7 +9,7 @@ REGRESS = ddl xact rewrite toast permissions decoding_in_xact \
|
||||
ISOLATION = mxact delayed_startup ondisk_startup concurrent_ddl_dml \
|
||||
oldest_xmin snapshot_transfer subxact_without_top concurrent_stream \
|
||||
twophase_snapshot slot_creation_error catalog_change_snapshot \
|
||||
skip_snapshot_restore
|
||||
skip_snapshot_restore invalidation_distrubution
|
||||
|
||||
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
|
||||
ISOLATION_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
|
||||
|
20
contrib/test_decoding/expected/invalidation_distrubution.out
Normal file
20
contrib/test_decoding/expected/invalidation_distrubution.out
Normal file
@ -0,0 +1,20 @@
|
||||
Parsed test spec with 2 sessions
|
||||
|
||||
starting permutation: s1_insert_tbl1 s1_begin s1_insert_tbl1 s2_alter_pub_add_tbl s1_commit s1_insert_tbl1 s2_get_binary_changes
|
||||
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
|
||||
step s1_begin: BEGIN;
|
||||
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
|
||||
step s2_alter_pub_add_tbl: ALTER PUBLICATION pub ADD TABLE tbl1;
|
||||
step s1_commit: COMMIT;
|
||||
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
|
||||
step s2_get_binary_changes: SELECT count(data) FROM pg_logical_slot_get_binary_changes('isolation_slot', NULL, NULL, 'proto_version', '4', 'publication_names', 'pub') WHERE get_byte(data, 0) = 73;
|
||||
count
|
||||
-----
|
||||
1
|
||||
(1 row)
|
||||
|
||||
?column?
|
||||
--------
|
||||
stop
|
||||
(1 row)
|
||||
|
@ -63,6 +63,7 @@ tests += {
|
||||
'twophase_snapshot',
|
||||
'slot_creation_error',
|
||||
'skip_snapshot_restore',
|
||||
'invalidation_distrubution',
|
||||
],
|
||||
'regress_args': [
|
||||
'--temp-config', files('logical.conf'),
|
||||
|
32
contrib/test_decoding/specs/invalidation_distrubution.spec
Normal file
32
contrib/test_decoding/specs/invalidation_distrubution.spec
Normal file
@ -0,0 +1,32 @@
|
||||
# Test that catalog cache invalidation messages are distributed to ongoing
|
||||
# transactions, ensuring they can access the updated catalog content after
|
||||
# processing these messages.
|
||||
setup
|
||||
{
|
||||
SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'pgoutput');
|
||||
CREATE TABLE tbl1(val1 integer, val2 integer);
|
||||
CREATE PUBLICATION pub;
|
||||
}
|
||||
|
||||
teardown
|
||||
{
|
||||
DROP TABLE tbl1;
|
||||
DROP PUBLICATION pub;
|
||||
SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
|
||||
}
|
||||
|
||||
session "s1"
|
||||
setup { SET synchronous_commit=on; }
|
||||
|
||||
step "s1_begin" { BEGIN; }
|
||||
step "s1_insert_tbl1" { INSERT INTO tbl1 (val1, val2) VALUES (1, 1); }
|
||||
step "s1_commit" { COMMIT; }
|
||||
|
||||
session "s2"
|
||||
setup { SET synchronous_commit=on; }
|
||||
|
||||
step "s2_alter_pub_add_tbl" { ALTER PUBLICATION pub ADD TABLE tbl1; }
|
||||
step "s2_get_binary_changes" { SELECT count(data) FROM pg_logical_slot_get_binary_changes('isolation_slot', NULL, NULL, 'proto_version', '4', 'publication_names', 'pub') WHERE get_byte(data, 0) = 73; }
|
||||
|
||||
# Expect to get one insert change. LOGICAL_REP_MSG_INSERT = 'I' (ASCII 73, the value matched by get_byte(data, 0) = 73)
|
||||
permutation "s1_insert_tbl1" "s1_begin" "s1_insert_tbl1" "s2_alter_pub_add_tbl" "s1_commit" "s1_insert_tbl1" "s2_get_binary_changes"
|
Reference in New Issue
Block a user