mirror of
https://github.com/postgres/postgres.git
synced 2025-04-20 00:42:27 +03:00
Fix logical decoding error when system table w/ toast is repeatedly rewritten.
Repeatedly rewriting a mapped catalog table with VACUUM FULL or CLUSTER could cause logical decoding to fail with: ERROR, "could not map filenode \"%s\" to relation OID". To trigger the problem the rewritten catalog had to have live tuples with toasted columns. The problem was triggered because, during catalog table rewrites, the heap_insert() check that prevents logical decoding information from being emitted for system catalogs failed to treat the new heap's toast table as a system catalog (because the new heap is not recognized as a catalog table via RelationIsLogicallyLogged()). The relmapper, in contrast to the normal catalog contents, does not contain historical information. After a single rewrite of a mapped table the new relation is known to the relmapper, but if the table is rewritten twice before logical decoding occurs, the relfilenode cannot be mapped to a relation anymore, which then leads us to error out. This only happens for toast tables, because the main table contents aren't re-inserted with heap_insert(). The fix is simple: add a new heap_insert() flag that prevents logical decoding information from being emitted, and accept during decoding that there might not be tuple data for toast tables. Unfortunately that does not fix pre-existing logical decoding errors. Doing so would require not throwing an error when a filenode cannot be mapped to a relation during decoding, and that seems too likely to hide bugs. If it's crucial to fix decoding for an existing slot, temporarily changing the ERROR in ReorderBufferCommit() to a WARNING appears to be the best fix. Author: Andres Freund Discussion: https://postgr.es/m/20180914021046.oi7dm4ra3ot2g2kt@alap3.anarazel.de Backpatch: 9.4-, where logical decoding was introduced
This commit is contained in:
parent
a653569c14
commit
a88482dd24
@ -1,6 +1,61 @@
|
|||||||
-- predictability
|
-- predictability
|
||||||
SET synchronous_commit = on;
|
SET synchronous_commit = on;
|
||||||
DROP TABLE IF EXISTS replication_example;
|
DROP TABLE IF EXISTS replication_example;
|
||||||
|
-- Ensure there's tables with toast datums. To do so, we dynamically
|
||||||
|
-- create a function returning a large textblob. We want tables of
|
||||||
|
-- different kinds: mapped catalog table, unmapped catalog table,
|
||||||
|
-- shared catalog table and usertable.
|
||||||
|
CREATE FUNCTION exec(text) returns void language plpgsql volatile
|
||||||
|
AS $f$
|
||||||
|
BEGIN
|
||||||
|
EXECUTE $1;
|
||||||
|
END;
|
||||||
|
$f$;
|
||||||
|
CREATE ROLE justforcomments NOLOGIN;
|
||||||
|
SELECT exec(
|
||||||
|
format($outer$CREATE FUNCTION iamalongfunction() RETURNS TEXT IMMUTABLE LANGUAGE SQL AS $f$SELECT text %L$f$$outer$,
|
||||||
|
(SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i))));
|
||||||
|
exec
|
||||||
|
------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT exec(
|
||||||
|
format($outer$COMMENT ON FUNCTION iamalongfunction() IS %L$outer$,
|
||||||
|
iamalongfunction()));
|
||||||
|
exec
|
||||||
|
------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT exec(
|
||||||
|
format($outer$COMMENT ON ROLE JUSTFORCOMMENTS IS %L$outer$,
|
||||||
|
iamalongfunction()));
|
||||||
|
exec
|
||||||
|
------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE TABLE iamalargetable AS SELECT iamalongfunction() longfunctionoutput;
|
||||||
|
-- verify toast usage
|
||||||
|
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_proc'::regclass)) > 0;
|
||||||
|
?column?
|
||||||
|
----------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_description'::regclass)) > 0;
|
||||||
|
?column?
|
||||||
|
----------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_shdescription'::regclass)) > 0;
|
||||||
|
?column?
|
||||||
|
----------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
|
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
|
||||||
?column?
|
?column?
|
||||||
----------
|
----------
|
||||||
@ -76,6 +131,23 @@ SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'inc
|
|||||||
COMMIT
|
COMMIT
|
||||||
(15 rows)
|
(15 rows)
|
||||||
|
|
||||||
|
-- trigger repeated rewrites of a system catalog with a toast table,
|
||||||
|
-- that previously was buggy: 20180914021046.oi7dm4ra3ot2g2kt@alap3.anarazel.de
|
||||||
|
VACUUM FULL pg_proc; VACUUM FULL pg_description; VACUUM FULL pg_shdescription; VACUUM FULL iamalargetable;
|
||||||
|
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (8, 6, 1);
|
||||||
|
VACUUM FULL pg_proc; VACUUM FULL pg_description; VACUUM FULL pg_shdescription; VACUUM FULL iamalargetable;
|
||||||
|
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (9, 7, 1);
|
||||||
|
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
|
||||||
|
data
|
||||||
|
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
BEGIN
|
||||||
|
table public.replication_example: INSERT: id[integer]:9 somedata[integer]:8 text[character varying]:null testcolumn1[integer]:6 testcolumn2[integer]:null testcolumn3[integer]:1
|
||||||
|
COMMIT
|
||||||
|
BEGIN
|
||||||
|
table public.replication_example: INSERT: id[integer]:10 somedata[integer]:9 text[character varying]:null testcolumn1[integer]:7 testcolumn2[integer]:null testcolumn3[integer]:1
|
||||||
|
COMMIT
|
||||||
|
(6 rows)
|
||||||
|
|
||||||
SELECT pg_drop_replication_slot('regression_slot');
|
SELECT pg_drop_replication_slot('regression_slot');
|
||||||
pg_drop_replication_slot
|
pg_drop_replication_slot
|
||||||
--------------------------
|
--------------------------
|
||||||
@ -83,3 +155,6 @@ SELECT pg_drop_replication_slot('regression_slot');
|
|||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
DROP TABLE IF EXISTS replication_example;
|
DROP TABLE IF EXISTS replication_example;
|
||||||
|
DROP FUNCTION iamalongfunction();
|
||||||
|
DROP FUNCTION exec(text);
|
||||||
|
DROP ROLE justforcomments;
|
||||||
|
@ -3,6 +3,35 @@ SET synchronous_commit = on;
|
|||||||
|
|
||||||
DROP TABLE IF EXISTS replication_example;
|
DROP TABLE IF EXISTS replication_example;
|
||||||
|
|
||||||
|
-- Ensure there's tables with toast datums. To do so, we dynamically
|
||||||
|
-- create a function returning a large textblob. We want tables of
|
||||||
|
-- different kinds: mapped catalog table, unmapped catalog table,
|
||||||
|
-- shared catalog table and usertable.
|
||||||
|
CREATE FUNCTION exec(text) returns void language plpgsql volatile
|
||||||
|
AS $f$
|
||||||
|
BEGIN
|
||||||
|
EXECUTE $1;
|
||||||
|
END;
|
||||||
|
$f$;
|
||||||
|
CREATE ROLE justforcomments NOLOGIN;
|
||||||
|
|
||||||
|
SELECT exec(
|
||||||
|
format($outer$CREATE FUNCTION iamalongfunction() RETURNS TEXT IMMUTABLE LANGUAGE SQL AS $f$SELECT text %L$f$$outer$,
|
||||||
|
(SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i))));
|
||||||
|
SELECT exec(
|
||||||
|
format($outer$COMMENT ON FUNCTION iamalongfunction() IS %L$outer$,
|
||||||
|
iamalongfunction()));
|
||||||
|
SELECT exec(
|
||||||
|
format($outer$COMMENT ON ROLE JUSTFORCOMMENTS IS %L$outer$,
|
||||||
|
iamalongfunction()));
|
||||||
|
CREATE TABLE iamalargetable AS SELECT iamalongfunction() longfunctionoutput;
|
||||||
|
|
||||||
|
-- verify toast usage
|
||||||
|
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_proc'::regclass)) > 0;
|
||||||
|
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_description'::regclass)) > 0;
|
||||||
|
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_shdescription'::regclass)) > 0;
|
||||||
|
|
||||||
|
|
||||||
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
|
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
|
||||||
CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
|
CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
|
||||||
INSERT INTO replication_example(somedata) VALUES (1);
|
INSERT INTO replication_example(somedata) VALUES (1);
|
||||||
@ -57,6 +86,17 @@ COMMIT;
|
|||||||
CHECKPOINT;
|
CHECKPOINT;
|
||||||
|
|
||||||
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
|
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
|
||||||
SELECT pg_drop_replication_slot('regression_slot');
|
|
||||||
|
|
||||||
|
-- trigger repeated rewrites of a system catalog with a toast table,
|
||||||
|
-- that previously was buggy: 20180914021046.oi7dm4ra3ot2g2kt@alap3.anarazel.de
|
||||||
|
VACUUM FULL pg_proc; VACUUM FULL pg_description; VACUUM FULL pg_shdescription; VACUUM FULL iamalargetable;
|
||||||
|
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (8, 6, 1);
|
||||||
|
VACUUM FULL pg_proc; VACUUM FULL pg_description; VACUUM FULL pg_shdescription; VACUUM FULL iamalargetable;
|
||||||
|
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (9, 7, 1);
|
||||||
|
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
|
||||||
|
|
||||||
|
SELECT pg_drop_replication_slot('regression_slot');
|
||||||
DROP TABLE IF EXISTS replication_example;
|
DROP TABLE IF EXISTS replication_example;
|
||||||
|
DROP FUNCTION iamalongfunction();
|
||||||
|
DROP FUNCTION exec(text);
|
||||||
|
DROP ROLE justforcomments;
|
||||||
|
@ -2353,6 +2353,11 @@ FreeBulkInsertState(BulkInsertState bistate)
|
|||||||
* Speculatively inserted tuples behave as "value locks" of short duration,
|
* Speculatively inserted tuples behave as "value locks" of short duration,
|
||||||
* used to implement INSERT .. ON CONFLICT.
|
* used to implement INSERT .. ON CONFLICT.
|
||||||
*
|
*
|
||||||
|
* HEAP_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
|
||||||
|
* information for the tuple. This should solely be used during table rewrites
|
||||||
|
* where RelationIsLogicallyLogged(relation) is not yet accurate for the new
|
||||||
|
* relation.
|
||||||
|
*
|
||||||
* Note that most of these options will be applied when inserting into the
|
* Note that most of these options will be applied when inserting into the
|
||||||
* heap's TOAST table, too, if the tuple requires any out-of-line data. Only
|
* heap's TOAST table, too, if the tuple requires any out-of-line data. Only
|
||||||
* HEAP_INSERT_IS_SPECULATIVE is explicitly ignored, as the toast data does
|
* HEAP_INSERT_IS_SPECULATIVE is explicitly ignored, as the toast data does
|
||||||
@ -2481,7 +2486,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
|
|||||||
* page write, so make sure it's included even if we take a full-page
|
* page write, so make sure it's included even if we take a full-page
|
||||||
* image. (XXX We could alternatively store a pointer into the FPW).
|
* image. (XXX We could alternatively store a pointer into the FPW).
|
||||||
*/
|
*/
|
||||||
if (RelationIsLogicallyLogged(relation))
|
if (RelationIsLogicallyLogged(relation) &&
|
||||||
|
!(options & HEAP_INSERT_NO_LOGICAL))
|
||||||
{
|
{
|
||||||
xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
|
xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
|
||||||
bufflags |= REGBUF_KEEP_DATA;
|
bufflags |= REGBUF_KEEP_DATA;
|
||||||
@ -2644,6 +2650,9 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
|||||||
bool need_tuple_data = RelationIsLogicallyLogged(relation);
|
bool need_tuple_data = RelationIsLogicallyLogged(relation);
|
||||||
bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
|
bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
|
||||||
|
|
||||||
|
/* currently not needed (thus unsupported) for heap_multi_insert() */
|
||||||
|
AssertArg(!(options & HEAP_INSERT_NO_LOGICAL));
|
||||||
|
|
||||||
needwal = !(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation);
|
needwal = !(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation);
|
||||||
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
|
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
|
||||||
HEAP_DEFAULT_FILLFACTOR);
|
HEAP_DEFAULT_FILLFACTOR);
|
||||||
|
@ -649,10 +649,23 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
|
|||||||
heaptup = tup;
|
heaptup = tup;
|
||||||
}
|
}
|
||||||
else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
|
else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
|
||||||
|
{
|
||||||
|
int options = HEAP_INSERT_SKIP_FSM;
|
||||||
|
|
||||||
|
if (!state->rs_use_wal)
|
||||||
|
options |= HEAP_INSERT_SKIP_WAL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The new relfilenode's relcache entry doesn't have the necessary
|
||||||
|
* information to determine whether a relation should emit data for
|
||||||
|
* logical decoding. Force it to off if necessary.
|
||||||
|
*/
|
||||||
|
if (!RelationIsLogicallyLogged(state->rs_old_rel))
|
||||||
|
options |= HEAP_INSERT_NO_LOGICAL;
|
||||||
|
|
||||||
heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
|
heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
|
||||||
HEAP_INSERT_SKIP_FSM |
|
options);
|
||||||
(state->rs_use_wal ?
|
}
|
||||||
0 : HEAP_INSERT_SKIP_WAL));
|
|
||||||
else
|
else
|
||||||
heaptup = tup;
|
heaptup = tup;
|
||||||
|
|
||||||
|
@ -1588,8 +1588,16 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
|
|||||||
change->data.tp.relnode.relNode);
|
change->data.tp.relnode.relNode);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Catalog tuple without data, emitted while catalog was
|
* Mapped catalog tuple without data, emitted while
|
||||||
* in the process of being rewritten.
|
* catalog table was in the process of being rewritten. We
|
||||||
|
* can fail to look up the relfilenode, because the
|
||||||
|
* relmapper has no "historic" view, in contrast to
|
||||||
|
* the normal catalog during decoding. Thus repeated
|
||||||
|
* rewrites can cause a lookup failure. That's OK because
|
||||||
|
* we do not decode catalog changes anyway. Normally such
|
||||||
|
* tuples would be skipped over below, but we can't
|
||||||
|
* identify whether the table should be logically logged
|
||||||
|
* without mapping the relfilenode to the oid.
|
||||||
*/
|
*/
|
||||||
if (reloid == InvalidOid &&
|
if (reloid == InvalidOid &&
|
||||||
change->data.tp.newtuple == NULL &&
|
change->data.tp.newtuple == NULL &&
|
||||||
@ -1644,10 +1652,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
|
|||||||
* transaction's changes. Otherwise it will get
|
* transaction's changes. Otherwise it will get
|
||||||
* freed/reused while restoring spooled data from
|
* freed/reused while restoring spooled data from
|
||||||
* disk.
|
* disk.
|
||||||
|
*
|
||||||
|
* But skip doing so if there's no tuple-data. That
|
||||||
|
* happens if a non-mapped system catalog with a toast
|
||||||
|
* table is rewritten.
|
||||||
*/
|
*/
|
||||||
dlist_delete(&change->node);
|
if (change->data.tp.newtuple != NULL)
|
||||||
ReorderBufferToastAppendChunk(rb, txn, relation,
|
{
|
||||||
change);
|
dlist_delete(&change->node);
|
||||||
|
ReorderBufferToastAppendChunk(rb, txn, relation,
|
||||||
|
change);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
change_done:
|
change_done:
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#define HEAP_INSERT_SKIP_FSM 0x0002
|
#define HEAP_INSERT_SKIP_FSM 0x0002
|
||||||
#define HEAP_INSERT_FROZEN 0x0004
|
#define HEAP_INSERT_FROZEN 0x0004
|
||||||
#define HEAP_INSERT_SPECULATIVE 0x0008
|
#define HEAP_INSERT_SPECULATIVE 0x0008
|
||||||
|
#define HEAP_INSERT_NO_LOGICAL 0x0010
|
||||||
|
|
||||||
typedef struct BulkInsertStateData *BulkInsertState;
|
typedef struct BulkInsertStateData *BulkInsertState;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user