mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Fix logical decoding error when system table w/ toast is repeatedly rewritten.
Repeatedly rewriting a mapped catalog table with VACUUM FULL or CLUSTER could cause logical decoding to fail with: ERROR, "could not map filenode \"%s\" to relation OID". To trigger the problem, the rewritten catalog had to have live tuples with toasted columns. The problem was triggered because, during catalog table rewrites, the heap_insert() check that prevents logical decoding information from being emitted for system catalogs failed to treat the new heap's toast table as a system catalog (because the new heap is not recognized as a catalog table via RelationIsLogicallyLogged()). The relmapper, in contrast to the normal catalog contents, does not contain historical information. After a single rewrite of a mapped table the new relation is known to the relmapper, but if the table is rewritten twice before logical decoding occurs, the relfilenode cannot be mapped to a relation anymore, which then leads us to error out. This only happens for toast tables, because the main table contents aren't re-inserted with heap_insert(). The fix is simple: add a new heap_insert() flag that prevents logical decoding information from being emitted, and accept during decoding that there might not be tuple data for toast tables. Unfortunately that does not fix pre-existing logical decoding errors. Doing so would require not throwing an error when a filenode cannot be mapped to a relation during decoding, and that seems too likely to hide bugs. If it's crucial to fix decoding for an existing slot, temporarily changing the ERROR in ReorderBufferCommit() to a WARNING appears to be the best fix. Author: Andres Freund Discussion: https://postgr.es/m/20180914021046.oi7dm4ra3ot2g2kt@alap3.anarazel.de Backpatch: 9.4-, where logical decoding was introduced
This commit is contained in:
		@@ -2435,6 +2435,11 @@ ReleaseBulkInsertStatePin(BulkInsertState bistate)
 | 
			
		||||
 * Speculatively inserted tuples behave as "value locks" of short duration,
 | 
			
		||||
 * used to implement INSERT .. ON CONFLICT.
 | 
			
		||||
 *
 | 
			
		||||
 * HEAP_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
 | 
			
		||||
 * information for the tuple. This should solely be used during table rewrites
 | 
			
		||||
 * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
 | 
			
		||||
 * relation.
 | 
			
		||||
 *
 | 
			
		||||
 * Note that most of these options will be applied when inserting into the
 | 
			
		||||
 * heap's TOAST table, too, if the tuple requires any out-of-line data.  Only
 | 
			
		||||
 * HEAP_INSERT_SPECULATIVE is explicitly ignored, as the toast data does not
 | 
			
		||||
@@ -2563,7 +2568,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 | 
			
		||||
		 * page write, so make sure it's included even if we take a full-page
 | 
			
		||||
		 * image. (XXX We could alternatively store a pointer into the FPW).
 | 
			
		||||
		 */
 | 
			
		||||
		if (RelationIsLogicallyLogged(relation))
 | 
			
		||||
		if (RelationIsLogicallyLogged(relation) &&
 | 
			
		||||
			!(options & HEAP_INSERT_NO_LOGICAL))
 | 
			
		||||
		{
 | 
			
		||||
			xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
 | 
			
		||||
			bufflags |= REGBUF_KEEP_DATA;
 | 
			
		||||
@@ -2728,6 +2734,9 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
 | 
			
		||||
	bool		need_tuple_data = RelationIsLogicallyLogged(relation);
 | 
			
		||||
	bool		need_cids = RelationIsAccessibleInLogicalDecoding(relation);
 | 
			
		||||
 | 
			
		||||
	/* currently not needed (thus unsupported) for heap_multi_insert() */
 | 
			
		||||
	AssertArg(!(options & HEAP_INSERT_NO_LOGICAL));
 | 
			
		||||
 | 
			
		||||
	needwal = !(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation);
 | 
			
		||||
	saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
 | 
			
		||||
												   HEAP_DEFAULT_FILLFACTOR);
 | 
			
		||||
 
 | 
			
		||||
@@ -652,10 +652,23 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 | 
			
		||||
		heaptup = tup;
 | 
			
		||||
	}
 | 
			
		||||
	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
 | 
			
		||||
	{
 | 
			
		||||
		int options = HEAP_INSERT_SKIP_FSM;
 | 
			
		||||
 | 
			
		||||
		if (!state->rs_use_wal)
 | 
			
		||||
			options |= HEAP_INSERT_SKIP_WAL;
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * The new relfilenode's relcache entry doesn't have the necessary
 | 
			
		||||
		 * information to determine whether a relation should emit data for
 | 
			
		||||
		 * logical decoding.  Force it to off if necessary.
 | 
			
		||||
		 */
 | 
			
		||||
		if (!RelationIsLogicallyLogged(state->rs_old_rel))
 | 
			
		||||
			options |= HEAP_INSERT_NO_LOGICAL;
 | 
			
		||||
 | 
			
		||||
		heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
 | 
			
		||||
										 HEAP_INSERT_SKIP_FSM |
 | 
			
		||||
										 (state->rs_use_wal ?
 | 
			
		||||
										  0 : HEAP_INSERT_SKIP_WAL));
 | 
			
		||||
										 options);
 | 
			
		||||
	}
 | 
			
		||||
	else
 | 
			
		||||
		heaptup = tup;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1527,8 +1527,16 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
 | 
			
		||||
												change->data.tp.relnode.relNode);
 | 
			
		||||
 | 
			
		||||
					/*
 | 
			
		||||
					 * Catalog tuple without data, emitted while catalog was
 | 
			
		||||
					 * in the process of being rewritten.
 | 
			
		||||
					 * Mapped catalog tuple without data, emitted while
 | 
			
		||||
					 * catalog table was in the process of being rewritten. We
 | 
			
		||||
					 * can fail to look up the relfilenode, because the
 | 
			
		||||
					 * relmapper has no "historic" view, in contrast to
 | 
			
		||||
					 * the normal catalog during decoding. Thus repeated
 | 
			
		||||
					 * rewrites can cause a lookup failure. That's OK because
 | 
			
		||||
					 * we do not decode catalog changes anyway. Normally such
 | 
			
		||||
					 * tuples would be skipped over below, but we can't
 | 
			
		||||
					 * identify whether the table should be logically logged
 | 
			
		||||
					 * without mapping the relfilenode to the oid.
 | 
			
		||||
					 */
 | 
			
		||||
					if (reloid == InvalidOid &&
 | 
			
		||||
						change->data.tp.newtuple == NULL &&
 | 
			
		||||
@@ -1590,10 +1598,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
 | 
			
		||||
						 * transaction's changes. Otherwise it will get
 | 
			
		||||
						 * freed/reused while restoring spooled data from
 | 
			
		||||
						 * disk.
 | 
			
		||||
						 *
 | 
			
		||||
						 * But skip doing so if there's no tuple-data. That
 | 
			
		||||
						 * happens if a non-mapped system catalog with a toast
 | 
			
		||||
						 * table is rewritten.
 | 
			
		||||
						 */
 | 
			
		||||
						dlist_delete(&change->node);
 | 
			
		||||
						ReorderBufferToastAppendChunk(rb, txn, relation,
 | 
			
		||||
													  change);
 | 
			
		||||
						if (change->data.tp.newtuple != NULL)
 | 
			
		||||
						{
 | 
			
		||||
							dlist_delete(&change->node);
 | 
			
		||||
							ReorderBufferToastAppendChunk(rb, txn, relation,
 | 
			
		||||
														  change);
 | 
			
		||||
						}
 | 
			
		||||
					}
 | 
			
		||||
 | 
			
		||||
			change_done:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user