mirror of
https://github.com/postgres/postgres.git
synced 2025-12-07 12:02:30 +03:00
This commit only implements one prerequisite part for allowing logical decoding. The commit message contains an explanation of the overall design, which later commits will refer back to. Overall design: 1. We want to enable logical decoding on standbys, but replay of WAL from the primary might remove data that is needed by logical decoding, causing error(s) on the standby. To prevent those errors, a new replication conflict scenario needs to be addressed (as much as hot standby does). 2. Our chosen strategy for dealing with this type of replication slot is to invalidate logical slots for which needed data has been removed. 3. To do this we need the latestRemovedXid for each change, just as we do for physical replication conflicts, but we also need to know whether any particular change was to data that logical replication might access. That way, during WAL replay, we know when there is a risk of conflict and, if so, if there is a conflict. 4. We can't rely on the standby's relcache entries for this purpose in any way, because the startup process can't access catalog contents. 5. Therefore every WAL record that potentially removes data from the index or heap must carry a flag indicating whether or not it is one that might be accessed during logical decoding. Why do we need this for logical decoding on standby? First, let's forget about logical decoding on standby and recall that on a primary database, any catalog rows that may be needed by a logical decoding replication slot are not removed. This is done thanks to the catalog_xmin associated with the logical replication slot. But, with logical decoding on standby, in the following cases: - hot_standby_feedback is off - hot_standby_feedback is on but there is no physical slot between the primary and the standby.
Then, hot_standby_feedback will work, but only while the connection is alive (for example a node restart would break it). Then, the primary may delete system catalog rows that could be needed by the logical decoding on the standby (as it does not know about the catalog_xmin on the standby). So, it’s mandatory to identify those rows and invalidate the slots that may need them if any. Identifying those rows is the purpose of this commit. Implementation: When a WAL replay on standby indicates that a catalog table tuple is to be deleted by an xid that is greater than a logical slot's catalog_xmin, then that means the slot's catalog_xmin conflicts with the xid, and we need to handle the conflict. While subsequent commits will do the actual conflict handling, this commit adds a new field isCatalogRel in such WAL records (and a new bit set in the xl_heap_visible flags field), that is true for catalog tables, so as to arrange for conflict handling. The affected WAL records are the ones that already contain the snapshotConflictHorizon field, namely: - gistxlogDelete - gistxlogPageReuse - xl_hash_vacuum_one_page - xl_heap_prune - xl_heap_freeze_page - xl_heap_visible - xl_btree_reuse_page - xl_btree_delete - spgxlogVacuumRedirect Due to this new field being added, xl_hash_vacuum_one_page and gistxlogDelete now contain the offsets to be deleted as a FLEXIBLE_ARRAY_MEMBER. This is needed to ensure correct alignment. It's not needed on the other structs where isCatalogRel has been added. This commit just introduces the WAL format changes mentioned above. Handling the actual conflicts will follow in future commits. Bumps XLOG_PAGE_MAGIC as several WAL records are changed.
Author: "Drouvot, Bertrand" <bertranddrouvot.pg@gmail.com> Author: Andres Freund <andres@anarazel.de> (in an older version) Author: Amit Khandekar <amitdkhan.pg@gmail.com> (in an older version) Reviewed-by: "Drouvot, Bertrand" <bertranddrouvot.pg@gmail.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Robert Haas <robertmhaas@gmail.com> Reviewed-by: Fabrízio de Royes Mello <fabriziomello@gmail.com> Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
118 lines
3.5 KiB
C
118 lines
3.5 KiB
C
/*-------------------------------------------------------------------------
 *
 * gistxlog.h
 *	  gist xlog routines
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/gistxlog.h
 *
 *-------------------------------------------------------------------------
 */
|
|
#ifndef GIST_XLOG_H
|
|
#define GIST_XLOG_H
|
|
|
|
#include "access/gist.h"
|
|
#include "access/xlogreader.h"
|
|
#include "lib/stringinfo.h"
|
|
|
|
/*
 * GiST WAL record type codes.
 *
 * The codes 0x40 and 0x50 belonged to record types that are not used
 * anymore; their definitions are kept below, commented out, as a record of
 * those values.
 */
#define XLOG_GIST_PAGE_UPDATE		0x00

/* delete leaf index tuples for a page */
#define XLOG_GIST_DELETE			0x10

/* old page is about to be reused from FSM */
#define XLOG_GIST_PAGE_REUSE		0x20

#define XLOG_GIST_PAGE_SPLIT		0x30

/* #define XLOG_GIST_INSERT_COMPLETE	 0x40 */	/* not used anymore */
/* #define XLOG_GIST_CREATE_INDEX		 0x50 */	/* not used anymore */

#define XLOG_GIST_PAGE_DELETE		0x60

/* nop, assign new LSN */
#define XLOG_GIST_ASSIGN_LSN		0x70
|
|
|
|
/*
|
|
* Backup Blk 0: updated page.
|
|
* Backup Blk 1: If this operation completes a page split, by inserting a
|
|
* downlink for the split page, the left half of the split
|
|
*/
|
|
typedef struct gistxlogPageUpdate
|
|
{
|
|
/* number of deleted offsets */
|
|
uint16 ntodelete;
|
|
uint16 ntoinsert;
|
|
|
|
/*
|
|
* In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert
|
|
*/
|
|
} gistxlogPageUpdate;
|
|
|
|
/*
|
|
* Backup Blk 0: Leaf page, whose index tuples are deleted.
|
|
*/
|
|
typedef struct gistxlogDelete
|
|
{
|
|
TransactionId snapshotConflictHorizon;
|
|
uint16 ntodelete; /* number of deleted offsets */
|
|
bool isCatalogRel; /* to handle recovery conflict during logical
|
|
* decoding on standby */
|
|
|
|
/* TODELETE OFFSET NUMBERS */
|
|
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
|
|
} gistxlogDelete;
|
|
|
|
#define SizeOfGistxlogDelete offsetof(gistxlogDelete, offsets)
|
|
|
|
/*
|
|
* Backup Blk 0: If this operation completes a page split, by inserting a
|
|
* downlink for the split page, the left half of the split
|
|
* Backup Blk 1 - npage: split pages (1 is the original page)
|
|
*/
|
|
typedef struct gistxlogPageSplit
|
|
{
|
|
BlockNumber origrlink; /* rightlink of the page before split */
|
|
GistNSN orignsn; /* NSN of the page before split */
|
|
bool origleaf; /* was splitted page a leaf page? */
|
|
|
|
uint16 npage; /* # of pages in the split */
|
|
bool markfollowright; /* set F_FOLLOW_RIGHT flags */
|
|
|
|
/*
|
|
* follow: 1. gistxlogPage and array of IndexTupleData per page
|
|
*/
|
|
} gistxlogPageSplit;
|
|
|
|
/*
|
|
* Backup Blk 0: page that was deleted.
|
|
* Backup Blk 1: parent page, containing the downlink to the deleted page.
|
|
*/
|
|
typedef struct gistxlogPageDelete
|
|
{
|
|
FullTransactionId deleteXid; /* last Xid which could see page in scan */
|
|
OffsetNumber downlinkOffset; /* Offset of downlink referencing this
|
|
* page */
|
|
} gistxlogPageDelete;
|
|
|
|
#define SizeOfGistxlogPageDelete (offsetof(gistxlogPageDelete, downlinkOffset) + sizeof(OffsetNumber))
|
|
|
|
|
|
/*
|
|
* This is what we need to know about page reuse, for hot standby.
|
|
*/
|
|
typedef struct gistxlogPageReuse
|
|
{
|
|
RelFileLocator locator;
|
|
BlockNumber block;
|
|
FullTransactionId snapshotConflictHorizon;
|
|
bool isCatalogRel; /* to handle recovery conflict during logical
|
|
* decoding on standby */
|
|
} gistxlogPageReuse;
|
|
|
|
#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, isCatalogRel) + sizeof(bool))
|
|
|
|
extern void gist_redo(XLogReaderState *record);
|
|
extern void gist_desc(StringInfo buf, XLogReaderState *record);
|
|
extern const char *gist_identify(uint8 info);
|
|
extern void gist_xlog_startup(void);
|
|
extern void gist_xlog_cleanup(void);
|
|
extern void gist_mask(char *pagedata, BlockNumber blkno);
|
|
|
|
#endif
|