mirror of
https://github.com/postgres/postgres.git
synced 2025-10-21 02:52:47 +03:00
We have been using the term RelFileNode to refer to either (1) the integer that is used to name the sequence of files for a certain relation within the directory set aside for that tablespace/database combination; or (2) that value plus the OIDs of the tablespace and database; or occasionally (3) the whole series of files created for a relation based on those values. Using the same name for more than one thing is confusing. Replace RelFileNode with RelFileNumber when we're talking about just the single number, i.e. (1) from above, and with RelFileLocator when we're talking about all the things that are needed to locate a relation's files on disk, i.e. (2) from above. In the places where we refer to (3) as a relfilenode, instead refer to "relation storage". Since there is a ton of SQL code in the world that knows about pg_class.relfilenode, don't change the name of that column, or of other SQL-facing things that derive their name from it. On the other hand, do adjust closely-related internal terminology. For example, the structure member names dbNode and spcNode appear to be derived from the fact that the structure itself was called RelFileNode, so change those to dbOid and spcOid. Likewise, various variables with names like rnode and relnode get renamed appropriately, according to how they're being used in context. Hopefully, this is clearer than before. It is also preparation for future patches that intend to widen the relfilenumber fields from its current width of 32 bits. Variables that store a relfilenumber are now declared as type RelFileNumber rather than type Oid; right now, these are the same, but that can now more easily be changed. Dilip Kumar, per an idea from me. Reviewed also by Andres Freund. I fixed some whitespace issues, changed a couple of words in a comment, and made one other minor correction. 
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
238 lines
8.6 KiB
C
238 lines
8.6 KiB
C
/*
 * xlogrecord.h
 *
 * Definitions for the WAL record format.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/xlogrecord.h
 */
#ifndef XLOGRECORD_H
|
|
#define XLOGRECORD_H
|
|
|
|
#include "access/rmgr.h"
|
|
#include "access/xlogdefs.h"
|
|
#include "port/pg_crc32c.h"
|
|
#include "storage/block.h"
|
|
#include "storage/relfilelocator.h"
|
|
|
|
/*
|
|
* The overall layout of an XLOG record is:
|
|
* Fixed-size header (XLogRecord struct)
|
|
* XLogRecordBlockHeader struct
|
|
* XLogRecordBlockHeader struct
|
|
* ...
|
|
* XLogRecordDataHeader[Short|Long] struct
|
|
* block data
|
|
* block data
|
|
* ...
|
|
* main data
|
|
*
|
|
* There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of
|
|
* rmgr-specific data not associated with a block. XLogRecord structs
|
|
* always start on MAXALIGN boundaries in the WAL files, but the rest of
|
|
* the fields are not aligned.
|
|
*
|
|
* The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
|
|
* XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
|
|
* used to distinguish between block references, and the main data structs.
|
|
*/
|
|
typedef struct XLogRecord
|
|
{
|
|
uint32 xl_tot_len; /* total len of entire record */
|
|
TransactionId xl_xid; /* xact id */
|
|
XLogRecPtr xl_prev; /* ptr to previous record in log */
|
|
uint8 xl_info; /* flag bits, see below */
|
|
RmgrId xl_rmid; /* resource manager for this record */
|
|
/* 2 bytes of padding here, initialize to zero */
|
|
pg_crc32c xl_crc; /* CRC for this record */
|
|
|
|
/* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
|
|
|
|
} XLogRecord;
|
|
|
|
#define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c))
|
|
|
|
/*
 * The high 4 bits in xl_info may be used freely by rmgr.  The
 * XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY bits can be passed by
 * the XLogInsert caller.  The rest are set internally by XLogInsert.
 */
#define XLR_INFO_MASK			0x0F
#define XLR_RMGR_INFO_MASK		0xF0

/*
 * If a WAL record modifies any relation files, in ways not covered by the
 * usual block references, this flag is set.  This is not used for anything
 * by PostgreSQL itself, but it allows external tools that read WAL and keep
 * track of modified blocks to recognize such special record types.
 */
#define XLR_SPECIAL_REL_UPDATE	0x01

/*
 * Enforces consistency checks of replayed WAL at recovery.  If enabled,
 * each record will log a full-page write for each block modified by the
 * record and will reuse it afterwards for consistency checks.  The caller
 * of XLogInsert can use this value if necessary, but if
 * wal_consistency_checking is enabled for a rmgr this is set unconditionally.
 */
#define XLR_CHECK_CONSISTENCY	0x02

/*
|
|
* Header info for block data appended to an XLOG record.
|
|
*
|
|
* 'data_length' is the length of the rmgr-specific payload data associated
|
|
* with this block. It does not include the possible full page image, nor
|
|
* XLogRecordBlockHeader struct itself.
|
|
*
|
|
* Note that we don't attempt to align the XLogRecordBlockHeader struct!
|
|
* So, the struct must be copied to aligned local storage before use.
|
|
*/
|
|
typedef struct XLogRecordBlockHeader
|
|
{
|
|
uint8 id; /* block reference ID */
|
|
uint8 fork_flags; /* fork within the relation, and flags */
|
|
uint16 data_length; /* number of payload bytes (not including page
|
|
* image) */
|
|
|
|
/* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */
|
|
/* If BKPBLOCK_SAME_REL is not set, a RelFileLocator follows */
|
|
/* BlockNumber follows */
|
|
} XLogRecordBlockHeader;
|
|
|
|
#define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16))
|
|
|
|
/*
|
|
* Additional header information when a full-page image is included
|
|
* (i.e. when BKPBLOCK_HAS_IMAGE is set).
|
|
*
|
|
* The XLOG code is aware that PG data pages usually contain an unused "hole"
|
|
* in the middle, which contains only zero bytes. Since we know that the
|
|
* "hole" is all zeros, we remove it from the stored data (and it's not counted
|
|
* in the XLOG record's CRC, either). Hence, the amount of block data actually
|
|
* present is (BLCKSZ - <length of "hole" bytes>).
|
|
*
|
|
* Additionally, when wal_compression is enabled, we will try to compress full
|
|
* page images using one of the supported algorithms, after removing the
|
|
* "hole". This can reduce the WAL volume, but at some extra cost of CPU spent
|
|
* on the compression during WAL logging. In this case, since the "hole"
|
|
* length cannot be calculated by subtracting the number of page image bytes
|
|
* from BLCKSZ, basically it needs to be stored as an extra information.
|
|
* But when no "hole" exists, we can assume that the "hole" length is zero
|
|
* and no such an extra information needs to be stored. Note that
|
|
* the original version of page image is stored in WAL instead of the
|
|
* compressed one if the number of bytes saved by compression is less than
|
|
* the length of extra information. Hence, when a page image is successfully
|
|
* compressed, the amount of block data actually present is less than
|
|
* BLCKSZ - the length of "hole" bytes - the length of extra information.
|
|
*/
|
|
typedef struct XLogRecordBlockImageHeader
|
|
{
|
|
uint16 length; /* number of page image bytes */
|
|
uint16 hole_offset; /* number of bytes before "hole" */
|
|
uint8 bimg_info; /* flag bits, see below */
|
|
|
|
/*
|
|
* If BKPIMAGE_HAS_HOLE and BKPIMAGE_COMPRESSED(), an
|
|
* XLogRecordBlockCompressHeader struct follows.
|
|
*/
|
|
} XLogRecordBlockImageHeader;
|
|
|
|
#define SizeOfXLogRecordBlockImageHeader \
|
|
(offsetof(XLogRecordBlockImageHeader, bimg_info) + sizeof(uint8))
|
|
|
|
/* Information stored in bimg_info */
#define BKPIMAGE_HAS_HOLE		0x01	/* page image has "hole" */
#define BKPIMAGE_APPLY			0x02	/* page image should be restored
										 * during replay */
/* compression methods supported */
#define BKPIMAGE_COMPRESS_PGLZ	0x04
#define BKPIMAGE_COMPRESS_LZ4	0x08
#define BKPIMAGE_COMPRESS_ZSTD	0x10

/*
 * True if any of the compression-method bits is set in 'info'.
 *
 * The argument is parenthesized so that an expression argument such as
 * (a | b) is masked as a whole; '&' binds tighter than '|', so without the
 * parentheses only the right-hand operand would be tested.
 */
#define BKPIMAGE_COMPRESSED(info) \
	(((info) & (BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | \
				BKPIMAGE_COMPRESS_ZSTD)) != 0)

/*
|
|
* Extra header information used when page image has "hole" and
|
|
* is compressed.
|
|
*/
|
|
typedef struct XLogRecordBlockCompressHeader
|
|
{
|
|
uint16 hole_length; /* number of bytes in "hole" */
|
|
} XLogRecordBlockCompressHeader;
|
|
|
|
#define SizeOfXLogRecordBlockCompressHeader \
|
|
sizeof(XLogRecordBlockCompressHeader)
|
|
|
|
/*
 * Maximum size of the header for a block reference.  This is used to size a
 * temporary buffer for constructing the header.
 *
 * It is the sum of every piece that can accompany one block reference: the
 * block header, the image header, the compress header, a RelFileLocator and
 * a BlockNumber.
 */
#define MaxSizeOfXLogRecordBlockHeader \
	(SizeOfXLogRecordBlockHeader + \
	 SizeOfXLogRecordBlockImageHeader + \
	 SizeOfXLogRecordBlockCompressHeader + \
	 sizeof(RelFileLocator) + \
	 sizeof(BlockNumber))

/*
 * The fork number fits in the lower 4 bits in the fork_flags field.  The upper
 * bits are used for flags.
 */
#define BKPBLOCK_FORK_MASK	0x0F
#define BKPBLOCK_FLAG_MASK	0xF0
#define BKPBLOCK_HAS_IMAGE	0x10	/* block has a full-page image; an
									 * XLogRecordBlockImageHeader follows */
#define BKPBLOCK_HAS_DATA	0x20
#define BKPBLOCK_WILL_INIT	0x40	/* redo will re-init the page */
#define BKPBLOCK_SAME_REL	0x80	/* RelFileLocator omitted, same as
									 * previous */

/*
|
|
* XLogRecordDataHeaderShort/Long are used for the "main data" portion of
|
|
* the record. If the length of the data is less than 256 bytes, the short
|
|
* form is used, with a single byte to hold the length. Otherwise the long
|
|
* form is used.
|
|
*
|
|
* (These structs are currently not used in the code, they are here just for
|
|
* documentation purposes).
|
|
*/
|
|
typedef struct XLogRecordDataHeaderShort
|
|
{
|
|
uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */
|
|
uint8 data_length; /* number of payload bytes */
|
|
} XLogRecordDataHeaderShort;
|
|
|
|
#define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2)
|
|
|
|
typedef struct XLogRecordDataHeaderLong
|
|
{
|
|
uint8 id; /* XLR_BLOCK_ID_DATA_LONG */
|
|
/* followed by uint32 data_length, unaligned */
|
|
} XLogRecordDataHeaderLong;
|
|
|
|
#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
|
|
|
|
/*
 * Block IDs used to distinguish different kinds of record fragments.  Block
 * references are numbered from 0 to XLR_MAX_BLOCK_ID.  A rmgr is free to use
 * any ID number in that range (although you should stick to small numbers,
 * because the WAL machinery is optimized for that case).  A few ID
 * numbers are reserved to denote the "main" data portion of the record,
 * as well as replication-supporting transaction metadata.
 *
 * The maximum is currently set at 32, quite arbitrarily.  Most records only
 * need a handful of block references, but there are a few exceptions that
 * need more.
 */
#define XLR_MAX_BLOCK_ID			32

#define XLR_BLOCK_ID_DATA_SHORT		255
#define XLR_BLOCK_ID_DATA_LONG		254
#define XLR_BLOCK_ID_ORIGIN			253
#define XLR_BLOCK_ID_TOPLEVEL_XID	252

#endif /* XLOGRECORD_H */
|