diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index b14c5d81a15..3dec0b7cfeb 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4318,11 +4318,17 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows Enables the WAL summarizer process. Note that WAL summarization can - be enabled either on a primary or on a standby. WAL summarization - cannot be enabled when wal_level is set to - minimal. This parameter can only be set in the - postgresql.conf file or on the server command line. - The default is off. + be enabled either on a primary or on a standby. This parameter can only + be set in the postgresql.conf file or on the server + command line. The default is off. + + + The server cannot be started with summarize_wal=on + if wal_level is set to minimal. If + summarize_wal=on is configured after server startup + while wal_level=minimal, the summarizer will run + but refuse to generate summary files for any WAL generated with + wal_level=minimal. diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 3f93c61aa3c..fd5699f4d85 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -28001,6 +28001,17 @@ acl | {postgres=arwdDxtm/postgres,foo=r/postgres} not running, it will be equal to summarized_lsn. summarizer_pid is the PID of the WAL summarizer process, if it is running, and otherwise NULL. + + + As a special exception, the WAL summarizer will refuse to generate + WAL summary files if run on WAL generated under + wal_level=minimal, since such summaries would be + unsafe to use as the basis for an incremental backup. In this case, + the fields above will continue to advance as if summaries were being + generated, but nothing will be written to disk. Once the summarizer + reaches WAL generated while wal_level was set + to replica or higher, it will resume writing + summaries to disk. diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index e455400716d..363294d6234 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -33,6 +33,27 @@ const struct config_enum_entry wal_level_options[] = { {NULL, 0, false} }; +/* + * Find a string representation for wal_level + */ +static const char * +get_wal_level_string(int wal_level) +{ + const struct config_enum_entry *entry; + const char *wal_level_str = "?"; + + for (entry = wal_level_options; entry->name; entry++) + { + if (entry->val == wal_level) + { + wal_level_str = entry->name; + break; + } + } + + return wal_level_str; +} + void xlog_desc(StringInfo buf, XLogReaderState *record) { @@ -45,7 +66,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "redo %X/%X; " - "tli %u; prev tli %u; fpw %s; xid %u:%u; oid %u; multi %u; offset %u; " + "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; " "oldest xid %u in DB %u; oldest multi %u in DB %u; " "oldest/newest commit timestamp xid: %u/%u; " "oldest running xid %u; %s", @@ -53,6 +74,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) checkpoint->ThisTimeLineID, checkpoint->PrevTimeLineID, checkpoint->fullPageWrites ? "true" : "false", + get_wal_level_string(checkpoint->wal_level), EpochFromFullTransactionId(checkpoint->nextXid), XidFromFullTransactionId(checkpoint->nextXid), checkpoint->nextOid, @@ -95,20 +117,9 @@ xlog_desc(StringInfo buf, XLogReaderState *record) { xl_parameter_change xlrec; const char *wal_level_str; - const struct config_enum_entry *entry; memcpy(&xlrec, rec, sizeof(xl_parameter_change)); - - /* Find a string representation for wal_level */ - wal_level_str = "?"; - for (entry = wal_level_options; entry->name; entry++) - { - if (entry->val == xlrec.wal_level) - { - wal_level_str = entry->name; - break; - } - } + wal_level_str = get_wal_level_string(xlrec.wal_level); appendStringInfo(buf, "max_connections=%d max_worker_processes=%d " "max_wal_senders=%d max_prepared_xacts=%d " @@ -135,9 +146,10 @@ xlog_desc(StringInfo buf, XLogReaderState *record) xl_end_of_recovery xlrec; memcpy(&xlrec, rec, sizeof(xl_end_of_recovery)); - appendStringInfo(buf, "tli %u; prev tli %u; time %s", + appendStringInfo(buf, "tli %u; prev tli %u; time %s; wal_level %s", xlrec.ThisTimeLineID, xlrec.PrevTimeLineID, - timestamptz_to_str(xlrec.end_time)); + timestamptz_to_str(xlrec.end_time), + get_wal_level_string(xlrec.wal_level)); } else if (info == XLOG_OVERWRITE_CONTRECORD) { @@ -150,7 +162,10 @@ xlog_desc(StringInfo buf, XLogReaderState *record) } else if (info == XLOG_CHECKPOINT_REDO) { - /* No details to write out */ + int wal_level; + + memcpy(&wal_level, rec, sizeof(int)); + appendStringInfo(buf, "wal_level %s", get_wal_level_string(wal_level)); } } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 33e27a6e72c..636be5ca4da 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -6934,6 +6934,7 @@ CreateCheckPoint(int flags) WALInsertLockAcquireExclusive(); checkPoint.fullPageWrites = Insert->fullPageWrites; + checkPoint.wal_level = wal_level; if (shutdown) { @@ -6987,11 +6988,9 @@ CreateCheckPoint(int flags) */ if (!shutdown) { - int dummy = 0; - - /* Record must have payload to avoid assertion failure. */ + /* Include WAL level in record for WAL summarizer's benefit. */ XLogBeginInsert(); - XLogRegisterData((char *) &dummy, sizeof(dummy)); + XLogRegisterData((char *) &wal_level, sizeof(wal_level)); (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO); /* @@ -7314,6 +7313,7 @@ CreateEndOfRecoveryRecord(void) elog(ERROR, "can only be used to end recovery"); xlrec.end_time = GetCurrentTimestamp(); + xlrec.wal_level = wal_level; WALInsertLockAcquireExclusive(); xlrec.ThisTimeLineID = XLogCtl->InsertTimeLineID; diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c index 55dc2ea8f53..83c178e7662 100644 --- a/src/backend/postmaster/walsummarizer.c +++ b/src/backend/postmaster/walsummarizer.c @@ -154,7 +154,8 @@ static void SummarizeSmgrRecord(XLogReaderState *xlogreader, BlockRefTable *brtab); static void SummarizeXactRecord(XLogReaderState *xlogreader, BlockRefTable *brtab); -static bool SummarizeXlogRecord(XLogReaderState *xlogreader); +static bool SummarizeXlogRecord(XLogReaderState *xlogreader, + bool *new_fast_forward); static int summarizer_read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, @@ -802,6 +803,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, char final_path[MAXPGPATH]; WalSummaryIO io; BlockRefTable *brtab = CreateEmptyBlockRefTable(); + bool fast_forward = true; /* Initialize private data for xlogreader. */ private_data = (SummarizerReadLocalXLogPrivate *) @@ -900,7 +902,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, int block_id; char *errormsg; XLogRecord *record; - bool stop_requested = false; + uint8 rmid; HandleWalSummarizerInterrupts(); @@ -969,56 +971,86 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, break; } - /* Special handling for particular types of WAL records. */ - switch (XLogRecGetRmid(xlogreader)) + /* + * Certain types of records require special handling. Redo points and + * shutdown checkpoints trigger creation of new summary files and can + * also cause us to enter or exit "fast forward" mode. Other types of + * records can require special updates to the block reference table. + */ + rmid = XLogRecGetRmid(xlogreader); + if (rmid == RM_XLOG_ID) { - case RM_DBASE_ID: - SummarizeDbaseRecord(xlogreader, brtab); - break; - case RM_SMGR_ID: - SummarizeSmgrRecord(xlogreader, brtab); - break; - case RM_XACT_ID: - SummarizeXactRecord(xlogreader, brtab); - break; - case RM_XLOG_ID: - stop_requested = SummarizeXlogRecord(xlogreader); - break; - default: - break; + bool new_fast_forward; + + /* + * If we've already processed some WAL records when we hit a redo + * point or shutdown checkpoint, then we stop summarization before + * including this record in the current file, so that it will be + * the first record in the next file. + * + * When we hit one of those record types as the first record in a + * file, we adjust our notion of whether we're fast-forwarding. + * Any WAL generated with wal_level=minimal must be skipped + * without actually generating any summary file, because an + * incremental backup that crosses such WAL would be unsafe. + */ + if (SummarizeXlogRecord(xlogreader, &new_fast_forward)) + { + if (xlogreader->ReadRecPtr > summary_start_lsn) + { + summary_end_lsn = xlogreader->ReadRecPtr; + break; + } + else + fast_forward = new_fast_forward; + } + } + else if (!fast_forward) + { + /* + * This switch handles record types that require extra updates to + * the contents of the block reference table. + */ + switch (rmid) + { + case RM_DBASE_ID: + SummarizeDbaseRecord(xlogreader, brtab); + break; + case RM_SMGR_ID: + SummarizeSmgrRecord(xlogreader, brtab); + break; + case RM_XACT_ID: + SummarizeXactRecord(xlogreader, brtab); + break; + } } /* - * If we've been told that it's time to end this WAL summary file, do - * so. As an exception, if there's nothing included in this WAL - * summary file yet, then stopping doesn't make any sense, and we - * should wait until the next stop point instead. + * If we're in fast-forward mode, we don't really need to do anything. + * Otherwise, feed block references from xlog record to block + * reference table. */ - if (stop_requested && xlogreader->ReadRecPtr > summary_start_lsn) + if (!fast_forward) { - summary_end_lsn = xlogreader->ReadRecPtr; - break; - } + for (block_id = 0; block_id <= XLogRecMaxBlockId(xlogreader); + block_id++) + { + RelFileLocator rlocator; + ForkNumber forknum; + BlockNumber blocknum; - /* Feed block references from xlog record to block reference table. */ - for (block_id = 0; block_id <= XLogRecMaxBlockId(xlogreader); - block_id++) - { - RelFileLocator rlocator; - ForkNumber forknum; - BlockNumber blocknum; + if (!XLogRecGetBlockTagExtended(xlogreader, block_id, &rlocator, + &forknum, &blocknum, NULL)) + continue; - if (!XLogRecGetBlockTagExtended(xlogreader, block_id, &rlocator, - &forknum, &blocknum, NULL)) - continue; - - /* - * As we do elsewhere, ignore the FSM fork, because it's not fully - * WAL-logged. - */ - if (forknum != FSM_FORKNUM) - BlockRefTableMarkBlockModified(brtab, &rlocator, forknum, - blocknum); + /* + * As we do elsewhere, ignore the FSM fork, because it's not + * fully WAL-logged. + */ + if (forknum != FSM_FORKNUM) + BlockRefTableMarkBlockModified(brtab, &rlocator, forknum, + blocknum); + } } /* Update our notion of where this summary file ends. */ @@ -1047,9 +1079,10 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, /* * If a timeline switch occurs, we may fail to make any progress at all * before exiting the loop above. If that happens, we don't write a WAL - * summary file at all. + * summary file at all. We can also skip writing a file if we're in + * fast-forward mode. */ - if (summary_end_lsn > summary_start_lsn) + if (summary_end_lsn > summary_start_lsn && !fast_forward) { /* Generate temporary and final path name. */ snprintf(temp_path, MAXPGPATH, @@ -1085,6 +1118,14 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, durable_rename(temp_path, final_path, ERROR); } + /* If we skipped a non-zero amount of WAL, log a debug message. */ + if (summary_end_lsn > summary_start_lsn && fast_forward) + ereport(DEBUG1, + errmsg("skipped summarizing WAL on TLI %u from %X/%X to %X/%X", + tli, + LSN_FORMAT_ARGS(summary_start_lsn), + LSN_FORMAT_ARGS(summary_end_lsn))); + return summary_end_lsn; } @@ -1263,22 +1304,70 @@ SummarizeXactRecord(XLogReaderState *xlogreader, BlockRefTable *brtab) /* * Special handling for WAL records with RM_XLOG_ID. + * + * The return value is true if WAL summarization should stop before this + * record and false otherwise. When the return value is true, + * *new_fast_forward indicates whether future processing should be done + * in fast forward mode (i.e. read WAL without emitting summaries) or not. */ static bool -SummarizeXlogRecord(XLogReaderState *xlogreader) +SummarizeXlogRecord(XLogReaderState *xlogreader, bool *new_fast_forward) { uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK; + int record_wal_level; - if (info == XLOG_CHECKPOINT_REDO || info == XLOG_CHECKPOINT_SHUTDOWN) + if (info == XLOG_CHECKPOINT_REDO) { - /* - * This is an LSN at which redo might begin, so we'd like - * summarization to stop just before this WAL record. - */ - return true; + /* Payload is wal_level at the time record was written. */ + memcpy(&record_wal_level, XLogRecGetData(xlogreader), sizeof(int)); + } + else if (info == XLOG_CHECKPOINT_SHUTDOWN) + { + CheckPoint rec_ckpt; + + /* Extract wal_level at time record was written from payload. */ + memcpy(&rec_ckpt, XLogRecGetData(xlogreader), sizeof(CheckPoint)); + record_wal_level = rec_ckpt.wal_level; + } + else if (info == XLOG_PARAMETER_CHANGE) + { + xl_parameter_change xlrec; + + /* Extract wal_level at time record was written from payload. */ + memcpy(&xlrec, XLogRecGetData(xlogreader), + sizeof(xl_parameter_change)); + record_wal_level = xlrec.wal_level; + } + else if (info == XLOG_END_OF_RECOVERY) + { + xl_end_of_recovery xlrec; + + /* Extract wal_level at time record was written from payload. */ + memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery)); + record_wal_level = xlrec.wal_level; + } + else + { + /* No special handling required. Return false. */ + return false; } - return false; + /* + * Redo can only begin at an XLOG_CHECKPOINT_REDO or + * XLOG_CHECKPOINT_SHUTDOWN record, so we want WAL summarization to begin + * at those points. Hence, when those records are encountered, return + * true, so that we stop just before summarizing either of those records. + * + * We also reach here if we just saw XLOG_END_OF_RECOVERY or + * XLOG_PARAMETER_CHANGE. These are not places where recovery can start, + * but they're still relevant here. A new timeline can begin with + * XLOG_END_OF_RECOVERY, so we need to confirm the WAL level at that + * point; and a restart can provoke XLOG_PARAMETER_CHANGE after an + * intervening change to postgresql.conf, which might force us to stop + * summarizing. + */ + *new_fast_forward = (record_wal_level == WAL_LEVEL_MINIMAL); + return true; } /* diff --git a/src/bin/pg_combinebackup/meson.build b/src/bin/pg_combinebackup/meson.build index 1d4b9c218fd..d871b2e3b80 100644 --- a/src/bin/pg_combinebackup/meson.build +++ b/src/bin/pg_combinebackup/meson.build @@ -34,6 +34,7 @@ tests += { 't/004_manifest.pl', 't/005_integrity.pl', 't/006_db_file_copy.pl', + 't/007_wal_level_minimal.pl', ], } } diff --git a/src/bin/pg_combinebackup/t/007_wal_level_minimal.pl b/src/bin/pg_combinebackup/t/007_wal_level_minimal.pl new file mode 100644 index 00000000000..900df6b2864 --- /dev/null +++ b/src/bin/pg_combinebackup/t/007_wal_level_minimal.pl @@ -0,0 +1,73 @@ +# Copyright (c) 2021-2024, PostgreSQL Global Development Group +# +# This test aims to validate that taking an incremental backup fails when +# wal_level has been changed to minimal between the full backup and the +# attempted incremental backup. + +use strict; +use warnings FATAL => 'all'; +use File::Compare; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Can be changed to test the other modes. +my $mode = $ENV{PG_TEST_PG_COMBINEBACKUP_MODE} || '--copy'; + +note "testing using mode $mode"; + +# Set up a new database instance. +my $node1 = PostgreSQL::Test::Cluster->new('node1'); +$node1->init(allows_streaming => 1); +$node1->append_conf('postgresql.conf', <start; + +# Create a table and insert a test row into it. +$node1->safe_psql('postgres', <backup_dir . '/backup1'; +$node1->command_ok( + [ 'pg_basebackup', '-D', $backup1path, '--no-sync', '-cfast' ], + "full backup"); + +# Switch to wal_level=minimal, which also requires max_wal_senders=0 and +# summarize_wal=off +$node1->safe_psql('postgres', <restart; + +# Insert a second row on the original node. +$node1->safe_psql('postgres', <safe_psql('postgres', <restart; + +# Now take an incremental backup. +my $backup2path = $node1->backup_dir . '/backup2'; +$node1->command_fails_like( + [ + 'pg_basebackup', '-D', $backup2path, '--no-sync', '-cfast', + '--incremental', $backup1path . '/backup_manifest' + ], + qr/WAL summaries are required on timeline 1 from.*are incomplete/, + "incremental backup fails"); + +# OK, that's all. +done_testing(); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 5161b72f281..c6a91fb4560 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -31,7 +31,7 @@ /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD115 /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD116 /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { @@ -302,6 +302,7 @@ typedef struct xl_end_of_recovery TimestampTz end_time; TimeLineID ThisTimeLineID; /* new TLI */ TimeLineID PrevTimeLineID; /* previous TLI we forked off from */ + int wal_level; } xl_end_of_recovery; /* diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index a00606ffcdf..e80ff8e4140 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -22,7 +22,7 @@ /* Version identifier for this pg_control format */ -#define PG_CONTROL_VERSION 1300 +#define PG_CONTROL_VERSION 1700 /* Nonce key length, see below */ #define MOCK_AUTH_NONCE_LEN 32 @@ -40,6 +40,7 @@ typedef struct CheckPoint TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new * timeline (equals ThisTimeLineID otherwise) */ bool fullPageWrites; /* current full_page_writes */ + int wal_level; /* current wal_level */ FullTransactionId nextXid; /* next free transaction ID */ Oid nextOid; /* next free OID */ MultiXactId nextMulti; /* next free MultiXactId */