diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 8c488506fad..b94dd4ac654 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2382,7 +2382,7 @@ The commands accepted in replication mode are: - BASE_BACKUP [ LABEL 'label' ] [ PROGRESS ] [ FAST ] [ WAL ] [ NOWAIT ] [ MAX_RATE rate ] [ TABLESPACE_MAP ] + BASE_BACKUP [ LABEL 'label' ] [ PROGRESS ] [ FAST ] [ WAL ] [ NOWAIT ] [ MAX_RATE rate ] [ TABLESPACE_MAP ] [ NOVERIFY_CHECKSUMS ] BASE_BACKUP @@ -2481,6 +2481,17 @@ The commands accepted in replication mode are: + + + NOVERIFY_CHECKSUMS + + + By default, checksums are verified during a base backup if they are + enabled. Specifying NOVERIFY_CHECKSUMS disables + this verification. + + + diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index e8921b1bb48..95045669c93 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -506,6 +506,22 @@ PostgreSQL documentation + + + + + + Disables verification of checksums, if they are enabled on the server + the base backup is taken from. + + + By default, checksums are verified and checksum failures will result in + a non-zero exit status. However, the base backup will not be removed in + this case, as if the --no-clean option was used. + + + + diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 516eea57f8d..300dbfbcd6a 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -31,6 +31,8 @@ #include "replication/basebackup.h" #include "replication/walsender.h" #include "replication/walsender_private.h" +#include "storage/bufpage.h" +#include "storage/checksum.h" #include "storage/dsm_impl.h" #include "storage/fd.h" #include "storage/ipc.h" @@ -70,6 +72,7 @@ static void parse_basebackup_options(List *options, basebackup_options *opt); static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli); static int compareWalFileNames(const void *a, const void *b); static void throttle(size_t increment); +static bool is_checksummed_file(const char *fullpath, const char *filename); /* Was the backup currently in-progress initiated in recovery mode? */ static bool backup_started_in_recovery = false; @@ -99,6 +102,15 @@ static TimeOffset elapsed_min_unit; /* The last check of the transfer rate. */ static TimestampTz throttled_last; +/* The starting XLOG position of the base backup. */ +static XLogRecPtr startptr; + +/* Total number of checksum failures during base backup. */ +static int64 total_checksum_failures; + +/* Do not verify checksums. */ +static bool noverify_checksums = false; + /* * The contents of these directories are removed or recreated during server * start so they are not included in backups. The directories themselves are @@ -175,6 +187,18 @@ static const char *excludeFiles[] = NULL }; +/* + * List of files excluded from checksum validation. + */ +static const char *noChecksumFiles[] = { + "pg_control", + "pg_filenode.map", + "pg_internal.init", + "PG_VERSION", + NULL, +}; + + /* * Called when ERROR or FATAL happens in perform_base_backup() after * we have started the backup - make sure we end it! @@ -194,7 +218,6 @@ base_backup_cleanup(int code, Datum arg) static void perform_base_backup(basebackup_options *opt) { - XLogRecPtr startptr; TimeLineID starttli; XLogRecPtr endptr; TimeLineID endtli; @@ -210,6 +233,8 @@ perform_base_backup(basebackup_options *opt) labelfile = makeStringInfo(); tblspc_map_file = makeStringInfo(); + total_checksum_failures = 0; + startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli, labelfile, &tablespaces, tblspc_map_file, @@ -568,6 +593,17 @@ perform_base_backup(basebackup_options *opt) pq_putemptymessage('c'); } SendXlogRecPtrResult(endptr, endtli); + + if (total_checksum_failures) + { + if (total_checksum_failures > 1) + ereport(WARNING, + (errmsg("%ld total checksum verification failures", total_checksum_failures))); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("checksum verification failure during base backup"))); + } + } /* @@ -597,6 +633,7 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_wal = false; bool o_maxrate = false; bool o_tablespace_map = false; + bool o_noverify_checksums = false; MemSet(opt, 0, sizeof(*opt)); foreach(lopt, options) @@ -676,6 +713,15 @@ parse_basebackup_options(List *options, basebackup_options *opt) opt->sendtblspcmapfile = true; o_tablespace_map = true; } + else if (strcmp(defel->defname, "noverify_checksums") == 0) + { + if (o_noverify_checksums) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + noverify_checksums = true; + o_noverify_checksums = true; + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); @@ -1257,6 +1303,33 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, return size; } +/* + * Check if a file should have its checksum validated. + * We validate checksums on files in regular tablespaces + * (including global and default) only, and in those there + * are some files that are explicitly excluded. + */ +static bool +is_checksummed_file(const char *fullpath, const char *filename) +{ + const char **f; + + /* Check that the file is in a tablespace */ + if (strncmp(fullpath, "./global/", 9) == 0 || + strncmp(fullpath, "./base/", 7) == 0 || + strncmp(fullpath, "/", 1) == 0) + { + /* Compare file against noChecksumFiles skiplist */ + for (f = noChecksumFiles; *f; f++) + if (strcmp(*f, filename) == 0) + return false; + + return true; + } + else + return false; +} + /***** * Functions for handling tar file format * @@ -1277,10 +1350,20 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf bool missing_ok) { FILE *fp; + BlockNumber blkno = 0; + bool block_retry = false; char buf[TAR_SEND_SIZE]; + uint16 checksum; + int checksum_failures = 0; size_t cnt; + int i; pgoff_t len = 0; + char *page; size_t pad; + PageHeader phdr; + int segmentno = 0; + char *segmentpath; + bool verify_checksum = false; fp = AllocateFile(readfilename, "rb"); if (fp == NULL) @@ -1294,8 +1377,142 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf _tarWriteHeader(tarfilename, NULL, statbuf, false); + if (!noverify_checksums && DataChecksumsEnabled()) + { + char *filename; + + /* + * Get the filename (excluding path). As last_dir_separator() + * includes the last directory separator, we chop that off by + * incrementing the pointer. + */ + filename = last_dir_separator(readfilename) + 1; + + if (is_checksummed_file(readfilename, filename)) + { + verify_checksum = true; + + /* + * Cut off at the segment boundary (".") to get the segment number + * in order to mix it into the checksum. + */ + segmentpath = strstr(filename, "."); + if (segmentpath != NULL) + { + segmentno = atoi(segmentpath + 1); + if (segmentno == 0) + ereport(ERROR, + (errmsg("invalid segment number %d in file \"%s\"", + segmentno, filename))); + } + } + } + while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) { + if (verify_checksum) + { + /* + * The checksums are verified at block level, so we iterate over + * the buffer in chunks of BLCKSZ, after making sure that + * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple + * of BLCKSZ bytes. + */ + Assert(TAR_SEND_SIZE % BLCKSZ == 0); + + if (cnt % BLCKSZ != 0) + { + ereport(WARNING, + (errmsg("cannot verify checksum in file \"%s\", block " + "%d: read buffer size %d and page size %d " + "differ", + readfilename, blkno, (int) cnt, BLCKSZ))); + verify_checksum = false; + continue; + } + for (i = 0; i < cnt / BLCKSZ; i++) + { + page = buf + BLCKSZ * i; + + /* + * Only check pages which have not been modified since the + * start of the base backup. Otherwise, they might have been + * written only halfway and the checksum would not be valid. + * However, replaying WAL would reinstate the correct page in + * this case. + */ + if (PageGetLSN(page) < startptr) + { + checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); + phdr = (PageHeader) page; + if (phdr->pd_checksum != checksum) + { + /* + * Retry the block on the first failure. It's + * possible that we read the first 4K page of the + * block just before postgres updated the entire block + * so it ends up looking torn to us. We only need to + * retry once because the LSN should be updated to + * something we can ignore on the next pass. If the + * error happens again then it is a true validation + * failure. + */ + if (block_retry == false) + { + /* Reread the failed block */ + if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not reread block %d of file \"%s\": %m", + blkno, readfilename))); + } + + if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + /* Set flag so we know a retry was attempted */ + block_retry = true; + + /* Reset loop to validate the block again */ + i--; + continue; + } + + checksum_failures++; + + if (checksum_failures <= 5) + ereport(WARNING, + (errmsg("checksum verification failed in " + "file \"%s\", block %d: calculated " + "%X but expected %X", + readfilename, blkno, checksum, + phdr->pd_checksum))); + if (checksum_failures == 5) + ereport(WARNING, + (errmsg("further checksum verification " + "failures in file \"%s\" will not " + "be reported", readfilename))); + } + } + block_retry = false; + blkno++; + } + } + /* Send the chunk as a CopyData message */ if (pq_putmessage('d', buf, cnt)) ereport(ERROR, @@ -1341,6 +1558,14 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf FreeFile(fp); + if (checksum_failures > 1) + { + ereport(WARNING, + (errmsg("file \"%s\" has a total of %d checksum verification " + "failures", readfilename, checksum_failures))); + } + total_checksum_failures += checksum_failures; + return true; } diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index beb2c2877b2..843a878ff37 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -77,6 +77,7 @@ static SQLCmd *make_sqlcmd(void); %token K_MAX_RATE %token K_WAL %token K_TABLESPACE_MAP +%token K_NOVERIFY_CHECKSUMS %token K_TIMELINE %token K_PHYSICAL %token K_LOGICAL @@ -154,7 +155,7 @@ var_name: IDENT { $$ = $1; } /* * BASE_BACKUP [LABEL '