From 053183138a7a11408d6faa9281001ff7b1ffee2e Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 3 Oct 2023 10:37:20 -0400 Subject: [PATCH] In basebackup.c, refactor to create verify_page_checksum. If checksum verification fails for a particular page, we reread the page and try one more time. The code that does this somewhat complex and difficult to follow. Move some of the logic into a new function and rearrange the code a bit to try to make it clearer. This way, we don't need the block_retry Boolean, a couple of other variables move from sendFile() into the new function, and some code is now less deeply indented. Patch by me, reviewed by David Steele. Discussion: http://postgr.es/m/CA+TgmoYt5jXH4U6cu1dm9Oe2FTn1aae6hBNhZzJJjyjbE_zYig@mail.gmail.com --- src/backend/backup/basebackup.c | 188 ++++++++++++++++++-------------- 1 file changed, 104 insertions(+), 84 deletions(-) diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index 5d66014499a..56e020732b8 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -83,6 +83,9 @@ static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeo static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, struct stat *statbuf, bool missing_ok, Oid dboid, backup_manifest_info *manifest, const char *spcoid); +static bool verify_page_checksum(Page page, XLogRecPtr start_lsn, + BlockNumber blkno, + uint16 *expected_checksum); static void sendFileWithContent(bbsink *sink, const char *filename, const char *content, backup_manifest_info *manifest); @@ -1485,14 +1488,11 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, { int fd; BlockNumber blkno = 0; - bool block_retry = false; - uint16 checksum; int checksum_failures = 0; off_t cnt; int i; pgoff_t len = 0; char *page; - PageHeader phdr; int segmentno = 0; char *segmentpath; bool verify_checksum = false; @@ -1582,94 +1582,78 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, { for (i = 0; i < cnt / BLCKSZ; i++) { + int reread_cnt; + uint16 expected_checksum; + page = sink->bbs_buffer + BLCKSZ * i; + /* If the page is OK, go on to the next one. */ + if (verify_page_checksum(page, sink->bbs_state->startptr, + blkno + i + segmentno * RELSEG_SIZE, + &expected_checksum)) + continue; + /* - * Only check pages which have not been modified since the - * start of the base backup. Otherwise, they might have been - * written only halfway and the checksum would not be valid. - * However, replaying WAL would reinstate the correct page in - * this case. We also skip completely new pages, since they - * don't have a checksum yet. + * Retry the block on the first failure. It's possible that + * we read the first 4K page of the block just before postgres + * updated the entire block so it ends up looking torn to us. + * If, before we retry the read, the concurrent write of the + * block finishes, the page LSN will be updated and we'll + * realize that we should ignore this block. + * + * There's no guarantee that this will actually happen, + * though: the torn write could take an arbitrarily long time + * to complete. Retrying multiple times wouldn't fix this + * problem, either, though it would reduce the chances of it + * happening in practice. The only real fix here seems to be + * to have some kind of interlock that allows us to wait until + * we can be certain that no write to the block is in + * progress. Since we don't have any such thing right now, we + * just do this and hope for the best. */ - if (!PageIsNew(page) && PageGetLSN(page) < sink->bbs_state->startptr) + reread_cnt = + basebackup_read_file(fd, + sink->bbs_buffer + BLCKSZ * i, + BLCKSZ, len + BLCKSZ * i, + readfilename, + false); + if (reread_cnt == 0) { - checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); - phdr = (PageHeader) page; - if (phdr->pd_checksum != checksum) - { - /* - * Retry the block on the first failure. It's - * possible that we read the first 4K page of the - * block just before postgres updated the entire block - * so it ends up looking torn to us. If, before we - * retry the read, the concurrent write of the block - * finishes, the page LSN will be updated and we'll - * realize that we should ignore this block. - * - * There's no guarantee that this will actually - * happen, though: the torn write could take an - * arbitrarily long time to complete. Retrying - * multiple times wouldn't fix this problem, either, - * though it would reduce the chances of it happening - * in practice. The only real fix here seems to be to - * have some kind of interlock that allows us to wait - * until we can be certain that no write to the block - * is in progress. Since we don't have any such thing - * right now, we just do this and hope for the best. - */ - if (block_retry == false) - { - int reread_cnt; - - /* Reread the failed block */ - reread_cnt = - basebackup_read_file(fd, - sink->bbs_buffer + BLCKSZ * i, - BLCKSZ, len + BLCKSZ * i, - readfilename, - false); - if (reread_cnt == 0) - { - /* - * If we hit end-of-file, a concurrent - * truncation must have occurred, so break out - * of this loop just as if the initial fread() - * returned 0. We'll drop through to the same - * code that handles that case. (We must fix - * up cnt first, though.) - */ - cnt = BLCKSZ * i; - break; - } - - /* Set flag so we know a retry was attempted */ - block_retry = true; - - /* Reset loop to validate the block again */ - i--; - continue; - } - - checksum_failures++; - - if (checksum_failures <= 5) - ereport(WARNING, - (errmsg("checksum verification failed in " - "file \"%s\", block %u: calculated " - "%X but expected %X", - readfilename, blkno, checksum, - phdr->pd_checksum))); - if (checksum_failures == 5) - ereport(WARNING, - (errmsg("further checksum verification " - "failures in file \"%s\" will not " - "be reported", readfilename))); - } + /* + * If we hit end-of-file, a concurrent truncation must + * have occurred, so break out of this loop just as if the + * initial fread() returned 0. We'll drop through to the + * same code that handles that case. (We must fix up cnt + * first, though.) + */ + cnt = BLCKSZ * i; + break; } - block_retry = false; - blkno++; + + /* If the page now looks OK, go on to the next one. */ + if (verify_page_checksum(page, sink->bbs_state->startptr, + blkno + i + segmentno * RELSEG_SIZE, + &expected_checksum)) + continue; + + /* Handle checksum failure. */ + checksum_failures++; + if (checksum_failures <= 5) + ereport(WARNING, + (errmsg("checksum verification failed in " + "file \"%s\", block %u: calculated " + "%X but expected %X", + readfilename, blkno + i, expected_checksum, + ((PageHeader) page)->pd_checksum))); + if (checksum_failures == 5) + ereport(WARNING, + (errmsg("further checksum verification " + "failures in file \"%s\" will not " + "be reported", readfilename))); } + + /* Update block number for next pass through the outer loop. */ + blkno += i; } /* @@ -1734,6 +1718,42 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, return true; } +/* + * Try to verify the checksum for the provided page, if it seems appropriate + * to do so. + * + * Returns true if verification succeeds or if we decide not to check it, + * and false if verification fails. When return false, it also sets + * *expected_checksum to the computed value. + */ +static bool +verify_page_checksum(Page page, XLogRecPtr start_lsn, BlockNumber blkno, + uint16 *expected_checksum) +{ + PageHeader phdr; + uint16 checksum; + + /* + * Only check pages which have not been modified since the start of the + * base backup. Otherwise, they might have been written only halfway and + * the checksum would not be valid. However, replaying WAL would + * reinstate the correct page in this case. We also skip completely new + * pages, since they don't have a checksum yet. + */ + if (PageIsNew(page) || PageGetLSN(page) >= start_lsn) + return true; + + /* Perform the actual checksum calculation. */ + checksum = pg_checksum_page(page, blkno); + + /* See whether it matches the value from the page. */ + phdr = (PageHeader) page; + if (phdr->pd_checksum == checksum) + return true; + *expected_checksum = checksum; + return false; +} + static int64 _tarWriteHeader(bbsink *sink, const char *filename, const char *linktarget, struct stat *statbuf, bool sizeonly)