mirror of
https://github.com/postgres/postgres.git
synced 2025-06-01 14:21:49 +03:00
In basebackup.c, refactor to create verify_page_checksum.
If checksum verification fails for a particular page, we reread the page and try one more time. The code that does this somewhat complex and difficult to follow. Move some of the logic into a new function and rearrange the code a bit to try to make it clearer. This way, we don't need the block_retry Boolean, a couple of other variables move from sendFile() into the new function, and some code is now less deeply indented. Patch by me, reviewed by David Steele. Discussion: http://postgr.es/m/CA+TgmoYt5jXH4U6cu1dm9Oe2FTn1aae6hBNhZzJJjyjbE_zYig@mail.gmail.com
This commit is contained in:
parent
a956bd3fa9
commit
053183138a
@ -83,6 +83,9 @@ static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeo
|
|||||||
static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||||
struct stat *statbuf, bool missing_ok, Oid dboid,
|
struct stat *statbuf, bool missing_ok, Oid dboid,
|
||||||
backup_manifest_info *manifest, const char *spcoid);
|
backup_manifest_info *manifest, const char *spcoid);
|
||||||
|
static bool verify_page_checksum(Page page, XLogRecPtr start_lsn,
|
||||||
|
BlockNumber blkno,
|
||||||
|
uint16 *expected_checksum);
|
||||||
static void sendFileWithContent(bbsink *sink, const char *filename,
|
static void sendFileWithContent(bbsink *sink, const char *filename,
|
||||||
const char *content,
|
const char *content,
|
||||||
backup_manifest_info *manifest);
|
backup_manifest_info *manifest);
|
||||||
@ -1485,14 +1488,11 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
|||||||
{
|
{
|
||||||
int fd;
|
int fd;
|
||||||
BlockNumber blkno = 0;
|
BlockNumber blkno = 0;
|
||||||
bool block_retry = false;
|
|
||||||
uint16 checksum;
|
|
||||||
int checksum_failures = 0;
|
int checksum_failures = 0;
|
||||||
off_t cnt;
|
off_t cnt;
|
||||||
int i;
|
int i;
|
||||||
pgoff_t len = 0;
|
pgoff_t len = 0;
|
||||||
char *page;
|
char *page;
|
||||||
PageHeader phdr;
|
|
||||||
int segmentno = 0;
|
int segmentno = 0;
|
||||||
char *segmentpath;
|
char *segmentpath;
|
||||||
bool verify_checksum = false;
|
bool verify_checksum = false;
|
||||||
@ -1582,94 +1582,78 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
|||||||
{
|
{
|
||||||
for (i = 0; i < cnt / BLCKSZ; i++)
|
for (i = 0; i < cnt / BLCKSZ; i++)
|
||||||
{
|
{
|
||||||
|
int reread_cnt;
|
||||||
|
uint16 expected_checksum;
|
||||||
|
|
||||||
page = sink->bbs_buffer + BLCKSZ * i;
|
page = sink->bbs_buffer + BLCKSZ * i;
|
||||||
|
|
||||||
|
/* If the page is OK, go on to the next one. */
|
||||||
|
if (verify_page_checksum(page, sink->bbs_state->startptr,
|
||||||
|
blkno + i + segmentno * RELSEG_SIZE,
|
||||||
|
&expected_checksum))
|
||||||
|
continue;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Only check pages which have not been modified since the
|
* Retry the block on the first failure. It's possible that
|
||||||
* start of the base backup. Otherwise, they might have been
|
* we read the first 4K page of the block just before postgres
|
||||||
* written only halfway and the checksum would not be valid.
|
* updated the entire block so it ends up looking torn to us.
|
||||||
* However, replaying WAL would reinstate the correct page in
|
* If, before we retry the read, the concurrent write of the
|
||||||
* this case. We also skip completely new pages, since they
|
* block finishes, the page LSN will be updated and we'll
|
||||||
* don't have a checksum yet.
|
* realize that we should ignore this block.
|
||||||
|
*
|
||||||
|
* There's no guarantee that this will actually happen,
|
||||||
|
* though: the torn write could take an arbitrarily long time
|
||||||
|
* to complete. Retrying multiple times wouldn't fix this
|
||||||
|
* problem, either, though it would reduce the chances of it
|
||||||
|
* happening in practice. The only real fix here seems to be
|
||||||
|
* to have some kind of interlock that allows us to wait until
|
||||||
|
* we can be certain that no write to the block is in
|
||||||
|
* progress. Since we don't have any such thing right now, we
|
||||||
|
* just do this and hope for the best.
|
||||||
*/
|
*/
|
||||||
if (!PageIsNew(page) && PageGetLSN(page) < sink->bbs_state->startptr)
|
reread_cnt =
|
||||||
|
basebackup_read_file(fd,
|
||||||
|
sink->bbs_buffer + BLCKSZ * i,
|
||||||
|
BLCKSZ, len + BLCKSZ * i,
|
||||||
|
readfilename,
|
||||||
|
false);
|
||||||
|
if (reread_cnt == 0)
|
||||||
{
|
{
|
||||||
checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
|
/*
|
||||||
phdr = (PageHeader) page;
|
* If we hit end-of-file, a concurrent truncation must
|
||||||
if (phdr->pd_checksum != checksum)
|
* have occurred, so break out of this loop just as if the
|
||||||
{
|
* initial fread() returned 0. We'll drop through to the
|
||||||
/*
|
* same code that handles that case. (We must fix up cnt
|
||||||
* Retry the block on the first failure. It's
|
* first, though.)
|
||||||
* possible that we read the first 4K page of the
|
*/
|
||||||
* block just before postgres updated the entire block
|
cnt = BLCKSZ * i;
|
||||||
* so it ends up looking torn to us. If, before we
|
break;
|
||||||
* retry the read, the concurrent write of the block
|
|
||||||
* finishes, the page LSN will be updated and we'll
|
|
||||||
* realize that we should ignore this block.
|
|
||||||
*
|
|
||||||
* There's no guarantee that this will actually
|
|
||||||
* happen, though: the torn write could take an
|
|
||||||
* arbitrarily long time to complete. Retrying
|
|
||||||
* multiple times wouldn't fix this problem, either,
|
|
||||||
* though it would reduce the chances of it happening
|
|
||||||
* in practice. The only real fix here seems to be to
|
|
||||||
* have some kind of interlock that allows us to wait
|
|
||||||
* until we can be certain that no write to the block
|
|
||||||
* is in progress. Since we don't have any such thing
|
|
||||||
* right now, we just do this and hope for the best.
|
|
||||||
*/
|
|
||||||
if (block_retry == false)
|
|
||||||
{
|
|
||||||
int reread_cnt;
|
|
||||||
|
|
||||||
/* Reread the failed block */
|
|
||||||
reread_cnt =
|
|
||||||
basebackup_read_file(fd,
|
|
||||||
sink->bbs_buffer + BLCKSZ * i,
|
|
||||||
BLCKSZ, len + BLCKSZ * i,
|
|
||||||
readfilename,
|
|
||||||
false);
|
|
||||||
if (reread_cnt == 0)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* If we hit end-of-file, a concurrent
|
|
||||||
* truncation must have occurred, so break out
|
|
||||||
* of this loop just as if the initial fread()
|
|
||||||
* returned 0. We'll drop through to the same
|
|
||||||
* code that handles that case. (We must fix
|
|
||||||
* up cnt first, though.)
|
|
||||||
*/
|
|
||||||
cnt = BLCKSZ * i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Set flag so we know a retry was attempted */
|
|
||||||
block_retry = true;
|
|
||||||
|
|
||||||
/* Reset loop to validate the block again */
|
|
||||||
i--;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
checksum_failures++;
|
|
||||||
|
|
||||||
if (checksum_failures <= 5)
|
|
||||||
ereport(WARNING,
|
|
||||||
(errmsg("checksum verification failed in "
|
|
||||||
"file \"%s\", block %u: calculated "
|
|
||||||
"%X but expected %X",
|
|
||||||
readfilename, blkno, checksum,
|
|
||||||
phdr->pd_checksum)));
|
|
||||||
if (checksum_failures == 5)
|
|
||||||
ereport(WARNING,
|
|
||||||
(errmsg("further checksum verification "
|
|
||||||
"failures in file \"%s\" will not "
|
|
||||||
"be reported", readfilename)));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
block_retry = false;
|
|
||||||
blkno++;
|
/* If the page now looks OK, go on to the next one. */
|
||||||
|
if (verify_page_checksum(page, sink->bbs_state->startptr,
|
||||||
|
blkno + i + segmentno * RELSEG_SIZE,
|
||||||
|
&expected_checksum))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Handle checksum failure. */
|
||||||
|
checksum_failures++;
|
||||||
|
if (checksum_failures <= 5)
|
||||||
|
ereport(WARNING,
|
||||||
|
(errmsg("checksum verification failed in "
|
||||||
|
"file \"%s\", block %u: calculated "
|
||||||
|
"%X but expected %X",
|
||||||
|
readfilename, blkno + i, expected_checksum,
|
||||||
|
((PageHeader) page)->pd_checksum)));
|
||||||
|
if (checksum_failures == 5)
|
||||||
|
ereport(WARNING,
|
||||||
|
(errmsg("further checksum verification "
|
||||||
|
"failures in file \"%s\" will not "
|
||||||
|
"be reported", readfilename)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Update block number for next pass through the outer loop. */
|
||||||
|
blkno += i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1734,6 +1718,42 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try to verify the checksum for the provided page, if it seems appropriate
|
||||||
|
* to do so.
|
||||||
|
*
|
||||||
|
* Returns true if verification succeeds or if we decide not to check it,
|
||||||
|
* and false if verification fails. When return false, it also sets
|
||||||
|
* *expected_checksum to the computed value.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
verify_page_checksum(Page page, XLogRecPtr start_lsn, BlockNumber blkno,
|
||||||
|
uint16 *expected_checksum)
|
||||||
|
{
|
||||||
|
PageHeader phdr;
|
||||||
|
uint16 checksum;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only check pages which have not been modified since the start of the
|
||||||
|
* base backup. Otherwise, they might have been written only halfway and
|
||||||
|
* the checksum would not be valid. However, replaying WAL would
|
||||||
|
* reinstate the correct page in this case. We also skip completely new
|
||||||
|
* pages, since they don't have a checksum yet.
|
||||||
|
*/
|
||||||
|
if (PageIsNew(page) || PageGetLSN(page) >= start_lsn)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* Perform the actual checksum calculation. */
|
||||||
|
checksum = pg_checksum_page(page, blkno);
|
||||||
|
|
||||||
|
/* See whether it matches the value from the page. */
|
||||||
|
phdr = (PageHeader) page;
|
||||||
|
if (phdr->pd_checksum == checksum)
|
||||||
|
return true;
|
||||||
|
*expected_checksum = checksum;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static int64
|
static int64
|
||||||
_tarWriteHeader(bbsink *sink, const char *filename, const char *linktarget,
|
_tarWriteHeader(bbsink *sink, const char *filename, const char *linktarget,
|
||||||
struct stat *statbuf, bool sizeonly)
|
struct stat *statbuf, bool sizeonly)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user