1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-08 11:42:09 +03:00

Add GUC to enable compression of full page images stored in WAL.

When newly-added GUC parameter, wal_compression, is on, the PostgreSQL server
compresses a full page image written to WAL when full_page_writes is on or
during a base backup. A compressed page image will be decompressed during WAL
replay. Turning this parameter on can reduce the WAL volume without increasing
the risk of unrecoverable data corruption, but at the cost of some extra CPU
spent on the compression during WAL logging and on the decompression during
WAL replay.

This commit changes the WAL format (so bumping WAL version number) so that
the one-byte flag indicating whether a full page image is compressed or not is
included in its header information. This means that the commit increases the
WAL volume one-byte per a full page image even if WAL compression is not used
at all. We can save that one-byte by borrowing one-bit from the existing field
like hole_offset in the header and using it as the flag, for example. But which
would reduce the code readability and the extensibility of the feature.
Per discussion, it's not worth paying those prices to save only one-byte, so we
decided to add the one-byte flag to the header.

This commit doesn't introduce any new compression algorithm like lz4.
Currently a full page image is compressed using the existing PGLZ algorithm.
Per discussion, we decided to use it at least in the first version of the
feature because there were no performance reports showing that its compression
ratio is unacceptably lower than that of other algorithm. Of course,
in the future, it's worth considering the support of other compression
algorithm for the better compression.

Rahila Syed and Michael Paquier, reviewed in various versions by myself,
Andres Freund, Robert Haas, Abhijit Menon-Sen and many others.
This commit is contained in:
Fujii Masao
2015-03-11 15:52:24 +09:00
parent 2fbb286647
commit 57aa5b2bb1
11 changed files with 320 additions and 39 deletions

View File

@ -20,6 +20,7 @@
#include "access/xlog_internal.h"
#include "access/xlogreader.h"
#include "catalog/pg_control.h"
#include "common/pg_lzcompress.h"
static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
@ -1037,9 +1038,78 @@ DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
if (blk->has_image)
{
COPY_HEADER_FIELD(&blk->bimg_len, sizeof(uint16));
COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
datatotal += BLCKSZ - blk->hole_length;
COPY_HEADER_FIELD(&blk->bimg_info, sizeof(uint8));
if (blk->bimg_info & BKPIMAGE_IS_COMPRESSED)
{
if (blk->bimg_info & BKPIMAGE_HAS_HOLE)
COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
else
blk->hole_length = 0;
}
else
blk->hole_length = BLCKSZ - blk->bimg_len;
datatotal += blk->bimg_len;
/*
* cross-check that hole_offset > 0, hole_length > 0 and
* bimg_len < BLCKSZ if the HAS_HOLE flag is set.
*/
if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
(blk->hole_offset == 0 ||
blk->hole_length == 0 ||
blk->bimg_len == BLCKSZ))
{
report_invalid_record(state,
"BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X",
(unsigned int) blk->hole_offset,
(unsigned int) blk->hole_length,
(unsigned int) blk->bimg_len,
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
goto err;
}
/*
* cross-check that hole_offset == 0 and hole_length == 0
* if the HAS_HOLE flag is not set.
*/
if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
(blk->hole_offset != 0 || blk->hole_length != 0))
{
report_invalid_record(state,
"BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X",
(unsigned int) blk->hole_offset,
(unsigned int) blk->hole_length,
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
goto err;
}
/*
* cross-check that bimg_len < BLCKSZ
* if the IS_COMPRESSED flag is set.
*/
if ((blk->bimg_info & BKPIMAGE_IS_COMPRESSED) &&
blk->bimg_len == BLCKSZ)
{
report_invalid_record(state,
"BKPIMAGE_IS_COMPRESSED set, but block image length %u at %X/%X",
(unsigned int) blk->bimg_len,
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
goto err;
}
/*
* cross-check that bimg_len = BLCKSZ if neither
* HAS_HOLE nor IS_COMPRESSED flag is set.
*/
if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
!(blk->bimg_info & BKPIMAGE_IS_COMPRESSED) &&
blk->bimg_len != BLCKSZ)
{
report_invalid_record(state,
"neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_IS_COMPRESSED set, but block image length is %u at %X/%X",
(unsigned int) blk->data_len,
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
goto err;
}
}
if (!(fork_flags & BKPBLOCK_SAME_REL))
{
@ -1094,7 +1164,7 @@ DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
if (blk->has_image)
{
blk->bkp_image = ptr;
ptr += BLCKSZ - blk->hole_length;
ptr += blk->bimg_len;
}
if (blk->has_data)
{
@ -1200,6 +1270,8 @@ bool
RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
{
DecodedBkpBlock *bkpb;
char *ptr;
char tmp[BLCKSZ];
if (!record->blocks[block_id].in_use)
return false;
@ -1207,18 +1279,35 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
return false;
bkpb = &record->blocks[block_id];
ptr = bkpb->bkp_image;
if (bkpb->bimg_info & BKPIMAGE_IS_COMPRESSED)
{
/* If a backup block image is compressed, decompress it */
if (pglz_decompress(ptr, bkpb->bimg_len, tmp,
BLCKSZ - bkpb->hole_length) < 0)
{
report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
(uint32) (record->ReadRecPtr >> 32),
(uint32) record->ReadRecPtr,
block_id);
return false;
}
ptr = tmp;
}
/* generate page, taking into account hole if necessary */
if (bkpb->hole_length == 0)
{
memcpy(page, bkpb->bkp_image, BLCKSZ);
memcpy(page, ptr, BLCKSZ);
}
else
{
memcpy(page, bkpb->bkp_image, bkpb->hole_offset);
memcpy(page, ptr, bkpb->hole_offset);
/* must zero-fill the hole */
MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length);
memcpy(page + (bkpb->hole_offset + bkpb->hole_length),
bkpb->bkp_image + bkpb->hole_offset,
ptr + bkpb->hole_offset,
BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
}