1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-19 15:49:24 +03:00

Align the data block sizes of pg_dump's various compression modes.

After commit fe8192a95, compress_zstd.c tends to produce data block
sizes around 128K, and we don't really have any control over that
unless we want to overrule ZSTD_CStreamOutSize(), which seems like
a bad idea.  But let's try to align the other compression modes to
produce block sizes roughly comparable to that, so that pg_restore's
skip-data performance isn't enormously different for different modes.

gzip compression can be brought in line simply by setting
DEFAULT_IO_BUFFER_SIZE = 128K, which this patch does.  That
increases some unrelated buffer sizes, but none of them seem
problematic for modern platforms.

lz4's idea of appropriate block size is highly nonlinear:
if we just increase DEFAULT_IO_BUFFER_SIZE then the output
blocks end up around 200K.  I found that adjusting the slop
factor in LZ4State_compression_init was a not-too-ugly way
of bringing that number roughly into line.

With compress = none you get data blocks of the same sizes as the
table rows, which seems potentially problematic for narrow tables.
Introduce a layer of buffering to make that case match the others.

Comments in compress_io.h and 002_pg_dump.pl suggest that if
we increase DEFAULT_IO_BUFFER_SIZE then we need to increase the
amount of data fed through the tests in order to improve coverage.
I've not done that here, leaving it for a separate patch.

Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/3515357.1760128017@sss.pgh.pa.us
This commit is contained in:
Tom Lane
2025-10-16 12:50:18 -04:00
parent 812221b204
commit 66ec01dc41
4 changed files with 72 additions and 6 deletions

View File

@@ -22,9 +22,9 @@
*
* When changing this value, it's necessary to check the relevant test cases
* still exercise all the branches. This applies especially if the value is
* increased, in which case the overflow buffer may not be needed.
* increased, in which case some loops may not get iterated.
*/
#define DEFAULT_IO_BUFFER_SIZE 4096
#define DEFAULT_IO_BUFFER_SIZE (128 * 1024)
extern char *supports_compression(const pg_compress_specification compression_spec);

View File

@@ -100,9 +100,14 @@ LZ4State_compression_init(LZ4State *state)
state->buflen = LZ4F_compressBound(DEFAULT_IO_BUFFER_SIZE, &state->prefs);
/*
* Then double it, to ensure we're not forced to flush every time.
* Add some slop to ensure we're not forced to flush every time.
*
* The present slop factor of 50% is chosen so that the typical output
* block size is about 128K when DEFAULT_IO_BUFFER_SIZE = 128K. We might
* need a different slop factor to maintain that equivalence if
* DEFAULT_IO_BUFFER_SIZE is changed dramatically.
*/
state->buflen *= 2;
state->buflen += state->buflen / 2;
/*
* LZ4F_compressBegin requires a buffer that is greater or equal to

View File

@@ -22,6 +22,18 @@
*----------------------
*/
/*
 * Working state for the "none" compressor's write path.
 *
 * We buffer outgoing data, just to ensure that data blocks written to the
 * archive file are of reasonable size.  The read side could use this struct,
 * but there's no need because it does not retain data across calls.
 *
 * The first bufdata bytes of buffer are data that has been accepted from the
 * caller but not yet passed to the write callback; the writer below keeps
 * bufdata <= buflen by flushing before it appends to a full buffer.
 */
typedef struct NoneCompressorState
{
char *buffer; /* buffer for unwritten data */
size_t buflen; /* allocated size of buffer */
size_t bufdata; /* amount of valid data currently in buffer */
} NoneCompressorState;
/*
* Private routines
*/
@@ -49,13 +61,45 @@ static void
WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
const void *data, size_t dLen)
{
/*
 * Append dLen bytes starting at data to the NoneCompressorState buffer
 * hanging off cs->private_data.  Whenever the buffer is found full it is
 * handed to cs->writeF and reset, so the callback sees buflen-sized blocks
 * instead of caller-sized ones.  A trailing partial buffer is left in
 * memory when this function returns; EndCompressorNone writes it out.
 *
 * The state pointer is dereferenced unconditionally, so this relies on
 * private_data having been set up beforehand.
 *
 * NOTE(review): this text is a rendered diff without +/- markers.  The
 * direct cs->writeF(AH, data, dLen) call immediately below looks like the
 * pre-patch body that the commit removes; if it were really present
 * together with the buffering loop, every byte would be written twice.
 * Confirm against the actual file.
 */
cs->writeF(AH, data, dLen);
NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
size_t remaining = dLen;
while (remaining > 0)
{
size_t chunk;
/* Dump buffer if full */
if (nonecs->bufdata >= nonecs->buflen)
{
cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
nonecs->bufdata = 0;
}
/* And fill it: copy as much as fits in the free space, at most what is left */
chunk = nonecs->buflen - nonecs->bufdata;
if (chunk > remaining)
chunk = remaining;
memcpy(nonecs->buffer + nonecs->bufdata, data, chunk);
nonecs->bufdata += chunk;
/* Advance past the bytes just consumed (cast needed for void-pointer arithmetic) */
data = ((const char *) data) + chunk;
remaining -= chunk;
}
}
/*
 * Finish a "none" compression stream: pass any still-buffered bytes to
 * cs->writeF, then free the buffer and the state struct and clear
 * cs->private_data.  Does nothing when private_data is NULL.
 */
static void
EndCompressorNone(ArchiveHandle *AH, CompressorState *cs)
{
/*
 * NOTE(review): the original "no op" comment here is presumably the
 * pre-patch body shown by this marker-less diff view; the code below does
 * real work.  Confirm against the actual file.
 */
NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
/* State exists only if it was allocated at init time, hence the NULL check */
if (nonecs)
{
/* Dump buffer if nonempty */
if (nonecs->bufdata > 0)
cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
/* Free working state */
pg_free(nonecs->buffer);
pg_free(nonecs);
/* Clear the pointer so a repeated call takes the NULL path above */
cs->private_data = NULL;
}
}
/*
@@ -71,6 +115,22 @@ InitCompressorNone(CompressorState *cs,
cs->end = EndCompressorNone;
cs->compression_spec = compression_spec;
/*
* If the caller has defined a write function, prepare the necessary
* buffer.
*/
if (cs->writeF)
{
NoneCompressorState *nonecs;
nonecs = (NoneCompressorState *) pg_malloc(sizeof(NoneCompressorState));
nonecs->buflen = DEFAULT_IO_BUFFER_SIZE;
nonecs->buffer = pg_malloc(nonecs->buflen);
nonecs->bufdata = 0;
cs->private_data = nonecs;
}
}

View File

@@ -1758,6 +1758,7 @@ NextValueExpr
Node
NodeTag
NonEmptyRange
NoneCompressorState
Notification
NotificationList
NotifyStmt