mirror of
https://github.com/postgres/postgres.git
synced 2025-10-19 15:49:24 +03:00
Align the data block sizes of pg_dump's various compression modes.
After commit fe8192a95, compress_zstd.c tends to produce data block
sizes around 128K, and we don't really have any control over that
unless we want to overrule ZSTD_CStreamOutSize(), which seems like
a bad idea. But let's try to align the other compression modes to
produce block sizes roughly comparable to that, so that pg_restore's
skip-data performance isn't enormously different for different modes.
gzip compression can be brought in line simply by setting
DEFAULT_IO_BUFFER_SIZE = 128K, which this patch does. That
increases some unrelated buffer sizes, but none of them seem
problematic for modern platforms.
lz4's idea of appropriate block size is highly nonlinear:
if we just increase DEFAULT_IO_BUFFER_SIZE then the output
blocks end up around 200K. I found that adjusting the slop
factor in LZ4State_compression_init was a not-too-ugly way
of bringing that number roughly into line.
With compress = none you get data blocks the same size as the
table rows, which seems potentially problematic for narrow tables.
Introduce a layer of buffering to make that case match the others.
Comments in compress_io.h and 002_pg_dump.pl suggest that if
we increase DEFAULT_IO_BUFFER_SIZE then we need to increase the
amount of data fed through the tests in order to improve coverage.
I've not done that here, leaving it for a separate patch.
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/3515357.1760128017@sss.pgh.pa.us
This commit is contained in:
@@ -22,9 +22,9 @@
|
||||
*
|
||||
* When changing this value, it's necessary to check the relevant test cases
|
||||
* still exercise all the branches. This applies especially if the value is
|
||||
* increased, in which case the overflow buffer may not be needed.
|
||||
* increased, in which case some loops may not get iterated.
|
||||
*/
|
||||
#define DEFAULT_IO_BUFFER_SIZE 4096
|
||||
#define DEFAULT_IO_BUFFER_SIZE (128 * 1024)
|
||||
|
||||
extern char *supports_compression(const pg_compress_specification compression_spec);
|
||||
|
||||
|
@@ -100,9 +100,14 @@ LZ4State_compression_init(LZ4State *state)
|
||||
state->buflen = LZ4F_compressBound(DEFAULT_IO_BUFFER_SIZE, &state->prefs);
|
||||
|
||||
/*
|
||||
* Then double it, to ensure we're not forced to flush every time.
|
||||
* Add some slop to ensure we're not forced to flush every time.
|
||||
*
|
||||
* The present slop factor of 50% is chosen so that the typical output
|
||||
* block size is about 128K when DEFAULT_IO_BUFFER_SIZE = 128K. We might
|
||||
* need a different slop factor to maintain that equivalence if
|
||||
* DEFAULT_IO_BUFFER_SIZE is changed dramatically.
|
||||
*/
|
||||
state->buflen *= 2;
|
||||
state->buflen += state->buflen / 2;
|
||||
|
||||
/*
|
||||
* LZ4F_compressBegin requires a buffer that is greater or equal to
|
||||
|
@@ -22,6 +22,18 @@
|
||||
*----------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* We buffer outgoing data, just to ensure that data blocks written to the
|
||||
* archive file are of reasonable size. The read side could use this struct,
|
||||
* but there's no need because it does not retain data across calls.
|
||||
*/
|
||||
typedef struct NoneCompressorState
|
||||
{
|
||||
char *buffer; /* buffer for unwritten data */
|
||||
size_t buflen; /* allocated size of buffer */
|
||||
size_t bufdata; /* amount of valid data currently in buffer */
|
||||
} NoneCompressorState;
|
||||
|
||||
/*
|
||||
* Private routines
|
||||
*/
|
||||
@@ -49,13 +61,45 @@ static void
|
||||
WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
|
||||
const void *data, size_t dLen)
|
||||
{
|
||||
cs->writeF(AH, data, dLen);
|
||||
NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
|
||||
size_t remaining = dLen;
|
||||
|
||||
while (remaining > 0)
|
||||
{
|
||||
size_t chunk;
|
||||
|
||||
/* Dump buffer if full */
|
||||
if (nonecs->bufdata >= nonecs->buflen)
|
||||
{
|
||||
cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
|
||||
nonecs->bufdata = 0;
|
||||
}
|
||||
/* And fill it */
|
||||
chunk = nonecs->buflen - nonecs->bufdata;
|
||||
if (chunk > remaining)
|
||||
chunk = remaining;
|
||||
memcpy(nonecs->buffer + nonecs->bufdata, data, chunk);
|
||||
nonecs->bufdata += chunk;
|
||||
data = ((const char *) data) + chunk;
|
||||
remaining -= chunk;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
EndCompressorNone(ArchiveHandle *AH, CompressorState *cs)
|
||||
{
|
||||
/* no op */
|
||||
NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
|
||||
|
||||
if (nonecs)
|
||||
{
|
||||
/* Dump buffer if nonempty */
|
||||
if (nonecs->bufdata > 0)
|
||||
cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
|
||||
/* Free working state */
|
||||
pg_free(nonecs->buffer);
|
||||
pg_free(nonecs);
|
||||
cs->private_data = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -71,6 +115,22 @@ InitCompressorNone(CompressorState *cs,
|
||||
cs->end = EndCompressorNone;
|
||||
|
||||
cs->compression_spec = compression_spec;
|
||||
|
||||
/*
|
||||
* If the caller has defined a write function, prepare the necessary
|
||||
* buffer.
|
||||
*/
|
||||
if (cs->writeF)
|
||||
{
|
||||
NoneCompressorState *nonecs;
|
||||
|
||||
nonecs = (NoneCompressorState *) pg_malloc(sizeof(NoneCompressorState));
|
||||
nonecs->buflen = DEFAULT_IO_BUFFER_SIZE;
|
||||
nonecs->buffer = pg_malloc(nonecs->buflen);
|
||||
nonecs->bufdata = 0;
|
||||
|
||||
cs->private_data = nonecs;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1758,6 +1758,7 @@ NextValueExpr
|
||||
Node
|
||||
NodeTag
|
||||
NonEmptyRange
|
||||
NoneCompressorState
|
||||
Notification
|
||||
NotificationList
|
||||
NotifyStmt
|
||||
|
Reference in New Issue
Block a user