mirror of
https://github.com/postgres/postgres.git
synced 2025-10-19 15:49:24 +03:00
Align the data block sizes of pg_dump's various compression modes.
After commit fe8192a95, compress_zstd.c tends to produce data block
sizes around 128K, and we don't really have any control over that
unless we want to overrule ZSTD_CStreamOutSize(), which seems like
a bad idea. But let's try to align the other compression modes to
produce block sizes roughly comparable to that, so that pg_restore's
skip-data performance isn't enormously different for different modes.
gzip compression can be brought in line simply by setting
DEFAULT_IO_BUFFER_SIZE = 128K, which this patch does. That
increases some unrelated buffer sizes, but none of them seem
problematic for modern platforms.
lz4's idea of appropriate block size is highly nonlinear:
if we just increase DEFAULT_IO_BUFFER_SIZE then the output
blocks end up around 200K. I found that adjusting the slop
factor in LZ4State_compression_init was a not-too-ugly way
of bringing that number roughly into line.
With compress = none you get data blocks the same sizes as the
table rows, which seems potentially problematic for narrow tables.
Introduce a layer of buffering to make that case match the others.
Comments in compress_io.h and 002_pg_dump.pl suggest that if
we increase DEFAULT_IO_BUFFER_SIZE then we need to increase the
amount of data fed through the tests in order to improve coverage.
I've not done that here, leaving it for a separate patch.
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/3515357.1760128017@sss.pgh.pa.us
This commit is contained in:
@@ -22,9 +22,9 @@
|
|||||||
*
|
*
|
||||||
* When changing this value, it's necessary to check the relevant test cases
|
* When changing this value, it's necessary to check the relevant test cases
|
||||||
* still exercise all the branches. This applies especially if the value is
|
* still exercise all the branches. This applies especially if the value is
|
||||||
* increased, in which case the overflow buffer may not be needed.
|
* increased, in which case some loops may not get iterated.
|
||||||
*/
|
*/
|
||||||
#define DEFAULT_IO_BUFFER_SIZE 4096
|
#define DEFAULT_IO_BUFFER_SIZE (128 * 1024)
|
||||||
|
|
||||||
extern char *supports_compression(const pg_compress_specification compression_spec);
|
extern char *supports_compression(const pg_compress_specification compression_spec);
|
||||||
|
|
||||||
|
@@ -100,9 +100,14 @@ LZ4State_compression_init(LZ4State *state)
|
|||||||
state->buflen = LZ4F_compressBound(DEFAULT_IO_BUFFER_SIZE, &state->prefs);
|
state->buflen = LZ4F_compressBound(DEFAULT_IO_BUFFER_SIZE, &state->prefs);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Then double it, to ensure we're not forced to flush every time.
|
* Add some slop to ensure we're not forced to flush every time.
|
||||||
|
*
|
||||||
|
* The present slop factor of 50% is chosen so that the typical output
|
||||||
|
* block size is about 128K when DEFAULT_IO_BUFFER_SIZE = 128K. We might
|
||||||
|
* need a different slop factor to maintain that equivalence if
|
||||||
|
* DEFAULT_IO_BUFFER_SIZE is changed dramatically.
|
||||||
*/
|
*/
|
||||||
state->buflen *= 2;
|
state->buflen += state->buflen / 2;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* LZ4F_compressBegin requires a buffer that is greater or equal to
|
* LZ4F_compressBegin requires a buffer that is greater or equal to
|
||||||
|
@@ -22,6 +22,18 @@
|
|||||||
*----------------------
|
*----------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We buffer outgoing data, just to ensure that data blocks written to the
|
||||||
|
* archive file are of reasonable size. The read side could use this struct,
|
||||||
|
* but there's no need because it does not retain data across calls.
|
||||||
|
*/
|
||||||
|
typedef struct NoneCompressorState
|
||||||
|
{
|
||||||
|
char *buffer; /* buffer for unwritten data */
|
||||||
|
size_t buflen; /* allocated size of buffer */
|
||||||
|
size_t bufdata; /* amount of valid data currently in buffer */
|
||||||
|
} NoneCompressorState;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Private routines
|
* Private routines
|
||||||
*/
|
*/
|
||||||
@@ -49,13 +61,45 @@ static void
|
|||||||
WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
|
WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
|
||||||
const void *data, size_t dLen)
|
const void *data, size_t dLen)
|
||||||
{
|
{
|
||||||
cs->writeF(AH, data, dLen);
|
NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
|
||||||
|
size_t remaining = dLen;
|
||||||
|
|
||||||
|
while (remaining > 0)
|
||||||
|
{
|
||||||
|
size_t chunk;
|
||||||
|
|
||||||
|
/* Dump buffer if full */
|
||||||
|
if (nonecs->bufdata >= nonecs->buflen)
|
||||||
|
{
|
||||||
|
cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
|
||||||
|
nonecs->bufdata = 0;
|
||||||
|
}
|
||||||
|
/* And fill it */
|
||||||
|
chunk = nonecs->buflen - nonecs->bufdata;
|
||||||
|
if (chunk > remaining)
|
||||||
|
chunk = remaining;
|
||||||
|
memcpy(nonecs->buffer + nonecs->bufdata, data, chunk);
|
||||||
|
nonecs->bufdata += chunk;
|
||||||
|
data = ((const char *) data) + chunk;
|
||||||
|
remaining -= chunk;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
EndCompressorNone(ArchiveHandle *AH, CompressorState *cs)
|
EndCompressorNone(ArchiveHandle *AH, CompressorState *cs)
|
||||||
{
|
{
|
||||||
/* no op */
|
NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
|
||||||
|
|
||||||
|
if (nonecs)
|
||||||
|
{
|
||||||
|
/* Dump buffer if nonempty */
|
||||||
|
if (nonecs->bufdata > 0)
|
||||||
|
cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
|
||||||
|
/* Free working state */
|
||||||
|
pg_free(nonecs->buffer);
|
||||||
|
pg_free(nonecs);
|
||||||
|
cs->private_data = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -71,6 +115,22 @@ InitCompressorNone(CompressorState *cs,
|
|||||||
cs->end = EndCompressorNone;
|
cs->end = EndCompressorNone;
|
||||||
|
|
||||||
cs->compression_spec = compression_spec;
|
cs->compression_spec = compression_spec;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the caller has defined a write function, prepare the necessary
|
||||||
|
* buffer.
|
||||||
|
*/
|
||||||
|
if (cs->writeF)
|
||||||
|
{
|
||||||
|
NoneCompressorState *nonecs;
|
||||||
|
|
||||||
|
nonecs = (NoneCompressorState *) pg_malloc(sizeof(NoneCompressorState));
|
||||||
|
nonecs->buflen = DEFAULT_IO_BUFFER_SIZE;
|
||||||
|
nonecs->buffer = pg_malloc(nonecs->buflen);
|
||||||
|
nonecs->bufdata = 0;
|
||||||
|
|
||||||
|
cs->private_data = nonecs;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -1758,6 +1758,7 @@ NextValueExpr
|
|||||||
Node
|
Node
|
||||||
NodeTag
|
NodeTag
|
||||||
NonEmptyRange
|
NonEmptyRange
|
||||||
|
NoneCompressorState
|
||||||
Notification
|
Notification
|
||||||
NotificationList
|
NotificationList
|
||||||
NotifyStmt
|
NotifyStmt
|
||||||
|
Reference in New Issue
Block a user