1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-03 09:13:20 +03:00

pg_dump: Add support for zstd compression

Allow pg_dump to use zstd compression, in addition to gzip/lz4. The bulk
of the new compression method is implemented in compress_zstd.{c,h},
covering the pg_dump compression APIs. The rest of the patch adds tests
and makes various places aware of the new compression method.

The zstd library (which this patch relies on) supports multithreaded
compression since version 1.5. We however disallow that feature for now,
as it might interfere with parallel backups on platforms that rely on
threads (e.g. Windows). This can be improved / relaxed in the future.

This also fixes a minor issue in InitDiscoverCompressFileHandle(), which
was not updated to check if the file already has the .lz4 extension.

Adding zstd compression was originally proposed in 2020 (see the second
thread), but then was reworked to use the new compression API introduced
in e9960732a9. I've considered both threads when compiling the list of
reviewers.

Author: Justin Pryzby
Reviewed-by: Tomas Vondra, Jacob Champion, Andreas Karlsson
Discussion: https://postgr.es/m/20230224191840.GD1653@telsasoft.com
Discussion: https://postgr.es/m/20201221194924.GI30237@telsasoft.com
This commit is contained in:
Tomas Vondra
2023-04-05 21:38:04 +02:00
parent 794f259447
commit 84adc8e20f
12 changed files with 713 additions and 54 deletions

View File

@@ -52,8 +52,8 @@
*
* InitDiscoverCompressFileHandle tries to infer the compression by the
* filename suffix. If the suffix is not yet known then it tries to simply
* open the file and if it fails, it tries to open the same file with the .gz
* suffix, and then again with the .lz4 suffix.
* open the file and if it fails, it tries to open the same file with
* compressed suffixes (.gz, .lz4 and .zst, in this order).
*
* IDENTIFICATION
* src/bin/pg_dump/compress_io.c
@@ -69,6 +69,7 @@
#include "compress_io.h"
#include "compress_lz4.h"
#include "compress_none.h"
#include "compress_zstd.h"
#include "pg_backup_utils.h"
/*----------------------
@@ -77,7 +78,8 @@
*/
/*
* Checks whether a compression algorithm is supported.
* Checks whether support for a compression algorithm is implemented in
* pg_dump/restore.
*
* On success returns NULL, otherwise returns a malloc'ed string which can be
* used by the caller in an error message.
@@ -98,6 +100,10 @@ supports_compression(const pg_compress_specification compression_spec)
if (algorithm == PG_COMPRESSION_LZ4)
supported = true;
#endif
#ifdef USE_ZSTD
if (algorithm == PG_COMPRESSION_ZSTD)
supported = true;
#endif
if (!supported)
return psprintf("this build does not support compression with %s",
@@ -130,6 +136,8 @@ AllocateCompressor(const pg_compress_specification compression_spec,
InitCompressorGzip(cs, compression_spec);
else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
InitCompressorLZ4(cs, compression_spec);
else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
InitCompressorZstd(cs, compression_spec);
return cs;
}
@@ -196,20 +204,36 @@ InitCompressFileHandle(const pg_compress_specification compression_spec)
InitCompressFileHandleGzip(CFH, compression_spec);
else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
InitCompressFileHandleLZ4(CFH, compression_spec);
else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
InitCompressFileHandleZstd(CFH, compression_spec);
return CFH;
}
/*
 * Checks if a compressed file (with the specified extension) exists.
 *
 * 'path' is the base file name and 'ext' is the suffix to try, given without
 * the leading dot (e.g. "gz"); the name that gets tested is "<path>.<ext>".
 *
 * The filename of the tested file is stored to fname buffer (the existing
 * buffer is freed, new buffer is allocated and returned through the pointer).
 * This replacement happens whether or not the file exists, so after a false
 * result *fname holds the name that was just probed.
 *
 * Returns true if access() with F_OK succeeds on the tested name, false
 * otherwise.
 */
static bool
check_compressed_file(const char *path, char **fname, const char *ext)
{
	/* Release the previous candidate name before building the next one. */
	free_keep_errno(*fname);
	*fname = psprintf("%s.%s", path, ext);

	return (access(*fname, F_OK) == 0);
}
/*
* Open a file for reading. 'path' is the file to open, and 'mode' should
* be either "r" or "rb".
*
* If the file at 'path' contains the suffix of a supported compression method,
* currently this includes ".gz" and ".lz4", then this compression will be used
* currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
* throughout. Otherwise the compression will be inferred by iteratively trying
* to open the file at 'path', first as is, then by appending known compression
* suffixes. So if you pass "foo" as 'path', this will open either "foo" or
* "foo.gz" or "foo.lz4", trying in that order.
* "foo.{gz,lz4,zst}", trying in that order.
*
* On failure, return NULL with an error code in errno.
*/
@@ -229,36 +253,20 @@ InitDiscoverCompressFileHandle(const char *path, const char *mode)
if (hasSuffix(fname, ".gz"))
compression_spec.algorithm = PG_COMPRESSION_GZIP;
else if (hasSuffix(fname, ".lz4"))
compression_spec.algorithm = PG_COMPRESSION_LZ4;
else if (hasSuffix(fname, ".zst"))
compression_spec.algorithm = PG_COMPRESSION_ZSTD;
else
{
bool exists;
exists = (stat(path, &st) == 0);
/* avoid unused warning if it is not built with compression */
if (exists)
if (stat(path, &st) == 0)
compression_spec.algorithm = PG_COMPRESSION_NONE;
#ifdef HAVE_LIBZ
if (!exists)
{
free_keep_errno(fname);
fname = psprintf("%s.gz", path);
exists = (stat(fname, &st) == 0);
if (exists)
compression_spec.algorithm = PG_COMPRESSION_GZIP;
}
#endif
#ifdef USE_LZ4
if (!exists)
{
free_keep_errno(fname);
fname = psprintf("%s.lz4", path);
exists = (stat(fname, &st) == 0);
if (exists)
compression_spec.algorithm = PG_COMPRESSION_LZ4;
}
#endif
else if (check_compressed_file(path, &fname, "gz"))
compression_spec.algorithm = PG_COMPRESSION_GZIP;
else if (check_compressed_file(path, &fname, "lz4"))
compression_spec.algorithm = PG_COMPRESSION_LZ4;
else if (check_compressed_file(path, &fname, "zst"))
compression_spec.algorithm = PG_COMPRESSION_ZSTD;
}
CFH = InitCompressFileHandle(compression_spec);