mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	pg_dump: Add support for zstd compression
Allow pg_dump to use the zstd compression, in addition to gzip/lz4. Bulk
of the new compression method is implemented in compress_zstd.{c,h},
covering the pg_dump compression APIs. The rest of the patch adds test
and makes various places aware of the new compression method.
The zstd library (which this patch relies on) supports multithreaded
compression since version 1.5. We however disallow that feature for now,
as it might interfere with parallel backups on platforms that rely on
threads (e.g. Windows). This can be improved / relaxed in the future.
This also fixes a minor issue in InitDiscoverCompressFileHandle(), which
was not updated to check if the file already has the .lz4 extension.
Adding zstd compression was originally proposed in 2020 (see the second
thread), but then was reworked to use the new compression API introduced
in e9960732a9. I've considered both threads when compiling the list of
reviewers.
Author: Justin Pryzby
Reviewed-by: Tomas Vondra, Jacob Champion, Andreas Karlsson
Discussion: https://postgr.es/m/20230224191840.GD1653@telsasoft.com
Discussion: https://postgr.es/m/20201221194924.GI30237@telsasoft.com
			
			
This commit is contained in:
		@@ -52,8 +52,8 @@
 | 
			
		||||
 *
 | 
			
		||||
 *	InitDiscoverCompressFileHandle tries to infer the compression by the
 | 
			
		||||
 *	filename suffix. If the suffix is not yet known then it tries to simply
 | 
			
		||||
 *	open the file and if it fails, it tries to open the same file with the .gz
 | 
			
		||||
 *	suffix, and then again with the .lz4 suffix.
 | 
			
		||||
 *	open the file and if it fails, it tries to open the same file with
 | 
			
		||||
 *	compressed suffixes (.gz, .lz4 and .zst, in this order).
 | 
			
		||||
 *
 | 
			
		||||
 * IDENTIFICATION
 | 
			
		||||
 *	   src/bin/pg_dump/compress_io.c
 | 
			
		||||
@@ -69,6 +69,7 @@
 | 
			
		||||
#include "compress_io.h"
 | 
			
		||||
#include "compress_lz4.h"
 | 
			
		||||
#include "compress_none.h"
 | 
			
		||||
#include "compress_zstd.h"
 | 
			
		||||
#include "pg_backup_utils.h"
 | 
			
		||||
 | 
			
		||||
/*----------------------
 | 
			
		||||
@@ -77,7 +78,8 @@
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Checks whether a compression algorithm is supported.
 | 
			
		||||
 * Checks whether support for a compression algorithm is implemented in
 | 
			
		||||
 * pg_dump/restore.
 | 
			
		||||
 *
 | 
			
		||||
 * On success returns NULL, otherwise returns a malloc'ed string which can be
 | 
			
		||||
 * used by the caller in an error message.
 | 
			
		||||
@@ -98,6 +100,10 @@ supports_compression(const pg_compress_specification compression_spec)
 | 
			
		||||
	if (algorithm == PG_COMPRESSION_LZ4)
 | 
			
		||||
		supported = true;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef USE_ZSTD
 | 
			
		||||
	if (algorithm == PG_COMPRESSION_ZSTD)
 | 
			
		||||
		supported = true;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	if (!supported)
 | 
			
		||||
		return psprintf("this build does not support compression with %s",
 | 
			
		||||
@@ -130,6 +136,8 @@ AllocateCompressor(const pg_compress_specification compression_spec,
 | 
			
		||||
		InitCompressorGzip(cs, compression_spec);
 | 
			
		||||
	else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
 | 
			
		||||
		InitCompressorLZ4(cs, compression_spec);
 | 
			
		||||
	else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
 | 
			
		||||
		InitCompressorZstd(cs, compression_spec);
 | 
			
		||||
 | 
			
		||||
	return cs;
 | 
			
		||||
}
 | 
			
		||||
@@ -196,20 +204,36 @@ InitCompressFileHandle(const pg_compress_specification compression_spec)
 | 
			
		||||
		InitCompressFileHandleGzip(CFH, compression_spec);
 | 
			
		||||
	else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
 | 
			
		||||
		InitCompressFileHandleLZ4(CFH, compression_spec);
 | 
			
		||||
	else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
 | 
			
		||||
		InitCompressFileHandleZstd(CFH, compression_spec);
 | 
			
		||||
 | 
			
		||||
	return CFH;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Checks if a compressed file (with the specified extension) exists.
 | 
			
		||||
 *
 | 
			
		||||
 * The filename of the tested file is stored to fname buffer (the existing
 | 
			
		||||
 * buffer is freed, new buffer is allocated and returned through the pointer).
 | 
			
		||||
 */
 | 
			
		||||
static bool
 | 
			
		||||
check_compressed_file(const char *path, char **fname, char *ext)
 | 
			
		||||
{
 | 
			
		||||
	free_keep_errno(*fname);
 | 
			
		||||
	*fname = psprintf("%s.%s", path, ext);
 | 
			
		||||
	return (access(*fname, F_OK) == 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Open a file for reading. 'path' is the file to open, and 'mode' should
 | 
			
		||||
 * be either "r" or "rb".
 | 
			
		||||
 *
 | 
			
		||||
 * If the file at 'path' contains the suffix of a supported compression method,
 | 
			
		||||
 * currently this includes ".gz" and ".lz4", then this compression will be used
 | 
			
		||||
 * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
 | 
			
		||||
 * throughout. Otherwise the compression will be inferred by iteratively trying
 | 
			
		||||
 * to open the file at 'path', first as is, then by appending known compression
 | 
			
		||||
 * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
 | 
			
		||||
 * "foo.gz" or "foo.lz4", trying in that order.
 | 
			
		||||
 * "foo.{gz,lz4,zst}", trying in that order.
 | 
			
		||||
 *
 | 
			
		||||
 * On failure, return NULL with an error code in errno.
 | 
			
		||||
 */
 | 
			
		||||
@@ -229,36 +253,20 @@ InitDiscoverCompressFileHandle(const char *path, const char *mode)
 | 
			
		||||
 | 
			
		||||
	if (hasSuffix(fname, ".gz"))
 | 
			
		||||
		compression_spec.algorithm = PG_COMPRESSION_GZIP;
 | 
			
		||||
	else if (hasSuffix(fname, ".lz4"))
 | 
			
		||||
		compression_spec.algorithm = PG_COMPRESSION_LZ4;
 | 
			
		||||
	else if (hasSuffix(fname, ".zst"))
 | 
			
		||||
		compression_spec.algorithm = PG_COMPRESSION_ZSTD;
 | 
			
		||||
	else
 | 
			
		||||
	{
 | 
			
		||||
		bool		exists;
 | 
			
		||||
 | 
			
		||||
		exists = (stat(path, &st) == 0);
 | 
			
		||||
		/* avoid unused warning if it is not built with compression */
 | 
			
		||||
		if (exists)
 | 
			
		||||
		if (stat(path, &st) == 0)
 | 
			
		||||
			compression_spec.algorithm = PG_COMPRESSION_NONE;
 | 
			
		||||
#ifdef HAVE_LIBZ
 | 
			
		||||
		if (!exists)
 | 
			
		||||
		{
 | 
			
		||||
			free_keep_errno(fname);
 | 
			
		||||
			fname = psprintf("%s.gz", path);
 | 
			
		||||
			exists = (stat(fname, &st) == 0);
 | 
			
		||||
 | 
			
		||||
			if (exists)
 | 
			
		||||
				compression_spec.algorithm = PG_COMPRESSION_GZIP;
 | 
			
		||||
		}
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef USE_LZ4
 | 
			
		||||
		if (!exists)
 | 
			
		||||
		{
 | 
			
		||||
			free_keep_errno(fname);
 | 
			
		||||
			fname = psprintf("%s.lz4", path);
 | 
			
		||||
			exists = (stat(fname, &st) == 0);
 | 
			
		||||
 | 
			
		||||
			if (exists)
 | 
			
		||||
				compression_spec.algorithm = PG_COMPRESSION_LZ4;
 | 
			
		||||
		}
 | 
			
		||||
#endif
 | 
			
		||||
		else if (check_compressed_file(path, &fname, "gz"))
 | 
			
		||||
			compression_spec.algorithm = PG_COMPRESSION_GZIP;
 | 
			
		||||
		else if (check_compressed_file(path, &fname, "lz4"))
 | 
			
		||||
			compression_spec.algorithm = PG_COMPRESSION_LZ4;
 | 
			
		||||
		else if (check_compressed_file(path, &fname, "zst"))
 | 
			
		||||
			compression_spec.algorithm = PG_COMPRESSION_ZSTD;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	CFH = InitCompressFileHandle(compression_spec);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user