Introduce a generic pg_dump compression API

Switch pg_dump to use the Compression API, implemented by bf9aa490db.

The CompressFileHandle replaces the cfp* family of functions with a struct of callbacks for accessing (compressed) files. This allows adding new compression methods simply by introducing a new struct instance with appropriate implementation of the callbacks.

Archives compressed using custom compression methods store an identifier of the compression algorithm in their header instead of the compression level. The header version is bumped.

Author: Georgios Kokolatos
Reviewed-by: Michael Paquier, Rachel Heaton, Justin Pryzby, Tomas Vondra
Discussion: https://postgr.es/m/faUNEOpts9vunEaLnmxmG-DldLSg_ql137OC3JYDmgrOMHm1RvvWY2IdBkv_CRxm5spCCb_OmKNk2T03TMm0fBEWveFF9wA1WizPuAgB7Ss%3D%40protonmail.com
2025-11-03 09:13:20 +03:00 · 2023-02-23 18:33:30 +01:00
parent 739f1d6218
commit e9960732a9
16 changed files with 1090 additions and 791 deletions
--- a/src/bin/pg_dump/compress_io.c
+++ b/src/bin/pg_dump/compress_io.c
@@ -9,42 +9,51 @@
 *
 * This file includes two APIs for dealing with compressed data. The first
 * provides more flexibility, using callbacks to read/write data from the
- * underlying stream. The second API is a wrapper around fopen/gzopen and
+ * underlying stream. The second API is a wrapper around fopen and
 * friends, providing an interface similar to those, but abstracts away
- * the possible compression. Both APIs use libz for the compression, but
- * the second API uses gzip headers, so the resulting files can be easily
- * manipulated with the gzip utility.
+ * the possible compression. The second API is designed so that the
+ * resulting files can be easily manipulated with an external compression
+ * utility program.
 *
 * Compressor API
 * --------------
 *
 *	The interface for writing to an archive consists of three functions:
- *	AllocateCompressor, WriteDataToArchive and EndCompressor. First you call
- *	AllocateCompressor, then write all the data by calling WriteDataToArchive
- *	as many times as needed, and finally EndCompressor. WriteDataToArchive
- *	and EndCompressor will call the WriteFunc that was provided to
- *	AllocateCompressor for each chunk of compressed data.
+ *	AllocateCompressor, writeData, and EndCompressor. First you call
+ *	AllocateCompressor, then write all the data by calling writeData as many
+ *	times as needed, and finally EndCompressor. writeData will call the
+ *	WriteFunc that was provided to AllocateCompressor for each chunk of
+ *	compressed data.
 *
- *	The interface for reading an archive consists of just one function:
- *	ReadDataFromArchive. ReadDataFromArchive reads the whole compressed input
- *	stream, by repeatedly calling the given ReadFunc. ReadFunc returns the
- *	compressed data chunk at a time, and ReadDataFromArchive decompresses it
- *	and passes the decompressed data to ahwrite(), until ReadFunc returns 0
- *	to signal EOF.
- *
- *	The interface is the same for compressed and uncompressed streams.
+ *	The interface for reading an archive mirrors the one for writing and
+ *	consists of AllocateCompressor, readData, and EndCompressor. First you
+ *	call AllocateCompressor, then call readData, which reads the whole
+ *	compressed stream by repeatedly calling the given ReadFunc. ReadFunc
+ *	returns the compressed data one chunk at a time; readData decompresses
+ *	each chunk and passes the decompressed data to ahwrite(), until ReadFunc
+ *	returns 0 to signal EOF. Finally, call EndCompressor. The interface is
+ *	the same for compressed and uncompressed streams.
 *
 * Compressed stream API
 * ----------------------
 *
- *	The compressed stream API is a wrapper around the C standard fopen() and
- *	libz's gzopen() APIs. It allows you to use the same functions for
- *	compressed and uncompressed streams. cfopen_read() first tries to open
- *	the file with given name, and if it fails, it tries to open the same
- *	file with the .gz suffix. cfopen_write() opens a file for writing, an
- *	extra argument specifies if the file should be compressed, and adds the
- *	.gz suffix to the filename if so. This allows you to easily handle both
- *	compressed and uncompressed files.
+ *	The compressed stream API provides a set of function pointers for
+ *	opening, reading, writing, and finally closing files. The function
+ *	pointers are documented in the corresponding header file and are common
+ *	to all streams. This allows the caller to use the same functions for
+ *	both compressed and uncompressed streams.
+ *
+ *	The interface consists of three functions, InitCompressFileHandle,
+ *	InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
+ *	compression is known, then start by calling InitCompressFileHandle,
+ *	otherwise discover it by using InitDiscoverCompressFileHandle. Then call
+ *	the function pointers as required for the read/write operations. Finally
+ *	call EndCompressFileHandle to end the stream.
+ *
+ *	InitDiscoverCompressFileHandle tries to infer the compression from the
+ *	filename suffix. If the suffix is not a known one, it checks whether the
+ *	file exists under the given name and, if it does not, looks for the same
+ *	file with the .gz suffix appended.
 *
 * IDENTIFICATION
 *	   src/bin/pg_dump/compress_io.c
@@ -53,12 +62,13 @@
 */
 #include "postgres_fe.h"

-#include "compress_io.h"
-#include "pg_backup_utils.h"
+#include <sys/stat.h>
+#include <unistd.h>

-#ifdef HAVE_LIBZ
-#include <zlib.h>
-#endif
+#include "compress_gzip.h"
+#include "compress_io.h"
+#include "compress_none.h"
+#include "pg_backup_utils.h"

 /*----------------------
 * Generic functions
@@ -96,663 +106,45 @@ supports_compression(const pg_compress_specification compression_spec)
 *----------------------
 */

-/* typedef appears in compress_io.h */
-struct CompressorState
-{
-	pg_compress_specification compression_spec;
-	WriteFunc	writeF;
-
-#ifdef HAVE_LIBZ
-	z_streamp	zp;
-	char	   *zlibOut;
-	size_t		zlibOutSize;
-#endif
-};
-
-/* Routines that support zlib compressed data I/O */
-#ifdef HAVE_LIBZ
-static void InitCompressorZlib(CompressorState *cs, int level);
-static void DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs,
-								  bool flush);
-static void ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF);
-static void WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
-								   const char *data, size_t dLen);
-static void EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs);
-#endif
-
-/* Routines that support uncompressed data I/O */
-static void ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF);
-static void WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
-								   const char *data, size_t dLen);
-
-/* Public interface routines */
-
-/* Allocate a new compressor */
+/*
+ * Allocate a new compressor.
+ *
+ * The CompressorState is allocated with pg_malloc0(), the caller's read and
+ * write callbacks are stored in it, and then the initializer matching
+ * compression_spec.algorithm (InitCompressorNone or InitCompressorGzip) is
+ * run on it. The caller receives the resulting state.
+ *
+ * NOTE(review): there is no branch for any other algorithm, in which case
+ * only readF/writeF are set on the returned state. EndCompressor() calls
+ * cs->end unconditionally, so confirm that callers never pass an algorithm
+ * other than none or gzip. The gzip build-support check that used to live
+ * here is gone; presumably it now lives in InitCompressorGzip -- confirm.
+ */
 CompressorState *
 AllocateCompressor(const pg_compress_specification compression_spec,
-				   WriteFunc writeF)
+				   ReadFunc readF, WriteFunc writeF)
 {
 	CompressorState *cs;

-#ifndef HAVE_LIBZ
-	if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
-		pg_fatal("this build does not support compression with %s", "gzip");
-#endif
-
 	cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
+	cs->readF = readF;
 	cs->writeF = writeF;
-	cs->compression_spec = compression_spec;

-	/*
-	 * Perform compression algorithm specific initialization.
-	 */
-#ifdef HAVE_LIBZ
-	if (cs->compression_spec.algorithm == PG_COMPRESSION_GZIP)
-		InitCompressorZlib(cs, cs->compression_spec.level);
-#endif
+	if (compression_spec.algorithm == PG_COMPRESSION_NONE)
+		InitCompressorNone(cs, compression_spec);
+	else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
+		InitCompressorGzip(cs, compression_spec);

 	return cs;
 }

-/*
- * Read all compressed data from the input stream (via readF) and print it
- * out with ahwrite().
- */
-void
-ReadDataFromArchive(ArchiveHandle *AH,
-					const pg_compress_specification compression_spec,
-					ReadFunc readF)
-{
-	if (compression_spec.algorithm == PG_COMPRESSION_NONE)
-		ReadDataFromArchiveNone(AH, readF);
-	if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
-	{
-#ifdef HAVE_LIBZ
-		ReadDataFromArchiveZlib(AH, readF);
-#else
-		pg_fatal("this build does not support compression with %s", "gzip");
-#endif
-	}
-}
-
-/*
- * Compress and write data to the output stream (via writeF).
- */
-void
-WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs,
-				   const void *data, size_t dLen)
-{
-	switch (cs->compression_spec.algorithm)
-	{
-		case PG_COMPRESSION_GZIP:
-#ifdef HAVE_LIBZ
-			WriteDataToArchiveZlib(AH, cs, data, dLen);
-#else
-			pg_fatal("this build does not support compression with %s", "gzip");
-#endif
-			break;
-		case PG_COMPRESSION_NONE:
-			WriteDataToArchiveNone(AH, cs, data, dLen);
-			break;
-		case PG_COMPRESSION_LZ4:
-			/* fallthrough */
-		case PG_COMPRESSION_ZSTD:
-			pg_fatal("invalid compression method");
-			break;
-	}
-}
-
 /*
 * Terminate compression library context and flush its buffers.
+ *
+ * The algorithm-specific work is delegated to the cs->end callback
+ * (presumably installed by the per-algorithm initializer -- confirm), after
+ * which the CompressorState itself is released with pg_free().
 */
 void
 EndCompressor(ArchiveHandle *AH, CompressorState *cs)
 {
-#ifdef HAVE_LIBZ
-	if (cs->compression_spec.algorithm == PG_COMPRESSION_GZIP)
-		EndCompressorZlib(AH, cs);
-#endif
-	free(cs);
+	cs->end(AH, cs);
+	pg_free(cs);
 }

-/* Private routines, specific to each compression method. */
-
-#ifdef HAVE_LIBZ
-/*
- * Functions for zlib compressed output.
- */
-
-static void
-InitCompressorZlib(CompressorState *cs, int level)
-{
-	z_streamp	zp;
-
-	zp = cs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
-	zp->zalloc = Z_NULL;
-	zp->zfree = Z_NULL;
-	zp->opaque = Z_NULL;
-
-	/*
-	 * zlibOutSize is the buffer size we tell zlib it can output to.  We
-	 * actually allocate one extra byte because some routines want to append a
-	 * trailing zero byte to the zlib output.
-	 */
-	cs->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1);
-	cs->zlibOutSize = ZLIB_OUT_SIZE;
-
-	if (deflateInit(zp, level) != Z_OK)
-		pg_fatal("could not initialize compression library: %s",
-				 zp->msg);
-
-	/* Just be paranoid - maybe End is called after Start, with no Write */
-	zp->next_out = (void *) cs->zlibOut;
-	zp->avail_out = cs->zlibOutSize;
-}
-
-static void
-EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs)
-{
-	z_streamp	zp = cs->zp;
-
-	zp->next_in = NULL;
-	zp->avail_in = 0;
-
-	/* Flush any remaining data from zlib buffer */
-	DeflateCompressorZlib(AH, cs, true);
-
-	if (deflateEnd(zp) != Z_OK)
-		pg_fatal("could not close compression stream: %s", zp->msg);
-
-	free(cs->zlibOut);
-	free(cs->zp);
-}
-
-static void
-DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
-{
-	z_streamp	zp = cs->zp;
-	char	   *out = cs->zlibOut;
-	int			res = Z_OK;
-
-	while (cs->zp->avail_in != 0 || flush)
-	{
-		res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH);
-		if (res == Z_STREAM_ERROR)
-			pg_fatal("could not compress data: %s", zp->msg);
-		if ((flush && (zp->avail_out < cs->zlibOutSize))
-			|| (zp->avail_out == 0)
-			|| (zp->avail_in != 0)
-			)
-		{
-			/*
-			 * Extra paranoia: avoid zero-length chunks, since a zero length
-			 * chunk is the EOF marker in the custom format. This should never
-			 * happen but...
-			 */
-			if (zp->avail_out < cs->zlibOutSize)
-			{
-				/*
-				 * Any write function should do its own error checking but to
-				 * make sure we do a check here as well...
-				 */
-				size_t		len = cs->zlibOutSize - zp->avail_out;
-
-				cs->writeF(AH, out, len);
-			}
-			zp->next_out = (void *) out;
-			zp->avail_out = cs->zlibOutSize;
-		}
-
-		if (res == Z_STREAM_END)
-			break;
-	}
-}
-
-static void
-WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
-					   const char *data, size_t dLen)
-{
-	cs->zp->next_in = (void *) unconstify(char *, data);
-	cs->zp->avail_in = dLen;
-	DeflateCompressorZlib(AH, cs, false);
-}
-
-static void
-ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
-{
-	z_streamp	zp;
-	char	   *out;
-	int			res = Z_OK;
-	size_t		cnt;
-	char	   *buf;
-	size_t		buflen;
-
-	zp = (z_streamp) pg_malloc(sizeof(z_stream));
-	zp->zalloc = Z_NULL;
-	zp->zfree = Z_NULL;
-	zp->opaque = Z_NULL;
-
-	buf = pg_malloc(ZLIB_IN_SIZE);
-	buflen = ZLIB_IN_SIZE;
-
-	out = pg_malloc(ZLIB_OUT_SIZE + 1);
-
-	if (inflateInit(zp) != Z_OK)
-		pg_fatal("could not initialize compression library: %s",
-				 zp->msg);
-
-	/* no minimal chunk size for zlib */
-	while ((cnt = readF(AH, &buf, &buflen)))
-	{
-		zp->next_in = (void *) buf;
-		zp->avail_in = cnt;
-
-		while (zp->avail_in > 0)
-		{
-			zp->next_out = (void *) out;
-			zp->avail_out = ZLIB_OUT_SIZE;
-
-			res = inflate(zp, 0);
-			if (res != Z_OK && res != Z_STREAM_END)
-				pg_fatal("could not uncompress data: %s", zp->msg);
-
-			out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
-			ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
-		}
-	}
-
-	zp->next_in = NULL;
-	zp->avail_in = 0;
-	while (res != Z_STREAM_END)
-	{
-		zp->next_out = (void *) out;
-		zp->avail_out = ZLIB_OUT_SIZE;
-		res = inflate(zp, 0);
-		if (res != Z_OK && res != Z_STREAM_END)
-			pg_fatal("could not uncompress data: %s", zp->msg);
-
-		out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
-		ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
-	}
-
-	if (inflateEnd(zp) != Z_OK)
-		pg_fatal("could not close compression library: %s", zp->msg);
-
-	free(buf);
-	free(out);
-	free(zp);
-}
-#endif							/* HAVE_LIBZ */
-
-
-/*
- * Functions for uncompressed output.
- */
-
-static void
-ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF)
-{
-	size_t		cnt;
-	char	   *buf;
-	size_t		buflen;
-
-	buf = pg_malloc(ZLIB_OUT_SIZE);
-	buflen = ZLIB_OUT_SIZE;
-
-	while ((cnt = readF(AH, &buf, &buflen)))
-	{
-		ahwrite(buf, 1, cnt, AH);
-	}
-
-	free(buf);
-}
-
-static void
-WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
-					   const char *data, size_t dLen)
-{
-	cs->writeF(AH, data, dLen);
-}
-
-
 /*----------------------
 * Compressed stream API
 *----------------------
 */

 /*
- * cfp represents an open stream, wrapping the underlying FILE or gzFile
- * pointer. This is opaque to the callers.
+ * Private routines
 */
-struct cfp
-{
-	FILE	   *uncompressedfp;
-#ifdef HAVE_LIBZ
-	gzFile		compressedfp;
-#endif
-};
-
-#ifdef HAVE_LIBZ
-static int	hasSuffix(const char *filename, const char *suffix);
-#endif
-
-/* free() without changing errno; useful in several places below */
-static void
-free_keep_errno(void *p)
-{
-	int			save_errno = errno;
-
-	free(p);
-	errno = save_errno;
-}
-
-/*
- * Open a file for reading. 'path' is the file to open, and 'mode' should
- * be either "r" or "rb".
- *
- * If the file at 'path' does not exist, we append the ".gz" suffix (if 'path'
- * doesn't already have it) and try again. So if you pass "foo" as 'path',
- * this will open either "foo" or "foo.gz".
- *
- * On failure, return NULL with an error code in errno.
- */
-cfp *
-cfopen_read(const char *path, const char *mode)
-{
-	cfp		   *fp;
-
-	pg_compress_specification compression_spec = {0};
-
-#ifdef HAVE_LIBZ
-	if (hasSuffix(path, ".gz"))
-	{
-		compression_spec.algorithm = PG_COMPRESSION_GZIP;
-		fp = cfopen(path, mode, compression_spec);
-	}
-	else
-#endif
-	{
-		compression_spec.algorithm = PG_COMPRESSION_NONE;
-		fp = cfopen(path, mode, compression_spec);
-#ifdef HAVE_LIBZ
-		if (fp == NULL)
-		{
-			char	   *fname;
-
-			fname = psprintf("%s.gz", path);
-			compression_spec.algorithm = PG_COMPRESSION_GZIP;
-			fp = cfopen(fname, mode, compression_spec);
-			free_keep_errno(fname);
-		}
-#endif
-	}
-	return fp;
-}
-
-/*
- * Open a file for writing. 'path' indicates the path name, and 'mode' must
- * be a filemode as accepted by fopen() and gzopen() that indicates writing
- * ("w", "wb", "a", or "ab").
- *
- * If 'compression_spec.algorithm' is GZIP, a gzip compressed stream is opened,
- * and 'compression_spec.level' used. The ".gz" suffix is automatically added to
- * 'path' in that case.
- *
- * On failure, return NULL with an error code in errno.
- */
-cfp *
-cfopen_write(const char *path, const char *mode,
-			 const pg_compress_specification compression_spec)
-{
-	cfp		   *fp;
-
-	if (compression_spec.algorithm == PG_COMPRESSION_NONE)
-		fp = cfopen(path, mode, compression_spec);
-	else
-	{
-#ifdef HAVE_LIBZ
-		char	   *fname;
-
-		fname = psprintf("%s.gz", path);
-		fp = cfopen(fname, mode, compression_spec);
-		free_keep_errno(fname);
-#else
-		pg_fatal("this build does not support compression with %s", "gzip");
-		fp = NULL;				/* keep compiler quiet */
-#endif
-	}
-	return fp;
-}
-
-/*
- * This is the workhorse for cfopen() or cfdopen(). It opens file 'path' or
- * associates a stream 'fd', if 'fd' is a valid descriptor, in 'mode'. The
- * descriptor is not dup'ed and it is the caller's responsibility to do so.
- * The caller must verify that the 'compress_algorithm' is supported by the
- * current build.
- *
- * On failure, return NULL with an error code in errno.
- */
-static cfp *
-cfopen_internal(const char *path, int fd, const char *mode,
-				pg_compress_specification compression_spec)
-{
-	cfp		   *fp = pg_malloc0(sizeof(cfp));
-
-	if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
-	{
-#ifdef HAVE_LIBZ
-		if (compression_spec.level != Z_DEFAULT_COMPRESSION)
-		{
-			/* user has specified a compression level, so tell zlib to use it */
-			char		mode_compression[32];
-
-			snprintf(mode_compression, sizeof(mode_compression), "%s%d",
-					 mode, compression_spec.level);
-			if (fd >= 0)
-				fp->compressedfp = gzdopen(fd, mode_compression);
-			else
-				fp->compressedfp = gzopen(path, mode_compression);
-		}
-		else
-		{
-			/* don't specify a level, just use the zlib default */
-			if (fd >= 0)
-				fp->compressedfp = gzdopen(fd, mode);
-			else
-				fp->compressedfp = gzopen(path, mode);
-		}
-
-		if (fp->compressedfp == NULL)
-		{
-			free_keep_errno(fp);
-			fp = NULL;
-		}
-#else
-		pg_fatal("this build does not support compression with %s", "gzip");
-#endif
-	}
-	else
-	{
-		if (fd >= 0)
-			fp->uncompressedfp = fdopen(fd, mode);
-		else
-			fp->uncompressedfp = fopen(path, mode);
-
-		if (fp->uncompressedfp == NULL)
-		{
-			free_keep_errno(fp);
-			fp = NULL;
-		}
-	}
-
-	return fp;
-}
-
-/*
- * Opens file 'path' in 'mode' and compression as defined in
- * compression_spec. The caller must verify that the compression
- * is supported by the current build.
- *
- * On failure, return NULL with an error code in errno.
- */
-cfp *
-cfopen(const char *path, const char *mode,
-	   const pg_compress_specification compression_spec)
-{
-	return cfopen_internal(path, -1, mode, compression_spec);
-}
-
-/*
- * Associates a stream 'fd', if 'fd' is a valid descriptor, in 'mode'
- * and compression as defined in compression_spec. The caller must
- * verify that the compression is supported by the current build.
- *
- * On failure, return NULL with an error code in errno.
- */
-cfp *
-cfdopen(int fd, const char *mode,
-		const pg_compress_specification compression_spec)
-{
-	return cfopen_internal(NULL, fd, mode, compression_spec);
-}
-
-int
-cfread(void *ptr, int size, cfp *fp)
-{
-	int			ret;
-
-	if (size == 0)
-		return 0;
-
-#ifdef HAVE_LIBZ
-	if (fp->compressedfp)
-	{
-		ret = gzread(fp->compressedfp, ptr, size);
-		if (ret != size && !gzeof(fp->compressedfp))
-		{
-			int			errnum;
-			const char *errmsg = gzerror(fp->compressedfp, &errnum);
-
-			pg_fatal("could not read from input file: %s",
-					 errnum == Z_ERRNO ? strerror(errno) : errmsg);
-		}
-	}
-	else
-#endif
-	{
-		ret = fread(ptr, 1, size, fp->uncompressedfp);
-		if (ret != size && !feof(fp->uncompressedfp))
-			READ_ERROR_EXIT(fp->uncompressedfp);
-	}
-	return ret;
-}
-
-int
-cfwrite(const void *ptr, int size, cfp *fp)
-{
-#ifdef HAVE_LIBZ
-	if (fp->compressedfp)
-		return gzwrite(fp->compressedfp, ptr, size);
-	else
-#endif
-		return fwrite(ptr, 1, size, fp->uncompressedfp);
-}
-
-int
-cfgetc(cfp *fp)
-{
-	int			ret;
-
-#ifdef HAVE_LIBZ
-	if (fp->compressedfp)
-	{
-		ret = gzgetc(fp->compressedfp);
-		if (ret == EOF)
-		{
-			if (!gzeof(fp->compressedfp))
-				pg_fatal("could not read from input file: %s", strerror(errno));
-			else
-				pg_fatal("could not read from input file: end of file");
-		}
-	}
-	else
-#endif
-	{
-		ret = fgetc(fp->uncompressedfp);
-		if (ret == EOF)
-			READ_ERROR_EXIT(fp->uncompressedfp);
-	}
-
-	return ret;
-}
-
-char *
-cfgets(cfp *fp, char *buf, int len)
-{
-#ifdef HAVE_LIBZ
-	if (fp->compressedfp)
-		return gzgets(fp->compressedfp, buf, len);
-	else
-#endif
-		return fgets(buf, len, fp->uncompressedfp);
-}
-
-int
-cfclose(cfp *fp)
-{
-	int			result;
-
-	if (fp == NULL)
-	{
-		errno = EBADF;
-		return EOF;
-	}
-#ifdef HAVE_LIBZ
-	if (fp->compressedfp)
-	{
-		result = gzclose(fp->compressedfp);
-		fp->compressedfp = NULL;
-	}
-	else
-#endif
-	{
-		result = fclose(fp->uncompressedfp);
-		fp->uncompressedfp = NULL;
-	}
-	free_keep_errno(fp);
-
-	return result;
-}
-
-int
-cfeof(cfp *fp)
-{
-#ifdef HAVE_LIBZ
-	if (fp->compressedfp)
-		return gzeof(fp->compressedfp);
-	else
-#endif
-		return feof(fp->uncompressedfp);
-}
-
-const char *
-get_cfp_error(cfp *fp)
-{
-#ifdef HAVE_LIBZ
-	if (fp->compressedfp)
-	{
-		int			errnum;
-		const char *errmsg = gzerror(fp->compressedfp, &errnum);
-
-		if (errnum != Z_ERRNO)
-			return errmsg;
-	}
-#endif
-	return strerror(errno);
-}
-
-#ifdef HAVE_LIBZ
 static int
 hasSuffix(const char *filename, const char *suffix)
 {
@@ -767,4 +159,113 @@ hasSuffix(const char *filename, const char *suffix)
 				  suffixlen) == 0;
 }

+/*
+ * free() without changing errno; useful in several places below.
+ *
+ * errno is saved before the call to free() and restored afterwards, so an
+ * error code set by an earlier failing call is still there for the caller
+ * to inspect.
+ */
+static void
+free_keep_errno(void *p)
+{
+	int			save_errno = errno;
+
+	free(p);
+	errno = save_errno;
+}
+
+/*
+ * Public interface
+ */
+
+/*
+ * Initialize a compress file handle for the specified compression algorithm.
+ *
+ * The handle is allocated with pg_malloc0() and then set up by
+ * InitCompressFileHandleNone() or InitCompressFileHandleGzip(), depending
+ * on compression_spec.algorithm. The caller receives the handle.
+ *
+ * NOTE(review): for any other algorithm neither initializer runs and the
+ * handle is returned exactly as allocated -- confirm that callers never
+ * pass an unsupported algorithm before using the handle's callbacks.
+ */
+CompressFileHandle *
+InitCompressFileHandle(const pg_compress_specification compression_spec)
+{
+	CompressFileHandle *CFH;
+
+	CFH = pg_malloc0(sizeof(CompressFileHandle));
+
+	if (compression_spec.algorithm == PG_COMPRESSION_NONE)
+		InitCompressFileHandleNone(CFH, compression_spec);
+	else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
+		InitCompressFileHandleGzip(CFH, compression_spec);
+
+	return CFH;
+}
+
+/*
+ * Open a file for reading, discovering its compression from the file name.
+ *
+ * 'path' is the file to open. 'mode' is asserted to equal PG_BINARY_R.
+ *
+ * If 'path' ends with ".gz" (currently the only recognized compression
+ * suffix), gzip compression is used throughout. Otherwise stat() is used to
+ * check whether 'path' exists as given; if it does not, and the build has
+ * HAVE_LIBZ, the name with ".gz" appended is checked next and gzip is
+ * selected when that file exists. So if you pass "foo" as 'path', this will
+ * open either "foo" or "foo.gz", trying in that order.
+ *
+ * The handle is created with InitCompressFileHandle() and opened through
+ * its open_func callback; a nonzero result from open_func is treated as
+ * failure, in which case the handle is freed again.
+ *
+ * On failure, return NULL with an error code in errno.
+ *
+ * NOTE(review): when neither file exists in a HAVE_LIBZ build, the open is
+ * attempted on the ".gz" name with the no-compression handle; presumably
+ * open_func then fails and sets errno -- confirm.
+ */
+CompressFileHandle *
+InitDiscoverCompressFileHandle(const char *path, const char *mode)
+{
+	CompressFileHandle *CFH = NULL;
+	struct stat st;
+	char	   *fname;
+	pg_compress_specification compression_spec = {0};
+
+	compression_spec.algorithm = PG_COMPRESSION_NONE;
+
+	Assert(strcmp(mode, PG_BINARY_R) == 0);
+
+	fname = strdup(path);		/* NOTE(review): plain strdup(); the result is not checked for NULL */
+
+	if (hasSuffix(fname, ".gz"))
+		compression_spec.algorithm = PG_COMPRESSION_GZIP;
+	else
+	{
+		bool		exists;
+
+		exists = (stat(path, &st) == 0);
+		/* reading 'exists' here avoids an unused warning if it is not built with compression */
+		if (exists)
+			compression_spec.algorithm = PG_COMPRESSION_NONE;
+#ifdef HAVE_LIBZ
+		if (!exists)
+		{
+			free_keep_errno(fname);
+			fname = psprintf("%s.gz", path);
+			exists = (stat(fname, &st) == 0);
+
+			if (exists)
+				compression_spec.algorithm = PG_COMPRESSION_GZIP;
+		}
 #endif
+	}
+
+	CFH = InitCompressFileHandle(compression_spec);
+	if (CFH->open_func(fname, -1, mode, CFH))	/* nonzero is treated as failure */
+	{
+		free_keep_errno(CFH);
+		CFH = NULL;
+	}
+	free_keep_errno(fname);
+
+	return CFH;
+}
+
+/*
+ * Close an open file handle and release its memory.
+ *
+ * close_func is invoked only when CFH->private_data is set; otherwise the
+ * handle is just freed and 0 is returned. The CompressFileHandle itself is
+ * always freed, with errno preserved across the free.
+ *
+ * Returns the result of close_func. On failure, returns an error value and
+ * sets errno appropriately (presumably via close_func -- confirm against
+ * the per-algorithm implementations).
+ */
+int
+EndCompressFileHandle(CompressFileHandle *CFH)
+{
+	int			ret = 0;
+
+	if (CFH->private_data)
+		ret = CFH->close_func(CFH);
+
+	free_keep_errno(CFH);
+
+	return ret;
+}