1
0
mirror of https://github.com/postgres/postgres.git synced 2025-05-08 07:21:33 +03:00
postgres/src/backend/access/common/toast_compression.c
Robert Haas e5595de03e Tidy up more loose ends related to configurable TOAST compression.
Change the default_toast_compression GUC to be an enum rather than
a string. Earlier, uncommitted versions of the patch supported using
CREATE ACCESS METHOD to add new compression methods to a running
system, but that idea was dropped before commit. So, we can simplify
the GUC handling as well, which has the nice side effect of improving
the error messages.

While updating the documentation to reflect the new GUC type, also
move it back to the right place in the list. I moved this while
revising what became commit 24f0e395ac5892cd12e8914646fe921fac5ba23d,
but apparently the intended ordering is "alphabetical" rather than
"whatever Robert thinks looks nice."

Rejigger things to avoid having access/toast_compression.h depend on
utils/guc.h, so that we don't end up with every file that includes
it also depending on something largely unrelated. Move a few
inline functions back into the C source file partly to help reduce
dependencies and partly just to avoid clutter. A few very minor
cosmetic fixes.

Original patch by Justin Pryzby, but very heavily edited by me,
and reverse reviewed by him and also reviewed by Tom Lane.

Discussion: http://postgr.es/m/CA+TgmoYp=GT_ztUCeZg2i4hkHAQv8o=-nVJ1-TKWTG1zQOmOpg@mail.gmail.com
2021-03-24 12:36:08 -04:00

319 lines
7.7 KiB
C

/*-------------------------------------------------------------------------
*
* toast_compression.c
* Functions for toast compression.
*
* Copyright (c) 2021, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* src/backend/access/common/toast_compression.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef USE_LZ4
#include <lz4.h>
#endif
#include "access/detoast.h"
#include "access/toast_compression.h"
#include "common/pg_lzcompress.h"
#include "fmgr.h"
#include "utils/builtins.h"
/*
 * GUC: default_toast_compression.
 *
 * Kept as a plain int and initialised to the PGLZ method code.
 */
int default_toast_compression = TOAST_PGLZ_COMPRESSION;
/*
 * Report (at ERROR level, ERRCODE_FEATURE_NOT_SUPPORTED) that an LZ4
 * operation was requested in a build without USE_LZ4.  The lz4_* functions
 * and CompressionNameToMethod() invoke this in their !USE_LZ4 branches.
 */
#define NO_LZ4_SUPPORT() \
ereport(ERROR, \
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
errmsg("unsupported LZ4 compression method"), \
errdetail("This functionality requires the server to be built with lz4 support."), \
errhint("You need to rebuild PostgreSQL using --with-lz4.")))
/*
 * Compress a varlena using PGLZ.
 *
 * Only the payload of "value" (VARDATA_ANY, VARSIZE_ANY_EXHDR bytes) is
 * handed to pglz_compress().  On success the result is a newly palloc'd
 * varlena whose size has been stored with SET_VARSIZE_COMPRESSED.
 *
 * Returns the compressed varlena, or NULL if the payload size lies outside
 * the input-size limits of PGLZ_strategy_default or if pglz_compress()
 * returns a negative length.
 */
struct varlena *
pglz_compress_datum(const struct varlena *value)
{
    int32       valsize,
                len;
    struct varlena *tmp;

    /*
     * "value" is already a pointer, not a Datum, so pass it to the size
     * macro directly (as lz4_compress_datum() does) instead of routing it
     * through DatumGetPointer().
     */
    valsize = VARSIZE_ANY_EXHDR(value);

    /*
     * No point in wasting a palloc cycle if value size is outside the allowed
     * range for compression.
     */
    if (valsize < PGLZ_strategy_default->min_input_size ||
        valsize > PGLZ_strategy_default->max_input_size)
        return NULL;

    /*
     * Figure out the maximum possible size of the pglz output, add the bytes
     * that will be needed for varlena overhead, and allocate that amount.
     */
    tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
                                    VARHDRSZ_COMPRESSED);

    /*
     * NOTE(review): a NULL strategy is passed here; presumably that selects
     * the default strategy whose limits were checked above -- confirm
     * against pglz_compress().
     */
    len = pglz_compress(VARDATA_ANY(value),
                        valsize,
                        (char *) tmp + VARHDRSZ_COMPRESSED,
                        NULL);
    if (len < 0)
    {
        /* compressor gave up; release the scratch buffer */
        pfree(tmp);
        return NULL;
    }

    SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED);

    return tmp;
}
/*
 * Fully expand a PGLZ-compressed varlena.
 *
 * The output buffer is palloc'd here, sized from the raw size recorded in
 * the compressed header.  Raises ERRCODE_DATA_CORRUPTED if pglz_decompress()
 * returns a negative length.
 */
struct varlena *
pglz_decompress_datum(const struct varlena *value)
{
    int32       extsize = VARDATA_COMPRESSED_GET_EXTSIZE(value);
    int32       complen = VARSIZE(value) - VARHDRSZ_COMPRESSED;
    struct varlena *out;
    int32       outlen;

    /* room for the raw payload plus a regular varlena header */
    out = (struct varlena *) palloc(extsize + VARHDRSZ);

    /* expand everything; the slice variant passes false as the last argument */
    outlen = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
                             complen,
                             VARDATA(out),
                             extsize,
                             true);
    if (outlen < 0)
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg_internal("compressed pglz data is corrupt")));

    SET_VARSIZE(out, outlen + VARHDRSZ);

    return out;
}
/*
 * Expand a leading slice of a PGLZ-compressed varlena.
 *
 * At most "slicelength" payload bytes are produced; the returned varlena's
 * size reflects the number of bytes pglz_decompress() actually reported.
 * Raises ERRCODE_DATA_CORRUPTED if pglz_decompress() returns a negative
 * length.
 */
struct varlena *
pglz_decompress_datum_slice(const struct varlena *value,
                            int32 slicelength)
{
    int32       complen = VARSIZE(value) - VARHDRSZ_COMPRESSED;
    struct varlena *out;
    int32       outlen;

    /* buffer only needs to hold the requested slice plus the header */
    out = (struct varlena *) palloc(slicelength + VARHDRSZ);

    /* partial expansion, so the last argument is false */
    outlen = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
                             complen,
                             VARDATA(out),
                             slicelength,
                             false);
    if (outlen < 0)
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg_internal("compressed pglz data is corrupt")));

    SET_VARSIZE(out, outlen + VARHDRSZ);

    return out;
}
/*
 * Compress a varlena using LZ4.
 *
 * Returns a newly palloc'd compressed varlena, or NULL when the LZ4 output
 * turned out larger than the input payload.  Raises an error if
 * LZ4_compress_default() returns zero or a negative value, and reports
 * missing LZ4 support in builds without USE_LZ4.
 */
struct varlena *
lz4_compress_datum(const struct varlena *value)
{
#ifdef USE_LZ4
    int32       rawlen = VARSIZE_ANY_EXHDR(value);
    int32       bound = LZ4_compressBound(rawlen);
    struct varlena *out;
    int32       complen;

    /* worst-case LZ4 output plus the compressed-varlena header */
    out = (struct varlena *) palloc(bound + VARHDRSZ_COMPRESSED);

    complen = LZ4_compress_default(VARDATA_ANY(value),
                                   (char *) out + VARHDRSZ_COMPRESSED,
                                   rawlen, bound);
    if (complen <= 0)
        elog(ERROR, "lz4 compression failed");

    /* no gain from compression: discard the buffer and tell the caller */
    if (complen > rawlen)
    {
        pfree(out);
        return NULL;
    }

    SET_VARSIZE_COMPRESSED(out, complen + VARHDRSZ_COMPRESSED);

    return out;
#else
    NO_LZ4_SUPPORT();
    return NULL;                /* keep compiler quiet */
#endif
}
/*
 * Fully expand an LZ4-compressed varlena.
 *
 * The output buffer is palloc'd here, sized from the raw size recorded in
 * the compressed header.  Raises ERRCODE_DATA_CORRUPTED if
 * LZ4_decompress_safe() returns a negative value, and reports missing LZ4
 * support in builds without USE_LZ4.
 */
struct varlena *
lz4_decompress_datum(const struct varlena *value)
{
#ifdef USE_LZ4
    int32       extsize = VARDATA_COMPRESSED_GET_EXTSIZE(value);
    int32       complen = VARSIZE(value) - VARHDRSZ_COMPRESSED;
    struct varlena *out;
    int32       outlen;

    /* room for the raw payload plus a regular varlena header */
    out = (struct varlena *) palloc(extsize + VARHDRSZ);

    outlen = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESSED,
                                 VARDATA(out),
                                 complen,
                                 extsize);
    if (outlen < 0)
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg_internal("compressed lz4 data is corrupt")));

    SET_VARSIZE(out, outlen + VARHDRSZ);

    return out;
#else
    NO_LZ4_SUPPORT();
    return NULL;                /* keep compiler quiet */
#endif
}
/*
 * Expand a leading slice of an LZ4-compressed varlena.
 *
 * When the LZ4 library in use reports a version older than 1.8.3, the whole
 * datum is decompressed instead via lz4_decompress_datum().  Otherwise at
 * most "slicelength" payload bytes are produced.  Raises
 * ERRCODE_DATA_CORRUPTED if LZ4_decompress_safe_partial() returns a negative
 * value, and reports missing LZ4 support in builds without USE_LZ4.
 */
struct varlena *
lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
{
#ifdef USE_LZ4
    struct varlena *out;
    int32       complen;
    int32       outlen;

    /* slice decompression not supported prior to 1.8.3 */
    if (LZ4_versionNumber() < 10803)
        return lz4_decompress_datum(value);

    /* buffer only needs to hold the requested slice plus the header */
    out = (struct varlena *) palloc(slicelength + VARHDRSZ);

    complen = VARSIZE(value) - VARHDRSZ_COMPRESSED;

    /* slicelength serves as both the target size and the buffer capacity */
    outlen = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESSED,
                                         VARDATA(out),
                                         complen,
                                         slicelength,
                                         slicelength);
    if (outlen < 0)
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg_internal("compressed lz4 data is corrupt")));

    SET_VARSIZE(out, outlen + VARHDRSZ);

    return out;
#else
    NO_LZ4_SUPPORT();
    return NULL;                /* keep compiler quiet */
#endif
}
/*
* Extract compression ID from a varlena.
*
* Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed.
*/
ToastCompressionId
toast_get_compression_id(struct varlena *attr)
{
ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
/*
* If it is stored externally then fetch the compression method id from the
* external toast pointer. If compressed inline, fetch it from the toast
* compression header.
*/
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
}
else if (VARATT_IS_COMPRESSED(attr))
cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr);
return cmid;
}
/*
 * CompressionNameToMethod - map a compression name to its method code
 *
 * Recognizes the built-in names "pglz" and "lz4".  Any other name yields
 * InvalidCompressionMethod.  In a build without USE_LZ4, asking for "lz4"
 * raises the missing-LZ4-support error instead of returning.
 */
char
CompressionNameToMethod(const char *compression)
{
    if (strcmp(compression, "pglz") == 0)
        return TOAST_PGLZ_COMPRESSION;

    /* anything that is not "lz4" either is unknown */
    if (strcmp(compression, "lz4") != 0)
        return InvalidCompressionMethod;

#ifndef USE_LZ4
    NO_LZ4_SUPPORT();
#endif
    return TOAST_LZ4_COMPRESSION;
}
/*
 * GetCompressionMethodName - map a compression method code to its name
 *
 * Returns "pglz" or "lz4" for the two built-in method codes; any other code
 * raises an error.
 */
const char *
GetCompressionMethodName(char method)
{
    if (method == TOAST_PGLZ_COMPRESSION)
        return "pglz";

    if (method == TOAST_LZ4_COMPRESSION)
        return "lz4";

    elog(ERROR, "invalid compression method %c", method);
    return NULL;                /* keep compiler quiet */
}