mirror of
https://github.com/postgres/postgres.git
synced 2025-05-08 07:21:33 +03:00
Change the default_toast_compression GUC to be an enum rather than a string. Earlier, uncommitted versions of the patch supported using CREATE ACCESS METHOD to add new compression methods to a running system, but that idea was dropped before commit. So, we can simplify the GUC handling as well, which has the nice side effect of improving the error messages. While updating the documentation to reflect the new GUC type, also move it back to the right place in the list. I moved this while revising what became commit 24f0e395ac5892cd12e8914646fe921fac5ba23d, but apparently the intended ordering is "alphabetical" rather than "whatever Robert thinks looks nice." Rejigger things to avoid having access/toast_compression.h depend on utils/guc.h, so that we don't end up with every file that includes it also depending on something largely unrelated. Move a few inline functions back into the C source file partly to help reduce dependencies and partly just to avoid clutter. A few very minor cosmetic fixes. Original patch by Justin Pryzby, but very heavily edited by me, and reverse reviewed by him and also reviewed by Tom Lane. Discussion: http://postgr.es/m/CA+TgmoYp=GT_ztUCeZg2i4hkHAQv8o=-nVJ1-TKWTG1zQOmOpg@mail.gmail.com
319 lines
7.7 KiB
C
319 lines
7.7 KiB
C
/*-------------------------------------------------------------------------
 *
 * toast_compression.c
 *	  Functions for toast compression.
 *
 * Copyright (c) 2021, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/common/toast_compression.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#ifdef USE_LZ4
|
|
#include <lz4.h>
|
|
#endif
|
|
|
|
#include "access/detoast.h"
|
|
#include "access/toast_compression.h"
|
|
#include "common/pg_lzcompress.h"
|
|
#include "fmgr.h"
|
|
#include "utils/builtins.h"
|
|
|
|
/* GUC */
|
|
int default_toast_compression = TOAST_PGLZ_COMPRESSION;
|
|
|
|
/*
 * Raise an ERROR reporting that LZ4 is unavailable in this build.  The
 * LZ4 entry points in this file invoke it when USE_LZ4 is not defined.
 */
#define NO_LZ4_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported LZ4 compression method"), \
			 errdetail("This functionality requires the server to be built with lz4 support."), \
			 errhint("You need to rebuild PostgreSQL using --with-lz4.")))
|
|
|
|
/*
|
|
* Compress a varlena using PGLZ.
|
|
*
|
|
* Returns the compressed varlena, or NULL if compression fails.
|
|
*/
|
|
struct varlena *
|
|
pglz_compress_datum(const struct varlena *value)
|
|
{
|
|
int32 valsize,
|
|
len;
|
|
struct varlena *tmp = NULL;
|
|
|
|
valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
|
|
|
|
/*
|
|
* No point in wasting a palloc cycle if value size is outside the allowed
|
|
* range for compression.
|
|
*/
|
|
if (valsize < PGLZ_strategy_default->min_input_size ||
|
|
valsize > PGLZ_strategy_default->max_input_size)
|
|
return NULL;
|
|
|
|
/*
|
|
* Figure out the maximum possible size of the pglz output, add the bytes
|
|
* that will be needed for varlena overhead, and allocate that amount.
|
|
*/
|
|
tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
|
|
VARHDRSZ_COMPRESSED);
|
|
|
|
len = pglz_compress(VARDATA_ANY(value),
|
|
valsize,
|
|
(char *) tmp + VARHDRSZ_COMPRESSED,
|
|
NULL);
|
|
if (len < 0)
|
|
{
|
|
pfree(tmp);
|
|
return NULL;
|
|
}
|
|
|
|
SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED);
|
|
|
|
return tmp;
|
|
}
|
|
|
|
/*
|
|
* Decompress a varlena that was compressed using PGLZ.
|
|
*/
|
|
struct varlena *
|
|
pglz_decompress_datum(const struct varlena *value)
|
|
{
|
|
struct varlena *result;
|
|
int32 rawsize;
|
|
|
|
/* allocate memory for the uncompressed data */
|
|
result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ);
|
|
|
|
/* decompress the data */
|
|
rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
|
|
VARSIZE(value) - VARHDRSZ_COMPRESSED,
|
|
VARDATA(result),
|
|
VARDATA_COMPRESSED_GET_EXTSIZE(value), true);
|
|
if (rawsize < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
errmsg_internal("compressed pglz data is corrupt")));
|
|
|
|
SET_VARSIZE(result, rawsize + VARHDRSZ);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Decompress part of a varlena that was compressed using PGLZ.
|
|
*/
|
|
struct varlena *
|
|
pglz_decompress_datum_slice(const struct varlena *value,
|
|
int32 slicelength)
|
|
{
|
|
struct varlena *result;
|
|
int32 rawsize;
|
|
|
|
/* allocate memory for the uncompressed data */
|
|
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
|
|
|
|
/* decompress the data */
|
|
rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
|
|
VARSIZE(value) - VARHDRSZ_COMPRESSED,
|
|
VARDATA(result),
|
|
slicelength, false);
|
|
if (rawsize < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
errmsg_internal("compressed pglz data is corrupt")));
|
|
|
|
SET_VARSIZE(result, rawsize + VARHDRSZ);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Compress a varlena using LZ4.
|
|
*
|
|
* Returns the compressed varlena, or NULL if compression fails.
|
|
*/
|
|
struct varlena *
|
|
lz4_compress_datum(const struct varlena *value)
|
|
{
|
|
#ifndef USE_LZ4
|
|
NO_LZ4_SUPPORT();
|
|
return NULL; /* keep compiler quiet */
|
|
#else
|
|
int32 valsize;
|
|
int32 len;
|
|
int32 max_size;
|
|
struct varlena *tmp = NULL;
|
|
|
|
valsize = VARSIZE_ANY_EXHDR(value);
|
|
|
|
/*
|
|
* Figure out the maximum possible size of the LZ4 output, add the bytes
|
|
* that will be needed for varlena overhead, and allocate that amount.
|
|
*/
|
|
max_size = LZ4_compressBound(valsize);
|
|
tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESSED);
|
|
|
|
len = LZ4_compress_default(VARDATA_ANY(value),
|
|
(char *) tmp + VARHDRSZ_COMPRESSED,
|
|
valsize, max_size);
|
|
if (len <= 0)
|
|
elog(ERROR, "lz4 compression failed");
|
|
|
|
/* data is incompressible so just free the memory and return NULL */
|
|
if (len > valsize)
|
|
{
|
|
pfree(tmp);
|
|
return NULL;
|
|
}
|
|
|
|
SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED);
|
|
|
|
return tmp;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Decompress a varlena that was compressed using LZ4.
|
|
*/
|
|
struct varlena *
|
|
lz4_decompress_datum(const struct varlena *value)
|
|
{
|
|
#ifndef USE_LZ4
|
|
NO_LZ4_SUPPORT();
|
|
return NULL; /* keep compiler quiet */
|
|
#else
|
|
int32 rawsize;
|
|
struct varlena *result;
|
|
|
|
/* allocate memory for the uncompressed data */
|
|
result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ);
|
|
|
|
/* decompress the data */
|
|
rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESSED,
|
|
VARDATA(result),
|
|
VARSIZE(value) - VARHDRSZ_COMPRESSED,
|
|
VARDATA_COMPRESSED_GET_EXTSIZE(value));
|
|
if (rawsize < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
errmsg_internal("compressed lz4 data is corrupt")));
|
|
|
|
|
|
SET_VARSIZE(result, rawsize + VARHDRSZ);
|
|
|
|
return result;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Decompress part of a varlena that was compressed using LZ4.
|
|
*/
|
|
struct varlena *
|
|
lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
|
|
{
|
|
#ifndef USE_LZ4
|
|
NO_LZ4_SUPPORT();
|
|
return NULL; /* keep compiler quiet */
|
|
#else
|
|
int32 rawsize;
|
|
struct varlena *result;
|
|
|
|
/* slice decompression not supported prior to 1.8.3 */
|
|
if (LZ4_versionNumber() < 10803)
|
|
return lz4_decompress_datum(value);
|
|
|
|
/* allocate memory for the uncompressed data */
|
|
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
|
|
|
|
/* decompress the data */
|
|
rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESSED,
|
|
VARDATA(result),
|
|
VARSIZE(value) - VARHDRSZ_COMPRESSED,
|
|
slicelength,
|
|
slicelength);
|
|
if (rawsize < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
errmsg_internal("compressed lz4 data is corrupt")));
|
|
|
|
SET_VARSIZE(result, rawsize + VARHDRSZ);
|
|
|
|
return result;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Extract compression ID from a varlena.
|
|
*
|
|
* Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed.
|
|
*/
|
|
ToastCompressionId
|
|
toast_get_compression_id(struct varlena *attr)
|
|
{
|
|
ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
|
|
|
|
/*
|
|
* If it is stored externally then fetch the compression method id from the
|
|
* external toast pointer. If compressed inline, fetch it from the toast
|
|
* compression header.
|
|
*/
|
|
if (VARATT_IS_EXTERNAL_ONDISK(attr))
|
|
{
|
|
struct varatt_external toast_pointer;
|
|
|
|
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
|
|
|
|
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
|
|
cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
|
|
}
|
|
else if (VARATT_IS_COMPRESSED(attr))
|
|
cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr);
|
|
|
|
return cmid;
|
|
}
|
|
|
|
/*
|
|
* CompressionNameToMethod - Get compression method from compression name
|
|
*
|
|
* Search in the available built-in methods. If the compression not found
|
|
* in the built-in methods then return InvalidCompressionMethod.
|
|
*/
|
|
char
|
|
CompressionNameToMethod(const char *compression)
|
|
{
|
|
if (strcmp(compression, "pglz") == 0)
|
|
return TOAST_PGLZ_COMPRESSION;
|
|
else if (strcmp(compression, "lz4") == 0)
|
|
{
|
|
#ifndef USE_LZ4
|
|
NO_LZ4_SUPPORT();
|
|
#endif
|
|
return TOAST_LZ4_COMPRESSION;
|
|
}
|
|
|
|
return InvalidCompressionMethod;
|
|
}
|
|
|
|
/*
|
|
* GetCompressionMethodName - Get compression method name
|
|
*/
|
|
const char *
|
|
GetCompressionMethodName(char method)
|
|
{
|
|
switch (method)
|
|
{
|
|
case TOAST_PGLZ_COMPRESSION:
|
|
return "pglz";
|
|
case TOAST_LZ4_COMPRESSION:
|
|
return "lz4";
|
|
default:
|
|
elog(ERROR, "invalid compression method %c", method);
|
|
return NULL; /* keep compiler quiet */
|
|
}
|
|
}
|