1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-22 14:32:25 +03:00

Allow configurable LZ4 TOAST compression.

There is now a per-column COMPRESSION option which can be set to pglz
(the default, and the only option up until now) or lz4. Or, if you
like, you can set the new default_toast_compression GUC to lz4, and
then that will be the default for new table columns for which no value
is specified. We don't have lz4 support in the PostgreSQL code, so
to use lz4 compression, PostgreSQL must be built --with-lz4.

In general, TOAST compression means compression of individual column
values, not the whole tuple, and those values can either be compressed
inline within the tuple or compressed and then stored externally in
the TOAST table, so those properties also apply to this feature.

Prior to this commit, a TOAST pointer has two unused bits as part of
the va_extsize field, and a compressed datum has two unused bits as
part of the va_rawsize field. These bits are unused because the length
of a varlena is limited to 1GB; we now use them to indicate the
compression type that was used. This means we only have bit space for
2 more built-in compression types, but we could work around that
problem, if necessary, by introducing a new vartag_external value for
any further types we end up wanting to add. Hopefully, it won't be
too important to offer a wide selection of algorithms here, since
each one we add not only takes more coding but also adds a build
dependency for every packager. Nevertheless, it seems worth doing
at least this much, because LZ4 gets better compression than PGLZ
with less CPU usage.

It's possible for LZ4-compressed datums to leak into composite type
values stored on disk, just as it is for PGLZ. It's also possible for
LZ4-compressed attributes to be copied into a different table via SQL
commands such as CREATE TABLE AS or INSERT .. SELECT.  It would be
expensive to force such values to be decompressed, so PostgreSQL has
never done so. For the same reasons, we also don't force recompression
of already-compressed values even if the target table prefers a
different compression method than was used for the source data.  These
architectural decisions are perhaps arguable but revisiting them is
well beyond the scope of what seemed possible to do as part of this
project.  However, it's relatively cheap to recompress as part of
VACUUM FULL or CLUSTER, so this commit adjusts those commands to do
so, if the configured compression method of the table happens not to
match what was used for some column value stored therein.

Dilip Kumar. The original patches on which this work was based were
written by Ildus Kurbangaliev, and those patches were based on
even earlier work by Nikita Glukhov, but the design has since changed
very substantially, since allowing a potentially large number of
compression methods that could be added and dropped on a running
system proved too problematic given some of the architectural issues
mentioned above; the choice of which specific compression method to
add first is now different; and a lot of the code has been heavily
refactored.  More recently, Justin Pryzby helped quite a bit with
testing and reviewing and this version also includes some code
contributions from him. Other design input and review from Tomas
Vondra, Álvaro Herrera, Andres Freund, Oleg Bartunov, Alexander
Korotkov, and me.

Discussion: http://postgr.es/m/20170907194236.4cefce96%40wp.localdomain
Discussion: http://postgr.es/m/CAFiTN-uUpX3ck%3DK0mLEk-G_kUQY%3DSNOTeqdaNRR9FMdQrHKebw%40mail.gmail.com
This commit is contained in:
Robert Haas
2021-03-19 15:10:38 -04:00
parent e589c4890b
commit bbe0a81db6
61 changed files with 2261 additions and 160 deletions

View File

@@ -12,16 +12,6 @@
#ifndef DETOAST_H
#define DETOAST_H
/*
 * An externally-stored value counts as compressed when its stored length
 * (va_extsize, which carries no varlena header) is smaller than the payload
 * length of the original datum.  va_rawsize includes the VARHDRSZ overhead,
 * so the header size is subtracted from it before comparing.  Compression is
 * only kept when it actually saves space, so the two lengths are expected to
 * be either equal or in a less-than relationship.
 */
#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
	((toast_pointer).va_rawsize - VARHDRSZ > (toast_pointer).va_extsize)
/*
* Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
* into a local "struct varatt_external" toast pointer. This should be

View File

@@ -0,0 +1,123 @@
/*-------------------------------------------------------------------------
*
* toast_compression.h
* Functions for toast compression.
*
* Copyright (c) 2021, PostgreSQL Global Development Group
*
* src/include/access/toast_compression.h
*
*-------------------------------------------------------------------------
*/
#ifndef TOAST_COMPRESSION_H
#define TOAST_COMPRESSION_H
#include "utils/guc.h"
/*
 * GUCs
 *
 * default_toast_compression holds the compression method as a name string;
 * GetDefaultToastCompression() in this header converts it to a method
 * character by passing it to CompressionNameToMethod().
 */
extern char *default_toast_compression;
/* Name of the compression method used when none is specified. */
#define DEFAULT_TOAST_COMPRESSION "pglz"
/*
 * Built-in compression method IDs.  The toast compression header stores one
 * of these in 2 bits of the raw length word.  Each ID maps directly to one
 * built-in compression method.
 *
 * NOTE(review): because these values are written into stored datums,
 * renumbering them would presumably change the meaning of existing data --
 * confirm before altering any value.
 */
typedef enum ToastCompressionId
{
TOAST_PGLZ_COMPRESSION_ID = 0,
TOAST_LZ4_COMPRESSION_ID = 1,
TOAST_INVALID_COMPRESSION_ID = 2
} ToastCompressionId;
/*
* Built-in compression methods. pg_attribute will store this in the
* attcompression column.
*/
#define TOAST_PGLZ_COMPRESSION 'p'
#define TOAST_LZ4_COMPRESSION 'l'
#define InvalidCompressionMethod '\0'
#define CompressionMethodIsValid(cm) ((bool) ((cm) != InvalidCompressionMethod))
/*
 * Report, at ERROR level with ERRCODE_FEATURE_NOT_SUPPORTED, that LZ4 was
 * requested but is not available.  In this header it is invoked by
 * CompressionNameToMethod() when USE_LZ4 is not defined.
 */
#define NO_LZ4_SUPPORT() \
ereport(ERROR, \
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
errmsg("unsupported LZ4 compression method"), \
errdetail("This functionality requires the server to be built with lz4 support."), \
errhint("You need to rebuild PostgreSQL using --with-lz4.")))
/* True when cm is anything other than InvalidCompressionMethod. */
#define IsValidCompression(cm) (!((cm) == InvalidCompressionMethod))
/*
 * True unless the storage strategy is TYPSTORAGE_PLAIN or
 * TYPSTORAGE_EXTERNAL.
 */
#define IsStorageCompressible(storage) \
	(!((storage) == TYPSTORAGE_PLAIN || (storage) == TYPSTORAGE_EXTERNAL))
/*
 * GetCompressionMethodName - Get compression method name
 *
 * Maps a compression method character to its name: "pglz" for
 * TOAST_PGLZ_COMPRESSION and "lz4" for TOAST_LZ4_COMPRESSION.  Any other
 * value is reported with elog(ERROR).
 */
static inline const char *
GetCompressionMethodName(char method)
{
	switch (method)
	{
		case TOAST_PGLZ_COMPRESSION:
			return "pglz";
		case TOAST_LZ4_COMPRESSION:
			return "lz4";
		default:
			elog(ERROR, "invalid compression method %c", method);

			/*
			 * Not reached when elog(ERROR) does not return, but a compiler
			 * that cannot prove that would otherwise see control fall off
			 * the end of a non-void function.
			 */
			return NULL;		/* keep compiler quiet */
	}
}
/*
 * CompressionNameToMethod - Get compression method from compression name
 *
 * Search the available built-in methods.  If the name does not match any
 * built-in method, return InvalidCompressionMethod.
 *
 * "pglz" yields TOAST_PGLZ_COMPRESSION and "lz4" yields
 * TOAST_LZ4_COMPRESSION.  When USE_LZ4 is not defined, asking for "lz4"
 * goes through NO_LZ4_SUPPORT(), which reports an ERROR.
 *
 * The name is only read, never modified, so it is taken as const char *;
 * callers holding a plain char * are unaffected.
 */
static inline char
CompressionNameToMethod(const char *compression)
{
	if (strcmp(compression, "pglz") == 0)
		return TOAST_PGLZ_COMPRESSION;
	else if (strcmp(compression, "lz4") == 0)
	{
#ifndef USE_LZ4
		NO_LZ4_SUPPORT();
#endif
		return TOAST_LZ4_COMPRESSION;
	}

	return InvalidCompressionMethod;
}
/*
 * GetDefaultToastCompression -- report the default toast compression method
 *
 * Callers use this instead of reading the default_toast_compression GUC
 * variable themselves; the GUC's name string is translated to a method
 * character by CompressionNameToMethod().
 */
static inline char
GetDefaultToastCompression(void)
{
	char		default_method;

	default_method = CompressionNameToMethod(default_toast_compression);

	return default_method;
}
/*
 * pglz compression/decompression routines
 *
 * Each takes a varlena and returns a varlena; the _slice variant also takes
 * a slicelength.  Declarations only -- the implementations are not visible
 * in this header.
 */
extern struct varlena *pglz_compress_datum(const struct varlena *value);
extern struct varlena *pglz_decompress_datum(const struct varlena *value);
extern struct varlena *pglz_decompress_datum_slice(const struct varlena *value,
int32 slicelength);
/*
 * lz4 compression/decompression routines
 *
 * Same signatures as the pglz routines above.
 */
extern struct varlena *lz4_compress_datum(const struct varlena *value);
extern struct varlena *lz4_decompress_datum(const struct varlena *value);
extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value,
int32 slicelength);
/* Returns the ToastCompressionId for the given varlena. */
extern ToastCompressionId toast_get_compression_id(struct varlena *attr);
/*
 * NOTE(review): presumably the GUC check hook for default_toast_compression,
 * judging by the (newval, extra, source) signature -- confirm against the
 * GUC table.
 */
extern bool check_default_toast_compression(char **newval, void **extra,
GucSource source);
#endif /* TOAST_COMPRESSION_H */

View File

@@ -32,6 +32,7 @@ typedef struct
struct varlena *tai_oldexternal;
int32 tai_size;
uint8 tai_colflags;
char tai_compression;
} ToastAttrInfo;
/*

View File

@@ -12,6 +12,7 @@
#ifndef TOAST_INTERNALS_H
#define TOAST_INTERNALS_H
#include "access/toast_compression.h"
#include "storage/lockdefs.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
@@ -22,22 +23,26 @@
/*
 * Header placed at the start of a compressed toast datum.
 *
 * NOTE(review): this listing looks like a diff rendered without its +/-
 * markers.  "rawsize" appears to be the pre-change field and "tcinfo" its
 * replacement, rather than two fields that coexist -- confirm against the
 * actual header before relying on the layout shown here.
 */
typedef struct toast_compress_header
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int32 rawsize; /* NOTE(review): likely the removed pre-change field */
uint32 tcinfo; /* 2 bits for compression method and 30 bits
* rawsize */
} toast_compress_header;
/*
 * Utilities for manipulation of header information for compressed
 * toast entries.
 *
 * NOTE(review): the macros that reference the "rawsize" field
 * (TOAST_COMPRESS_RAWSIZE, TOAST_COMPRESS_SET_RAWSIZE) look like pre-change
 * lines of a diff shown without +/- markers; TOAST_COMPRESS_SIZE and
 * TOAST_COMPRESS_RAWDATA may be as well -- confirm which of these survive.
 */
/* Size in bytes of toast_compress_header, as an int32. */
#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header))
/* Read the rawsize field of the header at ptr. */
#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
/* VARSIZE_ANY(ptr) minus the compression header size. */
#define TOAST_COMPRESS_SIZE(ptr) ((int32) VARSIZE_ANY(ptr) - TOAST_COMPRESS_HDRSZ)
/* Pointer to the first byte following the compression header. */
#define TOAST_COMPRESS_RAWDATA(ptr) \
(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
/* Store len into the rawsize field of the header at ptr. */
#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
(((toast_compress_header *) (ptr))->rawsize = (len))
/*
 * Extract the compression method ID from a compressed datum's header: the
 * bits of tcinfo above the low VARLENA_RAWSIZE_BITS bits.
 */
#define TOAST_COMPRESS_METHOD(ptr) \
	(((toast_compress_header *) (ptr))->tcinfo >> VARLENA_RAWSIZE_BITS)
/*
 * Store the raw size and the compression method ID together in tcinfo: the
 * size occupies the low VARLENA_RAWSIZE_BITS bits and the method ID is
 * placed above them.
 *
 * The method ID is cast to uint32 before shifting so that the shift is done
 * in unsigned arithmetic; shifting a signed int into the sign bit would be
 * undefined for any method ID of 2 or more.  The shift is parenthesized
 * explicitly rather than relying on << binding tighter than |.
 *
 * Note that len and cm_method are each evaluated more than once, so
 * arguments must not have side effects.
 */
#define TOAST_COMPRESS_SET_SIZE_AND_METHOD(ptr, len, cm_method) \
	do { \
		Assert((len) > 0 && (len) <= VARLENA_RAWSIZE_MASK); \
		Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \
			   (cm_method) == TOAST_LZ4_COMPRESSION_ID); \
		((toast_compress_header *) (ptr))->tcinfo = \
			((len) | ((uint32) (cm_method) << VARLENA_RAWSIZE_BITS)); \
	} while (0)
/*
 * NOTE(review): the two toast_compress_datum declarations below have
 * different parameter lists and could not both compile.  This looks like a
 * diff shown without +/- markers, with the one-argument form being replaced
 * by the form that also takes a compression method character -- confirm.
 */
extern Datum toast_compress_datum(Datum value);
extern Datum toast_compress_datum(Datum value, char cmethod);
extern Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock);
extern void toast_delete_datum(Relation rel, Datum value, bool is_speculative);