From aeb1631ed207cef2d80e20f79eb52c72f03bca7d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 22 Mar 2021 13:43:10 -0400 Subject: [PATCH] Mostly-cosmetic adjustments of TOAST-related macros. The authors of bbe0a81db hadn't quite got the idea that macros named like SOMETHING_4B_C were only meant for internal endianness-related details in postgres.h. Choose more legible names for macros that are intended to be used elsewhere. Rearrange postgres.h a bit to clarify the separation between those internal macros and ones intended for wider use. Also, avoid using the term "rawsize" for true decompressed size; we've used "extsize" for that, because "rawsize" generally denotes total Datum size including header. This choice seemed particularly unfortunate in tests that were comparing one of these meanings to the other. This patch includes a couple of not-purely-cosmetic changes: be sure that the shifts aligning compression methods are unsigned (not critical today, but will be when compression method 2 exists), and fix broken definition of VARATT_EXTERNAL_GET_COMPRESSION (now VARATT_EXTERNAL_GET_COMPRESS_METHOD), whose callers worked only accidentally. Discussion: https://postgr.es/m/574197.1616428079@sss.pgh.pa.us --- src/backend/access/common/detoast.c | 4 +- src/backend/access/common/toast_compression.c | 40 +++---- src/backend/access/common/toast_internals.c | 8 +- src/include/access/toast_internals.h | 13 ++- src/include/postgres.h | 106 +++++++++--------- 5 files changed, 87 insertions(+), 84 deletions(-) diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index bed50e86034..a7f7a474782 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -251,7 +251,7 @@ detoast_attr_slice(struct varlena *attr, * determine how much compressed data we need to be sure of being * able to decompress the required slice. 
*/ - if (VARATT_EXTERNAL_GET_COMPRESSION(toast_pointer) == + if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == TOAST_PGLZ_COMPRESSION_ID) max_size = pglz_maximum_compressed_size(slicelimit, max_size); @@ -562,7 +562,7 @@ toast_raw_datum_size(Datum value) else if (VARATT_IS_COMPRESSED(attr)) { /* here, va_rawsize is just the payload size */ - result = VARRAWSIZE_4B_C(attr) + VARHDRSZ; + result = VARDATA_COMPRESSED_GET_EXTSIZE(attr) + VARHDRSZ; } else if (VARATT_IS_SHORT(attr)) { diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c index 00af1740cfa..645eb03bf07 100644 --- a/src/backend/access/common/toast_compression.c +++ b/src/backend/access/common/toast_compression.c @@ -53,11 +53,11 @@ pglz_compress_datum(const struct varlena *value) * that will be needed for varlena overhead, and allocate that amount. */ tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) + - VARHDRSZ_COMPRESS); + VARHDRSZ_COMPRESSED); len = pglz_compress(VARDATA_ANY(value), valsize, - (char *) tmp + VARHDRSZ_COMPRESS, + (char *) tmp + VARHDRSZ_COMPRESSED, NULL); if (len < 0) { @@ -65,7 +65,7 @@ pglz_compress_datum(const struct varlena *value) return NULL; } - SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESS); + SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED); return tmp; } @@ -80,13 +80,13 @@ pglz_decompress_datum(const struct varlena *value) int32 rawsize; /* allocate memory for the uncompressed data */ - result = (struct varlena *) palloc(VARRAWSIZE_4B_C(value) + VARHDRSZ); + result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); /* decompress the data */ - rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESS, - VARSIZE(value) - VARHDRSZ_COMPRESS, + rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED, + VARSIZE(value) - VARHDRSZ_COMPRESSED, VARDATA(result), - VARRAWSIZE_4B_C(value), true); + VARDATA_COMPRESSED_GET_EXTSIZE(value), true); if (rawsize < 0) 
ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), @@ -111,8 +111,8 @@ pglz_decompress_datum_slice(const struct varlena *value, result = (struct varlena *) palloc(slicelength + VARHDRSZ); /* decompress the data */ - rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESS, - VARSIZE(value) - VARHDRSZ_COMPRESS, + rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED, + VARSIZE(value) - VARHDRSZ_COMPRESSED, VARDATA(result), slicelength, false); if (rawsize < 0) @@ -149,10 +149,10 @@ lz4_compress_datum(const struct varlena *value) * that will be needed for varlena overhead, and allocate that amount. */ max_size = LZ4_compressBound(valsize); - tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESS); + tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESSED); len = LZ4_compress_default(VARDATA_ANY(value), - (char *) tmp + VARHDRSZ_COMPRESS, + (char *) tmp + VARHDRSZ_COMPRESSED, valsize, max_size); if (len <= 0) elog(ERROR, "lz4 compression failed"); @@ -164,7 +164,7 @@ lz4_compress_datum(const struct varlena *value) return NULL; } - SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESS); + SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED); return tmp; #endif @@ -184,13 +184,13 @@ lz4_decompress_datum(const struct varlena *value) struct varlena *result; /* allocate memory for the uncompressed data */ - result = (struct varlena *) palloc(VARRAWSIZE_4B_C(value) + VARHDRSZ); + result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); /* decompress the data */ - rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESS, + rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESSED, VARDATA(result), - VARSIZE(value) - VARHDRSZ_COMPRESS, - VARRAWSIZE_4B_C(value)); + VARSIZE(value) - VARHDRSZ_COMPRESSED, + VARDATA_COMPRESSED_GET_EXTSIZE(value)); if (rawsize < 0) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), @@ -224,9 +224,9 @@ lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength) result 
= (struct varlena *) palloc(slicelength + VARHDRSZ); /* decompress the data */ - rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESS, + rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESSED, VARDATA(result), - VARSIZE(value) - VARHDRSZ_COMPRESS, + VARSIZE(value) - VARHDRSZ_COMPRESSED, slicelength, slicelength); if (rawsize < 0) @@ -262,10 +262,10 @@ toast_get_compression_id(struct varlena *attr) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) - cmid = VARATT_EXTERNAL_GET_COMPRESSION(toast_pointer); + cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer); } else if (VARATT_IS_COMPRESSED(attr)) - cmid = VARCOMPRESS_4B_C(attr); + cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); return cmid; } diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index c81ce178221..730cd04a2d7 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -91,7 +91,7 @@ toast_compress_datum(Datum value, char cmethod) { /* successful compression */ Assert(cmid != TOAST_INVALID_COMPRESSION_ID); - TOAST_COMPRESS_SET_SIZE_AND_METHOD(tmp, valsize, cmid); + TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(tmp, valsize, cmid); return PointerGetDatum(tmp); } else @@ -181,11 +181,11 @@ toast_save_datum(Relation rel, Datum value, data_p = VARDATA(dval); data_todo = VARSIZE(dval) - VARHDRSZ; /* rawsize in a compressed datum is just the size of the payload */ - toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ; + toast_pointer.va_rawsize = VARDATA_COMPRESSED_GET_EXTSIZE(dval) + VARHDRSZ; /* set external size and compression method */ - VARATT_EXTERNAL_SET_SIZE_AND_COMPRESSION(toast_pointer, data_todo, - VARCOMPRESS_4B_C(dval)); + VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo, + VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval)); /* Assert that the numbers look like it's 
compressed */ Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); } diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index b4d068459ac..bf118f04829 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -24,22 +24,23 @@ typedef struct toast_compress_header { int32 vl_len_; /* varlena header (do not touch directly!) */ uint32 tcinfo; /* 2 bits for compression method and 30 bits - * rawsize */ + * external size; see va_extinfo */ } toast_compress_header; /* * Utilities for manipulation of header information for compressed * toast entries. */ -#define TOAST_COMPRESS_METHOD(ptr) \ - (((toast_compress_header *) (ptr))->tcinfo >> VARLENA_RAWSIZE_BITS) -#define TOAST_COMPRESS_SET_SIZE_AND_METHOD(ptr, len, cm_method) \ +#define TOAST_COMPRESS_METHOD(ptr) \ + (((toast_compress_header *) (ptr))->tcinfo >> VARLENA_EXTSIZE_BITS) + +#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \ do { \ - Assert((len) > 0 && (len) <= VARLENA_RAWSIZE_MASK); \ + Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ (cm_method) == TOAST_LZ4_COMPRESSION_ID); \ ((toast_compress_header *) (ptr))->tcinfo = \ - ((len) | (cm_method) << VARLENA_RAWSIZE_BITS); \ + (len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \ } while (0) extern Datum toast_compress_datum(Datum value, char cmethod); diff --git a/src/include/postgres.h b/src/include/postgres.h index 2ccbea8e502..8e02f911086 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -55,8 +55,8 @@ /* * struct varatt_external is a traditional "TOAST pointer", that is, the * information needed to fetch a Datum stored out-of-line in a TOAST table. - * The data is compressed if and only if the size stored in va_extinfo < - * va_rawsize - VARHDRSZ. + * The data is compressed if and only if the external size stored in + * va_extinfo is less than va_rawsize - VARHDRSZ. 
* * This struct must not contain any padding, because we sometimes compare * these pointers using memcmp. @@ -75,6 +75,13 @@ typedef struct varatt_external Oid va_toastrelid; /* RelID of TOAST table containing it */ } varatt_external; +/* + * These macros define the "saved size" portion of va_extinfo. Its remaining + * two high-order bits identify the compression method. + */ +#define VARLENA_EXTSIZE_BITS 30 +#define VARLENA_EXTSIZE_MASK ((1U << VARLENA_EXTSIZE_BITS) - 1) + /* * struct varatt_indirect is a "TOAST pointer" representing an out-of-line * Datum that's stored in memory, not in an external toast relation. @@ -149,7 +156,7 @@ typedef union { uint32 va_header; uint32 va_tcinfo; /* Original data size (excludes header) and - * compression method */ + * compression method; see va_extinfo */ char va_data[FLEXIBLE_ARRAY_MEMBER]; /* Compressed data */ } va_compressed; } varattrib_4b; @@ -235,6 +242,7 @@ typedef struct #define SET_VARTAG_1B_E(PTR,tag) \ (((varattrib_1b_e *) (PTR))->va_header = 0x80, \ ((varattrib_1b_e *) (PTR))->va_tag = (tag)) + #else /* !WORDS_BIGENDIAN */ #define VARATT_IS_4B(PTR) \ @@ -267,36 +275,28 @@ typedef struct #define SET_VARTAG_1B_E(PTR,tag) \ (((varattrib_1b_e *) (PTR))->va_header = 0x01, \ ((varattrib_1b_e *) (PTR))->va_tag = (tag)) + #endif /* WORDS_BIGENDIAN */ -#define VARHDRSZ_SHORT offsetof(varattrib_1b, va_data) -#define VARATT_SHORT_MAX 0x7F -#define VARATT_CAN_MAKE_SHORT(PTR) \ - (VARATT_IS_4B_U(PTR) && \ - (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) <= VARATT_SHORT_MAX) -#define VARATT_CONVERTED_SHORT_SIZE(PTR) \ - (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) - -#define VARHDRSZ_EXTERNAL offsetof(varattrib_1b_e, va_data) -#define VARHDRSZ_COMPRESS offsetof(varattrib_4b, va_compressed.va_data) - #define VARDATA_4B(PTR) (((varattrib_4b *) (PTR))->va_4byte.va_data) #define VARDATA_4B_C(PTR) (((varattrib_4b *) (PTR))->va_compressed.va_data) #define VARDATA_1B(PTR) (((varattrib_1b *) (PTR))->va_data) #define VARDATA_1B_E(PTR) 
(((varattrib_1b_e *) (PTR))->va_data) -#define VARLENA_RAWSIZE_BITS 30 -#define VARLENA_RAWSIZE_MASK ((1U << VARLENA_RAWSIZE_BITS) - 1) - /* - * va_tcinfo in va_compress contains raw size of datum and compression method. + * Externally visible TOAST macros begin here. */ -#define VARRAWSIZE_4B_C(PTR) \ - (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_RAWSIZE_MASK) -#define VARCOMPRESS_4B_C(PTR) \ - (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_RAWSIZE_BITS) -/* Externally visible macros */ +#define VARHDRSZ_EXTERNAL offsetof(varattrib_1b_e, va_data) +#define VARHDRSZ_COMPRESSED offsetof(varattrib_4b, va_compressed.va_data) +#define VARHDRSZ_SHORT offsetof(varattrib_1b, va_data) + +#define VARATT_SHORT_MAX 0x7F +#define VARATT_CAN_MAKE_SHORT(PTR) \ + (VARATT_IS_4B_U(PTR) && \ + (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) <= VARATT_SHORT_MAX) +#define VARATT_CONVERTED_SHORT_SIZE(PTR) \ + (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) /* * In consumers oblivious to data alignment, call PG_DETOAST_DATUM_PACKED(), @@ -336,35 +336,6 @@ typedef struct (VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR))) #define VARATT_IS_EXTERNAL_NON_EXPANDED(PTR) \ (VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR))) - -/* - * va_extinfo in varatt_external contains actual length of the external data - * and compression method if external data is compressed. 
- */ -#define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \ - ((toast_pointer).va_extinfo & VARLENA_RAWSIZE_MASK) - -#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESSION(toast_pointer, len, cm) \ - do { \ - Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm) == TOAST_LZ4_COMPRESSION_ID); \ - ((toast_pointer).va_extinfo = (len) | (cm) << VARLENA_RAWSIZE_BITS); \ - } while (0) - -#define VARATT_EXTERNAL_GET_COMPRESSION(PTR) \ - ((toast_pointer).va_extinfo >> VARLENA_RAWSIZE_BITS) - -/* - * Testing whether an externally-stored value is compressed now requires - * comparing size stored in va_extinfo (the actual length of the external data) - * to rawsize (the original uncompressed datum's size). The latter includes - * VARHDRSZ overhead, the former doesn't. We never use compression unless it - * actually saves space, so we expect either equality or less-than. - */ -#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ - (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \ - (toast_pointer).va_rawsize - VARHDRSZ) - #define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR) #define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR)) @@ -390,6 +361,37 @@ typedef struct #define VARDATA_ANY(PTR) \ (VARATT_IS_1B(PTR) ? 
VARDATA_1B(PTR) : VARDATA_4B(PTR)) +/* Decompressed size and compression method of a compressed-in-line Datum */ +#define VARDATA_COMPRESSED_GET_EXTSIZE(PTR) \ + (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK) +#define VARDATA_COMPRESSED_GET_COMPRESS_METHOD(PTR) \ + (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS) + +/* Saved external size and compression method, given a struct varatt_external */ +#define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \ + ((toast_pointer).va_extinfo & VARLENA_EXTSIZE_MASK) +#define VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) \ + ((toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS) + +#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \ + do { \ + Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ + (cm) == TOAST_LZ4_COMPRESSION_ID); \ + ((toast_pointer).va_extinfo = \ + (len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \ + } while (0) + +/* + * Testing whether an externally-stored value is compressed now requires + * comparing size stored in va_extinfo (the actual length of the external data) + * to rawsize (the original uncompressed datum's size). The latter includes + * VARHDRSZ overhead, the former doesn't. We never use compression unless it + * actually saves space, so we expect either equality or less-than. + */ +#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ + (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \ + (toast_pointer).va_rawsize - VARHDRSZ) + /* ---------------------------------------------------------------- * Section 2: Datum type + support macros