1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-24 00:23:06 +03:00
Files
postgres/src/backend/access/common/detoast.c
Bruce Momjian 29275b1d17 Update copyright for 2024
Reported-by: Michael Paquier

Discussion: https://postgr.es/m/ZZKTDPxBBMt3C0J9@paquier.xyz

Backpatch-through: 12
2024-01-03 20:49:05 -05:00

647 lines
17 KiB
C

/*-------------------------------------------------------------------------
*
* detoast.c
* Retrieve compressed or external variable size attributes.
*
* Copyright (c) 2000-2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/access/common/detoast.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/detoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/toast_internals.h"
#include "common/int.h"
#include "common/pg_lzcompress.h"
#include "utils/expandeddatum.h"
#include "utils/rel.h"
static struct varlena *toast_fetch_datum(struct varlena *attr);
static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
int32 sliceoffset,
int32 slicelength);
static struct varlena *toast_decompress_datum(struct varlena *attr);
static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
/* ----------
* detoast_external_attr -
*
* Public entry point to get back a toasted value from
* external source (possibly still in compressed format).
*
* This will return a datum that contains all the data internally, ie, not
* relying on external storage or memory, but it can still be compressed or
* have a short header. Note some callers assume that if the input is an
* EXTERNAL datum, the result will be a pfree'able chunk.
* ----------
*/
struct varlena *
detoast_external_attr(struct varlena *attr)
{
struct varlena *result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* This is an external stored plain value
*/
result = toast_fetch_datum(attr);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
/*
* This is an indirect pointer --- dereference it
*/
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
attr = (struct varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
/* recurse if value is still external in some other way */
if (VARATT_IS_EXTERNAL(attr))
return detoast_external_attr(attr);
/*
* Copy into the caller's memory context, in case caller tries to
* pfree the result.
*/
result = (struct varlena *) palloc(VARSIZE_ANY(attr));
memcpy(result, attr, VARSIZE_ANY(attr));
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
/*
* This is an expanded-object pointer --- get flat format
*/
ExpandedObjectHeader *eoh;
Size resultsize;
eoh = DatumGetEOHP(PointerGetDatum(attr));
resultsize = EOH_get_flat_size(eoh);
result = (struct varlena *) palloc(resultsize);
EOH_flatten_into(eoh, (void *) result, resultsize);
}
else
{
/*
* This is a plain value inside of the main tuple - why am I called?
*/
result = attr;
}
return result;
}
/* ----------
* detoast_attr -
*
* Public entry point to get back a toasted value from compression
* or external storage. The result is always non-extended varlena form.
*
* Note some callers assume that if the input is an EXTERNAL or COMPRESSED
* datum, the result will be a pfree'able chunk.
* ----------
*/
struct varlena *
detoast_attr(struct varlena *attr)
{
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* This is an externally stored datum --- fetch it back from there
*/
attr = toast_fetch_datum(attr);
/* If it's compressed, decompress it */
if (VARATT_IS_COMPRESSED(attr))
{
struct varlena *tmp = attr;
attr = toast_decompress_datum(tmp);
pfree(tmp);
}
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
/*
* This is an indirect pointer --- dereference it
*/
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
attr = (struct varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
/* recurse in case value is still extended in some other way */
attr = detoast_attr(attr);
/* if it isn't, we'd better copy it */
if (attr == (struct varlena *) redirect.pointer)
{
struct varlena *result;
result = (struct varlena *) palloc(VARSIZE_ANY(attr));
memcpy(result, attr, VARSIZE_ANY(attr));
attr = result;
}
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
/*
* This is an expanded-object pointer --- get flat format
*/
attr = detoast_external_attr(attr);
/* flatteners are not allowed to produce compressed/short output */
Assert(!VARATT_IS_EXTENDED(attr));
}
else if (VARATT_IS_COMPRESSED(attr))
{
/*
* This is a compressed value inside of the main tuple
*/
attr = toast_decompress_datum(attr);
}
else if (VARATT_IS_SHORT(attr))
{
/*
* This is a short-header varlena --- convert to 4-byte header format
*/
Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
Size new_size = data_size + VARHDRSZ;
struct varlena *new_attr;
new_attr = (struct varlena *) palloc(new_size);
SET_VARSIZE(new_attr, new_size);
memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
attr = new_attr;
}
return attr;
}
/* ----------
* detoast_attr_slice -
*
* Public entry point to get back part of a toasted value
* from compression or external storage.
*
* sliceoffset is where to start (zero or more)
* If slicelength < 0, return everything beyond sliceoffset
* ----------
*/
struct varlena *
detoast_attr_slice(struct varlena *attr,
int32 sliceoffset, int32 slicelength)
{
struct varlena *preslice;
struct varlena *result;
char *attrdata;
int32 slicelimit;
int32 attrsize;
if (sliceoffset < 0)
elog(ERROR, "invalid sliceoffset: %d", sliceoffset);
/*
* Compute slicelimit = offset + length, or -1 if we must fetch all of the
* value. In case of integer overflow, we must fetch all.
*/
if (slicelength < 0)
slicelimit = -1;
else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
slicelength = slicelimit = -1;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/* fast path for non-compressed external datums */
if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
/*
* For compressed values, we need to fetch enough slices to decompress
* at least the requested part (when a prefix is requested).
* Otherwise, just fetch all slices.
*/
if (slicelimit >= 0)
{
int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
/*
* Determine maximum amount of compressed data needed for a prefix
* of a given length (after decompression).
*
* At least for now, if it's LZ4 data, we'll have to fetch the
* whole thing, because there doesn't seem to be an API call to
* determine how much compressed data we need to be sure of being
* able to decompress the required slice.
*/
if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
TOAST_PGLZ_COMPRESSION_ID)
max_size = pglz_maximum_compressed_size(slicelimit, max_size);
/*
* Fetch enough compressed slices (compressed marker will get set
* automatically).
*/
preslice = toast_fetch_datum_slice(attr, 0, max_size);
}
else
preslice = toast_fetch_datum(attr);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
return detoast_attr_slice(redirect.pointer,
sliceoffset, slicelength);
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
/* pass it off to detoast_external_attr to flatten */
preslice = detoast_external_attr(attr);
}
else
preslice = attr;
Assert(!VARATT_IS_EXTERNAL(preslice));
if (VARATT_IS_COMPRESSED(preslice))
{
struct varlena *tmp = preslice;
/* Decompress enough to encompass the slice and the offset */
if (slicelimit >= 0)
preslice = toast_decompress_datum_slice(tmp, slicelimit);
else
preslice = toast_decompress_datum(tmp);
if (tmp != attr)
pfree(tmp);
}
if (VARATT_IS_SHORT(preslice))
{
attrdata = VARDATA_SHORT(preslice);
attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
}
else
{
attrdata = VARDATA(preslice);
attrsize = VARSIZE(preslice) - VARHDRSZ;
}
/* slicing of datum for compressed cases and plain value */
if (sliceoffset >= attrsize)
{
sliceoffset = 0;
slicelength = 0;
}
else if (slicelength < 0 || slicelimit > attrsize)
slicelength = attrsize - sliceoffset;
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
SET_VARSIZE(result, slicelength + VARHDRSZ);
memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
if (preslice != attr)
pfree(preslice);
return result;
}
/* ----------
* toast_fetch_datum -
*
* Reconstruct an in memory Datum from the chunks saved
* in the toast relation
* ----------
*/
static struct varlena *
toast_fetch_datum(struct varlena *attr)
{
Relation toastrel;
struct varlena *result;
struct varatt_external toast_pointer;
int32 attrsize;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
result = (struct varlena *) palloc(attrsize + VARHDRSZ);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
else
SET_VARSIZE(result, attrsize + VARHDRSZ);
if (attrsize == 0)
return result; /* Probably shouldn't happen, but just in
* case. */
/*
* Open the toast relation and its indexes
*/
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
/* Fetch all chunks */
table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
attrsize, 0, attrsize, result);
/* Close toast table */
table_close(toastrel, AccessShareLock);
return result;
}
/* ----------
* toast_fetch_datum_slice -
*
* Reconstruct a segment of a Datum from the chunks saved
* in the toast relation
*
* Note that this function supports non-compressed external datums
* and compressed external datums (in which case the requested slice
* has to be a prefix, i.e. sliceoffset has to be 0).
* ----------
*/
static struct varlena *
toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
int32 slicelength)
{
Relation toastrel;
struct varlena *result;
struct varatt_external toast_pointer;
int32 attrsize;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
* It's nonsense to fetch slices of a compressed datum unless when it's a
* prefix -- this isn't lo_* we can't return a compressed datum which is
* meaningful to toast later.
*/
Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
if (sliceoffset >= attrsize)
{
sliceoffset = 0;
slicelength = 0;
}
/*
* When fetching a prefix of a compressed external datum, account for the
* space required by va_tcinfo, which is stored at the beginning as an
* int32 value.
*/
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0)
slicelength = slicelength + sizeof(int32);
/*
* Adjust length request if needed. (Note: our sole caller,
* detoast_attr_slice, protects us against sliceoffset + slicelength
* overflowing.)
*/
if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
slicelength = attrsize - sliceoffset;
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
else
SET_VARSIZE(result, slicelength + VARHDRSZ);
if (slicelength == 0)
return result; /* Can save a lot of work at this point! */
/* Open the toast relation */
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
/* Fetch all chunks */
table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
attrsize, sliceoffset, slicelength,
result);
/* Close toast table */
table_close(toastrel, AccessShareLock);
return result;
}
/* ----------
* toast_decompress_datum -
*
* Decompress a compressed version of a varlena datum
*/
static struct varlena *
toast_decompress_datum(struct varlena *attr)
{
ToastCompressionId cmid;
Assert(VARATT_IS_COMPRESSED(attr));
/*
* Fetch the compression method id stored in the compression header and
* decompress the data using the appropriate decompression routine.
*/
cmid = TOAST_COMPRESS_METHOD(attr);
switch (cmid)
{
case TOAST_PGLZ_COMPRESSION_ID:
return pglz_decompress_datum(attr);
case TOAST_LZ4_COMPRESSION_ID:
return lz4_decompress_datum(attr);
default:
elog(ERROR, "invalid compression method id %d", cmid);
return NULL; /* keep compiler quiet */
}
}
/* ----------
* toast_decompress_datum_slice -
*
* Decompress the front of a compressed version of a varlena datum.
* offset handling happens in detoast_attr_slice.
* Here we just decompress a slice from the front.
*/
static struct varlena *
toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
{
ToastCompressionId cmid;
Assert(VARATT_IS_COMPRESSED(attr));
/*
* Some callers may pass a slicelength that's more than the actual
* decompressed size. If so, just decompress normally. This avoids
* possibly allocating a larger-than-necessary result object, and may be
* faster and/or more robust as well. Notably, some versions of liblz4
* have been seen to give wrong results if passed an output size that is
* more than the data's true decompressed size.
*/
if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr))
return toast_decompress_datum(attr);
/*
* Fetch the compression method id stored in the compression header and
* decompress the data slice using the appropriate decompression routine.
*/
cmid = TOAST_COMPRESS_METHOD(attr);
switch (cmid)
{
case TOAST_PGLZ_COMPRESSION_ID:
return pglz_decompress_datum_slice(attr, slicelength);
case TOAST_LZ4_COMPRESSION_ID:
return lz4_decompress_datum_slice(attr, slicelength);
default:
elog(ERROR, "invalid compression method id %d", cmid);
return NULL; /* keep compiler quiet */
}
}
/* ----------
* toast_raw_datum_size -
*
* Return the raw (detoasted) size of a varlena datum
* (including the VARHDRSZ header)
* ----------
*/
Size
toast_raw_datum_size(Datum value)
{
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/* va_rawsize is the size of the original datum -- including header */
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = toast_pointer.va_rawsize;
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
result = EOH_get_flat_size(DatumGetEOHP(value));
}
else if (VARATT_IS_COMPRESSED(attr))
{
/* here, va_rawsize is just the payload size */
result = VARDATA_COMPRESSED_GET_EXTSIZE(attr) + VARHDRSZ;
}
else if (VARATT_IS_SHORT(attr))
{
/*
* we have to normalize the header length to VARHDRSZ or else the
* callers of this function will be confused.
*/
result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
}
else
{
/* plain untoasted datum */
result = VARSIZE(attr);
}
return result;
}
/* ----------
* toast_datum_size
*
* Return the physical storage size (possibly compressed) of a varlena datum
* ----------
*/
Size
toast_datum_size(Datum value)
{
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* Attribute is stored externally - return the extsize whether
* compressed or not. We do not count the size of the toast pointer
* ... should we?
*/
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
result = EOH_get_flat_size(DatumGetEOHP(value));
}
else if (VARATT_IS_SHORT(attr))
{
result = VARSIZE_SHORT(attr);
}
else
{
/*
* Attribute is stored inline either compressed or not, just calculate
* the size of the datum in either case.
*/
result = VARSIZE(attr);
}
return result;
}