1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-01 14:21:49 +03:00
David Rowley 5983a4cffc Introduce CompactAttribute array in TupleDesc, take 2
The new compact_attrs array stores a few select fields from
FormData_pg_attribute in a more compact way, using only 16 bytes per
column instead of the 104 bytes that FormData_pg_attribute uses.  Using
CompactAttribute allows performance-critical operations such as tuple
deformation to be performed without looking at the FormData_pg_attribute
element in TupleDesc which means fewer cacheline accesses.

For some workloads, tuple deformation can be the most CPU intensive part
of processing the query.  Some testing with 16 columns on a table
where the first column is variable length showed around a 10% increase in
transactions per second for an OLAP type query performing aggregation on
the 16th column.  However, in certain cases, the increases were much
higher, up to ~25% on one AMD Zen4 machine.

This also makes pg_attribute.attcacheoff redundant.  A follow-on commit
will remove it, thus shrinking the FormData_pg_attribute struct by 4
bytes.

Author: David Rowley
Reviewed-by: Andres Freund, Victor Yegorov
Discussion: https://postgr.es/m/CAApHDvrBztXP3yx=NKNmo3xwFAFhEdyPnvrDg3=M0RhDs+4vYw@mail.gmail.com
2024-12-20 22:31:26 +13:00

367 lines
9.1 KiB
C

/*
* gistfuncs.c
* Functions to investigate the content of GiST indexes
*
* Copyright (c) 2014-2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/pageinspect/gistfuncs.c
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/htup.h"
#include "access/relation.h"
#include "catalog/pg_am_d.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "pageinspect.h"
#include "storage/itemptr.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_lsn.h"
#include "utils/rel.h"
#include "utils/ruleutils.h"
/* Version-1 call-convention info records for the functions defined in this file */
PG_FUNCTION_INFO_V1(gist_page_opaque_info);
PG_FUNCTION_INFO_V1(gist_page_items);
PG_FUNCTION_INFO_V1(gist_page_items_bytea);
/* True when relation "r" uses the GiST access method (compares rd_rel->relam) */
#define IS_GIST(r) ((r)->rd_rel->relam == GIST_AM_OID)
/* Forward declaration of the file-local page sanity checker */
static Page verify_gist_page(bytea *raw_page);
/*
 * verify_gist_page
 *		Sanity-check a raw page image and hand back a Page pointer to it.
 *
 * A page for which PageIsNew() is true is returned without any further
 * checks, so callers have to test for that case themselves.  For any other
 * page, the size of the special area and the page identifier stored in the
 * GiST opaque data must both match what a GiST page carries; if either one
 * does not, an error is raised and this function does not return.
 */
static Page
verify_gist_page(bytea *raw_page)
{
    Page        page = get_page_from_raw(raw_page);

    if (!PageIsNew(page))
    {
        int         expected_size = (int) MAXALIGN(sizeof(GISTPageOpaqueData));
        int         actual_size = (int) PageGetSpecialSize(page);
        GISTPageOpaque gist_opaque;

        /* The special area must be exactly as large as the GiST opaque data */
        if (actual_size != expected_size)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("input page is not a valid %s page", "GiST"),
                     errdetail("Expected special size %d, got %d.",
                               expected_size,
                               actual_size)));

        /* Only now is it safe to look inside the special area */
        gist_opaque = GistPageGetOpaque(page);
        if (gist_opaque->gist_page_id != GIST_PAGE_ID)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("input page is not a valid %s page", "GiST"),
                     errdetail("Expected %08x, got %08x.",
                               GIST_PAGE_ID,
                               gist_opaque->gist_page_id)));
    }

    return page;
}
Datum
gist_page_opaque_info(PG_FUNCTION_ARGS)
{
bytea *raw_page = PG_GETARG_BYTEA_P(0);
TupleDesc tupdesc;
Page page;
HeapTuple resultTuple;
Datum values[4];
bool nulls[4];
Datum flags[16];
int nflags = 0;
uint16 flagbits;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to use raw page functions")));
page = verify_gist_page(raw_page);
if (PageIsNew(page))
PG_RETURN_NULL();
/* Build a tuple descriptor for our result type */
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
/* Convert the flags bitmask to an array of human-readable names */
flagbits = GistPageGetOpaque(page)->flags;
if (flagbits & F_LEAF)
flags[nflags++] = CStringGetTextDatum("leaf");
if (flagbits & F_DELETED)
flags[nflags++] = CStringGetTextDatum("deleted");
if (flagbits & F_TUPLES_DELETED)
flags[nflags++] = CStringGetTextDatum("tuples_deleted");
if (flagbits & F_FOLLOW_RIGHT)
flags[nflags++] = CStringGetTextDatum("follow_right");
if (flagbits & F_HAS_GARBAGE)
flags[nflags++] = CStringGetTextDatum("has_garbage");
flagbits &= ~(F_LEAF | F_DELETED | F_TUPLES_DELETED | F_FOLLOW_RIGHT | F_HAS_GARBAGE);
if (flagbits)
{
/* any flags we don't recognize are printed in hex */
flags[nflags++] = DirectFunctionCall1(to_hex32, Int32GetDatum(flagbits));
}
memset(nulls, 0, sizeof(nulls));
values[0] = LSNGetDatum(PageGetLSN(page));
values[1] = LSNGetDatum(GistPageGetNSN(page));
values[2] = Int64GetDatum(GistPageGetOpaque(page)->rightlink);
values[3] = PointerGetDatum(construct_array_builtin(flags, nflags, TEXTOID));
/* Build and return the result tuple. */
resultTuple = heap_form_tuple(tupdesc, values, nulls);
return HeapTupleGetDatum(resultTuple);
}
/*
 * gist_page_items_bytea
 *		Return one row per item on a raw GiST page, with the tuple as bytea.
 *
 * The only argument is a raw page image.  Each result row carries the item's
 * offset number, the TID stored in the index tuple, the index tuple's size,
 * whether the line pointer is marked dead, and a bytea copy of the whole
 * index tuple.  Rows are accumulated in the materialized tuplestore set up
 * by InitMaterializedSRF().
 *
 * For a new page the function returns NULL without emitting rows.  For a
 * deleted page a NOTICE is raised and maxoff is left at InvalidOffsetNumber
 * rather than being read from the page.  Only superusers may call this.
 */
Datum
gist_page_items_bytea(PG_FUNCTION_ARGS)
{
    bytea      *raw_page = PG_GETARG_BYTEA_P(0);
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    Page        page;
    OffsetNumber offset;
    OffsetNumber maxoff = InvalidOffsetNumber;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("must be superuser to use raw page functions")));

    InitMaterializedSRF(fcinfo, 0);

    page = verify_gist_page(raw_page);

    if (PageIsNew(page))
        PG_RETURN_NULL();

    /* Avoid bogus PageGetMaxOffsetNumber() call with deleted pages */
    if (GistPageIsDeleted(page))
        elog(NOTICE, "page is deleted");
    else
        maxoff = PageGetMaxOffsetNumber(page);

    for (offset = FirstOffsetNumber;
         offset <= maxoff;
         offset++)
    {
        Datum       values[5];
        bool        nulls[5];
        ItemId      id;
        IndexTuple  itup;
        bytea      *tuple_bytea;
        int         tuple_len;

        id = PageGetItemId(page, offset);

        if (!ItemIdIsValid(id))
            elog(ERROR, "invalid ItemId");

        itup = (IndexTuple) PageGetItem(page, id);
        tuple_len = IndexTupleSize(itup);

        memset(nulls, 0, sizeof(nulls));

        /*
         * Convert the C value *to* a Datum here; DatumGetInt16() is the
         * conversion in the opposite direction.
         */
        values[0] = Int16GetDatum(offset);
        values[1] = ItemPointerGetDatum(&itup->t_tid);
        values[2] = Int32GetDatum(tuple_len);

        /* Wrap a copy of the raw index tuple in a varlena header */
        tuple_bytea = (bytea *) palloc(tuple_len + VARHDRSZ);
        SET_VARSIZE(tuple_bytea, tuple_len + VARHDRSZ);
        memcpy(VARDATA(tuple_bytea), itup, tuple_len);

        values[3] = BoolGetDatum(ItemIdIsDead(id));
        values[4] = PointerGetDatum(tuple_bytea);

        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    }

    return (Datum) 0;
}
/*
 * gist_page_items
 *		Return one row per item on a raw GiST page, with keys shown as text.
 *
 * Arguments are a raw page image and the OID of the GiST index used to
 * interpret it; an error is raised if that relation is not a GiST index.
 * Each result row carries the item's offset number, the TID stored in the
 * index tuple, the index tuple's size, whether the line pointer is marked
 * dead, and a text rendering of the form "(columns)=(values)".  The last
 * column is NULL when pg_get_indexdef_columns_extended() returns no column
 * list.  Rows are accumulated in the materialized tuplestore set up by
 * InitMaterializedSRF().
 *
 * For a new page the function returns NULL without emitting rows.  For a
 * deleted page a NOTICE is raised and maxoff is left at InvalidOffsetNumber
 * rather than being read from the page.  Only superusers may call this.
 */
Datum
gist_page_items(PG_FUNCTION_ARGS)
{
    bytea      *raw_page = PG_GETARG_BYTEA_P(0);
    Oid         indexRelid = PG_GETARG_OID(1);
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    Relation    indexRel;
    TupleDesc   tupdesc;
    Page        page;
    uint16      flagbits;
    bits16      printflags = 0;
    OffsetNumber offset;
    OffsetNumber maxoff = InvalidOffsetNumber;
    char       *index_columns;
    int         nkeyatts;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("must be superuser to use raw page functions")));

    InitMaterializedSRF(fcinfo, 0);

    /* Open the relation */
    indexRel = index_open(indexRelid, AccessShareLock);

    if (!IS_GIST(indexRel))
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a %s index",
                        RelationGetRelationName(indexRel), "GiST")));

    page = verify_gist_page(raw_page);

    if (PageIsNew(page))
    {
        index_close(indexRel, AccessShareLock);
        PG_RETURN_NULL();
    }

    flagbits = GistPageGetOpaque(page)->flags;

    /* Loop-invariant: number of key (non-INCLUDE) attributes in the index */
    nkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRel);

    /*
     * Included attributes are added when dealing with leaf pages, discarded
     * for non-leaf pages as these include only data for key attributes.
     */
    printflags |= RULE_INDEXDEF_PRETTY;
    if (flagbits & F_LEAF)
    {
        tupdesc = RelationGetDescr(indexRel);
    }
    else
    {
        tupdesc = CreateTupleDescTruncatedCopy(RelationGetDescr(indexRel),
                                               nkeyatts);
        printflags |= RULE_INDEXDEF_KEYS_ONLY;
    }

    index_columns = pg_get_indexdef_columns_extended(indexRelid,
                                                     printflags);

    /* Avoid bogus PageGetMaxOffsetNumber() call with deleted pages */
    if (GistPageIsDeleted(page))
        elog(NOTICE, "page is deleted");
    else
        maxoff = PageGetMaxOffsetNumber(page);

    for (offset = FirstOffsetNumber;
         offset <= maxoff;
         offset++)
    {
        Datum       values[5];
        bool        nulls[5];
        ItemId      id;
        IndexTuple  itup;
        Datum       itup_values[INDEX_MAX_KEYS];
        bool        itup_isnull[INDEX_MAX_KEYS];
        StringInfoData buf;
        int         i;

        id = PageGetItemId(page, offset);

        if (!ItemIdIsValid(id))
            elog(ERROR, "invalid ItemId");

        itup = (IndexTuple) PageGetItem(page, id);

        index_deform_tuple(itup, tupdesc,
                           itup_values, itup_isnull);

        memset(nulls, 0, sizeof(nulls));

        /*
         * Convert the C value *to* a Datum here; DatumGetInt16() is the
         * conversion in the opposite direction.
         */
        values[0] = Int16GetDatum(offset);
        values[1] = ItemPointerGetDatum(&itup->t_tid);
        values[2] = Int32GetDatum((int) IndexTupleSize(itup));
        values[3] = BoolGetDatum(ItemIdIsDead(id));

        if (index_columns)
        {
            initStringInfo(&buf);
            appendStringInfo(&buf, "(%s)=(", index_columns);

            /* Most of this is copied from record_out(). */
            for (i = 0; i < tupdesc->natts; i++)
            {
                char       *value;
                char       *tmp;
                bool        nq = false;

                if (itup_isnull[i])
                    value = "null";
                else
                {
                    Oid         foutoid;
                    bool        typisvarlena;
                    Oid         typoid;

                    typoid = TupleDescAttr(tupdesc, i)->atttypid;
                    getTypeOutputInfo(typoid, &foutoid, &typisvarlena);
                    value = OidOutputFunctionCall(foutoid, itup_values[i]);
                }

                /*
                 * The first non-key attribute starts the INCLUDE list.  With
                 * the truncated descriptor used for non-leaf pages, i never
                 * reaches nkeyatts, so this only fires on leaf pages.
                 */
                if (i == nkeyatts)
                    appendStringInfoString(&buf, ") INCLUDE (");
                else if (i > 0)
                    appendStringInfoString(&buf, ", ");

                /* Check whether we need double quotes for this value */
                nq = (value[0] == '\0');    /* force quotes for empty string */
                for (tmp = value; *tmp; tmp++)
                {
                    char        ch = *tmp;

                    if (ch == '"' || ch == '\\' ||
                        ch == '(' || ch == ')' || ch == ',' ||
                        isspace((unsigned char) ch))
                    {
                        nq = true;
                        break;
                    }
                }

                /* And emit the string */
                if (nq)
                    appendStringInfoCharMacro(&buf, '"');
                for (tmp = value; *tmp; tmp++)
                {
                    char        ch = *tmp;

                    /* Double up quote and backslash characters to escape them */
                    if (ch == '"' || ch == '\\')
                        appendStringInfoCharMacro(&buf, ch);
                    appendStringInfoCharMacro(&buf, ch);
                }
                if (nq)
                    appendStringInfoCharMacro(&buf, '"');
            }

            appendStringInfoChar(&buf, ')');

            values[4] = CStringGetTextDatum(buf.data);
            nulls[4] = false;
        }
        else
        {
            values[4] = (Datum) 0;
            nulls[4] = true;
        }

        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    }

    /* Pair index_open() with index_close(), as on the early-exit path above */
    index_close(indexRel, AccessShareLock);

    return (Datum) 0;
}