1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-23 14:01:44 +03:00

TupleHashTable: store additional data along with tuple.

Previously, the caller needed to allocate the memory and the
TupleHashTable would store a pointer to it. That wastes space for the
palloc overhead as well as the size of the pointer itself.

Now, the TupleHashTable relies on the caller to correctly specify the
additionalsize, and allocates that amount of space. The caller can
then request a pointer into that space.

Discussion: https://postgr.es/m/b9cbf0219a9859dc8d240311643ff4362fd9602c.camel@j-davis.com
Reviewed-by: Heikki Linnakangas
This commit is contained in:
Jeff Davis
2025-01-10 17:14:37 -08:00
parent 34c6e65242
commit e0ece2a981
6 changed files with 78 additions and 31 deletions

View File

@ -20,6 +20,13 @@
#include "miscadmin.h" #include "miscadmin.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
typedef struct TupleHashEntryData
{
MinimalTuple firstTuple; /* copy of first tuple in this group */
uint32 status; /* hash status */
uint32 hash; /* hash value (cached) */
} TupleHashEntryData;
static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2); static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
const MinimalTuple tuple); const MinimalTuple tuple);
@ -196,6 +203,7 @@ BuildTupleHashTable(PlanState *parent,
hashtable->tab_collations = collations; hashtable->tab_collations = collations;
hashtable->tablecxt = tablecxt; hashtable->tablecxt = tablecxt;
hashtable->tempcxt = tempcxt; hashtable->tempcxt = tempcxt;
hashtable->additionalsize = additionalsize;
hashtable->tableslot = NULL; /* will be made on first lookup */ hashtable->tableslot = NULL; /* will be made on first lookup */
hashtable->inputslot = NULL; hashtable->inputslot = NULL;
hashtable->in_hash_expr = NULL; hashtable->in_hash_expr = NULL;
@ -273,6 +281,15 @@ ResetTupleHashTable(TupleHashTable hashtable)
tuplehash_reset(hashtable->hashtab); tuplehash_reset(hashtable->hashtab);
} }
/*
* Return size of the hash bucket. Useful for estimating memory usage.
*/
size_t
TupleHashEntrySize(void)
{
return sizeof(TupleHashEntryData);
}
/* /*
* Find or create a hashtable entry for the tuple group containing the * Find or create a hashtable entry for the tuple group containing the
* given tuple. The tuple must be the same type as the hashtable entries. * given tuple. The tuple must be the same type as the hashtable entries.
@ -339,6 +356,24 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
return hash; return hash;
} }
MinimalTuple
TupleHashEntryGetTuple(TupleHashEntry entry)
{
return entry->firstTuple;
}
/*
* Get a pointer into the additional space allocated for this entry. The
* amount of space available is the additionalsize specified to
* BuildTupleHashTable(). If additionalsize was specified as zero, no
* additional space is available and this function should not be called.
*/
void *
TupleHashEntryGetAdditional(TupleHashEntry entry)
{
return (char *) entry->firstTuple + MAXALIGN(entry->firstTuple->t_len);
}
/* /*
* A variant of LookupTupleHashEntry for callers that have already computed * A variant of LookupTupleHashEntry for callers that have already computed
* the hash value. * the hash value.
@ -477,13 +512,31 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
} }
else else
{ {
MinimalTuple firstTuple;
size_t totalsize; /* including alignment and additionalsize */
/* created new entry */ /* created new entry */
*isnew = true; *isnew = true;
/* zero caller data */ /* zero caller data */
entry->additional = NULL;
MemoryContextSwitchTo(hashtable->tablecxt); MemoryContextSwitchTo(hashtable->tablecxt);
/* Copy the first tuple into the table context */ /* Copy the first tuple into the table context */
entry->firstTuple = ExecCopySlotMinimalTuple(slot); firstTuple = ExecCopySlotMinimalTuple(slot);
/*
* Allocate additional space right after the MinimalTuple of size
* additionalsize. The caller can get a pointer to this data with
* TupleHashEntryGetAdditional(), and store arbitrary data there.
*
* This avoids the need to store an extra pointer or allocate an
* additional chunk, which would waste memory.
*/
totalsize = MAXALIGN(firstTuple->t_len) + hashtable->additionalsize;
firstTuple = repalloc(firstTuple, totalsize);
memset((char *) firstTuple + firstTuple->t_len, 0,
totalsize - firstTuple->t_len);
entry->firstTuple = firstTuple;
} }
} }
else else

View File

@ -1713,7 +1713,7 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
transitionChunkSize = 0; transitionChunkSize = 0;
return return
sizeof(TupleHashEntryData) + TupleHashEntrySize() +
tupleChunkSize + tupleChunkSize +
pergroupChunkSize + pergroupChunkSize +
transitionChunkSize; transitionChunkSize;
@ -1954,7 +1954,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
if (aggstate->hash_ngroups_current > 0) if (aggstate->hash_ngroups_current > 0)
{ {
aggstate->hashentrysize = aggstate->hashentrysize =
sizeof(TupleHashEntryData) + TupleHashEntrySize() +
(hashkey_mem / (double) aggstate->hash_ngroups_current); (hashkey_mem / (double) aggstate->hash_ngroups_current);
} }
} }
@ -2055,11 +2055,7 @@ initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
if (aggstate->numtrans == 0) if (aggstate->numtrans == 0)
return; return;
pergroup = (AggStatePerGroup) pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
MemoryContextAlloc(hashtable->tablecxt,
sizeof(AggStatePerGroupData) * aggstate->numtrans);
entry->additional = pergroup;
/* /*
* Initialize aggregates for new tuple group, lookup_hash_entries() * Initialize aggregates for new tuple group, lookup_hash_entries()
@ -2123,7 +2119,7 @@ lookup_hash_entries(AggState *aggstate)
{ {
if (isnew) if (isnew)
initialize_hash_entry(aggstate, hashtable, entry); initialize_hash_entry(aggstate, hashtable, entry);
pergroup[setno] = entry->additional; pergroup[setno] = TupleHashEntryGetAdditional(entry);
} }
else else
{ {
@ -2681,7 +2677,7 @@ agg_refill_hash_table(AggState *aggstate)
{ {
if (isnew) if (isnew)
initialize_hash_entry(aggstate, perhash->hashtable, entry); initialize_hash_entry(aggstate, perhash->hashtable, entry);
aggstate->hash_pergroup[batch->setno] = entry->additional; aggstate->hash_pergroup[batch->setno] = TupleHashEntryGetAdditional(entry);
advance_aggregates(aggstate); advance_aggregates(aggstate);
} }
else else
@ -2773,7 +2769,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
ExprContext *econtext; ExprContext *econtext;
AggStatePerAgg peragg; AggStatePerAgg peragg;
AggStatePerGroup pergroup; AggStatePerGroup pergroup;
TupleHashEntryData *entry; TupleHashEntry entry;
TupleTableSlot *firstSlot; TupleTableSlot *firstSlot;
TupleTableSlot *result; TupleTableSlot *result;
AggStatePerHash perhash; AggStatePerHash perhash;
@ -2845,7 +2841,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
* Transform representative tuple back into one with the right * Transform representative tuple back into one with the right
* columns. * columns.
*/ */
ExecStoreMinimalTuple(entry->firstTuple, hashslot, false); ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashslot, false);
slot_getallattrs(hashslot); slot_getallattrs(hashslot);
ExecClearTuple(firstSlot); ExecClearTuple(firstSlot);
@ -2861,7 +2857,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
} }
ExecStoreVirtualTuple(firstSlot); ExecStoreVirtualTuple(firstSlot);
pergroup = (AggStatePerGroup) entry->additional; pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
/* /*
* Use the representative input tuple for any references to * Use the representative input tuple for any references to

View File

@ -425,6 +425,7 @@ setop_fill_hash_table(SetOpState *setopstate)
{ {
TupleTableSlot *outerslot; TupleTableSlot *outerslot;
TupleHashEntryData *entry; TupleHashEntryData *entry;
SetOpStatePerGroup pergroup;
bool isnew; bool isnew;
outerslot = ExecProcNode(outerPlan); outerslot = ExecProcNode(outerPlan);
@ -437,16 +438,16 @@ setop_fill_hash_table(SetOpState *setopstate)
outerslot, outerslot,
&isnew, NULL); &isnew, NULL);
pergroup = TupleHashEntryGetAdditional(entry);
/* If new tuple group, initialize counts to zero */ /* If new tuple group, initialize counts to zero */
if (isnew) if (isnew)
{ {
entry->additional = (SetOpStatePerGroup) pergroup->numLeft = 0;
MemoryContextAllocZero(setopstate->hashtable->tablecxt, pergroup->numRight = 0;
sizeof(SetOpStatePerGroupData));
} }
/* Advance the counts */ /* Advance the counts */
((SetOpStatePerGroup) entry->additional)->numLeft++; pergroup->numLeft++;
/* Must reset expression context after each hashtable lookup */ /* Must reset expression context after each hashtable lookup */
ResetExprContext(econtext); ResetExprContext(econtext);
@ -478,7 +479,7 @@ setop_fill_hash_table(SetOpState *setopstate)
/* Advance the counts if entry is already present */ /* Advance the counts if entry is already present */
if (entry) if (entry)
((SetOpStatePerGroup) entry->additional)->numRight++; ((SetOpStatePerGroup) TupleHashEntryGetAdditional(entry))->numRight++;
/* Must reset expression context after each hashtable lookup */ /* Must reset expression context after each hashtable lookup */
ResetExprContext(econtext); ResetExprContext(econtext);
@ -496,7 +497,7 @@ setop_fill_hash_table(SetOpState *setopstate)
static TupleTableSlot * static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate) setop_retrieve_hash_table(SetOpState *setopstate)
{ {
TupleHashEntryData *entry; TupleHashEntry entry;
TupleTableSlot *resultTupleSlot; TupleTableSlot *resultTupleSlot;
/* /*
@ -526,12 +527,12 @@ setop_retrieve_hash_table(SetOpState *setopstate)
* See if we should emit any copies of this tuple, and if so return * See if we should emit any copies of this tuple, and if so return
* the first copy. * the first copy.
*/ */
set_output_count(setopstate, (SetOpStatePerGroup) entry->additional); set_output_count(setopstate, (SetOpStatePerGroup) TupleHashEntryGetAdditional(entry));
if (setopstate->numOutput > 0) if (setopstate->numOutput > 0)
{ {
setopstate->numOutput--; setopstate->numOutput--;
return ExecStoreMinimalTuple(entry->firstTuple, return ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry),
resultTupleSlot, resultTupleSlot,
false); false);
} }

View File

@ -753,7 +753,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
{ {
CHECK_FOR_INTERRUPTS(); CHECK_FOR_INTERRUPTS();
ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false); ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashtable->tableslot, false);
if (!execTuplesUnequal(slot, hashtable->tableslot, if (!execTuplesUnequal(slot, hashtable->tableslot,
numCols, keyColIdx, numCols, keyColIdx,
eqfunctions, eqfunctions,

View File

@ -148,9 +148,12 @@ extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
bool *isnew, uint32 *hash); bool *isnew, uint32 *hash);
extern uint32 TupleHashTableHash(TupleHashTable hashtable, extern uint32 TupleHashTableHash(TupleHashTable hashtable,
TupleTableSlot *slot); TupleTableSlot *slot);
extern size_t TupleHashEntrySize(void);
extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable, extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
TupleTableSlot *slot, TupleTableSlot *slot,
bool *isnew, uint32 hash); bool *isnew, uint32 hash);
extern MinimalTuple TupleHashEntryGetTuple(TupleHashEntry entry);
extern void *TupleHashEntryGetAdditional(TupleHashEntry entry);
extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
TupleTableSlot *slot, TupleTableSlot *slot,
ExprState *eqcomp, ExprState *eqcomp,

View File

@ -806,17 +806,10 @@ typedef struct ExecAuxRowMark
* point to tab_hash_expr and tab_eq_func respectively. * point to tab_hash_expr and tab_eq_func respectively.
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
typedef struct TupleHashEntryData TupleHashEntryData;
typedef struct TupleHashEntryData *TupleHashEntry; typedef struct TupleHashEntryData *TupleHashEntry;
typedef struct TupleHashTableData *TupleHashTable; typedef struct TupleHashTableData *TupleHashTable;
typedef struct TupleHashEntryData
{
MinimalTuple firstTuple; /* copy of first tuple in this group */
void *additional; /* user data */
uint32 status; /* hash status */
uint32 hash; /* hash value (cached) */
} TupleHashEntryData;
/* define parameters necessary to generate the tuple hash table interface */ /* define parameters necessary to generate the tuple hash table interface */
#define SH_PREFIX tuplehash #define SH_PREFIX tuplehash
#define SH_ELEMENT_TYPE TupleHashEntryData #define SH_ELEMENT_TYPE TupleHashEntryData
@ -835,6 +828,7 @@ typedef struct TupleHashTableData
Oid *tab_collations; /* collations for hash and comparison */ Oid *tab_collations; /* collations for hash and comparison */
MemoryContext tablecxt; /* memory context containing table */ MemoryContext tablecxt; /* memory context containing table */
MemoryContext tempcxt; /* context for function evaluations */ MemoryContext tempcxt; /* context for function evaluations */
Size additionalsize; /* size of additional data */
TupleTableSlot *tableslot; /* slot for referencing table entries */ TupleTableSlot *tableslot; /* slot for referencing table entries */
/* The following fields are set transiently for each table search: */ /* The following fields are set transiently for each table search: */
TupleTableSlot *inputslot; /* current input tuple's slot */ TupleTableSlot *inputslot; /* current input tuple's slot */