
bufmgr: Separate keys for private refcount infrastructure

This makes lookups faster, as it allows the lookup loop to be auto-vectorized.
It also benefits an upcoming patch, independently of auto-vectorization: that
patch wants to track more information for each pinned buffer, which would make
the existing loop over an array of PrivateRefCountEntry more expensive by
increasing the entry size.

Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Discussion: https://postgr.es/m/fvfmkr5kk4nyex56ejgxj3uzi63isfxovp2biecb4bspbjrze7@az2pljabhnff
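
To illustrate the auto-vectorization argument: with refcounts embedded in the
entries, the lookup loop strides over whole structs; with the buffer IDs split
into a dense keys array, the same loop reads contiguous memory and, by
recording the match instead of returning early, becomes a branch-reduced scan
the compiler can vectorize. A minimal standalone sketch of the two shapes
(illustrative names only — NKEYS and the find_slot_* functions are not
PostgreSQL identifiers):

#include <stdint.h>

#define NKEYS 8					/* stands in for REFCOUNT_ARRAY_ENTRIES */

typedef struct Entry
{
	int32_t		key;			/* stands in for PrivateRefCountEntry.buffer */
	int32_t		refcount;		/* grows as more per-pin state is added */
} Entry;

static Entry entries[NKEYS];
static int32_t keys[NKEYS];		/* dense copy of the keys, as in this commit */

/* Old shape: strides over whole structs; widening Entry slows every probe. */
static int
find_slot_aos(int32_t key)
{
	for (int i = 0; i < NKEYS; i++)
		if (entries[i].key == key)
			return i;
	return -1;
}

/*
 * New shape: contiguous keys and no early return, so the compiler can
 * compare several keys per SIMD instruction (cf. the comment added to
 * ReservePrivateRefCountEntry() below).
 */
static int
find_slot_keys(int32_t key)
{
	int			match = -1;

	for (int i = 0; i < NKEYS; i++)
		if (keys[i] == key)
			match = i;
	return match;
}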
Author: Andres Freund
Date: 2025-12-14 13:09:43 -05:00
parent 5b275a6e15
commit edbaaea0a9
2 changed files with 97 additions and 45 deletions

src/backend/storage/buffer/bufmgr.c

@@ -90,10 +90,32 @@
  */
 #define BUF_DROP_FULL_SCAN_THRESHOLD (uint64) (NBuffers / 32)
 
+/*
+ * This is separated out from PrivateRefCountEntry to allow for copying all
+ * the data members via struct assignment.
+ */
+typedef struct PrivateRefCountData
+{
+	/*
+	 * How many times has the buffer been pinned by this backend.
+	 */
+	int32		refcount;
+} PrivateRefCountData;
+
 typedef struct PrivateRefCountEntry
 {
+	/*
+	 * Note that this needs to be same as the entry's corresponding
+	 * PrivateRefCountArrayKeys[i], if the entry is stored in the array. We
+	 * store it in both places as this is used for the hashtable key and
+	 * because it is more convenient (passing around a PrivateRefCountEntry
+	 * suffices to identify the buffer) and faster (checking the keys array is
+	 * faster when checking many entries, checking the entry is faster if just
+	 * checking a single entry).
+	 */
 	Buffer		buffer;
-	int32		refcount;
+	PrivateRefCountData data;
 } PrivateRefCountEntry;
 
 /* 64 bytes, about the size of a cache line on common systems */
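
The PrivateRefCountData split pays off when entries move between the array and
the overflow hash table: one struct assignment copies every data member, and
keeps doing so as future patches add fields. A simplified sketch of that
pattern (stand-in types, not the real declarations):

#include <stdint.h>

typedef struct PrivateRefCountData
{
	int32_t		refcount;
	/* future per-pin state can be added here without touching copy sites */
} PrivateRefCountData;

typedef struct PrivateRefCountEntry
{
	int32_t		buffer;
	PrivateRefCountData data;
} PrivateRefCountEntry;

static void
move_data(PrivateRefCountEntry *dst, const PrivateRefCountEntry *src)
{
	/* one assignment copies all members, however many the struct gains */
	dst->data = src->data;
}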
@@ -194,7 +216,8 @@ static BufferDesc *PinCountWaitBuf = NULL;
  *
  * To avoid - as we used to - requiring an array with NBuffers entries to keep
  * track of local buffers, we use a small sequentially searched array
- * (PrivateRefCountArray) and an overflow hash table (PrivateRefCountHash) to
+ * (PrivateRefCountArrayKeys, with the corresponding data stored in
+ * PrivateRefCountArray) and an overflow hash table (PrivateRefCountHash) to
  * keep track of backend local pins.
  *
  * Until no more than REFCOUNT_ARRAY_ENTRIES buffers are pinned at once, all
@@ -212,11 +235,12 @@ static BufferDesc *PinCountWaitBuf = NULL;
  * memory allocations in NewPrivateRefCountEntry() which can be important
  * because in some scenarios it's called with a spinlock held...
  */
+static Buffer PrivateRefCountArrayKeys[REFCOUNT_ARRAY_ENTRIES];
 static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES];
 static HTAB *PrivateRefCountHash = NULL;
 static int32 PrivateRefCountOverflowed = 0;
 static uint32 PrivateRefCountClock = 0;
-static PrivateRefCountEntry *ReservedRefCountEntry = NULL;
+static int	ReservedRefCountSlot = -1;
 static uint32 MaxProportionalPins;
@@ -259,7 +283,7 @@ static void
 ReservePrivateRefCountEntry(void)
 {
 	/* Already reserved (or freed), nothing to do */
-	if (ReservedRefCountEntry != NULL)
+	if (ReservedRefCountSlot != -1)
 		return;
 
 	/*
@@ -271,17 +295,20 @@ ReservePrivateRefCountEntry(void)
 		for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
 		{
-			PrivateRefCountEntry *res;
-
-			res = &PrivateRefCountArray[i];
-
-			if (res->buffer == InvalidBuffer)
+			if (PrivateRefCountArrayKeys[i] == InvalidBuffer)
 			{
-				ReservedRefCountEntry = res;
-				return;
+				ReservedRefCountSlot = i;
+
+				/*
+				 * We could return immediately, but iterating till the end of
+				 * the array allows compiler-autovectorization.
+				 */
 			}
 		}
+
+		if (ReservedRefCountSlot != -1)
+			return;
 	}
 
 	/*
	 * No luck. All array entries are full. Move one array entry into the hash
@@ -292,27 +319,37 @@ ReservePrivateRefCountEntry(void)
 	 * Move entry from the current clock position in the array into the
 	 * hashtable. Use that slot.
 	 */
 	{
+		int			victim_slot;
+		PrivateRefCountEntry *victim_entry;
 		PrivateRefCountEntry *hashent;
 		bool		found;
 
 		/* select victim slot */
-		ReservedRefCountEntry =
-			&PrivateRefCountArray[PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES];
+		victim_slot = PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES;
+		victim_entry = &PrivateRefCountArray[victim_slot];
+		ReservedRefCountSlot = victim_slot;
 
 		/* Better be used, otherwise we shouldn't get here. */
-		Assert(ReservedRefCountEntry->buffer != InvalidBuffer);
+		Assert(PrivateRefCountArrayKeys[victim_slot] != InvalidBuffer);
+		Assert(PrivateRefCountArray[victim_slot].buffer != InvalidBuffer);
+		Assert(PrivateRefCountArrayKeys[victim_slot] == PrivateRefCountArray[victim_slot].buffer);
 
 		/* enter victim array entry into hashtable */
 		hashent = hash_search(PrivateRefCountHash,
-							  &(ReservedRefCountEntry->buffer),
+							  &PrivateRefCountArrayKeys[victim_slot],
							  HASH_ENTER,
							  &found);
 		Assert(!found);
-		hashent->refcount = ReservedRefCountEntry->refcount;
+		/* move data from the entry in the array to the hash entry */
+		hashent->data = victim_entry->data;
 
 		/* clear the now free array slot */
-		ReservedRefCountEntry->buffer = InvalidBuffer;
-		ReservedRefCountEntry->refcount = 0;
+		PrivateRefCountArrayKeys[victim_slot] = InvalidBuffer;
+		victim_entry->buffer = InvalidBuffer;
+		/* clear the whole data member, just for future proofing */
+		memset(&victim_entry->data, 0, sizeof(victim_entry->data));
+		victim_entry->data.refcount = 0;
 
 		PrivateRefCountOverflowed++;
 	}
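
The hunk above implements a small clock: when every array slot is taken, the
hand picks the next victim slot round-robin, demotes its entry to the overflow
hash table, and hands the emptied slot to the caller. In isolation the pattern
looks roughly like this (stand-in names, overflow table elided):

#include <stdint.h>

#define NSLOTS 8				/* stands in for REFCOUNT_ARRAY_ENTRIES */
#define INVALID_KEY (-1)		/* stands in for InvalidBuffer */

static int32_t	slot_keys[NSLOTS];
static uint32_t clock_hand;		/* stands in for PrivateRefCountClock */

static int
evict_one_slot(void)
{
	int			victim = (int) (clock_hand++ % NSLOTS);

	/* ...move slot_keys[victim]'s entry into the overflow table here... */
	slot_keys[victim] = INVALID_KEY;	/* slot is now free to reserve */
	return victim;
}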
@@ -327,15 +364,17 @@ NewPrivateRefCountEntry(Buffer buffer)
 	PrivateRefCountEntry *res;
 
 	/* only allowed to be called when a reservation has been made */
-	Assert(ReservedRefCountEntry != NULL);
+	Assert(ReservedRefCountSlot != -1);
 
 	/* use up the reserved entry */
-	res = ReservedRefCountEntry;
-	ReservedRefCountEntry = NULL;
+	res = &PrivateRefCountArray[ReservedRefCountSlot];
 
 	/* and fill it */
+	PrivateRefCountArrayKeys[ReservedRefCountSlot] = buffer;
 	res->buffer = buffer;
-	res->refcount = 0;
+	res->data.refcount = 0;
+
+	ReservedRefCountSlot = -1;
 
 	return res;
 }
@@ -347,10 +386,11 @@ NewPrivateRefCountEntry(Buffer buffer)
  * do_move is true, and the entry resides in the hashtable the entry is
  * optimized for frequent access by moving it to the array.
  */
-static PrivateRefCountEntry *
+static inline PrivateRefCountEntry *
 GetPrivateRefCountEntry(Buffer buffer, bool do_move)
 {
 	PrivateRefCountEntry *res;
+	int			match = -1;
 	int			i;
 
 	Assert(BufferIsValid(buffer));
@@ -362,11 +402,15 @@ GetPrivateRefCountEntry(Buffer buffer, bool do_move)
 	 */
 	for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
 	{
-		res = &PrivateRefCountArray[i];
-
-		if (res->buffer == buffer)
-			return res;
+		if (PrivateRefCountArrayKeys[i] == buffer)
+		{
+			match = i;
+			/* see ReservePrivateRefCountEntry() for why we don't return */
+		}
 	}
 
+	if (match != -1)
+		return &PrivateRefCountArray[match];
+
 	/*
	 * By here we know that the buffer, if already pinned, isn't residing in
@@ -397,14 +441,18 @@ GetPrivateRefCountEntry(Buffer buffer, bool do_move)
 		ReservePrivateRefCountEntry();
 
 		/* Use up the reserved slot */
-		Assert(ReservedRefCountEntry != NULL);
-		free = ReservedRefCountEntry;
-		ReservedRefCountEntry = NULL;
+		Assert(ReservedRefCountSlot != -1);
+		free = &PrivateRefCountArray[ReservedRefCountSlot];
+		Assert(PrivateRefCountArrayKeys[ReservedRefCountSlot] == free->buffer);
 		Assert(free->buffer == InvalidBuffer);
 
 		/* and fill it */
 		free->buffer = buffer;
-		free->refcount = res->refcount;
+		free->data = res->data;
+		PrivateRefCountArrayKeys[ReservedRefCountSlot] = buffer;
+
+		ReservedRefCountSlot = -1;
 
 		/* delete from hashtable */
 		hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
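
The change above keeps GetPrivateRefCountEntry() a two-tier lookup: scan the
small keys array first, and on a hash-table hit with do_move, promote the
entry back into the array (after reserving a slot, evicting another entry if
necessary) so repeatedly accessed buffers stay on the fast path. A compressed
sketch of the promotion step, with a plain array standing in for the overflow
hash table (all names here are illustrative):

#include <stdint.h>

#define NSLOTS 8				/* stands in for REFCOUNT_ARRAY_ENTRIES */

typedef struct Ent
{
	int32_t		buffer;
	int32_t		refcount;
} Ent;

static int32_t	slot_keys[NSLOTS];	/* fast tier: dense keys */
static Ent		slots[NSLOTS];		/* fast tier: data */
static Ent		overflow[64];		/* stand-in for PrivateRefCountHash */
static int		noverflow;

/* Promote overflow[ovf] into an already-reserved fast-tier slot
 * (assumes 0 <= ovf < noverflow and that free_slot is empty). */
static Ent *
promote(int ovf, int free_slot)
{
	slots[free_slot] = overflow[ovf];
	slot_keys[free_slot] = overflow[ovf].buffer;

	/* delete from the overflow tier by swapping in the last element */
	overflow[ovf] = overflow[--noverflow];
	return &slots[free_slot];
}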
@@ -437,7 +485,7 @@ GetPrivateRefCount(Buffer buffer)
 	if (ref == NULL)
 		return 0;
 
-	return ref->refcount;
+	return ref->data.refcount;
 }
 
 /*
@@ -447,19 +495,21 @@ GetPrivateRefCount(Buffer buffer)
 static void
 ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
 {
-	Assert(ref->refcount == 0);
+	Assert(ref->data.refcount == 0);
 
 	if (ref >= &PrivateRefCountArray[0] &&
 		ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
 	{
 		ref->buffer = InvalidBuffer;
+		PrivateRefCountArrayKeys[ref - PrivateRefCountArray] = InvalidBuffer;
 
 		/*
 		 * Mark the just used entry as reserved - in many scenarios that
 		 * allows us to avoid ever having to search the array/hash for free
 		 * entries.
 		 */
-		ReservedRefCountEntry = ref;
+		ReservedRefCountSlot = ref - PrivateRefCountArray;
 	}
 	else
 	{
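
ForgetPrivateRefCountEntry() only receives a pointer into the array, so the
commit recovers the slot index with pointer subtraction
(ref - PrivateRefCountArray), which C defines as a count of elements, not
bytes. A tiny self-contained illustration of that idiom (slot_of is a
hypothetical helper, not part of the commit):

#include <assert.h>
#include <stddef.h>

/* Index of an element within the array it belongs to (elements, not bytes). */
static int
slot_of(const int *elem, const int *array, size_t nelems)
{
	ptrdiff_t	slot = elem - array;

	assert(slot >= 0 && (size_t) slot < nelems);
	return (int) slot;
}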
@@ -3073,7 +3123,7 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
 	PrivateRefCountEntry *ref;
 
 	Assert(!BufferIsLocal(b));
-	Assert(ReservedRefCountEntry != NULL);
+	Assert(ReservedRefCountSlot != -1);
 
 	ref = GetPrivateRefCountEntry(b, true);
@@ -3145,8 +3195,8 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
 		 */
 		result = (pg_atomic_read_u32(&buf->state) & BM_VALID) != 0;
 
-		Assert(ref->refcount > 0);
-		ref->refcount++;
+		Assert(ref->data.refcount > 0);
+		ref->data.refcount++;
 		ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
 	}
@@ -3263,9 +3313,9 @@ UnpinBufferNoOwner(BufferDesc *buf)
 	/* not moving as we're likely deleting it soon anyway */
 	ref = GetPrivateRefCountEntry(b, false);
 	Assert(ref != NULL);
-	Assert(ref->refcount > 0);
-	ref->refcount--;
-	if (ref->refcount == 0)
+	Assert(ref->data.refcount > 0);
+	ref->data.refcount--;
+	if (ref->data.refcount == 0)
 	{
 		uint32		old_buf_state;
@@ -3305,7 +3355,7 @@ TrackNewBufferPin(Buffer buf)
 	PrivateRefCountEntry *ref;
 
 	ref = NewPrivateRefCountEntry(buf);
-	ref->refcount++;
+	ref->data.refcount++;
 
 	ResourceOwnerRememberBuffer(CurrentResourceOwner, buf);
@@ -4018,6 +4068,7 @@ InitBufferManagerAccess(void)
 	MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS);
 
 	memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
+	memset(&PrivateRefCountArrayKeys, 0, sizeof(PrivateRefCountArrayKeys));
 
 	hash_ctl.keysize = sizeof(Buffer);
 	hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
@@ -4066,11 +4117,11 @@ CheckForBufferLeaks(void)
 	/* check the array */
 	for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
 	{
-		res = &PrivateRefCountArray[i];
-
-		if (res->buffer != InvalidBuffer)
+		if (PrivateRefCountArrayKeys[i] != InvalidBuffer)
 		{
+			res = &PrivateRefCountArray[i];
+
 			s = DebugPrintBufferRefcount(res->buffer);
 			elog(WARNING, "buffer refcount leak: %s", s);
 			pfree(s);
@@ -5407,7 +5458,7 @@ IncrBufferRefCount(Buffer buffer)
 		ref = GetPrivateRefCountEntry(buffer, true);
 		Assert(ref != NULL);
-		ref->refcount++;
+		ref->data.refcount++;
 	}
 	ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
 }

src/tools/pgindent/typedefs.list

@@ -2333,6 +2333,7 @@ PrintfArgValue
 PrintfTarget
 PrinttupAttrInfo
 PrivTarget
+PrivateRefCountData
 PrivateRefCountEntry
 ProcArrayStruct
 ProcLangInfo