1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-09 22:41:56 +03:00

Use HTAB for replication slot statistics.

Previously, we used an array of size max_replication_slots to store
stats for replication slots. But that had two problems in the cases
where a message for dropping a slot gets lost: 1) the stats for the new
slot are not recorded if the array is full, and 2) we could write beyond
the end of the array if the user reduces max_replication_slots.

This commit uses HTAB for replication slot statistics, resolving both
problems. Now, pgstat_vacuum_stat() searches for all the dead replication
slots in the stats hashtable and tells the collector to remove them. To
avoid showing the stats for the already-dropped slots, the
pg_stat_replication_slots view searches slot stats by the slot name taken
from pg_replication_slots.

Also, we send a message for creating a slot at slot creation, initializing
the stats. This reduces the possibility that the stats are accumulated
into the old slot stats when a message for dropping a slot gets lost.

Reported-by: Andres Freund
Author: Sawada Masahiko, test case by Vignesh C
Reviewed-by: Amit Kapila, Vignesh C, Dilip Kumar
Discussion: https://postgr.es/m/20210319185247.ldebgpdaxsowiflw@alap3.anarazel.de
This commit is contained in:
Amit Kapila
2021-04-27 09:09:11 +05:30
parent e7eea52b2d
commit 3fa17d3771
12 changed files with 389 additions and 260 deletions

View File

@ -1773,7 +1773,7 @@ void
UpdateDecodingStats(LogicalDecodingContext *ctx)
{
ReorderBuffer *rb = ctx->reorder;
PgStat_ReplSlotStats repSlotStat;
PgStat_StatReplSlotEntry repSlotStat;
/* Nothing to do if we don't have any replication stats to be sent. */
if (rb->spillBytes <= 0 && rb->streamBytes <= 0 && rb->totalBytes <= 0)

View File

@ -328,12 +328,7 @@ ReplicationSlotCreate(const char *name, bool db_specific,
* ReplicationSlotAllocationLock.
*/
if (SlotIsLogical(slot))
{
PgStat_ReplSlotStats repSlotStat;
MemSet(&repSlotStat, 0, sizeof(PgStat_ReplSlotStats));
namestrcpy(&repSlotStat.slotname, NameStr(slot->data.name));
pgstat_report_replslot(&repSlotStat);
}
pgstat_report_replslot_create(NameStr(slot->data.name));
/*
* Now that the slot has been marked as in_use and active, it's safe to
@ -349,17 +344,15 @@ ReplicationSlotCreate(const char *name, bool db_specific,
* Search for the named replication slot.
*
* Return the replication slot if found, otherwise NULL.
*
* The caller must hold ReplicationSlotControlLock in shared mode.
*/
ReplicationSlot *
SearchNamedReplicationSlot(const char *name)
SearchNamedReplicationSlot(const char *name, bool need_lock)
{
int i;
ReplicationSlot *slot = NULL;
ReplicationSlot *slot = NULL;
Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock,
LW_SHARED));
if (need_lock)
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
for (i = 0; i < max_replication_slots; i++)
{
@ -372,6 +365,9 @@ SearchNamedReplicationSlot(const char *name)
}
}
if (need_lock)
LWLockRelease(ReplicationSlotControlLock);
return slot;
}
@ -416,7 +412,7 @@ retry:
* Search for the slot with the specified name if the slot to acquire is
* not given. If the slot is not found, we either return -1 or error out.
*/
s = slot ? slot : SearchNamedReplicationSlot(name);
s = slot ? slot : SearchNamedReplicationSlot(name, false);
if (s == NULL || !s->in_use)
{
LWLockRelease(ReplicationSlotControlLock);
@ -713,6 +709,12 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
* reduce that possibility. If the messages reached in reverse, we would
* lose one statistics update message. But the next update message will
* create the statistics for the replication slot.
*
* XXX In case the messages for the creation and the drop of a slot with
* the same name both get lost, and the create happens before (auto)vacuum
* cleans up the dead slot, the stats will be accumulated into the old
* slot. One can imagine having OIDs for each slot to avoid the accumulation
* of stats, but that doesn't seem worth doing as in practice this won't
* happen frequently.
*/
if (SlotIsLogical(slot))
pgstat_report_replslot_drop(NameStr(slot->data.name));