mirror of
https://github.com/postgres/postgres.git
synced 2025-11-09 06:21:09 +03:00
pgstat: store statistics in shared memory.
Previously the statistics collector received statistics updates via UDP and shared statistics data by writing them out to temporary files regularly. These files can reach tens of megabytes and are written out up to twice a second. This has repeatedly prevented us from adding additional useful statistics. Now statistics are stored in shared memory. Statistics for variable-numbered objects are stored in a dshash hashtable (backed by dynamic shared memory). Fixed-numbered stats are stored in plain shared memory. The header for pgstat.c contains an overview of the architecture. The stats collector is not needed anymore; remove it. By utilizing the transactional statistics drop infrastructure introduced in a prior commit, statistics entries cannot "leak" anymore. Previously leaked statistics were dropped by pgstat_vacuum_stat(), called from [auto-]vacuum. On systems with many small relations pgstat_vacuum_stat() could be quite expensive. Now that replicas drop statistics entries for dropped objects, it is not necessary anymore to reset stats when starting from a cleanly shut down replica. Subsequent commits will perform some further code cleanup, adapt docs and add tests. Bumps PGSTAT_FILE_FORMAT_ID. Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Author: Andres Freund <andres@anarazel.de> Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-By: Andres Freund <andres@anarazel.de> Reviewed-By: Thomas Munro <thomas.munro@gmail.com> Reviewed-By: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-By: "David G. Johnston" <david.g.johnston@gmail.com> Reviewed-By: Tomas Vondra <tomas.vondra@2ndquadrant.com> (in a much earlier version) Reviewed-By: Arthur Zakirov <a.zakirov@postgrespro.ru> (in a much earlier version) Reviewed-By: Antonin Houska <ah@cybertec.at> (in a much earlier version) Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.de Discussion: https://postgr.es/m/20220308205351.2xcn6k4x5yivcxyd@alap3.anarazel.de Discussion: https://postgr.es/m/20210319235115.y3wz7hpnnrshdyv6@alap3.anarazel.de
This commit is contained in:
@@ -1911,7 +1911,6 @@ UpdateDecodingStats(LogicalDecodingContext *ctx)
|
||||
(long long) rb->totalTxns,
|
||||
(long long) rb->totalBytes);
|
||||
|
||||
namestrcpy(&repSlotStat.slotname, NameStr(ctx->slot->data.name));
|
||||
repSlotStat.spill_txns = rb->spillTxns;
|
||||
repSlotStat.spill_count = rb->spillCount;
|
||||
repSlotStat.spill_bytes = rb->spillBytes;
|
||||
|
||||
@@ -141,7 +141,7 @@ finish_sync_worker(void)
|
||||
if (IsTransactionState())
|
||||
{
|
||||
CommitTransactionCommand();
|
||||
pgstat_report_stat(false);
|
||||
pgstat_report_stat(true);
|
||||
}
|
||||
|
||||
/* And flush all writes. */
|
||||
@@ -580,7 +580,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
|
||||
if (started_tx)
|
||||
{
|
||||
CommitTransactionCommand();
|
||||
pgstat_report_stat(false);
|
||||
pgstat_report_stat(true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1386,7 +1386,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
|
||||
MyLogicalRepWorker->relstate,
|
||||
MyLogicalRepWorker->relstate_lsn);
|
||||
CommitTransactionCommand();
|
||||
pgstat_report_stat(false);
|
||||
pgstat_report_stat(true);
|
||||
|
||||
StartTransactionCommand();
|
||||
|
||||
@@ -1630,7 +1630,7 @@ AllTablesyncsReady(void)
|
||||
if (started_tx)
|
||||
{
|
||||
CommitTransactionCommand();
|
||||
pgstat_report_stat(false);
|
||||
pgstat_report_stat(true);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -2937,6 +2937,12 @@ LogicalRepApplyLoop(XLogRecPtr last_received)
|
||||
}
|
||||
|
||||
send_feedback(last_received, requestReply, requestReply);
|
||||
|
||||
/*
|
||||
* Force reporting to ensure long idle periods don't lead to
|
||||
* arbitrarily delayed stats.
|
||||
*/
|
||||
pgstat_report_stat(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -502,6 +502,14 @@ retry:
|
||||
|
||||
/* We made this slot active, so it's ours now. */
|
||||
MyReplicationSlot = s;
|
||||
|
||||
/*
|
||||
* The call to pgstat_acquire_replslot() protects against stats for
|
||||
* a different slot, from before a restart or such, being present during
|
||||
* pgstat_report_replslot().
|
||||
*/
|
||||
if (SlotIsLogical(s))
|
||||
pgstat_acquire_replslot(s);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -746,20 +754,10 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
||||
elog(DEBUG3, "replication slot drop: %s: removed directory", NameStr(slot->data.name));
|
||||
|
||||
/*
|
||||
* Send a message to drop the replication slot to the stats collector.
|
||||
* Since there is no guarantee of the order of message transfer on a UDP
|
||||
* connection, it's possible that a message for creating a new slot
|
||||
* reaches before a message for removing the old slot. We send the drop
|
||||
* and create messages while holding ReplicationSlotAllocationLock to
|
||||
* reduce that possibility. If the messages reached in reverse, we would
|
||||
* lose one statistics update message. But the next update message will
|
||||
* create the statistics for the replication slot.
|
||||
*
|
||||
* XXX In case, the messages for creation and drop slot of the same name
|
||||
* get lost and create happens before (auto)vacuum cleans up the dead
|
||||
* slot, the stats will be accumulated into the old slot. One can imagine
|
||||
* having OIDs for each slot to avoid the accumulation of stats but that
|
||||
* doesn't seem worth doing as in practice this won't happen frequently.
|
||||
* Drop the statistics entry for the replication slot. Do this while
|
||||
* holding ReplicationSlotAllocationLock so that we don't drop a
|
||||
* statistics entry for another slot with the same name just created in
|
||||
* another session.
|
||||
*/
|
||||
if (SlotIsLogical(slot))
|
||||
pgstat_drop_replslot(slot);
|
||||
|
||||
Reference in New Issue
Block a user