mirror of
https://github.com/postgres/postgres.git
synced 2025-11-22 12:22:45 +03:00
pgstat: store statistics in shared memory.
Previously the statistics collector received statistics updates via UDP and shared statistics data by writing them out to temporary files regularly. These files can reach tens of megabytes and are written out up to twice a second. This has repeatedly prevented us from adding additional useful statistics. Now statistics are stored in shared memory. Statistics for variable-numbered objects are stored in a dshash hashtable (backed by dynamic shared memory). Fixed-numbered stats are stored in plain shared memory. The header for pgstat.c contains an overview of the architecture. The stats collector is not needed anymore, remove it. By utilizing the transactional statistics drop infrastructure introduced in a prior commit, statistics entries cannot "leak" anymore. Previously leaked statistics were dropped by pgstat_vacuum_stat(), called from [auto-]vacuum. On systems with many small relations pgstat_vacuum_stat() could be quite expensive. Now that replicas drop statistics entries for dropped objects, it is not necessary anymore to reset stats when starting from a cleanly shut down replica. Subsequent commits will perform some further code cleanup, adapt docs and add tests. Bumps PGSTAT_FILE_FORMAT_ID. Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Author: Andres Freund <andres@anarazel.de> Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-By: Andres Freund <andres@anarazel.de> Reviewed-By: Thomas Munro <thomas.munro@gmail.com> Reviewed-By: Justin Pryzby <pryzby@telsasoft.com> Reviewed-By: "David G. Johnston" <david.g.johnston@gmail.com> Reviewed-By: Tomas Vondra <tomas.vondra@2ndquadrant.com> (in a much earlier version) Reviewed-By: Arthur Zakirov <a.zakirov@postgrespro.ru> (in a much earlier version) Reviewed-By: Antonin Houska <ah@cybertec.at> (in a much earlier version) Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.de Discussion: https://postgr.es/m/20220308205351.2xcn6k4x5yivcxyd@alap3.anarazel.de Discussion: https://postgr.es/m/20210319235115.y3wz7hpnnrshdyv6@alap3.anarazel.de
This commit is contained in:
@@ -19,13 +19,12 @@
|
||||
|
||||
#include "utils/pgstat_internal.h"
|
||||
#include "utils/timestamp.h"
|
||||
#include "storage/procsignal.h"
|
||||
|
||||
|
||||
static bool pgstat_should_report_connstat(void);
|
||||
|
||||
|
||||
int pgStatXactCommit = 0;
|
||||
int pgStatXactRollback = 0;
|
||||
PgStat_Counter pgStatBlockReadTime = 0;
|
||||
PgStat_Counter pgStatBlockWriteTime = 0;
|
||||
PgStat_Counter pgStatActiveTime = 0;
|
||||
@@ -33,25 +32,18 @@ PgStat_Counter pgStatTransactionIdleTime = 0;
|
||||
SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
|
||||
|
||||
|
||||
static int pgStatXactCommit = 0;
|
||||
static int pgStatXactRollback = 0;
|
||||
static PgStat_Counter pgLastSessionReportTime = 0;
|
||||
|
||||
|
||||
/*
|
||||
* Tell the collector that we just dropped a database.
|
||||
* (If the message gets lost, we will still clean the dead DB eventually
|
||||
* via future invocations of pgstat_vacuum_stat().)
|
||||
* Remove entry for the database being dropped.
|
||||
*/
|
||||
void
|
||||
pgstat_drop_database(Oid databaseid)
|
||||
{
|
||||
PgStat_MsgDropdb msg;
|
||||
|
||||
if (pgStatSock == PGINVALID_SOCKET)
|
||||
return;
|
||||
|
||||
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB);
|
||||
msg.m_databaseid = databaseid;
|
||||
pgstat_send(&msg, sizeof(msg));
|
||||
pgstat_drop_transactional(PGSTAT_KIND_DATABASE, databaseid, InvalidOid);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -62,16 +54,24 @@ pgstat_drop_database(Oid databaseid)
|
||||
void
|
||||
pgstat_report_autovac(Oid dboid)
|
||||
{
|
||||
PgStat_MsgAutovacStart msg;
|
||||
PgStat_EntryRef *entry_ref;
|
||||
PgStatShared_Database *dbentry;
|
||||
|
||||
if (pgStatSock == PGINVALID_SOCKET)
|
||||
return;
|
||||
/* can't get here in single user mode */
|
||||
Assert(IsUnderPostmaster);
|
||||
|
||||
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_AUTOVAC_START);
|
||||
msg.m_databaseid = dboid;
|
||||
msg.m_start_time = GetCurrentTimestamp();
|
||||
/*
|
||||
* End-of-vacuum is reported instantly. Report the start the same way for
|
||||
* consistency. Vacuum doesn't run frequently and is a long-lasting
|
||||
* operation so it doesn't matter if we get blocked here a little.
|
||||
*/
|
||||
entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE,
|
||||
dboid, InvalidOid, false);
|
||||
|
||||
pgstat_send(&msg, sizeof(msg));
|
||||
dbentry = (PgStatShared_Database *) entry_ref->shared_stats;
|
||||
dbentry->stats.last_autovac_time = GetCurrentTimestamp();
|
||||
|
||||
pgstat_unlock_entry(entry_ref);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -80,15 +80,39 @@ pgstat_report_autovac(Oid dboid)
|
||||
void
|
||||
pgstat_report_recovery_conflict(int reason)
|
||||
{
|
||||
PgStat_MsgRecoveryConflict msg;
|
||||
PgStat_StatDBEntry *dbentry;
|
||||
|
||||
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
|
||||
Assert(IsUnderPostmaster);
|
||||
if (!pgstat_track_counts)
|
||||
return;
|
||||
|
||||
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RECOVERYCONFLICT);
|
||||
msg.m_databaseid = MyDatabaseId;
|
||||
msg.m_reason = reason;
|
||||
pgstat_send(&msg, sizeof(msg));
|
||||
dbentry = pgstat_prep_database_pending(MyDatabaseId);
|
||||
|
||||
switch (reason)
|
||||
{
|
||||
case PROCSIG_RECOVERY_CONFLICT_DATABASE:
|
||||
|
||||
/*
|
||||
* Since we drop the information about the database as soon as it
|
||||
* replicates, there is no point in counting these conflicts.
|
||||
*/
|
||||
break;
|
||||
case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
|
||||
dbentry->n_conflict_tablespace++;
|
||||
break;
|
||||
case PROCSIG_RECOVERY_CONFLICT_LOCK:
|
||||
dbentry->n_conflict_lock++;
|
||||
break;
|
||||
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
|
||||
dbentry->n_conflict_snapshot++;
|
||||
break;
|
||||
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
|
||||
dbentry->n_conflict_bufferpin++;
|
||||
break;
|
||||
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
|
||||
dbentry->n_conflict_startup_deadlock++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -97,14 +121,13 @@ pgstat_report_recovery_conflict(int reason)
|
||||
void
|
||||
pgstat_report_deadlock(void)
|
||||
{
|
||||
PgStat_MsgDeadlock msg;
|
||||
PgStat_StatDBEntry *dbent;
|
||||
|
||||
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
|
||||
if (!pgstat_track_counts)
|
||||
return;
|
||||
|
||||
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DEADLOCK);
|
||||
msg.m_databaseid = MyDatabaseId;
|
||||
pgstat_send(&msg, sizeof(msg));
|
||||
dbent = pgstat_prep_database_pending(MyDatabaseId);
|
||||
dbent->n_deadlocks++;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -113,17 +136,24 @@ pgstat_report_deadlock(void)
|
||||
void
|
||||
pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
|
||||
{
|
||||
PgStat_MsgChecksumFailure msg;
|
||||
PgStat_EntryRef *entry_ref;
|
||||
PgStatShared_Database *sharedent;
|
||||
|
||||
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
|
||||
if (!pgstat_track_counts)
|
||||
return;
|
||||
|
||||
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CHECKSUMFAILURE);
|
||||
msg.m_databaseid = dboid;
|
||||
msg.m_failurecount = failurecount;
|
||||
msg.m_failure_time = GetCurrentTimestamp();
|
||||
/*
|
||||
* Update the shared stats directly - checksum failures should never be
|
||||
* common enough for that to be a problem.
|
||||
*/
|
||||
entry_ref =
|
||||
pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, dboid, InvalidOid, false);
|
||||
|
||||
pgstat_send(&msg, sizeof(msg));
|
||||
sharedent = (PgStatShared_Database *) entry_ref->shared_stats;
|
||||
sharedent->stats.n_checksum_failures += failurecount;
|
||||
sharedent->stats.last_checksum_failure = GetCurrentTimestamp();
|
||||
|
||||
pgstat_unlock_entry(entry_ref);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -141,15 +171,14 @@ pgstat_report_checksum_failure(void)
|
||||
void
|
||||
pgstat_report_tempfile(size_t filesize)
|
||||
{
|
||||
PgStat_MsgTempFile msg;
|
||||
PgStat_StatDBEntry *dbent;
|
||||
|
||||
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
|
||||
if (!pgstat_track_counts)
|
||||
return;
|
||||
|
||||
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TEMPFILE);
|
||||
msg.m_databaseid = MyDatabaseId;
|
||||
msg.m_filesize = filesize;
|
||||
pgstat_send(&msg, sizeof(msg));
|
||||
dbent = pgstat_prep_database_pending(MyDatabaseId);
|
||||
dbent->n_temp_bytes += filesize;
|
||||
dbent->n_temp_files++;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -158,16 +187,15 @@ pgstat_report_tempfile(size_t filesize)
|
||||
void
|
||||
pgstat_report_connect(Oid dboid)
|
||||
{
|
||||
PgStat_MsgConnect msg;
|
||||
PgStat_StatDBEntry *dbentry;
|
||||
|
||||
if (!pgstat_should_report_connstat())
|
||||
return;
|
||||
|
||||
pgLastSessionReportTime = MyStartTimestamp;
|
||||
|
||||
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECT);
|
||||
msg.m_databaseid = MyDatabaseId;
|
||||
pgstat_send(&msg, sizeof(PgStat_MsgConnect));
|
||||
dbentry = pgstat_prep_database_pending(MyDatabaseId);
|
||||
dbentry->n_sessions++;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -176,15 +204,42 @@ pgstat_report_connect(Oid dboid)
|
||||
void
|
||||
pgstat_report_disconnect(Oid dboid)
|
||||
{
|
||||
PgStat_MsgDisconnect msg;
|
||||
PgStat_StatDBEntry *dbentry;
|
||||
|
||||
if (!pgstat_should_report_connstat())
|
||||
return;
|
||||
|
||||
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DISCONNECT);
|
||||
msg.m_databaseid = MyDatabaseId;
|
||||
msg.m_cause = pgStatSessionEndCause;
|
||||
pgstat_send(&msg, sizeof(PgStat_MsgDisconnect));
|
||||
dbentry = pgstat_prep_database_pending(MyDatabaseId);
|
||||
|
||||
switch (pgStatSessionEndCause)
|
||||
{
|
||||
case DISCONNECT_NOT_YET:
|
||||
case DISCONNECT_NORMAL:
|
||||
/* we don't collect these */
|
||||
break;
|
||||
case DISCONNECT_CLIENT_EOF:
|
||||
dbentry->n_sessions_abandoned++;
|
||||
break;
|
||||
case DISCONNECT_FATAL:
|
||||
dbentry->n_sessions_fatal++;
|
||||
break;
|
||||
case DISCONNECT_KILLED:
|
||||
dbentry->n_sessions_killed++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Support function for the SQL-callable pgstat* functions. Returns
|
||||
* the collected statistics for one database or NULL. NULL doesn't mean
|
||||
* that the database doesn't exist, just that there are no statistics, so the
|
||||
* caller is better off to report ZERO instead.
|
||||
*/
|
||||
PgStat_StatDBEntry *
|
||||
pgstat_fetch_stat_dbentry(Oid dboid)
|
||||
{
|
||||
return (PgStat_StatDBEntry *)
|
||||
pgstat_fetch_entry(PGSTAT_KIND_DATABASE, dboid, InvalidOid);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -205,57 +260,47 @@ AtEOXact_PgStat_Database(bool isCommit, bool parallel)
|
||||
}
|
||||
|
||||
/*
|
||||
* Subroutine for pgstat_send_tabstat: Handle xact commit/rollback and I/O
|
||||
* Subroutine for pgstat_report_stat(): Handle xact commit/rollback and I/O
|
||||
* timings.
|
||||
*/
|
||||
void
|
||||
pgstat_update_dbstats(PgStat_MsgTabstat *tsmsg, TimestampTz now)
|
||||
pgstat_update_dbstats(TimestampTz ts)
|
||||
{
|
||||
if (OidIsValid(tsmsg->m_databaseid))
|
||||
{
|
||||
tsmsg->m_xact_commit = pgStatXactCommit;
|
||||
tsmsg->m_xact_rollback = pgStatXactRollback;
|
||||
tsmsg->m_block_read_time = pgStatBlockReadTime;
|
||||
tsmsg->m_block_write_time = pgStatBlockWriteTime;
|
||||
PgStat_StatDBEntry *dbentry;
|
||||
|
||||
if (pgstat_should_report_connstat())
|
||||
{
|
||||
long secs;
|
||||
int usecs;
|
||||
dbentry = pgstat_prep_database_pending(MyDatabaseId);
|
||||
|
||||
/*
|
||||
* pgLastSessionReportTime is initialized to MyStartTimestamp by
|
||||
* pgstat_report_connect().
|
||||
*/
|
||||
TimestampDifference(pgLastSessionReportTime, now, &secs, &usecs);
|
||||
pgLastSessionReportTime = now;
|
||||
tsmsg->m_session_time = (PgStat_Counter) secs * 1000000 + usecs;
|
||||
tsmsg->m_active_time = pgStatActiveTime;
|
||||
tsmsg->m_idle_in_xact_time = pgStatTransactionIdleTime;
|
||||
}
|
||||
else
|
||||
{
|
||||
tsmsg->m_session_time = 0;
|
||||
tsmsg->m_active_time = 0;
|
||||
tsmsg->m_idle_in_xact_time = 0;
|
||||
}
|
||||
pgStatXactCommit = 0;
|
||||
pgStatXactRollback = 0;
|
||||
pgStatBlockReadTime = 0;
|
||||
pgStatBlockWriteTime = 0;
|
||||
pgStatActiveTime = 0;
|
||||
pgStatTransactionIdleTime = 0;
|
||||
}
|
||||
else
|
||||
/*
|
||||
* Accumulate xact commit/rollback and I/O timings to stats entry of the
|
||||
* current database.
|
||||
*/
|
||||
dbentry->n_xact_commit += pgStatXactCommit;
|
||||
dbentry->n_xact_rollback += pgStatXactRollback;
|
||||
dbentry->n_block_read_time += pgStatBlockReadTime;
|
||||
dbentry->n_block_write_time += pgStatBlockWriteTime;
|
||||
|
||||
if (pgstat_should_report_connstat())
|
||||
{
|
||||
tsmsg->m_xact_commit = 0;
|
||||
tsmsg->m_xact_rollback = 0;
|
||||
tsmsg->m_block_read_time = 0;
|
||||
tsmsg->m_block_write_time = 0;
|
||||
tsmsg->m_session_time = 0;
|
||||
tsmsg->m_active_time = 0;
|
||||
tsmsg->m_idle_in_xact_time = 0;
|
||||
long secs;
|
||||
int usecs;
|
||||
|
||||
/*
|
||||
* pgLastSessionReportTime is initialized to MyStartTimestamp by
|
||||
* pgstat_report_connect().
|
||||
*/
|
||||
TimestampDifference(pgLastSessionReportTime, ts, &secs, &usecs);
|
||||
pgLastSessionReportTime = ts;
|
||||
dbentry->total_session_time += (PgStat_Counter) secs * 1000000 + usecs;
|
||||
dbentry->total_active_time += pgStatActiveTime;
|
||||
dbentry->total_idle_in_xact_time += pgStatTransactionIdleTime;
|
||||
}
|
||||
|
||||
pgStatXactCommit = 0;
|
||||
pgStatXactRollback = 0;
|
||||
pgStatBlockReadTime = 0;
|
||||
pgStatBlockWriteTime = 0;
|
||||
pgStatActiveTime = 0;
|
||||
pgStatTransactionIdleTime = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -270,3 +315,111 @@ pgstat_should_report_connstat(void)
|
||||
{
|
||||
return MyBackendType == B_BACKEND;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find or create a local PgStat_StatDBEntry entry for dboid.
|
||||
*/
|
||||
PgStat_StatDBEntry *
|
||||
pgstat_prep_database_pending(Oid dboid)
|
||||
{
|
||||
PgStat_EntryRef *entry_ref;
|
||||
|
||||
entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_DATABASE, dboid, InvalidOid,
|
||||
NULL);
|
||||
|
||||
return entry_ref->pending;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset the database's reset timestamp, without resetting the contents of the
|
||||
* database stats.
|
||||
*/
|
||||
void
|
||||
pgstat_reset_database_timestamp(Oid dboid, TimestampTz ts)
|
||||
{
|
||||
PgStat_EntryRef *dbref;
|
||||
PgStatShared_Database *dbentry;
|
||||
|
||||
dbref = pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, MyDatabaseId, InvalidOid,
|
||||
false);
|
||||
|
||||
dbentry = (PgStatShared_Database *) dbref->shared_stats;
|
||||
dbentry->stats.stat_reset_timestamp = ts;
|
||||
|
||||
pgstat_unlock_entry(dbref);
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush out pending stats for the entry
|
||||
*
|
||||
* If nowait is true, this function returns false if lock could not
|
||||
* immediately acquired, otherwise true is returned.
|
||||
*/
|
||||
bool
|
||||
pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait)
|
||||
{
|
||||
PgStatShared_Database *sharedent;
|
||||
PgStat_StatDBEntry *pendingent;
|
||||
|
||||
pendingent = (PgStat_StatDBEntry *) entry_ref->pending;
|
||||
sharedent = (PgStatShared_Database *) entry_ref->shared_stats;
|
||||
|
||||
if (!pgstat_lock_entry(entry_ref, nowait))
|
||||
return false;
|
||||
|
||||
#define PGSTAT_ACCUM_DBCOUNT(item) \
|
||||
(sharedent)->stats.item += (pendingent)->item
|
||||
|
||||
PGSTAT_ACCUM_DBCOUNT(n_xact_commit);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_xact_rollback);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_blocks_fetched);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_blocks_hit);
|
||||
|
||||
PGSTAT_ACCUM_DBCOUNT(n_tuples_returned);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_tuples_fetched);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_tuples_inserted);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_tuples_updated);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_tuples_deleted);
|
||||
|
||||
/* last_autovac_time is reported immediately */
|
||||
Assert(pendingent->last_autovac_time == 0);
|
||||
|
||||
PGSTAT_ACCUM_DBCOUNT(n_conflict_tablespace);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_conflict_lock);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_conflict_snapshot);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_conflict_bufferpin);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_conflict_startup_deadlock);
|
||||
|
||||
PGSTAT_ACCUM_DBCOUNT(n_temp_bytes);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_temp_files);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_deadlocks);
|
||||
|
||||
/* checksum failures are reported immediately */
|
||||
Assert(pendingent->n_checksum_failures == 0);
|
||||
Assert(pendingent->last_checksum_failure == 0);
|
||||
|
||||
PGSTAT_ACCUM_DBCOUNT(n_block_read_time);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_block_write_time);
|
||||
|
||||
PGSTAT_ACCUM_DBCOUNT(n_sessions);
|
||||
PGSTAT_ACCUM_DBCOUNT(total_session_time);
|
||||
PGSTAT_ACCUM_DBCOUNT(total_active_time);
|
||||
PGSTAT_ACCUM_DBCOUNT(total_idle_in_xact_time);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_sessions_abandoned);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_sessions_fatal);
|
||||
PGSTAT_ACCUM_DBCOUNT(n_sessions_killed);
|
||||
#undef PGSTAT_ACCUM_DBCOUNT
|
||||
|
||||
pgstat_unlock_entry(entry_ref);
|
||||
|
||||
memset(pendingent, 0, sizeof(*pendingent));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts)
|
||||
{
|
||||
((PgStatShared_Database *) header)->stats.stat_reset_timestamp = ts;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user