1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-22 12:22:45 +03:00

pgstat: store statistics in shared memory.

Previously the statistics collector received statistics updates via UDP and
shared statistics data by writing them out to temporary files regularly. These
files can reach tens of megabytes and are written out up to twice a
second. This has repeatedly prevented us from adding additional useful
statistics.

Now statistics are stored in shared memory. Statistics for variable-numbered
objects are stored in a dshash hashtable (backed by dynamic shared
memory). Fixed-numbered stats are stored in plain shared memory.

The header for pgstat.c contains an overview of the architecture.

The stats collector is not needed anymore, remove it.

By utilizing the transactional statistics drop infrastructure introduced in a
prior commit, statistics entries cannot "leak" anymore. Previously, leaked
statistics were dropped by pgstat_vacuum_stat(), called from [auto-]vacuum. On
systems with many small relations, pgstat_vacuum_stat() could be quite
expensive.

Now that replicas drop statistics entries for dropped objects, it is not
necessary anymore to reset stats when starting from a cleanly shut down
replica.

Subsequent commits will perform some further code cleanup, adapt docs and add
tests.

Bumps PGSTAT_FILE_FORMAT_ID.

Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Thomas Munro <thomas.munro@gmail.com>
Reviewed-By: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-By: "David G. Johnston" <david.g.johnston@gmail.com>
Reviewed-By: Tomas Vondra <tomas.vondra@2ndquadrant.com> (in a much earlier version)
Reviewed-By: Arthur Zakirov <a.zakirov@postgrespro.ru> (in a much earlier version)
Reviewed-By: Antonin Houska <ah@cybertec.at> (in a much earlier version)
Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.de
Discussion: https://postgr.es/m/20220308205351.2xcn6k4x5yivcxyd@alap3.anarazel.de
Discussion: https://postgr.es/m/20210319235115.y3wz7hpnnrshdyv6@alap3.anarazel.de
This commit is contained in:
Andres Freund
2022-04-06 21:29:46 -07:00
parent be902e2651
commit 5891c7a8ed
50 changed files with 4395 additions and 5485 deletions

View File

@@ -19,13 +19,12 @@
#include "utils/pgstat_internal.h"
#include "utils/timestamp.h"
#include "storage/procsignal.h"
/* Forward declaration of a file-local helper used by the session/connection reporting functions below. */
static bool pgstat_should_report_connstat(void);
/*
 * NOTE(review): these two non-static definitions are repeated a few lines
 * further down as "static int". That duplication looks like pre-change diff
 * lines whose "-" markers were lost -- confirm against the upstream file.
 */
int pgStatXactCommit = 0;
int pgStatXactRollback = 0;
/*
 * Locally accumulated timing counters. pgstat_update_dbstats() adds them to
 * the current database's pending entry and then resets them to zero.
 */
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
PgStat_Counter pgStatActiveTime = 0;
@@ -33,25 +32,18 @@ PgStat_Counter pgStatTransactionIdleTime = 0;
/* Reason the session ended; pgstat_report_disconnect() switches on this value. */
SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
/*
 * File-local transaction counters. pgstat_update_dbstats() adds them to the
 * current database's pending entry and then resets them to zero.
 */
static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
/*
 * Timestamp up to which session time has been accounted for. It is set to
 * MyStartTimestamp by pgstat_report_connect() and advanced by
 * pgstat_update_dbstats().
 */
static PgStat_Counter pgLastSessionReportTime = 0;
/*
 * Remove the statistics entry for the database being dropped.
 *
 * databaseid: OID of the database whose entry is to be removed.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * lost. The PgStat_MsgDropdb / pgStatSock / pgstat_send() lines (together
 * with the earlier remark that a lost message would eventually be cleaned up
 * by pgstat_vacuum_stat()) look like the removed "tell the collector"
 * implementation; the pgstat_drop_transactional() call looks like its
 * replacement. Confirm against the upstream file before treating this as
 * compilable code.
 */
void
pgstat_drop_database(Oid databaseid)
{
PgStat_MsgDropdb msg;
if (pgStatSock == PGINVALID_SOCKET)
return;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB);
msg.m_databaseid = databaseid;
pgstat_send(&msg, sizeof(msg));
pgstat_drop_transactional(PGSTAT_KIND_DATABASE, databaseid, InvalidOid);
}
/*
@@ -62,16 +54,24 @@ pgstat_drop_database(Oid databaseid)
/*
 * Record the current time as the last autovacuum start time of database
 * dboid.
 *
 * The shared database entry is obtained locked, last_autovac_time is set to
 * GetCurrentTimestamp(), and the entry is unlocked again.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * lost. The PgStat_MsgAutovacStart / pgStatSock / pgstat_setheader() /
 * pgstat_send() lines look like the removed collector-message
 * implementation; the entry_ref / dbentry lines look like the replacement.
 * Confirm against the upstream file before treating this as compilable code.
 */
void
pgstat_report_autovac(Oid dboid)
{
PgStat_MsgAutovacStart msg;
PgStat_EntryRef *entry_ref;
PgStatShared_Database *dbentry;
if (pgStatSock == PGINVALID_SOCKET)
return;
/* can't get here in single user mode */
Assert(IsUnderPostmaster);
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_AUTOVAC_START);
msg.m_databaseid = dboid;
msg.m_start_time = GetCurrentTimestamp();
/*
* End-of-vacuum is reported instantly. Report the start the same way for
* consistency. Vacuum doesn't run frequently and is a long-lasting
* operation so it doesn't matter if we get blocked here a little.
*/
entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE,
dboid, InvalidOid, false);
pgstat_send(&msg, sizeof(msg));
dbentry = (PgStatShared_Database *) entry_ref->shared_stats;
dbentry->stats.last_autovac_time = GetCurrentTimestamp();
pgstat_unlock_entry(entry_ref);
}
/*
@@ -80,15 +80,39 @@ pgstat_report_autovac(Oid dboid)
/*
 * Count one recovery conflict of the given kind against the pending stats
 * of the current database (MyDatabaseId).
 *
 * reason: one of the PROCSIG_RECOVERY_CONFLICT_* values handled in the
 * switch below. Values without a case label are silently ignored, and
 * PROCSIG_RECOVERY_CONFLICT_DATABASE is deliberately not counted.
 *
 * Nothing is recorded when pgstat_track_counts is off.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * lost. The PgStat_MsgRecoveryConflict / pgStatSock / pgstat_send() lines
 * look like the removed collector-message implementation. As literally
 * written, the first "if (pgStatSock ...)" line would govern the Assert that
 * follows it, which is almost certainly an artifact of that interleaving.
 * Confirm against the upstream file before treating this as compilable code.
 */
void
pgstat_report_recovery_conflict(int reason)
{
PgStat_MsgRecoveryConflict msg;
PgStat_StatDBEntry *dbentry;
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
Assert(IsUnderPostmaster);
if (!pgstat_track_counts)
return;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RECOVERYCONFLICT);
msg.m_databaseid = MyDatabaseId;
msg.m_reason = reason;
pgstat_send(&msg, sizeof(msg));
dbentry = pgstat_prep_database_pending(MyDatabaseId);
switch (reason)
{
case PROCSIG_RECOVERY_CONFLICT_DATABASE:
/*
* Since we drop the information about the database as soon as it
* replicates, there is no point in counting these conflicts.
*/
break;
case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
dbentry->n_conflict_tablespace++;
break;
case PROCSIG_RECOVERY_CONFLICT_LOCK:
dbentry->n_conflict_lock++;
break;
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
dbentry->n_conflict_snapshot++;
break;
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
dbentry->n_conflict_bufferpin++;
break;
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
dbentry->n_conflict_startup_deadlock++;
break;
}
}
/*
@@ -97,14 +121,13 @@ pgstat_report_recovery_conflict(int reason)
/*
 * Count one deadlock against the pending stats of the current database
 * (MyDatabaseId). Nothing is recorded when pgstat_track_counts is off.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * lost. The PgStat_MsgDeadlock / pgStatSock / pgstat_send() lines look like
 * the removed collector-message implementation, and the two stacked "if"
 * lines look like the old and new versions of the same guard. Confirm
 * against the upstream file before treating this as compilable code.
 */
void
pgstat_report_deadlock(void)
{
PgStat_MsgDeadlock msg;
PgStat_StatDBEntry *dbent;
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
if (!pgstat_track_counts)
return;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DEADLOCK);
msg.m_databaseid = MyDatabaseId;
pgstat_send(&msg, sizeof(msg));
dbent = pgstat_prep_database_pending(MyDatabaseId);
dbent->n_deadlocks++;
}
/*
@@ -113,17 +136,24 @@ pgstat_report_deadlock(void)
/*
 * Add failurecount checksum failures to the shared stats of database dboid
 * and stamp the time of the failure.
 *
 * dboid:        database the failures are attributed to.
 * failurecount: number of failures to add to n_checksum_failures.
 *
 * Unlike most counters in this file, the shared entry is updated directly
 * (under its lock) rather than through the pending entry.
 * pgstat_database_flush_cb() asserts that the pending checksum fields stay
 * zero. Nothing is recorded when pgstat_track_counts is off.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * lost. The PgStat_MsgChecksumFailure / pgStatSock / pgstat_send() lines
 * look like the removed collector-message implementation, and the two
 * stacked "if" lines look like the old and new versions of the same guard.
 * Confirm against the upstream file before treating this as compilable code.
 */
void
pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
{
PgStat_MsgChecksumFailure msg;
PgStat_EntryRef *entry_ref;
PgStatShared_Database *sharedent;
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
if (!pgstat_track_counts)
return;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CHECKSUMFAILURE);
msg.m_databaseid = dboid;
msg.m_failurecount = failurecount;
msg.m_failure_time = GetCurrentTimestamp();
/*
* Update the shared stats directly - checksum failures should never be
* common enough for that to be a problem.
*/
entry_ref =
pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, dboid, InvalidOid, false);
pgstat_send(&msg, sizeof(msg));
sharedent = (PgStatShared_Database *) entry_ref->shared_stats;
sharedent->stats.n_checksum_failures += failurecount;
sharedent->stats.last_checksum_failure = GetCurrentTimestamp();
pgstat_unlock_entry(entry_ref);
}
/*
@@ -141,15 +171,14 @@ pgstat_report_checksum_failure(void)
/*
 * Account for one temporary file of filesize bytes in the pending stats of
 * the current database (MyDatabaseId): n_temp_bytes grows by filesize and
 * n_temp_files by one. Nothing is recorded when pgstat_track_counts is off.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * lost. The PgStat_MsgTempFile / pgStatSock / pgstat_send() lines look like
 * the removed collector-message implementation, and the two stacked "if"
 * lines look like the old and new versions of the same guard. Confirm
 * against the upstream file before treating this as compilable code.
 */
void
pgstat_report_tempfile(size_t filesize)
{
PgStat_MsgTempFile msg;
PgStat_StatDBEntry *dbent;
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
if (!pgstat_track_counts)
return;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TEMPFILE);
msg.m_databaseid = MyDatabaseId;
msg.m_filesize = filesize;
pgstat_send(&msg, sizeof(msg));
dbent = pgstat_prep_database_pending(MyDatabaseId);
dbent->n_temp_bytes += filesize;
dbent->n_temp_files++;
}
/*
@@ -158,16 +187,15 @@ pgstat_report_tempfile(size_t filesize)
/*
 * Count a new session against the pending stats of the current database and
 * start the session-time clock by setting pgLastSessionReportTime to
 * MyStartTimestamp.
 *
 * Does nothing unless pgstat_should_report_connstat() returns true.
 *
 * The dboid parameter is not referenced in the visible body; MyDatabaseId
 * is used instead.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * lost. The PgStat_MsgConnect / pgstat_setheader() / pgstat_send() lines
 * look like the removed collector-message implementation. Confirm against
 * the upstream file before treating this as compilable code.
 */
void
pgstat_report_connect(Oid dboid)
{
PgStat_MsgConnect msg;
PgStat_StatDBEntry *dbentry;
if (!pgstat_should_report_connstat())
return;
pgLastSessionReportTime = MyStartTimestamp;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECT);
msg.m_databaseid = MyDatabaseId;
pgstat_send(&msg, sizeof(PgStat_MsgConnect));
dbentry = pgstat_prep_database_pending(MyDatabaseId);
dbentry->n_sessions++;
}
/*
@@ -176,15 +204,42 @@ pgstat_report_connect(Oid dboid)
/*
 * Count how the session ended against the pending stats of the current
 * database, based on pgStatSessionEndCause. Only abandoned (client EOF),
 * fatal and killed sessions are counted; DISCONNECT_NOT_YET and
 * DISCONNECT_NORMAL are not.
 *
 * Does nothing unless pgstat_should_report_connstat() returns true.
 *
 * The dboid parameter is not referenced in the visible body; MyDatabaseId
 * is used instead.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * lost. The PgStat_MsgDisconnect / pgstat_setheader() / pgstat_send() lines
 * look like the removed collector-message implementation. Confirm against
 * the upstream file before treating this as compilable code.
 */
void
pgstat_report_disconnect(Oid dboid)
{
PgStat_MsgDisconnect msg;
PgStat_StatDBEntry *dbentry;
if (!pgstat_should_report_connstat())
return;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DISCONNECT);
msg.m_databaseid = MyDatabaseId;
msg.m_cause = pgStatSessionEndCause;
pgstat_send(&msg, sizeof(PgStat_MsgDisconnect));
dbentry = pgstat_prep_database_pending(MyDatabaseId);
switch (pgStatSessionEndCause)
{
case DISCONNECT_NOT_YET:
case DISCONNECT_NORMAL:
/* we don't collect these */
break;
case DISCONNECT_CLIENT_EOF:
dbentry->n_sessions_abandoned++;
break;
case DISCONNECT_FATAL:
dbentry->n_sessions_fatal++;
break;
case DISCONNECT_KILLED:
dbentry->n_sessions_killed++;
break;
}
}
/*
 * Look up the collected statistics for database dboid on behalf of the
 * SQL-callable pgstat* functions.
 *
 * Returns the database's stats entry, or NULL when there is none. A NULL
 * result only means that no statistics exist, not that the database is
 * missing, so callers should report zeroes in that case.
 */
PgStat_StatDBEntry *
pgstat_fetch_stat_dbentry(Oid dboid)
{
	PgStat_StatDBEntry *dbstats;

	dbstats = (PgStat_StatDBEntry *)
		pgstat_fetch_entry(PGSTAT_KIND_DATABASE, dboid, InvalidOid);

	return dbstats;
}
void
@@ -205,57 +260,47 @@ AtEOXact_PgStat_Database(bool isCommit, bool parallel)
}
/*
 * Subroutine for pgstat_report_stat() (named pgstat_send_tabstat in the
 * older wording): fold the locally accumulated transaction commit/rollback
 * counts and I/O timings into the database statistics, then reset the local
 * counters to zero.
 *
 * When pgstat_should_report_connstat() is true, session time elapsed since
 * pgLastSessionReportTime, active time and idle-in-transaction time are
 * accounted for as well, and pgLastSessionReportTime is advanced.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * lost, and the interleaving here is heavy. There are two signature lines
 * (one taking "PgStat_MsgTabstat *tsmsg, TimestampTz now", one taking
 * "TimestampTz ts"). The tsmsg->m_* assignments look like the removed
 * message-filling implementation, while the dbentry->... accumulations look
 * like the replacement that writes into the pending entry returned by
 * pgstat_prep_database_pending(MyDatabaseId). The brace structure below does
 * not balance as written. Confirm against the upstream file before treating
 * this as compilable code.
 */
void
pgstat_update_dbstats(PgStat_MsgTabstat *tsmsg, TimestampTz now)
pgstat_update_dbstats(TimestampTz ts)
{
if (OidIsValid(tsmsg->m_databaseid))
{
tsmsg->m_xact_commit = pgStatXactCommit;
tsmsg->m_xact_rollback = pgStatXactRollback;
tsmsg->m_block_read_time = pgStatBlockReadTime;
tsmsg->m_block_write_time = pgStatBlockWriteTime;
PgStat_StatDBEntry *dbentry;
if (pgstat_should_report_connstat())
{
long secs;
int usecs;
dbentry = pgstat_prep_database_pending(MyDatabaseId);
/*
* pgLastSessionReportTime is initialized to MyStartTimestamp by
* pgstat_report_connect().
*/
TimestampDifference(pgLastSessionReportTime, now, &secs, &usecs);
pgLastSessionReportTime = now;
tsmsg->m_session_time = (PgStat_Counter) secs * 1000000 + usecs;
tsmsg->m_active_time = pgStatActiveTime;
tsmsg->m_idle_in_xact_time = pgStatTransactionIdleTime;
}
else
{
tsmsg->m_session_time = 0;
tsmsg->m_active_time = 0;
tsmsg->m_idle_in_xact_time = 0;
}
pgStatXactCommit = 0;
pgStatXactRollback = 0;
pgStatBlockReadTime = 0;
pgStatBlockWriteTime = 0;
pgStatActiveTime = 0;
pgStatTransactionIdleTime = 0;
}
else
/*
* Accumulate xact commit/rollback and I/O timings to stats entry of the
* current database.
*/
dbentry->n_xact_commit += pgStatXactCommit;
dbentry->n_xact_rollback += pgStatXactRollback;
dbentry->n_block_read_time += pgStatBlockReadTime;
dbentry->n_block_write_time += pgStatBlockWriteTime;
if (pgstat_should_report_connstat())
{
tsmsg->m_xact_commit = 0;
tsmsg->m_xact_rollback = 0;
tsmsg->m_block_read_time = 0;
tsmsg->m_block_write_time = 0;
tsmsg->m_session_time = 0;
tsmsg->m_active_time = 0;
tsmsg->m_idle_in_xact_time = 0;
long secs;
int usecs;
/*
* pgLastSessionReportTime is initialized to MyStartTimestamp by
* pgstat_report_connect().
*/
TimestampDifference(pgLastSessionReportTime, ts, &secs, &usecs);
pgLastSessionReportTime = ts;
dbentry->total_session_time += (PgStat_Counter) secs * 1000000 + usecs;
dbentry->total_active_time += pgStatActiveTime;
dbentry->total_idle_in_xact_time += pgStatTransactionIdleTime;
}
pgStatXactCommit = 0;
pgStatXactRollback = 0;
pgStatBlockReadTime = 0;
pgStatBlockWriteTime = 0;
pgStatActiveTime = 0;
pgStatTransactionIdleTime = 0;
}
/*
@@ -270,3 +315,111 @@ pgstat_should_report_connstat(void)
{
return MyBackendType == B_BACKEND;
}
/*
 * Return the backend-local pending PgStat_StatDBEntry for database dboid,
 * creating it first if it does not exist yet.
 */
PgStat_StatDBEntry *
pgstat_prep_database_pending(Oid dboid)
{
	PgStat_EntryRef *ref = pgstat_prep_pending_entry(PGSTAT_KIND_DATABASE,
													 dboid,
													 InvalidOid,
													 NULL);

	return ref->pending;
}
/*
 * Set the stats-reset timestamp of database dboid to ts, without resetting
 * any other part of that database's statistics.
 *
 * dboid: database whose reset timestamp is updated.
 * ts:    timestamp to store in stat_reset_timestamp.
 *
 * The shared entry is looked up by the dboid argument. It used to be looked
 * up by MyDatabaseId, which silently ignored the parameter and stamped the
 * current database even when the caller named a different one.
 *
 * The entry is obtained locked and is unlocked again before returning.
 */
void
pgstat_reset_database_timestamp(Oid dboid, TimestampTz ts)
{
	PgStat_EntryRef *dbref;
	PgStatShared_Database *dbentry;

	dbref = pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, dboid, InvalidOid,
										false);

	dbentry = (PgStatShared_Database *) dbref->shared_stats;
	dbentry->stats.stat_reset_timestamp = ts;

	pgstat_unlock_entry(dbref);
}
/*
 * Flush the pending database stats of entry_ref into its shared entry.
 *
 * Every pending counter is added to the matching shared counter while the
 * entry is locked; afterwards the pending entry is zeroed.
 *
 * If nowait is true and the lock cannot be acquired immediately, nothing is
 * flushed and false is returned. Otherwise true is returned.
 */
bool
pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait)
{
	PgStat_StatDBEntry *pending = (PgStat_StatDBEntry *) entry_ref->pending;
	PgStatShared_Database *shared = (PgStatShared_Database *) entry_ref->shared_stats;

	if (!pgstat_lock_entry(entry_ref, nowait))
		return false;

	/* Add one pending field to the shared field of the same name. */
#define FLUSH_DB_FIELD(fld) \
	(shared)->stats.fld += (pending)->fld

	/* transaction, block and tuple counters */
	FLUSH_DB_FIELD(n_xact_commit);
	FLUSH_DB_FIELD(n_xact_rollback);
	FLUSH_DB_FIELD(n_blocks_fetched);
	FLUSH_DB_FIELD(n_blocks_hit);
	FLUSH_DB_FIELD(n_tuples_returned);
	FLUSH_DB_FIELD(n_tuples_fetched);
	FLUSH_DB_FIELD(n_tuples_inserted);
	FLUSH_DB_FIELD(n_tuples_updated);
	FLUSH_DB_FIELD(n_tuples_deleted);

	/* autovacuum start is written straight to shared memory, never pending */
	Assert(pending->last_autovac_time == 0);

	/* recovery conflicts, temp files, deadlocks */
	FLUSH_DB_FIELD(n_conflict_tablespace);
	FLUSH_DB_FIELD(n_conflict_lock);
	FLUSH_DB_FIELD(n_conflict_snapshot);
	FLUSH_DB_FIELD(n_conflict_bufferpin);
	FLUSH_DB_FIELD(n_conflict_startup_deadlock);
	FLUSH_DB_FIELD(n_temp_bytes);
	FLUSH_DB_FIELD(n_temp_files);
	FLUSH_DB_FIELD(n_deadlocks);

	/* checksum failures are likewise written straight to shared memory */
	Assert(pending->n_checksum_failures == 0);
	Assert(pending->last_checksum_failure == 0);

	/* I/O timings and session statistics */
	FLUSH_DB_FIELD(n_block_read_time);
	FLUSH_DB_FIELD(n_block_write_time);
	FLUSH_DB_FIELD(n_sessions);
	FLUSH_DB_FIELD(total_session_time);
	FLUSH_DB_FIELD(total_active_time);
	FLUSH_DB_FIELD(total_idle_in_xact_time);
	FLUSH_DB_FIELD(n_sessions_abandoned);
	FLUSH_DB_FIELD(n_sessions_fatal);
	FLUSH_DB_FIELD(n_sessions_killed);

#undef FLUSH_DB_FIELD

	pgstat_unlock_entry(entry_ref);

	/* everything has been handed over; start accumulating from zero again */
	memset(pending, 0, sizeof(*pending));

	return true;
}
/*
 * Store ts as the stats-reset timestamp of the shared database entry that
 * header points to.
 */
void
pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts)
{
	PgStatShared_Database *shdb = (PgStatShared_Database *) header;

	shdb->stats.stat_reset_timestamp = ts;
}