1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-11 10:01:57 +03:00

Reconsider pg_stat_subscription_workers view.

It was decided (refer to the Discussion link below) that the stats
collector is not an appropriate place to store the error information of
subscription workers.

This patch changes the pg_stat_subscription_workers view (introduced by
commit 8d74fc96db) so that it stores only statistics counters:
apply_error_count and sync_error_count, and has one entry for
each subscription. The removed error information such as error-XID and
the error message would be stored in another way in the future which is
more reliable and persistent.

After removing these error details, there is no longer any relation
information, so the subscription statistics are now a cluster-wide
statistics.

The patch also changes the view name to pg_stat_subscription_stats since
the word "worker" is an implementation detail that we use one worker for
one tablesync and one apply.

Author: Masahiko Sawada, based on suggestions by Andres Freund
Reviewed-by: Peter Smith, Haiying Tang, Takamichi Osumi, Amit Kapila
Discussion: https://postgr.es/m/20220125063131.4cmvsxbz2tdg6g65@alap3.anarazel.de
This commit is contained in:
Amit Kapila
2022-03-01 06:17:52 +05:30
parent 54bd1e43ca
commit 7a85073290
14 changed files with 583 additions and 865 deletions

View File

@ -67,6 +67,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_RESETSINGLECOUNTER,
PGSTAT_MTYPE_RESETSLRUCOUNTER,
PGSTAT_MTYPE_RESETREPLSLOTCOUNTER,
PGSTAT_MTYPE_RESETSUBCOUNTER,
PGSTAT_MTYPE_AUTOVAC_START,
PGSTAT_MTYPE_VACUUM,
PGSTAT_MTYPE_ANALYZE,
@ -84,8 +85,8 @@ typedef enum StatMsgType
PGSTAT_MTYPE_REPLSLOT,
PGSTAT_MTYPE_CONNECT,
PGSTAT_MTYPE_DISCONNECT,
PGSTAT_MTYPE_SUBSCRIPTIONPURGE,
PGSTAT_MTYPE_SUBWORKERERROR,
PGSTAT_MTYPE_SUBSCRIPTIONDROP,
PGSTAT_MTYPE_SUBSCRIPTIONERROR,
} StatMsgType;
/* ----------
@ -148,8 +149,7 @@ typedef enum PgStat_Shared_Reset_Target
typedef enum PgStat_Single_Reset_Type
{
RESET_TABLE,
RESET_FUNCTION,
RESET_SUBWORKER
RESET_FUNCTION
} PgStat_Single_Reset_Type;
/* ------------------------------------------------------------
@ -368,7 +368,6 @@ typedef struct PgStat_MsgResetsinglecounter
Oid m_databaseid;
PgStat_Single_Reset_Type m_resettype;
Oid m_objectid;
Oid m_subobjectid;
} PgStat_MsgResetsinglecounter;
/* ----------
@ -394,6 +393,19 @@ typedef struct PgStat_MsgResetreplslotcounter
bool clearall;
} PgStat_MsgResetreplslotcounter;
/* ----------
* PgStat_MsgResetsubcounter Sent by the backend to tell the collector
* to reset subscription counter(s)
* ----------
*/
typedef struct PgStat_MsgResetsubcounter
{
PgStat_MsgHdr m_hdr;
Oid m_subid; /* InvalidOid means reset all subscription
* stats */
} PgStat_MsgResetsubcounter;
/* ----------
* PgStat_MsgAutovacStart Sent by the autovacuum daemon to signal
* that a database is going to be processed
@ -542,53 +554,28 @@ typedef struct PgStat_MsgReplSlot
} PgStat_MsgReplSlot;
/* ----------
* PgStat_MsgSubscriptionPurge Sent by the backend and autovacuum to tell the
* collector about the dead subscriptions.
* PgStat_MsgSubscriptionDrop Sent by the backend and autovacuum to tell the
* collector about the dead subscription.
* ----------
*/
#define PGSTAT_NUM_SUBSCRIPTIONPURGE \
((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) / sizeof(Oid))
typedef struct PgStat_MsgSubscriptionPurge
typedef struct PgStat_MsgSubscriptionDrop
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
int m_nentries;
Oid m_subids[PGSTAT_NUM_SUBSCRIPTIONPURGE];
} PgStat_MsgSubscriptionPurge;
Oid m_subid;
} PgStat_MsgSubscriptionDrop;
/* ----------
* PgStat_MsgSubWorkerError Sent by the apply worker or the table sync
* worker to report the error occurred while
* processing changes.
* PgStat_MsgSubscriptionError Sent by the apply worker or the table sync
* worker to report an error on the subscription.
* ----------
*/
#define PGSTAT_SUBWORKERERROR_MSGLEN 256
typedef struct PgStat_MsgSubWorkerError
typedef struct PgStat_MsgSubscriptionError
{
PgStat_MsgHdr m_hdr;
/*
* m_subid and m_subrelid are used to determine the subscription and the
* reporter of the error. m_subrelid is InvalidOid if reported by an apply
* worker otherwise reported by a table sync worker.
*/
Oid m_databaseid;
Oid m_subid;
Oid m_subrelid;
/*
* Oid of the table that the reporter was actually processing. m_relid can
* be InvalidOid if an error occurred during worker applying a
* non-data-modification message such as RELATION.
*/
Oid m_relid;
LogicalRepMsgType m_command;
TransactionId m_xid;
TimestampTz m_timestamp;
char m_message[PGSTAT_SUBWORKERERROR_MSGLEN];
} PgStat_MsgSubWorkerError;
bool m_is_apply_error;
} PgStat_MsgSubscriptionError;
/* ----------
* PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict
@ -750,6 +737,7 @@ typedef union PgStat_Msg
PgStat_MsgResetsinglecounter msg_resetsinglecounter;
PgStat_MsgResetslrucounter msg_resetslrucounter;
PgStat_MsgResetreplslotcounter msg_resetreplslotcounter;
PgStat_MsgResetsubcounter msg_resetsubcounter;
PgStat_MsgAutovacStart msg_autovacuum_start;
PgStat_MsgVacuum msg_vacuum;
PgStat_MsgAnalyze msg_analyze;
@ -767,8 +755,8 @@ typedef union PgStat_Msg
PgStat_MsgReplSlot msg_replslot;
PgStat_MsgConnect msg_connect;
PgStat_MsgDisconnect msg_disconnect;
PgStat_MsgSubscriptionPurge msg_subscriptionpurge;
PgStat_MsgSubWorkerError msg_subworkererror;
PgStat_MsgSubscriptionError msg_subscriptionerror;
PgStat_MsgSubscriptionDrop msg_subscriptiondrop;
} PgStat_Msg;
@ -780,7 +768,7 @@ typedef union PgStat_Msg
* ------------------------------------------------------------
*/
#define PGSTAT_FILE_FORMAT_ID 0x01A5BCA5
#define PGSTAT_FILE_FORMAT_ID 0x01A5BCA6
/* ----------
* PgStat_StatDBEntry The collector's data per database
@ -823,16 +811,11 @@ typedef struct PgStat_StatDBEntry
TimestampTz stats_timestamp; /* time of db stats file update */
/*
* tables, functions, and subscription workers must be last in the struct,
* because we don't write the pointers out to the stats file.
*
* subworkers is the hash table of PgStat_StatSubWorkerEntry which stores
* statistics of logical replication workers: apply worker and table sync
* worker.
* tables and functions must be last in the struct, because we don't write
* the pointers out to the stats file.
*/
HTAB *tables;
HTAB *functions;
HTAB *subworkers;
} PgStat_StatDBEntry;
@ -989,38 +972,17 @@ typedef struct PgStat_StatReplSlotEntry
TimestampTz stat_reset_timestamp;
} PgStat_StatReplSlotEntry;
/* The lookup key for subscription worker hash table */
typedef struct PgStat_StatSubWorkerKey
{
Oid subid;
/*
* Oid of the table for which tablesync worker will copy the initial data.
* An InvalidOid will be assigned for apply workers.
*/
Oid subrelid;
} PgStat_StatSubWorkerKey;
/*
* Logical replication apply worker and table sync worker statistics kept in the
* stats collector.
* Subscription statistics kept in the stats collector.
*/
typedef struct PgStat_StatSubWorkerEntry
typedef struct PgStat_StatSubEntry
{
PgStat_StatSubWorkerKey key; /* hash key (must be first) */
Oid subid; /* hash key (must be first) */
/*
* Subscription worker error statistics representing an error that
* occurred during application of changes or the initial table
* synchronization.
*/
Oid last_error_relid;
LogicalRepMsgType last_error_command;
TransactionId last_error_xid;
PgStat_Counter last_error_count;
TimestampTz last_error_time;
char last_error_message[PGSTAT_SUBWORKERERROR_MSGLEN];
} PgStat_StatSubWorkerEntry;
PgStat_Counter apply_error_count;
PgStat_Counter sync_error_count;
TimestampTz stat_reset_timestamp;
} PgStat_StatSubEntry;
/*
* Working state needed to accumulate per-function-call timing statistics.
@ -1111,10 +1073,10 @@ extern void pgstat_drop_database(Oid databaseid);
extern void pgstat_clear_snapshot(void);
extern void pgstat_reset_counters(void);
extern void pgstat_reset_shared_counters(const char *);
extern void pgstat_reset_single_counter(Oid objectid, Oid subobjectid,
PgStat_Single_Reset_Type type);
extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type);
extern void pgstat_reset_slru_counter(const char *);
extern void pgstat_reset_replslot_counter(const char *name);
extern void pgstat_reset_subscription_counter(Oid subid);
extern void pgstat_report_connect(Oid dboid);
extern void pgstat_report_autovac(Oid dboid);
@ -1131,9 +1093,7 @@ extern void pgstat_report_checksum_failure(void);
extern void pgstat_report_replslot(const PgStat_StatReplSlotEntry *repSlotStat);
extern void pgstat_report_replslot_create(const char *slotname);
extern void pgstat_report_replslot_drop(const char *slotname);
extern void pgstat_report_subworker_error(Oid subid, Oid subrelid, Oid relid,
LogicalRepMsgType command,
TransactionId xid, const char *errmsg);
extern void pgstat_report_subscription_error(Oid subid, bool is_apply_error);
extern void pgstat_report_subscription_drop(Oid subid);
extern void pgstat_initialize(void);
@ -1226,8 +1186,7 @@ extern void pgstat_send_wal(bool force);
extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
extern PgStat_StatSubWorkerEntry *pgstat_fetch_stat_subworker_entry(Oid subid,
Oid subrelid);
extern PgStat_StatSubEntry *pgstat_fetch_stat_subscription(Oid subid);
extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
extern PgStat_BgWriterStats *pgstat_fetch_stat_bgwriter(void);
extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);