mirror of
https://github.com/postgres/postgres.git
synced 2025-08-24 09:27:52 +03:00
We have repeatedly seen the buildfarm reach the Assert(false) in SyncRepGetSyncStandbysPriority. This apparently is due to failing to consider the possibility that the sync_standby_priority values in shared memory might be inconsistent; but they will be whenever only some of the walsenders have updated their values after a change in the synchronous_standby_names setting. That function is vastly too complex for what it does, anyway, so rewriting it seems better than trying to apply a band-aid fix. Furthermore, the API of SyncRepGetSyncStandbys is broken by design: it returns a list of WalSnd array indexes, but there is nothing guaranteeing that the contents of the WalSnd array remain stable. Thus, if some walsender exits and then a new walsender process takes over that WalSnd array slot, a caller might make use of WAL position data that it should not, potentially leading to incorrect decisions about whether to release transactions that are waiting for synchronous commit. To fix, replace SyncRepGetSyncStandbys with a new function SyncRepGetCandidateStandbys that copies all the required data from shared memory while holding the relevant mutexes. If the associated walsender process then exits, this data is still safe to make release decisions with, since we know that that much WAL *was* sent to a valid standby server. This incidentally means that we no longer need to treat sync_standby_priority as protected by the SyncRepLock rather than the per-walsender mutex. SyncRepGetSyncStandbys is no longer used by the core code, so remove it entirely in HEAD. However, it seems possible that external code is relying on that function, so do not remove it from the back branches. Instead, just remove the known-incorrect Assert. When the bug occurs, the function will return a too-short list, which callers should treat as meaning there are not enough sync standbys, which seems like a reasonably safe fallback until the inconsistent state is resolved. 
Moreover it's bug-compatible with what has been happening in non-assert builds. We cannot do anything about the walsender-replacement race condition without an API/ABI break. The bogus assertion exists back to 9.6, but 9.6 is sufficiently different from the later branches that the patch doesn't apply at all. I chose to just remove the bogus assertion in 9.6, feeling that the probability of a bad outcome from the walsender-replacement race condition is too low to justify rewriting the whole patch for 9.6. Discussion: https://postgr.es/m/21519.1585272409@sss.pgh.pa.us
119 lines
3.6 KiB
C
119 lines
3.6 KiB
C
/*-------------------------------------------------------------------------
 *
 * syncrep.h
 *	  Exports from replication/syncrep.c.
 *
 * Portions Copyright (c) 2010-2019, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *		src/include/replication/syncrep.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef _SYNCREP_H
|
|
#define _SYNCREP_H
|
|
|
|
#include "access/xlogdefs.h"
|
|
#include "utils/guc.h"
|
|
|
|
/*
 * SyncRepRequested
 *		True when at least one WAL sender is allowed (max_wal_senders > 0) and
 *		synchronous_commit is set above SYNCHRONOUS_COMMIT_LOCAL_FLUSH.
 *
 * The expansion must directly follow the backslash continuation; any other
 * line in between would become the macro body instead.
 */
#define SyncRepRequested() \
	(max_wal_senders > 0 && synchronous_commit > SYNCHRONOUS_COMMIT_LOCAL_FLUSH)

/*
 * SyncRepWaitMode
 *
 * SYNC_REP_NO_WAIT is negative and is not counted in NUM_SYNC_REP_WAIT_MODE;
 * the three real wait modes are numbered consecutively from zero.
 */
#define SYNC_REP_NO_WAIT		(-1)
#define SYNC_REP_WAIT_WRITE		0
#define SYNC_REP_WAIT_FLUSH		1
#define SYNC_REP_WAIT_APPLY		2

/* Number of non-negative wait modes defined above */
#define NUM_SYNC_REP_WAIT_MODE	3

/* syncRepState: possible values, numbered consecutively from zero */
#define SYNC_REP_NOT_WAITING		0
#define SYNC_REP_WAITING			1
#define SYNC_REP_WAIT_COMPLETE		2

/* syncrep_method of SyncRepConfigData */
#define SYNC_REP_PRIORITY		0
#define SYNC_REP_QUORUM		1

/*
|
|
* SyncRepGetCandidateStandbys returns an array of these structs,
|
|
* one per candidate synchronous walsender.
|
|
*/
|
|
typedef struct SyncRepStandbyData
|
|
{
|
|
/* Copies of relevant fields from WalSnd shared-memory struct */
|
|
pid_t pid;
|
|
XLogRecPtr write;
|
|
XLogRecPtr flush;
|
|
XLogRecPtr apply;
|
|
int sync_standby_priority;
|
|
/* Index of this walsender in the WalSnd shared-memory array */
|
|
int walsnd_index;
|
|
/* This flag indicates whether this struct is about our own process */
|
|
bool is_me;
|
|
} SyncRepStandbyData;
|
|
|
|
/*
|
|
* Struct for the configuration of synchronous replication.
|
|
*
|
|
* Note: this must be a flat representation that can be held in a single
|
|
* chunk of malloc'd memory, so that it can be stored as the "extra" data
|
|
* for the synchronous_standby_names GUC.
|
|
*/
|
|
typedef struct SyncRepConfigData
|
|
{
|
|
int config_size; /* total size of this struct, in bytes */
|
|
int num_sync; /* number of sync standbys that we need to
|
|
* wait for */
|
|
uint8 syncrep_method; /* method to choose sync standbys */
|
|
int nmembers; /* number of members in the following list */
|
|
/* member_names contains nmembers consecutive nul-terminated C strings */
|
|
char member_names[FLEXIBLE_ARRAY_MEMBER];
|
|
} SyncRepConfigData;
|
|
|
|
/* current synchronous replication configuration */
extern SyncRepConfigData *SyncRepConfig;

/* communication variables for parsing synchronous_standby_names GUC */
extern SyncRepConfigData *syncrep_parse_result;
extern char *syncrep_parse_error_msg;

/* user-settable parameters for synchronous replication */
extern char *SyncRepStandbyNames;

/* called by user backend */
|
|
extern void SyncRepWaitForLSN(XLogRecPtr lsn, bool commit);
|
|
|
|
/* called at backend exit */
|
|
extern void SyncRepCleanupAtProcExit(void);
|
|
|
|
/* called by wal sender */
|
|
extern void SyncRepInitConfig(void);
|
|
extern void SyncRepReleaseWaiters(void);
|
|
|
|
/* called by wal sender and user backend */
|
|
extern int SyncRepGetCandidateStandbys(SyncRepStandbyData **standbys);
|
|
|
|
/* obsolete, do not use in new code */
|
|
extern List *SyncRepGetSyncStandbys(bool *am_sync);
|
|
|
|
/* called by checkpointer */
|
|
extern void SyncRepUpdateSyncStandbysDefined(void);
|
|
|
|
/* GUC infrastructure */
|
|
extern bool check_synchronous_standby_names(char **newval, void **extra, GucSource source);
|
|
extern void assign_synchronous_standby_names(const char *newval, void *extra);
|
|
extern void assign_synchronous_commit(int newval, void *extra);
|
|
|
|
/*
 * Internal functions for parsing synchronous_standby_names grammar,
 * in syncrep_gram.y and syncrep_scanner.l
 */
extern int	syncrep_yyparse(void);
extern int	syncrep_yylex(void);
extern void syncrep_yyerror(const char *str);
extern void syncrep_scanner_init(const char *query_string);
extern void syncrep_scanner_finish(void);

#endif /* _SYNCREP_H */
|