From 092c6936de49effe63daad94855bcd8ef26a09dd Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Fri, 27 Mar 2020 16:04:52 -0300 Subject: [PATCH] Set wal_receiver_create_temp_slot PGC_POSTMASTER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 329730827848 gave walreceiver the ability to create and use a temporary replication slot, and made it controllable by a GUC (enabled by default) that can be changed with SIGHUP. That's useful but has two problems: one, it's possible to cause the origin server to fill its disk if the slot doesn't advance in time; and also there's a disconnect between state passed down via the startup process and GUCs that walreceiver reads directly. We handle the first problem by setting the option to disabled by default. If the user enables it, its on their head to make sure that disk doesn't fill up. We handle the second problem by passing the flag via startup rather than having walreceiver acquire it directly, and making it PGC_POSTMASTER (which ensures a walreceiver always has the fresh value). A future commit can relax this (to PGC_SIGHUP again) by having the startup process signal walreceiver to shutdown whenever the value changes. Author: Sergei Kornilov Reviewed-by: Michael Paquier Reviewed-by: Álvaro Herrera Discussion: https://postgr.es/m/20200122055510.GH174860@paquier.xyz --- doc/src/sgml/config.sgml | 6 +- src/backend/access/transam/xlog.c | 4 +- src/backend/replication/walreceiver.c | 62 +++++++++---------- src/backend/replication/walreceiverfuncs.c | 28 +++++++-- src/backend/utils/misc/guc.c | 4 +- src/backend/utils/misc/postgresql.conf.sample | 4 +- src/include/access/xlog.h | 1 + src/include/replication/walreceiver.h | 4 +- 8 files changed, 63 insertions(+), 50 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 355b408b0a6..b5de6c32376 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4163,11 +4163,7 @@ ANY num_sync ( ). - The default is on. The only reason to turn this off would be if the - remote instance is currently out of available replication slots. This - parameter can only be set in the postgresql.conf - file or on the server command line. Changes only take effect when the - WAL receiver process starts a new connection. + The default is off. This parameter can only be set at server start. diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 7621fc05e24..c7d93d37351 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -290,6 +290,7 @@ bool StandbyModeRequested = false; char *PrimaryConnInfo = NULL; char *PrimarySlotName = NULL; char *PromoteTriggerFile = NULL; +bool wal_receiver_create_temp_slot = false; /* are we currently in standby mode? */ bool StandbyMode = false; @@ -11975,7 +11976,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, } curFileTLI = tli; RequestXLogStreaming(tli, ptr, PrimaryConnInfo, - PrimarySlotName); + PrimarySlotName, + wal_receiver_create_temp_slot); receivedUpto = 0; } diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 25e0333c9e1..a1459ca7f69 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -15,6 +15,12 @@ * WalRcv->receivedUpto variable in shared memory, to inform the startup * process of how far it can proceed with XLOG replay. * + * A WAL receiver cannot directly load GUC parameters used when establishing + * its connection to the primary. Instead it relies on parameter values + * that are passed down by the startup process when streaming is requested. + * This applies, for example, to the replication slot and the connection + * string to be used for the connection with the primary. + * * If the primary server ends streaming, but doesn't disconnect, walreceiver * goes into "waiting" mode, and waits for the startup process to give new * instructions. The startup process will treat that the same as @@ -73,8 +79,11 @@ #include "utils/timestamp.h" -/* GUC variables */ -bool wal_receiver_create_temp_slot; +/* + * GUC variables. (Other variables that affect walreceiver are in xlog.c + * because they're passed down from the startup process, for better + * synchronization.) + */ int wal_receiver_status_interval; int wal_receiver_timeout; bool hot_standby_feedback; @@ -236,6 +245,12 @@ WalReceiverMain(void) startpoint = walrcv->receiveStart; startpointTLI = walrcv->receiveStartTLI; + /* + * At most one of is_temp_slot and slotname can be set; otherwise, + * RequestXLogStreaming messed up. + */ + Assert(!is_temp_slot || (slotname[0] == '\0')); + /* Initialise to a sanish value */ walrcv->lastMsgSendTime = walrcv->lastMsgReceiptTime = walrcv->latestWalEndTime = now; @@ -349,42 +364,21 @@ WalReceiverMain(void) WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI); /* - * Create temporary replication slot if no slot name is configured or - * the slot from the previous run was temporary, unless - * wal_receiver_create_temp_slot is disabled. We also need to handle - * the case where the previous run used a temporary slot but - * wal_receiver_create_temp_slot was changed in the meantime. In that - * case, we delete the old slot name in shared memory. (This would - * all be a bit easier if we just didn't copy the slot name into - * shared memory, since we won't need it again later, but then we - * can't see the slot name in the stats views.) + * Create temporary replication slot if requested, and update slot + * name in shared memory. (Note the slot name cannot already be set + * in this case.) */ - if (slotname[0] == '\0' || is_temp_slot) + if (is_temp_slot) { - bool changed = false; + snprintf(slotname, sizeof(slotname), + "pg_walreceiver_%lld", + (long long int) walrcv_get_backend_pid(wrconn)); - if (wal_receiver_create_temp_slot) - { - snprintf(slotname, sizeof(slotname), - "pg_walreceiver_%lld", - (long long int) walrcv_get_backend_pid(wrconn)); + walrcv_create_slot(wrconn, slotname, true, 0, NULL); - walrcv_create_slot(wrconn, slotname, true, 0, NULL); - changed = true; - } - else if (slotname[0] != '\0') - { - slotname[0] = '\0'; - changed = true; - } - - if (changed) - { - SpinLockAcquire(&walrcv->mutex); - strlcpy(walrcv->slotname, slotname, NAMEDATALEN); - walrcv->is_temp_slot = wal_receiver_create_temp_slot; - SpinLockRelease(&walrcv->mutex); - } + SpinLockAcquire(&walrcv->mutex); + strlcpy(walrcv->slotname, slotname, NAMEDATALEN); + SpinLockRelease(&walrcv->mutex); } /* diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c index 89c903e45af..21d18236076 100644 --- a/src/backend/replication/walreceiverfuncs.c +++ b/src/backend/replication/walreceiverfuncs.c @@ -215,13 +215,19 @@ ShutdownWalRcv(void) /* * Request postmaster to start walreceiver. * - * recptr indicates the position where streaming should begin, conninfo - * is a libpq connection string to use, and slotname is, optionally, the name - * of a replication slot to acquire. + * "recptr" indicates the position where streaming should begin. "conninfo" + * is a libpq connection string to use. "slotname" is, optionally, the name + * of a replication slot to acquire. "create_temp_slot" indicates to create + * a temporary slot when no "slotname" is given. + * + * WAL receivers do not directly load GUC parameters used for the connection + * to the primary, and rely on the values passed down by the caller of this + * routine instead. Hence, the addition of any new parameters should happen + * through this code path. */ void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo, - const char *slotname) + const char *slotname, bool create_temp_slot) { WalRcvData *walrcv = WalRcv; bool launch = false; @@ -248,10 +254,22 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo, else walrcv->conninfo[0] = '\0'; - if (slotname != NULL) + /* + * Use configured replication slot if present, and ignore the value + * of create_temp_slot as the slot name should be persistent. Otherwise, + * use create_temp_slot to determine whether this WAL receiver should + * create a temporary slot by itself and use it, or not. + */ + if (slotname != NULL && slotname[0] != '\0') + { strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN); + walrcv->is_temp_slot = false; + } else + { walrcv->slotname[0] = '\0'; + walrcv->is_temp_slot = create_temp_slot; + } if (walrcv->walRcvState == WALRCV_STOPPED) { diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index af876d1f01d..2c3cbbaa68e 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2050,11 +2050,11 @@ static struct config_bool ConfigureNamesBool[] = }, { - {"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY, + {"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY, gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."), }, &wal_receiver_create_temp_slot, - true, + false, NULL, NULL, NULL }, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index aa44f0c9bf2..f2e55d1bd35 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -321,7 +321,9 @@ #max_standby_streaming_delay = 30s # max delay before canceling queries # when reading streaming WAL; # -1 allows indefinite delay -#wal_receiver_create_temp_slot = on # create temp slot if primary_slot_name not set +#wal_receiver_create_temp_slot = off # Create temp slot if primary_slot_name + # is not set. + # (change requires restart) #wal_receiver_status_interval = 10s # send replies at least this often # 0 disables #hot_standby_feedback = off # send info from standby to prevent diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 331497bcfb9..a4485efc00f 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -129,6 +129,7 @@ extern int recoveryTargetAction; extern int recovery_min_apply_delay; extern char *PrimaryConnInfo; extern char *PrimarySlotName; +extern bool wal_receiver_create_temp_slot; /* indirectly set via GUC system */ extern TransactionId recoveryTargetXid; diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h index e08afc6548f..cf3e43128c7 100644 --- a/src/include/replication/walreceiver.h +++ b/src/include/replication/walreceiver.h @@ -23,7 +23,6 @@ #include "utils/tuplestore.h" /* user-settable parameters */ -extern bool wal_receiver_create_temp_slot; extern int wal_receiver_status_interval; extern int wal_receiver_timeout; extern bool hot_standby_feedback; @@ -321,7 +320,8 @@ extern void ShutdownWalRcv(void); extern bool WalRcvStreaming(void); extern bool WalRcvRunning(void); extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, - const char *conninfo, const char *slotname); + const char *conninfo, const char *slotname, + bool create_temp_slot); extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI); extern int GetReplicationApplyDelay(void); extern int GetReplicationTransferLatency(void);