diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index bb5d9962ed3..b5d32bb720a 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -2148,18 +2148,14 @@ LOG: database system is ready to accept read only connections
- The setting of some parameters on the standby will need reconfiguration
- if they have been changed on the primary. For these parameters,
- the value on the standby must
- be equal to or greater than the value on the primary.
- Therefore, if you want to increase these values, you should do so on all
- standby servers first, before applying the changes to the primary server.
- Conversely, if you want to decrease these values, you should do so on the
- primary server first, before applying the changes to all standby servers.
- If these parameters
- are not set high enough then the standby will refuse to start.
- Higher values can then be supplied and the server
- restarted to begin recovery again. These parameters are:
+ The settings of some parameters determine the size of shared memory for
+ tracking transaction IDs, locks, and prepared transactions. These shared
+ memory structures should be no smaller on a standby than on the primary.
+ Otherwise, it could happen that the standby runs out of shared memory
+ during recovery. For example, if the primary uses a prepared transaction
+ but the standby did not allocate any shared memory for tracking prepared
+ transactions, then recovery will abort and cannot continue until the
+ standby's configuration is changed. The parameters affected are:
@@ -2188,6 +2184,34 @@ LOG: database system is ready to accept read only connections
+
+ The easiest way to ensure this does not become a problem is to have these
+ parameters set on the standbys to values equal to or greater than on the
+ primary. Therefore, if you want to increase these values, you should do
+ so on all standby servers first, before applying the changes to the
+ primary server. Conversely, if you want to decrease these values, you
+ should do so on the primary server first, before applying the changes to
+ all standby servers. The WAL tracks changes to these parameters on the
+ primary, and if a standby processes WAL that indicates that the current
+ value on the primary is higher than its own value, it will log a warning, for example:
+
+WARNING: insufficient setting for parameter max_connections
+DETAIL: max_connections = 80 is a lower setting than on the master server (where its value was 100).
+HINT: Change parameters and restart the server, or there may be resource exhaustion errors sooner or later.
+
+ Recovery will continue but could abort at any time thereafter. (It might
+ also never fail if the activity on the primary does not actually
+ require the full extent of the allocated shared memory resources.) If
+ recovery reaches a point where it cannot continue due to lack of shared
+ memory, recovery will pause and another warning will be logged, for example:
+
+WARNING: recovery paused because of insufficient parameter settings
+DETAIL: See earlier in the log about which settings are insufficient.
+HINT: Recovery cannot continue unless the configuration is changed and the server restarted.
+
+ This warning will be repeated once a minute. At that point, the settings on
+ the standby need to be updated and the instance restarted before recovery
+ can continue.
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 5adf956f413..ce35f15f347 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -2360,11 +2360,14 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
/* Get a free gxact from the freelist */
if (TwoPhaseState->freeGXacts == NULL)
+ {
+ StandbyParamErrorPauseRecovery();
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("maximum number of prepared transactions reached"),
errhint("Increase max_prepared_transactions (currently %d).",
max_prepared_xacts)));
+ }
gxact = TwoPhaseState->freeGXacts;
TwoPhaseState->freeGXacts = gxact->next;
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8fe92962b0d..1951103b262 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -264,6 +264,8 @@ bool InArchiveRecovery = false;
static bool standby_signal_file_found = false;
static bool recovery_signal_file_found = false;
+static bool need_restart_for_parameter_values = false;
+
/* Was the last xlog file restored from archive, or local? */
static bool restoredFromArchive = false;
@@ -5998,6 +6000,54 @@ SetRecoveryPause(bool recoveryPause)
SpinLockRelease(&XLogCtl->info_lck);
}
+/*
+ * If in hot standby, pause recovery because of a parameter conflict.
+ *
+ * This is a no-op unless it is called in the startup process and
+ * RecoveryRequiresIntParameter() has previously flagged an insufficient
+ * setting (need_restart_for_parameter_values). Otherwise it requests a
+ * recovery pause and then loops, sleeping one second at a time and
+ * repeating the WARNING once a minute, until RecoveryIsPaused() reports
+ * that recovery is no longer paused. Startup-process interrupts are
+ * serviced on every iteration of the loop.
+ *
+ * Similar to recoveryPausesHere() but with different messaging. The user
+ * is expected to make the parameter change and restart the server. If they
+ * just unpause recovery, they will then run into whatever error is after this
+ * function call for the non-hot-standby case.
+ *
+ * We intentionally do not give advice about specific parameters or values
+ * here because it might be misleading. For example, if we run out of lock
+ * space, then in the single-server case we would recommend raising
+ * max_locks_per_transaction, but in recovery it could equally be the case
+ * that max_connections is out of sync with the primary. If we get here, we
+ * have already logged any parameter discrepancies in
+ * RecoveryRequiresIntParameter(), so users can go back to that and get
+ * concrete and accurate information.
+ */
+void
+StandbyParamErrorPauseRecovery(void)
+{
+ TimestampTz last_warning = 0; /* time the WARNING was last logged */
+
+ if (!AmStartupProcess() || !need_restart_for_parameter_values)
+ return;
+
+ SetRecoveryPause(true);
+
+ do
+ {
+ TimestampTz now = GetCurrentTimestamp();
+
+ if (TimestampDifferenceExceeds(last_warning, now, 60000)) /* repeat once a minute */
+ {
+ ereport(WARNING,
+ (errmsg("recovery paused because of insufficient parameter settings"),
+ errdetail("See earlier in the log about which settings are insufficient."),
+ errhint("Recovery cannot continue unless the configuration is changed and the server restarted.")));
+ last_warning = now;
+ }
+
+ pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE);
+ pg_usleep(1000000L); /* 1000 ms */
+ pgstat_report_wait_end();
+ HandleStartupProcInterrupts();
+ }
+ while (RecoveryIsPaused());
+}
+
/*
* When recovery_min_apply_delay is set, we wait long enough to make sure
* certain record types are applied at least that interval behind the master.
@@ -6177,16 +6227,20 @@ GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
* Note that text field supplied is a parameter name and does not require
* translation
*/
-#define RecoveryRequiresIntParameter(param_name, currValue, minValue) \
-do { \
- if ((currValue) < (minValue)) \
- ereport(ERROR, \
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
- errmsg("hot standby is not possible because %s = %d is a lower setting than on the master server (its value was %d)", \
- param_name, \
- currValue, \
- minValue))); \
-} while(0)
+static void
+RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)
+{
+ if (currValue < minValue) /* this server's setting is lower than the primary's */
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("insufficient setting for parameter %s", param_name),
+ errdetail("%s = %d is a lower setting than on the master server (where its value was %d).",
+ param_name, currValue, minValue),
+ errhint("Change parameters and restart the server, or there may be resource exhaustion errors sooner or later.")));
+ need_restart_for_parameter_values = true; /* checked by StandbyParamErrorPauseRecovery() */
+ }
+}
/*
* Check to see if required parameters are set high enough on this server
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index f45a619deb8..cfb88db4a4d 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -3654,7 +3654,14 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
* If it still won't fit then we're out of memory
*/
if (head + nxids > pArray->maxKnownAssignedXids)
- elog(ERROR, "too many KnownAssignedXids");
+ {
+ StandbyParamErrorPauseRecovery();
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of shared memory"),
+ errdetail("There are no more KnownAssignedXids slots."),
+ errhint("You might need to increase max_connections.")));
+ }
}
/* Now we can insert the xids into the space starting at head */
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 3013ef63d05..c8def1674f4 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -965,10 +965,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
if (locallockp)
*locallockp = NULL;
if (reportMemoryError)
+ {
+ StandbyParamErrorPauseRecovery();
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
errhint("You might need to increase max_locks_per_transaction.")));
+ }
else
return LOCKACQUIRE_NOT_AVAIL;
}
@@ -1003,10 +1006,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
if (locallockp)
*locallockp = NULL;
if (reportMemoryError)
+ {
+ StandbyParamErrorPauseRecovery();
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
errhint("You might need to increase max_locks_per_transaction.")));
+ }
else
return LOCKACQUIRE_NOT_AVAIL;
}
@@ -2828,6 +2834,7 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock)
{
LWLockRelease(partitionLock);
LWLockRelease(&MyProc->backendLock);
+ StandbyParamErrorPauseRecovery();
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
@@ -4158,6 +4165,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
if (!lock)
{
LWLockRelease(partitionLock);
+ StandbyParamErrorPauseRecovery();
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
@@ -4223,6 +4231,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
elog(PANIC, "lock table corrupted");
}
LWLockRelease(partitionLock);
+ StandbyParamErrorPauseRecovery();
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
@@ -4515,6 +4524,7 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
{
LWLockRelease(partitionLock);
LWLockRelease(&proc->backendLock);
+ StandbyParamErrorPauseRecovery();
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 2b1b67d35c1..9ec7b31cce1 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -287,6 +287,7 @@ extern XLogRecPtr GetXLogInsertRecPtr(void);
extern XLogRecPtr GetXLogWriteRecPtr(void);
extern bool RecoveryIsPaused(void);
extern void SetRecoveryPause(bool recoveryPause);
+extern void StandbyParamErrorPauseRecovery(void);
extern TimestampTz GetLatestXTime(void);
extern TimestampTz GetCurrentChunkReplayStartTime(void);