1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-04 12:42:24 +03:00

Fix the race condition in ReplicationSlotAcquire().

After commit f41d8468dd, a process could acquire and use a replication
slot that had just been invalidated, leading to failures while accessing
WAL.

To ensure that we don't accidentally start using invalid slots, we must
perform the invalidation check after acquiring the slot or under the
spinlock where we associate the slot with a particular process. We choose
the earlier method to keep the code simple.

Reported-by: Hou Zhijie <houzj.fnst@fujitsu.com>
Author: Nisha Moond <nisha.moond412@gmail.com>
Reviewed-by: Hou Zhijie <houzj.fnst@fujitsu.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Discussion: https://postgr.es/m/CABdArM7J-LbGoMPGUPiFiLOyB_TZ5+YaZb=HMES0mQqzVTn8Gg@mail.gmail.com
This commit is contained in:
Amit Kapila 2025-02-27 09:47:04 +05:30
parent 845511a72a
commit 8709dccc79

View File

@ -580,19 +580,6 @@ retry:
name)));
}
/* Invalid slots can't be modified or used before accessing the WAL. */
if (error_if_invalid && s->data.invalidated != RS_INVAL_NONE)
{
LWLockRelease(ReplicationSlotControlLock);
ereport(ERROR,
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("can no longer access replication slot \"%s\"",
NameStr(s->data.name)),
errdetail("This replication slot has been invalidated due to \"%s\".",
GetSlotInvalidationCauseName(s->data.invalidated)));
}
/*
* This is the slot we want; check if it's active under some other
* process. In single user mode, we don't need this check.
@ -650,12 +637,25 @@ retry:
else if (!nowait)
ConditionVariableCancelSleep(); /* no sleep needed after all */
/* Let everybody know we've modified this slot */
ConditionVariableBroadcast(&s->active_cv);
/* We made this slot active, so it's ours now. */
MyReplicationSlot = s;
/*
* We need to check for invalidation after making the slot ours to avoid
* the possible race condition with the checkpointer that can otherwise
* invalidate the slot immediately after the check.
*/
if (error_if_invalid && s->data.invalidated != RS_INVAL_NONE)
ereport(ERROR,
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("can no longer access replication slot \"%s\"",
NameStr(s->data.name)),
errdetail("This replication slot has been invalidated due to \"%s\".",
GetSlotInvalidationCauseName(s->data.invalidated)));
/* Let everybody know we've modified this slot */
ConditionVariableBroadcast(&s->active_cv);
/*
* The call to pgstat_acquire_replslot() protects against stats for a
* different slot, from before a restart or such, being present during