From 2f6501fa3c54bbe4568e3bcccd9a60d26a46b5ee Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 14 Feb 2022 16:44:28 -0800 Subject: [PATCH] Move replication slot release to before_shmem_exit(). Previously, replication slots were released in ProcKill() on error, resulting in reporting replication slot drop of ephemeral slots after the stats subsystem was already shut down. To fix this problem, move replication slot release to a before_shmem_exit() hook that is called before the stats collector shuts down. There wasn't really a good reason for the slot handling to be in ProcKill() anyway. Patch by Masahiko Sawada, with very minor polishing by me. I, Andres, wrote a test for dropping slots during process exit, but there may be some OS dependent issues around the number of times FATAL error messages are displayed due to a still debated libpq issue. So that test will be committed separately / later. Reviewed-By: Kyotaro Horiguchi Reviewed-By: Andres Freund Author: Masahiko Sawada Discussion: https://postgr.es/m/CAD21AoDAeEpAbZEyYJsPZJUmSPaRicVSBObaL7sPaofnKz+9zg@mail.gmail.com --- src/backend/replication/slot.c | 25 +++++++++++++++++++++++++ src/backend/storage/lmgr/proc.c | 7 ------- src/backend/tcop/postgres.c | 4 ++-- src/backend/utils/init/postinit.c | 7 +++++++ src/include/replication/slot.h | 1 + 5 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index e5e0cf87688..5da5fa825a2 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -46,6 +46,7 @@ #include "pgstat.h" #include "replication/slot.h" #include "storage/fd.h" +#include "storage/ipc.h" #include "storage/proc.h" #include "storage/procarray.h" #include "utils/builtins.h" @@ -99,6 +100,7 @@ ReplicationSlot *MyReplicationSlot = NULL; int max_replication_slots = 0; /* the maximum number of replication * slots */ +static void ReplicationSlotShmemExit(int code, Datum arg); static void ReplicationSlotDropAcquired(void); static void ReplicationSlotDropPtr(ReplicationSlot *slot); @@ -160,6 +162,29 @@ ReplicationSlotsShmemInit(void) } } +/* + * Register the callback for replication slot cleanup and releasing. + */ +void +ReplicationSlotInitialize(void) +{ + before_shmem_exit(ReplicationSlotShmemExit, 0); +} + +/* + * Release and cleanup replication slots. + */ +static void +ReplicationSlotShmemExit(int code, Datum arg) +{ + /* Make sure active replication slots are released */ + if (MyReplicationSlot != NULL) + ReplicationSlotRelease(); + + /* Also cleanup all the temporary slots. */ + ReplicationSlotCleanup(); +} + /* * Check whether the passed slot name is valid and report errors at elevel. * diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 37f032e7b95..90283f8a9f5 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -831,13 +831,6 @@ ProcKill(int code, Datum arg) /* Cancel any pending condition variable sleep, too */ ConditionVariableCancelSleep(); - /* Make sure active replication slots are released */ - if (MyReplicationSlot != NULL) - ReplicationSlotRelease(); - - /* Also cleanup all the temporary slots. */ - ReplicationSlotCleanup(); - /* * Detach from any lock group of which we are a member. If the leader * exist before all other group members, its PGPROC will remain allocated diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index fda2e9360e2..38d8b97894c 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -4261,8 +4261,8 @@ PostgresMain(const char *dbname, const char *username) * We can't release replication slots inside AbortTransaction() as we * need to be able to start and abort transactions while having a slot * acquired. But we never need to hold them across top level errors, - * so releasing here is fine. There's another cleanup in ProcKill() - * ensuring we'll correctly cleanup on FATAL errors as well. + * so releasing here is fine. There also is a before_shmem_exit() + * callback ensuring correct cleanup on FATAL errors. */ if (MyReplicationSlot != NULL) ReplicationSlotRelease(); diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 02207a5b6f8..e2208151e45 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -43,6 +43,7 @@ #include "pgstat.h" #include "postmaster/autovacuum.h" #include "postmaster/postmaster.h" +#include "replication/slot.h" #include "replication/walsender.h" #include "storage/bufmgr.h" #include "storage/fd.h" @@ -626,6 +627,12 @@ BaseInit(void) * ever try to insert XLOG. */ InitXLogInsert(); + + /* + * Initialize replication slots after pgstat. The exit hook might need to + * drop ephemeral slots, which in turn triggers stats reporting. + */ + ReplicationSlotInitialize(); } diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index f833b1d6dc9..24b30210c3e 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -206,6 +206,7 @@ extern void ReplicationSlotSave(void); extern void ReplicationSlotMarkDirty(void); /* misc stuff */ +extern void ReplicationSlotInitialize(void); extern bool ReplicationSlotValidateName(const char *name, int elevel); extern void ReplicationSlotReserveWal(void); extern void ReplicationSlotsComputeRequiredXmin(bool already_locked);