1
0
mirror of https://github.com/postgres/postgres.git synced 2025-05-02 11:44:50 +03:00

smgr: Hold interrupts in most smgr functions

We need to hold interrupts across most of the smgr.c/md.c functions, as
otherwise interrupt processing, e.g. due to a < ERROR elog/ereport, can
trigger procsignal processing, which in turn can trigger smgrreleaseall(). As
the relevant code is not reentrant, we quickly end up in a bad situation.

The only reason we haven't noticed this before is that there is only one
non-error ereport called in affected routines, in register_dirty_segments(),
and that one is extremely rarely reached. If one enables fd.c's FDDEBUG it's
easy to reproduce crashes.

It seems better to put the HOLD_INTERRUPTS()/RESUME_INTERRUPTS() in smgr.c,
instead of trying to push them down to md.c where possible: For one, every
smgr implementation would be vulnerable, for another, a good bit of smgr.c
code itself is affected too.

Eventually we might want a more targeted solution, allowing e.g. a networked
smgr implementation to be interrupted, but many other, more complicated,
problems would need to be fixed for that to be viable (e.g. smgr.c is often
called with interrupts already held).

One could argue this should be backpatched, but the existing < ERROR
elog/ereports that can be reached with unmodified sources are unlikely to be
reached. On balance the risk of backpatching seems higher than the gain - at
least for now.

Reviewed-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://postgr.es/m/3vae7l5ozvqtxmd7rr7zaeq3qkuipz365u3rtim5t5wdkr6f4g@vkgf2fogjirl
This commit is contained in:
Andres Freund 2025-03-20 17:32:59 -04:00
parent fdb5dd6331
commit fc51a60dd4

View File

@ -40,6 +40,18 @@
* themselves, as there could pointers to them in active use. See * themselves, as there could pointers to them in active use. See
* smgrrelease() and smgrreleaseall(). * smgrrelease() and smgrreleaseall().
* *
* NB: We need to hold interrupts across most of the functions in this file,
* as otherwise interrupt processing, e.g. due to a < ERROR elog/ereport, can
* trigger procsignal processing, which in turn can trigger
* smgrreleaseall(). Most of the relevant code is not reentrant. It seems
* better to put the HOLD_INTERRUPTS()/RESUME_INTERRUPTS() here, instead of
* trying to push them down to md.c where possible: For one, every smgr
* implementation would be vulnerable, for another, a good bit of smgr.c code
* itself is affected too. Eventually we might want a more targeted solution,
* allowing e.g. a networked smgr implementation to be interrupted, but many
* other, more complicated, problems would need to be fixed for that to be
* viable (e.g. smgr.c is often called with interrupts already held).
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
@ -53,6 +65,7 @@
#include "access/xlogutils.h" #include "access/xlogutils.h"
#include "lib/ilist.h" #include "lib/ilist.h"
#include "miscadmin.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/md.h" #include "storage/md.h"
@ -158,12 +171,16 @@ smgrinit(void)
{ {
int i; int i;
HOLD_INTERRUPTS();
for (i = 0; i < NSmgr; i++) for (i = 0; i < NSmgr; i++)
{ {
if (smgrsw[i].smgr_init) if (smgrsw[i].smgr_init)
smgrsw[i].smgr_init(); smgrsw[i].smgr_init();
} }
RESUME_INTERRUPTS();
/* register the shutdown proc */ /* register the shutdown proc */
on_proc_exit(smgrshutdown, 0); on_proc_exit(smgrshutdown, 0);
} }
@ -176,11 +193,15 @@ smgrshutdown(int code, Datum arg)
{ {
int i; int i;
HOLD_INTERRUPTS();
for (i = 0; i < NSmgr; i++) for (i = 0; i < NSmgr; i++)
{ {
if (smgrsw[i].smgr_shutdown) if (smgrsw[i].smgr_shutdown)
smgrsw[i].smgr_shutdown(); smgrsw[i].smgr_shutdown();
} }
RESUME_INTERRUPTS();
} }
/* /*
@ -206,6 +227,8 @@ smgropen(RelFileLocator rlocator, ProcNumber backend)
Assert(RelFileNumberIsValid(rlocator.relNumber)); Assert(RelFileNumberIsValid(rlocator.relNumber));
HOLD_INTERRUPTS();
if (SMgrRelationHash == NULL) if (SMgrRelationHash == NULL)
{ {
/* First time through: initialize the hash table */ /* First time through: initialize the hash table */
@ -242,6 +265,8 @@ smgropen(RelFileLocator rlocator, ProcNumber backend)
smgrsw[reln->smgr_which].smgr_open(reln); smgrsw[reln->smgr_which].smgr_open(reln);
} }
RESUME_INTERRUPTS();
return reln; return reln;
} }
@ -283,6 +308,8 @@ smgrdestroy(SMgrRelation reln)
Assert(reln->pincount == 0); Assert(reln->pincount == 0);
HOLD_INTERRUPTS();
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
smgrsw[reln->smgr_which].smgr_close(reln, forknum); smgrsw[reln->smgr_which].smgr_close(reln, forknum);
@ -292,6 +319,8 @@ smgrdestroy(SMgrRelation reln)
&(reln->smgr_rlocator), &(reln->smgr_rlocator),
HASH_REMOVE, NULL) == NULL) HASH_REMOVE, NULL) == NULL)
elog(ERROR, "SMgrRelation hashtable corrupted"); elog(ERROR, "SMgrRelation hashtable corrupted");
RESUME_INTERRUPTS();
} }
/* /*
@ -302,12 +331,16 @@ smgrdestroy(SMgrRelation reln)
void void
smgrrelease(SMgrRelation reln) smgrrelease(SMgrRelation reln)
{ {
HOLD_INTERRUPTS();
for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++) for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++)
{ {
smgrsw[reln->smgr_which].smgr_close(reln, forknum); smgrsw[reln->smgr_which].smgr_close(reln, forknum);
reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
} }
reln->smgr_targblock = InvalidBlockNumber; reln->smgr_targblock = InvalidBlockNumber;
RESUME_INTERRUPTS();
} }
/* /*
@ -336,6 +369,9 @@ smgrdestroyall(void)
{ {
dlist_mutable_iter iter; dlist_mutable_iter iter;
/* seems unsafe to accept interrupts while in a dlist_foreach_modify() */
HOLD_INTERRUPTS();
/* /*
* Zap all unpinned SMgrRelations. We rely on smgrdestroy() to remove * Zap all unpinned SMgrRelations. We rely on smgrdestroy() to remove
* each one from the list. * each one from the list.
@ -347,6 +383,8 @@ smgrdestroyall(void)
smgrdestroy(rel); smgrdestroy(rel);
} }
RESUME_INTERRUPTS();
} }
/* /*
@ -362,12 +400,17 @@ smgrreleaseall(void)
if (SMgrRelationHash == NULL) if (SMgrRelationHash == NULL)
return; return;
/* seems unsafe to accept interrupts while iterating */
HOLD_INTERRUPTS();
hash_seq_init(&status, SMgrRelationHash); hash_seq_init(&status, SMgrRelationHash);
while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL) while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
{ {
smgrrelease(reln); smgrrelease(reln);
} }
RESUME_INTERRUPTS();
} }
/* /*
@ -400,7 +443,13 @@ smgrreleaserellocator(RelFileLocatorBackend rlocator)
bool bool
smgrexists(SMgrRelation reln, ForkNumber forknum) smgrexists(SMgrRelation reln, ForkNumber forknum)
{ {
return smgrsw[reln->smgr_which].smgr_exists(reln, forknum); bool ret;
HOLD_INTERRUPTS();
ret = smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
RESUME_INTERRUPTS();
return ret;
} }
/* /*
@ -413,7 +462,9 @@ smgrexists(SMgrRelation reln, ForkNumber forknum)
void void
smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
{ {
HOLD_INTERRUPTS();
smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo); smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
RESUME_INTERRUPTS();
} }
/* /*
@ -436,6 +487,8 @@ smgrdosyncall(SMgrRelation *rels, int nrels)
FlushRelationsAllBuffers(rels, nrels); FlushRelationsAllBuffers(rels, nrels);
HOLD_INTERRUPTS();
/* /*
* Sync the physical file(s). * Sync the physical file(s).
*/ */
@ -449,6 +502,8 @@ smgrdosyncall(SMgrRelation *rels, int nrels)
smgrsw[which].smgr_immedsync(rels[i], forknum); smgrsw[which].smgr_immedsync(rels[i], forknum);
} }
} }
RESUME_INTERRUPTS();
} }
/* /*
@ -471,6 +526,13 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
if (nrels == 0) if (nrels == 0)
return; return;
/*
* It would be unsafe to process interrupts between DropRelationBuffers()
* and unlinking the underlying files. This probably should be a critical
* section, but we're not there yet.
*/
HOLD_INTERRUPTS();
/* /*
* Get rid of any remaining buffers for the relations. bufmgr will just * Get rid of any remaining buffers for the relations. bufmgr will just
* drop them without bothering to write the contents. * drop them without bothering to write the contents.
@ -522,6 +584,8 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
} }
pfree(rlocators); pfree(rlocators);
RESUME_INTERRUPTS();
} }
@ -538,6 +602,8 @@ void
smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
const void *buffer, bool skipFsync) const void *buffer, bool skipFsync)
{ {
HOLD_INTERRUPTS();
smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum, smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
buffer, skipFsync); buffer, skipFsync);
@ -550,6 +616,8 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
reln->smgr_cached_nblocks[forknum] = blocknum + 1; reln->smgr_cached_nblocks[forknum] = blocknum + 1;
else else
reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
RESUME_INTERRUPTS();
} }
/* /*
@ -563,6 +631,8 @@ void
smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
int nblocks, bool skipFsync) int nblocks, bool skipFsync)
{ {
HOLD_INTERRUPTS();
smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum, smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum,
nblocks, skipFsync); nblocks, skipFsync);
@ -575,6 +645,8 @@ smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
reln->smgr_cached_nblocks[forknum] = blocknum + nblocks; reln->smgr_cached_nblocks[forknum] = blocknum + nblocks;
else else
reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
RESUME_INTERRUPTS();
} }
/* /*
@ -588,7 +660,13 @@ bool
smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
int nblocks) int nblocks)
{ {
return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks); bool ret;
HOLD_INTERRUPTS();
ret = smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks);
RESUME_INTERRUPTS();
return ret;
} }
/* /*
@ -601,7 +679,13 @@ uint32
smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, smgrmaxcombine(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum) BlockNumber blocknum)
{ {
return smgrsw[reln->smgr_which].smgr_maxcombine(reln, forknum, blocknum); uint32 ret;
HOLD_INTERRUPTS();
ret = smgrsw[reln->smgr_which].smgr_maxcombine(reln, forknum, blocknum);
RESUME_INTERRUPTS();
return ret;
} }
/* /*
@ -619,8 +703,10 @@ void
smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void **buffers, BlockNumber nblocks) void **buffers, BlockNumber nblocks)
{ {
HOLD_INTERRUPTS();
smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers, smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers,
nblocks); nblocks);
RESUME_INTERRUPTS();
} }
/* /*
@ -653,8 +739,10 @@ void
smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
const void **buffers, BlockNumber nblocks, bool skipFsync) const void **buffers, BlockNumber nblocks, bool skipFsync)
{ {
HOLD_INTERRUPTS();
smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum, smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum,
buffers, nblocks, skipFsync); buffers, nblocks, skipFsync);
RESUME_INTERRUPTS();
} }
/* /*
@ -665,8 +753,10 @@ void
smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
BlockNumber nblocks) BlockNumber nblocks)
{ {
HOLD_INTERRUPTS();
smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum, smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
nblocks); nblocks);
RESUME_INTERRUPTS();
} }
/* /*
@ -683,10 +773,14 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum)
if (result != InvalidBlockNumber) if (result != InvalidBlockNumber)
return result; return result;
HOLD_INTERRUPTS();
result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
reln->smgr_cached_nblocks[forknum] = result; reln->smgr_cached_nblocks[forknum] = result;
RESUME_INTERRUPTS();
return result; return result;
} }
@ -784,7 +878,9 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks,
void void
smgrregistersync(SMgrRelation reln, ForkNumber forknum) smgrregistersync(SMgrRelation reln, ForkNumber forknum)
{ {
HOLD_INTERRUPTS();
smgrsw[reln->smgr_which].smgr_registersync(reln, forknum); smgrsw[reln->smgr_which].smgr_registersync(reln, forknum);
RESUME_INTERRUPTS();
} }
/* /*
@ -816,7 +912,9 @@ smgrregistersync(SMgrRelation reln, ForkNumber forknum)
void void
smgrimmedsync(SMgrRelation reln, ForkNumber forknum) smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
{ {
HOLD_INTERRUPTS();
smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum); smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
RESUME_INTERRUPTS();
} }
/* /*