1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-31 17:02:12 +03:00

Handle impending sinval queue overflow by means of a separate signal

(SIGUSR1, which we have not been using recently) instead of piggybacking
on SIGUSR2-driven NOTIFY processing.  This has several good results:
the processing needed to drain the sinval queue is a lot less than the
processing needed to answer a NOTIFY; there's less contention since we
don't have a bunch of backends all trying to acquire exclusive lock on
pg_listener; backends that are sitting inside a transaction block can
still drain the queue, whereas NOTIFY processing can't run if there's
an open transaction block.  (This last is a fairly serious issue that
I don't think we ever recognized before --- with clients like JDBC that
tend to sit with open transaction blocks, the sinval queue draining
mechanism never really worked as intended, probably resulting in a lot
of useless cache-reset overhead.)  This is the last of several proposed
changes in response to Philip Warner's recent report of sinval-induced
performance problems.
This commit is contained in:
Tom Lane
2004-05-23 03:50:45 +00:00
parent 4d86ae4260
commit ebfc56d3fb
8 changed files with 289 additions and 37 deletions

View File

@@ -8,20 +8,46 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/ipc/sinval.c,v 1.62 2003/11/29 19:51:56 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/storage/ipc/sinval.c,v 1.63 2004/05/23 03:50:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <signal.h>
#include "commands/async.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "storage/sinvaladt.h"
#include "utils/inval.h"
#include "utils/tqual.h"
#include "miscadmin.h"
/*
* Because backends sitting idle will not be reading sinval events, we
* need a way to give an idle backend a swift kick in the rear and make
* it catch up before the sinval queue overflows and forces everyone
* through a cache reset exercise. This is done by broadcasting SIGUSR1
* to all backends when the queue is threatening to become full.
*
* State for catchup events consists of two flags: one saying whether
* the signal handler is currently allowed to call ProcessCatchupEvent
* directly, and one saying whether the signal has occurred but the handler
* was not allowed to call ProcessCatchupEvent at the time.
*
* NB: the "volatile" on these declarations is critical! If your compiler
* does not grok "volatile", you'd be best advised to compile this file
* with all optimization turned off.
*/
static volatile int catchupInterruptEnabled = 0;
static volatile int catchupInterruptOccurred = 0;
static void ProcessCatchupEvent(void);
/****************************************************************************/
/* CreateSharedInvalidationState() Initialize SI buffer */
/* */
@@ -91,6 +117,12 @@ ReceiveSharedInvalidMessages(
for (;;)
{
/*
* We can discard any pending catchup event, since we will not exit
* this loop until we're fully caught up.
*/
catchupInterruptOccurred = 0;
/*
* We can run SIGetDataEntry in parallel with other backends
* running SIGetDataEntry for themselves, since each instance will
@@ -137,6 +169,203 @@ ReceiveSharedInvalidMessages(
}
/*
* CatchupInterruptHandler
*
* This is the signal handler for SIGUSR1.
*
* If we are idle (catchupInterruptEnabled is set), we can safely
* invoke ProcessCatchupEvent directly. Otherwise, just set a flag
* to do it later. (Note that it's quite possible for normal processing
* of the current transaction to cause ReceiveSharedInvalidMessages()
* to be run later on; in that case the flag will get cleared again,
* since there's no longer any reason to do anything.)
*/
void
CatchupInterruptHandler(SIGNAL_ARGS)
{
int save_errno = errno;
/*
* Note: this is a SIGNAL HANDLER. You must be very wary what you do
* here.
*/
/* Don't joggle the elbow of proc_exit */
if (proc_exit_inprogress)
return;
if (catchupInterruptEnabled)
{
bool save_ImmediateInterruptOK = ImmediateInterruptOK;
/*
* We may be called while ImmediateInterruptOK is true; turn it
* off while messing with the catchup state. (We would have to
* save and restore it anyway, because PGSemaphore operations
* inside ProcessCatchupEvent() might reset it.)
*/
ImmediateInterruptOK = false;
/*
* I'm not sure whether some flavors of Unix might allow another
* SIGUSR1 occurrence to recursively interrupt this routine. To
* cope with the possibility, we do the same sort of dance that
* EnableCatchupInterrupt must do --- see that routine for
* comments.
*/
catchupInterruptEnabled = 0; /* disable any recursive signal */
catchupInterruptOccurred = 1; /* do at least one iteration */
for (;;)
{
catchupInterruptEnabled = 1;
if (!catchupInterruptOccurred)
break;
catchupInterruptEnabled = 0;
if (catchupInterruptOccurred)
{
/* Here, it is finally safe to do stuff. */
ProcessCatchupEvent();
}
}
/*
* Restore ImmediateInterruptOK, and check for interrupts if
* needed.
*/
ImmediateInterruptOK = save_ImmediateInterruptOK;
if (save_ImmediateInterruptOK)
CHECK_FOR_INTERRUPTS();
}
else
{
/*
* In this path it is NOT SAFE to do much of anything, except
* this:
*/
catchupInterruptOccurred = 1;
}
errno = save_errno;
}
/*
* EnableCatchupInterrupt
*
* This is called by the PostgresMain main loop just before waiting
* for a frontend command. We process any pending catchup events,
* and enable the signal handler to process future events directly.
*
* NOTE: the signal handler starts out disabled, and stays so until
* PostgresMain calls this the first time.
*/
void
EnableCatchupInterrupt(void)
{
/*
* This code is tricky because we are communicating with a signal
* handler that could interrupt us at any point. If we just checked
* catchupInterruptOccurred and then set catchupInterruptEnabled, we
* could fail to respond promptly to a signal that happens in between
* those two steps. (A very small time window, perhaps, but Murphy's
* Law says you can hit it...) Instead, we first set the enable flag,
* then test the occurred flag. If we see an unserviced interrupt has
* occurred, we re-clear the enable flag before going off to do the
* service work. (That prevents re-entrant invocation of
* ProcessCatchupEvent() if another interrupt occurs.) If an
* interrupt comes in between the setting and clearing of
* catchupInterruptEnabled, then it will have done the service work and
* left catchupInterruptOccurred zero, so we have to check again after
* clearing enable. The whole thing has to be in a loop in case
* another interrupt occurs while we're servicing the first. Once we
* get out of the loop, enable is set and we know there is no
* unserviced interrupt.
*
* NB: an overenthusiastic optimizing compiler could easily break this
* code. Hopefully, they all understand what "volatile" means these
* days.
*/
for (;;)
{
catchupInterruptEnabled = 1;
if (!catchupInterruptOccurred)
break;
catchupInterruptEnabled = 0;
if (catchupInterruptOccurred)
{
ProcessCatchupEvent();
}
}
}
/*
* DisableCatchupInterrupt
*
* This is called by the PostgresMain main loop just after receiving
* a frontend command. Signal handler execution of catchup events
* is disabled until the next EnableCatchupInterrupt call.
*
* The SIGUSR2 signal handler also needs to call this, so as to
* prevent conflicts if one signal interrupts the other. So we
* must return the previous state of the flag.
*/
bool
DisableCatchupInterrupt(void)
{
bool result = (catchupInterruptEnabled != 0);
catchupInterruptEnabled = 0;
return result;
}
/*
* ProcessCatchupEvent
*
* Respond to a catchup event (SIGUSR1) from another backend.
*
* This is called either directly from the SIGUSR1 signal handler,
* or the next time control reaches the outer idle loop (assuming
* there's still anything to do by then).
*/
static void
ProcessCatchupEvent(void)
{
bool notify_enabled;
/* Must prevent SIGUSR2 interrupt while I am running */
notify_enabled = DisableNotifyInterrupt();
/*
* What we need to do here is cause ReceiveSharedInvalidMessages()
* to run, which will do the necessary work and also reset the
* catchupInterruptOccurred flag. If we are inside a transaction
* we can just call AcceptInvalidationMessages() to do this. If we
* aren't, we start and immediately end a transaction; the call to
* AcceptInvalidationMessages() happens down inside transaction start.
*
* It is awfully tempting to just call AcceptInvalidationMessages()
* without the rest of the xact start/stop overhead, and I think that
* would actually work in the normal case; but I am not sure that things
* would clean up nicely if we got an error partway through.
*/
if (IsTransactionOrTransactionBlock())
{
elog(DEBUG4, "ProcessCatchupEvent inside transaction");
AcceptInvalidationMessages();
}
else
{
elog(DEBUG4, "ProcessCatchupEvent outside transaction");
StartTransactionCommand();
CommitTransactionCommand();
}
if (notify_enabled)
EnableNotifyInterrupt();
}
/****************************************************************************/
/* Functions that need to scan the PGPROC structures of all running backends. */
/* It's a bit strange to keep these in sinval.c, since they don't have any */