1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-12 05:01:15 +03:00

Create an internal semaphore API that is not tied to SysV semaphores.

As proof of concept, provide an alternate implementation based on POSIX
semaphores.  Also push the SysV shared-memory implementation into a
separate file so that it can be replaced conveniently.
This commit is contained in:
Tom Lane
2002-05-05 00:03:29 +00:00
parent 91fc10fdac
commit 72a3902a66
37 changed files with 1659 additions and 1370 deletions

View File

@@ -13,7 +13,7 @@
# be converted to Method 2.
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/port/Makefile,v 1.11 2002/03/13 00:05:06 petere Exp $
# $Header: /cvsroot/pgsql/src/backend/port/Makefile,v 1.12 2002/05/05 00:03:28 tgl Exp $
#
#-------------------------------------------------------------------------
@@ -21,7 +21,7 @@ subdir = src/backend/port
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = dynloader.o
OBJS = dynloader.o pg_sema.o pg_shmem.o
OBJS += $(GETHOSTNAME) $(GETRUSAGE) $(INET_ATON) $(ISINF) $(MEMCMP) \
$(MISSING_RANDOM) $(SNPRINTF) $(SRANDOM) $(STRCASECMP) $(STRERROR) \

View File

@@ -0,0 +1,357 @@
/*-------------------------------------------------------------------------
*
* posix_sema.c
* Implement PGSemaphores using POSIX semaphore facilities
*
* We prefer the unnamed style of POSIX semaphore (the kind made with
* sem_init). We can cope with the kind made with sem_open, however.
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/port/posix_sema.c,v 1.1 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include "storage/pg_sema.h"
/*
* PG_SEM_REF(x) turns a PGSemaphore into the sem_t pointer that is handed
* to the sem_* library calls in this file.
*/
#ifdef USE_NAMED_POSIX_SEMAPHORES
/* PGSemaphore is pointer to pointer to sem_t */
#define PG_SEM_REF(x) (*(x))
#else
/* PGSemaphore is pointer to sem_t */
#define PG_SEM_REF(x) (x)
#endif
#define IPCProtection (0600) /* access/modify by user only */
static sem_t **mySemPointers; /* keep track of created semaphores */
static int numSems; /* number of semas acquired so far */
static int maxSems; /* allocated size of mySemPointers array */
static int nextSemKey; /* number used to build the next semaphore name to try */
/* on_shmem_exit callback that destroys every semaphore recorded above */
static void ReleaseSemaphores(int status, Datum arg);
#ifdef USE_NAMED_POSIX_SEMAPHORES
/*
 * PosixSemaphoreCreate
 *
 * Create a new named semaphore with initial count 1 and return it.
 *
 * Names of the form "/pgsql-N" are tried with successive values of
 * nextSemKey until sem_open() succeeds.  EEXIST, EACCES and EINTR are
 * treated as "that name is taken, try the next one"; any other failure
 * is reported on stderr and followed by proc_exit(1), since such errors
 * suggest nonrecoverable problems.
 */
static sem_t *
PosixSemaphoreCreate(void)
{
    char        nameBuf[64];
    sem_t      *result;

    do
    {
        int         key = nextSemKey++;

        snprintf(nameBuf, sizeof(nameBuf), "/pgsql-%d", key);
        result = sem_open(nameBuf, O_CREAT | O_EXCL,
                          (mode_t) IPCProtection, (unsigned) 1);

        /* Anything other than a collision-style failure is fatal */
        if (result == SEM_FAILED &&
            errno != EEXIST && errno != EACCES && errno != EINTR)
        {
            fprintf(stderr, "PosixSemaphoreCreate: sem_open(%s) failed: %s\n",
                    nameBuf, strerror(errno));
            proc_exit(1);
        }
    } while (result == SEM_FAILED);

    /*
     * Remove the name at once so that nobody else can open the semaphore;
     * this also means it will disappear if we crash.
     */
    sem_unlink(nameBuf);

    return result;
}
#else /* !USE_NAMED_POSIX_SEMAPHORES */
/*
 * PosixSemaphoreCreate
 *
 * Initialize the caller-supplied storage as an unnamed semaphore with
 * count 1 (sem_init is called with pshared = 1).  On failure, report the
 * error on stderr and call proc_exit(1).
 */
static void
PosixSemaphoreCreate(sem_t *sem)
{
    int         rc = sem_init(sem, 1, 1);

    if (rc >= 0)
        return;

    fprintf(stderr, "PosixSemaphoreCreate: sem_init failed: %s\n",
            strerror(errno));
    proc_exit(1);
}
#endif /* USE_NAMED_POSIX_SEMAPHORES */
/*
 * PosixSemaphoreKill - dispose of one semaphore
 *
 * Named semaphores are released with sem_close(), unnamed ones with
 * sem_destroy().  A failure is only reported on stderr.
 */
static void
PosixSemaphoreKill(sem_t *sem)
{
    int         rc;

#ifdef USE_NAMED_POSIX_SEMAPHORES
    /* sem_close is the matching call for sem_open */
    rc = sem_close(sem);
    if (rc < 0)
        fprintf(stderr, "PosixSemaphoreKill: sem_close failed: %s\n",
                strerror(errno));
#else
    /* sem_destroy is the matching call for sem_init */
    rc = sem_destroy(sem);
    if (rc < 0)
        fprintf(stderr, "PosixSemaphoreKill: sem_destroy failed: %s\n",
                strerror(errno));
#endif
}
/*
 * PGReserveSemaphores --- initialize semaphore support
 *
 * Called during postmaster start or shared memory reinitialization.
 * Afterwards, up to maxSemas PGSemaphoreCreate calls must be possible.
 * Any system resources acquired here or in PGSemaphoreCreate are released
 * by the on_shmem_exit callback registered at the end.
 *
 * maxSemas: upper bound on the number of semaphores that will be created.
 * port: port number, available for use as a key; the Posix code derives the
 *       starting semaphore name from it.  Zero is passed in a standalone
 *       backend.
 *
 * The Posix implementation acquires semaphores on demand, so maxSemas only
 * determines the size of the array that remembers them for later release.
 */
void
PGReserveSemaphores(int maxSemas, int port)
{
    /* One tracking slot per semaphore that may be created */
    mySemPointers = (sem_t **) malloc(maxSemas * sizeof(*mySemPointers));
    if (!mySemPointers)
        elog(PANIC, "Out of memory in PGReserveSemaphores");

    maxSems = maxSemas;
    numSems = 0;
    nextSemKey = 1000 * port;

    on_shmem_exit(ReleaseSemaphores, 0);
}
/*
 * Release semaphores at shutdown or shmem reinitialization
 *
 * Destroys every semaphore recorded in mySemPointers, in creation order,
 * then frees the tracking array.  The (status, arg) signature is dictated
 * by on_shmem_exit; neither argument is used.
 */
static void
ReleaseSemaphores(int status, Datum arg)
{
    int         idx = 0;

    while (idx < numSems)
    {
        PosixSemaphoreKill(mySemPointers[idx]);
        idx++;
    }

    free(mySemPointers);
}
/*
 * PGSemaphoreCreate
 *
 * Set up the PGSemaphore structure that sema points to so that it
 * represents a semaphore with count 1, and remember the underlying sem_t
 * so that ReleaseSemaphores can dispose of it later.
 */
void
PGSemaphoreCreate(PGSemaphore sema)
{
    sem_t      *created;

    /* The bookkeeping lives in the postmaster's statics, not a backend's */
    Assert(!IsUnderPostmaster);

    if (numSems >= maxSems)
        elog(PANIC, "PGSemaphoreCreate: too many semaphores created");

#ifdef USE_NAMED_POSIX_SEMAPHORES
    /* Named style: the PGSemaphore stores a pointer to the opened sem_t */
    created = PosixSemaphoreCreate();
    *sema = created;
#else
    /* Unnamed style: the PGSemaphore itself is the sem_t storage */
    PosixSemaphoreCreate(sema);
    created = sema;
#endif

    /* Record it for ReleaseSemaphores */
    mySemPointers[numSems] = created;
    numSems++;
}
/*
 * PGSemaphoreReset
 *
 * Bring a previously-initialized PGSemaphore back to count 0.
 *
 * POSIX offers no call that sets the count directly, so we keep doing
 * sem_trywait() until it reports that the count cannot be decremented
 * any further.
 */
void
PGSemaphoreReset(PGSemaphore sema)
{
    for (;;)
    {
        if (sem_trywait(PG_SEM_REF(sema)) >= 0)
            continue;           /* took one unit off; go around again */

        if (errno == EINTR)
            continue;           /* can this happen? */

        if (errno == EAGAIN || errno == EDEADLK)
            return;             /* got it down to 0 */

        fprintf(stderr, "PGSemaphoreReset: sem_trywait failed: %s\n",
                strerror(errno));
        proc_exit(1);
    }
}
/*
* PGSemaphoreLock
*
* Lock a semaphore (decrement count), blocking if count would be < 0
*
* sema: the semaphore to lock.
* interruptOK: value stored into ImmediateInterruptOK while we check for
* interrupts and wait; see the discussion inside the function for when a
* caller may pass true.
*
* A wait that is cut short by a signal (EINTR) is retried. Any other
* sem_wait() failure is reported on stderr and followed by proc_exit(255).
*/
void
PGSemaphoreLock(PGSemaphore sema, bool interruptOK)
{
int errStatus;
/*
* Note: if errStatus is -1 and errno == EINTR then it means we
* returned from the operation prematurely because we were sent a
* signal. So we try and lock the semaphore again.
*
* Each time around the loop, we check for a cancel/die interrupt. We
* assume that if such an interrupt comes in while we are waiting, it
* will cause the sem_wait() call to exit with errno == EINTR, so that we
* will be able to service the interrupt (if not in a critical section
* already).
*
* Once we acquire the lock, we do NOT check for an interrupt before
* returning. The caller needs to be able to record ownership of the
* lock before any interrupt can be accepted.
*
* There is a window of a few instructions between CHECK_FOR_INTERRUPTS
* and entering the sem_wait() call. If a cancel/die interrupt occurs in
* that window, we would fail to notice it until after we acquire the
* lock (or get another interrupt to escape the sem_wait()). We can
* avoid this problem by temporarily setting ImmediateInterruptOK to
* true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in
* this interval will execute directly. However, there is a huge
* pitfall: there is another window of a few instructions after the
* sem_wait() before we are able to reset ImmediateInterruptOK. If an
* interrupt occurs then, we'll lose control, which means that the
* lock has been acquired but our caller did not get a chance to
* record the fact. Therefore, we only set ImmediateInterruptOK if the
* caller tells us it's OK to do so, ie, the caller does not need to
* record acquiring the lock. (This is currently true for lockmanager
* locks, since the process that granted us the lock did all the
* necessary state updates. It's not true for Posix semaphores used to
* implement LW locks or emulate spinlocks --- but the wait time for
* such locks should not be very long, anyway.)
*/
do
{
/* Must be set before the interrupt check; see the comment above */
ImmediateInterruptOK = interruptOK;
CHECK_FOR_INTERRUPTS();
errStatus = sem_wait(PG_SEM_REF(sema));
/* Always cleared again, whether or not the wait succeeded */
ImmediateInterruptOK = false;
} while (errStatus < 0 && errno == EINTR);
/* Here errStatus < 0 means a failure other than EINTR */
if (errStatus < 0)
{
fprintf(stderr, "PGSemaphoreLock: sem_wait failed: %s\n",
strerror(errno));
proc_exit(255);
}
}
/*
 * PGSemaphoreUnlock
 *
 * Unlock a semaphore (increment count).
 *
 * sem_post() is retried for as long as it fails with EINTR, i.e. when a
 * signal made it return early.  It is not clear that this can really
 * happen, but we cope with it anyway.  Any other failure is reported on
 * stderr and followed by proc_exit(255).
 */
void
PGSemaphoreUnlock(PGSemaphore sema)
{
    for (;;)
    {
        if (sem_post(PG_SEM_REF(sema)) >= 0)
            return;             /* incremented successfully */

        if (errno != EINTR)
            break;              /* genuine failure */
    }

    fprintf(stderr, "PGSemaphoreUnlock: sem_post failed: %s\n",
            strerror(errno));
    proc_exit(255);
}
/*
 * PGSemaphoreTryLock
 *
 * Lock a semaphore only if that can be done without blocking.
 *
 * Returns true if the semaphore was locked, false if sem_trywait()
 * reported EAGAIN or EDEADLK.  An attempt that fails with EINTR (we were
 * sent a signal) is simply repeated.  Any other failure is reported on
 * stderr and followed by proc_exit(255).
 */
bool
PGSemaphoreTryLock(PGSemaphore sema)
{
    for (;;)
    {
        if (sem_trywait(PG_SEM_REF(sema)) >= 0)
            return true;        /* got the lock */

        if (errno != EINTR)
            break;              /* not a mere interruption */
    }

    if (errno == EAGAIN || errno == EDEADLK)
        return false;           /* failed to lock it */

    /* Otherwise we got trouble */
    fprintf(stderr, "PGSemaphoreTryLock: sem_trywait failed: %s\n",
            strerror(errno));
    proc_exit(255);

    return true;
}

View File

@@ -0,0 +1,522 @@
/*-------------------------------------------------------------------------
*
* sysv_sema.c
* Implement PGSemaphores using SysV semaphore facilities
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/port/sysv_sema.c,v 1.1 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/types.h>
#ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h>
#endif
#ifdef HAVE_SYS_SEM_H
#include <sys/sem.h>
#endif
#ifdef HAVE_KERNEL_OS_H
#include <kernel/OS.h>
#endif
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/pg_sema.h"
/*
* Supply our own declaration of union semun (the type of the fourth
* argument we pass to semctl) when HAVE_UNION_SEMUN is not defined.
*/
#ifndef HAVE_UNION_SEMUN
union semun
{
int val;
struct semid_ds *buf;
unsigned short *array;
};
#endif
typedef uint32 IpcSemaphoreKey; /* semaphore key passed to semget(2) */
typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
/*
* SEMAS_PER_SET is the number of useful semaphores in each semaphore set
* we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
* per set) parameter, which is often around 25. (Less than, because we
* allocate one extra sema in each set for identification purposes.)
*/
#define SEMAS_PER_SET 16
#define IPCProtection (0600) /* access/modify by user only */
/*
* PGSemaMagic is the value left in the extra identification semaphore of
* each set we create; IpcSemaphoreCreate looks for it to recognize sets
* made by Postgres.
*/
#define PGSemaMagic 537 /* must be less than SEMVMX */
static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
static int numSemaSets; /* number of sema sets acquired so far */
static int maxSemaSets; /* allocated size of mySemaSets array */
static IpcSemaphoreKey nextSemaKey; /* next key to try using */
static int nextSemaNumber; /* next free sem num in last sema set */
/* Prototypes for file-local routines */
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
int numSems);
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
int value);
static void IpcSemaphoreKill(IpcSemaphoreId semId);
static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
static IpcSemaphoreId IpcSemaphoreCreate(int numSems);
static void ReleaseSemaphores(int status, Datum arg);
/*
 * InternalIpcSemaphoreCreate
 *
 * Attempt to create a new semaphore set with the specified key.
 *
 * semKey: key passed to semget(2).
 * numSems: number of semaphores requested in the set.
 *
 * Returns the ID of the new set.  Returns -1 if semget() failed in a way
 * that indicates a collision with an existing set (EEXIST, EACCES, or
 * EIDRM where that code is defined).
 *
 * If we fail with any other failure code, print out an error and call
 * proc_exit(1).  Other types of errors suggest nonrecoverable problems.
 */
static IpcSemaphoreId
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
{
    int         semId;

    semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);

    if (semId < 0)
    {
        /*
         * Capture errno right away.  The stdio calls below are allowed to
         * change it, and we need semget()'s failure code both for the
         * message text and for the ENOSPC test that follows the message.
         */
        int         save_errno = errno;

        /*
         * Fail quietly if error indicates a collision with existing set.
         * One would expect EEXIST, given that we said IPC_EXCL, but
         * perhaps we could get a permission violation instead?  Also,
         * EIDRM might occur if an old set is slated for destruction but
         * not gone yet.
         */
        if (save_errno == EEXIST || save_errno == EACCES
#ifdef EIDRM
            || save_errno == EIDRM
#endif
            )
            return -1;

        /*
         * Else complain and abort
         */
        fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n",
                (int) semKey, numSems, (IPC_CREAT | IPC_EXCL | IPCProtection),
                strerror(save_errno));

        if (save_errno == ENOSPC)
            fprintf(stderr,
                    "\nThis error does *not* mean that you have run out of disk space.\n"
                    "\n"
                    "It occurs when either the system limit for the maximum number of\n"
                    "semaphore sets (SEMMNI), or the system wide maximum number of\n"
                    "semaphores (SEMMNS), would be exceeded. You need to raise the\n"
                    "respective kernel parameter. Alternatively, reduce PostgreSQL's\n"
                    "consumption of semaphores by reducing its max_connections parameter\n"
                    "(currently %d).\n"
                    "\n"
                    "The PostgreSQL Administrator's Guide contains more information about\n"
                    "configuring your system for PostgreSQL.\n\n",
                    MaxBackends);

        proc_exit(1);
    }

    return semId;
}
/*
 * Initialize a semaphore to the specified value.
 *
 * semId: ID of the semaphore set.
 * semNum: index of the semaphore within that set.
 * value: count to assign with semctl(SETVAL).
 *
 * On failure, the error is reported on stderr (with an extra hint when the
 * failure code is ERANGE) and proc_exit(1) is called.
 */
static void
IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
{
    union semun semun;

    semun.val = value;
    if (semctl(semId, semNum, SETVAL, semun) < 0)
    {
        /*
         * Capture errno before calling fprintf(), which is allowed to
         * change it; otherwise the ERANGE test below could look at the
         * wrong value.
         */
        int         save_errno = errno;

        fprintf(stderr, "IpcSemaphoreInitialize: semctl(id=%d, %d, SETVAL, %d) failed: %s\n",
                semId, semNum, value, strerror(save_errno));

        if (save_errno == ERANGE)
            fprintf(stderr,
                    "You possibly need to raise your kernel's SEMVMX value to be at least\n"
                    "%d. Look into the PostgreSQL documentation for details.\n",
                    value);

        proc_exit(1);
    }
}
/*
 * IpcSemaphoreKill(semId) - remove the semaphore set with the given ID
 *
 * A failing semctl(IPC_RMID) is reported on stderr and otherwise ignored.
 */
static void
IpcSemaphoreKill(IpcSemaphoreId semId)
{
    union semun unusedArg;
    int         rc;

    unusedArg.val = 0;          /* unused, but keep compiler quiet */
    rc = semctl(semId, 0, IPC_RMID, unusedArg);

    /*
     * A failure here used to be reported via elog(WARNING), but that's
     * pretty pointless considering any client has long since
     * disconnected ...
     */
    if (rc < 0)
        fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n",
                semId, strerror(errno));
}
/*
 * Fetch the current value (semval) of semaphore semNum in set semId.
 * Returns whatever semctl(GETVAL) returns.
 */
static int
IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
{
    union semun unusedArg;      /* fourth semctl argument, for Solaris */

    unusedArg.val = 0;          /* contents are not used */

    return semctl(semId, semNum, GETVAL, unusedArg);
}
/*
 * Fetch the PID of the last process that did semop() on semaphore semNum
 * in set semId.  Returns whatever semctl(GETPID) returns.
 */
static pid_t
IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
{
    union semun unusedArg;      /* fourth semctl argument, for Solaris */

    unusedArg.val = 0;          /* contents are not used */

    return semctl(semId, semNum, GETPID, unusedArg);
}
/*
* Create a semaphore set with the given number of useful semaphores
* (an additional sema is actually allocated to serve as identifier).
* Dead Postgres sema sets are recycled if found, but we do not fail
* upon collision with non-Postgres sema sets.
*
* The idea here is to detect and re-use keys that may have been assigned
* by a crashed postmaster or backend.
*
* numSems: number of useful semaphores wanted; the set is created with
* numSems + 1 members, and the identification semaphore is the one at
* index numSems.
*
* Returns the ID of the newly created set. Keys are tried in sequence,
* starting with the one after the current value of nextSemaKey.
*/
static IpcSemaphoreId
IpcSemaphoreCreate(int numSems)
{
IpcSemaphoreId semId;
union semun semun;
PGSemaphoreData mysema;
/* Loop till we find a free IPC key */
for (nextSemaKey++; ; nextSemaKey++)
{
pid_t creatorPID;
/* Try to create new semaphore set */
semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
if (semId >= 0)
break; /* successful create */
/* See if it looks to be leftover from a dead Postgres process */
semId = semget(nextSemaKey, numSems + 1, 0);
if (semId < 0)
continue; /* failed: must be some other app's */
/* Our sets carry PGSemaMagic in the identification semaphore */
if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
continue; /* sema belongs to a non-Postgres app */
/*
* If the creator PID is my own PID or does not belong to any
* extant process, it's safe to zap it.
*/
creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
if (creatorPID <= 0)
continue; /* oops, GETPID failed */
if (creatorPID != getpid())
{
/* kill() with signal 0 only probes whether the process exists */
if (kill(creatorPID, 0) == 0 ||
errno != ESRCH)
continue; /* sema belongs to a live process */
}
/*
* The sema set appears to be from a dead Postgres process, or
* from a previous cycle of life in this same process. Zap it, if
* possible. This probably shouldn't fail, but if it does, assume
* the sema set belongs to someone else after all, and continue
* quietly.
*/
semun.val = 0; /* unused, but keep compiler quiet */
if (semctl(semId, 0, IPC_RMID, semun) < 0)
continue;
/*
* Now try again to create the sema set.
*/
semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
if (semId >= 0)
break; /* successful create */
/*
* Can only get here if some other process managed to create the
* same sema key before we did. Let him have that one, loop
* around to try next key.
*/
}
/*
* OK, we created a new sema set. Mark it as created by this process.
* We do this by setting the spare semaphore to PGSemaMagic-1 and then
* incrementing it with semop(). That leaves it with value
* PGSemaMagic and sempid referencing this process.
*/
IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
/* Temporary PGSemaphoreData so that PGSemaphoreUnlock can do the semop() */
mysema.semId = semId;
mysema.semNum = numSems;
PGSemaphoreUnlock(&mysema);
return semId;
}
/*
 * PGReserveSemaphores --- initialize semaphore support
 *
 * Called during postmaster start or shared memory reinitialization.
 * Afterwards, up to maxSemas PGSemaphoreCreate calls must be possible.
 * Any system resources acquired here or in PGSemaphoreCreate are released
 * by the on_shmem_exit callback registered at the end.
 *
 * maxSemas: upper bound on the number of semaphores that will be created.
 * port: port number, available for use as a key; the SysV code derives the
 *       starting semaphore key from it.  Zero is passed in a standalone
 *       backend.
 *
 * The SysV implementation acquires semaphore sets on demand, so maxSemas
 * only determines the size of the array that remembers the acquired sets
 * for later release.
 */
void
PGReserveSemaphores(int maxSemas, int port)
{
    /* Number of sets needed, rounding up to whole sets */
    maxSemaSets = (maxSemas + (SEMAS_PER_SET - 1)) / SEMAS_PER_SET;

    mySemaSets = (IpcSemaphoreId *) malloc(maxSemaSets * sizeof(*mySemaSets));
    if (!mySemaSets)
        elog(PANIC, "Out of memory in PGReserveSemaphores");

    numSemaSets = 0;
    nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
    nextSemaKey = 1000 * port;

    on_shmem_exit(ReleaseSemaphores, 0);
}
/*
 * Release semaphores at shutdown or shmem reinitialization
 *
 * Removes every semaphore set recorded in mySemaSets, in the order they
 * were acquired, then frees the tracking array.  The (status, arg)
 * signature is dictated by on_shmem_exit; neither argument is used.
 */
static void
ReleaseSemaphores(int status, Datum arg)
{
    int         idx = 0;

    while (idx < numSemaSets)
    {
        IpcSemaphoreKill(mySemaSets[idx]);
        idx++;
    }

    free(mySemaSets);
}
/*
 * PGSemaphoreCreate
 *
 * Fill in the PGSemaphore structure that sema points to so that it
 * represents a semaphore with count 1.  Semaphores are handed out one at a
 * time from the most recently allocated set; a new set is allocated when
 * the current one is used up.
 */
void
PGSemaphoreCreate(PGSemaphore sema)
{
    /* The bookkeeping lives in the postmaster's statics, not a backend's */
    Assert(!IsUnderPostmaster);

    if (nextSemaNumber >= SEMAS_PER_SET)
    {
        IpcSemaphoreId freshSet;

        /* Current set is exhausted (or none exists yet): get another */
        if (numSemaSets >= maxSemaSets)
            elog(PANIC, "PGSemaphoreCreate: too many semaphores created");

        /* Only count the set once it has actually been created */
        freshSet = IpcSemaphoreCreate(SEMAS_PER_SET);
        mySemaSets[numSemaSets] = freshSet;
        numSemaSets++;
        nextSemaNumber = 0;
    }

    /* Hand out the next unused slot of the newest set */
    sema->semId = mySemaSets[numSemaSets - 1];
    sema->semNum = nextSemaNumber;
    nextSemaNumber++;

    /* Give it its starting count of 1 */
    IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
}
/*
 * PGSemaphoreReset
 *
 * Bring a previously-initialized PGSemaphore back to count 0 by setting
 * its value directly.
 */
void
PGSemaphoreReset(PGSemaphore sema)
{
    const int   resetCount = 0;

    IpcSemaphoreInitialize(sema->semId, sema->semNum, resetCount);
}
/*
* PGSemaphoreLock
*
* Lock a semaphore (decrement count), blocking if count would be < 0
*
* sema: the semaphore to lock.
* interruptOK: value stored into ImmediateInterruptOK while we check for
* interrupts and wait; see the discussion inside the function for when a
* caller may pass true.
*
* A wait that is cut short by a signal (EINTR) is retried. Any other
* semop() failure is reported on stderr and followed by proc_exit(255).
*/
void
PGSemaphoreLock(PGSemaphore sema, bool interruptOK)
{
int errStatus;
struct sembuf sops;
sops.sem_op = -1; /* decrement */
sops.sem_flg = 0;
sops.sem_num = sema->semNum;
/*
* Note: if errStatus is -1 and errno == EINTR then it means we
* returned from the operation prematurely because we were sent a
* signal. So we try and lock the semaphore again.
*
* Each time around the loop, we check for a cancel/die interrupt. We
* assume that if such an interrupt comes in while we are waiting, it
* will cause the semop() call to exit with errno == EINTR, so that we
* will be able to service the interrupt (if not in a critical section
* already).
*
* Once we acquire the lock, we do NOT check for an interrupt before
* returning. The caller needs to be able to record ownership of the
* lock before any interrupt can be accepted.
*
* There is a window of a few instructions between CHECK_FOR_INTERRUPTS
* and entering the semop() call. If a cancel/die interrupt occurs in
* that window, we would fail to notice it until after we acquire the
* lock (or get another interrupt to escape the semop()). We can
* avoid this problem by temporarily setting ImmediateInterruptOK to
* true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in
* this interval will execute directly. However, there is a huge
* pitfall: there is another window of a few instructions after the
* semop() before we are able to reset ImmediateInterruptOK. If an
* interrupt occurs then, we'll lose control, which means that the
* lock has been acquired but our caller did not get a chance to
* record the fact. Therefore, we only set ImmediateInterruptOK if the
* caller tells us it's OK to do so, ie, the caller does not need to
* record acquiring the lock. (This is currently true for lockmanager
* locks, since the process that granted us the lock did all the
* necessary state updates. It's not true for SysV semaphores used to
* implement LW locks or emulate spinlocks --- but the wait time for
* such locks should not be very long, anyway.)
*/
do
{
/* Must be set before the interrupt check; see the comment above */
ImmediateInterruptOK = interruptOK;
CHECK_FOR_INTERRUPTS();
errStatus = semop(sema->semId, &sops, 1);
/* Always cleared again, whether or not the wait succeeded */
ImmediateInterruptOK = false;
} while (errStatus < 0 && errno == EINTR);
/* Here errStatus < 0 means a failure other than EINTR */
if (errStatus < 0)
{
fprintf(stderr, "PGSemaphoreLock: semop(id=%d) failed: %s\n",
sema->semId, strerror(errno));
proc_exit(255);
}
}
/*
 * PGSemaphoreUnlock
 *
 * Unlock a semaphore (increment count).
 *
 * semop() is retried for as long as it fails with EINTR, i.e. when a
 * signal made it return early.  It is not clear that this can really
 * happen, but we cope with it anyway.  Any other failure is reported on
 * stderr and followed by proc_exit(255).
 */
void
PGSemaphoreUnlock(PGSemaphore sema)
{
    struct sembuf incr;

    incr.sem_num = sema->semNum;
    incr.sem_op = 1;            /* increment */
    incr.sem_flg = 0;

    for (;;)
    {
        if (semop(sema->semId, &incr, 1) >= 0)
            return;             /* incremented successfully */

        if (errno != EINTR)
            break;              /* genuine failure */
    }

    fprintf(stderr, "PGSemaphoreUnlock: semop(id=%d) failed: %s\n",
            sema->semId, strerror(errno));
    proc_exit(255);
}
/*
 * PGSemaphoreTryLock
 *
 * Lock a semaphore only if that can be done without blocking.
 *
 * Returns true if the semaphore was locked, false if the non-blocking
 * semop() reported EAGAIN or EWOULDBLOCK.  An attempt that fails with
 * EINTR (we were sent a signal) is simply repeated.  Any other failure is
 * reported on stderr and followed by proc_exit(255).
 */
bool
PGSemaphoreTryLock(PGSemaphore sema)
{
    struct sembuf tryDecr;

    tryDecr.sem_num = sema->semNum;
    tryDecr.sem_op = -1;        /* decrement */
    tryDecr.sem_flg = IPC_NOWAIT;   /* but don't block */

    for (;;)
    {
        if (semop(sema->semId, &tryDecr, 1) >= 0)
            return true;        /* got the lock */

        if (errno != EINTR)
            break;              /* not a mere interruption */
    }

    /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
#ifdef EAGAIN
    if (errno == EAGAIN)
        return false;           /* failed to lock it */
#endif
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
    if (errno == EWOULDBLOCK)
        return false;           /* failed to lock it */
#endif

    /* Otherwise we got trouble */
    fprintf(stderr, "PGSemaphoreTryLock: semop(id=%d) failed: %s\n",
            sema->semId, strerror(errno));
    proc_exit(255);

    return true;
}

View File

@@ -0,0 +1,400 @@
/*-------------------------------------------------------------------------
*
* sysv_shmem.c
* Implement shared memory using SysV facilities
*
* These routines represent a fairly thin layer on top of SysV shared
* memory functionality.
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/port/sysv_shmem.c,v 1.1 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/types.h>
#ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h>
#endif
#ifdef HAVE_SYS_SHM_H
#include <sys/shm.h>
#endif
#ifdef HAVE_KERNEL_OS_H
#include <kernel/OS.h>
#endif
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
typedef uint32 IpcMemoryKey; /* shared memory key passed to shmget(2) */
typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
/* Permission bits OR'ed into the flags given to shmget(2) */
#define IPCProtection (0600) /* access/modify by user only */
/* Prototypes for file-local routines */
static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size);
/* The next two are registered as on_shmem_exit callbacks */
static void IpcMemoryDetach(int status, Datum shmaddr);
static void IpcMemoryDelete(int status, Datum shmId);
/* malloc-based stand-ins for a shared memory segment */
static void *PrivateMemoryCreate(uint32 size);
static void PrivateMemoryDelete(int status, Datum memaddr);
/*
 * InternalIpcMemoryCreate(memKey, size)
 *
 * Attempt to create a new shared memory segment with the specified key.
 *
 * memKey: key passed to shmget(2).
 * size: requested segment size in bytes.
 *
 * Returns the address at which the new segment is attached in the current
 * process.  Returns NULL if shmget() failed in a way that indicates a
 * collision with an existing segment (EEXIST, EACCES, or EIDRM where that
 * code is defined).
 *
 * On success, callbacks are registered with on_shmem_exit to detach and
 * delete the segment, and the key and ID are recorded in the data
 * directory's lockfile.
 *
 * If we fail with any other failure code, print out an error and call
 * proc_exit(1).  Other types of errors are not recoverable.
 */
static void *
InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size)
{
    IpcMemoryId shmid;
    void       *memAddress;

    shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);

    if (shmid < 0)
    {
        /*
         * Capture errno right away.  The stdio calls below are allowed to
         * change it, and we need shmget()'s failure code both for the
         * message text and to choose the explanatory hint afterwards.
         */
        int         save_errno = errno;

        /*
         * Fail quietly if error indicates a collision with existing
         * segment. One would expect EEXIST, given that we said IPC_EXCL,
         * but perhaps we could get a permission violation instead?  Also,
         * EIDRM might occur if an old seg is slated for destruction but
         * not gone yet.
         */
        if (save_errno == EEXIST || save_errno == EACCES
#ifdef EIDRM
            || save_errno == EIDRM
#endif
            )
            return NULL;

        /*
         * Else complain and abort
         */
        fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%u, 0%o) failed: %s\n",
                (int) memKey, size, (IPC_CREAT | IPC_EXCL | IPCProtection),
                strerror(save_errno));

        if (save_errno == EINVAL)
            fprintf(stderr,
                    "\nThis error usually means that PostgreSQL's request for a shared memory\n"
                    "segment exceeded your kernel's SHMMAX parameter. You can either\n"
                    "reduce the request size or reconfigure the kernel with larger SHMMAX.\n"
                    "To reduce the request size (currently %u bytes), reduce\n"
                    "PostgreSQL's shared_buffers parameter (currently %d) and/or\n"
                    "its max_connections parameter (currently %d).\n"
                    "\n"
                    "If the request size is already small, it's possible that it is less than\n"
                    "your kernel's SHMMIN parameter, in which case raising the request size or\n"
                    "reconfiguring SHMMIN is called for.\n"
                    "\n"
                    "The PostgreSQL Administrator's Guide contains more information about\n"
                    "shared memory configuration.\n\n",
                    size, NBuffers, MaxBackends);
        else if (save_errno == ENOMEM)
            fprintf(stderr,
                    "\nThis error usually means that PostgreSQL's request for a shared\n"
                    "memory segment exceeded available memory or swap space.\n"
                    "To reduce the request size (currently %u bytes), reduce\n"
                    "PostgreSQL's shared_buffers parameter (currently %d) and/or\n"
                    "its max_connections parameter (currently %d).\n"
                    "\n"
                    "The PostgreSQL Administrator's Guide contains more information about\n"
                    "shared memory configuration.\n\n",
                    size, NBuffers, MaxBackends);
        else if (save_errno == ENOSPC)
            fprintf(stderr,
                    "\nThis error does *not* mean that you have run out of disk space.\n"
                    "\n"
                    "It occurs either if all available shared memory IDs have been taken,\n"
                    "in which case you need to raise the SHMMNI parameter in your kernel,\n"
                    "or because the system's overall limit for shared memory has been\n"
                    "reached. If you cannot increase the shared memory limit,\n"
                    "reduce PostgreSQL's shared memory request (currently %u bytes),\n"
                    "by reducing its shared_buffers parameter (currently %d) and/or\n"
                    "its max_connections parameter (currently %d).\n"
                    "\n"
                    "The PostgreSQL Administrator's Guide contains more information about\n"
                    "shared memory configuration.\n\n",
                    size, NBuffers, MaxBackends);

        proc_exit(1);
    }

    /* Register on-exit routine to delete the new segment */
    on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));

    /* OK, should be able to attach to the segment */
#if defined(solaris) && defined(__sparc__)
    /* use intimate shared memory on SPARC Solaris */
    memAddress = shmat(shmid, 0, SHM_SHARE_MMU);
#else
    memAddress = shmat(shmid, 0, 0);
#endif

    if (memAddress == (void *) -1)
    {
        fprintf(stderr, "IpcMemoryCreate: shmat(id=%d) failed: %s\n",
                shmid, strerror(errno));
        proc_exit(1);
    }

    /* Register on-exit routine to detach new segment before deleting */
    on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));

    /* Record key and ID in lockfile for data directory. */
    RecordSharedMemoryInLockFile((unsigned long) memKey,
                                 (unsigned long) shmid);

    return memAddress;
}
/*
 * IpcMemoryDetach(status, shmaddr)
 *
 * Remove a shared memory segment from this process' address space.
 * shmaddr carries the attach address.  A failing shmdt() is reported on
 * stderr and otherwise ignored.
 *
 * (called as an on_shmem_exit callback, hence funny argument list)
 */
static void
IpcMemoryDetach(int status, Datum shmaddr)
{
    void       *segAddr = DatumGetPointer(shmaddr);

    /*
     * A failure here used to be reported via elog(WARNING), but that's
     * pretty pointless considering any client has long since
     * disconnected ...
     */
    if (shmdt(segAddr) < 0)
        fprintf(stderr, "IpcMemoryDetach: shmdt(%p) failed: %s\n",
                segAddr, strerror(errno));
}
/*
 * IpcMemoryDelete(status, shmId)
 *
 * Delete a shared memory segment.  shmId carries the segment ID.  A
 * failing shmctl(IPC_RMID) is reported on stderr and otherwise ignored.
 *
 * (called as an on_shmem_exit callback, hence funny argument list)
 */
static void
IpcMemoryDelete(int status, Datum shmId)
{
    IpcMemoryId segId = DatumGetInt32(shmId);

    /*
     * A failure here used to be reported via elog(WARNING), but that's
     * pretty pointless considering any client has long since
     * disconnected ...
     */
    if (shmctl(segId, IPC_RMID, (struct shmid_ds *) NULL) < 0)
        fprintf(stderr, "IpcMemoryDelete: shmctl(%d, %d, 0) failed: %s\n",
                segId, IPC_RMID, strerror(errno));
}
/*
 * PGSharedMemoryIsInUse
 *
 * Is a previously-existing shmem segment still existing and in use?
 *
 * id2 is interpreted as the segment's shmem ID; id1 is not examined here.
 *
 * A segment counts as "in use" when it (a) exists and (b) has at least one
 * process attached to it.  If the segment cannot be examined for a reason
 * other than its nonexistence (most likely, because it doesn't belong to
 * our userid), we play safe and report it as in use.
 */
bool
PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
{
	struct shmid_ds segInfo;
	IpcMemoryId segId = (IpcMemoryId) id2;

	if (shmctl(segId, IPC_STAT, &segInfo) >= 0)
	{
		/* Stat worked: in use exactly when some process is attached */
		return (segInfo.shm_nattch != 0);
	}

	/*
	 * Stat failed.  EINVAL actually has multiple possible causes documented
	 * in the shmctl man page, but we assume it must mean the segment no
	 * longer exists.  Any other error is taken to mean "in use".
	 */
	return (errno != EINVAL);
}
/* ----------------------------------------------------------------
* private memory support
*
* Rather than allocating shmem segments with IPC_PRIVATE key, we
* just malloc() the requested amount of space. This code emulates
* the needed shmem functions.
* ----------------------------------------------------------------
*/
/*
 * PrivateMemoryCreate
 *
 * malloc() a zero-filled block of "size" bytes to stand in for a shmem
 * segment, and register PrivateMemoryDelete as an on_shmem_exit callback
 * so the block is released again.  If malloc fails, a message is written
 * to stderr and proc_exit(1) is called.
 */
static void *
PrivateMemoryCreate(uint32 size)
{
	void *block = malloc(size);

	if (block == NULL)
	{
		fprintf(stderr, "PrivateMemoryCreate: malloc(%u) failed\n", size);
		proc_exit(1);
	}

	/* Zero the whole block; this keeps Purify quiet */
	MemSet(block, 0, size);

	/* Arrange for the storage to be released by the on-exit machinery */
	on_shmem_exit(PrivateMemoryDelete, PointerGetDatum(block));

	return block;
}
/*
 * PrivateMemoryDelete
 *
 * on_shmem_exit callback that frees a block obtained from malloc().
 * The block's address arrives packed in a Datum; status is not used.
 */
static void
PrivateMemoryDelete(int status, Datum memaddr)
{
	void *block = DatumGetPointer(memaddr);

	free(block);
}
/*
* PGSharedMemoryCreate
*
* Create a shared memory segment of the given size and initialize its
* standard header. Also, register an on_shmem_exit callback to release
* the storage.
*
* Dead Postgres segments are recycled if found, but we do not fail upon
* collision with non-Postgres shmem segments. The idea here is to detect and
* re-use keys that may have been assigned by a crashed postmaster or backend.
*
* The port number is passed for possible use as a key (for SysV, we use
* it to generate the starting shmem key). In a standalone backend,
* zero will be passed.
*
* Parameters:
*	size		total size of the segment in bytes; must exceed the
*				MAXALIGN'd size of PGShmemHeader (checked by Assert)
*	makePrivate	if true, the storage comes from PrivateMemoryCreate()
*				(i.e. malloc) and no IPC key is consumed
*	port		keys are tried starting at port * 1000 + 1
*
* Returns a pointer to the header located at the start of the new segment.
* The key search loop has no upper bound: it only ends once a segment has
* been obtained.
*/
PGShmemHeader *
PGSharedMemoryCreate(uint32 size, bool makePrivate, int port)
{
IpcMemoryKey NextShmemSegID;
void *memAddress;
PGShmemHeader *hdr;
/* Room for a header? */
Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
/* Loop till we find a free IPC key */
NextShmemSegID = port * 1000;
/* The for-initializer bumps the key once, so the first key tried is port * 1000 + 1 */
for (NextShmemSegID++;; NextShmemSegID++)
{
IpcMemoryId shmid;
/* Special case if creating a private segment --- just malloc() it */
/* (does not depend on the key, so this leaves the loop on the first pass) */
if (makePrivate)
{
memAddress = PrivateMemoryCreate(size);
break;
}
/* Try to create new segment */
/* A NULL result is treated below as "this key is already taken" */
memAddress = InternalIpcMemoryCreate(NextShmemSegID, size);
if (memAddress)
break; /* successful create and attach */
/* See if it looks to be leftover from a dead Postgres process */
/*
* Flags of 0 (no IPC_CREAT) mean this only looks up an existing segment;
* the size asked for is just that of the header, which is all we read.
*/
shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
if (shmid < 0)
continue; /* failed: must be some other app's */
#if defined(solaris) && defined(__sparc__)
/* use intimate shared memory on SPARC Solaris */
memAddress = shmat(shmid, 0, SHM_SHARE_MMU);
#else
memAddress = shmat(shmid, 0, 0);
#endif
/* shmat reports failure as (void *) -1, not NULL */
if (memAddress == (void *) -1)
continue; /* failed: must be some other app's */
hdr = (PGShmemHeader *) memAddress;
if (hdr->magic != PGShmemMagic)
{
shmdt(memAddress);
continue; /* segment belongs to a non-Postgres app */
}
/*
* If the creator PID is my own PID or does not belong to any
* extant process, it's safe to zap it.
*
* kill() with signal 0 sends nothing and only probes the target.
* Only a failure with errno == ESRCH is accepted as proof that the
* creator is gone; success or any other error is treated as "alive".
*/
if (hdr->creatorPID != getpid())
{
if (kill(hdr->creatorPID, 0) == 0 ||
errno != ESRCH)
{
shmdt(memAddress);
continue; /* segment belongs to a live process */
}
}
/*
* The segment appears to be from a dead Postgres process, or from
* a previous cycle of life in this same process. Zap it, if
* possible. This probably shouldn't fail, but if it does, assume
* the segment belongs to someone else after all, and continue
* quietly.
*/
/* hdr points into the old mapping and must not be used past this shmdt */
shmdt(memAddress);
if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0)
continue;
/*
* Now try again to create the segment.
*/
memAddress = InternalIpcMemoryCreate(NextShmemSegID, size);
if (memAddress)
break; /* successful create and attach */
/*
* Can only get here if some other process managed to create the
* same shmem key before we did. Let him have that one, loop
* around to try next key.
*/
}
/*
* OK, we created a new segment. Mark it as created by this process.
* The order of assignments here is critical so that another Postgres
* process can't see the header as valid but belonging to an invalid
* PID!
*
* That is: creatorPID is stored first, and magic (which is what the
* recycling check in the loop above tests first) is stored after it.
*/
hdr = (PGShmemHeader *) memAddress;
hdr->creatorPID = getpid();
hdr->magic = PGShmemMagic;
/*
* Initialize space allocation status for segment.
*/
/* freeoffset starts just past the MAXALIGN'd header */
hdr->totalsize = size;
hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
return hdr;
}