1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-03 15:22:11 +03:00

Create an internal semaphore API that is not tied to SysV semaphores.

As proof of concept, provide an alternate implementation based on POSIX
semaphores.  Also push the SysV shared-memory implementation into a
separate file so that it can be replaced conveniently.
This commit is contained in:
Tom Lane
2002-05-05 00:03:29 +00:00
parent 91fc10fdac
commit 72a3902a66
37 changed files with 1659 additions and 1370 deletions

View File

@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.90 2002/03/06 06:10:03 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.91 2002/05/05 00:03:28 tgl Exp $
*
* NOTES:
*
@@ -51,6 +51,7 @@
#include "miscadmin.h"
#include "storage/fd.h"
#include "storage/ipc.h"
/* Filename components for OpenTemporaryFile */

View File

@@ -3,25 +3,17 @@
* ipc.c
* POSTGRES inter-process communication definitions.
*
* This file is misnamed, as it no longer has much of anything directly
* to do with IPC. The functionality here is concerned with managing
* exit-time cleanup for either a postmaster or a backend.
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.78 2002/04/13 19:52:51 momjian Exp $
*
* NOTES
*
* Currently, semaphores are used (my understanding anyway) in two
* different ways:
* 1. as mutexes on machines that don't have test-and-set (eg.
* mips R3000).
* 2. for putting processes to sleep when waiting on a lock
* and waking them up when the lock is free.
* The number of semaphores in (1) is fixed and those are shared
* among all backends. In (2), there is 1 semaphore per process and those
* are not shared with anyone else.
* -ay 4/95
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.79 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -30,27 +22,9 @@
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <sys/file.h>
#include "storage/ipc.h"
/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
#ifdef HAVE_SYS_SEM_H
#include <sys/sem.h>
#endif
#ifdef HAVE_SYS_SHM_H
#include <sys/shm.h>
#endif
#ifdef HAVE_KERNEL_OS_H
#include <kernel/OS.h>
#endif
#if defined(__darwin__)
#include "port/darwin/sem.h"
#endif
#include "miscadmin.h"
#include "utils/memutils.h"
#include "libpq/libpq.h"
#include "storage/ipc.h"
/*
@@ -60,17 +34,6 @@
*/
bool proc_exit_inprogress = false;
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
int numSems, int permission,
int semStartValue, bool removeOnExit);
static void CallbackSemaphoreKill(int status, Datum semId);
static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size,
int permission);
static void IpcMemoryDetach(int status, Datum shmaddr);
static void IpcMemoryDelete(int status, Datum shmId);
static void *PrivateMemoryCreate(uint32 size);
static void PrivateMemoryDelete(int status, Datum memaddr);
/* ----------------------------------------------------------------
* exit() handling stuff
@@ -83,8 +46,6 @@ static void PrivateMemoryDelete(int status, Datum memaddr);
* Callback functions can take zero, one, or two args: the first passed
* arg is the integer exitcode, the second is the Datum supplied when
* the callback was registered.
*
* XXX these functions probably ought to live in some other module.
* ----------------------------------------------------------------
*/
@@ -230,762 +191,3 @@ on_exit_reset(void)
on_shmem_exit_index = 0;
on_proc_exit_index = 0;
}
/* ----------------------------------------------------------------
* Semaphore support
*
* These routines represent a fairly thin layer on top of SysV semaphore
* functionality.
* ----------------------------------------------------------------
*/
/* ----------------------------------------------------------------
 *	InternalIpcSemaphoreCreate(semKey, numSems, permission,
 *							   semStartValue, removeOnExit)
 *
 * Attempt to create a new semaphore set with the specified key.
 *
 *	semKey			SysV IPC key to create the set under
 *	numSems			number of semaphores in the set (1..IPC_NMAXSEM)
 *	permission		permission bits OR'd into the semget() flags
 *	semStartValue	initial value given to every semaphore in the set
 *	removeOnExit	if true, register an on_shmem_exit callback that
 *					deletes the set
 *
 * Returns the new set's ID, or -1 if the key collides with an existing
 * set (EEXIST, EACCES, or EIDRM where defined).
 *
 * Any other failure is not recoverable: a message is printed on stderr
 * and proc_exit(1) is called.
 * ----------------------------------------------------------------
 */
static IpcSemaphoreId
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
						   int numSems, int permission,
						   int semStartValue, bool removeOnExit)
{
	int			semId;
	int			i;
	u_short		array[IPC_NMAXSEM];
	union semun semun;

	Assert(numSems > 0 && numSems <= IPC_NMAXSEM);

	semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | permission);

	if (semId < 0)
	{
		/*
		 * Capture errno immediately: the fprintf() calls below are allowed
		 * to modify it, which would make the ENOSPC test unreliable.
		 */
		int			save_errno = errno;

		/*
		 * Fail quietly if error indicates a collision with existing set.
		 * One would expect EEXIST, given that we said IPC_EXCL, but
		 * perhaps we could get a permission violation instead?  Also,
		 * EIDRM might occur if an old set is slated for destruction but
		 * not gone yet.
		 */
		if (save_errno == EEXIST || save_errno == EACCES
#ifdef EIDRM
			|| save_errno == EIDRM
#endif
			)
			return -1;

		/*
		 * Else complain and abort
		 */
		fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n",
				(int) semKey, numSems, (IPC_CREAT | IPC_EXCL | permission),
				strerror(save_errno));

		if (save_errno == ENOSPC)
			fprintf(stderr,
					"\nThis error does *not* mean that you have run out of disk space.\n"
					"\n"
					"It occurs when either the system limit for the maximum number of\n"
					"semaphore sets (SEMMNI), or the system wide maximum number of\n"
					"semaphores (SEMMNS), would be exceeded. You need to raise the\n"
					"respective kernel parameter. Alternatively, reduce PostgreSQL's\n"
					"consumption of semaphores by reducing its max_connections parameter\n"
					"(currently %d).\n"
					"\n"
					"The PostgreSQL Administrator's Guide contains more information about\n"
					"configuring your system for PostgreSQL.\n\n",
					MaxBackends);

		proc_exit(1);
	}

	/* Initialize new semas to specified start value */
	for (i = 0; i < numSems; i++)
		array[i] = semStartValue;
	semun.array = array;
	if (semctl(semId, 0, SETALL, semun) < 0)
	{
		/* Same reasoning as above: keep errno safe from fprintf() */
		int			save_errno = errno;

		fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, 0, SETALL, ...) failed: %s\n",
				semId, strerror(save_errno));

		if (save_errno == ERANGE)
			fprintf(stderr,
					"You possibly need to raise your kernel's SEMVMX value to be at least\n"
					"%d. Look into the PostgreSQL documentation for details.\n",
					semStartValue);

		/* Don't leave the half-initialized set behind */
		IpcSemaphoreKill(semId);
		proc_exit(1);
	}

	/* Register on-exit routine to delete the new set */
	if (removeOnExit)
		on_shmem_exit(CallbackSemaphoreKill, Int32GetDatum(semId));

	return semId;
}
/*
 * IpcSemaphoreKill(semId) - remove a semaphore set
 *
 * A failed removal is only reported on stderr.  (It used to be reported
 * via elog(WARNING), but that is pointless because any client has long
 * since disconnected by the time this runs.)
 */
void
IpcSemaphoreKill(IpcSemaphoreId semId)
{
	union semun arg;
	int			rc;

	arg.val = 0;				/* not used; set only to quiet the compiler */

	rc = semctl(semId, 0, IPC_RMID, arg);
	if (rc >= 0)
		return;

	fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n",
			semId, strerror(errno));
}
/*
 * CallbackSemaphoreKill(status, semId)
 *
 * on_shmem_exit callback wrapper around IpcSemaphoreKill; the argument
 * list is dictated by the callback convention.  The exit status is not
 * used, and the semaphore set ID arrives packed in a Datum.
 */
static void
CallbackSemaphoreKill(int status, Datum semId)
{
	IpcSemaphoreId setId = DatumGetInt32(semId);

	IpcSemaphoreKill(setId);
}
/****************************************************************************/
/* IpcSemaphoreLock(semId, sem, interruptOK) - locks a semaphore */
/* */
/* semId, sem: semaphore set ID and index of the semaphore to decrement. */
/* interruptOK: value assigned to ImmediateInterruptOK while waiting; */
/* see the long comment in the body for when true is safe. */
/* */
/* Retries semop() for as long as it fails with EINTR. Any other failure */
/* is reported on stderr and followed by proc_exit(255). */
/****************************************************************************/
void
IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK)
{
int errStatus;
struct sembuf sops;
sops.sem_op = -1; /* decrement */
sops.sem_flg = 0;
sops.sem_num = sem;
/*
* Note: if errStatus is -1 and errno == EINTR then it means we
* returned from the operation prematurely because we were sent a
* signal. So we try and lock the semaphore again.
*
* Each time around the loop, we check for a cancel/die interrupt. We
* assume that if such an interrupt comes in while we are waiting, it
* will cause the semop() call to exit with errno == EINTR, so that we
* will be able to service the interrupt (if not in a critical section
* already).
*
* Once we acquire the lock, we do NOT check for an interrupt before
* returning. The caller needs to be able to record ownership of the
* lock before any interrupt can be accepted.
*
* There is a window of a few instructions between CHECK_FOR_INTERRUPTS
* and entering the semop() call. If a cancel/die interrupt occurs in
* that window, we would fail to notice it until after we acquire the
* lock (or get another interrupt to escape the semop()). We can
* avoid this problem by temporarily setting ImmediateInterruptOK to
* true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in
* this interval will execute directly. However, there is a huge
* pitfall: there is another window of a few instructions after the
* semop() before we are able to reset ImmediateInterruptOK. If an
* interrupt occurs then, we'll lose control, which means that the
* lock has been acquired but our caller did not get a chance to
* record the fact. Therefore, we only set ImmediateInterruptOK if the
* caller tells us it's OK to do so, ie, the caller does not need to
* record acquiring the lock. (This is currently true for lockmanager
* locks, since the process that granted us the lock did all the
* necessary state updates. It's not true for SysV semaphores used to
* implement LW locks or emulate spinlocks --- but the wait time for
* such locks should not be very long, anyway.)
*/
/* The four statements in the loop body must stay in exactly this order. */
do
{
ImmediateInterruptOK = interruptOK;
CHECK_FOR_INTERRUPTS();
errStatus = semop(semId, &sops, 1);
ImmediateInterruptOK = false;
} while (errStatus == -1 && errno == EINTR);
/* Leaving the loop with -1 means a failure other than EINTR. */
if (errStatus == -1)
{
fprintf(stderr, "IpcSemaphoreLock: semop(id=%d) failed: %s\n",
semId, strerror(errno));
proc_exit(255);
}
}
/*
 * IpcSemaphoreUnlock(semId, sem) - unlocks a semaphore
 *
 * Increments semaphore number "sem" of set "semId" by one.  A failure
 * other than EINTR is reported on stderr and followed by proc_exit(255).
 */
void
IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem)
{
	struct sembuf op;
	int			rc;

	op.sem_num = sem;
	op.sem_op = 1;				/* increment */
	op.sem_flg = 0;

	/*
	 * A result of -1 with errno == EINTR means a signal cut the operation
	 * short, so simply issue it again.  It is not clear that this can
	 * really happen for an increment, but coping with it costs nothing.
	 */
	for (;;)
	{
		rc = semop(semId, &op, 1);
		if (rc != -1 || errno != EINTR)
			break;
	}

	if (rc != -1)
		return;

	fprintf(stderr, "IpcSemaphoreUnlock: semop(id=%d) failed: %s\n",
			semId, strerror(errno));
	proc_exit(255);
}
/*
 * IpcSemaphoreTryLock(semId, sem) - conditionally locks a semaphore
 *
 * Decrements the semaphore if that can be done without blocking.
 * Returns true if the lock was taken, false if it was not free.
 * Any unexpected semop() failure is reported on stderr and followed by
 * proc_exit(255).
 */
bool
IpcSemaphoreTryLock(IpcSemaphoreId semId, int sem)
{
	struct sembuf op;
	int			rc;

	op.sem_num = sem;
	op.sem_op = -1;				/* decrement */
	op.sem_flg = IPC_NOWAIT;	/* but don't block */

	/*
	 * A result of -1 with errno == EINTR means a signal cut the operation
	 * short, so simply issue it again.
	 */
	for (;;)
	{
		rc = semop(semId, &op, 1);
		if (rc != -1)
			return true;		/* got the lock */
		if (errno != EINTR)
			break;
	}

	/* "Would block" is spelled EAGAIN or EWOULDBLOCK (platform-dependent) */
#ifdef EAGAIN
	if (errno == EAGAIN)
		return false;			/* failed to lock it */
#endif
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
	if (errno == EWOULDBLOCK)
		return false;			/* failed to lock it */
#endif

	/* Anything else is trouble */
	fprintf(stderr, "IpcSemaphoreTryLock: semop(id=%d) failed: %s\n",
			semId, strerror(errno));
	proc_exit(255);

	return true;
}
/*
 * IpcSemaphoreGetValue(semId, sem)
 *
 * Return the current value (semval) of semaphore "sem" in set "semId",
 * i.e. whatever semctl(GETVAL) returns.
 */
int
IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem)
{
	union semun unusedArg;		/* passed for Solaris' benefit */
	int			semval;

	unusedArg.val = 0;			/* content is not used */
	semval = semctl(semId, sem, GETVAL, unusedArg);

	return semval;
}
/*
 * IpcSemaphoreGetLastPID(semId, sem)
 *
 * Return the PID of the last process to do semop() on semaphore "sem"
 * of set "semId", i.e. whatever semctl(GETPID) returns.
 */
static pid_t
IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int sem)
{
	union semun unusedArg;		/* passed for Solaris' benefit */
	pid_t		lastPid;

	unusedArg.val = 0;			/* content is not used */
	lastPid = semctl(semId, sem, GETPID, unusedArg);

	return lastPid;
}
/* ----------------------------------------------------------------
* Shared memory support
*
* These routines represent a fairly thin layer on top of SysV shared
* memory functionality.
* ----------------------------------------------------------------
*/
/* ----------------------------------------------------------------
 *	InternalIpcMemoryCreate(memKey, size, permission)
 *
 * Attempt to create a new shared memory segment with the specified key
 * and attach it to the current process.
 *
 *	memKey		SysV IPC key to create the segment under
 *	size		requested segment size in bytes
 *	permission	permission bits OR'd into the shmget() flags
 *
 * Returns the attached address, or NULL if the key collides with an
 * existing segment (EEXIST, EACCES, or EIDRM where defined).
 *
 * On success, callbacks are registered with on_shmem_exit to detach and
 * delete the segment, and the key and ID are recorded in the data
 * directory lockfile.
 *
 * Any other failure is not recoverable: a message is printed on stderr
 * and proc_exit(1) is called.
 * ----------------------------------------------------------------
 */
static void *
InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission)
{
	IpcMemoryId shmid;
	void	   *memAddress;

	shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | permission);

	if (shmid < 0)
	{
		/*
		 * Capture errno immediately: the fprintf() calls below are allowed
		 * to modify it, which would make the tests that select the
		 * explanatory hint unreliable.
		 */
		int			save_errno = errno;

		/*
		 * Fail quietly if error indicates a collision with existing
		 * segment. One would expect EEXIST, given that we said IPC_EXCL,
		 * but perhaps we could get a permission violation instead?  Also,
		 * EIDRM might occur if an old seg is slated for destruction but
		 * not gone yet.
		 */
		if (save_errno == EEXIST || save_errno == EACCES
#ifdef EIDRM
			|| save_errno == EIDRM
#endif
			)
			return NULL;

		/*
		 * Else complain and abort
		 */
		fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%u, 0%o) failed: %s\n",
				(int) memKey, size, (IPC_CREAT | IPC_EXCL | permission),
				strerror(save_errno));

		if (save_errno == EINVAL)
			fprintf(stderr,
					"\nThis error usually means that PostgreSQL's request for a shared memory\n"
					"segment exceeded your kernel's SHMMAX parameter. You can either\n"
					"reduce the request size or reconfigure the kernel with larger SHMMAX.\n"
					"To reduce the request size (currently %u bytes), reduce\n"
					"PostgreSQL's shared_buffers parameter (currently %d) and/or\n"
					"its max_connections parameter (currently %d).\n"
					"\n"
					"If the request size is already small, it's possible that it is less than\n"
					"your kernel's SHMMIN parameter, in which case raising the request size or\n"
					"reconfiguring SHMMIN is called for.\n"
					"\n"
					"The PostgreSQL Administrator's Guide contains more information about\n"
					"shared memory configuration.\n\n",
					size, NBuffers, MaxBackends);
		else if (save_errno == ENOMEM)
			fprintf(stderr,
					"\nThis error usually means that PostgreSQL's request for a shared\n"
					"memory segment exceeded available memory or swap space.\n"
					"To reduce the request size (currently %u bytes), reduce\n"
					"PostgreSQL's shared_buffers parameter (currently %d) and/or\n"
					"its max_connections parameter (currently %d).\n"
					"\n"
					"The PostgreSQL Administrator's Guide contains more information about\n"
					"shared memory configuration.\n\n",
					size, NBuffers, MaxBackends);
		else if (save_errno == ENOSPC)
			fprintf(stderr,
					"\nThis error does *not* mean that you have run out of disk space.\n"
					"\n"
					"It occurs either if all available shared memory IDs have been taken,\n"
					"in which case you need to raise the SHMMNI parameter in your kernel,\n"
					"or because the system's overall limit for shared memory has been\n"
					"reached. If you cannot increase the shared memory limit,\n"
					"reduce PostgreSQL's shared memory request (currently %u bytes),\n"
					"by reducing its shared_buffers parameter (currently %d) and/or\n"
					"its max_connections parameter (currently %d).\n"
					"\n"
					"The PostgreSQL Administrator's Guide contains more information about\n"
					"shared memory configuration.\n\n",
					size, NBuffers, MaxBackends);

		proc_exit(1);
	}

	/*
	 * Register on-exit routine to delete the new segment.  This is done
	 * before attaching so that a failed shmat() below still removes it.
	 */
	on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));

	/* OK, should be able to attach to the segment */
#if defined(solaris) && defined(__sparc__)
	/* use intimate shared memory on SPARC Solaris */
	memAddress = shmat(shmid, 0, SHM_SHARE_MMU);
#else
	memAddress = shmat(shmid, 0, 0);
#endif

	if (memAddress == (void *) -1)
	{
		fprintf(stderr, "IpcMemoryCreate: shmat(id=%d) failed: %s\n",
				shmid, strerror(errno));
		proc_exit(1);
	}

	/* Register on-exit routine to detach new segment before deleting */
	on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));

	/* Record key and ID in lockfile for data directory. */
	RecordSharedMemoryInLockFile(memKey, shmid);

	return memAddress;
}
/*
 * IpcMemoryDetach(status, shmaddr)
 *
 * Removes a shared memory segment from the process' address space.
 * Called as an on_shmem_exit callback, hence the odd argument list:
 * the exit status is not used and the attach address arrives in a Datum.
 *
 * A failure is only reported on stderr.  (It used to be reported via
 * elog(WARNING), but that is pointless because any client has long since
 * disconnected by the time this runs.)
 */
static void
IpcMemoryDetach(int status, Datum shmaddr)
{
	if (shmdt(DatumGetPointer(shmaddr)) >= 0)
		return;

	fprintf(stderr, "IpcMemoryDetach: shmdt(%p) failed: %s\n",
			DatumGetPointer(shmaddr), strerror(errno));
}
/*
 * IpcMemoryDelete(status, shmId)
 *
 * Deletes a shared memory segment.  Called as an on_shmem_exit callback,
 * hence the odd argument list: the exit status is not used and the
 * segment ID arrives in a Datum.
 *
 * A failure is only reported on stderr.  (It used to be reported via
 * elog(WARNING), but that is pointless because any client has long since
 * disconnected by the time this runs.)
 */
static void
IpcMemoryDelete(int status, Datum shmId)
{
	int			rc;

	rc = shmctl(DatumGetInt32(shmId), IPC_RMID, (struct shmid_ds *) NULL);
	if (rc >= 0)
		return;

	fprintf(stderr, "IpcMemoryDelete: shmctl(%d, %d, 0) failed: %s\n",
			DatumGetInt32(shmId), IPC_RMID, strerror(errno));
}
/*
 * SharedMemoryIsInUse(shmKey, shmId)
 *
 * Is a shared memory segment in use?
 *
 * A segment counts as in use when it exists and has at least one process
 * attached to it.  If it cannot be examined for any reason other than
 * nonexistence (most likely because it doesn't belong to our userid), it
 * is assumed to be in use.
 *
 * shmKey is not consulted; only shmId is used.
 */
bool
SharedMemoryIsInUse(IpcMemoryKey shmKey, IpcMemoryId shmId)
{
	struct shmid_ds segInfo;

	if (shmctl(shmId, IPC_STAT, &segInfo) >= 0)
	{
		/* Stat worked: in use exactly when something is attached */
		return segInfo.shm_nattch != 0;
	}

	/*
	 * Stat failed.  EINVAL has several causes documented in the shmctl man
	 * page, but we assume it means the segment no longer exists, hence not
	 * in use.  Every other error is taken to mean it is in use.
	 */
	return errno != EINVAL;
}
/* ----------------------------------------------------------------
* private memory support
*
* Rather than allocating shmem segments with IPC_PRIVATE key, we
* just malloc() the requested amount of space. This code emulates
* the needed shmem functions.
* ----------------------------------------------------------------
*/
/*
 * PrivateMemoryCreate(size)
 *
 * malloc() a zero-filled block of "size" bytes and register an
 * on_shmem_exit callback that frees it.  Returns the block's address;
 * on allocation failure, reports on stderr and calls proc_exit(1).
 */
static void *
PrivateMemoryCreate(uint32 size)
{
	void	   *block = malloc(size);

	if (block == NULL)
	{
		fprintf(stderr, "PrivateMemoryCreate: malloc(%u) failed\n", size);
		proc_exit(1);
	}

	/* Zero the block so that Purify does not complain */
	MemSet(block, 0, size);

	/* Arrange for the storage to be released at exit */
	on_shmem_exit(PrivateMemoryDelete, PointerGetDatum(block));

	return block;
}
/*
 * PrivateMemoryDelete(status, memaddr)
 *
 * Frees the block whose address is carried in "memaddr".  This is the
 * callback PrivateMemoryCreate registers with on_shmem_exit; the exit
 * status is not used.
 */
static void
PrivateMemoryDelete(int status, Datum memaddr)
{
	void	   *block = DatumGetPointer(memaddr);

	free(block);
}
/* ------------------
 * Routines to assign keys for new IPC objects
 *
 * The idea here is to detect and re-use keys that may have been assigned
 * by a crashed postmaster or backend.
 * ------------------
 */
/*
 * Most recently tried shared memory key.  IpcInitKeyAssignment() resets it;
 * IpcMemoryCreate() increments it before every creation attempt.
 */
static IpcMemoryKey NextShmemSegID = 0;
/*
 * Most recently tried semaphore key.  IpcInitKeyAssignment() resets it;
 * IpcSemaphoreCreate() increments it before every creation attempt.
 */
static IpcSemaphoreKey NextSemaID = 0;
/*
 * IpcInitKeyAssignment(port)
 *
 * (Re)initialize key assignment.  Used at startup of a postmaster or
 * standalone backend, and again at postmaster reset.  Both key counters
 * restart from port * 1000.
 */
void
IpcInitKeyAssignment(int port)
{
	int			keyBase = port * 1000;

	NextShmemSegID = keyBase;
	NextSemaID = keyBase;
}
/*
 * Create a shared memory segment of the given size and initialize its
 * standard header. Dead Postgres segments are recycled if found,
 * but we do not fail upon collision with non-Postgres shmem segments.
 *
 * size: total segment size in bytes; must exceed the aligned header size.
 * makePrivate: if true, the segment is simply malloc()'d via
 * PrivateMemoryCreate and no IPC key is consumed.
 * permission: permission bits passed to InternalIpcMemoryCreate.
 *
 * Returns a pointer to the initialized PGShmemHeader at the start of the
 * segment. Keys are tried in increasing order starting just after
 * NextShmemSegID until one can be created.
 */
PGShmemHeader *
IpcMemoryCreate(uint32 size, bool makePrivate, int permission)
{
void *memAddress;
PGShmemHeader *hdr;
/* Room for a header? */
Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
/* Loop till we find a free IPC key */
for (NextShmemSegID++;; NextShmemSegID++)
{
IpcMemoryId shmid;
/* Special case if creating a private segment --- just malloc() it */
if (makePrivate)
{
memAddress = PrivateMemoryCreate(size);
break;
}
/* Try to create new segment */
memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
if (memAddress)
break; /* successful create and attach */
/* See if it looks to be leftover from a dead Postgres process */
/* (only the header needs to be mapped to inspect it) */
shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
if (shmid < 0)
continue; /* failed: must be some other app's */
#if defined(solaris) && defined(__sparc__)
/* use intimate shared memory on SPARC Solaris */
memAddress = shmat(shmid, 0, SHM_SHARE_MMU);
#else
memAddress = shmat(shmid, 0, 0);
#endif
if (memAddress == (void *) -1)
continue; /* failed: must be some other app's */
hdr = (PGShmemHeader *) memAddress;
if (hdr->magic != PGShmemMagic)
{
shmdt(memAddress);
continue; /* segment belongs to a non-Postgres app */
}
/*
* If the creator PID is my own PID or does not belong to any
* extant process, it's safe to zap it.
*/
if (hdr->creatorPID != getpid())
{
/* kill() with signal 0 only probes for the process's existence */
if (kill(hdr->creatorPID, 0) == 0 ||
errno != ESRCH)
{
shmdt(memAddress);
continue; /* segment belongs to a live process */
}
}
/*
* The segment appears to be from a dead Postgres process, or from
* a previous cycle of life in this same process. Zap it, if
* possible. This probably shouldn't fail, but if it does, assume
* the segment belongs to someone else after all, and continue
* quietly.
*/
shmdt(memAddress);
if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0)
continue;
/*
* Now try again to create the segment.
*/
memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
if (memAddress)
break; /* successful create and attach */
/*
* Can only get here if some other process managed to create the
* same shmem key before we did. Let him have that one, loop
* around to try next key.
*/
}
/*
* OK, we created a new segment. Mark it as created by this process.
* The order of assignments here is critical so that another Postgres
* process can't see the header as valid but belonging to an invalid
* PID!
*/
hdr = (PGShmemHeader *) memAddress;
hdr->creatorPID = getpid();
hdr->magic = PGShmemMagic;
/*
* Initialize space allocation status for segment.
*/
hdr->totalsize = size;
/* free space begins right after the aligned header */
hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
return hdr;
}
/*
 * Create a semaphore set with the given number of useful semaphores
 * (an additional sema is actually allocated to serve as identifier).
 * Dead Postgres sema sets are recycled if found, but we do not fail
 * upon collision with non-Postgres sema sets.
 *
 *	numSems			number of usable semaphores wanted
 *	permission		permission bits for the new set
 *	semStartValue	initial value of each semaphore
 *	removeOnExit	if true, the set is deleted via an on_shmem_exit
 *					callback
 *
 * Returns the ID of the new set.  Keys are tried in increasing order
 * starting just after NextSemaID until one can be created.  A failure to
 * mark the set as ours is reported on stderr and followed by proc_exit(1).
 */
IpcSemaphoreId
IpcSemaphoreCreate(int numSems, int permission,
				   int semStartValue, bool removeOnExit)
{
	IpcSemaphoreId semId;
	union semun semun;

	/* Loop till we find a free IPC key */
	for (NextSemaID++;; NextSemaID++)
	{
		pid_t		creatorPID;

		/* Try to create new semaphore set */
		semId = InternalIpcSemaphoreCreate(NextSemaID, numSems + 1,
										   permission, semStartValue,
										   removeOnExit);
		if (semId >= 0)
			break;				/* successful create */

		/* See if it looks to be leftover from a dead Postgres process */
		semId = semget(NextSemaID, numSems + 1, 0);
		if (semId < 0)
			continue;			/* failed: must be some other app's */
		if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
			continue;			/* sema belongs to a non-Postgres app */

		/*
		 * If the creator PID is my own PID or does not belong to any
		 * extant process, it's safe to zap it.
		 */
		creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
		if (creatorPID <= 0)
			continue;			/* oops, GETPID failed */
		if (creatorPID != getpid())
		{
			/* kill() with signal 0 only probes for the process's existence */
			if (kill(creatorPID, 0) == 0 ||
				errno != ESRCH)
				continue;		/* sema belongs to a live process */
		}

		/*
		 * The sema set appears to be from a dead Postgres process, or
		 * from a previous cycle of life in this same process.  Zap it, if
		 * possible.  This probably shouldn't fail, but if it does, assume
		 * the sema set belongs to someone else after all, and continue
		 * quietly.
		 */
		semun.val = 0;			/* unused, but keep compiler quiet */
		if (semctl(semId, 0, IPC_RMID, semun) < 0)
			continue;

		/*
		 * Now try again to create the sema set.
		 */
		semId = InternalIpcSemaphoreCreate(NextSemaID, numSems + 1,
										   permission, semStartValue,
										   removeOnExit);
		if (semId >= 0)
			break;				/* successful create */

		/*
		 * Can only get here if some other process managed to create the
		 * same sema key before we did.  Let him have that one, loop
		 * around to try next key.
		 */
	}

	/*
	 * OK, we created a new sema set.  Mark it as created by this process.
	 * We do this by setting the spare semaphore to PGSemaMagic-1 and then
	 * incrementing it with semop().  That leaves it with value
	 * PGSemaMagic and sempid referencing this process.
	 */
	semun.val = PGSemaMagic - 1;
	if (semctl(semId, numSems, SETVAL, semun) < 0)
	{
		/*
		 * Capture errno immediately: the fprintf() below is allowed to
		 * modify it, which would make the ERANGE test unreliable.
		 */
		int			save_errno = errno;

		fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, %d, SETVAL, %d) failed: %s\n",
				semId, numSems, PGSemaMagic - 1, strerror(save_errno));

		if (save_errno == ERANGE)
			fprintf(stderr,
					"You possibly need to raise your kernel's SEMVMX value to be at least\n"
					"%d. Look into the PostgreSQL documentation for details.\n",
					PGSemaMagic);

		proc_exit(1);
	}
	IpcSemaphoreUnlock(semId, numSems);

	return semId;
}

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.46 2002/03/02 21:39:29 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.47 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -21,8 +21,11 @@
#include "access/xlog.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/lwlock.h"
#include "storage/pg_sema.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/sinval.h"
@@ -41,9 +44,12 @@
* memory. This is true for a standalone backend, false for a postmaster.
*/
void
CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends)
CreateSharedMemoryAndSemaphores(bool makePrivate,
int maxBackends,
int port)
{
int size;
int numSemas;
PGShmemHeader *seghdr;
/*
@@ -70,12 +76,14 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends)
/*
* Create the shmem segment
*/
seghdr = IpcMemoryCreate(size, makePrivate, IPCProtection);
seghdr = PGSharedMemoryCreate(size, makePrivate, port);
/*
* First initialize spinlocks --- needed by InitShmemAllocation()
* Create semaphores
*/
CreateSpinlocks();
numSemas = ProcGlobalSemas(maxBackends);
numSemas += SpinlockSemas();
PGReserveSemaphores(numSemas, port);
/*
* Set up shared memory allocation mechanism

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.64 2002/03/06 06:10:05 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.65 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -61,6 +61,7 @@
#include "postgres.h"
#include "access/transam.h"
#include "storage/pg_shmem.h"
#include "storage/spin.h"
#include "utils/tqual.h"

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.45 2002/03/02 23:35:57 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.46 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,6 +16,7 @@
#include "miscadmin.h"
#include "storage/backendid.h"
#include "storage/ipc.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/sinvaladt.h"

View File

@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lwlock.c,v 1.9 2002/03/02 21:39:29 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lwlock.c,v 1.10 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -302,7 +302,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
for (;;)
{
/* "false" means cannot accept cancel/die interrupt here. */
IpcSemaphoreLock(proc->sem.semId, proc->sem.semNum, false);
PGSemaphoreLock(&proc->sem, false);
if (!proc->lwWaiting)
break;
extraWaits++;
@@ -325,7 +325,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
* Fix the process wait semaphore's count for any absorbed wakeups.
*/
while (extraWaits-- > 0)
IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
PGSemaphoreUnlock(&proc->sem);
}
/*
@@ -485,7 +485,7 @@ LWLockRelease(LWLockId lockid)
head = proc->lwWaitLink;
proc->lwWaitLink = NULL;
proc->lwWaiting = false;
IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
PGSemaphoreUnlock(&proc->sem);
}
/*

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.118 2002/03/02 21:39:29 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.119 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -37,12 +37,6 @@
* in the first place was to allow the lock table to grow larger
* than available shared memory and that isn't going to work
* without a lot of unimplemented support anyway.
*
* 4/7/95 -- instead of allocating a set of 1 semaphore per process, we
* allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores
* shared among backends (we keep a few sets of semaphores around).
* This is so that we can support more backends. (system-wide semaphore
* sets run out pretty fast.) -ay 4/95
*/
#include "postgres.h"
@@ -51,18 +45,9 @@
#include <unistd.h>
#include <sys/time.h>
#include "storage/ipc.h"
/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
#ifdef HAVE_SYS_SEM_H
#include <sys/sem.h>
#endif
#if defined(__darwin__)
#include "port/darwin/sem.h"
#endif
#include "miscadmin.h"
#include "access/xact.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "storage/spin.h"
@@ -73,11 +58,11 @@ int DeadlockTimeout = 1000;
PROC *MyProc = NULL;
/*
* This spinlock protects the freelist of recycled PROC structures and the
* bitmap of free semaphores. We cannot use an LWLock because the LWLock
* manager depends on already having a PROC and a wait semaphore! But these
* structures are touched relatively infrequently (only at backend startup
* or shutdown) and not for very long, so a spinlock is okay.
* This spinlock protects the freelist of recycled PROC structures.
* We cannot use an LWLock because the LWLock manager depends on already
* having a PROC and a wait semaphore! But these structures are touched
* relatively infrequently (only at backend startup or shutdown) and not for
* very long, so a spinlock is okay.
*/
static slock_t *ProcStructLock = NULL;
@@ -90,21 +75,24 @@ static bool waitingForSignal = false;
static void ProcKill(void);
static void DummyProcKill(void);
static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum);
static void ProcFreeSem(IpcSemaphoreId semId, int semNum);
static void ZeroProcSemaphore(PROC *proc);
static void ProcFreeAllSemaphores(void);
/*
 * ProcGlobalSemas
 *		Tell the caller how many semaphores InitProcGlobal is going to need.
 *
 * The total is one semaphore for each regular backend plus a single extra
 * one that is set aside for the dummy process.
 */
int
ProcGlobalSemas(int maxBackends)
{
	const int	dummyProcSemas = 1; /* the dummy process needs its own sema */

	return dummyProcSemas + maxBackends;
}
/*
* InitProcGlobal -
* initializes the global process table. We put it here so that
* the postmaster can do this initialization. (ProcFreeAllSemaphores needs
* to read this table on exiting the postmaster. If we have the first
* backend do this, starting up and killing the postmaster without
* starting any backends will be a problem.)
* the postmaster can do this initialization.
*
* We also allocate all the per-process semaphores we will need to support
* We also create all the per-process semaphores we will need to support
* the requested number of backends. We used to allocate semaphores
* only when backends were actually started up, but that is bad because
* it lets Postgres fail under load --- a lot of Unix systems are
@@ -114,28 +102,19 @@ static void ProcFreeAllSemaphores(void);
* of backends immediately at initialization --- if the sysadmin has set
* MaxBackends higher than his kernel will support, he'll find out sooner
* rather than later.
*
* Another reason for creating semaphores here is that the semaphore
* implementation typically requires us to create semaphores in the
* postmaster, not in backends.
*/
void
InitProcGlobal(int maxBackends)
{
int semMapEntries;
Size procGlobalSize;
bool found = false;
/*
* Compute size for ProcGlobal structure. Note we need one more sema
* besides those used for regular backends; this is accounted for in
* the PROC_SEM_MAP_ENTRIES macro. (We do it that way so that other
* modules that use PROC_SEM_MAP_ENTRIES(maxBackends) to size data
* structures don't have to know about this explicitly.)
*/
Assert(maxBackends > 0);
semMapEntries = PROC_SEM_MAP_ENTRIES(maxBackends);
procGlobalSize = sizeof(PROC_HDR) + (semMapEntries - 1) *sizeof(SEM_MAP_ENTRY);
/* Create or attach to the ProcGlobal shared structure */
ProcGlobal = (PROC_HDR *)
ShmemInitStruct("Proc Header", procGlobalSize, &found);
ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
/* --------------------
* We're the first - initialize.
@@ -148,47 +127,33 @@ InitProcGlobal(int maxBackends)
int i;
ProcGlobal->freeProcs = INVALID_OFFSET;
ProcGlobal->semMapEntries = semMapEntries;
for (i = 0; i < semMapEntries; i++)
{
ProcGlobal->procSemMap[i].procSemId = -1;
ProcGlobal->procSemMap[i].freeSemMap = 0;
}
/*
* Arrange to delete semas on exit --- set this up now so that we
* will clean up if pre-allocation fails. We use our own
* freeproc, rather than IpcSemaphoreCreate's removeOnExit option,
* because we don't want to fill up the on_shmem_exit list with a
* separate entry for each semaphore set.
* Pre-create the PROC structures and create a semaphore for each.
*/
on_shmem_exit(ProcFreeAllSemaphores, 0);
/*
* Pre-create the semaphores.
*/
for (i = 0; i < semMapEntries; i++)
for (i = 0; i < maxBackends; i++)
{
IpcSemaphoreId semId;
PROC *proc;
semId = IpcSemaphoreCreate(PROC_NSEMS_PER_SET,
IPCProtection,
1,
false);
ProcGlobal->procSemMap[i].procSemId = semId;
proc = (PROC *) ShmemAlloc(sizeof(PROC));
if (!proc)
elog(FATAL, "cannot create new proc: out of memory");
MemSet(proc, 0, sizeof(PROC));
PGSemaphoreCreate(&proc->sem);
proc->links.next = ProcGlobal->freeProcs;
ProcGlobal->freeProcs = MAKE_OFFSET(proc);
}
/*
* Pre-allocate a PROC structure for dummy (checkpoint) processes,
* and reserve the last sema of the precreated semas for it.
* too. This does not get linked into the freeProcs list.
*/
DummyProc = (PROC *) ShmemAlloc(sizeof(PROC));
if (!DummyProc)
elog(FATAL, "cannot create new proc: out of memory");
MemSet(DummyProc, 0, sizeof(PROC));
DummyProc->pid = 0; /* marks DummyProc as not in use */
i = semMapEntries - 1;
ProcGlobal->procSemMap[i].freeSemMap |= 1 << (PROC_NSEMS_PER_SET - 1);
DummyProc->sem.semId = ProcGlobal->procSemMap[i].procSemId;
DummyProc->sem.semNum = PROC_NSEMS_PER_SET - 1;
PGSemaphoreCreate(&DummyProc->sem);
/* Create ProcStructLock spinlock, too */
ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
@@ -197,7 +162,7 @@ InitProcGlobal(int maxBackends)
}
/*
* InitProcess -- create a per-process data structure for this backend
* InitProcess -- initialize a per-process data structure for this backend
*/
void
InitProcess(void)
@@ -217,7 +182,8 @@ InitProcess(void)
elog(ERROR, "InitProcess: you already exist");
/*
* try to get a proc struct from the free list first
* Try to get a proc struct from the free list. If this fails,
* we must be out of PROC structures (not to mention semaphores).
*/
SpinLockAcquire(ProcStructLock);
@@ -232,20 +198,19 @@ InitProcess(void)
else
{
/*
* have to allocate a new one.
* If we reach here, all the PROCs are in use. This is one of
* the possible places to detect "too many backends", so give the
* standard error message.
*/
SpinLockRelease(ProcStructLock);
MyProc = (PROC *) ShmemAlloc(sizeof(PROC));
if (!MyProc)
elog(FATAL, "cannot create new proc: out of memory");
elog(FATAL, "Sorry, too many clients already");
}
/*
* Initialize all fields of MyProc.
* Initialize all fields of MyProc, except for the semaphore which
* was prepared for us by InitProcGlobal.
*/
SHMQueueElemInit(&(MyProc->links));
MyProc->sem.semId = -1; /* no wait-semaphore acquired yet */
MyProc->sem.semNum = -1;
MyProc->errType = STATUS_OK;
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
@@ -264,19 +229,11 @@ InitProcess(void)
*/
on_shmem_exit(ProcKill, 0);
/*
* Set up a wait-semaphore for the proc. (We rely on ProcKill to
* clean up MyProc if this fails.)
*/
if (IsUnderPostmaster)
ProcGetNewSemIdAndNum(&MyProc->sem.semId, &MyProc->sem.semNum);
/*
* We might be reusing a semaphore that belonged to a failed process.
* So be careful and reinitialize its value here.
*/
if (MyProc->sem.semId >= 0)
ZeroProcSemaphore(MyProc);
PGSemaphoreReset(&MyProc->sem);
/*
* Now that we have a PROC, we could try to acquire locks, so
@@ -340,25 +297,7 @@ InitDummyProcess(void)
* We might be reusing a semaphore that belonged to a failed process.
* So be careful and reinitialize its value here.
*/
if (MyProc->sem.semId >= 0)
ZeroProcSemaphore(MyProc);
}
/*
* Initialize the proc's wait-semaphore to count zero.
*/
static void
ZeroProcSemaphore(PROC *proc)
{
union semun semun;
semun.val = 0;
if (semctl(proc->sem.semId, proc->sem.semNum, SETVAL, semun) < 0)
{
fprintf(stderr, "ZeroProcSemaphore: semctl(id=%d,SETVAL) failed: %s\n",
proc->sem.semId, strerror(errno));
proc_exit(255);
}
PGSemaphoreReset(&MyProc->sem);
}
/*
@@ -397,7 +336,7 @@ LockWaitCancel(void)
* to zero. Otherwise, our next attempt to wait for a lock will fall
* through prematurely.
*/
ZeroProcSemaphore(MyProc);
PGSemaphoreReset(&MyProc->sem);
/*
* Return true even if we were kicked off the lock before we were able
@@ -463,11 +402,7 @@ ProcKill(void)
SpinLockAcquire(ProcStructLock);
/* Free up my wait semaphore, if I got one */
if (MyProc->sem.semId >= 0)
ProcFreeSem(MyProc->sem.semId, MyProc->sem.semNum);
/* Add PROC struct to freelist so space can be recycled in future */
/* Return PROC structure (and semaphore) to freelist */
MyProc->links.next = procglobal->freeProcs;
procglobal->freeProcs = MAKE_OFFSET(MyProc);
@@ -701,10 +636,10 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
/*
* If someone wakes us between LWLockRelease and IpcSemaphoreLock,
* IpcSemaphoreLock will not block. The wakeup is "saved" by the
* If someone wakes us between LWLockRelease and PGSemaphoreLock,
* PGSemaphoreLock will not block. The wakeup is "saved" by the
* semaphore implementation. Note also that if HandleDeadLock is
* invoked but does not detect a deadlock, IpcSemaphoreLock() will
* invoked but does not detect a deadlock, PGSemaphoreLock() will
* continue to wait. There used to be a loop here, but it was useless
* code...
*
@@ -714,7 +649,7 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
* here. We don't, because we have no state-change work to do after
* being granted the lock (the grantor did it all).
*/
IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, true);
PGSemaphoreLock(&MyProc->sem, true);
/*
* Disable the timer, if it's still running
@@ -775,7 +710,7 @@ ProcWakeup(PROC *proc, int errType)
proc->errType = errType;
/* And awaken it */
IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
PGSemaphoreUnlock(&proc->sem);
return retProc;
}
@@ -914,7 +849,7 @@ HandleDeadLock(SIGNAL_ARGS)
* Unlock my semaphore so that the interrupted ProcSleep() call can
* finish.
*/
IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum);
PGSemaphoreUnlock(&MyProc->sem);
/*
* We're done here. Transaction abort caused by the error that
@@ -943,7 +878,7 @@ void
ProcWaitForSignal(void)
{
waitingForSignal = true;
IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, true);
PGSemaphoreLock(&MyProc->sem, true);
waitingForSignal = false;
}
@@ -957,7 +892,7 @@ ProcWaitForSignal(void)
void
ProcCancelWaitForSignal(void)
{
ZeroProcSemaphore(MyProc);
PGSemaphoreReset(&MyProc->sem);
waitingForSignal = false;
}
@@ -970,7 +905,7 @@ ProcSendSignal(BackendId procId)
PROC *proc = BackendIdGetProc(procId);
if (proc != NULL)
IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
PGSemaphoreUnlock(&proc->sem);
}
@@ -1035,110 +970,3 @@ disable_sigalrm_interrupt(void)
return true;
}
/*****************************************************************************
*
*****************************************************************************/
/*
* ProcGetNewSemIdAndNum -
* scan the free semaphore bitmap and allocate a single semaphore from
* a semaphore set.
*
* On success, *semId receives the id of the semaphore set that was chosen
* and *semNum receives the index of the allocated semaphore within that set.
* If no initialized set has a free semaphore, the failure is reported with
* elog(FATAL) using the standard "too many clients" message.
*
* Note that, despite its name, a 1 bit in freeSemMap marks a semaphore that
* is already allocated, while a 0 bit marks one that is still free.
*
* The bitmap is scanned and updated while holding ProcStructLock; the lock
* is released on every path before returning or reporting the error.
*/
static void
ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum)
{
/* use volatile pointer to prevent code rearrangement */
volatile PROC_HDR *procglobal = ProcGlobal;
/*
* NOTE(review): semMapEntries is read before ProcStructLock is taken;
* presumably it never changes once InitProcGlobal has set it -- confirm.
*/
int semMapEntries = procglobal->semMapEntries;
volatile SEM_MAP_ENTRY *procSemMap = procglobal->procSemMap;
/* bitmap value in which all PROC_NSEMS_PER_SET low-order bits are set */
int32 fullmask = (1 << PROC_NSEMS_PER_SET) - 1;
int i;
SpinLockAcquire(ProcStructLock);
/* Outer loop: one iteration per semaphore set in the map */
for (i = 0; i < semMapEntries; i++)
{
/* mask always equals (1 << j) while the inner loop runs */
int mask = 1;
int j;
if (procSemMap[i].freeSemMap == fullmask)
continue; /* this set is fully allocated */
if (procSemMap[i].procSemId < 0)
continue; /* this set hasn't been initialized */
/* Inner loop: look for the lowest-numbered clear bit in this set */
for (j = 0; j < PROC_NSEMS_PER_SET; j++)
{
if ((procSemMap[i].freeSemMap & mask) == 0)
{
/* A free semaphore found. Mark it as allocated. */
procSemMap[i].freeSemMap |= mask;
*semId = procSemMap[i].procSemId;
*semNum = j;
/* drop the lock before handing the result back to the caller */
SpinLockRelease(ProcStructLock);
return;
}
mask <<= 1;
}
}
/* Nothing was free: release the lock before reporting the failure */
SpinLockRelease(ProcStructLock);
/*
* If we reach here, all the semaphores are in use. This is one of
* the possible places to detect "too many backends", so give the
* standard error message. (Whether we detect it here or in sinval.c
* depends on whether MaxBackends is a multiple of
* PROC_NSEMS_PER_SET.)
*/
elog(FATAL, "Sorry, too many clients already");
}
/*
 * ProcFreeSem -
 *		mark one semaphore of a semaphore set as no longer allocated.
 *
 * semId identifies the set and semNum the semaphore's index within it.
 * The matching map entry has the corresponding bit cleared; if no entry
 * carries that set id, a complaint is written to stderr instead.
 *
 * Caller is assumed to hold ProcStructLock.
 */
static void
ProcFreeSem(IpcSemaphoreId semId, int semNum)
{
	const int	nSets = ProcGlobal->semMapEntries;
	const int32 keepOthers = ~(1 << semNum);	/* every bit except ours */
	int			setIdx = 0;

	/* Advance to the first map entry that belongs to this semaphore set */
	while (setIdx < nSets &&
		   ProcGlobal->procSemMap[setIdx].procSemId != semId)
		setIdx++;

	if (setIdx < nSets)
		ProcGlobal->procSemMap[setIdx].freeSemMap &= keepOthers;
	else
	{
		/* can't elog here!!! */
		fprintf(stderr, "ProcFreeSem: no ProcGlobal entry for semId %d\n", semId);
	}
}
/*
* ProcFreeAllSemaphores -
* called at shmem_exit time, ie when exiting the postmaster or
* destroying shared state for a failed set of backends.
* Free up all the semaphores allocated to the lmgrs of the backends.
*/
static void
ProcFreeAllSemaphores(void)
{
int i;
for (i = 0; i < ProcGlobal->semMapEntries; i++)
{
if (ProcGlobal->procSemMap[i].procSemId >= 0)
IpcSemaphoreKill(ProcGlobal->procSemMap[i].procSemId);
}
}

View File

@@ -6,7 +6,8 @@
*
* For machines that have test-and-set (TAS) instructions, s_lock.h/.c
* define the spinlock implementation. This file contains only a stub
* implementation for spinlocks using SysV semaphores. The semaphore method
* implementation for spinlocks using PGSemaphores. Unless semaphores
* are implemented in a way that doesn't involve a kernel call, this
* is too slow to be very useful :-(
*
*
@@ -15,143 +16,49 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.7 2001/11/05 17:46:28 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.8 2002/05/05 00:03:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <errno.h>
#include "storage/ipc.h"
/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
#ifdef HAVE_SYS_SEM_H
#include <sys/sem.h>
#endif
#if defined(__darwin__)
#include "port/darwin/sem.h"
#endif
#include "storage/lwlock.h"
#include "storage/proc.h"
#include "storage/pg_sema.h"
#include "storage/spin.h"
#ifdef HAS_TEST_AND_SET
/*
* CreateSpinlocks --- create and initialize spinlocks during startup
* Report number of semaphores needed to support spinlocks.
*/
void
CreateSpinlocks(void)
int
SpinlockSemas(void)
{
/* no-op when we have TAS spinlocks */
return 0;
}
#else /* !HAS_TEST_AND_SET */
/*
* No TAS, so spinlocks are implemented using SysV semaphores.
*
* Typedef slock_t stores the semId and sem number of the sema to use.
* The semas needed are created by CreateSpinlocks and doled out by
* s_init_lock_sema.
*
* Since many systems have a rather small SEMMSL limit on semas per set,
* we allocate the semaphores required in sets of SPINLOCKS_PER_SET semas.
* This value is deliberately made equal to PROC_NSEMS_PER_SET so that all
* sema sets allocated by Postgres will be the same size; that eases the
* semaphore-recycling logic in IpcSemaphoreCreate().
*
* Note that the SpinLockIds array is not in shared memory; it is filled
* by the postmaster and then inherited through fork() by backends. This
* is OK because its contents do not change after shmem initialization.
* No TAS, so spinlocks are implemented as PGSemaphores.
*/
#define SPINLOCKS_PER_SET PROC_NSEMS_PER_SET
static IpcSemaphoreId *SpinLockIds = NULL;
static int numSpinSets = 0; /* number of sema sets used */
static int numSpinLocks = 0; /* total number of semas allocated */
static int nextSpinLock = 0; /* next free spinlock index */
static void SpinFreeAllSemaphores(void);
/*
* CreateSpinlocks --- create and initialize spinlocks during startup
* Report number of semaphores needed to support spinlocks.
*/
void
CreateSpinlocks(void)
int
SpinlockSemas(void)
{
int i;
if (SpinLockIds == NULL)
{
/*
* Compute number of spinlocks needed. It would be cleaner to
* distribute this logic into the affected modules, similar to the
* way shmem space estimation is handled.
*
* For now, though, we just need a few spinlocks (10 should be
* plenty) plus one for each LWLock.
*/
numSpinLocks = NumLWLocks() + 10;
/* might as well round up to a multiple of SPINLOCKS_PER_SET */
numSpinSets = (numSpinLocks - 1) / SPINLOCKS_PER_SET + 1;
numSpinLocks = numSpinSets * SPINLOCKS_PER_SET;
SpinLockIds = (IpcSemaphoreId *)
malloc(numSpinSets * sizeof(IpcSemaphoreId));
Assert(SpinLockIds != NULL);
}
for (i = 0; i < numSpinSets; i++)
SpinLockIds[i] = -1;
/*
* Arrange to delete semas on exit --- set this up now so that we will
* clean up if allocation fails. We use our own freeproc, rather than
* IpcSemaphoreCreate's removeOnExit option, because we don't want to
* fill up the on_shmem_exit list with a separate entry for each
* semaphore set.
* It would be cleaner to distribute this logic into the affected modules,
* similar to the way shmem space estimation is handled.
*
* For now, though, we just need a few spinlocks (10 should be
* plenty) plus one for each LWLock.
*/
on_shmem_exit(SpinFreeAllSemaphores, 0);
/* Create sema sets and set all semas to count 1 */
for (i = 0; i < numSpinSets; i++)
{
SpinLockIds[i] = IpcSemaphoreCreate(SPINLOCKS_PER_SET,
IPCProtection,
1,
false);
}
/* Init counter for allocating dynamic spinlocks */
nextSpinLock = 0;
}
/*
* SpinFreeAllSemaphores -
* called at shmem_exit time, ie when exiting the postmaster or
* destroying shared state for a failed set of backends.
* Free up all the semaphores allocated for spinlocks.
*/
static void
SpinFreeAllSemaphores(void)
{
int i;
for (i = 0; i < numSpinSets; i++)
{
if (SpinLockIds[i] >= 0)
IpcSemaphoreKill(SpinLockIds[i]);
}
free(SpinLockIds);
SpinLockIds = NULL;
return NumLWLocks() + 10;
}
/*
@@ -161,30 +68,28 @@ SpinFreeAllSemaphores(void)
void
s_init_lock_sema(volatile slock_t *lock)
{
if (nextSpinLock >= numSpinLocks)
elog(FATAL, "s_init_lock_sema: not enough semaphores");
lock->semId = SpinLockIds[nextSpinLock / SPINLOCKS_PER_SET];
lock->sem = nextSpinLock % SPINLOCKS_PER_SET;
nextSpinLock++;
PGSemaphoreCreate((PGSemaphore) lock);
}
void
s_unlock_sema(volatile slock_t *lock)
{
IpcSemaphoreUnlock(lock->semId, lock->sem);
PGSemaphoreUnlock((PGSemaphore) lock);
}
bool
s_lock_free_sema(volatile slock_t *lock)
{
return IpcSemaphoreGetValue(lock->semId, lock->sem) > 0;
/* We don't currently use S_LOCK_FREE anyway */
elog(ERROR, "spin.c does not support S_LOCK_FREE()");
return false;
}
int
tas_sema(volatile slock_t *lock)
{
/* Note that TAS macros return 0 if *success* */
return !IpcSemaphoreTryLock(lock->semId, lock->sem);
return !PGSemaphoreTryLock((PGSemaphore) lock);
}
#endif /* !HAS_TEST_AND_SET */