Create an internal semaphore API that is not tied to SysV semaphores.

As proof of concept, provide an alternate implementation based on POSIX semaphores. Also push the SysV shared-memory implementation into a separate file so that it can be replaced conveniently.
2025-09-03 15:22:11 +03:00 · 2002-05-05 00:03:29 +00:00
parent 91fc10fdac
commit 72a3902a66
37 changed files with 1659 additions and 1370 deletions
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.90 2002/03/06 06:10:03 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.91 2002/05/05 00:03:28 tgl Exp $
 *
 * NOTES:
 *
@@ -51,6 +51,7 @@

 #include "miscadmin.h"
 #include "storage/fd.h"
+#include "storage/ipc.h"


 /* Filename components for OpenTemporaryFile */
--- a/src/backend/storage/ipc/ipc.c
+++ b/src/backend/storage/ipc/ipc.c
@@ -3,25 +3,17 @@
 * ipc.c
 *	  POSTGRES inter-process communication definitions.
 *
+ * This file is misnamed, as it no longer has much of anything directly
+ * to do with IPC.  The functionality here is concerned with managing
+ * exit-time cleanup for either a postmaster or a backend.
+ *
+ *
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.78 2002/04/13 19:52:51 momjian Exp $
- *
- * NOTES
- *
- *	  Currently, semaphores are used (my understanding anyway) in two
- *	  different ways:
- *		1. as mutexes on machines that don't have test-and-set (eg.
- *		   mips R3000).
- *		2. for putting processes to sleep when waiting on a lock
- *		   and waking them up when the lock is free.
- *	  The number of semaphores in (1) is fixed and those are shared
- *	  among all backends. In (2), there is 1 semaphore per process and those
- *	  are not shared with anyone else.
- *														  -ay 4/95
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.79 2002/05/05 00:03:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -30,27 +22,9 @@
 #include <errno.h>
 #include <signal.h>
 #include <unistd.h>
-#include <sys/file.h>
-
-#include "storage/ipc.h"
-/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
-#ifdef HAVE_SYS_SEM_H
-#include <sys/sem.h>
-#endif
-#ifdef HAVE_SYS_SHM_H
-#include <sys/shm.h>
-#endif
-#ifdef HAVE_KERNEL_OS_H
-#include <kernel/OS.h>
-#endif
-
-#if defined(__darwin__)
-#include "port/darwin/sem.h"
-#endif

 #include "miscadmin.h"
-#include "utils/memutils.h"
-#include "libpq/libpq.h"
+#include "storage/ipc.h"


 /*
@@ -60,17 +34,6 @@
 */
 bool		proc_exit_inprogress = false;

-static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
-						   int numSems, int permission,
-						   int semStartValue, bool removeOnExit);
-static void CallbackSemaphoreKill(int status, Datum semId);
-static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size,
-						int permission);
-static void IpcMemoryDetach(int status, Datum shmaddr);
-static void IpcMemoryDelete(int status, Datum shmId);
-static void *PrivateMemoryCreate(uint32 size);
-static void PrivateMemoryDelete(int status, Datum memaddr);
-

 /* ----------------------------------------------------------------
 *						exit() handling stuff
@@ -83,8 +46,6 @@ static void PrivateMemoryDelete(int status, Datum memaddr);
 * Callback functions can take zero, one, or two args: the first passed
 * arg is the integer exitcode, the second is the Datum supplied when
 * the callback was registered.
- *
- * XXX these functions probably ought to live in some other module.
 * ----------------------------------------------------------------
 */

@@ -230,762 +191,3 @@ on_exit_reset(void)
 	on_shmem_exit_index = 0;
 	on_proc_exit_index = 0;
 }
-
-
-/* ----------------------------------------------------------------
- *						Semaphore support
- *
- * These routines represent a fairly thin layer on top of SysV semaphore
- * functionality.
- * ----------------------------------------------------------------
- */
-
-/* ----------------------------------------------------------------
- *	InternalIpcSemaphoreCreate(semKey, numSems, permission,
- *							   semStartValue, removeOnExit)
- *
- * Attempt to create a new semaphore set with the specified key.
- * Will fail (return -1) if such a set already exists.
- * On success, a callback is optionally registered with on_shmem_exit
- * to delete the semaphore set when on_shmem_exit is called.
- *
- * If we fail with a failure code other than collision-with-existing-set,
- * print out an error and abort.  Other types of errors are not recoverable.
- * ----------------------------------------------------------------
- */
-static IpcSemaphoreId
-InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
-						   int numSems, int permission,
-						   int semStartValue, bool removeOnExit)
-{
-	int			semId;
-	int			i;
-	u_short		array[IPC_NMAXSEM];
-	union semun semun;
-
-	Assert(numSems > 0 && numSems <= IPC_NMAXSEM);
-
-	semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | permission);
-
-	if (semId < 0)
-	{
-		/*
-		 * Fail quietly if error indicates a collision with existing set.
-		 * One would expect EEXIST, given that we said IPC_EXCL, but
-		 * perhaps we could get a permission violation instead?  Also,
-		 * EIDRM might occur if an old set is slated for destruction but
-		 * not gone yet.
-		 */
-		if (errno == EEXIST || errno == EACCES
-#ifdef EIDRM
-			|| errno == EIDRM
-#endif
-			)
-			return -1;
-
-		/*
-		 * Else complain and abort
-		 */
-		fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n",
-			  (int) semKey, numSems, (IPC_CREAT | IPC_EXCL | permission),
-				strerror(errno));
-
-		if (errno == ENOSPC)
-			fprintf(stderr,
-					"\nThis error does *not* mean that you have run out of disk space.\n"
-					"\n"
-					"It occurs when either the system limit for the maximum number of\n"
-					"semaphore sets (SEMMNI), or the system wide maximum number of\n"
-					"semaphores (SEMMNS), would be exceeded.  You need to raise the\n"
-					"respective kernel parameter.  Alternatively, reduce PostgreSQL's\n"
-					"consumption of semaphores by reducing its max_connections parameter\n"
-					"(currently %d).\n"
-					"\n"
-					"The PostgreSQL Administrator's Guide contains more information about\n"
-					"configuring your system for PostgreSQL.\n\n",
-					MaxBackends);
-
-		proc_exit(1);
-	}
-
-	/* Initialize new semas to specified start value */
-	for (i = 0; i < numSems; i++)
-		array[i] = semStartValue;
-	semun.array = array;
-	if (semctl(semId, 0, SETALL, semun) < 0)
-	{
-		fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, 0, SETALL, ...) failed: %s\n",
-				semId, strerror(errno));
-
-		if (errno == ERANGE)
-			fprintf(stderr,
-					"You possibly need to raise your kernel's SEMVMX value to be at least\n"
-			"%d.  Look into the PostgreSQL documentation for details.\n",
-					semStartValue);
-
-		IpcSemaphoreKill(semId);
-		proc_exit(1);
-	}
-
-	/* Register on-exit routine to delete the new set */
-	if (removeOnExit)
-		on_shmem_exit(CallbackSemaphoreKill, Int32GetDatum(semId));
-
-	return semId;
-}
-
-/****************************************************************************/
-/*	 IpcSemaphoreKill(semId)	- removes a semaphore set					*/
-/*																			*/
-/****************************************************************************/
-void
-IpcSemaphoreKill(IpcSemaphoreId semId)
-{
-	union semun semun;
-
-	semun.val = 0;				/* unused, but keep compiler quiet */
-
-	if (semctl(semId, 0, IPC_RMID, semun) < 0)
-		fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n",
-				semId, strerror(errno));
-
-	/*
-	 * We used to report a failure via elog(WARNING), but that's pretty
-	 * pointless considering any client has long since disconnected ...
-	 */
-}
-
-/****************************************************************************/
-/*	 CallbackSemaphoreKill(status, semId)									*/
-/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
-/****************************************************************************/
-static void
-CallbackSemaphoreKill(int status, Datum semId)
-{
-	IpcSemaphoreKill(DatumGetInt32(semId));
-}
-
-/****************************************************************************/
-/*	 IpcSemaphoreLock(semId, sem) - locks a semaphore						*/
-/****************************************************************************/
-void
-IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK)
-{
-	int			errStatus;
-	struct sembuf sops;
-
-	sops.sem_op = -1;			/* decrement */
-	sops.sem_flg = 0;
-	sops.sem_num = sem;
-
-	/*
-	 * Note: if errStatus is -1 and errno == EINTR then it means we
-	 * returned from the operation prematurely because we were sent a
-	 * signal.	So we try and lock the semaphore again.
-	 *
-	 * Each time around the loop, we check for a cancel/die interrupt. We
-	 * assume that if such an interrupt comes in while we are waiting, it
-	 * will cause the semop() call to exit with errno == EINTR, so that we
-	 * will be able to service the interrupt (if not in a critical section
-	 * already).
-	 *
-	 * Once we acquire the lock, we do NOT check for an interrupt before
-	 * returning.  The caller needs to be able to record ownership of the
-	 * lock before any interrupt can be accepted.
-	 *
-	 * There is a window of a few instructions between CHECK_FOR_INTERRUPTS
-	 * and entering the semop() call.  If a cancel/die interrupt occurs in
-	 * that window, we would fail to notice it until after we acquire the
-	 * lock (or get another interrupt to escape the semop()).  We can
-	 * avoid this problem by temporarily setting ImmediateInterruptOK to
-	 * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in
-	 * this interval will execute directly.  However, there is a huge
-	 * pitfall: there is another window of a few instructions after the
-	 * semop() before we are able to reset ImmediateInterruptOK.  If an
-	 * interrupt occurs then, we'll lose control, which means that the
-	 * lock has been acquired but our caller did not get a chance to
-	 * record the fact. Therefore, we only set ImmediateInterruptOK if the
-	 * caller tells us it's OK to do so, ie, the caller does not need to
-	 * record acquiring the lock.  (This is currently true for lockmanager
-	 * locks, since the process that granted us the lock did all the
-	 * necessary state updates. It's not true for SysV semaphores used to
-	 * implement LW locks or emulate spinlocks --- but the wait time for
-	 * such locks should not be very long, anyway.)
-	 */
-	do
-	{
-		ImmediateInterruptOK = interruptOK;
-		CHECK_FOR_INTERRUPTS();
-		errStatus = semop(semId, &sops, 1);
-		ImmediateInterruptOK = false;
-	} while (errStatus == -1 && errno == EINTR);
-
-	if (errStatus == -1)
-	{
-		fprintf(stderr, "IpcSemaphoreLock: semop(id=%d) failed: %s\n",
-				semId, strerror(errno));
-		proc_exit(255);
-	}
-}
-
-/****************************************************************************/
-/*	 IpcSemaphoreUnlock(semId, sem)		- unlocks a semaphore				*/
-/****************************************************************************/
-void
-IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem)
-{
-	int			errStatus;
-	struct sembuf sops;
-
-	sops.sem_op = 1;			/* increment */
-	sops.sem_flg = 0;
-	sops.sem_num = sem;
-
-
-	/*
-	 * Note: if errStatus is -1 and errno == EINTR then it means we
-	 * returned from the operation prematurely because we were sent a
-	 * signal.	So we try and unlock the semaphore again. Not clear this
-	 * can really happen, but might as well cope.
-	 */
-	do
-	{
-		errStatus = semop(semId, &sops, 1);
-	} while (errStatus == -1 && errno == EINTR);
-
-	if (errStatus == -1)
-	{
-		fprintf(stderr, "IpcSemaphoreUnlock: semop(id=%d) failed: %s\n",
-				semId, strerror(errno));
-		proc_exit(255);
-	}
-}
-
-/****************************************************************************/
-/*	 IpcSemaphoreTryLock(semId, sem)	- conditionally locks a semaphore	*/
-/* Lock the semaphore if it's free, but don't block.						*/
-/****************************************************************************/
-bool
-IpcSemaphoreTryLock(IpcSemaphoreId semId, int sem)
-{
-	int			errStatus;
-	struct sembuf sops;
-
-	sops.sem_op = -1;			/* decrement */
-	sops.sem_flg = IPC_NOWAIT;	/* but don't block */
-	sops.sem_num = sem;
-
-	/*
-	 * Note: if errStatus is -1 and errno == EINTR then it means we
-	 * returned from the operation prematurely because we were sent a
-	 * signal.	So we try and lock the semaphore again.
-	 */
-	do
-	{
-		errStatus = semop(semId, &sops, 1);
-	} while (errStatus == -1 && errno == EINTR);
-
-	if (errStatus == -1)
-	{
-		/* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
-#ifdef EAGAIN
-		if (errno == EAGAIN)
-			return false;		/* failed to lock it */
-#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
-		if (errno == EWOULDBLOCK)
-			return false;		/* failed to lock it */
-#endif
-		/* Otherwise we got trouble */
-		fprintf(stderr, "IpcSemaphoreTryLock: semop(id=%d) failed: %s\n",
-				semId, strerror(errno));
-		proc_exit(255);
-	}
-
-	return true;
-}
-
-/* Get the current value (semval) of the semaphore */
-int
-IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem)
-{
-	union semun dummy;			/* for Solaris */
-
-	dummy.val = 0;				/* unused */
-
-	return semctl(semId, sem, GETVAL, dummy);
-}
-
-/* Get the PID of the last process to do semop() on the semaphore */
-static pid_t
-IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int sem)
-{
-	union semun dummy;			/* for Solaris */
-
-	dummy.val = 0;				/* unused */
-
-	return semctl(semId, sem, GETPID, dummy);
-}
-
-
-/* ----------------------------------------------------------------
- *						Shared memory support
- *
- * These routines represent a fairly thin layer on top of SysV shared
- * memory functionality.
- * ----------------------------------------------------------------
- */
-
-/* ----------------------------------------------------------------
- *	InternalIpcMemoryCreate(memKey, size, permission)
- *
- * Attempt to create a new shared memory segment with the specified key.
- * Will fail (return NULL) if such a segment already exists.  If successful,
- * attach the segment to the current process and return its attached address.
- * On success, callbacks are registered with on_shmem_exit to detach and
- * delete the segment when on_shmem_exit is called.
- *
- * If we fail with a failure code other than collision-with-existing-segment,
- * print out an error and abort.  Other types of errors are not recoverable.
- * ----------------------------------------------------------------
- */
-static void *
-InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission)
-{
-	IpcMemoryId shmid;
-	void	   *memAddress;
-
-	shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | permission);
-
-	if (shmid < 0)
-	{
-		/*
-		 * Fail quietly if error indicates a collision with existing
-		 * segment. One would expect EEXIST, given that we said IPC_EXCL,
-		 * but perhaps we could get a permission violation instead?  Also,
-		 * EIDRM might occur if an old seg is slated for destruction but
-		 * not gone yet.
-		 */
-		if (errno == EEXIST || errno == EACCES
-#ifdef EIDRM
-			|| errno == EIDRM
-#endif
-			)
-			return NULL;
-
-		/*
-		 * Else complain and abort
-		 */
-		fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%u, 0%o) failed: %s\n",
-				(int) memKey, size, (IPC_CREAT | IPC_EXCL | permission),
-				strerror(errno));
-
-		if (errno == EINVAL)
-			fprintf(stderr,
-					"\nThis error usually means that PostgreSQL's request for a shared memory\n"
-					"segment exceeded your kernel's SHMMAX parameter.  You can either\n"
-					"reduce the request size or reconfigure the kernel with larger SHMMAX.\n"
-			  "To reduce the request size (currently %u bytes), reduce\n"
-					"PostgreSQL's shared_buffers parameter (currently %d) and/or\n"
-					"its max_connections parameter (currently %d).\n"
-					"\n"
-					"If the request size is already small, it's possible that it is less than\n"
-					"your kernel's SHMMIN parameter, in which case raising the request size or\n"
-					"reconfiguring SHMMIN is called for.\n"
-					"\n"
-					"The PostgreSQL Administrator's Guide contains more information about\n"
-					"shared memory configuration.\n\n",
-					size, NBuffers, MaxBackends);
-
-		else if (errno == ENOMEM)
-			fprintf(stderr,
-					"\nThis error usually means that PostgreSQL's request for a shared\n"
-			  "memory segment exceeded available memory or swap space.\n"
-			  "To reduce the request size (currently %u bytes), reduce\n"
-					"PostgreSQL's shared_buffers parameter (currently %d) and/or\n"
-					"its max_connections parameter (currently %d).\n"
-					"\n"
-					"The PostgreSQL Administrator's Guide contains more information about\n"
-					"shared memory configuration.\n\n",
-					size, NBuffers, MaxBackends);
-
-		else if (errno == ENOSPC)
-			fprintf(stderr,
-					"\nThis error does *not* mean that you have run out of disk space.\n"
-					"\n"
-					"It occurs either if all available shared memory IDs have been taken,\n"
-					"in which case you need to raise the SHMMNI parameter in your kernel,\n"
-					"or because the system's overall limit for shared memory has been\n"
-			"reached.  If you cannot increase the shared memory limit,\n"
-					"reduce PostgreSQL's shared memory request (currently %u bytes),\n"
-					"by reducing its shared_buffers parameter (currently %d) and/or\n"
-					"its max_connections parameter (currently %d).\n"
-					"\n"
-					"The PostgreSQL Administrator's Guide contains more information about\n"
-					"shared memory configuration.\n\n",
-					size, NBuffers, MaxBackends);
-
-		proc_exit(1);
-	}
-
-	/* Register on-exit routine to delete the new segment */
-	on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));
-
-	/* OK, should be able to attach to the segment */
-#if defined(solaris) && defined(__sparc__)
-	/* use intimate shared memory on SPARC Solaris */
-	memAddress = shmat(shmid, 0, SHM_SHARE_MMU);
-#else
- 	memAddress = shmat(shmid, 0, 0);
-#endif
-
-	if (memAddress == (void *) -1)
-	{
-		fprintf(stderr, "IpcMemoryCreate: shmat(id=%d) failed: %s\n",
-				shmid, strerror(errno));
-		proc_exit(1);
-	}
-
-	/* Register on-exit routine to detach new segment before deleting */
-	on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));
-
-	/* Record key and ID in lockfile for data directory. */
-	RecordSharedMemoryInLockFile(memKey, shmid);
-
-	return memAddress;
-}
-
-/****************************************************************************/
-/*	IpcMemoryDetach(status, shmaddr)	removes a shared memory segment		*/
-/*										from process' address spaceq		*/
-/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
-/****************************************************************************/
-static void
-IpcMemoryDetach(int status, Datum shmaddr)
-{
-	if (shmdt(DatumGetPointer(shmaddr)) < 0)
-		fprintf(stderr, "IpcMemoryDetach: shmdt(%p) failed: %s\n",
-				DatumGetPointer(shmaddr), strerror(errno));
-
-	/*
-	 * We used to report a failure via elog(WARNING), but that's pretty
-	 * pointless considering any client has long since disconnected ...
-	 */
-}
-
-/****************************************************************************/
-/*	IpcMemoryDelete(status, shmId)		deletes a shared memory segment		*/
-/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
-/****************************************************************************/
-static void
-IpcMemoryDelete(int status, Datum shmId)
-{
-	if (shmctl(DatumGetInt32(shmId), IPC_RMID, (struct shmid_ds *) NULL) < 0)
-		fprintf(stderr, "IpcMemoryDelete: shmctl(%d, %d, 0) failed: %s\n",
-				DatumGetInt32(shmId), IPC_RMID, strerror(errno));
-
-	/*
-	 * We used to report a failure via elog(WARNING), but that's pretty
-	 * pointless considering any client has long since disconnected ...
-	 */
-}
-
-/****************************************************************************/
-/*	SharedMemoryIsInUse(shmKey, shmId)	Is a shared memory segment in use?	*/
-/****************************************************************************/
-bool
-SharedMemoryIsInUse(IpcMemoryKey shmKey, IpcMemoryId shmId)
-{
-	struct shmid_ds shmStat;
-
-	/*
-	 * We detect whether a shared memory segment is in use by seeing
-	 * whether it (a) exists and (b) has any processes are attached to it.
-	 *
-	 * If we are unable to perform the stat operation for a reason other than
-	 * nonexistence of the segment (most likely, because it doesn't belong
-	 * to our userid), assume it is in use.
-	 */
-	if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
-	{
-		/*
-		 * EINVAL actually has multiple possible causes documented in the
-		 * shmctl man page, but we assume it must mean the segment no
-		 * longer exists.
-		 */
-		if (errno == EINVAL)
-			return false;
-		/* Else assume segment is in use */
-		return true;
-	}
-	/* If it has attached processes, it's in use */
-	if (shmStat.shm_nattch != 0)
-		return true;
-	return false;
-}
-
-
-/* ----------------------------------------------------------------
- *						private memory support
- *
- * Rather than allocating shmem segments with IPC_PRIVATE key, we
- * just malloc() the requested amount of space.  This code emulates
- * the needed shmem functions.
- * ----------------------------------------------------------------
- */
-
-static void *
-PrivateMemoryCreate(uint32 size)
-{
-	void	   *memAddress;
-
-	memAddress = malloc(size);
-	if (!memAddress)
-	{
-		fprintf(stderr, "PrivateMemoryCreate: malloc(%u) failed\n", size);
-		proc_exit(1);
-	}
-	MemSet(memAddress, 0, size);	/* keep Purify quiet */
-
-	/* Register on-exit routine to release storage */
-	on_shmem_exit(PrivateMemoryDelete, PointerGetDatum(memAddress));
-
-	return memAddress;
-}
-
-static void
-PrivateMemoryDelete(int status, Datum memaddr)
-{
-	free(DatumGetPointer(memaddr));
-}
-
-
-/* ------------------
- *				Routines to assign keys for new IPC objects
- *
- * The idea here is to detect and re-use keys that may have been assigned
- * by a crashed postmaster or backend.
- * ------------------
- */
-
-static IpcMemoryKey NextShmemSegID = 0;
-static IpcSemaphoreKey NextSemaID = 0;
-
-/*
- * (Re) initialize key assignment at startup of postmaster or standalone
- * backend, also at postmaster reset.
- */
-void
-IpcInitKeyAssignment(int port)
-{
-	NextShmemSegID = port * 1000;
-	NextSemaID = port * 1000;
-}
-
-/*
- * Create a shared memory segment of the given size and initialize its
- * standard header.  Dead Postgres segments are recycled if found,
- * but we do not fail upon collision with non-Postgres shmem segments.
- */
-PGShmemHeader *
-IpcMemoryCreate(uint32 size, bool makePrivate, int permission)
-{
-	void	   *memAddress;
-	PGShmemHeader *hdr;
-
-	/* Room for a header? */
-	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
-
-	/* Loop till we find a free IPC key */
-	for (NextShmemSegID++;; NextShmemSegID++)
-	{
-		IpcMemoryId shmid;
-
-		/* Special case if creating a private segment --- just malloc() it */
-		if (makePrivate)
-		{
-			memAddress = PrivateMemoryCreate(size);
-			break;
-		}
-
-		/* Try to create new segment */
-		memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
-		if (memAddress)
-			break;				/* successful create and attach */
-
-		/* See if it looks to be leftover from a dead Postgres process */
-		shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
-		if (shmid < 0)
-			continue;			/* failed: must be some other app's */
-
-#if defined(solaris) && defined(__sparc__)
-		/* use intimate shared memory on SPARC Solaris */
-		memAddress = shmat(shmid, 0, SHM_SHARE_MMU);
-#else
- 		memAddress = shmat(shmid, 0, 0);
-#endif
-
-		if (memAddress == (void *) -1)
-			continue;			/* failed: must be some other app's */
-		hdr = (PGShmemHeader *) memAddress;
-		if (hdr->magic != PGShmemMagic)
-		{
-			shmdt(memAddress);
-			continue;			/* segment belongs to a non-Postgres app */
-		}
-
-		/*
-		 * If the creator PID is my own PID or does not belong to any
-		 * extant process, it's safe to zap it.
-		 */
-		if (hdr->creatorPID != getpid())
-		{
-			if (kill(hdr->creatorPID, 0) == 0 ||
-				errno != ESRCH)
-			{
-				shmdt(memAddress);
-				continue;		/* segment belongs to a live process */
-			}
-		}
-
-		/*
-		 * The segment appears to be from a dead Postgres process, or from
-		 * a previous cycle of life in this same process.  Zap it, if
-		 * possible.  This probably shouldn't fail, but if it does, assume
-		 * the segment belongs to someone else after all, and continue
-		 * quietly.
-		 */
-		shmdt(memAddress);
-		if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0)
-			continue;
-
-		/*
-		 * Now try again to create the segment.
-		 */
-		memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
-		if (memAddress)
-			break;				/* successful create and attach */
-
-		/*
-		 * Can only get here if some other process managed to create the
-		 * same shmem key before we did.  Let him have that one, loop
-		 * around to try next key.
-		 */
-	}
-
-	/*
-	 * OK, we created a new segment.  Mark it as created by this process.
-	 * The order of assignments here is critical so that another Postgres
-	 * process can't see the header as valid but belonging to an invalid
-	 * PID!
-	 */
-	hdr = (PGShmemHeader *) memAddress;
-	hdr->creatorPID = getpid();
-	hdr->magic = PGShmemMagic;
-
-	/*
-	 * Initialize space allocation status for segment.
-	 */
-	hdr->totalsize = size;
-	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
-
-	return hdr;
-}
-
-/*
- * Create a semaphore set with the given number of useful semaphores
- * (an additional sema is actually allocated to serve as identifier).
- * Dead Postgres sema sets are recycled if found, but we do not fail
- * upon collision with non-Postgres sema sets.
- */
-IpcSemaphoreId
-IpcSemaphoreCreate(int numSems, int permission,
-				   int semStartValue, bool removeOnExit)
-{
-	IpcSemaphoreId semId;
-	union semun semun;
-
-	/* Loop till we find a free IPC key */
-	for (NextSemaID++;; NextSemaID++)
-	{
-		pid_t		creatorPID;
-
-		/* Try to create new semaphore set */
-		semId = InternalIpcSemaphoreCreate(NextSemaID, numSems + 1,
-										   permission, semStartValue,
-										   removeOnExit);
-		if (semId >= 0)
-			break;				/* successful create */
-
-		/* See if it looks to be leftover from a dead Postgres process */
-		semId = semget(NextSemaID, numSems + 1, 0);
-		if (semId < 0)
-			continue;			/* failed: must be some other app's */
-		if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
-			continue;			/* sema belongs to a non-Postgres app */
-
-		/*
-		 * If the creator PID is my own PID or does not belong to any
-		 * extant process, it's safe to zap it.
-		 */
-		creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
-		if (creatorPID <= 0)
-			continue;			/* oops, GETPID failed */
-		if (creatorPID != getpid())
-		{
-			if (kill(creatorPID, 0) == 0 ||
-				errno != ESRCH)
-				continue;		/* sema belongs to a live process */
-		}
-
-		/*
-		 * The sema set appears to be from a dead Postgres process, or
-		 * from a previous cycle of life in this same process.	Zap it, if
-		 * possible.  This probably shouldn't fail, but if it does, assume
-		 * the sema set belongs to someone else after all, and continue
-		 * quietly.
-		 */
-		semun.val = 0;			/* unused, but keep compiler quiet */
-		if (semctl(semId, 0, IPC_RMID, semun) < 0)
-			continue;
-
-		/*
-		 * Now try again to create the sema set.
-		 */
-		semId = InternalIpcSemaphoreCreate(NextSemaID, numSems + 1,
-										   permission, semStartValue,
-										   removeOnExit);
-		if (semId >= 0)
-			break;				/* successful create */
-
-		/*
-		 * Can only get here if some other process managed to create the
-		 * same sema key before we did.  Let him have that one, loop
-		 * around to try next key.
-		 */
-	}
-
-	/*
-	 * OK, we created a new sema set.  Mark it as created by this process.
-	 * We do this by setting the spare semaphore to PGSemaMagic-1 and then
-	 * incrementing it with semop().  That leaves it with value
-	 * PGSemaMagic and sempid referencing this process.
-	 */
-	semun.val = PGSemaMagic - 1;
-	if (semctl(semId, numSems, SETVAL, semun) < 0)
-	{
-		fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, %d, SETVAL, %d) failed: %s\n",
-				semId, numSems, PGSemaMagic - 1, strerror(errno));
-
-		if (errno == ERANGE)
-			fprintf(stderr,
-					"You possibly need to raise your kernel's SEMVMX value to be at least\n"
-			"%d.  Look into the PostgreSQL documentation for details.\n",
-					PGSemaMagic);
-
-		proc_exit(1);
-	}
-	IpcSemaphoreUnlock(semId, numSems);
-
-	return semId;
-}
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.46 2002/03/02 21:39:29 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.47 2002/05/05 00:03:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -21,8 +21,11 @@
 #include "access/xlog.h"
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
+#include "storage/ipc.h"
 #include "storage/lmgr.h"
 #include "storage/lwlock.h"
+#include "storage/pg_sema.h"
+#include "storage/pg_shmem.h"
 #include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/sinval.h"
@@ -41,9 +44,12 @@
 * memory.	This is true for a standalone backend, false for a postmaster.
 */
 void
-CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends)
+CreateSharedMemoryAndSemaphores(bool makePrivate,
+								int maxBackends,
+								int port)
 {
 	int			size;
+	int			numSemas;
 	PGShmemHeader *seghdr;

 	/*
@@ -70,12 +76,14 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends)
 	/*
 	 * Create the shmem segment
 	 */
-	seghdr = IpcMemoryCreate(size, makePrivate, IPCProtection);
+	seghdr = PGSharedMemoryCreate(size, makePrivate, port);

 	/*
-	 * First initialize spinlocks --- needed by InitShmemAllocation()
+	 * Create semaphores
 	 */
-	CreateSpinlocks();
+	numSemas = ProcGlobalSemas(maxBackends);
+	numSemas += SpinlockSemas();
+	PGReserveSemaphores(numSemas, port);

 	/*
 	 * Set up shared memory allocation mechanism
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.64 2002/03/06 06:10:05 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.65 2002/05/05 00:03:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -61,6 +61,7 @@
 #include "postgres.h"

 #include "access/transam.h"
+#include "storage/pg_shmem.h"
 #include "storage/spin.h"
 #include "utils/tqual.h"

--- a/src/backend/storage/ipc/sinvaladt.c
+++ b/src/backend/storage/ipc/sinvaladt.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.45 2002/03/02 23:35:57 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.46 2002/05/05 00:03:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -16,6 +16,7 @@

 #include "miscadmin.h"
 #include "storage/backendid.h"
+#include "storage/ipc.h"
 #include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/sinvaladt.h"
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -15,7 +15,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lwlock.c,v 1.9 2002/03/02 21:39:29 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lwlock.c,v 1.10 2002/05/05 00:03:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -302,7 +302,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 		for (;;)
 		{
 			/* "false" means cannot accept cancel/die interrupt here. */
-			IpcSemaphoreLock(proc->sem.semId, proc->sem.semNum, false);
+			PGSemaphoreLock(&proc->sem, false);
 			if (!proc->lwWaiting)
 				break;
 			extraWaits++;
@@ -325,7 +325,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 	 * Fix the process wait semaphore's count for any absorbed wakeups.
 	 */
 	while (extraWaits-- > 0)
-		IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
+		PGSemaphoreUnlock(&proc->sem);
 }

 /*
@@ -485,7 +485,7 @@ LWLockRelease(LWLockId lockid)
 		head = proc->lwWaitLink;
 		proc->lwWaitLink = NULL;
 		proc->lwWaiting = false;
-		IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
+		PGSemaphoreUnlock(&proc->sem);
 	}

 	/*
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.118 2002/03/02 21:39:29 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.119 2002/05/05 00:03:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -37,12 +37,6 @@
 *		in the first place was to allow the lock table to grow larger
 *		than available shared memory and that isn't going to work
 *		without a lot of unimplemented support anyway.
- *
- * 4/7/95 -- instead of allocating a set of 1 semaphore per process, we
- *		allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores
- *		shared among backends (we keep a few sets of semaphores around).
- *		This is so that we can support more backends. (system-wide semaphore
- *		sets run out pretty fast.)				  -ay 4/95
 */
 #include "postgres.h"

@@ -51,18 +45,9 @@
 #include <unistd.h>
 #include <sys/time.h>

-#include "storage/ipc.h"
-/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
-#ifdef HAVE_SYS_SEM_H
-#include <sys/sem.h>
-#endif
-
-#if defined(__darwin__)
-#include "port/darwin/sem.h"
-#endif
-
 #include "miscadmin.h"
 #include "access/xact.h"
+#include "storage/ipc.h"
 #include "storage/proc.h"
 #include "storage/sinval.h"
 #include "storage/spin.h"
@@ -73,11 +58,11 @@ int			DeadlockTimeout = 1000;
 PROC	   *MyProc = NULL;

 /*
- * This spinlock protects the freelist of recycled PROC structures and the
- * bitmap of free semaphores.  We cannot use an LWLock because the LWLock
- * manager depends on already having a PROC and a wait semaphore!  But these
- * structures are touched relatively infrequently (only at backend startup
- * or shutdown) and not for very long, so a spinlock is okay.
+ * This spinlock protects the freelist of recycled PROC structures.
+ * We cannot use an LWLock because the LWLock manager depends on already
+ * having a PROC and a wait semaphore!  But the freelist is touched
+ * relatively infrequently (only at backend startup or shutdown) and not for
+ * very long, so a spinlock is okay.
 */
 static slock_t *ProcStructLock = NULL;

@@ -90,21 +75,24 @@ static bool waitingForSignal = false;

 static void ProcKill(void);
 static void DummyProcKill(void);
-static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum);
-static void ProcFreeSem(IpcSemaphoreId semId, int semNum);
-static void ZeroProcSemaphore(PROC *proc);
-static void ProcFreeAllSemaphores(void);


+/*
+ * Report number of semaphores needed by InitProcGlobal (maxBackends + 1).
+ */
+int
+ProcGlobalSemas(int maxBackends)
+{
+	/* One sema per backend PROC, plus one for the dummy (checkpoint) PROC. */
+	return maxBackends + 1;
+}
+
 /*
 * InitProcGlobal -
 *	  initializes the global process table. We put it here so that
- *	  the postmaster can do this initialization. (ProcFreeAllSemaphores needs
- *	  to read this table on exiting the postmaster. If we have the first
- *	  backend do this, starting up and killing the postmaster without
- *	  starting any backends will be a problem.)
+ *	  the postmaster can do this initialization.
 *
- *	  We also allocate all the per-process semaphores we will need to support
+ *	  We also create all the per-process semaphores we will need to support
 *	  the requested number of backends.  We used to allocate semaphores
 *	  only when backends were actually started up, but that is bad because
 *	  it lets Postgres fail under load --- a lot of Unix systems are
@@ -114,28 +102,19 @@ static void ProcFreeAllSemaphores(void);
 *	  of backends immediately at initialization --- if the sysadmin has set
 *	  MaxBackends higher than his kernel will support, he'll find out sooner
 *	  rather than later.
+ *
+ *	  Another reason for creating semaphores here is that the semaphore
+ *	  implementation typically requires us to create semaphores in the
+ *	  postmaster, not in backends.
 */
 void
 InitProcGlobal(int maxBackends)
 {
-	int			semMapEntries;
-	Size		procGlobalSize;
 	bool		found = false;

-	/*
-	 * Compute size for ProcGlobal structure.  Note we need one more sema
-	 * besides those used for regular backends; this is accounted for in
-	 * the PROC_SEM_MAP_ENTRIES macro.	(We do it that way so that other
-	 * modules that use PROC_SEM_MAP_ENTRIES(maxBackends) to size data
-	 * structures don't have to know about this explicitly.)
-	 */
-	Assert(maxBackends > 0);
-	semMapEntries = PROC_SEM_MAP_ENTRIES(maxBackends);
-	procGlobalSize = sizeof(PROC_HDR) + (semMapEntries - 1) *sizeof(SEM_MAP_ENTRY);
-
 	/* Create or attach to the ProcGlobal shared structure */
 	ProcGlobal = (PROC_HDR *)
-		ShmemInitStruct("Proc Header", procGlobalSize, &found);
+		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);

 	/* --------------------
 	 * We're the first - initialize.
@@ -148,47 +127,33 @@ InitProcGlobal(int maxBackends)
 		int			i;

 		ProcGlobal->freeProcs = INVALID_OFFSET;
-		ProcGlobal->semMapEntries = semMapEntries;
-
-		for (i = 0; i < semMapEntries; i++)
-		{
-			ProcGlobal->procSemMap[i].procSemId = -1;
-			ProcGlobal->procSemMap[i].freeSemMap = 0;
-		}

 		/*
-		 * Arrange to delete semas on exit --- set this up now so that we
-		 * will clean up if pre-allocation fails.  We use our own
-		 * freeproc, rather than IpcSemaphoreCreate's removeOnExit option,
-		 * because we don't want to fill up the on_shmem_exit list with a
-		 * separate entry for each semaphore set.
+		 * Pre-create the PROC structures and create a semaphore for each.
 		 */
-		on_shmem_exit(ProcFreeAllSemaphores, 0);
-
-		/*
-		 * Pre-create the semaphores.
-		 */
-		for (i = 0; i < semMapEntries; i++)
+		for (i = 0; i < maxBackends; i++)
 		{
-			IpcSemaphoreId semId;
+			PROC   *proc;

-			semId = IpcSemaphoreCreate(PROC_NSEMS_PER_SET,
-									   IPCProtection,
-									   1,
-									   false);
-			ProcGlobal->procSemMap[i].procSemId = semId;
+			proc = (PROC *) ShmemAlloc(sizeof(PROC));
+			if (!proc)
+				elog(FATAL, "cannot create new proc: out of memory");
+			MemSet(proc, 0, sizeof(PROC));
+			PGSemaphoreCreate(&proc->sem);
+			proc->links.next = ProcGlobal->freeProcs;
+			ProcGlobal->freeProcs = MAKE_OFFSET(proc);
 		}

 		/*
 		 * Pre-allocate a PROC structure for dummy (checkpoint) processes,
-		 * and reserve the last sema of the precreated semas for it.
+		 * too.  This does not get linked into the freeProcs list.
 		 */
 		DummyProc = (PROC *) ShmemAlloc(sizeof(PROC));
+		if (!DummyProc)
+			elog(FATAL, "cannot create new proc: out of memory");
+		MemSet(DummyProc, 0, sizeof(PROC));
 		DummyProc->pid = 0;		/* marks DummyProc as not in use */
-		i = semMapEntries - 1;
-		ProcGlobal->procSemMap[i].freeSemMap |= 1 << (PROC_NSEMS_PER_SET - 1);
-		DummyProc->sem.semId = ProcGlobal->procSemMap[i].procSemId;
-		DummyProc->sem.semNum = PROC_NSEMS_PER_SET - 1;
+		PGSemaphoreCreate(&DummyProc->sem);

 		/* Create ProcStructLock spinlock, too */
 		ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
@@ -197,7 +162,7 @@ InitProcGlobal(int maxBackends)
 }

 /*
- * InitProcess -- create a per-process data structure for this backend
+ * InitProcess -- initialize a per-process data structure for this backend
 */
 void
 InitProcess(void)
@@ -217,7 +182,8 @@ InitProcess(void)
 		elog(ERROR, "InitProcess: you already exist");

 	/*
-	 * try to get a proc struct from the free list first
+	 * Try to get a proc struct from the free list.  If this fails,
+	 * we must be out of PROC structures (not to mention semaphores).
 	 */
 	SpinLockAcquire(ProcStructLock);

@@ -232,20 +198,19 @@ InitProcess(void)
 	else
 	{
 		/*
-		 * have to allocate a new one.
+		 * If we reach here, all the PROCs are in use.  This is one of
+		 * the possible places to detect "too many backends", so give the
+		 * standard error message.
 		 */
 		SpinLockRelease(ProcStructLock);
-		MyProc = (PROC *) ShmemAlloc(sizeof(PROC));
-		if (!MyProc)
-			elog(FATAL, "cannot create new proc: out of memory");
+		elog(FATAL, "Sorry, too many clients already");
 	}

 	/*
-	 * Initialize all fields of MyProc.
+	 * Initialize all fields of MyProc, except for the semaphore which
+	 * was prepared for us by InitProcGlobal.
 	 */
 	SHMQueueElemInit(&(MyProc->links));
-	MyProc->sem.semId = -1;		/* no wait-semaphore acquired yet */
-	MyProc->sem.semNum = -1;
 	MyProc->errType = STATUS_OK;
 	MyProc->xid = InvalidTransactionId;
 	MyProc->xmin = InvalidTransactionId;
@@ -264,19 +229,11 @@ InitProcess(void)
 	 */
 	on_shmem_exit(ProcKill, 0);

-	/*
-	 * Set up a wait-semaphore for the proc.  (We rely on ProcKill to
-	 * clean up MyProc if this fails.)
-	 */
-	if (IsUnderPostmaster)
-		ProcGetNewSemIdAndNum(&MyProc->sem.semId, &MyProc->sem.semNum);
-
 	/*
 	 * We might be reusing a semaphore that belonged to a failed process.
 	 * So be careful and reinitialize its value here.
 	 */
-	if (MyProc->sem.semId >= 0)
-		ZeroProcSemaphore(MyProc);
+	PGSemaphoreReset(&MyProc->sem);

 	/*
 	 * Now that we have a PROC, we could try to acquire locks, so
@@ -340,25 +297,7 @@ InitDummyProcess(void)
 	 * We might be reusing a semaphore that belonged to a failed process.
 	 * So be careful and reinitialize its value here.
 	 */
-	if (MyProc->sem.semId >= 0)
-		ZeroProcSemaphore(MyProc);
-}
-
-/*
- * Initialize the proc's wait-semaphore to count zero.
- */
-static void
-ZeroProcSemaphore(PROC *proc)
-{
-	union semun semun;
-
-	semun.val = 0;
-	if (semctl(proc->sem.semId, proc->sem.semNum, SETVAL, semun) < 0)
-	{
-		fprintf(stderr, "ZeroProcSemaphore: semctl(id=%d,SETVAL) failed: %s\n",
-				proc->sem.semId, strerror(errno));
-		proc_exit(255);
-	}
+	PGSemaphoreReset(&MyProc->sem);
 }

 /*
@@ -397,7 +336,7 @@ LockWaitCancel(void)
 	 * to zero. Otherwise, our next attempt to wait for a lock will fall
 	 * through prematurely.
 	 */
-	ZeroProcSemaphore(MyProc);
+	PGSemaphoreReset(&MyProc->sem);

 	/*
 	 * Return true even if we were kicked off the lock before we were able
@@ -463,11 +402,7 @@ ProcKill(void)

 	SpinLockAcquire(ProcStructLock);

-	/* Free up my wait semaphore, if I got one */
-	if (MyProc->sem.semId >= 0)
-		ProcFreeSem(MyProc->sem.semId, MyProc->sem.semNum);
-
-	/* Add PROC struct to freelist so space can be recycled in future */
+	/* Return PROC structure (and semaphore) to freelist */
 	MyProc->links.next = procglobal->freeProcs;
 	procglobal->freeProcs = MAKE_OFFSET(MyProc);

@@ -701,10 +636,10 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
 		elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");

 	/*
-	 * If someone wakes us between LWLockRelease and IpcSemaphoreLock,
-	 * IpcSemaphoreLock will not block.  The wakeup is "saved" by the
+	 * If someone wakes us between LWLockRelease and PGSemaphoreLock,
+	 * PGSemaphoreLock will not block.  The wakeup is "saved" by the
 	 * semaphore implementation.  Note also that if HandleDeadLock is
-	 * invoked but does not detect a deadlock, IpcSemaphoreLock() will
+	 * invoked but does not detect a deadlock, PGSemaphoreLock() will
 	 * continue to wait.  There used to be a loop here, but it was useless
 	 * code...
 	 *
@@ -714,7 +649,7 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
 	 * here.  We don't, because we have no state-change work to do after
 	 * being granted the lock (the grantor did it all).
 	 */
-	IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, true);
+	PGSemaphoreLock(&MyProc->sem, true);

 	/*
 	 * Disable the timer, if it's still running
@@ -775,7 +710,7 @@ ProcWakeup(PROC *proc, int errType)
 	proc->errType = errType;

 	/* And awaken it */
-	IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
+	PGSemaphoreUnlock(&proc->sem);

 	return retProc;
 }
@@ -914,7 +849,7 @@ HandleDeadLock(SIGNAL_ARGS)
 	 * Unlock my semaphore so that the interrupted ProcSleep() call can
 	 * finish.
 	 */
-	IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum);
+	PGSemaphoreUnlock(&MyProc->sem);

 	/*
 	 * We're done here.  Transaction abort caused by the error that
@@ -943,7 +878,7 @@ void
 ProcWaitForSignal(void)
 {
 	waitingForSignal = true;
-	IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, true);
+	PGSemaphoreLock(&MyProc->sem, true);
 	waitingForSignal = false;
 }

@@ -957,7 +892,7 @@ ProcWaitForSignal(void)
 void
 ProcCancelWaitForSignal(void)
 {
-	ZeroProcSemaphore(MyProc);
+	PGSemaphoreReset(&MyProc->sem);
 	waitingForSignal = false;
 }

@@ -970,7 +905,7 @@ ProcSendSignal(BackendId procId)
 	PROC	   *proc = BackendIdGetProc(procId);

 	if (proc != NULL)
-		IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
+		PGSemaphoreUnlock(&proc->sem);
 }


@@ -1035,110 +970,3 @@ disable_sigalrm_interrupt(void)

 	return true;
 }
-
-
-/*****************************************************************************
- *
- *****************************************************************************/
-
-/*
- * ProcGetNewSemIdAndNum -
- *	  scan the free semaphore bitmap and allocate a single semaphore from
- *	  a semaphore set.
- */
-static void
-ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum)
-{
-	/* use volatile pointer to prevent code rearrangement */
-	volatile PROC_HDR *procglobal = ProcGlobal;
-	int			semMapEntries = procglobal->semMapEntries;
-	volatile SEM_MAP_ENTRY *procSemMap = procglobal->procSemMap;
-	int32		fullmask = (1 << PROC_NSEMS_PER_SET) - 1;
-	int			i;
-
-	SpinLockAcquire(ProcStructLock);
-
-	for (i = 0; i < semMapEntries; i++)
-	{
-		int			mask = 1;
-		int			j;
-
-		if (procSemMap[i].freeSemMap == fullmask)
-			continue;			/* this set is fully allocated */
-		if (procSemMap[i].procSemId < 0)
-			continue;			/* this set hasn't been initialized */
-
-		for (j = 0; j < PROC_NSEMS_PER_SET; j++)
-		{
-			if ((procSemMap[i].freeSemMap & mask) == 0)
-			{
-				/* A free semaphore found. Mark it as allocated. */
-				procSemMap[i].freeSemMap |= mask;
-
-				*semId = procSemMap[i].procSemId;
-				*semNum = j;
-
-				SpinLockRelease(ProcStructLock);
-
-				return;
-			}
-			mask <<= 1;
-		}
-	}
-
-	SpinLockRelease(ProcStructLock);
-
-	/*
-	 * If we reach here, all the semaphores are in use.  This is one of
-	 * the possible places to detect "too many backends", so give the
-	 * standard error message.	(Whether we detect it here or in sinval.c
-	 * depends on whether MaxBackends is a multiple of
-	 * PROC_NSEMS_PER_SET.)
-	 */
-	elog(FATAL, "Sorry, too many clients already");
-}
-
-/*
- * ProcFreeSem -
- *	  free up our semaphore in the semaphore set.
- *
- * Caller is assumed to hold ProcStructLock.
- */
-static void
-ProcFreeSem(IpcSemaphoreId semId, int semNum)
-{
-	int32		mask;
-	int			i;
-	int			semMapEntries = ProcGlobal->semMapEntries;
-
-	mask = ~(1 << semNum);
-
-	for (i = 0; i < semMapEntries; i++)
-	{
-		if (ProcGlobal->procSemMap[i].procSemId == semId)
-		{
-			ProcGlobal->procSemMap[i].freeSemMap &= mask;
-			return;
-		}
-	}
-	/* can't elog here!!! */
-	fprintf(stderr, "ProcFreeSem: no ProcGlobal entry for semId %d\n", semId);
-}
-
-/*
- * ProcFreeAllSemaphores -
- *	  called at shmem_exit time, ie when exiting the postmaster or
- *	  destroying shared state for a failed set of backends.
- *	  Free up all the semaphores allocated to the lmgrs of the backends.
- */
-static void
-ProcFreeAllSemaphores(void)
-{
-	int			i;
-
-	for (i = 0; i < ProcGlobal->semMapEntries; i++)
-	{
-		if (ProcGlobal->procSemMap[i].procSemId >= 0)
-			IpcSemaphoreKill(ProcGlobal->procSemMap[i].procSemId);
-	}
-}
--- a/src/backend/storage/lmgr/spin.c
+++ b/src/backend/storage/lmgr/spin.c
@@ -6,7 +6,8 @@
 *
 * For machines that have test-and-set (TAS) instructions, s_lock.h/.c
 * define the spinlock implementation.	This file contains only a stub
- * implementation for spinlocks using SysV semaphores.	The semaphore method
+ * implementation for spinlocks using PGSemaphores.  Unless semaphores
+ * are implemented in a way that doesn't involve a kernel call, this
 * is too slow to be very useful :-(
 *
 *
@@ -15,143 +16,49 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.7 2001/11/05 17:46:28 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.8 2002/05/05 00:03:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"

-#include <errno.h>
-
-#include "storage/ipc.h"
-/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
-#ifdef HAVE_SYS_SEM_H
-#include <sys/sem.h>
-#endif
-
-#if defined(__darwin__)
-#include "port/darwin/sem.h"
-#endif
-
 #include "storage/lwlock.h"
-#include "storage/proc.h"
+#include "storage/pg_sema.h"
 #include "storage/spin.h"


 #ifdef HAS_TEST_AND_SET

 /*
- * CreateSpinlocks --- create and initialize spinlocks during startup
+ * Report number of semaphores needed to support spinlocks: none with TAS.
  */
-void
-CreateSpinlocks(void)
+int
+SpinlockSemas(void)
 {
-	/* no-op when we have TAS spinlocks */
+	return 0;
 }

 #else							/* !HAS_TEST_AND_SET */

 /*
- * No TAS, so spinlocks are implemented using SysV semaphores.
- *
- * Typedef slock_t stores the semId and sem number of the sema to use.
- * The semas needed are created by CreateSpinlocks and doled out by
- * s_init_lock_sema.
- *
- * Since many systems have a rather small SEMMSL limit on semas per set,
- * we allocate the semaphores required in sets of SPINLOCKS_PER_SET semas.
- * This value is deliberately made equal to PROC_NSEMS_PER_SET so that all
- * sema sets allocated by Postgres will be the same size; that eases the
- * semaphore-recycling logic in IpcSemaphoreCreate().
- *
- * Note that the SpinLockIds array is not in shared memory; it is filled
- * by the postmaster and then inherited through fork() by backends.  This
- * is OK because its contents do not change after shmem initialization.
+ * No TAS, so spinlocks are implemented as PGSemaphores.
 */

-#define SPINLOCKS_PER_SET  PROC_NSEMS_PER_SET
-
-static IpcSemaphoreId *SpinLockIds = NULL;
-
-static int	numSpinSets = 0;	/* number of sema sets used */
-static int	numSpinLocks = 0;	/* total number of semas allocated */
-static int	nextSpinLock = 0;	/* next free spinlock index */
-
-static void SpinFreeAllSemaphores(void);
-

 /*
- * CreateSpinlocks --- create and initialize spinlocks during startup
+ * Report number of semaphores needed to support spinlocks.
 */
-void
-CreateSpinlocks(void)
+int
+SpinlockSemas(void)
 {
-	int			i;
-
-	if (SpinLockIds == NULL)
-	{
-		/*
-		 * Compute number of spinlocks needed.	It would be cleaner to
-		 * distribute this logic into the affected modules, similar to the
-		 * way shmem space estimation is handled.
-		 *
-		 * For now, though, we just need a few spinlocks (10 should be
-		 * plenty) plus one for each LWLock.
-		 */
-		numSpinLocks = NumLWLocks() + 10;
-
-		/* might as well round up to a multiple of SPINLOCKS_PER_SET */
-		numSpinSets = (numSpinLocks - 1) / SPINLOCKS_PER_SET + 1;
-		numSpinLocks = numSpinSets * SPINLOCKS_PER_SET;
-
-		SpinLockIds = (IpcSemaphoreId *)
-			malloc(numSpinSets * sizeof(IpcSemaphoreId));
-		Assert(SpinLockIds != NULL);
-	}
-
-	for (i = 0; i < numSpinSets; i++)
-		SpinLockIds[i] = -1;
-
 	/*
-	 * Arrange to delete semas on exit --- set this up now so that we will
-	 * clean up if allocation fails.  We use our own freeproc, rather than
-	 * IpcSemaphoreCreate's removeOnExit option, because we don't want to
-	 * fill up the on_shmem_exit list with a separate entry for each
-	 * semaphore set.
+	 * It would be cleaner to distribute this logic into the affected modules,
+	 * similar to the way shmem space estimation is handled.
+	 *
+	 * For now, though, we just need a few spinlocks (10 should be
+	 * plenty) plus one for each LWLock.
 	 */
-	on_shmem_exit(SpinFreeAllSemaphores, 0);
-
-	/* Create sema sets and set all semas to count 1 */
-	for (i = 0; i < numSpinSets; i++)
-	{
-		SpinLockIds[i] = IpcSemaphoreCreate(SPINLOCKS_PER_SET,
-											IPCProtection,
-											1,
-											false);
-	}
-
-	/* Init counter for allocating dynamic spinlocks */
-	nextSpinLock = 0;
-}
-
-/*
- * SpinFreeAllSemaphores -
- *	  called at shmem_exit time, ie when exiting the postmaster or
- *	  destroying shared state for a failed set of backends.
- *	  Free up all the semaphores allocated for spinlocks.
- */
-static void
-SpinFreeAllSemaphores(void)
-{
-	int			i;
-
-	for (i = 0; i < numSpinSets; i++)
-	{
-		if (SpinLockIds[i] >= 0)
-			IpcSemaphoreKill(SpinLockIds[i]);
-	}
-	free(SpinLockIds);
-	SpinLockIds = NULL;
+	return NumLWLocks() + 10;
 }

 /*
@@ -161,30 +68,28 @@ SpinFreeAllSemaphores(void)
 void
 s_init_lock_sema(volatile slock_t *lock)
 {
-	if (nextSpinLock >= numSpinLocks)
-		elog(FATAL, "s_init_lock_sema: not enough semaphores");
-	lock->semId = SpinLockIds[nextSpinLock / SPINLOCKS_PER_SET];
-	lock->sem = nextSpinLock % SPINLOCKS_PER_SET;
-	nextSpinLock++;
+	PGSemaphoreCreate((PGSemaphore) lock);
 }

 void
 s_unlock_sema(volatile slock_t *lock)
 {
-	IpcSemaphoreUnlock(lock->semId, lock->sem);
+	PGSemaphoreUnlock((PGSemaphore) lock);
 }

 bool
 s_lock_free_sema(volatile slock_t *lock)
 {
-	return IpcSemaphoreGetValue(lock->semId, lock->sem) > 0;
+	/* Not supported here; nothing currently uses S_LOCK_FREE anyway */
+	elog(ERROR, "spin.c does not support S_LOCK_FREE()");
+	return false;
 }

 int
 tas_sema(volatile slock_t *lock)
 {
 	/* Note that TAS macros return 0 if *success* */
-	return !IpcSemaphoreTryLock(lock->semId, lock->sem);
+	return !PGSemaphoreTryLock((PGSemaphore) lock);
 }

 #endif   /* !HAS_TEST_AND_SET */