1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-16 15:02:33 +03:00
Files
postgres/src/backend/storage/ipc/ipci.c
Heikki Linnakangas dafaa3efb7 Implement genuine serializable isolation level.
Until now, our Serializable mode has in fact been what's called Snapshot
Isolation, which allows some anomalies that could not occur in any
serialized ordering of the transactions. This patch fixes that using a
method called Serializable Snapshot Isolation, based on research papers by
Michael J. Cahill (see README-SSI for full references). In Serializable
Snapshot Isolation, transactions run like they do in Snapshot Isolation,
but a predicate lock manager observes the reads and writes performed and
aborts transactions if it detects that an anomaly might occur. This method
produces some false positives, ie. it sometimes aborts transactions even
though there is no anomaly.

To track reads we implement predicate locking, see storage/lmgr/predicate.c.
Whenever a tuple is read, a predicate lock is acquired on the tuple. Shared
memory is finite, so when a transaction takes many tuple-level locks on a
page, the locks are promoted to a single page-level lock, and further to a
single relation level lock if necessary. To lock key values with no matching
tuple, a sequential scan always takes a relation-level lock, and an index
scan acquires a page-level lock that covers the search key, whether or not
there are any matching keys at the moment.

A predicate lock doesn't conflict with any regular locks or with another
predicate locks in the normal sense. They're only used by the predicate lock
manager to detect the danger of anomalies. Only serializable transactions
participate in predicate locking, so there should be no extra overhead for
for other transactions.

Predicate locks can't be released at commit, but must be remembered until
all the transactions that overlapped with it have completed. That means that
we need to remember an unbounded amount of predicate locks, so we apply a
lossy but conservative method of tracking locks for committed transactions.
If we run short of shared memory, we overflow to a new "pg_serial" SLRU
pool.

We don't currently allow Serializable transactions in Hot Standby mode.
That would be hard, because even read-only transactions can cause anomalies
that wouldn't otherwise occur.

Serializable isolation mode now means the new fully serializable level.
Repeatable Read gives you the old Snapshot Isolation level that we have
always had.

Kevin Grittner and Dan Ports, reviewed by Jeff Davis, Heikki Linnakangas and
Anssi Kääriäinen
2011-02-08 00:09:08 +02:00

254 lines
6.8 KiB
C

/*-------------------------------------------------------------------------
*
* ipci.c
* POSTGRES inter-process communication initialization code.
*
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/storage/ipc/ipci.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/clog.h"
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/nbtree.h"
#include "access/subtrans.h"
#include "access/twophase.h"
#include "commands/async.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/bgwriter.h"
#include "postmaster/postmaster.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/predicate.h"
#include "storage/procarray.h"
#include "storage/procsignal.h"
#include "storage/sinvaladt.h"
#include "storage/spin.h"
shmem_startup_hook_type shmem_startup_hook = NULL;
static Size total_addin_request = 0;
static bool addin_request_allowed = true;
/*
* RequestAddinShmemSpace
* Request that extra shmem space be allocated for use by
* a loadable module.
*
* This is only useful if called from the _PG_init hook of a library that
* is loaded into the postmaster via shared_preload_libraries. Once
* shared memory has been allocated, calls will be ignored. (We could
* raise an error, but it seems better to make it a no-op, so that
* libraries containing such calls can be reloaded if needed.)
*/
void
RequestAddinShmemSpace(Size size)
{
if (IsUnderPostmaster || !addin_request_allowed)
return; /* too late */
total_addin_request = add_size(total_addin_request, size);
}
/*
* CreateSharedMemoryAndSemaphores
* Creates and initializes shared memory and semaphores.
*
* This is called by the postmaster or by a standalone backend.
* It is also called by a backend forked from the postmaster in the
* EXEC_BACKEND case. In the latter case, the shared memory segment
* already exists and has been physically attached to, but we have to
* initialize pointers in local memory that reference the shared structures,
* because we didn't inherit the correct pointer values from the postmaster
* as we do in the fork() scenario. The easiest way to do that is to run
* through the same code as before. (Note that the called routines mostly
* check IsUnderPostmaster, rather than EXEC_BACKEND, to detect this case.
* This is a bit code-wasteful and could be cleaned up.)
*
* If "makePrivate" is true then we only need private memory, not shared
* memory. This is true for a standalone backend, false for a postmaster.
*/
void
CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
{
if (!IsUnderPostmaster)
{
PGShmemHeader *seghdr;
Size size;
int numSemas;
/*
* Size of the Postgres shared-memory block is estimated via
* moderately-accurate estimates for the big hogs, plus 100K for the
* stuff that's too small to bother with estimating.
*
* We take some care during this phase to ensure that the total size
* request doesn't overflow size_t. If this gets through, we don't
* need to be so careful during the actual allocation phase.
*/
size = 100000;
size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
sizeof(ShmemIndexEnt)));
size = add_size(size, BufferShmemSize());
size = add_size(size, LockShmemSize());
size = add_size(size, PredicateLockShmemSize());
size = add_size(size, ProcGlobalShmemSize());
size = add_size(size, XLOGShmemSize());
size = add_size(size, CLOGShmemSize());
size = add_size(size, SUBTRANSShmemSize());
size = add_size(size, TwoPhaseShmemSize());
size = add_size(size, MultiXactShmemSize());
size = add_size(size, LWLockShmemSize());
size = add_size(size, ProcArrayShmemSize());
size = add_size(size, BackendStatusShmemSize());
size = add_size(size, SInvalShmemSize());
size = add_size(size, PMSignalShmemSize());
size = add_size(size, ProcSignalShmemSize());
size = add_size(size, BgWriterShmemSize());
size = add_size(size, AutoVacuumShmemSize());
size = add_size(size, WalSndShmemSize());
size = add_size(size, WalRcvShmemSize());
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());
size = add_size(size, AsyncShmemSize());
#ifdef EXEC_BACKEND
size = add_size(size, ShmemBackendArraySize());
#endif
/* freeze the addin request size and include it */
addin_request_allowed = false;
size = add_size(size, total_addin_request);
/* might as well round it off to a multiple of a typical page size */
size = add_size(size, 8192 - (size % 8192));
elog(DEBUG3, "invoking IpcMemoryCreate(size=%lu)",
(unsigned long) size);
/*
* Create the shmem segment
*/
seghdr = PGSharedMemoryCreate(size, makePrivate, port);
InitShmemAccess(seghdr);
/*
* Create semaphores
*/
numSemas = ProcGlobalSemas();
numSemas += SpinlockSemas();
PGReserveSemaphores(numSemas, port);
}
else
{
/*
* We are reattaching to an existing shared memory segment. This
* should only be reached in the EXEC_BACKEND case, and even then only
* with makePrivate == false.
*/
#ifdef EXEC_BACKEND
Assert(!makePrivate);
#else
elog(PANIC, "should be attached to shared memory already");
#endif
}
/*
* Set up shared memory allocation mechanism
*/
if (!IsUnderPostmaster)
InitShmemAllocation();
/*
* Now initialize LWLocks, which do shared memory allocation and are
* needed for InitShmemIndex.
*/
if (!IsUnderPostmaster)
CreateLWLocks();
/*
* Set up shmem.c index hashtable
*/
InitShmemIndex();
/*
* Set up xlog, clog, and buffers
*/
XLOGShmemInit();
CLOGShmemInit();
SUBTRANSShmemInit();
TwoPhaseShmemInit();
MultiXactShmemInit();
InitBufferPool();
/*
* Set up lock manager
*/
InitLocks();
/*
* Set up predicate lock manager
*/
InitPredicateLocks();
/*
* Set up process table
*/
if (!IsUnderPostmaster)
InitProcGlobal();
CreateSharedProcArray();
CreateSharedBackendStatus();
/*
* Set up shared-inval messaging
*/
CreateSharedInvalidationState();
/*
* Set up interprocess signaling mechanisms
*/
PMSignalShmemInit();
ProcSignalShmemInit();
BgWriterShmemInit();
AutoVacuumShmemInit();
WalSndShmemInit();
WalRcvShmemInit();
/*
* Set up other modules that need some shared memory space
*/
BTreeShmemInit();
SyncScanShmemInit();
AsyncShmemInit();
#ifdef EXEC_BACKEND
/*
* Alloc the win32 shared backend array
*/
if (!IsUnderPostmaster)
ShmemBackendArrayAllocation();
#endif
/*
* Now give loadable modules a chance to set up their shmem allocations
*/
if (shmem_startup_hook)
shmem_startup_hook();
}