mirror of
https://github.com/postgres/postgres.git
synced 2025-04-24 10:47:04 +03:00
Divide the lock manager's shared state into 'partitions', so as to
reduce contention for the former single LockMgrLock. Per my recent proposal. I set it up for 16 partitions, but on a pgbench test this gives only a marginal further improvement over 4 partitions --- we need to test more scenarios to choose the number of partitions.
This commit is contained in:
parent
be8100d64e
commit
ec0baf949e
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.17 2005/11/22 18:17:07 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.18 2005/12/11 21:02:17 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Each global transaction is associated with a global transaction
|
||||
@ -284,7 +284,8 @@ MarkAsPreparing(TransactionId xid, const char *gid,
|
||||
gxact->proc.lwWaitLink = NULL;
|
||||
gxact->proc.waitLock = NULL;
|
||||
gxact->proc.waitProcLock = NULL;
|
||||
SHMQueueInit(&(gxact->proc.procLocks));
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
SHMQueueInit(&(gxact->proc.myProcLocks[i]));
|
||||
/* subxid data must be filled later by GXactLoadSubxactData */
|
||||
gxact->proc.subxids.overflowed = false;
|
||||
gxact->proc.subxids.nxids = 0;
|
||||
|
@ -14,8 +14,8 @@
|
||||
*
|
||||
* The process array now also includes PGPROC structures representing
|
||||
* prepared transactions. The xid and subxids fields of these are valid,
|
||||
* as is the procLocks list. They can be distinguished from regular backend
|
||||
* PGPROCs at need by checking for pid == 0.
|
||||
* as are the myProcLocks lists. They can be distinguished from regular
|
||||
* backend PGPROCs at need by checking for pid == 0.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||
@ -23,7 +23,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.8 2005/11/22 18:17:20 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.9 2005/12/11 21:02:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -1,4 +1,4 @@
|
||||
$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.18 2005/12/09 01:22:04 tgl Exp $
|
||||
$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.19 2005/12/11 21:02:18 tgl Exp $
|
||||
|
||||
|
||||
LOCKING OVERVIEW
|
||||
@ -51,8 +51,11 @@ Lock methods describe the overall locking behavior. Currently there are
|
||||
two lock methods: DEFAULT and USER. (USER locks are non-blocking.)
|
||||
|
||||
Lock modes describe the type of the lock (read/write or shared/exclusive).
|
||||
See src/tools/backend/index.html and src/include/storage/lock.h for more
|
||||
details.
|
||||
In principle, each lock method can have its own set of lock modes with
|
||||
different conflict rules, but currently DEFAULT and USER methods use
|
||||
identical lock mode sets. See src/tools/backend/index.html and
|
||||
src/include/storage/lock.h for more details. (Lock modes are also called
|
||||
lock types in some places in the code and documentation.)
|
||||
|
||||
There are two fundamental lock structures in shared memory: the
|
||||
per-lockable-object LOCK struct, and the per-lock-and-requestor PROCLOCK
|
||||
@ -67,7 +70,7 @@ be made per lockable object/lock mode/backend. Internally to a backend,
|
||||
however, the same lock may be requested and perhaps released multiple times
|
||||
in a transaction, and it can also be held both transactionally and session-
|
||||
wide. The internal request counts are held in LOCALLOCK so that the shared
|
||||
LockMgrLock need not be obtained to alter them.
|
||||
data structures need not be accessed to alter them.
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
@ -103,10 +106,10 @@ procLocks -
|
||||
be waiting for more!).
|
||||
|
||||
waitProcs -
|
||||
This is a shared memory queue of all process structures corresponding to
|
||||
a backend that is waiting (sleeping) until another backend releases this
|
||||
This is a shared memory queue of all PGPROC structures corresponding to
|
||||
backends that are waiting (sleeping) until another backend releases this
|
||||
lock. The PGPROC structure holds the information needed to determine
|
||||
if it should be woken up when this lock is released.
|
||||
if it should be woken up when the lock is released.
|
||||
|
||||
nRequested -
|
||||
Keeps a count of how many times this lock has been attempted to be
|
||||
@ -131,12 +134,12 @@ nGranted -
|
||||
granted -
|
||||
Keeps count of how many locks of each type are currently held. Once again
|
||||
only elements 1 through MAX_LOCKMODES-1 are used (0 is not). Also, like
|
||||
requested, summing the values of granted should total to the value
|
||||
requested[], summing the values of granted[] should total to the value
|
||||
of nGranted.
|
||||
|
||||
We should always have 0 <= nGranted <= nRequested, and
|
||||
0 <= granted[i] <= requested[i] for each i. If the request counts go to
|
||||
zero, the lock object is no longer needed and can be freed.
|
||||
0 <= granted[i] <= requested[i] for each i. When all the request counts
|
||||
go to zero, the LOCK object is no longer needed and can be freed.
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
@ -154,15 +157,16 @@ tag -
|
||||
SHMEM offset of PGPROC of backend process that owns this PROCLOCK.
|
||||
|
||||
holdMask -
|
||||
A bitmask for the lock types successfully acquired by this PROCLOCK.
|
||||
A bitmask for the lock modes successfully acquired by this PROCLOCK.
|
||||
This should be a subset of the LOCK object's grantMask, and also a
|
||||
subset of the PGPROC object's heldLocks mask.
|
||||
subset of the PGPROC object's heldLocks mask (if the PGPROC is
|
||||
currently waiting for another lock mode on this lock).
|
||||
|
||||
releaseMask -
|
||||
A bitmask for the lock types due to be released during LockReleaseAll.
|
||||
A bitmask for the lock modes due to be released during LockReleaseAll.
|
||||
This must be a subset of the holdMask. Note that it is modified without
|
||||
taking the LockMgrLock, and therefore it is unsafe for any backend except
|
||||
the one owning the PROCLOCK to examine/change it.
|
||||
taking the partition LWLock, and therefore it is unsafe for any
|
||||
backend except the one owning the PROCLOCK to examine/change it.
|
||||
|
||||
lockLink -
|
||||
List link for shared memory queue of all the PROCLOCK objects for the
|
||||
@ -174,7 +178,60 @@ procLink -
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
The deadlock detection algorithm:
|
||||
|
||||
LOCK MANAGER INTERNAL LOCKING
|
||||
|
||||
Before PostgreSQL 8.2, all of the shared-memory data structures used by
|
||||
the lock manager were protected by a single LWLock, the LockMgrLock;
|
||||
any operation involving these data structures had to exclusively lock
|
||||
LockMgrLock. Not too surprisingly, this became a contention bottleneck.
|
||||
To reduce contention, the lock manager's data structures have been split
|
||||
into multiple "partitions", each protected by an independent LWLock.
|
||||
Most operations only need to lock the single partition they are working in.
|
||||
Here are the details:
|
||||
|
||||
* Each possible lock is assigned to one partition according to a hash of
|
||||
its LOCKTAG value (see LockTagToPartition()). The partition's LWLock is
|
||||
considered to protect all the LOCK objects of that partition as well as
|
||||
their subsidiary PROCLOCKs. The shared-memory hash tables for LOCKs and
|
||||
PROCLOCKs are divided into separate hash tables for each partition, and
|
||||
operations on each hash table are likewise protected by the partition
|
||||
lock.
|
||||
|
||||
* Formerly, each PGPROC had a single list of PROCLOCKs belonging to it.
|
||||
This has now been split into per-partition lists, so that access to a
|
||||
particular PROCLOCK list can be protected by the associated partition's
|
||||
LWLock. (This is not strictly necessary at the moment, because at this
|
||||
writing a PGPROC's PROCLOCK list is only accessed by the owning backend
|
||||
anyway. But it seems forward-looking to maintain a convention for how
|
||||
other backends could access it. In any case LockReleaseAll needs to be
|
||||
able to quickly determine which partition each LOCK belongs to, and
|
||||
for the currently contemplated number of partitions, this way takes less
|
||||
shared memory than explicitly storing a partition number in LOCK structs
|
||||
would require.)
|
||||
|
||||
* The other lock-related fields of a PGPROC are only interesting when
|
||||
the PGPROC is waiting for a lock, so we consider that they are protected
|
||||
by the partition LWLock of the awaited lock.
|
||||
|
||||
For normal lock acquisition and release, it is sufficient to lock the
|
||||
partition containing the desired lock. Deadlock checking needs to touch
|
||||
multiple partitions in general; for simplicity, we just make it lock all
|
||||
the partitions in partition-number order. (To prevent LWLock deadlock,
|
||||
we establish the rule that any backend needing to lock more than one
|
||||
partition at once must lock them in partition-number order.) It's
|
||||
possible that deadlock checking could be done without touching every
|
||||
partition in typical cases, but since in a properly functioning system
|
||||
deadlock checking should not occur often enough to be performance-critical,
|
||||
trying to make this work does not seem a productive use of effort.
|
||||
|
||||
A backend's internal LOCALLOCK hash table is not partitioned. We do store
|
||||
the partition number in LOCALLOCK table entries, but this is a straight
|
||||
speed-for-space tradeoff: we could instead recalculate the partition
|
||||
number from the LOCKTAG when needed.
|
||||
|
||||
|
||||
THE DEADLOCK DETECTION ALGORITHM
|
||||
|
||||
Since we allow user transactions to request locks in any order, deadlock
|
||||
is possible. We use a deadlock detection/breaking algorithm that is
|
||||
|
@ -12,7 +12,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.37 2005/12/09 01:22:04 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.38 2005/12/11 21:02:18 tgl Exp $
|
||||
*
|
||||
* Interface:
|
||||
*
|
||||
@ -53,9 +53,9 @@ typedef struct
|
||||
* Information saved about each edge in a detected deadlock cycle. This
|
||||
* is used to print a diagnostic message upon failure.
|
||||
*
|
||||
* Note: because we want to examine this info after releasing the LockMgrLock,
|
||||
* we can't just store LOCK and PGPROC pointers; we must extract out all the
|
||||
* info we want to be able to print.
|
||||
* Note: because we want to examine this info after releasing the lock
|
||||
* manager's partition locks, we can't just store LOCK and PGPROC pointers;
|
||||
* we must extract out all the info we want to be able to print.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
@ -188,19 +188,11 @@ InitDeadLockChecking(void)
|
||||
* deadlock. If resolution is impossible, return TRUE --- the caller
|
||||
* is then expected to abort the given proc's transaction.
|
||||
*
|
||||
* We can't block on user locks, so no sense testing for deadlock
|
||||
* because there is no blocking, and no timer for the block. So,
|
||||
* only look at regular locks.
|
||||
*
|
||||
* We must have already locked the master lock before being called.
|
||||
* NOTE: although the lockmethod structure appears to allow each lock
|
||||
* table to have a different masterLock, all locks that can block had
|
||||
* better use the same LWLock, else this code will not be adequately
|
||||
* interlocked!
|
||||
* Caller must already have locked all partitions of the lock tables.
|
||||
*
|
||||
* On failure, deadlock details are recorded in deadlockDetails[] for
|
||||
* subsequent printing by DeadLockReport(). That activity is separate
|
||||
* because we don't want to do it while holding the master lock.
|
||||
* because we don't want to do it while holding all those LWLocks.
|
||||
*/
|
||||
bool
|
||||
DeadLockCheck(PGPROC *proc)
|
||||
|
@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* lock.c
|
||||
* POSTGRES low-level lock mechanism
|
||||
* POSTGRES primary lock mechanism
|
||||
*
|
||||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.161 2005/12/09 01:22:04 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.162 2005/12/11 21:02:18 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* A lock table is a shared memory hash table. When
|
||||
@ -163,10 +163,13 @@ typedef struct TwoPhaseLockRecord
|
||||
|
||||
|
||||
/*
|
||||
* Links to hash tables containing lock state
|
||||
* Pointers to hash tables containing lock state
|
||||
*
|
||||
* The LockMethodLockHash and LockMethodProcLockHash hash tables are in
|
||||
* shared memory; LockMethodLocalHash is local to each backend.
|
||||
*/
|
||||
static HTAB *LockMethodLockHash;
|
||||
static HTAB *LockMethodProcLockHash;
|
||||
static HTAB *LockMethodLockHash[NUM_LOCK_PARTITIONS];
|
||||
static HTAB *LockMethodProcLockHash[NUM_LOCK_PARTITIONS];
|
||||
static HTAB *LockMethodLocalHash;
|
||||
|
||||
|
||||
@ -255,16 +258,25 @@ PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP)
|
||||
|
||||
static void RemoveLocalLock(LOCALLOCK *locallock);
|
||||
static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
|
||||
static void WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
|
||||
ResourceOwner owner);
|
||||
static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
|
||||
static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
|
||||
PROCLOCK *proclock, LockMethod lockMethodTable);
|
||||
static void CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock,
|
||||
PROCLOCK *proclock, bool wakeupNeeded);
|
||||
static void CleanUpLock(LOCK *lock, PROCLOCK *proclock,
|
||||
LockMethod lockMethodTable, int partition,
|
||||
bool wakeupNeeded);
|
||||
|
||||
|
||||
/*
|
||||
* InitLocks -- Initialize the lock module's shared memory.
|
||||
* InitLocks -- Initialize the lock manager's data structures.
|
||||
*
|
||||
* This is called from CreateSharedMemoryAndSemaphores(), which see for
|
||||
* more comments. In the normal postmaster case, the shared hash tables
|
||||
* are created here, as well as a locallock hash table that will remain
|
||||
* unused and empty in the postmaster itself. Backends inherit the pointers
|
||||
* to the shared tables via fork(), and also inherit an image of the locallock
|
||||
* hash table, which they proceed to use. In the EXEC_BACKEND case, each
|
||||
* backend re-executes this code to obtain pointers to the already existing
|
||||
* shared hash tables and to create its locallock hash table.
|
||||
*/
|
||||
void
|
||||
InitLocks(void)
|
||||
@ -274,13 +286,18 @@ InitLocks(void)
|
||||
int hash_flags;
|
||||
long init_table_size,
|
||||
max_table_size;
|
||||
int i;
|
||||
|
||||
/* Compute init/max size to request for lock hashtables */
|
||||
/*
|
||||
* Compute init/max size to request for lock hashtables. Note these
|
||||
* calculations must agree with LockShmemSize!
|
||||
*/
|
||||
max_table_size = NLOCKENTS();
|
||||
max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1;
|
||||
init_table_size = max_table_size / 2;
|
||||
|
||||
/*
|
||||
* allocate a hash table for LOCK structs. This is used to store
|
||||
* Allocate hash tables for LOCK structs. These are used to store
|
||||
* per-locked-object information.
|
||||
*/
|
||||
MemSet(&info, 0, sizeof(info));
|
||||
@ -289,37 +306,45 @@ InitLocks(void)
|
||||
info.hash = tag_hash;
|
||||
hash_flags = (HASH_ELEM | HASH_FUNCTION);
|
||||
|
||||
sprintf(shmemName, "LOCK hash");
|
||||
LockMethodLockHash = ShmemInitHash(shmemName,
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
{
|
||||
sprintf(shmemName, "LOCK hash %d", i);
|
||||
LockMethodLockHash[i] = ShmemInitHash(shmemName,
|
||||
init_table_size,
|
||||
max_table_size,
|
||||
&info,
|
||||
hash_flags);
|
||||
|
||||
if (!LockMethodLockHash)
|
||||
if (!LockMethodLockHash[i])
|
||||
elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
|
||||
}
|
||||
|
||||
/* Assume an average of 2 holders per lock */
|
||||
max_table_size *= 2;
|
||||
init_table_size *= 2;
|
||||
|
||||
/*
|
||||
* allocate a hash table for PROCLOCK structs. This is used to store
|
||||
* per-lock-holder information.
|
||||
* Allocate hash tables for PROCLOCK structs. These are used to store
|
||||
* per-lock-per-holder information.
|
||||
*/
|
||||
info.keysize = sizeof(PROCLOCKTAG);
|
||||
info.entrysize = sizeof(PROCLOCK);
|
||||
info.hash = tag_hash;
|
||||
hash_flags = (HASH_ELEM | HASH_FUNCTION);
|
||||
|
||||
sprintf(shmemName, "PROCLOCK hash");
|
||||
LockMethodProcLockHash = ShmemInitHash(shmemName,
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
{
|
||||
sprintf(shmemName, "PROCLOCK hash %d", i);
|
||||
LockMethodProcLockHash[i] = ShmemInitHash(shmemName,
|
||||
init_table_size,
|
||||
max_table_size,
|
||||
&info,
|
||||
hash_flags);
|
||||
|
||||
if (!LockMethodProcLockHash)
|
||||
if (!LockMethodProcLockHash[i])
|
||||
elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
|
||||
}
|
||||
|
||||
/*
|
||||
* allocate a non-shared hash table for LOCALLOCK structs. This is used
|
||||
* Allocate one non-shared hash table for LOCALLOCK structs. This is used
|
||||
* to store lock counts and resource owner information.
|
||||
*
|
||||
* The non-shared table could already exist in this process (this occurs
|
||||
@ -355,6 +380,39 @@ GetLocksMethodTable(const LOCK *lock)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Given a LOCKTAG, determine which partition the lock belongs in.
|
||||
*
|
||||
* Basically what we want to do here is hash the locktag. However, it
|
||||
* seems unwise to use hash_any() because that is the same function that
|
||||
* will be used to distribute the locks within each partition's hash table;
|
||||
* if we use it, we run a big risk of having uneven distribution of hash
|
||||
* codes within each hash table. Instead, we use a simple linear XOR of the
|
||||
* bits of the locktag.
|
||||
*
|
||||
* Returns a partition number in the range 0 .. NUM_LOCK_PARTITIONS-1.
|
||||
* Only NUM_LOCK_PARTITIONS values of 16 and 4 are handled; any other
|
||||
* setting is rejected at compile time by the #error below.
|
||||
*
|
||||
* NOTE(review): every byte of the struct is read, so this presumably
|
||||
* relies on LOCKTAG containing no uninitialized padding bytes --- confirm
|
||||
* against the LOCKTAG declaration.
|
||||
*/
|
||||
int
|
||||
LockTagToPartition(const LOCKTAG *locktag)
|
||||
{
|
||||
const uint8 *ptr = (const uint8 *) locktag;
|
||||
int result = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < sizeof(LOCKTAG); i++)
|
||||
result ^= *ptr++;	/* XOR all bytes of the tag into one 8-bit value */
|
||||
#if NUM_LOCK_PARTITIONS == 16
|
||||
result ^= result >> 4;	/* fold the high nibble into the low nibble */
|
||||
result &= 0x0F;	/* keep the low 4 bits: partitions 0..15 */
|
||||
#elif NUM_LOCK_PARTITIONS == 4
|
||||
result ^= result >> 4;	/* fold the high nibble into the low nibble */
|
||||
result ^= result >> 2;	/* then fold those 4 bits down to 2 */
|
||||
result &= 0x03;	/* keep the low 2 bits: partitions 0..3 */
|
||||
#else
|
||||
#error unsupported NUM_LOCK_PARTITIONS
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* LockAcquire -- Check for lock conflicts, sleep if conflict found,
|
||||
* set lock if/when no conflicts.
|
||||
@ -397,7 +455,8 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
PROCLOCKTAG proclocktag;
|
||||
bool found;
|
||||
ResourceOwner owner;
|
||||
LWLockId masterLock;
|
||||
int partition;
|
||||
LWLockId partitionLock;
|
||||
int status;
|
||||
|
||||
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
|
||||
@ -438,6 +497,7 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
locallock->lock = NULL;
|
||||
locallock->proclock = NULL;
|
||||
locallock->isTempObject = isTempObject;
|
||||
locallock->partition = LockTagToPartition(&(localtag.lock));
|
||||
locallock->nLocks = 0;
|
||||
locallock->numLockOwners = 0;
|
||||
locallock->maxLockOwners = 8;
|
||||
@ -474,9 +534,10 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
/*
|
||||
* Otherwise we've got to mess with the shared lock table.
|
||||
*/
|
||||
masterLock = LockMgrLock;
|
||||
partition = locallock->partition;
|
||||
partitionLock = FirstLockMgrLock + partition;
|
||||
|
||||
LWLockAcquire(masterLock, LW_EXCLUSIVE);
|
||||
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Find or create a lock with this tag.
|
||||
@ -486,12 +547,12 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
* pointer is valid, since a lock object with no locks can go away
|
||||
* anytime.
|
||||
*/
|
||||
lock = (LOCK *) hash_search(LockMethodLockHash,
|
||||
lock = (LOCK *) hash_search(LockMethodLockHash[partition],
|
||||
(void *) locktag,
|
||||
HASH_ENTER_NULL, &found);
|
||||
if (!lock)
|
||||
{
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of shared memory"),
|
||||
@ -532,7 +593,7 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
/*
|
||||
* Find or create a proclock entry with this tag
|
||||
*/
|
||||
proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
|
||||
proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
|
||||
(void *) &proclocktag,
|
||||
HASH_ENTER_NULL, &found);
|
||||
if (!proclock)
|
||||
@ -547,12 +608,12 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
* anyone to release the lock object later.
|
||||
*/
|
||||
Assert(SHMQueueEmpty(&(lock->procLocks)));
|
||||
if (!hash_search(LockMethodLockHash,
|
||||
if (!hash_search(LockMethodLockHash[partition],
|
||||
(void *) &(lock->tag),
|
||||
HASH_REMOVE, NULL))
|
||||
elog(PANIC, "lock table corrupted");
|
||||
}
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of shared memory"),
|
||||
@ -569,7 +630,8 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
proclock->releaseMask = 0;
|
||||
/* Add proclock to appropriate lists */
|
||||
SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink);
|
||||
SHMQueueInsertBefore(&MyProc->procLocks, &proclock->procLink);
|
||||
SHMQueueInsertBefore(&(MyProc->myProcLocks[partition]),
|
||||
&proclock->procLink);
|
||||
PROCLOCK_PRINT("LockAcquire: new", proclock);
|
||||
}
|
||||
else
|
||||
@ -666,7 +728,7 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
{
|
||||
SHMQueueDelete(&proclock->lockLink);
|
||||
SHMQueueDelete(&proclock->procLink);
|
||||
if (!hash_search(LockMethodProcLockHash,
|
||||
if (!hash_search(LockMethodProcLockHash[partition],
|
||||
(void *) &(proclock->tag),
|
||||
HASH_REMOVE, NULL))
|
||||
elog(PANIC, "proclock table corrupted");
|
||||
@ -678,7 +740,7 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode);
|
||||
Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0));
|
||||
Assert(lock->nGranted <= lock->nRequested);
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
if (locallock->nLocks == 0)
|
||||
RemoveLocalLock(locallock);
|
||||
return LOCKACQUIRE_NOT_AVAIL;
|
||||
@ -692,7 +754,7 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
/*
|
||||
* Sleep till someone wakes me up.
|
||||
*/
|
||||
WaitOnLock(lockmethodid, locallock, owner);
|
||||
WaitOnLock(locallock, owner);
|
||||
|
||||
/*
|
||||
* NOTE: do not do any material change of state between here and
|
||||
@ -709,14 +771,14 @@ LockAcquire(const LOCKTAG *locktag,
|
||||
PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
|
||||
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
|
||||
/* Should we retry? */
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
elog(ERROR, "LockAcquire failed");
|
||||
}
|
||||
PROCLOCK_PRINT("LockAcquire: granted", proclock);
|
||||
LOCK_PRINT("LockAcquire: granted", lock, lockmode);
|
||||
}
|
||||
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
|
||||
return LOCKACQUIRE_OK;
|
||||
}
|
||||
@ -894,11 +956,12 @@ UnGrantLock(LOCK *lock, LOCKMODE lockmode,
|
||||
* should be called after UnGrantLock, and wakeupNeeded is the result from
|
||||
* UnGrantLock.)
|
||||
*
|
||||
* The locktable's masterLock must be held at entry, and will be
|
||||
* The lock table's partition lock must be held at entry, and will be
|
||||
* held at exit.
|
||||
*/
|
||||
static void
|
||||
CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
|
||||
CleanUpLock(LOCK *lock, PROCLOCK *proclock,
|
||||
LockMethod lockMethodTable, int partition,
|
||||
bool wakeupNeeded)
|
||||
{
|
||||
/*
|
||||
@ -910,7 +973,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
|
||||
PROCLOCK_PRINT("CleanUpLock: deleting", proclock);
|
||||
SHMQueueDelete(&proclock->lockLink);
|
||||
SHMQueueDelete(&proclock->procLink);
|
||||
if (!hash_search(LockMethodProcLockHash,
|
||||
if (!hash_search(LockMethodProcLockHash[partition],
|
||||
(void *) &(proclock->tag),
|
||||
HASH_REMOVE, NULL))
|
||||
elog(PANIC, "proclock table corrupted");
|
||||
@ -924,7 +987,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
|
||||
*/
|
||||
LOCK_PRINT("CleanUpLock: deleting", lock, 0);
|
||||
Assert(SHMQueueEmpty(&(lock->procLocks)));
|
||||
if (!hash_search(LockMethodLockHash,
|
||||
if (!hash_search(LockMethodLockHash[partition],
|
||||
(void *) &(lock->tag),
|
||||
HASH_REMOVE, NULL))
|
||||
elog(PANIC, "lock table corrupted");
|
||||
@ -932,7 +995,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
|
||||
else if (wakeupNeeded)
|
||||
{
|
||||
/* There are waiters on this lock, so wake them up. */
|
||||
ProcLockWakeup(LockMethods[lockmethodid], lock);
|
||||
ProcLockWakeup(lockMethodTable, lock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -988,12 +1051,12 @@ GrantAwaitedLock(void)
|
||||
* Caller must have set MyProc->heldLocks to reflect locks already held
|
||||
* on the lockable object by this process.
|
||||
*
|
||||
* The locktable's masterLock must be held at entry.
|
||||
* The appropriate partition lock must be held at entry.
|
||||
*/
|
||||
static void
|
||||
WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
|
||||
ResourceOwner owner)
|
||||
WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
|
||||
{
|
||||
LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
|
||||
LockMethod lockMethodTable = LockMethods[lockmethodid];
|
||||
const char *old_status;
|
||||
char *new_status;
|
||||
@ -1025,10 +1088,7 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
|
||||
* will also happen in the cancel/die case.
|
||||
*/
|
||||
|
||||
if (ProcSleep(lockMethodTable,
|
||||
locallock->tag.mode,
|
||||
locallock->lock,
|
||||
locallock->proclock) != STATUS_OK)
|
||||
if (ProcSleep(locallock, lockMethodTable) != STATUS_OK)
|
||||
{
|
||||
/*
|
||||
* We failed as a result of a deadlock; see CheckDeadLock(). Quit now.
|
||||
@ -1036,10 +1096,10 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
|
||||
awaitedLock = NULL;
|
||||
LOCK_PRINT("WaitOnLock: aborting on lock",
|
||||
locallock->lock, locallock->tag.mode);
|
||||
LWLockRelease(LockMgrLock);
|
||||
LWLockRelease(FirstLockMgrLock + locallock->partition);
|
||||
|
||||
/*
|
||||
* Now that we aren't holding the LockMgrLock, we can give an error
|
||||
* Now that we aren't holding the partition lock, we can give an error
|
||||
* report including details about the detected deadlock.
|
||||
*/
|
||||
DeadLockReport();
|
||||
@ -1059,12 +1119,12 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
|
||||
* Remove a proc from the wait-queue it is on
|
||||
* (caller must know it is on one).
|
||||
*
|
||||
* Locktable lock must be held by caller.
|
||||
* Appropriate partition lock must be held by caller.
|
||||
*
|
||||
* NB: this does not clean up any locallock object that may exist for the lock.
|
||||
*/
|
||||
void
|
||||
RemoveFromWaitQueue(PGPROC *proc)
|
||||
RemoveFromWaitQueue(PGPROC *proc, int partition)
|
||||
{
|
||||
LOCK *waitLock = proc->waitLock;
|
||||
PROCLOCK *proclock = proc->waitProcLock;
|
||||
@ -1102,7 +1162,9 @@ RemoveFromWaitQueue(PGPROC *proc)
|
||||
* LockRelease expects there to be no remaining proclocks.) Then see if
|
||||
* any other waiters for the lock can be woken up now.
|
||||
*/
|
||||
CleanUpLock(lockmethodid, waitLock, proclock, true);
|
||||
CleanUpLock(waitLock, proclock,
|
||||
LockMethods[lockmethodid], partition,
|
||||
true);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1125,7 +1187,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
|
||||
LOCALLOCK *locallock;
|
||||
LOCK *lock;
|
||||
PROCLOCK *proclock;
|
||||
LWLockId masterLock;
|
||||
int partition;
|
||||
LWLockId partitionLock;
|
||||
bool wakeupNeeded;
|
||||
|
||||
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
|
||||
@ -1212,9 +1275,10 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
|
||||
/*
|
||||
* Otherwise we've got to mess with the shared lock table.
|
||||
*/
|
||||
masterLock = LockMgrLock;
|
||||
partition = locallock->partition;
|
||||
partitionLock = FirstLockMgrLock + partition;
|
||||
|
||||
LWLockAcquire(masterLock, LW_EXCLUSIVE);
|
||||
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* We don't need to re-find the lock or proclock, since we kept their
|
||||
@ -1233,7 +1297,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
|
||||
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
|
||||
{
|
||||
PROCLOCK_PRINT("LockRelease: WRONGTYPE", proclock);
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
elog(WARNING, "you don't own a lock of type %s",
|
||||
lockMethodTable->lockModeNames[lockmode]);
|
||||
RemoveLocalLock(locallock);
|
||||
@ -1245,9 +1309,11 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
|
||||
*/
|
||||
wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
|
||||
|
||||
CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
|
||||
CleanUpLock(lock, proclock,
|
||||
lockMethodTable, partition,
|
||||
wakeupNeeded);
|
||||
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
|
||||
RemoveLocalLock(locallock);
|
||||
return TRUE;
|
||||
@ -1265,14 +1331,13 @@ void
|
||||
LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
|
||||
{
|
||||
HASH_SEQ_STATUS status;
|
||||
SHM_QUEUE *procLocks = &(MyProc->procLocks);
|
||||
LWLockId masterLock;
|
||||
LockMethod lockMethodTable;
|
||||
int i,
|
||||
numLockModes;
|
||||
LOCALLOCK *locallock;
|
||||
PROCLOCK *proclock;
|
||||
LOCK *lock;
|
||||
PROCLOCK *proclock;
|
||||
int partition;
|
||||
|
||||
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
|
||||
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
|
||||
@ -1284,7 +1349,6 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
|
||||
#endif
|
||||
|
||||
numLockModes = lockMethodTable->numLockModes;
|
||||
masterLock = LockMgrLock;
|
||||
|
||||
/*
|
||||
* First we run through the locallock table and get rid of unwanted
|
||||
@ -1351,18 +1415,30 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
|
||||
RemoveLocalLock(locallock);
|
||||
}
|
||||
|
||||
LWLockAcquire(masterLock, LW_EXCLUSIVE);
|
||||
/*
|
||||
* Now, scan each lock partition separately.
|
||||
*/
|
||||
for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
|
||||
{
|
||||
LWLockId partitionLock = FirstLockMgrLock + partition;
|
||||
SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]);
|
||||
|
||||
proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
|
||||
offsetof(PROCLOCK, procLink));
|
||||
|
||||
if (!proclock)
|
||||
continue; /* needn't examine this partition */
|
||||
|
||||
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
|
||||
|
||||
while (proclock)
|
||||
{
|
||||
bool wakeupNeeded = false;
|
||||
PROCLOCK *nextplock;
|
||||
|
||||
/* Get link first, since we may unlink/delete this proclock */
|
||||
nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
|
||||
nextplock = (PROCLOCK *)
|
||||
SHMQueueNext(procLocks, &proclock->procLink,
|
||||
offsetof(PROCLOCK, procLink));
|
||||
|
||||
Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
|
||||
@ -1412,13 +1488,16 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
|
||||
proclock->releaseMask = 0;
|
||||
|
||||
/* CleanUpLock will wake up waiters if needed. */
|
||||
CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
|
||||
CleanUpLock(lock, proclock,
|
||||
lockMethodTable, partition,
|
||||
wakeupNeeded);
|
||||
|
||||
next_item:
|
||||
next_item:
|
||||
proclock = nextplock;
|
||||
}
|
||||
} /* loop over PROCLOCKs within this partition */
|
||||
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
} /* loop over partitions */
|
||||
|
||||
#ifdef LOCK_DEBUG
|
||||
if (*(lockMethodTable->trace_flag))
|
||||
@ -1627,19 +1706,16 @@ PostPrepare_Locks(TransactionId xid)
|
||||
{
|
||||
PGPROC *newproc = TwoPhaseGetDummyProc(xid);
|
||||
HASH_SEQ_STATUS status;
|
||||
SHM_QUEUE *procLocks = &(MyProc->procLocks);
|
||||
LWLockId masterLock;
|
||||
LOCALLOCK *locallock;
|
||||
LOCK *lock;
|
||||
PROCLOCK *proclock;
|
||||
PROCLOCKTAG proclocktag;
|
||||
bool found;
|
||||
LOCK *lock;
|
||||
int partition;
|
||||
|
||||
/* This is a critical section: any error means big trouble */
|
||||
START_CRIT_SECTION();
|
||||
|
||||
masterLock = LockMgrLock;
|
||||
|
||||
/*
|
||||
* First we run through the locallock table and get rid of unwanted
|
||||
* entries, then we scan the process's proclocks and transfer them to the
|
||||
@ -1678,11 +1754,22 @@ PostPrepare_Locks(TransactionId xid)
|
||||
RemoveLocalLock(locallock);
|
||||
}
|
||||
|
||||
LWLockAcquire(masterLock, LW_EXCLUSIVE);
|
||||
/*
|
||||
* Now, scan each lock partition separately.
|
||||
*/
|
||||
for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
|
||||
{
|
||||
LWLockId partitionLock = FirstLockMgrLock + partition;
|
||||
SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]);
|
||||
|
||||
proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
|
||||
offsetof(PROCLOCK, procLink));
|
||||
|
||||
if (!proclock)
|
||||
continue; /* needn't examine this partition */
|
||||
|
||||
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
|
||||
|
||||
while (proclock)
|
||||
{
|
||||
PROCLOCK *nextplock;
|
||||
@ -1690,7 +1777,8 @@ PostPrepare_Locks(TransactionId xid)
|
||||
PROCLOCK *newproclock;
|
||||
|
||||
/* Get link first, since we may unlink/delete this proclock */
|
||||
nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
|
||||
nextplock = (PROCLOCK *)
|
||||
SHMQueueNext(procLocks, &proclock->procLink,
|
||||
offsetof(PROCLOCK, procLink));
|
||||
|
||||
Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
|
||||
@ -1709,7 +1797,8 @@ PostPrepare_Locks(TransactionId xid)
|
||||
Assert((proclock->holdMask & ~lock->grantMask) == 0);
|
||||
|
||||
/*
|
||||
* Since there were no session locks, we should be releasing all locks
|
||||
* Since there were no session locks, we should be releasing all
|
||||
* locks
|
||||
*/
|
||||
if (proclock->releaseMask != proclock->holdMask)
|
||||
elog(PANIC, "we seem to have dropped a bit somewhere");
|
||||
@ -1717,17 +1806,18 @@ PostPrepare_Locks(TransactionId xid)
|
||||
holdMask = proclock->holdMask;
|
||||
|
||||
/*
|
||||
* We cannot simply modify proclock->tag.proc to reassign ownership of
|
||||
* the lock, because that's part of the hash key and the proclock
|
||||
* would then be in the wrong hash chain. So, unlink and delete the
|
||||
* old proclock; create a new one with the right contents; and link it
|
||||
* into place. We do it in this order to be certain we won't run out
|
||||
* of shared memory (the way dynahash.c works, the deleted object is
|
||||
* certain to be available for reallocation).
|
||||
* We cannot simply modify proclock->tag.proc to reassign
|
||||
* ownership of the lock, because that's part of the hash key and
|
||||
* the proclock would then be in the wrong hash chain. So, unlink
|
||||
* and delete the old proclock; create a new one with the right
|
||||
* contents; and link it into place. We do it in this order to be
|
||||
* certain we won't run out of shared memory (the way dynahash.c
|
||||
* works, the deleted object is certain to be available for
|
||||
* reallocation).
|
||||
*/
|
||||
SHMQueueDelete(&proclock->lockLink);
|
||||
SHMQueueDelete(&proclock->procLink);
|
||||
if (!hash_search(LockMethodProcLockHash,
|
||||
if (!hash_search(LockMethodProcLockHash[partition],
|
||||
(void *) &(proclock->tag),
|
||||
HASH_REMOVE, NULL))
|
||||
elog(PANIC, "proclock table corrupted");
|
||||
@ -1739,7 +1829,7 @@ PostPrepare_Locks(TransactionId xid)
|
||||
proclocktag.lock = MAKE_OFFSET(lock);
|
||||
proclocktag.proc = MAKE_OFFSET(newproc);
|
||||
|
||||
newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
|
||||
newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
|
||||
(void *) &proclocktag,
|
||||
HASH_ENTER_NULL, &found);
|
||||
if (!newproclock)
|
||||
@ -1757,7 +1847,8 @@ PostPrepare_Locks(TransactionId xid)
|
||||
newproclock->releaseMask = 0;
|
||||
/* Add new proclock to appropriate lists */
|
||||
SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink);
|
||||
SHMQueueInsertBefore(&newproc->procLocks, &newproclock->procLink);
|
||||
SHMQueueInsertBefore(&(newproc->myProcLocks[partition]),
|
||||
&newproclock->procLink);
|
||||
PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock);
|
||||
}
|
||||
else
|
||||
@ -1772,11 +1863,12 @@ PostPrepare_Locks(TransactionId xid)
|
||||
Assert((newproclock->holdMask & holdMask) == 0);
|
||||
newproclock->holdMask |= holdMask;
|
||||
|
||||
next_item:
|
||||
next_item:
|
||||
proclock = nextplock;
|
||||
}
|
||||
} /* loop over PROCLOCKs within this partition */
|
||||
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
} /* loop over partitions */
|
||||
|
||||
END_CRIT_SECTION();
|
||||
}
|
||||
@ -1789,20 +1881,23 @@ Size
|
||||
LockShmemSize(void)
|
||||
{
|
||||
Size size = 0;
|
||||
long max_table_size = NLOCKENTS();
|
||||
Size tabsize;
|
||||
long max_table_size;
|
||||
|
||||
/* lockHash table */
|
||||
size = add_size(size, hash_estimate_size(max_table_size, sizeof(LOCK)));
|
||||
/* lock hash tables */
|
||||
max_table_size = NLOCKENTS();
|
||||
max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1;
|
||||
tabsize = hash_estimate_size(max_table_size, sizeof(LOCK));
|
||||
size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS));
|
||||
|
||||
/* proclockHash table */
|
||||
size = add_size(size, hash_estimate_size(max_table_size, sizeof(PROCLOCK)));
|
||||
/* proclock hash tables */
|
||||
max_table_size *= 2;
|
||||
tabsize = hash_estimate_size(max_table_size, sizeof(PROCLOCK));
|
||||
size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS));
|
||||
|
||||
/*
|
||||
* Note we count only one pair of hash tables, since the userlocks table
|
||||
* actually overlays the main one.
|
||||
*
|
||||
* Since the lockHash entry count above is only an estimate, add 10%
|
||||
* safety margin.
|
||||
* Since there is likely to be some space wastage due to uneven use
|
||||
* of the partitions, add 10% safety margin.
|
||||
*/
|
||||
size = add_size(size, size / 10);
|
||||
|
||||
@ -1818,9 +1913,9 @@ LockShmemSize(void)
|
||||
* copies of the same PGPROC and/or LOCK objects are likely to appear.
|
||||
* It is the caller's responsibility to match up duplicates if wanted.
|
||||
*
|
||||
* The design goal is to hold the LockMgrLock for as short a time as possible;
|
||||
* The design goal is to hold the LWLocks for as short a time as possible;
|
||||
* thus, this function simply makes a copy of the necessary data and releases
|
||||
* the lock, allowing the caller to contemplate and format the data for as
|
||||
* the locks, allowing the caller to contemplate and format the data for as
|
||||
* long as it pleases.
|
||||
*/
|
||||
LockData *
|
||||
@ -1830,40 +1925,67 @@ GetLockStatusData(void)
|
||||
HTAB *proclockTable;
|
||||
PROCLOCK *proclock;
|
||||
HASH_SEQ_STATUS seqstat;
|
||||
int els;
|
||||
int el;
|
||||
int i;
|
||||
|
||||
data = (LockData *) palloc(sizeof(LockData));
|
||||
|
||||
LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
|
||||
/*
|
||||
* Acquire lock on the entire shared lock data structures. We can't
|
||||
* operate one partition at a time if we want to deliver a self-consistent
|
||||
* view of the state.
|
||||
*
|
||||
* Since this is a read-only operation, we take shared instead of exclusive
|
||||
* lock. There's not a whole lot of point to this, because all the normal
|
||||
* operations require exclusive lock, but it doesn't hurt anything either.
|
||||
* It will at least allow two backends to do GetLockStatusData in parallel.
|
||||
*
|
||||
* Must grab LWLocks in partition-number order to avoid LWLock deadlock.
|
||||
*
|
||||
* Use same loop to count up the total number of PROCLOCK objects.
|
||||
*/
|
||||
els = 0;
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
{
|
||||
LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
|
||||
proclockTable = LockMethodProcLockHash[i];
|
||||
els += proclockTable->hctl->nentries;
|
||||
}
|
||||
|
||||
proclockTable = LockMethodProcLockHash;
|
||||
data->nelements = els;
|
||||
data->proclockaddrs = (SHMEM_OFFSET *) palloc(sizeof(SHMEM_OFFSET) * els);
|
||||
data->proclocks = (PROCLOCK *) palloc(sizeof(PROCLOCK) * els);
|
||||
data->procs = (PGPROC *) palloc(sizeof(PGPROC) * els);
|
||||
data->locks = (LOCK *) palloc(sizeof(LOCK) * els);
|
||||
|
||||
data->nelements = i = proclockTable->hctl->nentries;
|
||||
|
||||
data->proclockaddrs = (SHMEM_OFFSET *) palloc(sizeof(SHMEM_OFFSET) * i);
|
||||
data->proclocks = (PROCLOCK *) palloc(sizeof(PROCLOCK) * i);
|
||||
data->procs = (PGPROC *) palloc(sizeof(PGPROC) * i);
|
||||
data->locks = (LOCK *) palloc(sizeof(LOCK) * i);
|
||||
el = 0;
|
||||
|
||||
/* Now scan the tables to copy the data */
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
{
|
||||
proclockTable = LockMethodProcLockHash[i];
|
||||
hash_seq_init(&seqstat, proclockTable);
|
||||
|
||||
i = 0;
|
||||
while ((proclock = hash_seq_search(&seqstat)))
|
||||
{
|
||||
PGPROC *proc = (PGPROC *) MAKE_PTR(proclock->tag.proc);
|
||||
LOCK *lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
|
||||
|
||||
data->proclockaddrs[i] = MAKE_OFFSET(proclock);
|
||||
memcpy(&(data->proclocks[i]), proclock, sizeof(PROCLOCK));
|
||||
memcpy(&(data->procs[i]), proc, sizeof(PGPROC));
|
||||
memcpy(&(data->locks[i]), lock, sizeof(LOCK));
|
||||
data->proclockaddrs[el] = MAKE_OFFSET(proclock);
|
||||
memcpy(&(data->proclocks[el]), proclock, sizeof(PROCLOCK));
|
||||
memcpy(&(data->procs[el]), proc, sizeof(PGPROC));
|
||||
memcpy(&(data->locks[el]), lock, sizeof(LOCK));
|
||||
|
||||
i++;
|
||||
el++;
|
||||
}
|
||||
}
|
||||
|
||||
LWLockRelease(LockMgrLock);
|
||||
/* And release locks */
|
||||
for (i = NUM_LOCK_PARTITIONS; --i >= 0; )
|
||||
LWLockRelease(FirstLockMgrLock + i);
|
||||
|
||||
Assert(i == data->nelements);
|
||||
Assert(el == data->nelements);
|
||||
|
||||
return data;
|
||||
}
|
||||
@ -1879,7 +2001,7 @@ GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode)
|
||||
|
||||
#ifdef LOCK_DEBUG
|
||||
/*
|
||||
* Dump all locks in the given proc's procLocks list.
|
||||
* Dump all locks in the given proc's myProcLocks lists.
|
||||
*
|
||||
* Caller is responsible for having acquired appropriate LWLocks.
|
||||
*/
|
||||
@ -1889,15 +2011,18 @@ DumpLocks(PGPROC *proc)
|
||||
SHM_QUEUE *procLocks;
|
||||
PROCLOCK *proclock;
|
||||
LOCK *lock;
|
||||
int i;
|
||||
|
||||
if (proc == NULL)
|
||||
return;
|
||||
|
||||
procLocks = &proc->procLocks;
|
||||
|
||||
if (proc->waitLock)
|
||||
LOCK_PRINT("DumpLocks: waiting on", proc->waitLock, 0);
|
||||
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
{
|
||||
procLocks = &(proc->myProcLocks[i]);
|
||||
|
||||
proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
|
||||
offsetof(PROCLOCK, procLink));
|
||||
|
||||
@ -1910,9 +2035,11 @@ DumpLocks(PGPROC *proc)
|
||||
PROCLOCK_PRINT("DumpLocks", proclock);
|
||||
LOCK_PRINT("DumpLocks", lock, 0);
|
||||
|
||||
proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
|
||||
proclock = (PROCLOCK *)
|
||||
SHMQueueNext(procLocks, &proclock->procLink,
|
||||
offsetof(PROCLOCK, procLink));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1928,14 +2055,18 @@ DumpAllLocks(void)
|
||||
LOCK *lock;
|
||||
HTAB *proclockTable;
|
||||
HASH_SEQ_STATUS status;
|
||||
int i;
|
||||
|
||||
proc = MyProc;
|
||||
proclockTable = LockMethodProcLockHash;
|
||||
|
||||
if (proc && proc->waitLock)
|
||||
LOCK_PRINT("DumpAllLocks: waiting on", proc->waitLock, 0);
|
||||
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
{
|
||||
proclockTable = LockMethodProcLockHash[i];
|
||||
hash_seq_init(&status, proclockTable);
|
||||
|
||||
while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
PROCLOCK_PRINT("DumpAllLocks", proclock);
|
||||
@ -1948,6 +2079,7 @@ DumpAllLocks(void)
|
||||
else
|
||||
elog(LOG, "DumpAllLocks: proclock->tag.lock = NULL");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* LOCK_DEBUG */
|
||||
|
||||
@ -1975,7 +2107,8 @@ lock_twophase_recover(TransactionId xid, uint16 info,
|
||||
PROCLOCK *proclock;
|
||||
PROCLOCKTAG proclocktag;
|
||||
bool found;
|
||||
LWLockId masterLock;
|
||||
int partition;
|
||||
LWLockId partitionLock;
|
||||
LockMethod lockMethodTable;
|
||||
|
||||
Assert(len == sizeof(TwoPhaseLockRecord));
|
||||
@ -1987,19 +2120,20 @@ lock_twophase_recover(TransactionId xid, uint16 info,
|
||||
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
|
||||
lockMethodTable = LockMethods[lockmethodid];
|
||||
|
||||
masterLock = LockMgrLock;
|
||||
partition = LockTagToPartition(locktag);
|
||||
partitionLock = FirstLockMgrLock + partition;
|
||||
|
||||
LWLockAcquire(masterLock, LW_EXCLUSIVE);
|
||||
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Find or create a lock with this tag.
|
||||
*/
|
||||
lock = (LOCK *) hash_search(LockMethodLockHash,
|
||||
lock = (LOCK *) hash_search(LockMethodLockHash[partition],
|
||||
(void *) locktag,
|
||||
HASH_ENTER_NULL, &found);
|
||||
if (!lock)
|
||||
{
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of shared memory"),
|
||||
@ -2039,7 +2173,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
|
||||
/*
|
||||
* Find or create a proclock entry with this tag
|
||||
*/
|
||||
proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
|
||||
proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
|
||||
(void *) &proclocktag,
|
||||
HASH_ENTER_NULL, &found);
|
||||
if (!proclock)
|
||||
@ -2054,12 +2188,12 @@ lock_twophase_recover(TransactionId xid, uint16 info,
|
||||
* anyone to release the lock object later.
|
||||
*/
|
||||
Assert(SHMQueueEmpty(&(lock->procLocks)));
|
||||
if (!hash_search(LockMethodLockHash,
|
||||
if (!hash_search(LockMethodLockHash[partition],
|
||||
(void *) &(lock->tag),
|
||||
HASH_REMOVE, NULL))
|
||||
elog(PANIC, "lock table corrupted");
|
||||
}
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of shared memory"),
|
||||
@ -2075,7 +2209,8 @@ lock_twophase_recover(TransactionId xid, uint16 info,
|
||||
proclock->releaseMask = 0;
|
||||
/* Add proclock to appropriate lists */
|
||||
SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink);
|
||||
SHMQueueInsertBefore(&proc->procLocks, &proclock->procLink);
|
||||
SHMQueueInsertBefore(&(proc->myProcLocks[partition]),
|
||||
&proclock->procLink);
|
||||
PROCLOCK_PRINT("lock_twophase_recover: new", proclock);
|
||||
}
|
||||
else
|
||||
@ -2106,7 +2241,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
|
||||
*/
|
||||
GrantLock(lock, proclock, lockmode);
|
||||
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2123,10 +2258,11 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
|
||||
LOCKTAG *locktag;
|
||||
LOCKMODE lockmode;
|
||||
LOCKMETHODID lockmethodid;
|
||||
PROCLOCKTAG proclocktag;
|
||||
LOCK *lock;
|
||||
PROCLOCK *proclock;
|
||||
LWLockId masterLock;
|
||||
PROCLOCKTAG proclocktag;
|
||||
int partition;
|
||||
LWLockId partitionLock;
|
||||
LockMethod lockMethodTable;
|
||||
bool wakeupNeeded;
|
||||
|
||||
@ -2139,14 +2275,15 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
|
||||
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
|
||||
lockMethodTable = LockMethods[lockmethodid];
|
||||
|
||||
masterLock = LockMgrLock;
|
||||
partition = LockTagToPartition(locktag);
|
||||
partitionLock = FirstLockMgrLock + partition;
|
||||
|
||||
LWLockAcquire(masterLock, LW_EXCLUSIVE);
|
||||
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Re-find the lock object (it had better be there).
|
||||
*/
|
||||
lock = (LOCK *) hash_search(LockMethodLockHash,
|
||||
lock = (LOCK *) hash_search(LockMethodLockHash[partition],
|
||||
(void *) locktag,
|
||||
HASH_FIND, NULL);
|
||||
if (!lock)
|
||||
@ -2158,7 +2295,7 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
|
||||
MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); /* must clear padding */
|
||||
proclocktag.lock = MAKE_OFFSET(lock);
|
||||
proclocktag.proc = MAKE_OFFSET(proc);
|
||||
proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
|
||||
proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
|
||||
(void *) &proclocktag,
|
||||
HASH_FIND, NULL);
|
||||
if (!proclock)
|
||||
@ -2171,7 +2308,7 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
|
||||
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
|
||||
{
|
||||
PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock);
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
elog(WARNING, "you don't own a lock of type %s",
|
||||
lockMethodTable->lockModeNames[lockmode]);
|
||||
return;
|
||||
@ -2182,9 +2319,11 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
|
||||
*/
|
||||
wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
|
||||
|
||||
CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
|
||||
CleanUpLock(lock, proclock,
|
||||
lockMethodTable, partition,
|
||||
wakeupNeeded);
|
||||
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -8,14 +8,14 @@
|
||||
* exclusive and shared lock modes (to support read/write and read-only
|
||||
* access to a shared object). There are few other frammishes. User-level
|
||||
* locking should be done with the full lock manager --- which depends on
|
||||
* an LWLock to protect its shared state.
|
||||
* LWLocks to protect its shared state.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.35 2005/12/06 23:08:33 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.36 2005/12/11 21:02:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -125,7 +125,10 @@ NumLWLocks(void)
|
||||
*/
|
||||
|
||||
/* Predefined LWLocks */
|
||||
numLocks = (int) NumFixedLWLocks;
|
||||
numLocks = (int) FirstLockMgrLock;
|
||||
|
||||
/* lock.c gets the ones starting at FirstLockMgrLock */
|
||||
numLocks += NUM_LOCK_PARTITIONS;
|
||||
|
||||
/* bufmgr.c needs two for each shared buffer */
|
||||
numLocks += 2 * NBuffers;
|
||||
@ -204,10 +207,11 @@ CreateLWLocks(void)
|
||||
|
||||
/*
|
||||
* Initialize the dynamic-allocation counter, which is stored just before
|
||||
* the first LWLock.
|
||||
* the first LWLock. The LWLocks used by lock.c are not dynamically
|
||||
* allocated, it just assumes it has them.
|
||||
*/
|
||||
LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
|
||||
LWLockCounter[0] = (int) NumFixedLWLocks;
|
||||
LWLockCounter[0] = (int) FirstLockMgrLock + NUM_LOCK_PARTITIONS;
|
||||
LWLockCounter[1] = numLocks;
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.169 2005/12/09 01:22:04 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.170 2005/12/11 21:02:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -18,9 +18,8 @@
|
||||
* ProcQueueAlloc() -- create a shm queue for sleeping processes
|
||||
* ProcQueueInit() -- create a queue without allocing memory
|
||||
*
|
||||
* Locking and waiting for buffers can cause the backend to be
|
||||
* put to sleep. Whoever releases the lock, etc. wakes the
|
||||
* process up again (and gives it an error code so it knows
|
||||
* Waiting for a lock causes the backend to be put to sleep. Whoever releases
|
||||
* the lock wakes the process up again (and gives it an error code so it knows
|
||||
* whether it was awoken on an error condition).
|
||||
*
|
||||
* Interface (b):
|
||||
@ -65,7 +64,8 @@ NON_EXEC_STATIC slock_t *ProcStructLock = NULL;
|
||||
static PROC_HDR *ProcGlobal = NULL;
|
||||
static PGPROC *DummyProcs = NULL;
|
||||
|
||||
static bool waitingForLock = false;
|
||||
/* If we are waiting for a lock, this points to the associated LOCALLOCK */
|
||||
static LOCALLOCK *lockAwaited = NULL;
|
||||
|
||||
/* Mark these volatile because they can be changed by signal handler */
|
||||
static volatile bool statement_timeout_active = false;
|
||||
@ -200,10 +200,10 @@ InitProcGlobal(void)
|
||||
void
|
||||
InitProcess(void)
|
||||
{
|
||||
SHMEM_OFFSET myOffset;
|
||||
|
||||
/* use volatile pointer to prevent code rearrangement */
|
||||
volatile PROC_HDR *procglobal = ProcGlobal;
|
||||
SHMEM_OFFSET myOffset;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* ProcGlobal should be set by a previous call to InitProcGlobal (if we
|
||||
@ -264,7 +264,8 @@ InitProcess(void)
|
||||
MyProc->lwWaitLink = NULL;
|
||||
MyProc->waitLock = NULL;
|
||||
MyProc->waitProcLock = NULL;
|
||||
SHMQueueInit(&(MyProc->procLocks));
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
SHMQueueInit(&(MyProc->myProcLocks[i]));
|
||||
|
||||
/*
|
||||
* Add our PGPROC to the PGPROC array in shared memory.
|
||||
@ -304,6 +305,7 @@ void
|
||||
InitDummyProcess(int proctype)
|
||||
{
|
||||
PGPROC *dummyproc;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* ProcGlobal should be set by a previous call to InitProcGlobal (we
|
||||
@ -360,7 +362,8 @@ InitDummyProcess(int proctype)
|
||||
MyProc->lwWaitLink = NULL;
|
||||
MyProc->waitLock = NULL;
|
||||
MyProc->waitProcLock = NULL;
|
||||
SHMQueueInit(&(MyProc->procLocks));
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
SHMQueueInit(&(MyProc->myProcLocks[i]));
|
||||
|
||||
/*
|
||||
* Arrange to clean up at process exit.
|
||||
@ -416,21 +419,24 @@ HaveNFreeProcs(int n)
|
||||
bool
|
||||
LockWaitCancel(void)
|
||||
{
|
||||
LWLockId partitionLock;
|
||||
|
||||
/* Nothing to do if we weren't waiting for a lock */
|
||||
if (!waitingForLock)
|
||||
if (lockAwaited == NULL)
|
||||
return false;
|
||||
|
||||
/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
|
||||
disable_sig_alarm(false);
|
||||
|
||||
/* Unlink myself from the wait queue, if on it (might not be anymore!) */
|
||||
LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
|
||||
partitionLock = FirstLockMgrLock + lockAwaited->partition;
|
||||
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
|
||||
|
||||
if (MyProc->links.next != INVALID_OFFSET)
|
||||
{
|
||||
/* We could not have been granted the lock yet */
|
||||
Assert(MyProc->waitStatus == STATUS_ERROR);
|
||||
RemoveFromWaitQueue(MyProc);
|
||||
RemoveFromWaitQueue(MyProc, lockAwaited->partition);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -444,9 +450,9 @@ LockWaitCancel(void)
|
||||
GrantAwaitedLock();
|
||||
}
|
||||
|
||||
waitingForLock = false;
|
||||
lockAwaited = NULL;
|
||||
|
||||
LWLockRelease(LockMgrLock);
|
||||
LWLockRelease(partitionLock);
|
||||
|
||||
/*
|
||||
* Reset the proc wait semaphore to zero. This is necessary in the
|
||||
@ -606,18 +612,18 @@ ProcQueueInit(PROC_QUEUE *queue)
|
||||
|
||||
|
||||
/*
|
||||
* ProcSleep -- put a process to sleep
|
||||
* ProcSleep -- put a process to sleep on the specified lock
|
||||
*
|
||||
* Caller must have set MyProc->heldLocks to reflect locks already held
|
||||
* on the lockable object by this process (under all XIDs).
|
||||
*
|
||||
* Locktable's masterLock must be held at entry, and will be held
|
||||
* The lock table's partition lock must be held at entry, and will be held
|
||||
* at exit.
|
||||
*
|
||||
* Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
|
||||
*
|
||||
* ASSUME: that no one will fiddle with the queue until after
|
||||
* we release the masterLock.
|
||||
* we release the partition lock.
|
||||
*
|
||||
* NOTES: The process queue is now a priority queue for locking.
|
||||
*
|
||||
@ -625,12 +631,13 @@ ProcQueueInit(PROC_QUEUE *queue)
|
||||
* semaphore is normally zero, so when we try to acquire it, we sleep.
|
||||
*/
|
||||
int
|
||||
ProcSleep(LockMethod lockMethodTable,
|
||||
LOCKMODE lockmode,
|
||||
LOCK *lock,
|
||||
PROCLOCK *proclock)
|
||||
ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
|
||||
{
|
||||
LWLockId masterLock = LockMgrLock;
|
||||
LOCKMODE lockmode = locallock->tag.mode;
|
||||
LOCK *lock = locallock->lock;
|
||||
PROCLOCK *proclock = locallock->proclock;
|
||||
int partition = locallock->partition;
|
||||
LWLockId partitionLock = FirstLockMgrLock + partition;
|
||||
PROC_QUEUE *waitQueue = &(lock->waitProcs);
|
||||
LOCKMASK myHeldLocks = MyProc->heldLocks;
|
||||
bool early_deadlock = false;
|
||||
@ -732,22 +739,22 @@ ProcSleep(LockMethod lockMethodTable,
|
||||
*/
|
||||
if (early_deadlock)
|
||||
{
|
||||
RemoveFromWaitQueue(MyProc);
|
||||
RemoveFromWaitQueue(MyProc, partition);
|
||||
return STATUS_ERROR;
|
||||
}
|
||||
|
||||
/* mark that we are waiting for a lock */
|
||||
waitingForLock = true;
|
||||
lockAwaited = locallock;
|
||||
|
||||
/*
|
||||
* Release the locktable's masterLock.
|
||||
* Release the lock table's partition lock.
|
||||
*
|
||||
* NOTE: this may also cause us to exit critical-section state, possibly
|
||||
* allowing a cancel/die interrupt to be accepted. This is OK because we
|
||||
* have recorded the fact that we are waiting for a lock, and so
|
||||
* LockWaitCancel will clean up if cancel/die happens.
|
||||
*/
|
||||
LWLockRelease(masterLock);
|
||||
LWLockRelease(partitionLock);
|
||||
|
||||
/*
|
||||
* Set timer so we can wake up after awhile and check for a deadlock. If a
|
||||
@ -785,16 +792,16 @@ ProcSleep(LockMethod lockMethodTable,
|
||||
elog(FATAL, "could not disable timer for process wakeup");
|
||||
|
||||
/*
|
||||
* Re-acquire the locktable's masterLock. We have to do this to hold off
|
||||
* cancel/die interrupts before we can mess with waitingForLock (else we
|
||||
* might have a missed or duplicated locallock update).
|
||||
* Re-acquire the lock table's partition lock. We have to do this to
|
||||
* hold off cancel/die interrupts before we can mess with lockAwaited
|
||||
* (else we might have a missed or duplicated locallock update).
|
||||
*/
|
||||
LWLockAcquire(masterLock, LW_EXCLUSIVE);
|
||||
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* We no longer want LockWaitCancel to do anything.
|
||||
*/
|
||||
waitingForLock = false;
|
||||
lockAwaited = NULL;
|
||||
|
||||
/*
|
||||
* If we got the lock, be sure to remember it in the locallock table.
|
||||
@ -816,6 +823,8 @@ ProcSleep(LockMethod lockMethodTable,
|
||||
* Also remove the process from the wait queue and set its links invalid.
|
||||
* RETURN: the next process in the wait queue.
|
||||
*
|
||||
* The appropriate lock partition lock must be held by caller.
|
||||
*
|
||||
* XXX: presently, this code is only used for the "success" case, and only
|
||||
* works correctly for that case. To clean up in failure case, would need
|
||||
* to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
|
||||
@ -825,8 +834,6 @@ ProcWakeup(PGPROC *proc, int waitStatus)
|
||||
{
|
||||
PGPROC *retProc;
|
||||
|
||||
/* assume that masterLock has been acquired */
|
||||
|
||||
/* Proc should be sleeping ... */
|
||||
if (proc->links.prev == INVALID_OFFSET ||
|
||||
proc->links.next == INVALID_OFFSET)
|
||||
@ -854,6 +861,8 @@ ProcWakeup(PGPROC *proc, int waitStatus)
|
||||
* ProcLockWakeup -- routine for waking up processes when a lock is
|
||||
* released (or a prior waiter is aborted). Scan all waiters
|
||||
* for lock, waken any that are no longer blocked.
|
||||
*
|
||||
* The appropriate lock partition lock must be held by caller.
|
||||
*/
|
||||
void
|
||||
ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
|
||||
@ -908,25 +917,32 @@ ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
|
||||
Assert(waitQueue->size >= 0);
|
||||
}
|
||||
|
||||
/* --------------------
|
||||
/*
|
||||
* CheckDeadLock
|
||||
*
|
||||
* We only get to this routine if we got SIGALRM after DeadlockTimeout
|
||||
* while waiting for a lock to be released by some other process. Look
|
||||
* to see if there's a deadlock; if not, just return and continue waiting.
|
||||
* If we have a real deadlock, remove ourselves from the lock's wait queue
|
||||
* and signal an error to ProcSleep.
|
||||
* --------------------
|
||||
*/
|
||||
static void
|
||||
CheckDeadLock(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Acquire locktable lock. Note that the deadlock check interrupt had
|
||||
* better not be enabled anywhere that this process itself holds the
|
||||
* locktable lock, else this will wait forever. Also note that
|
||||
* LWLockAcquire creates a critical section, so that this routine cannot
|
||||
* be interrupted by cancel/die interrupts.
|
||||
* Acquire exclusive lock on the entire shared lock data structures.
|
||||
* Must grab LWLocks in partition-number order to avoid LWLock deadlock.
|
||||
*
|
||||
* Note that the deadlock check interrupt had better not be enabled
|
||||
* anywhere that this process itself holds lock partition locks, else this
|
||||
* will wait forever. Also note that LWLockAcquire creates a critical
|
||||
* section, so that this routine cannot be interrupted by cancel/die
|
||||
* interrupts.
|
||||
*/
|
||||
LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
|
||||
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
|
||||
LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Check to see if we've been awoken by anyone in the interim.
|
||||
@ -937,14 +953,11 @@ CheckDeadLock(void)
|
||||
*
|
||||
* We check by looking to see if we've been unlinked from the wait queue.
|
||||
* This is quicker than checking our semaphore's state, since no kernel
|
||||
* call is needed, and it is safe because we hold the locktable lock.
|
||||
* call is needed, and it is safe because we hold the lock partition lock.
|
||||
*/
|
||||
if (MyProc->links.prev == INVALID_OFFSET ||
|
||||
MyProc->links.next == INVALID_OFFSET)
|
||||
{
|
||||
LWLockRelease(LockMgrLock);
|
||||
return;
|
||||
}
|
||||
goto check_done;
|
||||
|
||||
#ifdef LOCK_DEBUG
|
||||
if (Debug_deadlocks)
|
||||
@ -954,16 +967,19 @@ CheckDeadLock(void)
|
||||
if (!DeadLockCheck(MyProc))
|
||||
{
|
||||
/* No deadlock, so keep waiting */
|
||||
LWLockRelease(LockMgrLock);
|
||||
return;
|
||||
goto check_done;
|
||||
}
|
||||
|
||||
/*
|
||||
* Oops. We have a deadlock.
|
||||
*
|
||||
* Get this process out of wait state.
|
||||
* Get this process out of wait state. (Note: we could do this more
|
||||
* efficiently by relying on lockAwaited, but use this coding to preserve
|
||||
* the flexibility to kill some other transaction than the one detecting
|
||||
* the deadlock.)
|
||||
*/
|
||||
RemoveFromWaitQueue(MyProc);
|
||||
Assert(MyProc->waitLock != NULL);
|
||||
RemoveFromWaitQueue(MyProc, LockTagToPartition(&(MyProc->waitLock->tag)));
|
||||
|
||||
/*
|
||||
* Set MyProc->waitStatus to STATUS_ERROR so that ProcSleep will report an
|
||||
@ -987,7 +1003,15 @@ CheckDeadLock(void)
|
||||
* them anymore. However, RemoveFromWaitQueue took care of waking up any
|
||||
* such processes.
|
||||
*/
|
||||
LWLockRelease(LockMgrLock);
|
||||
|
||||
/*
|
||||
* Release locks acquired at head of routine. Order is not critical,
|
||||
* so do it back-to-front to avoid waking another CheckDeadLock instance
|
||||
* before it can get all the locks.
|
||||
*/
|
||||
check_done:
|
||||
for (i = NUM_LOCK_PARTITIONS; --i >= 0; )
|
||||
LWLockRelease(FirstLockMgrLock + i);
|
||||
}
|
||||
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.92 2005/12/09 01:22:04 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.93 2005/12/11 21:02:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -19,6 +19,13 @@
|
||||
#include "storage/shmem.h"
|
||||
|
||||
|
||||
/*
|
||||
* Number of partitions the shared lock tables are divided into.
|
||||
*
|
||||
* See LockTagToPartition() if you change this.
|
||||
*/
|
||||
#define NUM_LOCK_PARTITIONS 16
|
||||
|
||||
/* originally in procq.h */
|
||||
typedef struct PROC_QUEUE
|
||||
{
|
||||
@ -348,6 +355,7 @@ typedef struct LOCALLOCK
|
||||
LOCK *lock; /* associated LOCK object in shared mem */
|
||||
PROCLOCK *proclock; /* associated PROCLOCK object in shmem */
|
||||
bool isTempObject; /* true if lock is on a temporary object */
|
||||
int partition; /* ID of partition containing this lock */
|
||||
int nLocks; /* total number of times lock is held */
|
||||
int numLockOwners; /* # of relevant ResourceOwners */
|
||||
int maxLockOwners; /* allocated size of array */
|
||||
@ -389,6 +397,7 @@ typedef enum
|
||||
*/
|
||||
extern void InitLocks(void);
|
||||
extern LockMethod GetLocksMethodTable(const LOCK *lock);
|
||||
extern int LockTagToPartition(const LOCKTAG *locktag);
|
||||
extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
|
||||
bool isTempObject,
|
||||
LOCKMODE lockmode,
|
||||
@ -406,7 +415,7 @@ extern int LockCheckConflicts(LockMethod lockMethodTable,
|
||||
LOCK *lock, PROCLOCK *proclock, PGPROC *proc);
|
||||
extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
|
||||
extern void GrantAwaitedLock(void);
|
||||
extern void RemoveFromWaitQueue(PGPROC *proc);
|
||||
extern void RemoveFromWaitQueue(PGPROC *proc, int partition);
|
||||
extern Size LockShmemSize(void);
|
||||
extern bool DeadLockCheck(PGPROC *proc);
|
||||
extern void DeadLockReport(void);
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.23 2005/10/15 02:49:46 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.24 2005/12/11 21:02:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -16,9 +16,9 @@
|
||||
|
||||
/*
|
||||
* We have a number of predefined LWLocks, plus a bunch of LWLocks that are
|
||||
* dynamically assigned (for shared buffers). The LWLock structures live
|
||||
* in shared memory (since they contain shared data) and are identified by
|
||||
* values of this enumerated type. We abuse the notion of an enum somewhat
|
||||
* dynamically assigned (e.g., for shared buffers). The LWLock structures
|
||||
* live in shared memory (since they contain shared data) and are identified
|
||||
* by values of this enumerated type. We abuse the notion of an enum somewhat
|
||||
* by allowing values not listed in the enum declaration to be assigned.
|
||||
* The extra value MaxDynamicLWLock is there to keep the compiler from
|
||||
* deciding that the enum can be represented as char or short ...
|
||||
@ -27,7 +27,6 @@ typedef enum LWLockId
|
||||
{
|
||||
BufMappingLock,
|
||||
BufFreelistLock,
|
||||
LockMgrLock,
|
||||
OidGenLock,
|
||||
XidGenLock,
|
||||
ProcArrayLock,
|
||||
@ -46,8 +45,7 @@ typedef enum LWLockId
|
||||
RelCacheInitLock,
|
||||
BgWriterCommLock,
|
||||
TwoPhaseStateLock,
|
||||
|
||||
NumFixedLWLocks, /* must be last except for MaxDynamicLWLock */
|
||||
FirstLockMgrLock, /* must be last except for MaxDynamicLWLock */
|
||||
|
||||
MaxDynamicLWLock = 1000000000
|
||||
} LWLockId;
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.84 2005/10/15 02:49:46 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.85 2005/12/11 21:02:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -52,7 +52,8 @@ struct XidCache
|
||||
* so that the prepared transactions appear to be still running and are
|
||||
* correctly shown as holding locks. A prepared transaction PGPROC can be
|
||||
* distinguished from a real one at need by the fact that it has pid == 0.
|
||||
* The semaphore and lock-related fields in a prepared-xact PGPROC are unused.
|
||||
* The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
|
||||
* but its myProcLocks[] lists are valid.
|
||||
*/
|
||||
struct PGPROC
|
||||
{
|
||||
@ -86,8 +87,12 @@ struct PGPROC
|
||||
LOCKMASK heldLocks; /* bitmask for lock types already held on this
|
||||
* lock object by this backend */
|
||||
|
||||
SHM_QUEUE procLocks; /* list of PROCLOCK objects for locks held or
|
||||
* awaited by this backend */
|
||||
/*
|
||||
* All PROCLOCK objects for locks held or awaited by this backend are
|
||||
* linked into one of these lists, according to the partition number of
|
||||
* their lock.
|
||||
*/
|
||||
SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
|
||||
|
||||
struct XidCache subxids; /* cache for subtransaction XIDs */
|
||||
};
|
||||
@ -99,7 +104,7 @@ extern DLLIMPORT PGPROC *MyProc;
|
||||
|
||||
|
||||
/*
|
||||
* There is one ProcGlobal struct for the whole installation.
|
||||
* There is one ProcGlobal struct for the whole database cluster.
|
||||
*/
|
||||
typedef struct PROC_HDR
|
||||
{
|
||||
@ -134,8 +139,7 @@ extern bool HaveNFreeProcs(int n);
|
||||
extern void ProcReleaseLocks(bool isCommit);
|
||||
|
||||
extern void ProcQueueInit(PROC_QUEUE *queue);
|
||||
extern int ProcSleep(LockMethod lockMethodTable, LOCKMODE lockmode,
|
||||
LOCK *lock, PROCLOCK *proclock);
|
||||
extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
|
||||
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
|
||||
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
|
||||
extern bool LockWaitCancel(void);
|
||||
|
Loading…
x
Reference in New Issue
Block a user