diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 7637581a184..ed59dfce893 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -309,6 +309,8 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
 
 	InitializeMaxBackends();
 
+	InitializeFastPathLocks();
+
 	CreateSharedMemoryAndSemaphores();
 
 	/*
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 96bc1d1cfed..85fd24e8287 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -903,6 +903,11 @@ PostmasterMain(int argc, char *argv[])
 	 */
 	InitializeMaxBackends();
 
+	/*
+	 * Calculate the size of the PGPROC fast-path lock arrays.
+	 */
+	InitializeFastPathLocks();
+
 	/*
 	 * Give preloaded libraries a chance to request additional shared memory.
 	 */
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 6caeca3a8e6..10fc18f2529 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -178,6 +178,12 @@ AttachSharedMemoryStructs(void)
 	Assert(MyProc != NULL);
 	Assert(IsUnderPostmaster);
 
+	/*
+	 * In EXEC_BACKEND mode, backends don't inherit the number of fast-path
+	 * groups we calculated before setting up shared memory, so recalculate it.
+	 */
+	InitializeFastPathLocks();
+
 	CreateOrAttachShmemStructs();
 
 	/*
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 83b99a98f08..613b0d49944 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -166,8 +166,13 @@ typedef struct TwoPhaseLockRecord
  * might be higher than the real number if another backend has transferred
  * our locks to the primary lock table, but it can never be lower than the
  * real value, since only we can acquire locks on our own behalf.
+ *
+ * XXX Allocate a static array of the maximum size. We could use a pointer
+ * and then allocate just the right size to save a couple kB, but then we
+ * would have to initialize that, while for the static array that happens
+ * automatically. Doesn't seem worth the extra complexity.
  */
-static int	FastPathLocalUseCount = 0;
+static int	FastPathLocalUseCounts[FP_LOCK_GROUPS_PER_BACKEND_MAX];
 
 /*
  * Flag to indicate if the relation extension lock is held by this backend.
@@ -184,23 +189,68 @@ static int	FastPathLocalUseCount = 0;
  */
 static bool IsRelationExtensionLockHeld PG_USED_FOR_ASSERTS_ONLY = false;
 
+/*
+ * Number of fast-path locks per backend - size of the arrays in PGPROC.
+ * This is set only once during startup, before initializing shared memory,
+ * and remains constant after that.
+ *
+ * We set the limit based on the max_locks_per_transaction GUC, because
+ * that's the best information we have about the expected number of locks
+ * per backend. See InitializeFastPathLocks() for details.
+ */
+int			FastPathLockGroupsPerBackend = 0;
+
+/*
+ * Macros to calculate the fast-path group and index for a relation.
+ *
+ * The formula is a simple hash function, designed to spread the OIDs a bit,
+ * so that even contiguous values end up in different groups. In most cases
+ * there will be gaps anyway, but the multiplication should help a bit.
+ *
+ * The selected constant (49157) is a prime not too close to 2^k, and it's
+ * small enough not to cause overflows (in 64-bit).
+ */
+#define FAST_PATH_REL_GROUP(rel) \
+	(((uint64) (rel) * 49157) % FastPathLockGroupsPerBackend)
+
+/*
+ * Given the group/slot indexes, calculate the slot index in the whole array
+ * of fast-path lock slots.
+ */
+#define FAST_PATH_SLOT(group, index) \
+	(AssertMacro(((group) >= 0) && ((group) < FastPathLockGroupsPerBackend)), \
+	 AssertMacro(((index) >= 0) && ((index) < FP_LOCK_SLOTS_PER_GROUP)), \
+	 ((group) * FP_LOCK_SLOTS_PER_GROUP + (index)))
+
+/*
+ * Given a slot index (into the whole per-backend array), calculated using
+ * the FAST_PATH_SLOT macro, split it into group and index (in the group).
+ */
+#define FAST_PATH_GROUP(index)	\
+	(AssertMacro(((index) >= 0) && ((index) < FP_LOCK_SLOTS_PER_BACKEND)), \
+	 ((index) / FP_LOCK_SLOTS_PER_GROUP))
+#define FAST_PATH_INDEX(index) \
+	(AssertMacro(((index) >= 0) && ((index) < FP_LOCK_SLOTS_PER_BACKEND)), \
+	 ((index) % FP_LOCK_SLOTS_PER_GROUP))
+
 /* Macros for manipulating proc->fpLockBits */
 #define FAST_PATH_BITS_PER_SLOT 3
 #define FAST_PATH_LOCKNUMBER_OFFSET 1
 #define FAST_PATH_MASK ((1 << FAST_PATH_BITS_PER_SLOT) - 1)
+#define FAST_PATH_BITS(proc, n) (proc)->fpLockBits[FAST_PATH_GROUP(n)]
 #define FAST_PATH_GET_BITS(proc, n) \
-	(((proc)->fpLockBits >> (FAST_PATH_BITS_PER_SLOT * n)) & FAST_PATH_MASK)
+	((FAST_PATH_BITS(proc, n) >> (FAST_PATH_BITS_PER_SLOT * FAST_PATH_INDEX(n))) & FAST_PATH_MASK)
 #define FAST_PATH_BIT_POSITION(n, l) \
 	(AssertMacro((l) >= FAST_PATH_LOCKNUMBER_OFFSET), \
 	 AssertMacro((l) < FAST_PATH_BITS_PER_SLOT+FAST_PATH_LOCKNUMBER_OFFSET), \
 	 AssertMacro((n) < FP_LOCK_SLOTS_PER_BACKEND), \
-	 ((l) - FAST_PATH_LOCKNUMBER_OFFSET + FAST_PATH_BITS_PER_SLOT * (n)))
+	 ((l) - FAST_PATH_LOCKNUMBER_OFFSET + FAST_PATH_BITS_PER_SLOT * (FAST_PATH_INDEX(n))))
 #define FAST_PATH_SET_LOCKMODE(proc, n, l) \
-	(proc)->fpLockBits |= UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l)
+	FAST_PATH_BITS(proc, n) |= UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l)
 #define FAST_PATH_CLEAR_LOCKMODE(proc, n, l) \
-	(proc)->fpLockBits &= ~(UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l))
+	FAST_PATH_BITS(proc, n) &= ~(UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l))
 #define FAST_PATH_CHECK_LOCKMODE(proc, n, l) \
-	((proc)->fpLockBits & (UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l)))
+	(FAST_PATH_BITS(proc, n) & (UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l)))
 
 /*
  * The fast-path lock mechanism is concerned only with relation locks on
@@ -926,7 +976,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	 * for now we don't worry about that case either.
 	 */
 	if (EligibleForRelationFastPath(locktag, lockmode) &&
-		FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
+		FastPathLocalUseCounts[FAST_PATH_REL_GROUP(locktag->locktag_field2)] < FP_LOCK_SLOTS_PER_GROUP)
 	{
 		uint32		fasthashcode = FastPathStrongLockHashPartition(hashcode);
 		bool		acquired;
@@ -2065,7 +2115,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
 
 	/* Attempt fast release of any lock eligible for the fast path. */
 	if (EligibleForRelationFastPath(locktag, lockmode) &&
-		FastPathLocalUseCount > 0)
+		FastPathLocalUseCounts[FAST_PATH_REL_GROUP(locktag->locktag_field2)] > 0)
 	{
 		bool		released;
 
@@ -2633,12 +2683,18 @@ LockReassignOwner(LOCALLOCK *locallock, ResourceOwner parent)
 static bool
 FastPathGrantRelationLock(Oid relid, LOCKMODE lockmode)
 {
-	uint32		f;
+	uint32		i;
 	uint32		unused_slot = FP_LOCK_SLOTS_PER_BACKEND;
 
+	/* fast-path group the lock belongs to */
+	uint32		group = FAST_PATH_REL_GROUP(relid);
+
 	/* Scan for existing entry for this relid, remembering empty slot. */
-	for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; f++)
+	for (i = 0; i < FP_LOCK_SLOTS_PER_GROUP; i++)
 	{
+		/* index into the whole per-backend array */
+		uint32		f = FAST_PATH_SLOT(group, i);
+
 		if (FAST_PATH_GET_BITS(MyProc, f) == 0)
 			unused_slot = f;
 		else if (MyProc->fpRelId[f] == relid)
@@ -2654,7 +2710,7 @@ FastPathGrantRelationLock(Oid relid, LOCKMODE lockmode)
 	{
 		MyProc->fpRelId[unused_slot] = relid;
 		FAST_PATH_SET_LOCKMODE(MyProc, unused_slot, lockmode);
-		++FastPathLocalUseCount;
+		++FastPathLocalUseCounts[group];
 		return true;
 	}
 
@@ -2670,12 +2726,18 @@ FastPathGrantRelationLock(Oid relid, LOCKMODE lockmode)
 static bool
 FastPathUnGrantRelationLock(Oid relid, LOCKMODE lockmode)
 {
-	uint32		f;
+	uint32		i;
 	bool		result = false;
 
-	FastPathLocalUseCount = 0;
-	for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; f++)
+	/* fast-path group the lock belongs to */
+	uint32		group = FAST_PATH_REL_GROUP(relid);
+
+	FastPathLocalUseCounts[group] = 0;
+	for (i = 0; i < FP_LOCK_SLOTS_PER_GROUP; i++)
 	{
+		/* index into the whole per-backend array */
+		uint32		f = FAST_PATH_SLOT(group, i);
+
 		if (MyProc->fpRelId[f] == relid
 			&& FAST_PATH_CHECK_LOCKMODE(MyProc, f, lockmode))
 		{
@@ -2685,7 +2747,7 @@ FastPathUnGrantRelationLock(Oid relid, LOCKMODE lockmode)
 			/* we continue iterating so as to update FastPathLocalUseCount */
 		}
 		if (FAST_PATH_GET_BITS(MyProc, f) != 0)
-			++FastPathLocalUseCount;
+			++FastPathLocalUseCounts[group];
 	}
 	return result;
 }
@@ -2714,7 +2776,8 @@ FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag
 	for (i = 0; i < ProcGlobal->allProcCount; i++)
 	{
 		PGPROC	   *proc = &ProcGlobal->allProcs[i];
-		uint32		f;
+		uint32		j,
+					group;
 
 		LWLockAcquire(&proc->fpInfoLock, LW_EXCLUSIVE);
 
@@ -2739,10 +2802,16 @@ FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag
 			continue;
 		}
 
-		for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; f++)
+		/* fast-path group the lock belongs to */
+		group = FAST_PATH_REL_GROUP(relid);
+
+		for (j = 0; j < FP_LOCK_SLOTS_PER_GROUP; j++)
 		{
 			uint32		lockmode;
 
+			/* index into the whole per-backend array */
+			uint32		f = FAST_PATH_SLOT(group, j);
+
 			/* Look for an allocated slot matching the given relid. */
 			if (relid != proc->fpRelId[f] || FAST_PATH_GET_BITS(proc, f) == 0)
 				continue;
@@ -2793,14 +2862,21 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock)
 	PROCLOCK   *proclock = NULL;
 	LWLock	   *partitionLock = LockHashPartitionLock(locallock->hashcode);
 	Oid			relid = locktag->locktag_field2;
-	uint32		f;
+	uint32		i,
+				group;
+
+	/* fast-path group the lock belongs to */
+	group = FAST_PATH_REL_GROUP(relid);
 
 	LWLockAcquire(&MyProc->fpInfoLock, LW_EXCLUSIVE);
 
-	for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; f++)
+	for (i = 0; i < FP_LOCK_SLOTS_PER_GROUP; i++)
 	{
 		uint32		lockmode;
 
+		/* index into the whole per-backend array */
+		uint32		f = FAST_PATH_SLOT(group, i);
+
 		/* Look for an allocated slot matching the given relid. */
 		if (relid != MyProc->fpRelId[f] || FAST_PATH_GET_BITS(MyProc, f) == 0)
 			continue;
@@ -2957,7 +3033,8 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp)
 	for (i = 0; i < ProcGlobal->allProcCount; i++)
 	{
 		PGPROC	   *proc = &ProcGlobal->allProcs[i];
-		uint32		f;
+		uint32		j,
+					group;
 
 		/* A backend never blocks itself */
 		if (proc == MyProc)
@@ -2979,10 +3056,16 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp)
 			continue;
 		}
 
-		for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; f++)
+		/* fast-path group the lock belongs to */
+		group = FAST_PATH_REL_GROUP(relid);
+
+		for (j = 0; j < FP_LOCK_SLOTS_PER_GROUP; j++)
 		{
 			uint32		lockmask;
 
+			/* index into the whole per-backend array */
+			uint32		f = FAST_PATH_SLOT(group, j);
+
 			/* Look for an allocated slot matching the given relid. */
 			if (relid != proc->fpRelId[f])
 				continue;
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index ac66da8638f..9b72829725a 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -103,6 +103,8 @@ ProcGlobalShmemSize(void)
 	Size		size = 0;
 	Size		TotalProcs =
 		add_size(MaxBackends, add_size(NUM_AUXILIARY_PROCS, max_prepared_xacts));
+	Size		fpLockBitsSize,
+				fpRelIdSize;
 
 	/* ProcGlobal */
 	size = add_size(size, sizeof(PROC_HDR));
@@ -113,6 +115,15 @@ ProcGlobalShmemSize(void)
 	size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->subxidStates)));
 	size = add_size(size, mul_size(TotalProcs, sizeof(*ProcGlobal->statusFlags)));
 
+	/*
+	 * Memory needed for PGPROC fast-path lock arrays. Make sure the sizes are
+	 * nicely aligned in each backend.
+	 */
+	fpLockBitsSize = MAXALIGN(FastPathLockGroupsPerBackend * sizeof(uint64));
+	fpRelIdSize = MAXALIGN(FastPathLockGroupsPerBackend * sizeof(Oid) * FP_LOCK_SLOTS_PER_GROUP);
+
+	size = add_size(size, mul_size(TotalProcs, (fpLockBitsSize + fpRelIdSize)));
+
 	return size;
 }
 
@@ -163,6 +174,12 @@ InitProcGlobal(void)
 	bool		found;
 	uint32		TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts;
 
+	/* Used for setup of per-backend fast-path slots. */
+	char	   *fpPtr,
+			   *fpEndPtr PG_USED_FOR_ASSERTS_ONLY;
+	Size		fpLockBitsSize,
+				fpRelIdSize;
+
 	/* Create the ProcGlobal shared structure */
 	ProcGlobal = (PROC_HDR *)
 		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
@@ -211,12 +228,38 @@ InitProcGlobal(void)
 	ProcGlobal->statusFlags = (uint8 *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->statusFlags));
 	MemSet(ProcGlobal->statusFlags, 0, TotalProcs * sizeof(*ProcGlobal->statusFlags));
 
+	/*
+	 * Allocate arrays for fast-path locks. Those are variable-length, so
+	 * can't be included in PGPROC directly. We allocate a separate piece of
+	 * shared memory and then divide that between backends.
+	 */
+	fpLockBitsSize = MAXALIGN(FastPathLockGroupsPerBackend * sizeof(uint64));
+	fpRelIdSize = MAXALIGN(FastPathLockGroupsPerBackend * sizeof(Oid) * FP_LOCK_SLOTS_PER_GROUP);
+
+	fpPtr = ShmemAlloc(TotalProcs * (fpLockBitsSize + fpRelIdSize));
+	MemSet(fpPtr, 0, TotalProcs * (fpLockBitsSize + fpRelIdSize));
+
+	/* For asserts checking we did not overflow. */
+	fpEndPtr = fpPtr + (TotalProcs * (fpLockBitsSize + fpRelIdSize));
+
 	for (i = 0; i < TotalProcs; i++)
 	{
 		PGPROC	   *proc = &procs[i];
 
 		/* Common initialization for all PGPROCs, regardless of type. */
 
+		/*
+		 * Set the fast-path lock arrays, and move the pointer. We interleave
+		 * the two arrays, to (hopefully) get some locality for each backend.
+		 */
+		proc->fpLockBits = (uint64 *) fpPtr;
+		fpPtr += fpLockBitsSize;
+
+		proc->fpRelId = (Oid *) fpPtr;
+		fpPtr += fpRelIdSize;
+
+		Assert(fpPtr <= fpEndPtr);
+
 		/*
 		 * Set up per-PGPROC semaphore, latch, and fpInfoLock. Prepared xact
 		 * dummy PGPROCs don't need these though - they're never associated
@@ -278,6 +321,9 @@ InitProcGlobal(void)
 		pg_atomic_init_u64(&(proc->waitStart), 0);
 	}
 
+	/* Should have consumed exactly the expected amount of fast-path memory. */
+	Assert(fpPtr == fpEndPtr);
+
 	/*
 	 * Save pointers to the blocks of PGPROC structures reserved for auxiliary
 	 * processes and prepared transactions.
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index e394f1419ad..7f5eada9d45 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4190,6 +4190,9 @@ PostgresSingleUserMain(int argc, char *argv[],
 	/* Initialize MaxBackends */
 	InitializeMaxBackends();
 
+	/* Initialize size of fast-path lock cache. */
+	InitializeFastPathLocks();
+
 	/*
 	 * Give preloaded libraries a chance to request additional shared memory.
 	 */
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 3b50ce19a2c..a024b1151d0 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -557,6 +557,40 @@ InitializeMaxBackends(void)
 						   MAX_BACKENDS)));
 }
 
+/*
+ * Initialize the number of fast-path lock slots in PGPROC.
+ *
+ * This must be called after modules have had the chance to alter GUCs in
+ * shared_preload_libraries and before shared memory size is determined.
+ *
+ * The default max_locks_per_xact=64 means 4 groups.
+ *
+ * We allow anything between 1 and 1024 groups, with the usual power-of-2
+ * logic. The 1 is the "old" size with only 16 slots, 1024 is an arbitrary
+ * limit (matching max_locks_per_xact = 16k). Values over 1024 are unlikely
+ * to be beneficial - there are bottlenecks we'll hit way before that.
+ */
+void
+InitializeFastPathLocks(void)
+{
+	/* Should be initialized only once. */
+	Assert(FastPathLockGroupsPerBackend == 0);
+
+	/* we need at least one group */
+	FastPathLockGroupsPerBackend = 1;
+
+	while (FastPathLockGroupsPerBackend < FP_LOCK_GROUPS_PER_BACKEND_MAX)
+	{
+		/* stop once we reach max_locks_per_xact */
+		if (FastPathLockGroupsPerBackend * FP_LOCK_SLOTS_PER_GROUP >= max_locks_per_xact)
+			break;
+
+		FastPathLockGroupsPerBackend *= 2;
+	}
+
+	Assert(FastPathLockGroupsPerBackend <= FP_LOCK_GROUPS_PER_BACKEND_MAX);
+}
+
 /*
  * Early initialization of a backend (either standalone or under postmaster).
  * This happens even before InitPostgres.
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 25348e71eb9..e26d108a470 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -475,6 +475,7 @@ extern PGDLLIMPORT ProcessingMode Mode;
 #define INIT_PG_OVERRIDE_ROLE_LOGIN	0x0004
 extern void pg_split_opts(char **argv, int *argcp, const char *optstr);
 extern void InitializeMaxBackends(void);
+extern void InitializeFastPathLocks(void);
 extern void InitPostgres(const char *in_dbname, Oid dboid,
 						 const char *username, Oid useroid,
 						 bits32 flags,
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index deeb06c9e01..ebcf0ad4036 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -78,12 +78,17 @@ struct XidCache
 #define		PROC_XMIN_FLAGS (PROC_IN_VACUUM | PROC_IN_SAFE_IC)
 
 /*
- * We allow a small number of "weak" relation locks (AccessShareLock,
+ * We allow a limited number of "weak" relation locks (AccessShareLock,
  * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure
- * rather than the main lock table. This eases contention on the lock
- * manager LWLocks. See storage/lmgr/README for additional details.
+ * (or rather in shared memory referenced from PGPROC) rather than the main
+ * lock table. This eases contention on the lock manager LWLocks. See
+ * storage/lmgr/README for additional details.
  */
-#define FP_LOCK_SLOTS_PER_BACKEND 16
+extern PGDLLIMPORT int FastPathLockGroupsPerBackend;
+
+#define FP_LOCK_GROUPS_PER_BACKEND_MAX	1024
+#define FP_LOCK_SLOTS_PER_GROUP		16	/* don't change */
+#define FP_LOCK_SLOTS_PER_BACKEND	(FP_LOCK_SLOTS_PER_GROUP * FastPathLockGroupsPerBackend)
 
 /*
  * Flags for PGPROC.delayChkptFlags
@@ -292,8 +297,8 @@ struct PGPROC
 
 	/* Lock manager data, recording fast-path locks taken by this backend. */
 	LWLock		fpInfoLock;		/* protects per-backend fast-path state */
-	uint64		fpLockBits;		/* lock modes held for each fast-path slot */
-	Oid			fpRelId[FP_LOCK_SLOTS_PER_BACKEND];	/* slots for rel oids */
+	uint64	   *fpLockBits;		/* lock modes held for each fast-path slot */
+	Oid		   *fpRelId;		/* slots for rel oids */
 	bool		fpVXIDLock;		/* are we holding a fast-path VXID lock? */
 	LocalTransactionId fpLocalTransactionId;	/* lxid for fast-path VXID
 												 * lock */
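
To make the new addressing scheme concrete, here is a standalone sketch (not part of the patch) that mirrors FAST_PATH_REL_GROUP and FAST_PATH_SLOT. The group count of 4 matches what InitializeFastPathLocks() picks for the default max_locks_per_transaction = 64; the OID range starting at 16384 (the first user-assigned OID) is illustrative.

```c
#include <stdint.h>
#include <stdio.h>

#define FP_LOCK_SLOTS_PER_GROUP 16

static int	FastPathLockGroupsPerBackend = 4;	/* default for 64 locks/xact */

/* same hash as the patch: spread OIDs across the power-of-2 group count */
#define FAST_PATH_REL_GROUP(rel) \
	(((uint64_t) (rel) * 49157) % FastPathLockGroupsPerBackend)
/* map (group, index-in-group) to an index into the whole per-backend array */
#define FAST_PATH_SLOT(group, index) \
	((group) * FP_LOCK_SLOTS_PER_GROUP + (index))

int
main(void)
{
	for (uint32_t relid = 16384; relid < 16390; relid++)
	{
		unsigned	group = (unsigned) FAST_PATH_REL_GROUP(relid);

		/* each relation may only use the 16 slots of its own group */
		printf("relid %u -> group %u, slots [%u..%u]\n",
			   (unsigned) relid, group,
			   (unsigned) FAST_PATH_SLOT(group, 0),
			   (unsigned) FAST_PATH_SLOT(group, FP_LOCK_SLOTS_PER_GROUP - 1));
	}
	return 0;
}
```

Since 49157 is odd, it is invertible modulo any power-of-two group count, so consecutive OIDs always land in different groups, which is exactly the spreading the comment in lock.c describes.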
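The sizing rule is equally easy to check in isolation. This sketch (again not part of the patch; the sample values are illustrative) collapses the doubling loop from InitializeFastPathLocks() into a helper and prints the resulting group count:

```c
#include <stdio.h>

#define FP_LOCK_GROUPS_PER_BACKEND_MAX 1024
#define FP_LOCK_SLOTS_PER_GROUP 16

/* double the group count until the slots cover max_locks_per_xact, capped */
static int
groups_for(int max_locks_per_xact)
{
	int			groups = 1;		/* one group = the pre-patch 16 slots */

	while (groups < FP_LOCK_GROUPS_PER_BACKEND_MAX &&
		   groups * FP_LOCK_SLOTS_PER_GROUP < max_locks_per_xact)
		groups *= 2;

	return groups;
}

int
main(void)
{
	int			samples[] = {10, 64, 100, 1000, 16384, 100000};

	for (int i = 0; i < 6; i++)
		printf("max_locks_per_xact = %6d -> %4d groups (%5d slots)\n",
			   samples[i], groups_for(samples[i]),
			   groups_for(samples[i]) * FP_LOCK_SLOTS_PER_GROUP);
	return 0;
}
```

The default of 64 yields 4 groups (64 slots), and 16384 hits the 1024-group cap, matching the "max_locks_per_xact = 16k" remark in the postinit.c comment.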
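Finally, a back-of-the-envelope check of the shared-memory cost added in ProcGlobalShmemSize(): per PGPROC, the patch needs one uint64 bitmap word per group plus 16 Oid slots per group. The sketch below approximates MAXALIGN as 8-byte rounding and assumes a 4-byte Oid; both hold on typical 64-bit builds, but are assumptions here rather than code from the patch.

```c
#include <stdint.h>
#include <stdio.h>

#define FP_LOCK_SLOTS_PER_GROUP 16
/* assumption: MAXALIGN rounds up to 8 bytes on 64-bit platforms */
#define MAXALIGN(x) (((uint64_t) (x) + 7) & ~(uint64_t) 7)

int
main(void)
{
	for (int groups = 1; groups <= 1024; groups *= 4)
	{
		/* one uint64 of lock-mode bits per group */
		uint64_t	fpLockBitsSize = MAXALIGN(groups * sizeof(uint64_t));
		/* 16 Oid slots per group; Oid is a 4-byte unsigned int */
		uint64_t	fpRelIdSize =
			MAXALIGN(groups * sizeof(uint32_t) * FP_LOCK_SLOTS_PER_GROUP);

		printf("%4d groups: %6llu bytes per backend\n", groups,
			   (unsigned long long) (fpLockBitsSize + fpRelIdSize));
	}
	return 0;
}
```

Even at the 1024-group cap this is about 72 kB per PGPROC, while the default of 4 groups costs under 300 bytes per backend.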