1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Make parallel nbtree index scans use an LWLock.

Teach parallel nbtree index scans to use an LWLock (not a spinlock) to
protect the scan's shared descriptor state.

This is preparation for an upcoming patch that will add skip scan
optimizations to nbtree.  That patch will occasionally need to allocate
memory while the scan descriptor is locked, when copying datums that
were serialized by another backend.  Allocating memory is not safe while
holding a spinlock, hence the switch to an LWLock.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Matthias van de Meent <boekewurm+postgres@gmail.com>
Discussion: https://postgr.es/m/CAH2-Wz=PKR6rB7qbx+Vnd7eqeB5VTcrW=iJvAsTsKbdG+kW_UA@mail.gmail.com
This commit is contained in:
Peter Geoghegan
2025-03-08 11:10:14 -05:00
parent 8021c77769
commit 67fc4c9fd7
5 changed files with 18 additions and 14 deletions

View File

@ -1565,7 +1565,7 @@ _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap)
* Parallel index scans require space in shared memory to store the
* current array elements (for arrays kept by preprocessing) to schedule
* the next primitive index scan. The underlying structure is protected
* using a spinlock, so defensively limit its size. In practice this can
* using an LWLock, so defensively limit its size. In practice this can
* only affect parallel scans that use an incomplete opfamily.
*/
if (scan->parallel_scan && so->numArrayKeys > INDEX_MAX_KEYS)

View File

@ -70,7 +70,7 @@ typedef struct BTParallelScanDescData
BTPS_State btps_pageStatus; /* indicates whether next page is
* available for scan. see above for
* possible states of parallel scan. */
slock_t btps_mutex; /* protects above variables, btps_arrElems */
LWLock btps_lock; /* protects shared parallel state */
ConditionVariable btps_cv; /* used to synchronize parallel scan */
/*
@ -554,7 +554,8 @@ btinitparallelscan(void *target)
{
BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
SpinLockInit(&bt_target->btps_mutex);
LWLockInitialize(&bt_target->btps_lock,
LWTRANCHE_PARALLEL_BTREE_SCAN);
bt_target->btps_nextScanPage = InvalidBlockNumber;
bt_target->btps_lastCurrPage = InvalidBlockNumber;
bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
@ -576,15 +577,15 @@ btparallelrescan(IndexScanDesc scan)
parallel_scan->ps_offset);
/*
* In theory, we don't need to acquire the spinlock here, because there
* In theory, we don't need to acquire the LWLock here, because there
* shouldn't be any other workers running at this point, but we do so for
* consistency.
*/
SpinLockAcquire(&btscan->btps_mutex);
LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
btscan->btps_nextScanPage = InvalidBlockNumber;
btscan->btps_lastCurrPage = InvalidBlockNumber;
btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
SpinLockRelease(&btscan->btps_mutex);
LWLockRelease(&btscan->btps_lock);
}
/*
@ -655,7 +656,7 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
while (1)
{
SpinLockAcquire(&btscan->btps_mutex);
LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
if (btscan->btps_pageStatus == BTPARALLEL_DONE)
{
@ -717,7 +718,7 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
*last_curr_page = btscan->btps_lastCurrPage;
exit_loop = true;
}
SpinLockRelease(&btscan->btps_mutex);
LWLockRelease(&btscan->btps_lock);
if (exit_loop || !status)
break;
ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
@ -761,11 +762,11 @@ _bt_parallel_release(IndexScanDesc scan, BlockNumber next_scan_page,
btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
parallel_scan->ps_offset);
SpinLockAcquire(&btscan->btps_mutex);
LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
btscan->btps_nextScanPage = next_scan_page;
btscan->btps_lastCurrPage = curr_page;
btscan->btps_pageStatus = BTPARALLEL_IDLE;
SpinLockRelease(&btscan->btps_mutex);
LWLockRelease(&btscan->btps_lock);
ConditionVariableSignal(&btscan->btps_cv);
}
@ -804,14 +805,14 @@ _bt_parallel_done(IndexScanDesc scan)
* Mark the parallel scan as done, unless some other process did so
* already
*/
SpinLockAcquire(&btscan->btps_mutex);
LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
Assert(btscan->btps_pageStatus != BTPARALLEL_NEED_PRIMSCAN);
if (btscan->btps_pageStatus != BTPARALLEL_DONE)
{
btscan->btps_pageStatus = BTPARALLEL_DONE;
status_changed = true;
}
SpinLockRelease(&btscan->btps_mutex);
LWLockRelease(&btscan->btps_lock);
/* wake up all the workers associated with this parallel scan */
if (status_changed)
@ -838,7 +839,7 @@ _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page)
btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
parallel_scan->ps_offset);
SpinLockAcquire(&btscan->btps_mutex);
LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
if (btscan->btps_lastCurrPage == curr_page &&
btscan->btps_pageStatus == BTPARALLEL_IDLE)
{
@ -854,7 +855,7 @@ _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page)
btscan->btps_arrElems[i] = array->cur_elem;
}
}
SpinLockRelease(&btscan->btps_mutex);
LWLockRelease(&btscan->btps_lock);
}
/*

View File

@ -153,6 +153,7 @@ static const char *const BuiltinTrancheNames[] = {
[LWTRANCHE_LOCK_MANAGER] = "LockManager",
[LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager",
[LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin",
[LWTRANCHE_PARALLEL_BTREE_SCAN] = "ParallelBtreeScan",
[LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA",
[LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA",
[LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType",

View File

@ -371,6 +371,7 @@ BufferMapping "Waiting to associate a data block with a buffer in the buffer poo
LockManager "Waiting to read or update information about <quote>heavyweight</quote> locks."
PredicateLockManager "Waiting to access predicate lock information used by serializable transactions."
ParallelHashJoin "Waiting to synchronize workers during Parallel Hash Join plan execution."
ParallelBtreeScan "Waiting to synchronize workers during Parallel B-tree scan plan execution."
ParallelQueryDSA "Waiting for parallel query dynamic shared memory allocation."
PerSessionDSA "Waiting for parallel query dynamic shared memory allocation."
PerSessionRecordType "Waiting to access a parallel query's information about composite types."

View File

@ -194,6 +194,7 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_LOCK_MANAGER,
LWTRANCHE_PREDICATE_LOCK_MANAGER,
LWTRANCHE_PARALLEL_HASH_JOIN,
LWTRANCHE_PARALLEL_BTREE_SCAN,
LWTRANCHE_PARALLEL_QUERY_DSA,
LWTRANCHE_PER_SESSION_DSA,
LWTRANCHE_PER_SESSION_RECORD_TYPE,