mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating processes can iterate jointly over the resulting set of heap blocks. In the future, we might also want to support using a parallel bitmap index scan to set up for a parallel bitmap heap scan, but that's a job for another day. Dilip Kumar, with some corrections and cosmetic changes by me. The larger patch set of which this is a part has been reviewed and tested by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia Sabih, Haribabu Kommi, Thomas Munro, and me. Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
This commit is contained in:
parent
4eafdcc276
commit
f35742ccb7
@ -1211,7 +1211,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
|
|||||||
<entry>Waiting in an extension.</entry>
|
<entry>Waiting in an extension.</entry>
|
||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry morerows="10"><literal>IPC</></entry>
|
<entry morerows="11"><literal>IPC</></entry>
|
||||||
<entry><literal>BgWorkerShutdown</></entry>
|
<entry><literal>BgWorkerShutdown</></entry>
|
||||||
<entry>Waiting for background worker to shut down.</entry>
|
<entry>Waiting for background worker to shut down.</entry>
|
||||||
</row>
|
</row>
|
||||||
@ -1247,6 +1247,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
|
|||||||
<entry><literal>ParallelFinish</></entry>
|
<entry><literal>ParallelFinish</></entry>
|
||||||
<entry>Waiting for parallel workers to finish computing.</entry>
|
<entry>Waiting for parallel workers to finish computing.</entry>
|
||||||
</row>
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry><literal>ParallelBitmapPopulate</></entry>
|
||||||
|
<entry>Waiting for the leader to populate the TidBitmap.</entry>
|
||||||
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry><literal>SafeSnapshot</></entry>
|
<entry><literal>SafeSnapshot</></entry>
|
||||||
<entry>Waiting for a snapshot for a <literal>READ ONLY DEFERRABLE</> transaction.</entry>
|
<entry>Waiting for a snapshot for a <literal>READ ONLY DEFERRABLE</> transaction.</entry>
|
||||||
|
@ -1753,6 +1753,22 @@ retry:
|
|||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------
|
||||||
|
* heap_update_snapshot
|
||||||
|
*
|
||||||
|
* Update snapshot info in heap scan descriptor.
|
||||||
|
* ----------------
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot)
|
||||||
|
{
|
||||||
|
Assert(IsMVCCSnapshot(snapshot));
|
||||||
|
|
||||||
|
RegisterSnapshot(snapshot);
|
||||||
|
scan->rs_snapshot = snapshot;
|
||||||
|
scan->rs_temp_snap = true;
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------
|
/* ----------------
|
||||||
* heap_getnext - retrieve next tuple in scan
|
* heap_getnext - retrieve next tuple in scan
|
||||||
*
|
*
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
|
|
||||||
#include "executor/execParallel.h"
|
#include "executor/execParallel.h"
|
||||||
#include "executor/executor.h"
|
#include "executor/executor.h"
|
||||||
|
#include "executor/nodeBitmapHeapscan.h"
|
||||||
#include "executor/nodeCustom.h"
|
#include "executor/nodeCustom.h"
|
||||||
#include "executor/nodeForeignscan.h"
|
#include "executor/nodeForeignscan.h"
|
||||||
#include "executor/nodeSeqscan.h"
|
#include "executor/nodeSeqscan.h"
|
||||||
@ -217,6 +218,10 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
|
|||||||
ExecCustomScanEstimate((CustomScanState *) planstate,
|
ExecCustomScanEstimate((CustomScanState *) planstate,
|
||||||
e->pcxt);
|
e->pcxt);
|
||||||
break;
|
break;
|
||||||
|
case T_BitmapHeapScanState:
|
||||||
|
ExecBitmapHeapEstimate((BitmapHeapScanState *) planstate,
|
||||||
|
e->pcxt);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -277,6 +282,11 @@ ExecParallelInitializeDSM(PlanState *planstate,
|
|||||||
ExecCustomScanInitializeDSM((CustomScanState *) planstate,
|
ExecCustomScanInitializeDSM((CustomScanState *) planstate,
|
||||||
d->pcxt);
|
d->pcxt);
|
||||||
break;
|
break;
|
||||||
|
case T_BitmapHeapScanState:
|
||||||
|
ExecBitmapHeapInitializeDSM((BitmapHeapScanState *) planstate,
|
||||||
|
d->pcxt);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -775,6 +785,10 @@ ExecParallelInitializeWorker(PlanState *planstate, shm_toc *toc)
|
|||||||
ExecCustomScanInitializeWorker((CustomScanState *) planstate,
|
ExecCustomScanInitializeWorker((CustomScanState *) planstate,
|
||||||
toc);
|
toc);
|
||||||
break;
|
break;
|
||||||
|
case T_BitmapHeapScanState:
|
||||||
|
ExecBitmapHeapInitializeWorker(
|
||||||
|
(BitmapHeapScanState *) planstate, toc);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -53,11 +53,15 @@
|
|||||||
|
|
||||||
static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
|
static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
|
||||||
static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
|
static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
|
||||||
|
static inline void BitmapDoneInitializingSharedState(
|
||||||
|
ParallelBitmapHeapState *pstate);
|
||||||
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
|
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
|
||||||
TBMIterateResult *tbmres);
|
TBMIterateResult *tbmres);
|
||||||
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
|
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
|
||||||
static inline void BitmapPrefetch(BitmapHeapScanState *node,
|
static inline void BitmapPrefetch(BitmapHeapScanState *node,
|
||||||
HeapScanDesc scan);
|
HeapScanDesc scan);
|
||||||
|
static bool BitmapShouldInitializeSharedState(
|
||||||
|
ParallelBitmapHeapState *pstate);
|
||||||
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------
|
/* ----------------------------------------------------------------
|
||||||
@ -73,9 +77,12 @@ BitmapHeapNext(BitmapHeapScanState *node)
|
|||||||
HeapScanDesc scan;
|
HeapScanDesc scan;
|
||||||
TIDBitmap *tbm;
|
TIDBitmap *tbm;
|
||||||
TBMIterator *tbmiterator;
|
TBMIterator *tbmiterator;
|
||||||
|
TBMSharedIterator *shared_tbmiterator;
|
||||||
TBMIterateResult *tbmres;
|
TBMIterateResult *tbmres;
|
||||||
OffsetNumber targoffset;
|
OffsetNumber targoffset;
|
||||||
TupleTableSlot *slot;
|
TupleTableSlot *slot;
|
||||||
|
ParallelBitmapHeapState *pstate = node->pstate;
|
||||||
|
dsa_area *dsa = node->ss.ps.state->es_query_dsa;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* extract necessary information from index scan node
|
* extract necessary information from index scan node
|
||||||
@ -84,7 +91,10 @@ BitmapHeapNext(BitmapHeapScanState *node)
|
|||||||
slot = node->ss.ss_ScanTupleSlot;
|
slot = node->ss.ss_ScanTupleSlot;
|
||||||
scan = node->ss.ss_currentScanDesc;
|
scan = node->ss.ss_currentScanDesc;
|
||||||
tbm = node->tbm;
|
tbm = node->tbm;
|
||||||
|
if (pstate == NULL)
|
||||||
tbmiterator = node->tbmiterator;
|
tbmiterator = node->tbmiterator;
|
||||||
|
else
|
||||||
|
shared_tbmiterator = node->shared_tbmiterator;
|
||||||
tbmres = node->tbmres;
|
tbmres = node->tbmres;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -99,7 +109,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
|
|||||||
* node->prefetch_maximum. This is to avoid doing a lot of prefetching in
|
* node->prefetch_maximum. This is to avoid doing a lot of prefetching in
|
||||||
* a scan that stops after a few tuples because of a LIMIT.
|
* a scan that stops after a few tuples because of a LIMIT.
|
||||||
*/
|
*/
|
||||||
if (tbm == NULL)
|
if (!node->initialized)
|
||||||
|
{
|
||||||
|
if (!pstate)
|
||||||
{
|
{
|
||||||
tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
|
tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
|
||||||
|
|
||||||
@ -119,6 +131,61 @@ BitmapHeapNext(BitmapHeapScanState *node)
|
|||||||
}
|
}
|
||||||
#endif /* USE_PREFETCH */
|
#endif /* USE_PREFETCH */
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The leader will immediately come out of the function, but
|
||||||
|
* others will be blocked until leader populates the TBM and wakes
|
||||||
|
* them up.
|
||||||
|
*/
|
||||||
|
if (BitmapShouldInitializeSharedState(pstate))
|
||||||
|
{
|
||||||
|
tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
|
||||||
|
if (!tbm || !IsA(tbm, TIDBitmap))
|
||||||
|
elog(ERROR, "unrecognized result from subplan");
|
||||||
|
|
||||||
|
node->tbm = tbm;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prepare to iterate over the TBM. This will return the
|
||||||
|
* dsa_pointer of the iterator state which will be used by
|
||||||
|
* multiple processes to iterate jointly.
|
||||||
|
*/
|
||||||
|
pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
|
||||||
|
#ifdef USE_PREFETCH
|
||||||
|
if (node->prefetch_maximum > 0)
|
||||||
|
{
|
||||||
|
pstate->prefetch_iterator =
|
||||||
|
tbm_prepare_shared_iterate(tbm);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We don't need the mutex here as we haven't yet woke up
|
||||||
|
* others.
|
||||||
|
*/
|
||||||
|
pstate->prefetch_pages = 0;
|
||||||
|
pstate->prefetch_target = -1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* We have initialized the shared state so wake up others. */
|
||||||
|
BitmapDoneInitializingSharedState(pstate);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate a private iterator and attach the shared state to it */
|
||||||
|
node->shared_tbmiterator = shared_tbmiterator =
|
||||||
|
tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
|
||||||
|
node->tbmres = tbmres = NULL;
|
||||||
|
|
||||||
|
#ifdef USE_PREFETCH
|
||||||
|
if (node->prefetch_maximum > 0)
|
||||||
|
{
|
||||||
|
node->shared_prefetch_iterator =
|
||||||
|
tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
|
||||||
|
}
|
||||||
|
#endif /* USE_PREFETCH */
|
||||||
|
}
|
||||||
|
node->initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
@ -130,7 +197,10 @@ BitmapHeapNext(BitmapHeapScanState *node)
|
|||||||
*/
|
*/
|
||||||
if (tbmres == NULL)
|
if (tbmres == NULL)
|
||||||
{
|
{
|
||||||
|
if (!pstate)
|
||||||
node->tbmres = tbmres = tbm_iterate(tbmiterator);
|
node->tbmres = tbmres = tbm_iterate(tbmiterator);
|
||||||
|
else
|
||||||
|
node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
|
||||||
if (tbmres == NULL)
|
if (tbmres == NULL)
|
||||||
{
|
{
|
||||||
/* no more entries in the bitmap */
|
/* no more entries in the bitmap */
|
||||||
@ -182,8 +252,19 @@ BitmapHeapNext(BitmapHeapScanState *node)
|
|||||||
* Try to prefetch at least a few pages even before we get to the
|
* Try to prefetch at least a few pages even before we get to the
|
||||||
* second page if we don't stop reading after the first tuple.
|
* second page if we don't stop reading after the first tuple.
|
||||||
*/
|
*/
|
||||||
|
if (!pstate)
|
||||||
|
{
|
||||||
if (node->prefetch_target < node->prefetch_maximum)
|
if (node->prefetch_target < node->prefetch_maximum)
|
||||||
node->prefetch_target++;
|
node->prefetch_target++;
|
||||||
|
}
|
||||||
|
else if (pstate->prefetch_target < node->prefetch_maximum)
|
||||||
|
{
|
||||||
|
/* take spinlock while updating shared state */
|
||||||
|
SpinLockAcquire(&pstate->mutex);
|
||||||
|
if (pstate->prefetch_target < node->prefetch_maximum)
|
||||||
|
pstate->prefetch_target++;
|
||||||
|
SpinLockRelease(&pstate->mutex);
|
||||||
|
}
|
||||||
#endif /* USE_PREFETCH */
|
#endif /* USE_PREFETCH */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -361,6 +442,21 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
|
|||||||
scan->rs_ntuples = ntup;
|
scan->rs_ntuples = ntup;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* BitmapDoneInitializingSharedState - Shared state is initialized
|
||||||
|
*
|
||||||
|
* By this time the leader has already populated the TBM and initialized the
|
||||||
|
* shared state so wake up other processes.
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
|
||||||
|
{
|
||||||
|
SpinLockAcquire(&pstate->mutex);
|
||||||
|
pstate->state = BM_FINISHED;
|
||||||
|
SpinLockRelease(&pstate->mutex);
|
||||||
|
ConditionVariableBroadcast(&pstate->cv);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
|
* BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
|
||||||
*/
|
*/
|
||||||
@ -369,6 +465,10 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
|
|||||||
TBMIterateResult *tbmres)
|
TBMIterateResult *tbmres)
|
||||||
{
|
{
|
||||||
#ifdef USE_PREFETCH
|
#ifdef USE_PREFETCH
|
||||||
|
ParallelBitmapHeapState *pstate = node->pstate;
|
||||||
|
|
||||||
|
if (pstate == NULL)
|
||||||
|
{
|
||||||
TBMIterator *prefetch_iterator = node->prefetch_iterator;
|
TBMIterator *prefetch_iterator = node->prefetch_iterator;
|
||||||
|
|
||||||
if (node->prefetch_pages > 0)
|
if (node->prefetch_pages > 0)
|
||||||
@ -384,6 +484,35 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
|
|||||||
if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
|
if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
|
||||||
elog(ERROR, "prefetch and main iterators are out of sync");
|
elog(ERROR, "prefetch and main iterators are out of sync");
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->prefetch_maximum > 0)
|
||||||
|
{
|
||||||
|
TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
|
||||||
|
|
||||||
|
SpinLockAcquire(&pstate->mutex);
|
||||||
|
if (pstate->prefetch_pages > 0)
|
||||||
|
{
|
||||||
|
node->prefetch_pages--;
|
||||||
|
SpinLockRelease(&pstate->mutex);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Release the mutex before iterating */
|
||||||
|
SpinLockRelease(&pstate->mutex);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In case of shared mode, we can not ensure that the current
|
||||||
|
* blockno of the main iterator and that of the prefetch iterator
|
||||||
|
* are same. It's possible that whatever blockno we are
|
||||||
|
* prefetching will be processed by another process. Therefore, we
|
||||||
|
* don't validate the blockno here as we do in non-parallel case.
|
||||||
|
*/
|
||||||
|
if (prefetch_iterator)
|
||||||
|
tbm_shared_iterate(prefetch_iterator);
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif /* USE_PREFETCH */
|
#endif /* USE_PREFETCH */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -399,6 +528,10 @@ static inline void
|
|||||||
BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
|
BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
|
||||||
{
|
{
|
||||||
#ifdef USE_PREFETCH
|
#ifdef USE_PREFETCH
|
||||||
|
ParallelBitmapHeapState *pstate = node->pstate;
|
||||||
|
|
||||||
|
if (pstate == NULL)
|
||||||
|
{
|
||||||
if (node->prefetch_target >= node->prefetch_maximum)
|
if (node->prefetch_target >= node->prefetch_maximum)
|
||||||
/* don't increase any further */ ;
|
/* don't increase any further */ ;
|
||||||
else if (node->prefetch_target >= node->prefetch_maximum / 2)
|
else if (node->prefetch_target >= node->prefetch_maximum / 2)
|
||||||
@ -407,6 +540,23 @@ BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
|
|||||||
node->prefetch_target *= 2;
|
node->prefetch_target *= 2;
|
||||||
else
|
else
|
||||||
node->prefetch_target++;
|
node->prefetch_target++;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do an unlocked check first to save spinlock acquisitions. */
|
||||||
|
if (pstate->prefetch_target < node->prefetch_maximum)
|
||||||
|
{
|
||||||
|
SpinLockAcquire(&pstate->mutex);
|
||||||
|
if (pstate->prefetch_target >= node->prefetch_maximum)
|
||||||
|
/* don't increase any further */ ;
|
||||||
|
else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
|
||||||
|
pstate->prefetch_target = node->prefetch_maximum;
|
||||||
|
else if (pstate->prefetch_target > 0)
|
||||||
|
pstate->prefetch_target *= 2;
|
||||||
|
else
|
||||||
|
pstate->prefetch_target++;
|
||||||
|
SpinLockRelease(&pstate->mutex);
|
||||||
|
}
|
||||||
#endif /* USE_PREFETCH */
|
#endif /* USE_PREFETCH */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -417,6 +567,10 @@ static inline void
|
|||||||
BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
|
BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
|
||||||
{
|
{
|
||||||
#ifdef USE_PREFETCH
|
#ifdef USE_PREFETCH
|
||||||
|
ParallelBitmapHeapState *pstate = node->pstate;
|
||||||
|
|
||||||
|
if (pstate == NULL)
|
||||||
|
{
|
||||||
TBMIterator *prefetch_iterator = node->prefetch_iterator;
|
TBMIterator *prefetch_iterator = node->prefetch_iterator;
|
||||||
|
|
||||||
if (prefetch_iterator)
|
if (prefetch_iterator)
|
||||||
@ -436,6 +590,49 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
|
|||||||
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
|
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pstate->prefetch_pages < pstate->prefetch_target)
|
||||||
|
{
|
||||||
|
TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
|
||||||
|
|
||||||
|
if (prefetch_iterator)
|
||||||
|
{
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
TBMIterateResult *tbmpre;
|
||||||
|
bool do_prefetch = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Recheck under the mutex. If some other process has already
|
||||||
|
* done enough prefetching then we need not to do anything.
|
||||||
|
*/
|
||||||
|
SpinLockAcquire(&pstate->mutex);
|
||||||
|
if (pstate->prefetch_pages < pstate->prefetch_target)
|
||||||
|
{
|
||||||
|
pstate->prefetch_pages++;
|
||||||
|
do_prefetch = true;
|
||||||
|
}
|
||||||
|
SpinLockRelease(&pstate->mutex);
|
||||||
|
|
||||||
|
if (!do_prefetch)
|
||||||
|
return;
|
||||||
|
|
||||||
|
tbmpre = tbm_shared_iterate(prefetch_iterator);
|
||||||
|
if (tbmpre == NULL)
|
||||||
|
{
|
||||||
|
/* No more pages to prefetch */
|
||||||
|
tbm_end_shared_iterate(prefetch_iterator);
|
||||||
|
node->shared_prefetch_iterator = NULL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif /* USE_PREFETCH */
|
#endif /* USE_PREFETCH */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -488,12 +685,36 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
|
|||||||
tbm_end_iterate(node->tbmiterator);
|
tbm_end_iterate(node->tbmiterator);
|
||||||
if (node->prefetch_iterator)
|
if (node->prefetch_iterator)
|
||||||
tbm_end_iterate(node->prefetch_iterator);
|
tbm_end_iterate(node->prefetch_iterator);
|
||||||
|
if (node->shared_tbmiterator)
|
||||||
|
tbm_end_shared_iterate(node->shared_tbmiterator);
|
||||||
|
if (node->shared_prefetch_iterator)
|
||||||
|
tbm_end_shared_iterate(node->shared_prefetch_iterator);
|
||||||
if (node->tbm)
|
if (node->tbm)
|
||||||
tbm_free(node->tbm);
|
tbm_free(node->tbm);
|
||||||
node->tbm = NULL;
|
node->tbm = NULL;
|
||||||
node->tbmiterator = NULL;
|
node->tbmiterator = NULL;
|
||||||
node->tbmres = NULL;
|
node->tbmres = NULL;
|
||||||
node->prefetch_iterator = NULL;
|
node->prefetch_iterator = NULL;
|
||||||
|
node->initialized = false;
|
||||||
|
node->shared_tbmiterator = NULL;
|
||||||
|
node->shared_prefetch_iterator = NULL;
|
||||||
|
|
||||||
|
/* Reset parallel bitmap state, if present */
|
||||||
|
if (node->pstate)
|
||||||
|
{
|
||||||
|
dsa_area *dsa = node->ss.ps.state->es_query_dsa;
|
||||||
|
|
||||||
|
node->pstate->state = BM_INITIAL;
|
||||||
|
|
||||||
|
if (DsaPointerIsValid(node->pstate->tbmiterator))
|
||||||
|
tbm_free_shared_area(dsa, node->pstate->tbmiterator);
|
||||||
|
|
||||||
|
if (DsaPointerIsValid(node->pstate->prefetch_iterator))
|
||||||
|
tbm_free_shared_area(dsa, node->pstate->prefetch_iterator);
|
||||||
|
|
||||||
|
node->pstate->tbmiterator = InvalidDsaPointer;
|
||||||
|
node->pstate->prefetch_iterator = InvalidDsaPointer;
|
||||||
|
}
|
||||||
|
|
||||||
ExecScanReScan(&node->ss);
|
ExecScanReScan(&node->ss);
|
||||||
|
|
||||||
@ -546,6 +767,10 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
|
|||||||
tbm_end_iterate(node->prefetch_iterator);
|
tbm_end_iterate(node->prefetch_iterator);
|
||||||
if (node->tbm)
|
if (node->tbm)
|
||||||
tbm_free(node->tbm);
|
tbm_free(node->tbm);
|
||||||
|
if (node->shared_tbmiterator)
|
||||||
|
tbm_end_shared_iterate(node->shared_tbmiterator);
|
||||||
|
if (node->shared_prefetch_iterator)
|
||||||
|
tbm_end_shared_iterate(node->shared_prefetch_iterator);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* close heap scan
|
* close heap scan
|
||||||
@ -597,6 +822,10 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
|
|||||||
scanstate->prefetch_target = 0;
|
scanstate->prefetch_target = 0;
|
||||||
/* may be updated below */
|
/* may be updated below */
|
||||||
scanstate->prefetch_maximum = target_prefetch_pages;
|
scanstate->prefetch_maximum = target_prefetch_pages;
|
||||||
|
scanstate->pscan_len = 0;
|
||||||
|
scanstate->initialized = false;
|
||||||
|
scanstate->shared_tbmiterator = NULL;
|
||||||
|
scanstate->pstate = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Miscellaneous initialization
|
* Miscellaneous initialization
|
||||||
@ -681,3 +910,108 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
|
|||||||
*/
|
*/
|
||||||
return scanstate;
|
return scanstate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*----------------
|
||||||
|
* BitmapShouldInitializeSharedState
|
||||||
|
*
|
||||||
|
* The first process to come here and see the state to the BM_INITIAL
|
||||||
|
* will become the leader for the parallel bitmap scan and will be
|
||||||
|
* responsible for populating the TIDBitmap. The other processes will
|
||||||
|
* be blocked by the condition variable until the leader wakes them up.
|
||||||
|
* ---------------
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
|
||||||
|
{
|
||||||
|
SharedBitmapState state;
|
||||||
|
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
SpinLockAcquire(&pstate->mutex);
|
||||||
|
state = pstate->state;
|
||||||
|
if (pstate->state == BM_INITIAL)
|
||||||
|
pstate->state = BM_INPROGRESS;
|
||||||
|
SpinLockRelease(&pstate->mutex);
|
||||||
|
|
||||||
|
/* Exit if bitmap is done, or if we're the leader. */
|
||||||
|
if (state != BM_INPROGRESS)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Wait for the leader to wake us up. */
|
||||||
|
ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
ConditionVariableCancelSleep();
|
||||||
|
|
||||||
|
return (state == BM_INITIAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------
|
||||||
|
* ExecBitmapHeapEstimate
|
||||||
|
*
|
||||||
|
* estimates the space required to serialize bitmap scan node.
|
||||||
|
* ----------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ExecBitmapHeapEstimate(BitmapHeapScanState *node,
|
||||||
|
ParallelContext *pcxt)
|
||||||
|
{
|
||||||
|
EState *estate = node->ss.ps.state;
|
||||||
|
|
||||||
|
node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
|
||||||
|
phs_snapshot_data),
|
||||||
|
EstimateSnapshotSpace(estate->es_snapshot));
|
||||||
|
|
||||||
|
shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
|
||||||
|
shm_toc_estimate_keys(&pcxt->estimator, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------
|
||||||
|
* ExecBitmapHeapInitializeDSM
|
||||||
|
*
|
||||||
|
* Set up a parallel bitmap heap scan descriptor.
|
||||||
|
* ----------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
|
||||||
|
ParallelContext *pcxt)
|
||||||
|
{
|
||||||
|
ParallelBitmapHeapState *pstate;
|
||||||
|
EState *estate = node->ss.ps.state;
|
||||||
|
|
||||||
|
pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
|
||||||
|
|
||||||
|
pstate->tbmiterator = 0;
|
||||||
|
pstate->prefetch_iterator = 0;
|
||||||
|
|
||||||
|
/* Initialize the mutex */
|
||||||
|
SpinLockInit(&pstate->mutex);
|
||||||
|
pstate->prefetch_pages = 0;
|
||||||
|
pstate->prefetch_target = 0;
|
||||||
|
pstate->state = BM_INITIAL;
|
||||||
|
|
||||||
|
ConditionVariableInit(&pstate->cv);
|
||||||
|
SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);
|
||||||
|
|
||||||
|
shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
|
||||||
|
node->pstate = pstate;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------
|
||||||
|
* ExecBitmapHeapInitializeWorker
|
||||||
|
*
|
||||||
|
* Copy relevant information from TOC into planstate.
|
||||||
|
* ----------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, shm_toc *toc)
|
||||||
|
{
|
||||||
|
ParallelBitmapHeapState *pstate;
|
||||||
|
Snapshot snapshot;
|
||||||
|
|
||||||
|
pstate = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
|
||||||
|
node->pstate = pstate;
|
||||||
|
|
||||||
|
snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
|
||||||
|
heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
|
||||||
|
}
|
||||||
|
@ -78,7 +78,9 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* XXX should we use less than work_mem for this? */
|
/* XXX should we use less than work_mem for this? */
|
||||||
tbm = tbm_create(work_mem * 1024L, NULL);
|
tbm = tbm_create(work_mem * 1024L,
|
||||||
|
((BitmapIndexScan *) node->ss.ps.plan)->isshared ?
|
||||||
|
node->ss.ps.state->es_query_dsa : NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -129,7 +129,9 @@ MultiExecBitmapOr(BitmapOrState *node)
|
|||||||
if (result == NULL) /* first subplan */
|
if (result == NULL) /* first subplan */
|
||||||
{
|
{
|
||||||
/* XXX should we use less than work_mem for this? */
|
/* XXX should we use less than work_mem for this? */
|
||||||
result = tbm_create(work_mem * 1024L, NULL);
|
result = tbm_create(work_mem * 1024L,
|
||||||
|
((BitmapOr *) node->ps.plan)->isshared ?
|
||||||
|
node->ps.state->es_query_dsa : NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
((BitmapIndexScanState *) subnode)->biss_result = result;
|
((BitmapIndexScanState *) subnode)->biss_result = result;
|
||||||
|
@ -331,6 +331,7 @@ _copyBitmapOr(const BitmapOr *from)
|
|||||||
/*
|
/*
|
||||||
* copy remainder of node
|
* copy remainder of node
|
||||||
*/
|
*/
|
||||||
|
COPY_SCALAR_FIELD(isshared);
|
||||||
COPY_NODE_FIELD(bitmapplans);
|
COPY_NODE_FIELD(bitmapplans);
|
||||||
|
|
||||||
return newnode;
|
return newnode;
|
||||||
@ -496,6 +497,7 @@ _copyBitmapIndexScan(const BitmapIndexScan *from)
|
|||||||
* copy remainder of node
|
* copy remainder of node
|
||||||
*/
|
*/
|
||||||
COPY_SCALAR_FIELD(indexid);
|
COPY_SCALAR_FIELD(indexid);
|
||||||
|
COPY_SCALAR_FIELD(isshared);
|
||||||
COPY_NODE_FIELD(indexqual);
|
COPY_NODE_FIELD(indexqual);
|
||||||
COPY_NODE_FIELD(indexqualorig);
|
COPY_NODE_FIELD(indexqualorig);
|
||||||
|
|
||||||
|
@ -441,6 +441,7 @@ _outBitmapOr(StringInfo str, const BitmapOr *node)
|
|||||||
|
|
||||||
_outPlanInfo(str, (const Plan *) node);
|
_outPlanInfo(str, (const Plan *) node);
|
||||||
|
|
||||||
|
WRITE_BOOL_FIELD(isshared);
|
||||||
WRITE_NODE_FIELD(bitmapplans);
|
WRITE_NODE_FIELD(bitmapplans);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -520,6 +521,7 @@ _outBitmapIndexScan(StringInfo str, const BitmapIndexScan *node)
|
|||||||
_outScanInfo(str, (const Scan *) node);
|
_outScanInfo(str, (const Scan *) node);
|
||||||
|
|
||||||
WRITE_OID_FIELD(indexid);
|
WRITE_OID_FIELD(indexid);
|
||||||
|
WRITE_BOOL_FIELD(isshared);
|
||||||
WRITE_NODE_FIELD(indexqual);
|
WRITE_NODE_FIELD(indexqual);
|
||||||
WRITE_NODE_FIELD(indexqualorig);
|
WRITE_NODE_FIELD(indexqualorig);
|
||||||
}
|
}
|
||||||
|
@ -1633,6 +1633,7 @@ _readBitmapOr(void)
|
|||||||
|
|
||||||
ReadCommonPlan(&local_node->plan);
|
ReadCommonPlan(&local_node->plan);
|
||||||
|
|
||||||
|
READ_BOOL_FIELD(isshared);
|
||||||
READ_NODE_FIELD(bitmapplans);
|
READ_NODE_FIELD(bitmapplans);
|
||||||
|
|
||||||
READ_DONE();
|
READ_DONE();
|
||||||
@ -1744,6 +1745,7 @@ _readBitmapIndexScan(void)
|
|||||||
ReadCommonScan(&local_node->scan);
|
ReadCommonScan(&local_node->scan);
|
||||||
|
|
||||||
READ_OID_FIELD(indexid);
|
READ_OID_FIELD(indexid);
|
||||||
|
READ_BOOL_FIELD(isshared);
|
||||||
READ_NODE_FIELD(indexqual);
|
READ_NODE_FIELD(indexqual);
|
||||||
READ_NODE_FIELD(indexqualorig);
|
READ_NODE_FIELD(indexqualorig);
|
||||||
|
|
||||||
|
@ -2911,6 +2911,30 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* create_partial_bitmap_paths
|
||||||
|
* Build partial bitmap heap path for the relation
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
|
||||||
|
Path *bitmapqual)
|
||||||
|
{
|
||||||
|
int parallel_workers;
|
||||||
|
double pages_fetched;
|
||||||
|
|
||||||
|
/* Compute heap pages for bitmap heap scan */
|
||||||
|
pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0,
|
||||||
|
NULL, NULL);
|
||||||
|
|
||||||
|
parallel_workers = compute_parallel_worker(rel, pages_fetched, 0);
|
||||||
|
|
||||||
|
if (parallel_workers <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel,
|
||||||
|
bitmapqual, rel->lateral_relids, 1.0, parallel_workers));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute the number of parallel workers that should be used to scan a
|
* Compute the number of parallel workers that should be used to scan a
|
||||||
* relation. We compute the parallel workers based on the size of the heap to
|
* relation. We compute the parallel workers based on the size of the heap to
|
||||||
|
@ -860,6 +860,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
|
|||||||
QualCost qpqual_cost;
|
QualCost qpqual_cost;
|
||||||
Cost cpu_per_tuple;
|
Cost cpu_per_tuple;
|
||||||
Cost cost_per_page;
|
Cost cost_per_page;
|
||||||
|
Cost cpu_run_cost;
|
||||||
double tuples_fetched;
|
double tuples_fetched;
|
||||||
double pages_fetched;
|
double pages_fetched;
|
||||||
double spc_seq_page_cost,
|
double spc_seq_page_cost,
|
||||||
@ -921,8 +922,21 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
|
|||||||
|
|
||||||
startup_cost += qpqual_cost.startup;
|
startup_cost += qpqual_cost.startup;
|
||||||
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
|
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
|
||||||
|
cpu_run_cost = cpu_per_tuple * tuples_fetched;
|
||||||
|
|
||||||
run_cost += cpu_per_tuple * tuples_fetched;
|
/* Adjust costing for parallelism, if used. */
|
||||||
|
if (path->parallel_workers > 0)
|
||||||
|
{
|
||||||
|
double parallel_divisor = get_parallel_divisor(path);
|
||||||
|
|
||||||
|
/* The CPU cost is divided among all the workers. */
|
||||||
|
cpu_run_cost /= parallel_divisor;
|
||||||
|
|
||||||
|
path->rows = clamp_row_est(path->rows / parallel_divisor);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
run_cost += cpu_run_cost;
|
||||||
|
|
||||||
/* tlist eval costs are paid per output row, not per tuple scanned */
|
/* tlist eval costs are paid per output row, not per tuple scanned */
|
||||||
startup_cost += path->pathtarget->cost.startup;
|
startup_cost += path->pathtarget->cost.startup;
|
||||||
|
@ -337,8 +337,12 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
|
|||||||
|
|
||||||
bitmapqual = choose_bitmap_and(root, rel, bitindexpaths);
|
bitmapqual = choose_bitmap_and(root, rel, bitindexpaths);
|
||||||
bpath = create_bitmap_heap_path(root, rel, bitmapqual,
|
bpath = create_bitmap_heap_path(root, rel, bitmapqual,
|
||||||
rel->lateral_relids, 1.0);
|
rel->lateral_relids, 1.0, 0);
|
||||||
add_path(rel, (Path *) bpath);
|
add_path(rel, (Path *) bpath);
|
||||||
|
|
||||||
|
/* create a partial bitmap heap path */
|
||||||
|
if (rel->consider_parallel && rel->lateral_relids == NULL)
|
||||||
|
create_partial_bitmap_paths(root, rel, bitmapqual);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -410,7 +414,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
|
|||||||
required_outer = get_bitmap_tree_required_outer(bitmapqual);
|
required_outer = get_bitmap_tree_required_outer(bitmapqual);
|
||||||
loop_count = get_loop_count(root, rel->relid, required_outer);
|
loop_count = get_loop_count(root, rel->relid, required_outer);
|
||||||
bpath = create_bitmap_heap_path(root, rel, bitmapqual,
|
bpath = create_bitmap_heap_path(root, rel, bitmapqual,
|
||||||
required_outer, loop_count);
|
required_outer, loop_count, 0);
|
||||||
add_path(rel, (Path *) bpath);
|
add_path(rel, (Path *) bpath);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1617,6 +1621,11 @@ bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, Path *ipath)
|
|||||||
bpath.path.pathkeys = NIL;
|
bpath.path.pathkeys = NIL;
|
||||||
bpath.bitmapqual = ipath;
|
bpath.bitmapqual = ipath;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check the cost of temporary path without considering parallelism.
|
||||||
|
* Parallel bitmap heap path will be considered at later stage.
|
||||||
|
*/
|
||||||
|
bpath.path.parallel_workers = 0;
|
||||||
cost_bitmap_heap_scan(&bpath.path, root, rel,
|
cost_bitmap_heap_scan(&bpath.path, root, rel,
|
||||||
bpath.path.param_info,
|
bpath.path.param_info,
|
||||||
ipath,
|
ipath,
|
||||||
@ -1659,6 +1668,12 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths)
|
|||||||
bpath.path.pathkeys = NIL;
|
bpath.path.pathkeys = NIL;
|
||||||
bpath.bitmapqual = (Path *) &apath;
|
bpath.bitmapqual = (Path *) &apath;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check the cost of temporary path without considering parallelism.
|
||||||
|
* Parallel bitmap heap path will be considered at later stage.
|
||||||
|
*/
|
||||||
|
bpath.path.parallel_workers = 0;
|
||||||
|
|
||||||
/* Now we can do cost_bitmap_heap_scan */
|
/* Now we can do cost_bitmap_heap_scan */
|
||||||
cost_bitmap_heap_scan(&bpath.path, root, rel,
|
cost_bitmap_heap_scan(&bpath.path, root, rel,
|
||||||
bpath.path.param_info,
|
bpath.path.param_info,
|
||||||
|
@ -125,6 +125,7 @@ static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root,
|
|||||||
List *tlist, List *scan_clauses);
|
List *tlist, List *scan_clauses);
|
||||||
static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual,
|
static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual,
|
||||||
List **qual, List **indexqual, List **indexECs);
|
List **qual, List **indexqual, List **indexECs);
|
||||||
|
static void bitmap_subplan_mark_shared(Plan *plan);
|
||||||
static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
|
static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
|
||||||
List *tlist, List *scan_clauses);
|
List *tlist, List *scan_clauses);
|
||||||
static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root,
|
static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root,
|
||||||
@ -2590,6 +2591,9 @@ create_bitmap_scan_plan(PlannerInfo *root,
|
|||||||
&bitmapqualorig, &indexquals,
|
&bitmapqualorig, &indexquals,
|
||||||
&indexECs);
|
&indexECs);
|
||||||
|
|
||||||
|
if (best_path->path.parallel_aware)
|
||||||
|
bitmap_subplan_mark_shared(bitmapqualplan);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The qpqual list must contain all restrictions not automatically handled
|
* The qpqual list must contain all restrictions not automatically handled
|
||||||
* by the index, other than pseudoconstant clauses which will be handled
|
* by the index, other than pseudoconstant clauses which will be handled
|
||||||
@ -4756,6 +4760,24 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples)
|
|||||||
plan->plan.parallel_aware = false;
|
plan->plan.parallel_aware = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* bitmap_subplan_mark_shared
|
||||||
|
* Set isshared flag in bitmap subplan so that it will be created in
|
||||||
|
* shared memory.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
bitmap_subplan_mark_shared(Plan *plan)
|
||||||
|
{
|
||||||
|
if (IsA(plan, BitmapAnd))
|
||||||
|
bitmap_subplan_mark_shared(
|
||||||
|
linitial(((BitmapAnd *) plan)->bitmapplans));
|
||||||
|
else if (IsA(plan, BitmapOr))
|
||||||
|
((BitmapOr *) plan)->isshared = true;
|
||||||
|
else if (IsA(plan, BitmapIndexScan))
|
||||||
|
((BitmapIndexScan *) plan)->isshared = true;
|
||||||
|
else
|
||||||
|
elog(ERROR, "unrecognized node type: %d", nodeTag(plan));
|
||||||
|
}
|
||||||
|
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
*
|
*
|
||||||
|
@ -1068,7 +1068,8 @@ create_bitmap_heap_path(PlannerInfo *root,
|
|||||||
RelOptInfo *rel,
|
RelOptInfo *rel,
|
||||||
Path *bitmapqual,
|
Path *bitmapqual,
|
||||||
Relids required_outer,
|
Relids required_outer,
|
||||||
double loop_count)
|
double loop_count,
|
||||||
|
int parallel_degree)
|
||||||
{
|
{
|
||||||
BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
|
BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
|
||||||
|
|
||||||
@ -1077,9 +1078,9 @@ create_bitmap_heap_path(PlannerInfo *root,
|
|||||||
pathnode->path.pathtarget = rel->reltarget;
|
pathnode->path.pathtarget = rel->reltarget;
|
||||||
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
|
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
|
||||||
required_outer);
|
required_outer);
|
||||||
pathnode->path.parallel_aware = false;
|
pathnode->path.parallel_aware = parallel_degree > 0 ? true : false;
|
||||||
pathnode->path.parallel_safe = rel->consider_parallel;
|
pathnode->path.parallel_safe = rel->consider_parallel;
|
||||||
pathnode->path.parallel_workers = 0;
|
pathnode->path.parallel_workers = parallel_degree;
|
||||||
pathnode->path.pathkeys = NIL; /* always unordered */
|
pathnode->path.pathkeys = NIL; /* always unordered */
|
||||||
|
|
||||||
pathnode->bitmapqual = bitmapqual;
|
pathnode->bitmapqual = bitmapqual;
|
||||||
@ -3281,7 +3282,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
|
|||||||
rel,
|
rel,
|
||||||
bpath->bitmapqual,
|
bpath->bitmapqual,
|
||||||
required_outer,
|
required_outer,
|
||||||
loop_count);
|
loop_count, 0);
|
||||||
}
|
}
|
||||||
case T_SubqueryScan:
|
case T_SubqueryScan:
|
||||||
{
|
{
|
||||||
|
@ -3395,6 +3395,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
|
|||||||
case WAIT_EVENT_PARALLEL_FINISH:
|
case WAIT_EVENT_PARALLEL_FINISH:
|
||||||
event_name = "ParallelFinish";
|
event_name = "ParallelFinish";
|
||||||
break;
|
break;
|
||||||
|
case WAIT_EVENT_PARALLEL_BITMAP_SCAN:
|
||||||
|
event_name = "ParallelBitmapScan";
|
||||||
|
break;
|
||||||
case WAIT_EVENT_SAFE_SNAPSHOT:
|
case WAIT_EVENT_SAFE_SNAPSHOT:
|
||||||
event_name = "SafeSnapshot";
|
event_name = "SafeSnapshot";
|
||||||
break;
|
break;
|
||||||
|
@ -179,6 +179,7 @@ extern void simple_heap_update(Relation relation, ItemPointer otid,
|
|||||||
HeapTuple tup);
|
HeapTuple tup);
|
||||||
|
|
||||||
extern void heap_sync(Relation relation);
|
extern void heap_sync(Relation relation);
|
||||||
|
extern void heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot);
|
||||||
|
|
||||||
/* in heap/pruneheap.c */
|
/* in heap/pruneheap.c */
|
||||||
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
|
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
|
||||||
|
@ -15,10 +15,17 @@
|
|||||||
#define NODEBITMAPHEAPSCAN_H
|
#define NODEBITMAPHEAPSCAN_H
|
||||||
|
|
||||||
#include "nodes/execnodes.h"
|
#include "nodes/execnodes.h"
|
||||||
|
#include "access/parallel.h"
|
||||||
|
|
||||||
extern BitmapHeapScanState *ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags);
|
extern BitmapHeapScanState *ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags);
|
||||||
extern TupleTableSlot *ExecBitmapHeapScan(BitmapHeapScanState *node);
|
extern TupleTableSlot *ExecBitmapHeapScan(BitmapHeapScanState *node);
|
||||||
extern void ExecEndBitmapHeapScan(BitmapHeapScanState *node);
|
extern void ExecEndBitmapHeapScan(BitmapHeapScanState *node);
|
||||||
extern void ExecReScanBitmapHeapScan(BitmapHeapScanState *node);
|
extern void ExecReScanBitmapHeapScan(BitmapHeapScanState *node);
|
||||||
|
extern void ExecBitmapHeapEstimate(BitmapHeapScanState *node,
|
||||||
|
ParallelContext *pcxt);
|
||||||
|
extern void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
|
||||||
|
ParallelContext *pcxt);
|
||||||
|
extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
|
||||||
|
shm_toc *toc);
|
||||||
|
|
||||||
#endif /* NODEBITMAPHEAPSCAN_H */
|
#endif /* NODEBITMAPHEAPSCAN_H */
|
||||||
|
@ -26,6 +26,8 @@
|
|||||||
#include "utils/sortsupport.h"
|
#include "utils/sortsupport.h"
|
||||||
#include "utils/tuplestore.h"
|
#include "utils/tuplestore.h"
|
||||||
#include "utils/tuplesort.h"
|
#include "utils/tuplesort.h"
|
||||||
|
#include "nodes/tidbitmap.h"
|
||||||
|
#include "storage/condition_variable.h"
|
||||||
|
|
||||||
|
|
||||||
/* ----------------
|
/* ----------------
|
||||||
@ -1464,6 +1466,51 @@ typedef struct BitmapIndexScanState
|
|||||||
IndexScanDesc biss_ScanDesc;
|
IndexScanDesc biss_ScanDesc;
|
||||||
} BitmapIndexScanState;
|
} BitmapIndexScanState;
|
||||||
|
|
||||||
|
/* ----------------
|
||||||
|
* SharedBitmapState information
|
||||||
|
*
|
||||||
|
* BM_INITIAL TIDBitmap creation is not yet started, so first worker
|
||||||
|
* to see this state will set the state to BM_INPROGRESS
|
||||||
|
* and that process will be responsible for creating
|
||||||
|
* TIDBitmap.
|
||||||
|
* BM_INPROGRESS TIDBitmap creation is in progress; workers need to
|
||||||
|
* sleep until it's finished.
|
||||||
|
* BM_FINISHED TIDBitmap creation is done, so now all workers can
|
||||||
|
* proceed to iterate over TIDBitmap.
|
||||||
|
* ----------------
|
||||||
|
*/
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
BM_INITIAL,
|
||||||
|
BM_INPROGRESS,
|
||||||
|
BM_FINISHED
|
||||||
|
} SharedBitmapState;
|
||||||
|
|
||||||
|
/* ----------------
|
||||||
|
* ParallelBitmapHeapState information
|
||||||
|
* tbmiterator iterator for scanning current pages
|
||||||
|
* prefetch_iterator iterator for prefetching ahead of current page
|
||||||
|
* mutex mutual exclusion for the prefetching variable
|
||||||
|
* and state
|
||||||
|
* prefetch_pages # pages prefetch iterator is ahead of current
|
||||||
|
* prefetch_target current target prefetch distance
|
||||||
|
* state current state of the TIDBitmap
|
||||||
|
* cv conditional wait variable
|
||||||
|
* phs_snapshot_data snapshot data shared to workers
|
||||||
|
* ----------------
|
||||||
|
*/
|
||||||
|
typedef struct ParallelBitmapHeapState
|
||||||
|
{
|
||||||
|
dsa_pointer tbmiterator;
|
||||||
|
dsa_pointer prefetch_iterator;
|
||||||
|
slock_t mutex;
|
||||||
|
int prefetch_pages;
|
||||||
|
int prefetch_target;
|
||||||
|
SharedBitmapState state;
|
||||||
|
ConditionVariable cv;
|
||||||
|
char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
|
||||||
|
} ParallelBitmapHeapState;
|
||||||
|
|
||||||
/* ----------------
|
/* ----------------
|
||||||
* BitmapHeapScanState information
|
* BitmapHeapScanState information
|
||||||
*
|
*
|
||||||
@ -1477,6 +1524,11 @@ typedef struct BitmapIndexScanState
|
|||||||
* prefetch_pages # pages prefetch iterator is ahead of current
|
* prefetch_pages # pages prefetch iterator is ahead of current
|
||||||
* prefetch_target current target prefetch distance
|
* prefetch_target current target prefetch distance
|
||||||
* prefetch_maximum maximum value for prefetch_target
|
* prefetch_maximum maximum value for prefetch_target
|
||||||
|
* pscan_len size of the shared memory for parallel bitmap
|
||||||
|
* initialized is node is ready to iterate
|
||||||
|
* shared_tbmiterator shared iterator
|
||||||
|
* shared_prefetch_iterator shared iterator for prefetching
|
||||||
|
* pstate shared state for parallel bitmap scan
|
||||||
* ----------------
|
* ----------------
|
||||||
*/
|
*/
|
||||||
typedef struct BitmapHeapScanState
|
typedef struct BitmapHeapScanState
|
||||||
@ -1492,6 +1544,11 @@ typedef struct BitmapHeapScanState
|
|||||||
int prefetch_pages;
|
int prefetch_pages;
|
||||||
int prefetch_target;
|
int prefetch_target;
|
||||||
int prefetch_maximum;
|
int prefetch_maximum;
|
||||||
|
Size pscan_len;
|
||||||
|
bool initialized;
|
||||||
|
TBMSharedIterator *shared_tbmiterator;
|
||||||
|
TBMSharedIterator *shared_prefetch_iterator;
|
||||||
|
ParallelBitmapHeapState *pstate;
|
||||||
} BitmapHeapScanState;
|
} BitmapHeapScanState;
|
||||||
|
|
||||||
/* ----------------
|
/* ----------------
|
||||||
|
@ -292,6 +292,7 @@ typedef struct BitmapAnd
|
|||||||
typedef struct BitmapOr
|
typedef struct BitmapOr
|
||||||
{
|
{
|
||||||
Plan plan;
|
Plan plan;
|
||||||
|
bool isshared;
|
||||||
List *bitmapplans;
|
List *bitmapplans;
|
||||||
} BitmapOr;
|
} BitmapOr;
|
||||||
|
|
||||||
@ -420,6 +421,7 @@ typedef struct BitmapIndexScan
|
|||||||
{
|
{
|
||||||
Scan scan;
|
Scan scan;
|
||||||
Oid indexid; /* OID of index to scan */
|
Oid indexid; /* OID of index to scan */
|
||||||
|
bool isshared; /* Create shared bitmap if set */
|
||||||
List *indexqual; /* list of index quals (OpExprs) */
|
List *indexqual; /* list of index quals (OpExprs) */
|
||||||
List *indexqualorig; /* the same in original form */
|
List *indexqualorig; /* the same in original form */
|
||||||
} BitmapIndexScan;
|
} BitmapIndexScan;
|
||||||
|
@ -53,7 +53,8 @@ extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root,
|
|||||||
RelOptInfo *rel,
|
RelOptInfo *rel,
|
||||||
Path *bitmapqual,
|
Path *bitmapqual,
|
||||||
Relids required_outer,
|
Relids required_outer,
|
||||||
double loop_count);
|
double loop_count,
|
||||||
|
int parallel_degree);
|
||||||
extern BitmapAndPath *create_bitmap_and_path(PlannerInfo *root,
|
extern BitmapAndPath *create_bitmap_and_path(PlannerInfo *root,
|
||||||
RelOptInfo *rel,
|
RelOptInfo *rel,
|
||||||
List *bitmapquals);
|
List *bitmapquals);
|
||||||
|
@ -56,6 +56,8 @@ extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
|
|||||||
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
|
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
|
||||||
extern int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
|
extern int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
|
||||||
BlockNumber index_pages);
|
BlockNumber index_pages);
|
||||||
|
extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
|
||||||
|
Path *bitmapqual);
|
||||||
|
|
||||||
#ifdef OPTIMIZER_DEBUG
|
#ifdef OPTIMIZER_DEBUG
|
||||||
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
|
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
|
||||||
|
@ -787,6 +787,7 @@ typedef enum
|
|||||||
WAIT_EVENT_MQ_RECEIVE,
|
WAIT_EVENT_MQ_RECEIVE,
|
||||||
WAIT_EVENT_MQ_SEND,
|
WAIT_EVENT_MQ_SEND,
|
||||||
WAIT_EVENT_PARALLEL_FINISH,
|
WAIT_EVENT_PARALLEL_FINISH,
|
||||||
|
WAIT_EVENT_PARALLEL_BITMAP_SCAN,
|
||||||
WAIT_EVENT_SAFE_SNAPSHOT,
|
WAIT_EVENT_SAFE_SNAPSHOT,
|
||||||
WAIT_EVENT_SYNC_REP
|
WAIT_EVENT_SYNC_REP
|
||||||
} WaitEventIPC;
|
} WaitEventIPC;
|
||||||
|
@ -169,6 +169,25 @@ select count(*) from tenk1 where thousand > 95;
|
|||||||
|
|
||||||
reset enable_seqscan;
|
reset enable_seqscan;
|
||||||
reset enable_bitmapscan;
|
reset enable_bitmapscan;
|
||||||
|
-- test parallel bitmap heap scan.
|
||||||
|
set enable_seqscan to off;
|
||||||
|
set enable_indexscan to off;
|
||||||
|
explain (costs off)
|
||||||
|
select count((unique1)) from tenk1 where hundred > 1;
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------------------------
|
||||||
|
Finalize Aggregate
|
||||||
|
-> Gather
|
||||||
|
Workers Planned: 4
|
||||||
|
-> Partial Aggregate
|
||||||
|
-> Parallel Bitmap Heap Scan on tenk1
|
||||||
|
Recheck Cond: (hundred > 1)
|
||||||
|
-> Bitmap Index Scan on tenk1_hundred
|
||||||
|
Index Cond: (hundred > 1)
|
||||||
|
(8 rows)
|
||||||
|
|
||||||
|
reset enable_seqscan;
|
||||||
|
reset enable_indexscan;
|
||||||
-- test parallel merge join path.
|
-- test parallel merge join path.
|
||||||
set enable_hashjoin to off;
|
set enable_hashjoin to off;
|
||||||
set enable_nestloop to off;
|
set enable_nestloop to off;
|
||||||
|
@ -64,6 +64,16 @@ select count(*) from tenk1 where thousand > 95;
|
|||||||
reset enable_seqscan;
|
reset enable_seqscan;
|
||||||
reset enable_bitmapscan;
|
reset enable_bitmapscan;
|
||||||
|
|
||||||
|
-- test parallel bitmap heap scan.
|
||||||
|
set enable_seqscan to off;
|
||||||
|
set enable_indexscan to off;
|
||||||
|
|
||||||
|
explain (costs off)
|
||||||
|
select count((unique1)) from tenk1 where hundred > 1;
|
||||||
|
|
||||||
|
reset enable_seqscan;
|
||||||
|
reset enable_indexscan;
|
||||||
|
|
||||||
-- test parallel merge join path.
|
-- test parallel merge join path.
|
||||||
set enable_hashjoin to off;
|
set enable_hashjoin to off;
|
||||||
set enable_nestloop to off;
|
set enable_nestloop to off;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user