postgres/src/backend/executor/nodeBitmapIndexscan.c
Commit 0fbceae841 by Peter Geoghegan: Show index search count in EXPLAIN ANALYZE, take 2.
Expose the count of index searches/index descents in EXPLAIN ANALYZE's
output for index scan/index-only scan/bitmap index scan nodes.  This
information is particularly useful with scans that use ScalarArrayOp
quals, where the number of index searches can be unpredictable due to
implementation details that interact with physical index characteristics
(at least with nbtree SAOP scans, since Postgres 17 commit 5bf748b8).
The information shown also provides useful context when EXPLAIN ANALYZE
runs a plan with an index scan node that successfully applied the skip
scan optimization (set to be added to nbtree by an upcoming patch).
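
For example (illustrative output with hypothetical table and index
names; the exact plan shape and counts depend on the data), an EXPLAIN
ANALYZE of a ScalarArrayOp bitmap scan now shows a line like:

    ->  Bitmap Index Scan on idx_example
          Index Cond: (col = ANY ('{1,2,3}'::integer[]))
          Index Searches: 3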

The instrumentation works by teaching all index AMs to increment a new
nsearches counter whenever a new index search begins.  The counter is
incremented at exactly the same point that index AMs already increment
the pg_stat_*_indexes.idx_scan counter (we're counting the same event,
but at the scan level rather than the relation level).  Parallel queries
have workers copy their local counter struct into shared memory when an
index scan node ends -- even when it isn't a parallel aware scan node.
An earlier version of this patch that only worked with parallel aware
scans became commit 5ead85fb (though that was quickly reverted by commit
d00107cd following "debug_parallel_query=regress" buildfarm failures).
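
In essence, the AM-side change is one extra increment next to the
existing pgstat bump (a sketch, not the literal patch; the call sites
and surrounding code differ per index AM):

    /* beginning a new index search (a.k.a. descent of the index) */
    if (scan->instrument)
        scan->instrument->nsearches++;  /* new scan-level counter */
    pgstat_count_index_scan(rel);       /* existing relation-level counter */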

Our approach doesn't match the approach used when tracking other index
scan related costs (e.g., "Rows Removed by Filter:").  It is comparable
to the approach used in similar cases involving costs that are only
readily accessible inside an access method, not from the executor proper
(e.g., "Heap Blocks:" output for a Bitmap Heap Scan, which was recently
enhanced to show per-worker costs by commit 5a1e6df3, using essentially
the same scheme as the one used here).  It is necessary for index AMs to
have direct responsibility for maintaining the new counter, since the
counter might need to be incremented multiple times per amgettuple call
(or per amgetbitmap call).  But it is also necessary for the executor
proper to manage the shared memory now used to transfer each worker's
counter struct to the leader.
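
For reference, the two instrumentation structs involved have roughly
this shape, as can be inferred from how nodeBitmapIndexscan.c uses them
below (the authoritative definitions live in the headers, not here):

    typedef struct IndexScanInstrumentation
    {
        uint64      nsearches;      /* number of index searches/descents */
    } IndexScanInstrumentation;

    typedef struct SharedIndexScanInstrumentation
    {
        int         num_workers;
        IndexScanInstrumentation winstrument[FLEXIBLE_ARRAY_MEMBER];
    } SharedIndexScanInstrumentation;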

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Robert Haas <robertmhaas@gmail.com>
Reviewed-By: Tomas Vondra <tomas@vondra.me>
Reviewed-By: Masahiro Ikeda <ikedamsh@oss.nttdata.com>
Reviewed-By: Matthias van de Meent <boekewurm+postgres@gmail.com>
Discussion: https://postgr.es/m/CAH2-WzkRqvaqR2CTNqTZP0z6FuL4-3ED6eQB0yx38XBNj1v-4Q@mail.gmail.com
Discussion: https://postgr.es/m/CAH2-Wz=PKR6rB7qbx+Vnd7eqeB5VTcrW=iJvAsTsKbdG+kW_UA@mail.gmail.com
2025-03-11 09:20:50 -04:00

/*-------------------------------------------------------------------------
 *
 * nodeBitmapIndexscan.c
 *      Routines to support bitmapped index scans of relations
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      src/backend/executor/nodeBitmapIndexscan.c
 *
 *-------------------------------------------------------------------------
 */

/*
 * INTERFACE ROUTINES
 *      MultiExecBitmapIndexScan    scans a relation using index.
 *      ExecInitBitmapIndexScan     creates and initializes state info.
 *      ExecReScanBitmapIndexScan   prepares to rescan the plan.
 *      ExecEndBitmapIndexScan      releases all storage.
 */
#include "postgres.h"

#include "access/genam.h"
#include "executor/executor.h"
#include "executor/nodeBitmapIndexscan.h"
#include "executor/nodeIndexscan.h"
#include "miscadmin.h"

/* ----------------------------------------------------------------
 *      ExecBitmapIndexScan
 *
 *      stub for pro forma compliance
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecBitmapIndexScan(PlanState *pstate)
{
    elog(ERROR, "BitmapIndexScan node does not support ExecProcNode call convention");
    return NULL;
}

/* ----------------------------------------------------------------
 *      MultiExecBitmapIndexScan(node)
 * ----------------------------------------------------------------
 */
Node *
MultiExecBitmapIndexScan(BitmapIndexScanState *node)
{
    TIDBitmap  *tbm;
    IndexScanDesc scandesc;
    double      nTuples = 0;
    bool        doscan;

    /* must provide our own instrumentation support */
    if (node->ss.ps.instrument)
        InstrStartNode(node->ss.ps.instrument);

    /*
     * extract necessary information from index scan node
     */
    scandesc = node->biss_ScanDesc;

    /*
     * If we have runtime keys and they've not already been set up, do it now.
     * Array keys are also treated as runtime keys; note that if ExecReScan
     * returns with biss_RuntimeKeysReady still false, then there is an empty
     * array key so we should do nothing.
     */
    if (!node->biss_RuntimeKeysReady &&
        (node->biss_NumRuntimeKeys != 0 || node->biss_NumArrayKeys != 0))
    {
        ExecReScan((PlanState *) node);
        doscan = node->biss_RuntimeKeysReady;
    }
    else
        doscan = true;

    /*
     * Prepare the result bitmap.  Normally we just create a new one to pass
     * back; however, our parent node is allowed to store a pre-made one into
     * node->biss_result, in which case we just OR our tuple IDs into the
     * existing bitmap.  (This saves needing explicit UNION steps.)
     */
    if (node->biss_result)
    {
        tbm = node->biss_result;
        node->biss_result = NULL;   /* reset for next time */
    }
    else
    {
        /* XXX should we use less than work_mem for this? */
        tbm = tbm_create(work_mem * (Size) 1024,
                         ((BitmapIndexScan *) node->ss.ps.plan)->isshared ?
                         node->ss.ps.state->es_query_dsa : NULL);
    }

    /*
     * Get TIDs from index and insert into bitmap
     */
    while (doscan)
    {
        nTuples += (double) index_getbitmap(scandesc, tbm);

        CHECK_FOR_INTERRUPTS();

        doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys,
                                           node->biss_NumArrayKeys);
        if (doscan)             /* reset index scan */
            index_rescan(node->biss_ScanDesc,
                         node->biss_ScanKeys, node->biss_NumScanKeys,
                         NULL, 0);
    }

    /* must provide our own instrumentation support */
    if (node->ss.ps.instrument)
        InstrStopNode(node->ss.ps.instrument, nTuples);

    return (Node *) tbm;
}

/* ----------------------------------------------------------------
 *      ExecReScanBitmapIndexScan(node)
 *
 *      Recalculates the values of any scan keys whose value depends on
 *      information known at runtime, then rescans the indexed relation.
 * ----------------------------------------------------------------
 */
void
ExecReScanBitmapIndexScan(BitmapIndexScanState *node)
{
    ExprContext *econtext = node->biss_RuntimeContext;

    /*
     * Reset the runtime-key context so we don't leak memory as each outer
     * tuple is scanned.  Note this assumes that we will recalculate *all*
     * runtime keys on each call.
     */
    if (econtext)
        ResetExprContext(econtext);

    /*
     * If we are doing runtime key calculations (ie, any of the index key
     * values weren't simple Consts), compute the new key values.
     *
     * Array keys are also treated as runtime keys; note that if we return
     * with biss_RuntimeKeysReady still false, then there is an empty array
     * key so no index scan is needed.
     */
    if (node->biss_NumRuntimeKeys != 0)
        ExecIndexEvalRuntimeKeys(econtext,
                                 node->biss_RuntimeKeys,
                                 node->biss_NumRuntimeKeys);
    if (node->biss_NumArrayKeys != 0)
        node->biss_RuntimeKeysReady =
            ExecIndexEvalArrayKeys(econtext,
                                   node->biss_ArrayKeys,
                                   node->biss_NumArrayKeys);
    else
        node->biss_RuntimeKeysReady = true;

    /* reset index scan */
    if (node->biss_RuntimeKeysReady)
        index_rescan(node->biss_ScanDesc,
                     node->biss_ScanKeys, node->biss_NumScanKeys,
                     NULL, 0);
}

/* ----------------------------------------------------------------
 *      ExecEndBitmapIndexScan
 * ----------------------------------------------------------------
 */
void
ExecEndBitmapIndexScan(BitmapIndexScanState *node)
{
    Relation    indexRelationDesc;
    IndexScanDesc indexScanDesc;

    /*
     * extract information from the node
     */
    indexRelationDesc = node->biss_RelationDesc;
    indexScanDesc = node->biss_ScanDesc;

    /*
     * When ending a parallel worker, copy the statistics gathered by the
     * worker back into shared memory so that it can be picked up by the main
     * process to report in EXPLAIN ANALYZE
     */
    if (node->biss_SharedInfo != NULL && IsParallelWorker())
    {
        IndexScanInstrumentation *winstrument;

        Assert(ParallelWorkerNumber <= node->biss_SharedInfo->num_workers);
        winstrument = &node->biss_SharedInfo->winstrument[ParallelWorkerNumber];

        /*
         * We have to accumulate the stats rather than performing a memcpy.
         * When a Gather/GatherMerge node finishes it will perform planner
         * shutdown on the workers.  On rescan it will spin up new workers
         * which will have a new BitmapIndexScanState and zeroed stats.
         */
        winstrument->nsearches += node->biss_Instrument.nsearches;
    }

    /*
     * close the index relation (no-op if we didn't open it)
     */
    if (indexScanDesc)
        index_endscan(indexScanDesc);
    if (indexRelationDesc)
        index_close(indexRelationDesc, NoLock);
}

/* ----------------------------------------------------------------
 *      ExecInitBitmapIndexScan
 *
 *      Initializes the index scan's state information.
 * ----------------------------------------------------------------
 */
BitmapIndexScanState *
ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
{
    BitmapIndexScanState *indexstate;
    LOCKMODE    lockmode;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * create state structure
     */
    indexstate = makeNode(BitmapIndexScanState);
    indexstate->ss.ps.plan = (Plan *) node;
    indexstate->ss.ps.state = estate;
    indexstate->ss.ps.ExecProcNode = ExecBitmapIndexScan;

    /* normally we don't make the result bitmap till runtime */
    indexstate->biss_result = NULL;

    /*
     * We do not open or lock the base relation here.  We assume that an
     * ancestor BitmapHeapScan node is holding AccessShareLock (or better) on
     * the heap relation throughout the execution of the plan tree.
     */
    indexstate->ss.ss_currentRelation = NULL;
    indexstate->ss.ss_currentScanDesc = NULL;

    /*
     * Miscellaneous initialization
     *
     * We do not need a standard exprcontext for this node, though we may
     * decide below to create a runtime-key exprcontext
     */

    /*
     * initialize child expressions
     *
     * We don't need to initialize targetlist or qual since neither are used.
     *
     * Note: we don't initialize all of the indexqual expression, only the
     * sub-parts corresponding to runtime keys (see below).
     */

    /*
     * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
     * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
     * references to nonexistent indexes.
     */
    if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
        return indexstate;

    /* Open the index relation. */
    lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
    indexstate->biss_RelationDesc = index_open(node->indexid, lockmode);

    /*
     * Initialize index-specific scan state
     */
    indexstate->biss_RuntimeKeysReady = false;
    indexstate->biss_RuntimeKeys = NULL;
    indexstate->biss_NumRuntimeKeys = 0;

    /*
     * build the index scan keys from the index qualification
     */
    ExecIndexBuildScanKeys((PlanState *) indexstate,
                           indexstate->biss_RelationDesc,
                           node->indexqual,
                           false,
                           &indexstate->biss_ScanKeys,
                           &indexstate->biss_NumScanKeys,
                           &indexstate->biss_RuntimeKeys,
                           &indexstate->biss_NumRuntimeKeys,
                           &indexstate->biss_ArrayKeys,
                           &indexstate->biss_NumArrayKeys);

    /*
     * If we have runtime keys or array keys, we need an ExprContext to
     * evaluate them.  We could just create a "standard" plan node
     * exprcontext, but to keep the code looking similar to nodeIndexscan.c,
     * it seems better to stick with the approach of using a separate
     * ExprContext.
     */
    if (indexstate->biss_NumRuntimeKeys != 0 ||
        indexstate->biss_NumArrayKeys != 0)
    {
        ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;

        ExecAssignExprContext(estate, &indexstate->ss.ps);
        indexstate->biss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
        indexstate->ss.ps.ps_ExprContext = stdecontext;
    }
    else
    {
        indexstate->biss_RuntimeContext = NULL;
    }

    /*
     * Initialize scan descriptor.
     */
    indexstate->biss_ScanDesc =
        index_beginscan_bitmap(indexstate->biss_RelationDesc,
                               estate->es_snapshot,
                               &indexstate->biss_Instrument,
                               indexstate->biss_NumScanKeys);

    /*
     * If no run-time keys to calculate, go ahead and pass the scankeys to the
     * index AM.
     */
    if (indexstate->biss_NumRuntimeKeys == 0 &&
        indexstate->biss_NumArrayKeys == 0)
        index_rescan(indexstate->biss_ScanDesc,
                     indexstate->biss_ScanKeys, indexstate->biss_NumScanKeys,
                     NULL, 0);

    /*
     * all done.
     */
    return indexstate;
}

/* ----------------------------------------------------------------
 *      ExecBitmapIndexScanEstimate
 *
 *      Compute the amount of space we'll need in the parallel
 *      query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecBitmapIndexScanEstimate(BitmapIndexScanState *node, ParallelContext *pcxt)
{
    Size        size;

    /*
     * Parallel bitmap index scans are not supported, but we still need to
     * store the scan's instrumentation in DSM during parallel query
     */
    if (!node->ss.ps.instrument || pcxt->nworkers == 0)
        return;

    size = offsetof(SharedIndexScanInstrumentation, winstrument) +
        pcxt->nworkers * sizeof(IndexScanInstrumentation);
    shm_toc_estimate_chunk(&pcxt->estimator, size);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *      ExecBitmapIndexScanInitializeDSM
 *
 *      Set up bitmap index scan shared instrumentation.
 * ----------------------------------------------------------------
 */
void
ExecBitmapIndexScanInitializeDSM(BitmapIndexScanState *node,
                                 ParallelContext *pcxt)
{
    Size        size;

    /* don't need this if not instrumenting or no workers */
    if (!node->ss.ps.instrument || pcxt->nworkers == 0)
        return;

    size = offsetof(SharedIndexScanInstrumentation, winstrument) +
        pcxt->nworkers * sizeof(IndexScanInstrumentation);
    node->biss_SharedInfo =
        (SharedIndexScanInstrumentation *) shm_toc_allocate(pcxt->toc,
                                                            size);
    shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
                   node->biss_SharedInfo);

    /* Each per-worker area must start out as zeroes */
    memset(node->biss_SharedInfo, 0, size);

    node->biss_SharedInfo->num_workers = pcxt->nworkers;
}

/* ----------------------------------------------------------------
 *      ExecBitmapIndexScanInitializeWorker
 *
 *      Copy relevant information from TOC into planstate.
 * ----------------------------------------------------------------
 */
void
ExecBitmapIndexScanInitializeWorker(BitmapIndexScanState *node,
                                    ParallelWorkerContext *pwcxt)
{
    /* don't need this if not instrumenting */
    if (!node->ss.ps.instrument)
        return;

    node->biss_SharedInfo = (SharedIndexScanInstrumentation *)
        shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
}

/* ----------------------------------------------------------------
 *      ExecBitmapIndexScanRetrieveInstrumentation
 *
 *      Transfer bitmap index scan statistics from DSM to private memory.
 * ----------------------------------------------------------------
 */
void
ExecBitmapIndexScanRetrieveInstrumentation(BitmapIndexScanState *node)
{
    SharedIndexScanInstrumentation *SharedInfo = node->biss_SharedInfo;
    size_t      size;

    if (SharedInfo == NULL)
        return;

    /* Create a copy of SharedInfo in backend-local memory */
    size = offsetof(SharedIndexScanInstrumentation, winstrument) +
        SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
    node->biss_SharedInfo = palloc(size);
    memcpy(node->biss_SharedInfo, SharedInfo, size);
}