mirror of
https://github.com/postgres/postgres.git
synced 2025-07-26 01:22:12 +03:00
Expose the count of index searches/index descents in EXPLAIN ANALYZE's output for index scan/index-only scan/bitmap index scan nodes. This information is particularly useful with scans that use ScalarArrayOp quals, where the number of index searches can be unpredictable due to implementation details that interact with physical index characteristics (at least with nbtree SAOP scans, since Postgres 17 commit 5bf748b8). The information shown also provides useful context when EXPLAIN ANALYZE runs a plan with an index scan node that successfully applied the skip scan optimization (set to be added to nbtree by an upcoming patch).
The instrumentation works by teaching all index AMs to increment a new nsearches counter whenever a new index search begins. The counter is incremented at exactly the same point that index AMs already increment the pg_stat_*_indexes.idx_scan counter (we're counting the same event, but at the scan level rather than the relation level). Parallel queries have workers copy their local counter struct into shared memory when an index scan node ends -- even when it isn't a parallel aware scan node. An earlier version of this patch that only worked with parallel aware scans became commit 5ead85fb (though that was quickly reverted by commit d00107cd following "debug_parallel_query=regress" buildfarm failures).
Our approach doesn't match the approach used when tracking other index scan related costs (e.g., "Rows Removed by Filter:"). It is comparable to the approach used in similar cases involving costs that are only readily accessible inside an access method, not from the executor proper (e.g., "Heap Blocks:" output for a Bitmap Heap Scan, which was recently enhanced to show per-worker costs by commit 5a1e6df3, using essentially the same scheme as the one used here). It is necessary for index AMs to have direct responsibility for maintaining the new counter, since the counter might need to be incremented multiple times per amgettuple call (or per amgetbitmap call). But it is also necessary for the executor proper to manage the shared memory now used to transfer each worker's counter struct to the leader.
Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Robert Haas <robertmhaas@gmail.com>
Reviewed-By: Tomas Vondra <tomas@vondra.me>
Reviewed-By: Masahiro Ikeda <ikedamsh@oss.nttdata.com>
Reviewed-By: Matthias van de Meent <boekewurm+postgres@gmail.com>
Discussion: https://postgr.es/m/CAH2-WzkRqvaqR2CTNqTZP0z6FuL4-3ED6eQB0yx38XBNj1v-4Q@mail.gmail.com
Discussion: https://postgr.es/m/CAH2-Wz=PKR6rB7qbx+Vnd7eqeB5VTcrW=iJvAsTsKbdG+kW_UA@mail.gmail.com
438 lines
13 KiB
C
438 lines
13 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* nodeBitmapIndexscan.c
|
|
* Routines to support bitmapped index scans of relations
|
|
*
|
|
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/executor/nodeBitmapIndexscan.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
/*
|
|
* INTERFACE ROUTINES
|
|
* MultiExecBitmapIndexScan scans a relation using index.
|
|
* ExecInitBitmapIndexScan creates and initializes state info.
|
|
* ExecReScanBitmapIndexScan prepares to rescan the plan.
|
|
* ExecEndBitmapIndexScan releases all storage.
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "access/genam.h"
|
|
#include "executor/executor.h"
|
|
#include "executor/nodeBitmapIndexscan.h"
|
|
#include "executor/nodeIndexscan.h"
|
|
#include "miscadmin.h"
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScan
|
|
*
|
|
* stub for pro forma compliance
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
static TupleTableSlot *
|
|
ExecBitmapIndexScan(PlanState *pstate)
|
|
{
|
|
elog(ERROR, "BitmapIndexScan node does not support ExecProcNode call convention");
|
|
return NULL;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* MultiExecBitmapIndexScan(node)
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
Node *
|
|
MultiExecBitmapIndexScan(BitmapIndexScanState *node)
|
|
{
|
|
TIDBitmap *tbm;
|
|
IndexScanDesc scandesc;
|
|
double nTuples = 0;
|
|
bool doscan;
|
|
|
|
/* must provide our own instrumentation support */
|
|
if (node->ss.ps.instrument)
|
|
InstrStartNode(node->ss.ps.instrument);
|
|
|
|
/*
|
|
* extract necessary information from index scan node
|
|
*/
|
|
scandesc = node->biss_ScanDesc;
|
|
|
|
/*
|
|
* If we have runtime keys and they've not already been set up, do it now.
|
|
* Array keys are also treated as runtime keys; note that if ExecReScan
|
|
* returns with biss_RuntimeKeysReady still false, then there is an empty
|
|
* array key so we should do nothing.
|
|
*/
|
|
if (!node->biss_RuntimeKeysReady &&
|
|
(node->biss_NumRuntimeKeys != 0 || node->biss_NumArrayKeys != 0))
|
|
{
|
|
ExecReScan((PlanState *) node);
|
|
doscan = node->biss_RuntimeKeysReady;
|
|
}
|
|
else
|
|
doscan = true;
|
|
|
|
/*
|
|
* Prepare the result bitmap. Normally we just create a new one to pass
|
|
* back; however, our parent node is allowed to store a pre-made one into
|
|
* node->biss_result, in which case we just OR our tuple IDs into the
|
|
* existing bitmap. (This saves needing explicit UNION steps.)
|
|
*/
|
|
if (node->biss_result)
|
|
{
|
|
tbm = node->biss_result;
|
|
node->biss_result = NULL; /* reset for next time */
|
|
}
|
|
else
|
|
{
|
|
/* XXX should we use less than work_mem for this? */
|
|
tbm = tbm_create(work_mem * (Size) 1024,
|
|
((BitmapIndexScan *) node->ss.ps.plan)->isshared ?
|
|
node->ss.ps.state->es_query_dsa : NULL);
|
|
}
|
|
|
|
/*
|
|
* Get TIDs from index and insert into bitmap
|
|
*/
|
|
while (doscan)
|
|
{
|
|
nTuples += (double) index_getbitmap(scandesc, tbm);
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys,
|
|
node->biss_NumArrayKeys);
|
|
if (doscan) /* reset index scan */
|
|
index_rescan(node->biss_ScanDesc,
|
|
node->biss_ScanKeys, node->biss_NumScanKeys,
|
|
NULL, 0);
|
|
}
|
|
|
|
/* must provide our own instrumentation support */
|
|
if (node->ss.ps.instrument)
|
|
InstrStopNode(node->ss.ps.instrument, nTuples);
|
|
|
|
return (Node *) tbm;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecReScanBitmapIndexScan(node)
|
|
*
|
|
* Recalculates the values of any scan keys whose value depends on
|
|
* information known at runtime, then rescans the indexed relation.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecReScanBitmapIndexScan(BitmapIndexScanState *node)
|
|
{
|
|
ExprContext *econtext = node->biss_RuntimeContext;
|
|
|
|
/*
|
|
* Reset the runtime-key context so we don't leak memory as each outer
|
|
* tuple is scanned. Note this assumes that we will recalculate *all*
|
|
* runtime keys on each call.
|
|
*/
|
|
if (econtext)
|
|
ResetExprContext(econtext);
|
|
|
|
/*
|
|
* If we are doing runtime key calculations (ie, any of the index key
|
|
* values weren't simple Consts), compute the new key values.
|
|
*
|
|
* Array keys are also treated as runtime keys; note that if we return
|
|
* with biss_RuntimeKeysReady still false, then there is an empty array
|
|
* key so no index scan is needed.
|
|
*/
|
|
if (node->biss_NumRuntimeKeys != 0)
|
|
ExecIndexEvalRuntimeKeys(econtext,
|
|
node->biss_RuntimeKeys,
|
|
node->biss_NumRuntimeKeys);
|
|
if (node->biss_NumArrayKeys != 0)
|
|
node->biss_RuntimeKeysReady =
|
|
ExecIndexEvalArrayKeys(econtext,
|
|
node->biss_ArrayKeys,
|
|
node->biss_NumArrayKeys);
|
|
else
|
|
node->biss_RuntimeKeysReady = true;
|
|
|
|
/* reset index scan */
|
|
if (node->biss_RuntimeKeysReady)
|
|
index_rescan(node->biss_ScanDesc,
|
|
node->biss_ScanKeys, node->biss_NumScanKeys,
|
|
NULL, 0);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecEndBitmapIndexScan
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecEndBitmapIndexScan(BitmapIndexScanState *node)
|
|
{
|
|
Relation indexRelationDesc;
|
|
IndexScanDesc indexScanDesc;
|
|
|
|
/*
|
|
* extract information from the node
|
|
*/
|
|
indexRelationDesc = node->biss_RelationDesc;
|
|
indexScanDesc = node->biss_ScanDesc;
|
|
|
|
/*
|
|
* When ending a parallel worker, copy the statistics gathered by the
|
|
* worker back into shared memory so that it can be picked up by the main
|
|
* process to report in EXPLAIN ANALYZE
|
|
*/
|
|
if (node->biss_SharedInfo != NULL && IsParallelWorker())
|
|
{
|
|
IndexScanInstrumentation *winstrument;
|
|
|
|
Assert(ParallelWorkerNumber <= node->biss_SharedInfo->num_workers);
|
|
winstrument = &node->biss_SharedInfo->winstrument[ParallelWorkerNumber];
|
|
|
|
/*
|
|
* We have to accumulate the stats rather than performing a memcpy.
|
|
* When a Gather/GatherMerge node finishes it will perform planner
|
|
* shutdown on the workers. On rescan it will spin up new workers
|
|
* which will have a new BitmapIndexScanState and zeroed stats.
|
|
*/
|
|
winstrument->nsearches += node->biss_Instrument.nsearches;
|
|
}
|
|
|
|
/*
|
|
* close the index relation (no-op if we didn't open it)
|
|
*/
|
|
if (indexScanDesc)
|
|
index_endscan(indexScanDesc);
|
|
if (indexRelationDesc)
|
|
index_close(indexRelationDesc, NoLock);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecInitBitmapIndexScan
|
|
*
|
|
* Initializes the index scan's state information.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
BitmapIndexScanState *
|
|
ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
|
|
{
|
|
BitmapIndexScanState *indexstate;
|
|
LOCKMODE lockmode;
|
|
|
|
/* check for unsupported flags */
|
|
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
|
|
|
|
/*
|
|
* create state structure
|
|
*/
|
|
indexstate = makeNode(BitmapIndexScanState);
|
|
indexstate->ss.ps.plan = (Plan *) node;
|
|
indexstate->ss.ps.state = estate;
|
|
indexstate->ss.ps.ExecProcNode = ExecBitmapIndexScan;
|
|
|
|
/* normally we don't make the result bitmap till runtime */
|
|
indexstate->biss_result = NULL;
|
|
|
|
/*
|
|
* We do not open or lock the base relation here. We assume that an
|
|
* ancestor BitmapHeapScan node is holding AccessShareLock (or better) on
|
|
* the heap relation throughout the execution of the plan tree.
|
|
*/
|
|
|
|
indexstate->ss.ss_currentRelation = NULL;
|
|
indexstate->ss.ss_currentScanDesc = NULL;
|
|
|
|
/*
|
|
* Miscellaneous initialization
|
|
*
|
|
* We do not need a standard exprcontext for this node, though we may
|
|
* decide below to create a runtime-key exprcontext
|
|
*/
|
|
|
|
/*
|
|
* initialize child expressions
|
|
*
|
|
* We don't need to initialize targetlist or qual since neither are used.
|
|
*
|
|
* Note: we don't initialize all of the indexqual expression, only the
|
|
* sub-parts corresponding to runtime keys (see below).
|
|
*/
|
|
|
|
/*
|
|
* If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
|
|
* here. This allows an index-advisor plugin to EXPLAIN a plan containing
|
|
* references to nonexistent indexes.
|
|
*/
|
|
if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
|
|
return indexstate;
|
|
|
|
/* Open the index relation. */
|
|
lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
|
|
indexstate->biss_RelationDesc = index_open(node->indexid, lockmode);
|
|
|
|
/*
|
|
* Initialize index-specific scan state
|
|
*/
|
|
indexstate->biss_RuntimeKeysReady = false;
|
|
indexstate->biss_RuntimeKeys = NULL;
|
|
indexstate->biss_NumRuntimeKeys = 0;
|
|
|
|
/*
|
|
* build the index scan keys from the index qualification
|
|
*/
|
|
ExecIndexBuildScanKeys((PlanState *) indexstate,
|
|
indexstate->biss_RelationDesc,
|
|
node->indexqual,
|
|
false,
|
|
&indexstate->biss_ScanKeys,
|
|
&indexstate->biss_NumScanKeys,
|
|
&indexstate->biss_RuntimeKeys,
|
|
&indexstate->biss_NumRuntimeKeys,
|
|
&indexstate->biss_ArrayKeys,
|
|
&indexstate->biss_NumArrayKeys);
|
|
|
|
/*
|
|
* If we have runtime keys or array keys, we need an ExprContext to
|
|
* evaluate them. We could just create a "standard" plan node exprcontext,
|
|
* but to keep the code looking similar to nodeIndexscan.c, it seems
|
|
* better to stick with the approach of using a separate ExprContext.
|
|
*/
|
|
if (indexstate->biss_NumRuntimeKeys != 0 ||
|
|
indexstate->biss_NumArrayKeys != 0)
|
|
{
|
|
ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
|
|
|
|
ExecAssignExprContext(estate, &indexstate->ss.ps);
|
|
indexstate->biss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
|
|
indexstate->ss.ps.ps_ExprContext = stdecontext;
|
|
}
|
|
else
|
|
{
|
|
indexstate->biss_RuntimeContext = NULL;
|
|
}
|
|
|
|
/*
|
|
* Initialize scan descriptor.
|
|
*/
|
|
indexstate->biss_ScanDesc =
|
|
index_beginscan_bitmap(indexstate->biss_RelationDesc,
|
|
estate->es_snapshot,
|
|
&indexstate->biss_Instrument,
|
|
indexstate->biss_NumScanKeys);
|
|
|
|
/*
|
|
* If no run-time keys to calculate, go ahead and pass the scankeys to the
|
|
* index AM.
|
|
*/
|
|
if (indexstate->biss_NumRuntimeKeys == 0 &&
|
|
indexstate->biss_NumArrayKeys == 0)
|
|
index_rescan(indexstate->biss_ScanDesc,
|
|
indexstate->biss_ScanKeys, indexstate->biss_NumScanKeys,
|
|
NULL, 0);
|
|
|
|
/*
|
|
* all done.
|
|
*/
|
|
return indexstate;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScanEstimate
|
|
*
|
|
* Compute the amount of space we'll need in the parallel
|
|
* query DSM, and inform pcxt->estimator about our needs.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecBitmapIndexScanEstimate(BitmapIndexScanState *node, ParallelContext *pcxt)
|
|
{
|
|
Size size;
|
|
|
|
/*
|
|
* Parallel bitmap index scans are not supported, but we still need to
|
|
* store the scan's instrumentation in DSM during parallel query
|
|
*/
|
|
if (!node->ss.ps.instrument || pcxt->nworkers == 0)
|
|
return;
|
|
|
|
size = offsetof(SharedIndexScanInstrumentation, winstrument) +
|
|
pcxt->nworkers * sizeof(IndexScanInstrumentation);
|
|
shm_toc_estimate_chunk(&pcxt->estimator, size);
|
|
shm_toc_estimate_keys(&pcxt->estimator, 1);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScanInitializeDSM
|
|
*
|
|
* Set up bitmap index scan shared instrumentation.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecBitmapIndexScanInitializeDSM(BitmapIndexScanState *node,
|
|
ParallelContext *pcxt)
|
|
{
|
|
Size size;
|
|
|
|
/* don't need this if not instrumenting or no workers */
|
|
if (!node->ss.ps.instrument || pcxt->nworkers == 0)
|
|
return;
|
|
|
|
size = offsetof(SharedIndexScanInstrumentation, winstrument) +
|
|
pcxt->nworkers * sizeof(IndexScanInstrumentation);
|
|
node->biss_SharedInfo =
|
|
(SharedIndexScanInstrumentation *) shm_toc_allocate(pcxt->toc,
|
|
size);
|
|
shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
|
|
node->biss_SharedInfo);
|
|
|
|
/* Each per-worker area must start out as zeroes */
|
|
memset(node->biss_SharedInfo, 0, size);
|
|
node->biss_SharedInfo->num_workers = pcxt->nworkers;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScanInitializeWorker
|
|
*
|
|
* Copy relevant information from TOC into planstate.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecBitmapIndexScanInitializeWorker(BitmapIndexScanState *node,
|
|
ParallelWorkerContext *pwcxt)
|
|
{
|
|
/* don't need this if not instrumenting */
|
|
if (!node->ss.ps.instrument)
|
|
return;
|
|
|
|
node->biss_SharedInfo = (SharedIndexScanInstrumentation *)
|
|
shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScanRetrieveInstrumentation
|
|
*
|
|
* Transfer bitmap index scan statistics from DSM to private memory.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecBitmapIndexScanRetrieveInstrumentation(BitmapIndexScanState *node)
|
|
{
|
|
SharedIndexScanInstrumentation *SharedInfo = node->biss_SharedInfo;
|
|
size_t size;
|
|
|
|
if (SharedInfo == NULL)
|
|
return;
|
|
|
|
/* Create a copy of SharedInfo in backend-local memory */
|
|
size = offsetof(SharedIndexScanInstrumentation, winstrument) +
|
|
SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
|
|
node->biss_SharedInfo = palloc(size);
|
|
memcpy(node->biss_SharedInfo, SharedInfo, size);
|
|
}
|