diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c index bf801fe78ff..d072f47fe28 100644 --- a/contrib/bloom/blscan.c +++ b/contrib/bloom/blscan.c @@ -116,6 +116,8 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) bas = GetAccessStrategy(BAS_BULKREAD); npages = RelationGetNumberOfBlocks(scan->indexRelation); pgstat_count_index_scan(scan->indexRelation); + if (scan->instrument) + scan->instrument->nsearches++; for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++) { diff --git a/doc/src/sgml/bloom.sgml b/doc/src/sgml/bloom.sgml index 663a0a4a681..ec5d077679b 100644 --- a/doc/src/sgml/bloom.sgml +++ b/doc/src/sgml/bloom.sgml @@ -173,10 +173,11 @@ CREATE INDEX Buffers: shared hit=21864 -> Bitmap Index Scan on bloomidx (cost=0.00..178436.00 rows=1 width=0) (actual time=20.005..20.005 rows=2300.00 loops=1) Index Cond: ((i2 = 898732) AND (i5 = 123451)) + Index Searches: 1 Buffers: shared hit=19608 Planning Time: 0.099 ms Execution Time: 22.632 ms -(10 rows) +(11 rows) @@ -208,13 +209,15 @@ CREATE INDEX Buffers: shared hit=6 -> Bitmap Index Scan on btreeidx5 (cost=0.00..4.52 rows=11 width=0) (actual time=0.026..0.026 rows=7.00 loops=1) Index Cond: (i5 = 123451) + Index Searches: 1 Buffers: shared hit=3 -> Bitmap Index Scan on btreeidx2 (cost=0.00..4.52 rows=11 width=0) (actual time=0.007..0.007 rows=8.00 loops=1) Index Cond: (i2 = 898732) + Index Searches: 1 Buffers: shared hit=3 Planning Time: 0.264 ms Execution Time: 0.047 ms -(13 rows) +(15 rows) Although this query runs much faster than with either of the single indexes, we pay a penalty in index size. 
Each of the single-column diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index b1710680705..aaa6586d3a4 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -4234,16 +4234,32 @@ description | Waiting for a newly initialized WAL file to reach durable storage - Queries that use certain SQL constructs to search for - rows matching any value out of a list or array of multiple scalar values - (see <xref linkend="functions-comparisons"/>) perform multiple - primitive index scans (up to one primitive scan per scalar - value) during query execution. Each internal primitive index scan - increments pg_stat_all_indexes.idx_scan, + Index scans may sometimes perform multiple index searches per execution. + Each index search increments pg_stat_all_indexes.idx_scan, so it's possible for the count of index scans to significantly exceed the total number of index scan executor node executions. + + This can happen with queries that use certain SQL + constructs to search for rows matching any value out of a list or array of + multiple scalar values (see <xref linkend="functions-comparisons"/>). It + can also happen to queries with a + column_name = + value1 OR + column_name = + value2 ... construct, though only + when the optimizer transforms the construct into an equivalent + multi-valued array representation. + + + + EXPLAIN ANALYZE outputs the total number of index + searches performed by each index scan node. See + <xref linkend="using-explain-analyze"/> for an example demonstrating how + this works. 
+ + diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index 91feb59abd1..e6146c1131e 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -729,9 +729,11 @@ WHERE t1.unique1 < 10 AND t1.unique2 = t2.unique2; Buffers: shared hit=3 read=5 written=4 -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..4.36 rows=10 width=0) (actual time=0.004..0.004 rows=10.00 loops=1) Index Cond: (unique1 < 10) + Index Searches: 1 Buffers: shared hit=2 -> Index Scan using tenk2_unique2 on tenk2 t2 (cost=0.29..7.90 rows=1 width=244) (actual time=0.003..0.003 rows=1.00 loops=10) Index Cond: (unique2 = t1.unique2) + Index Searches: 10 Buffers: shared hit=24 read=6 Planning: Buffers: shared hit=15 dirtied=9 @@ -790,6 +792,7 @@ WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2 ORDER BY t1.fivethous; Buffers: shared hit=92 -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) (actual time=0.013..0.013 rows=100.00 loops=1) Index Cond: (unique1 < 100) + Index Searches: 1 Buffers: shared hit=2 Planning: Buffers: shared hit=12 @@ -805,6 +808,58 @@ WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2 ORDER BY t1.fivethous; shown.) 
+ + Index Scan nodes (as well as Bitmap Index Scan and Index-Only Scan nodes) + show an Index Searches line that reports the total number + of searches across all node + executions/loops: + + +EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE thousand IN (1, 500, 700, 999); + QUERY PLAN +-------------------------------------------------------------------&zwsp;--------------------------------------------------------------- + Bitmap Heap Scan on tenk1 (cost=9.45..73.44 rows=40 width=244) (actual time=0.012..0.028 rows=40.00 loops=1) + Recheck Cond: (thousand = ANY ('{1,500,700,999}'::integer[])) + Heap Blocks: exact=39 + Buffers: shared hit=47 + -> Bitmap Index Scan on tenk1_thous_tenthous (cost=0.00..9.44 rows=40 width=0) (actual time=0.009..0.009 rows=40.00 loops=1) + Index Cond: (thousand = ANY ('{1,500,700,999}'::integer[])) + Index Searches: 4 + Buffers: shared hit=8 + Planning Time: 0.029 ms + Execution Time: 0.034 ms + + + Here we see a Bitmap Index Scan node that needed 4 separate index + searches. The scan had to search the index from the + tenk1_thous_tenthous index root page once per + integer value from the predicate's IN + construct. 
However, the number of index searches often won't have such a + simple correspondence to the query predicate: + + +EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE thousand IN (1, 2, 3, 4); + QUERY PLAN +--------------------------------------------------------------------&zwsp;-------------------------------------------------------------- + Bitmap Heap Scan on tenk1 (cost=9.45..73.44 rows=40 width=244) (actual time=0.009..0.019 rows=40.00 loops=1) + Recheck Cond: (thousand = ANY ('{1,2,3,4}'::integer[])) + Heap Blocks: exact=38 + Buffers: shared hit=40 + -> Bitmap Index Scan on tenk1_thous_tenthous (cost=0.00..9.44 rows=40 width=0) (actual time=0.005..0.005 rows=40.00 loops=1) + Index Cond: (thousand = ANY ('{1,2,3,4}'::integer[])) + Index Searches: 1 + Buffers: shared hit=2 + Planning Time: 0.029 ms + Execution Time: 0.026 ms + + + This variant of our IN query performed only 1 index + search. It spent less time traversing the index (compared to the original + query) because its IN construct uses values matching + index tuples stored next to each other, on the same + tenk1_thous_tenthous index leaf page. 
+ + Another type of extra information is the number of rows removed by a filter condition: @@ -861,6 +916,7 @@ EXPLAIN ANALYZE SELECT * FROM polygon_tbl WHERE f1 @> polygon '(0.5,2.0)'; Index Scan using gpolygonind on polygon_tbl (cost=0.13..8.15 rows=1 width=85) (actual time=0.074..0.074 rows=0.00 loops=1) Index Cond: (f1 @> '((0.5,2))'::polygon) Rows Removed by Index Recheck: 1 + Index Searches: 1 Buffers: shared hit=1 Planning Time: 0.039 ms Execution Time: 0.098 ms @@ -894,8 +950,10 @@ EXPLAIN (ANALYZE, BUFFERS OFF) SELECT * FROM tenk1 WHERE unique1 < 100 AND un -> BitmapAnd (cost=25.07..25.07 rows=10 width=0) (actual time=0.100..0.101 rows=0.00 loops=1) -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) (actual time=0.027..0.027 rows=100.00 loops=1) Index Cond: (unique1 < 100) + Index Searches: 1 -> Bitmap Index Scan on tenk1_unique2 (cost=0.00..19.78 rows=999 width=0) (actual time=0.070..0.070 rows=999.00 loops=1) Index Cond: (unique2 > 9000) + Index Searches: 1 Planning Time: 0.162 ms Execution Time: 0.143 ms @@ -923,6 +981,7 @@ EXPLAIN ANALYZE UPDATE tenk1 SET hundred = hundred + 1 WHERE unique1 < 100; Buffers: shared hit=4 read=2 -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) (actual time=0.031..0.031 rows=100.00 loops=1) Index Cond: (unique1 < 100) + Index Searches: 1 Buffers: shared read=2 Planning Time: 0.151 ms Execution Time: 1.856 ms @@ -1061,6 +1120,7 @@ EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000 Index Cond: (unique2 > 9000) Filter: (unique1 < 100) Rows Removed by Filter: 287 + Index Searches: 1 Buffers: shared hit=16 Planning Time: 0.077 ms Execution Time: 0.086 ms diff --git a/doc/src/sgml/ref/explain.sgml b/doc/src/sgml/ref/explain.sgml index 7daddf03ef0..9ed1061b7ff 100644 --- a/doc/src/sgml/ref/explain.sgml +++ b/doc/src/sgml/ref/explain.sgml @@ -506,10 +506,11 @@ EXPLAIN ANALYZE EXECUTE query(100, 200); Buffers: shared hit=4 -> Index Scan using test_pkey on test 
(cost=0.29..10.27 rows=99 width=8) (actual time=0.009..0.025 rows=99.00 loops=1) Index Cond: ((id > 100) AND (id < 200)) + Index Searches: 1 Buffers: shared hit=4 Planning Time: 0.244 ms Execution Time: 0.073 ms -(9 rows) +(10 rows) diff --git a/doc/src/sgml/rules.sgml b/doc/src/sgml/rules.sgml index 1d9924a2a3c..8467d961fd0 100644 --- a/doc/src/sgml/rules.sgml +++ b/doc/src/sgml/rules.sgml @@ -1046,6 +1046,7 @@ SELECT count(*) FROM words WHERE word = 'caterpiler'; -> Index Only Scan using wrd_word on wrd (cost=0.42..4.44 rows=1 width=0) (actual time=0.039..0.039 rows=0.00 loops=1) Index Cond: (word = 'caterpiler'::text) Heap Fetches: 0 + Index Searches: 1 Planning time: 0.164 ms Execution time: 0.117 ms @@ -1090,6 +1091,7 @@ SELECT word FROM words ORDER BY word <-> 'caterpiler' LIMIT 10; Limit (cost=0.29..1.06 rows=10 width=10) (actual time=187.222..188.257 rows=10.00 loops=1) -> Index Scan using wrd_trgm on wrd (cost=0.29..37020.87 rows=479829 width=10) (actual time=187.219..188.252 rows=10.00 loops=1) Order By: (word <-> 'caterpiler'::text) + Index Searches: 1 Planning time: 0.196 ms Execution time: 198.640 ms diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index b01009c5d85..737ad638808 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -592,6 +592,8 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) opaque = (BrinOpaque *) scan->opaque; bdesc = opaque->bo_bdesc; pgstat_count_index_scan(idxRel); + if (scan->instrument) + scan->instrument->nsearches++; /* * We need to know the size of the table so that we know how long to diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 84aa14594f8..f6cdd098a02 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -442,6 +442,8 @@ ginNewScanKey(IndexScanDesc scan) MemoryContextSwitchTo(oldCtx); pgstat_count_index_scan(scan->indexRelation); + if (scan->instrument) + scan->instrument->nsearches++; 
} void diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index cc40e928e0a..387d9972345 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -625,6 +625,8 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir) GISTSearchItem fakeItem; pgstat_count_index_scan(scan->indexRelation); + if (scan->instrument) + scan->instrument->nsearches++; so->firstCall = false; so->curPageData = so->nPageData = 0; @@ -750,6 +752,8 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) return 0; pgstat_count_index_scan(scan->indexRelation); + if (scan->instrument) + scan->instrument->nsearches++; /* Begin the scan by processing the root page */ so->curPageData = so->nPageData = 0; diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index a3a1fccf33b..92c15a65be2 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -298,6 +298,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) HashScanPosItem *currItem; pgstat_count_index_scan(rel); + if (scan->instrument) + scan->instrument->nsearches++; /* * We do not support hash scans with no index qualification, because we diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index e78682c3cef..d74f0fbc5cd 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -749,7 +749,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, tableScan = NULL; heapScan = NULL; - indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0); + indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, NULL, 0, 0); index_rescan(indexScan, NULL, 0, NULL, 0); } else diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 07bae342e25..886c05655f4 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -119,6 +119,7 @@ 
RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->ignore_killed_tuples = !scan->xactStartedInRecovery; scan->opaque = NULL; + scan->instrument = NULL; scan->xs_itup = NULL; scan->xs_itupdesc = NULL; @@ -446,7 +447,7 @@ systable_beginscan(Relation heapRelation, } sysscan->iscan = index_beginscan(heapRelation, irel, - snapshot, nkeys, 0); + snapshot, NULL, nkeys, 0); index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0); sysscan->scan = NULL; @@ -711,7 +712,7 @@ systable_beginscan_ordered(Relation heapRelation, } sysscan->iscan = index_beginscan(heapRelation, indexRelation, - snapshot, nkeys, 0); + snapshot, NULL, nkeys, 0); index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0); sysscan->scan = NULL; diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 8b1f555435b..55ec4c10352 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -256,6 +256,7 @@ IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, + IndexScanInstrumentation *instrument, int nkeys, int norderbys) { IndexScanDesc scan; @@ -270,6 +271,7 @@ index_beginscan(Relation heapRelation, */ scan->heapRelation = heapRelation; scan->xs_snapshot = snapshot; + scan->instrument = instrument; /* prepare to fetch index matches from table */ scan->xs_heapfetch = table_index_fetch_begin(heapRelation); @@ -286,6 +288,7 @@ index_beginscan(Relation heapRelation, IndexScanDesc index_beginscan_bitmap(Relation indexRelation, Snapshot snapshot, + IndexScanInstrumentation *instrument, int nkeys) { IndexScanDesc scan; @@ -299,6 +302,7 @@ index_beginscan_bitmap(Relation indexRelation, * up by RelationGetIndexScan. 
*/ scan->xs_snapshot = snapshot; + scan->instrument = instrument; return scan; } @@ -448,14 +452,19 @@ index_restrpos(IndexScanDesc scan) /* * index_parallelscan_estimate - estimate shared memory for parallel scan + * + * When instrument=true, estimate includes SharedIndexScanInstrumentation + * space. When parallel_aware=true, estimate includes whatever space the + * index AM's amestimateparallelscan routine requested when called. */ Size index_parallelscan_estimate(Relation indexRelation, int nkeys, int norderbys, - Snapshot snapshot) + Snapshot snapshot, bool instrument, + bool parallel_aware, int nworkers) { Size nbytes; - Assert(snapshot != InvalidSnapshot); + Assert(instrument || parallel_aware); RELATION_CHECKS; @@ -463,12 +472,22 @@ index_parallelscan_estimate(Relation indexRelation, int nkeys, int norderbys, nbytes = add_size(nbytes, EstimateSnapshotSpace(snapshot)); nbytes = MAXALIGN(nbytes); + if (instrument) + { + Size sharedinfosz; + + sharedinfosz = offsetof(SharedIndexScanInstrumentation, winstrument) + + nworkers * sizeof(IndexScanInstrumentation); + nbytes = add_size(nbytes, sharedinfosz); + nbytes = MAXALIGN(nbytes); + } + /* - * If amestimateparallelscan is not provided, assume there is no - * AM-specific data needed. (It's hard to believe that could work, but - * it's easy enough to cater to it here.) 
+ * If parallel scan index AM interface can't be used (or index AM provides + * no such interface), assume there is no AM-specific data needed */ - if (indexRelation->rd_indam->amestimateparallelscan != NULL) + if (parallel_aware && + indexRelation->rd_indam->amestimateparallelscan != NULL) nbytes = add_size(nbytes, indexRelation->rd_indam->amestimateparallelscan(nkeys, norderbys)); @@ -488,11 +507,14 @@ index_parallelscan_estimate(Relation indexRelation, int nkeys, int norderbys, */ void index_parallelscan_initialize(Relation heapRelation, Relation indexRelation, - Snapshot snapshot, ParallelIndexScanDesc target) + Snapshot snapshot, bool instrument, + bool parallel_aware, int nworkers, + SharedIndexScanInstrumentation **sharedinfo, + ParallelIndexScanDesc target) { Size offset; - Assert(snapshot != InvalidSnapshot); + Assert(instrument || parallel_aware); RELATION_CHECKS; @@ -502,15 +524,34 @@ index_parallelscan_initialize(Relation heapRelation, Relation indexRelation, target->ps_locator = heapRelation->rd_locator; target->ps_indexlocator = indexRelation->rd_locator; - target->ps_offset = offset; + target->ps_offset_ins = 0; + target->ps_offset_am = 0; SerializeSnapshot(snapshot, target->ps_snapshot_data); + if (instrument) + { + Size sharedinfosz; + + target->ps_offset_ins = offset; + sharedinfosz = offsetof(SharedIndexScanInstrumentation, winstrument) + + nworkers * sizeof(IndexScanInstrumentation); + offset = add_size(offset, sharedinfosz); + offset = MAXALIGN(offset); + + /* Set leader's *sharedinfo pointer, and initialize stats */ + *sharedinfo = (SharedIndexScanInstrumentation *) + OffsetToPointer(target, target->ps_offset_ins); + memset(*sharedinfo, 0, sharedinfosz); + (*sharedinfo)->num_workers = nworkers; + } + /* aminitparallelscan is optional; assume no-op if not provided by AM */ - if (indexRelation->rd_indam->aminitparallelscan != NULL) + if (parallel_aware && indexRelation->rd_indam->aminitparallelscan != NULL) { void *amtarget; - amtarget = 
OffsetToPointer(target, offset); + target->ps_offset_am = offset; + amtarget = OffsetToPointer(target, target->ps_offset_am); indexRelation->rd_indam->aminitparallelscan(amtarget); } } @@ -538,8 +579,10 @@ index_parallelrescan(IndexScanDesc scan) * Caller must be holding suitable locks on the heap and the index. */ IndexScanDesc -index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys, - int norderbys, ParallelIndexScanDesc pscan) +index_beginscan_parallel(Relation heaprel, Relation indexrel, + IndexScanInstrumentation *instrument, + int nkeys, int norderbys, + ParallelIndexScanDesc pscan) { Snapshot snapshot; IndexScanDesc scan; @@ -558,6 +601,7 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys, */ scan->heapRelation = heaprel; scan->xs_snapshot = snapshot; + scan->instrument = instrument; /* prepare to fetch index matches from table */ scan->xs_heapfetch = table_index_fetch_begin(heaprel); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 25188a644ef..c0a8833e068 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -574,7 +574,7 @@ btparallelrescan(IndexScanDesc scan) Assert(parallel_scan); btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan, - parallel_scan->ps_offset); + parallel_scan->ps_offset_am); /* * In theory, we don't need to acquire the LWLock here, because there @@ -652,7 +652,7 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page, } btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan, - parallel_scan->ps_offset); + parallel_scan->ps_offset_am); while (1) { @@ -760,7 +760,7 @@ _bt_parallel_release(IndexScanDesc scan, BlockNumber next_scan_page, Assert(BlockNumberIsValid(next_scan_page)); btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan, - parallel_scan->ps_offset); + parallel_scan->ps_offset_am); LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE); btscan->btps_nextScanPage = next_scan_page; @@ 
-799,7 +799,7 @@ _bt_parallel_done(IndexScanDesc scan) return; btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan, - parallel_scan->ps_offset); + parallel_scan->ps_offset_am); /* * Mark the parallel scan as done, unless some other process did so @@ -837,7 +837,7 @@ _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page) Assert(so->numArrayKeys); btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan, - parallel_scan->ps_offset); + parallel_scan->ps_offset_am); LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE); if (btscan->btps_lastCurrPage == curr_page && diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 6b2f464aa04..22b27d01d00 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -950,6 +950,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * _bt_search/_bt_endpoint below */ pgstat_count_index_scan(rel); + if (scan->instrument) + scan->instrument->nsearches++; /*---------- * Examine the scan keys to discover where we need to start the scan. 
diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index 53f910e9d89..25893050c58 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -421,6 +421,8 @@ spgrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, /* count an indexscan for stats */ pgstat_count_index_scan(scan->indexRelation); + if (scan->instrument) + scan->instrument->nsearches++; } void diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index d8a7232cedb..adefc5471a3 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -125,6 +125,7 @@ static void show_recursive_union_info(RecursiveUnionState *rstate, static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); +static void show_indexsearches_info(PlanState *planstate, ExplainState *es); static void show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es); static void show_instrumentation_count(const char *qlabel, int which, @@ -2096,6 +2097,7 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + show_indexsearches_info(planstate, es); break; case T_IndexOnlyScan: show_scan_qual(((IndexOnlyScan *) plan)->indexqual, @@ -2112,10 +2114,12 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->analyze) ExplainPropertyFloat("Heap Fetches", NULL, planstate->instrument->ntuples2, 0, es); + show_indexsearches_info(planstate, es); break; case T_BitmapIndexScan: show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, "Index Cond", planstate, ancestors, es); + show_indexsearches_info(planstate, es); break; case T_BitmapHeapScan: show_scan_qual(((BitmapHeapScan *) plan)->bitmapqualorig, @@ -3855,6 +3859,65 @@ show_hashagg_info(AggState *aggstate, ExplainState *es) } } +/* + * Show the total number of index searches for an
+ * IndexScan/IndexOnlyScan/BitmapIndexScan node + */ +static void +show_indexsearches_info(PlanState *planstate, ExplainState *es) +{ + Plan *plan = planstate->plan; + SharedIndexScanInstrumentation *SharedInfo = NULL; + uint64 nsearches = 0; + + if (!es->analyze) + return; + + /* Initialize counters with stats from the local process first */ + switch (nodeTag(plan)) + { + case T_IndexScan: + { + IndexScanState *indexstate = ((IndexScanState *) planstate); + + nsearches = indexstate->iss_Instrument.nsearches; + SharedInfo = indexstate->iss_SharedInfo; + break; + } + case T_IndexOnlyScan: + { + IndexOnlyScanState *indexstate = ((IndexOnlyScanState *) planstate); + + nsearches = indexstate->ioss_Instrument.nsearches; + SharedInfo = indexstate->ioss_SharedInfo; + break; + } + case T_BitmapIndexScan: + { + BitmapIndexScanState *indexstate = ((BitmapIndexScanState *) planstate); + + nsearches = indexstate->biss_Instrument.nsearches; + SharedInfo = indexstate->biss_SharedInfo; + break; + } + default: + break; + } + + /* Next get the sum of the counters set within each and every process */ + if (SharedInfo) + { + for (int i = 0; i < SharedInfo->num_workers; ++i) + { + IndexScanInstrumentation *winstrument = &SharedInfo->winstrument[i]; + + nsearches += winstrument->nsearches; + } + } + + ExplainPropertyUInteger("Index Searches", NULL, nsearches, es); +} + /* * Show exact/lossy pages for a BitmapHeapScan node */ diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 742f3f8c08d..e3fe9b78bb5 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -816,7 +816,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, retry: conflict = false; found_self = false; - index_scan = index_beginscan(heap, index, &DirtySnapshot, indnkeyatts, 0); + index_scan = index_beginscan(heap, index, &DirtySnapshot, NULL, indnkeyatts, 0); index_rescan(index_scan, scankeys, indnkeyatts, NULL, 0); while 
(index_getnext_slot(index_scan, ForwardScanDirection, existing_slot)) diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 1bedb808368..e9337a97d17 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -28,6 +28,7 @@ #include "executor/nodeAgg.h" #include "executor/nodeAppend.h" #include "executor/nodeBitmapHeapscan.h" +#include "executor/nodeBitmapIndexscan.h" #include "executor/nodeCustom.h" #include "executor/nodeForeignscan.h" #include "executor/nodeHash.h" @@ -244,14 +245,19 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) e->pcxt); break; case T_IndexScanState: - if (planstate->plan->parallel_aware) - ExecIndexScanEstimate((IndexScanState *) planstate, - e->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecIndexScanEstimate((IndexScanState *) planstate, + e->pcxt); break; case T_IndexOnlyScanState: - if (planstate->plan->parallel_aware) - ExecIndexOnlyScanEstimate((IndexOnlyScanState *) planstate, - e->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecIndexOnlyScanEstimate((IndexOnlyScanState *) planstate, + e->pcxt); + break; + case T_BitmapIndexScanState: + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecBitmapIndexScanEstimate((BitmapIndexScanState *) planstate, + e->pcxt); break; case T_ForeignScanState: if (planstate->plan->parallel_aware) @@ -468,14 +474,17 @@ ExecParallelInitializeDSM(PlanState *planstate, d->pcxt); break; case T_IndexScanState: - if (planstate->plan->parallel_aware) - ExecIndexScanInitializeDSM((IndexScanState *) planstate, - d->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecIndexScanInitializeDSM((IndexScanState *) planstate, d->pcxt); break; case T_IndexOnlyScanState: - if (planstate->plan->parallel_aware) - ExecIndexOnlyScanInitializeDSM((IndexOnlyScanState *) planstate, - d->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + 
ExecIndexOnlyScanInitializeDSM((IndexOnlyScanState *) planstate, + d->pcxt); + break; + case T_BitmapIndexScanState: + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecBitmapIndexScanInitializeDSM((BitmapIndexScanState *) planstate, d->pcxt); break; case T_ForeignScanState: if (planstate->plan->parallel_aware) @@ -1002,6 +1011,7 @@ ExecParallelReInitializeDSM(PlanState *planstate, ExecHashJoinReInitializeDSM((HashJoinState *) planstate, pcxt); break; + case T_BitmapIndexScanState: case T_HashState: case T_SortState: case T_IncrementalSortState: @@ -1063,6 +1073,15 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, /* Perform any node-type-specific work that needs to be done. */ switch (nodeTag(planstate)) { + case T_IndexScanState: + ExecIndexScanRetrieveInstrumentation((IndexScanState *) planstate); + break; + case T_IndexOnlyScanState: + ExecIndexOnlyScanRetrieveInstrumentation((IndexOnlyScanState *) planstate); + break; + case T_BitmapIndexScanState: + ExecBitmapIndexScanRetrieveInstrumentation((BitmapIndexScanState *) planstate); + break; case T_SortState: ExecSortRetrieveInstrumentation((SortState *) planstate); break; @@ -1330,14 +1349,18 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt) ExecSeqScanInitializeWorker((SeqScanState *) planstate, pwcxt); break; case T_IndexScanState: - if (planstate->plan->parallel_aware) - ExecIndexScanInitializeWorker((IndexScanState *) planstate, - pwcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecIndexScanInitializeWorker((IndexScanState *) planstate, pwcxt); break; case T_IndexOnlyScanState: - if (planstate->plan->parallel_aware) - ExecIndexOnlyScanInitializeWorker((IndexOnlyScanState *) planstate, - pwcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecIndexOnlyScanInitializeWorker((IndexOnlyScanState *) planstate, + pwcxt); + break; + case T_BitmapIndexScanState: + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + 
ExecBitmapIndexScanInitializeWorker((BitmapIndexScanState *) planstate, + pwcxt); break; case T_ForeignScanState: if (planstate->plan->parallel_aware) diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index f59cb9098ff..0a9b880d250 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -202,7 +202,7 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, skey_attoff = build_replindex_scan_key(skey, rel, idxrel, searchslot); /* Start an index scan. */ - scan = index_beginscan(rel, idxrel, &snap, skey_attoff, 0); + scan = index_beginscan(rel, idxrel, &snap, NULL, skey_attoff, 0); retry: found = false; diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c index 0b32c3a022f..abbb033881a 100644 --- a/src/backend/executor/nodeBitmapIndexscan.c +++ b/src/backend/executor/nodeBitmapIndexscan.c @@ -183,6 +183,27 @@ ExecEndBitmapIndexScan(BitmapIndexScanState *node) indexRelationDesc = node->biss_RelationDesc; indexScanDesc = node->biss_ScanDesc; + /* + * When ending a parallel worker, copy the statistics gathered by the + * worker back into shared memory so that it can be picked up by the main + * process to report in EXPLAIN ANALYZE + */ + if (node->biss_SharedInfo != NULL && IsParallelWorker()) + { + IndexScanInstrumentation *winstrument; + + Assert(ParallelWorkerNumber <= node->biss_SharedInfo->num_workers); + winstrument = &node->biss_SharedInfo->winstrument[ParallelWorkerNumber]; + + /* + * We have to accumulate the stats rather than performing a memcpy. + * When a Gather/GatherMerge node finishes it will perform planner + * shutdown on the workers. On rescan it will spin up new workers + * which will have a new BitmapIndexScanState and zeroed stats. 
+ */ + winstrument->nsearches += node->biss_Instrument.nsearches; + } + /* * close the index relation (no-op if we didn't open it) */ @@ -302,6 +323,7 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) indexstate->biss_ScanDesc = index_beginscan_bitmap(indexstate->biss_RelationDesc, estate->es_snapshot, + &indexstate->biss_Instrument, indexstate->biss_NumScanKeys); /* @@ -319,3 +341,97 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) */ return indexstate; } + +/* ---------------------------------------------------------------- + * ExecBitmapIndexScanEstimate + * + * Compute the amount of space we'll need in the parallel + * query DSM, and inform pcxt->estimator about our needs. + * ---------------------------------------------------------------- + */ +void +ExecBitmapIndexScanEstimate(BitmapIndexScanState *node, ParallelContext *pcxt) +{ + Size size; + + /* + * Parallel bitmap index scans are not supported, but we still need to + * store the scan's instrumentation in DSM during parallel query + */ + if (!node->ss.ps.instrument || pcxt->nworkers == 0) + return; + + size = offsetof(SharedIndexScanInstrumentation, winstrument) + + pcxt->nworkers * sizeof(IndexScanInstrumentation); + shm_toc_estimate_chunk(&pcxt->estimator, size); + shm_toc_estimate_keys(&pcxt->estimator, 1); +} + +/* ---------------------------------------------------------------- + * ExecBitmapIndexScanInitializeDSM + * + * Set up bitmap index scan shared instrumentation. 
+ * ---------------------------------------------------------------- + */ +void +ExecBitmapIndexScanInitializeDSM(BitmapIndexScanState *node, + ParallelContext *pcxt) +{ + Size size; + + /* don't need this if not instrumenting or no workers */ + if (!node->ss.ps.instrument || pcxt->nworkers == 0) + return; + + size = offsetof(SharedIndexScanInstrumentation, winstrument) + + pcxt->nworkers * sizeof(IndexScanInstrumentation); + node->biss_SharedInfo = + (SharedIndexScanInstrumentation *) shm_toc_allocate(pcxt->toc, + size); + shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, + node->biss_SharedInfo); + + /* Each per-worker area must start out as zeroes */ + memset(node->biss_SharedInfo, 0, size); + node->biss_SharedInfo->num_workers = pcxt->nworkers; +} + +/* ---------------------------------------------------------------- + * ExecBitmapIndexScanInitializeWorker + * + * Copy relevant information from TOC into planstate. + * ---------------------------------------------------------------- + */ +void +ExecBitmapIndexScanInitializeWorker(BitmapIndexScanState *node, + ParallelWorkerContext *pwcxt) +{ + /* don't need this if not instrumenting */ + if (!node->ss.ps.instrument) + return; + + node->biss_SharedInfo = (SharedIndexScanInstrumentation *) + shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); +} + +/* ---------------------------------------------------------------- + * ExecBitmapIndexScanRetrieveInstrumentation + * + * Transfer bitmap index scan statistics from DSM to private memory. 
+ * ---------------------------------------------------------------- + */ +void +ExecBitmapIndexScanRetrieveInstrumentation(BitmapIndexScanState *node) +{ + SharedIndexScanInstrumentation *SharedInfo = node->biss_SharedInfo; + size_t size; + + if (SharedInfo == NULL) + return; + + /* Create a copy of SharedInfo in backend-local memory */ + size = offsetof(SharedIndexScanInstrumentation, winstrument) + + SharedInfo->num_workers * sizeof(IndexScanInstrumentation); + node->biss_SharedInfo = palloc(size); + memcpy(node->biss_SharedInfo, SharedInfo, size); +} diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index e6635233155..f464cca9507 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -92,6 +92,7 @@ IndexOnlyNext(IndexOnlyScanState *node) scandesc = index_beginscan(node->ss.ss_currentRelation, node->ioss_RelationDesc, estate->es_snapshot, + &node->ioss_Instrument, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys); @@ -413,6 +414,27 @@ ExecEndIndexOnlyScan(IndexOnlyScanState *node) node->ioss_VMBuffer = InvalidBuffer; } + /* + * When ending a parallel worker, copy the statistics gathered by the + * worker back into shared memory so that it can be picked up by the main + * process to report in EXPLAIN ANALYZE + */ + if (node->ioss_SharedInfo != NULL && IsParallelWorker()) + { + IndexScanInstrumentation *winstrument; + + Assert(ParallelWorkerNumber < node->ioss_SharedInfo->num_workers); + winstrument = &node->ioss_SharedInfo->winstrument[ParallelWorkerNumber]; + + /* + * We have to accumulate the stats rather than performing a memcpy. + * When a Gather/GatherMerge node finishes it will perform planner + * shutdown on the workers. On rescan it will spin up new workers + * which will have a new IndexOnlyScanState and zeroed stats.
+ */ + winstrument->nsearches += node->ioss_Instrument.nsearches; + } + /* * close the index relation (no-op if we didn't open it) */ @@ -707,11 +729,21 @@ ExecIndexOnlyScanEstimate(IndexOnlyScanState *node, ParallelContext *pcxt) { EState *estate = node->ss.ps.state; + bool instrument = (node->ss.ps.instrument != NULL); + bool parallel_aware = node->ss.ps.plan->parallel_aware; + + if (!instrument && !parallel_aware) + { + /* No DSM required by the scan */ + return; + } node->ioss_PscanLen = index_parallelscan_estimate(node->ioss_RelationDesc, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys, - estate->es_snapshot); + estate->es_snapshot, + instrument, parallel_aware, + pcxt->nworkers); shm_toc_estimate_chunk(&pcxt->estimator, node->ioss_PscanLen); shm_toc_estimate_keys(&pcxt->estimator, 1); } @@ -728,16 +760,33 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, { EState *estate = node->ss.ps.state; ParallelIndexScanDesc piscan; + bool instrument = node->ss.ps.instrument != NULL; + bool parallel_aware = node->ss.ps.plan->parallel_aware; + + if (!instrument && !parallel_aware) + { + /* No DSM required by the scan */ + return; + } piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen); index_parallelscan_initialize(node->ss.ss_currentRelation, node->ioss_RelationDesc, estate->es_snapshot, - piscan); + instrument, parallel_aware, pcxt->nworkers, + &node->ioss_SharedInfo, piscan); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan); + + if (!parallel_aware) + { + /* Only here to initialize SharedInfo in DSM */ + return; + } + node->ioss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->ioss_RelationDesc, + &node->ioss_Instrument, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys, piscan); @@ -764,6 +813,7 @@ void ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node, ParallelContext *pcxt) { + Assert(node->ss.ps.plan->parallel_aware); index_parallelrescan(node->ioss_ScanDesc); } @@ -778,11 +828,31 @@ 
ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, ParallelWorkerContext *pwcxt) { ParallelIndexScanDesc piscan; + bool instrument = node->ss.ps.instrument != NULL; + bool parallel_aware = node->ss.ps.plan->parallel_aware; + + if (!instrument && !parallel_aware) + { + /* No DSM required by the scan */ + return; + } piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); + + if (instrument) + node->ioss_SharedInfo = (SharedIndexScanInstrumentation *) + OffsetToPointer(piscan, piscan->ps_offset_ins); + + if (!parallel_aware) + { + /* Only here to set up worker node's SharedInfo */ + return; + } + node->ioss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->ioss_RelationDesc, + &node->ioss_Instrument, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys, piscan); @@ -797,3 +867,25 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, node->ioss_ScanKeys, node->ioss_NumScanKeys, node->ioss_OrderByKeys, node->ioss_NumOrderByKeys); } + +/* ---------------------------------------------------------------- + * ExecIndexOnlyScanRetrieveInstrumentation + * + * Transfer index-only scan statistics from DSM to private memory. 
+ * ---------------------------------------------------------------- + */ +void +ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node) +{ + SharedIndexScanInstrumentation *SharedInfo = node->ioss_SharedInfo; + size_t size; + + if (SharedInfo == NULL) + return; + + /* Create a copy of SharedInfo in backend-local memory */ + size = offsetof(SharedIndexScanInstrumentation, winstrument) + + SharedInfo->num_workers * sizeof(IndexScanInstrumentation); + node->ioss_SharedInfo = palloc(size); + memcpy(node->ioss_SharedInfo, SharedInfo, size); +} diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index c30b9c2c197..7fcaa37fe62 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -109,6 +109,7 @@ IndexNext(IndexScanState *node) scandesc = index_beginscan(node->ss.ss_currentRelation, node->iss_RelationDesc, estate->es_snapshot, + &node->iss_Instrument, node->iss_NumScanKeys, node->iss_NumOrderByKeys); @@ -204,6 +205,7 @@ IndexNextWithReorder(IndexScanState *node) scandesc = index_beginscan(node->ss.ss_currentRelation, node->iss_RelationDesc, estate->es_snapshot, + &node->iss_Instrument, node->iss_NumScanKeys, node->iss_NumOrderByKeys); @@ -793,6 +795,27 @@ ExecEndIndexScan(IndexScanState *node) indexRelationDesc = node->iss_RelationDesc; indexScanDesc = node->iss_ScanDesc; + /* + * When ending a parallel worker, copy the statistics gathered by the + * worker back into shared memory so that it can be picked up by the main + * process to report in EXPLAIN ANALYZE + */ + if (node->iss_SharedInfo != NULL && IsParallelWorker()) + { + IndexScanInstrumentation *winstrument; + + Assert(ParallelWorkerNumber < node->iss_SharedInfo->num_workers); + winstrument = &node->iss_SharedInfo->winstrument[ParallelWorkerNumber]; + + /* + * We have to accumulate the stats rather than performing a memcpy. + * When a Gather/GatherMerge node finishes it will perform planner + * shutdown on the workers.
On rescan it will spin up new workers + * which will have a new IndexScanState and zeroed stats. + */ + winstrument->nsearches += node->iss_Instrument.nsearches; + } + /* * close the index relation (no-op if we didn't open it) */ @@ -1642,11 +1665,21 @@ ExecIndexScanEstimate(IndexScanState *node, ParallelContext *pcxt) { EState *estate = node->ss.ps.state; + bool instrument = node->ss.ps.instrument != NULL; + bool parallel_aware = node->ss.ps.plan->parallel_aware; + + if (!instrument && !parallel_aware) + { + /* No DSM required by the scan */ + return; + } node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc, node->iss_NumScanKeys, node->iss_NumOrderByKeys, - estate->es_snapshot); + estate->es_snapshot, + instrument, parallel_aware, + pcxt->nworkers); shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen); shm_toc_estimate_keys(&pcxt->estimator, 1); } @@ -1663,16 +1696,33 @@ ExecIndexScanInitializeDSM(IndexScanState *node, { EState *estate = node->ss.ps.state; ParallelIndexScanDesc piscan; + bool instrument = node->ss.ps.instrument != NULL; + bool parallel_aware = node->ss.ps.plan->parallel_aware; + + if (!instrument && !parallel_aware) + { + /* No DSM required by the scan */ + return; + } piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen); index_parallelscan_initialize(node->ss.ss_currentRelation, node->iss_RelationDesc, estate->es_snapshot, - piscan); + instrument, parallel_aware, pcxt->nworkers, + &node->iss_SharedInfo, piscan); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan); + + if (!parallel_aware) + { + /* Only here to initialize SharedInfo in DSM */ + return; + } + node->iss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->iss_RelationDesc, + &node->iss_Instrument, node->iss_NumScanKeys, node->iss_NumOrderByKeys, piscan); @@ -1697,6 +1747,7 @@ void ExecIndexScanReInitializeDSM(IndexScanState *node, ParallelContext *pcxt) { + Assert(node->ss.ps.plan->parallel_aware);
index_parallelrescan(node->iss_ScanDesc); } @@ -1711,11 +1762,31 @@ ExecIndexScanInitializeWorker(IndexScanState *node, ParallelWorkerContext *pwcxt) { ParallelIndexScanDesc piscan; + bool instrument = node->ss.ps.instrument != NULL; + bool parallel_aware = node->ss.ps.plan->parallel_aware; + + if (!instrument && !parallel_aware) + { + /* No DSM required by the scan */ + return; + } piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); + + if (instrument) + node->iss_SharedInfo = (SharedIndexScanInstrumentation *) + OffsetToPointer(piscan, piscan->ps_offset_ins); + + if (!parallel_aware) + { + /* Only here to set up worker node's SharedInfo */ + return; + } + node->iss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->iss_RelationDesc, + &node->iss_Instrument, node->iss_NumScanKeys, node->iss_NumOrderByKeys, piscan); @@ -1729,3 +1800,25 @@ ExecIndexScanInitializeWorker(IndexScanState *node, node->iss_ScanKeys, node->iss_NumScanKeys, node->iss_OrderByKeys, node->iss_NumOrderByKeys); } + +/* ---------------------------------------------------------------- + * ExecIndexScanRetrieveInstrumentation + * + * Transfer index scan statistics from DSM to private memory. 
+ * ---------------------------------------------------------------- + */ +void +ExecIndexScanRetrieveInstrumentation(IndexScanState *node) +{ + SharedIndexScanInstrumentation *SharedInfo = node->iss_SharedInfo; + size_t size; + + if (SharedInfo == NULL) + return; + + /* Create a copy of SharedInfo in backend-local memory */ + size = offsetof(SharedIndexScanInstrumentation, winstrument) + + SharedInfo->num_workers * sizeof(IndexScanInstrumentation); + node->iss_SharedInfo = palloc(size); + memcpy(node->iss_SharedInfo, SharedInfo, size); +} diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index e9eac5a8c1f..5b35debc8ff 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6551,7 +6551,7 @@ get_actual_variable_endpoint(Relation heapRel, GlobalVisTestFor(heapRel)); index_scan = index_beginscan(heapRel, indexRel, - &SnapshotNonVacuumable, + &SnapshotNonVacuumable, NULL, 1, 0); /* Set it up for index-only scan */ index_scan->xs_want_itup = true; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 1be8739573f..5b2ab181b5f 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -26,6 +26,27 @@ /* We don't want this file to depend on execnodes.h. 
*/ struct IndexInfo; +/* + * Struct for statistics maintained by amgettuple and amgetbitmap + * + * Note: IndexScanInstrumentation can't contain any pointers, since it is + * copied into a SharedIndexScanInstrumentation during parallel scans + */ +typedef struct IndexScanInstrumentation +{ + /* Index search count (incremented with pgstat_count_index_scan call) */ + uint64 nsearches; +} IndexScanInstrumentation; + +/* + * Struct for every worker's IndexScanInstrumentation, stored in shared memory + */ +typedef struct SharedIndexScanInstrumentation +{ + int num_workers; + IndexScanInstrumentation winstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedIndexScanInstrumentation; + /* * Struct for statistics returned by ambuild */ @@ -157,9 +178,11 @@ extern void index_insert_cleanup(Relation indexRelation, extern IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, + IndexScanInstrumentation *instrument, int nkeys, int norderbys); extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation, Snapshot snapshot, + IndexScanInstrumentation *instrument, int nkeys); extern void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, @@ -168,13 +191,20 @@ extern void index_endscan(IndexScanDesc scan); extern void index_markpos(IndexScanDesc scan); extern void index_restrpos(IndexScanDesc scan); extern Size index_parallelscan_estimate(Relation indexRelation, - int nkeys, int norderbys, Snapshot snapshot); + int nkeys, int norderbys, Snapshot snapshot, + bool instrument, bool parallel_aware, + int nworkers); extern void index_parallelscan_initialize(Relation heapRelation, Relation indexRelation, Snapshot snapshot, + bool instrument, bool parallel_aware, + int nworkers, + SharedIndexScanInstrumentation **sharedinfo, ParallelIndexScanDesc target); extern void index_parallelrescan(IndexScanDesc scan); extern IndexScanDesc index_beginscan_parallel(Relation heaprel, - Relation indexrel, int nkeys, int norderbys, + Relation indexrel, + 
IndexScanInstrumentation *instrument, + int nkeys, int norderbys, ParallelIndexScanDesc pscan); extern ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index dc6e0184284..b5e0fb386c0 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -123,6 +123,8 @@ typedef struct IndexFetchTableData Relation rel; } IndexFetchTableData; +struct IndexScanInstrumentation; + /* * We use the same IndexScanDescData structure for both amgettuple-based * and amgetbitmap-based index scans. Some fields are only relevant in @@ -150,6 +152,12 @@ typedef struct IndexScanDescData /* index access method's private state */ void *opaque; /* access-method-specific info */ + /* + * Instrumentation counters maintained by all index AMs during both + * amgettuple calls and amgetbitmap calls (unless field remains NULL) + */ + struct IndexScanInstrumentation *instrument; + /* * In an index-only scan, a successful amgettuple call must fill either * xs_itup (and xs_itupdesc) or xs_hitup (and xs_hitupdesc) to provide the @@ -188,7 +196,8 @@ typedef struct ParallelIndexScanDescData { RelFileLocator ps_locator; /* physical table relation to scan */ RelFileLocator ps_indexlocator; /* physical index relation to scan */ - Size ps_offset; /* Offset in bytes of am specific structure */ + Size ps_offset_ins; /* Offset to SharedIndexScanInstrumentation */ + Size ps_offset_am; /* Offset to am-specific structure */ char ps_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; } ParallelIndexScanDescData; diff --git a/src/include/executor/nodeBitmapIndexscan.h b/src/include/executor/nodeBitmapIndexscan.h index b51cb184e0d..b6a5ae25ed1 100644 --- a/src/include/executor/nodeBitmapIndexscan.h +++ b/src/include/executor/nodeBitmapIndexscan.h @@ -14,11 +14,17 @@ #ifndef NODEBITMAPINDEXSCAN_H #define NODEBITMAPINDEXSCAN_H +#include "access/parallel.h" #include "nodes/execnodes.h" extern BitmapIndexScanState 
*ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags); extern Node *MultiExecBitmapIndexScan(BitmapIndexScanState *node); extern void ExecEndBitmapIndexScan(BitmapIndexScanState *node); extern void ExecReScanBitmapIndexScan(BitmapIndexScanState *node); +extern void ExecBitmapIndexScanEstimate(BitmapIndexScanState *node, ParallelContext *pcxt); +extern void ExecBitmapIndexScanInitializeDSM(BitmapIndexScanState *node, ParallelContext *pcxt); +extern void ExecBitmapIndexScanInitializeWorker(BitmapIndexScanState *node, + ParallelWorkerContext *pwcxt); +extern void ExecBitmapIndexScanRetrieveInstrumentation(BitmapIndexScanState *node); #endif /* NODEBITMAPINDEXSCAN_H */ diff --git a/src/include/executor/nodeIndexonlyscan.h b/src/include/executor/nodeIndexonlyscan.h index c27d8eb6d4d..ae85dee6d8f 100644 --- a/src/include/executor/nodeIndexonlyscan.h +++ b/src/include/executor/nodeIndexonlyscan.h @@ -32,5 +32,6 @@ extern void ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node, ParallelContext *pcxt); extern void ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, ParallelWorkerContext *pwcxt); +extern void ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node); #endif /* NODEINDEXONLYSCAN_H */ diff --git a/src/include/executor/nodeIndexscan.h b/src/include/executor/nodeIndexscan.h index 1c63d0615fd..08f0a148db6 100644 --- a/src/include/executor/nodeIndexscan.h +++ b/src/include/executor/nodeIndexscan.h @@ -28,6 +28,7 @@ extern void ExecIndexScanInitializeDSM(IndexScanState *node, ParallelContext *pc extern void ExecIndexScanReInitializeDSM(IndexScanState *node, ParallelContext *pcxt); extern void ExecIndexScanInitializeWorker(IndexScanState *node, ParallelWorkerContext *pwcxt); +extern void ExecIndexScanRetrieveInstrumentation(IndexScanState *node); /* * These routines are exported to share code with nodeIndexonlyscan.c and diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 
a323fa98bbb..575b0b1bd24 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1680,6 +1680,8 @@ typedef struct * RuntimeContext expr context for evaling runtime Skeys * RelationDesc index relation descriptor * ScanDesc index scan descriptor + * Instrument local index scan instrumentation + * SharedInfo parallel worker instrumentation (no leader entry) * * ReorderQueue tuples that need reordering due to re-check * ReachedEnd have we fetched all tuples from index already? @@ -1706,6 +1708,8 @@ typedef struct IndexScanState ExprContext *iss_RuntimeContext; Relation iss_RelationDesc; struct IndexScanDescData *iss_ScanDesc; + IndexScanInstrumentation iss_Instrument; + SharedIndexScanInstrumentation *iss_SharedInfo; /* These are needed for re-checking ORDER BY expr ordering */ pairingheap *iss_ReorderQueue; @@ -1732,6 +1736,8 @@ typedef struct IndexScanState * RuntimeContext expr context for evaling runtime Skeys * RelationDesc index relation descriptor * ScanDesc index scan descriptor + * Instrument local index scan instrumentation + * SharedInfo parallel worker instrumentation (no leader entry) * TableSlot slot for holding tuples fetched from the table * VMBuffer buffer in use for visibility map testing, if any * PscanLen size of parallel index-only scan descriptor @@ -1753,6 +1759,8 @@ typedef struct IndexOnlyScanState ExprContext *ioss_RuntimeContext; Relation ioss_RelationDesc; struct IndexScanDescData *ioss_ScanDesc; + IndexScanInstrumentation ioss_Instrument; + SharedIndexScanInstrumentation *ioss_SharedInfo; TupleTableSlot *ioss_TableSlot; Buffer ioss_VMBuffer; Size ioss_PscanLen; @@ -1774,6 +1782,8 @@ typedef struct IndexOnlyScanState * RuntimeContext expr context for evaling runtime Skeys * RelationDesc index relation descriptor * ScanDesc index scan descriptor + * Instrument local index scan instrumentation + * SharedInfo parallel worker instrumentation (no leader entry) * ---------------- */ typedef struct BitmapIndexScanState @@ 
-1790,6 +1800,8 @@ typedef struct BitmapIndexScanState ExprContext *biss_RuntimeContext; Relation biss_RelationDesc; struct IndexScanDescData *biss_ScanDesc; + IndexScanInstrumentation biss_Instrument; + SharedIndexScanInstrumentation *biss_SharedInfo; } BitmapIndexScanState; /* ---------------- diff --git a/src/test/regress/expected/brin_multi.out b/src/test/regress/expected/brin_multi.out index 991b7eacada..cb5b5e53e86 100644 --- a/src/test/regress/expected/brin_multi.out +++ b/src/test/regress/expected/brin_multi.out @@ -853,7 +853,8 @@ SELECT * FROM brin_date_test WHERE a = '2023-01-01'::date; Recheck Cond: (a = '2023-01-01'::date) -> Bitmap Index Scan on brin_date_test_a_idx (actual rows=0.00 loops=1) Index Cond: (a = '2023-01-01'::date) -(4 rows) + Index Searches: 1 +(5 rows) DROP TABLE brin_date_test; RESET enable_seqscan; @@ -872,7 +873,8 @@ SELECT * FROM brin_timestamp_test WHERE a = '2023-01-01'::timestamp; Recheck Cond: (a = '2023-01-01 00:00:00'::timestamp without time zone) -> Bitmap Index Scan on brin_timestamp_test_a_idx (actual rows=0.00 loops=1) Index Cond: (a = '2023-01-01 00:00:00'::timestamp without time zone) -(4 rows) + Index Searches: 1 +(5 rows) EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM brin_timestamp_test WHERE a = '1900-01-01'::timestamp; @@ -882,7 +884,8 @@ SELECT * FROM brin_timestamp_test WHERE a = '1900-01-01'::timestamp; Recheck Cond: (a = '1900-01-01 00:00:00'::timestamp without time zone) -> Bitmap Index Scan on brin_timestamp_test_a_idx (actual rows=0.00 loops=1) Index Cond: (a = '1900-01-01 00:00:00'::timestamp without time zone) -(4 rows) + Index Searches: 1 +(5 rows) DROP TABLE brin_timestamp_test; RESET enable_seqscan; @@ -900,7 +903,8 @@ SELECT * FROM brin_date_test WHERE a = '2023-01-01'::date; Recheck Cond: (a = '2023-01-01'::date) -> Bitmap Index Scan on brin_date_test_a_idx (actual rows=0.00 loops=1) Index Cond: (a = '2023-01-01'::date) -(4 rows) + Index Searches: 1 +(5 rows) EXPLAIN 
(ANALYZE, TIMING OFF, COSTS OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM brin_date_test WHERE a = '1900-01-01'::date; @@ -910,7 +914,8 @@ SELECT * FROM brin_date_test WHERE a = '1900-01-01'::date; Recheck Cond: (a = '1900-01-01'::date) -> Bitmap Index Scan on brin_date_test_a_idx (actual rows=0.00 loops=1) Index Cond: (a = '1900-01-01'::date) -(4 rows) + Index Searches: 1 +(5 rows) DROP TABLE brin_date_test; RESET enable_seqscan; @@ -929,7 +934,8 @@ SELECT * FROM brin_interval_test WHERE a = '-30 years'::interval; Recheck Cond: (a = '@ 30 years ago'::interval) -> Bitmap Index Scan on brin_interval_test_a_idx (actual rows=0.00 loops=1) Index Cond: (a = '@ 30 years ago'::interval) -(4 rows) + Index Searches: 1 +(5 rows) EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM brin_interval_test WHERE a = '30 years'::interval; @@ -939,7 +945,8 @@ SELECT * FROM brin_interval_test WHERE a = '30 years'::interval; Recheck Cond: (a = '@ 30 years'::interval) -> Bitmap Index Scan on brin_interval_test_a_idx (actual rows=0.00 loops=1) Index Cond: (a = '@ 30 years'::interval) -(4 rows) + Index Searches: 1 +(5 rows) DROP TABLE brin_interval_test; RESET enable_seqscan; @@ -957,7 +964,8 @@ SELECT * FROM brin_interval_test WHERE a = '-30 years'::interval; Recheck Cond: (a = '@ 30 years ago'::interval) -> Bitmap Index Scan on brin_interval_test_a_idx (actual rows=0.00 loops=1) Index Cond: (a = '@ 30 years ago'::interval) -(4 rows) + Index Searches: 1 +(5 rows) EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM brin_interval_test WHERE a = '30 years'::interval; @@ -967,7 +975,8 @@ SELECT * FROM brin_interval_test WHERE a = '30 years'::interval; Recheck Cond: (a = '@ 30 years'::interval) -> Bitmap Index Scan on brin_interval_test_a_idx (actual rows=0.00 loops=1) Index Cond: (a = '@ 30 years'::interval) -(4 rows) + Index Searches: 1 +(5 rows) DROP TABLE brin_interval_test; RESET enable_seqscan; diff --git 
a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out index 22f2d32845e..38dfaf021c9 100644 --- a/src/test/regress/expected/memoize.out +++ b/src/test/regress/expected/memoize.out @@ -22,8 +22,9 @@ begin ln := regexp_replace(ln, 'Evictions: 0', 'Evictions: Zero'); ln := regexp_replace(ln, 'Evictions: \d+', 'Evictions: N'); ln := regexp_replace(ln, 'Memory Usage: \d+', 'Memory Usage: N'); - ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N'); - ln := regexp_replace(ln, 'loops=\d+', 'loops=N'); + ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N'); + ln := regexp_replace(ln, 'loops=\d+', 'loops=N'); + ln := regexp_replace(ln, 'Index Searches: \d+', 'Index Searches: N'); return next ln; end loop; end; @@ -49,7 +50,8 @@ WHERE t2.unique1 < 1000;', false); -> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1.00 loops=N) Index Cond: (unique1 = t2.twenty) Heap Fetches: N -(12 rows) + Index Searches: N +(13 rows) -- And check we get the expected results. SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 @@ -80,7 +82,8 @@ WHERE t1.unique1 < 1000;', false); -> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1.00 loops=N) Index Cond: (unique1 = t1.twenty) Heap Fetches: N -(12 rows) + Index Searches: N +(13 rows) -- And check we get the expected results. SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1, @@ -106,6 +109,7 @@ WHERE t1.unique1 < 10;', false); -> Nested Loop Left Join (actual rows=20.00 loops=N) -> Index Scan using tenk1_unique1 on tenk1 t1 (actual rows=10.00 loops=N) Index Cond: (unique1 < 10) + Index Searches: N -> Memoize (actual rows=2.00 loops=N) Cache Key: t1.two Cache Mode: binary @@ -115,7 +119,8 @@ WHERE t1.unique1 < 10;', false); Rows Removed by Filter: 2 -> Index Scan using tenk1_unique1 on tenk1 t2_1 (actual rows=4.00 loops=N) Index Cond: (unique1 < 4) -(13 rows) + Index Searches: N +(15 rows) -- And check we get the expected results. 
SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN @@ -149,7 +154,8 @@ WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false); Filter: ((t1.two + 1) = unique1) Rows Removed by Filter: 9999 Heap Fetches: N -(13 rows) + Index Searches: N +(14 rows) -- And check we get the expected results. SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN @@ -219,7 +225,8 @@ ON t1.x = t2.t::numeric AND t1.t::numeric = t2.x;', false); Index Cond: (x = (t1.t)::numeric) Filter: (t1.x = (t)::numeric) Heap Fetches: N -(10 rows) + Index Searches: N +(11 rows) DROP TABLE expr_key; -- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions @@ -246,7 +253,8 @@ WHERE t2.unique1 < 1200;', true); -> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1.00 loops=N) Index Cond: (unique1 = t2.thousand) Heap Fetches: N -(12 rows) + Index Searches: N +(13 rows) CREATE TABLE flt (f float); CREATE INDEX flt_f_idx ON flt (f); @@ -261,6 +269,7 @@ SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false); Nested Loop (actual rows=4.00 loops=N) -> Index Only Scan using flt_f_idx on flt f1 (actual rows=2.00 loops=N) Heap Fetches: N + Index Searches: N -> Memoize (actual rows=2.00 loops=N) Cache Key: f1.f Cache Mode: logical @@ -268,7 +277,8 @@ SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false); -> Index Only Scan using flt_f_idx on flt f2 (actual rows=2.00 loops=N) Index Cond: (f = f1.f) Heap Fetches: N -(10 rows) + Index Searches: N +(12 rows) -- Ensure memoize operates in binary mode SELECT explain_memoize(' @@ -278,6 +288,7 @@ SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false); Nested Loop (actual rows=4.00 loops=N) -> Index Only Scan using flt_f_idx on flt f1 (actual rows=2.00 loops=N) Heap Fetches: N + Index Searches: N -> Memoize (actual rows=2.00 loops=N) Cache Key: f1.f Cache Mode: binary @@ -285,7 +296,8 @@ SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false); -> Index Only Scan using flt_f_idx on flt f2 (actual rows=2.00 
loops=N) Index Cond: (f <= f1.f) Heap Fetches: N -(10 rows) + Index Searches: N +(12 rows) DROP TABLE flt; -- Exercise Memoize in binary mode with a large fixed width type and a @@ -311,7 +323,8 @@ SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false); Hits: 3 Misses: 3 Evictions: Zero Overflows: 0 Memory Usage: NkB -> Index Scan using strtest_n_idx on strtest s2 (actual rows=4.00 loops=N) Index Cond: (n <= s1.n) -(9 rows) + Index Searches: N +(10 rows) -- Ensure we get 3 hits and 3 misses SELECT explain_memoize(' @@ -327,7 +340,8 @@ SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false); Hits: 3 Misses: 3 Evictions: Zero Overflows: 0 Memory Usage: NkB -> Index Scan using strtest_t_idx on strtest s2 (actual rows=4.00 loops=N) Index Cond: (t <= s1.t) -(9 rows) + Index Searches: N +(10 rows) DROP TABLE strtest; -- Ensure memoize works with partitionwise join @@ -348,6 +362,7 @@ SELECT * FROM prt t1 INNER JOIN prt t2 ON t1.a = t2.a;', false); -> Nested Loop (actual rows=16.00 loops=N) -> Index Only Scan using iprt_p1_a on prt_p1 t1_1 (actual rows=4.00 loops=N) Heap Fetches: N + Index Searches: N -> Memoize (actual rows=4.00 loops=N) Cache Key: t1_1.a Cache Mode: logical @@ -355,9 +370,11 @@ SELECT * FROM prt t1 INNER JOIN prt t2 ON t1.a = t2.a;', false); -> Index Only Scan using iprt_p1_a on prt_p1 t2_1 (actual rows=4.00 loops=N) Index Cond: (a = t1_1.a) Heap Fetches: N + Index Searches: N -> Nested Loop (actual rows=16.00 loops=N) -> Index Only Scan using iprt_p2_a on prt_p2 t1_2 (actual rows=4.00 loops=N) Heap Fetches: N + Index Searches: N -> Memoize (actual rows=4.00 loops=N) Cache Key: t1_2.a Cache Mode: logical @@ -365,7 +382,8 @@ SELECT * FROM prt t1 INNER JOIN prt t2 ON t1.a = t2.a;', false); -> Index Only Scan using iprt_p2_a on prt_p2 t2_2 (actual rows=4.00 loops=N) Index Cond: (a = t1_2.a) Heap Fetches: N -(21 rows) + Index Searches: N +(25 rows) -- Ensure memoize works with parameterized union-all Append path SET 
enable_partitionwise_join TO off; @@ -378,6 +396,7 @@ ON t1.a = t2.a;', false); Nested Loop (actual rows=16.00 loops=N) -> Index Only Scan using iprt_p1_a on prt_p1 t1 (actual rows=4.00 loops=N) Heap Fetches: N + Index Searches: N -> Memoize (actual rows=4.00 loops=N) Cache Key: t1.a Cache Mode: logical @@ -386,10 +405,12 @@ ON t1.a = t2.a;', false); -> Index Only Scan using iprt_p1_a on prt_p1 (actual rows=4.00 loops=N) Index Cond: (a = t1.a) Heap Fetches: N + Index Searches: N -> Index Only Scan using iprt_p2_a on prt_p2 (actual rows=0.00 loops=N) Index Cond: (a = t1.a) Heap Fetches: N -(14 rows) + Index Searches: N +(17 rows) DROP TABLE prt; RESET enable_partitionwise_join; diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index d95d2395d48..34f2b0b8dbd 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -2369,6 +2369,10 @@ begin ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N'); ln := regexp_replace(ln, 'actual rows=\d+(?:\.\d+)? 
loops=\d+', 'actual rows=N loops=N'); ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N'); + perform regexp_matches(ln, 'Index Searches: \d+'); + if found then + continue; + end if; return next ln; end loop; end; @@ -2686,47 +2690,56 @@ select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 Filter: (b = (InitPlan 2).col1) -> Bitmap Index Scan on ab_a1_b1_a_idx (never executed) Index Cond: (a = (InitPlan 1).col1) + Index Searches: 0 -> Bitmap Heap Scan on ab_a1_b2 ab_2 (never executed) Recheck Cond: (a = (InitPlan 1).col1) Filter: (b = (InitPlan 2).col1) -> Bitmap Index Scan on ab_a1_b2_a_idx (never executed) Index Cond: (a = (InitPlan 1).col1) + Index Searches: 0 -> Bitmap Heap Scan on ab_a1_b3 ab_3 (never executed) Recheck Cond: (a = (InitPlan 1).col1) Filter: (b = (InitPlan 2).col1) -> Bitmap Index Scan on ab_a1_b3_a_idx (never executed) Index Cond: (a = (InitPlan 1).col1) + Index Searches: 0 -> Bitmap Heap Scan on ab_a2_b1 ab_4 (never executed) Recheck Cond: (a = (InitPlan 1).col1) Filter: (b = (InitPlan 2).col1) -> Bitmap Index Scan on ab_a2_b1_a_idx (never executed) Index Cond: (a = (InitPlan 1).col1) + Index Searches: 0 -> Bitmap Heap Scan on ab_a2_b2 ab_5 (never executed) Recheck Cond: (a = (InitPlan 1).col1) Filter: (b = (InitPlan 2).col1) -> Bitmap Index Scan on ab_a2_b2_a_idx (never executed) Index Cond: (a = (InitPlan 1).col1) + Index Searches: 0 -> Bitmap Heap Scan on ab_a2_b3 ab_6 (never executed) Recheck Cond: (a = (InitPlan 1).col1) Filter: (b = (InitPlan 2).col1) -> Bitmap Index Scan on ab_a2_b3_a_idx (never executed) Index Cond: (a = (InitPlan 1).col1) + Index Searches: 0 -> Bitmap Heap Scan on ab_a3_b1 ab_7 (never executed) Recheck Cond: (a = (InitPlan 1).col1) Filter: (b = (InitPlan 2).col1) -> Bitmap Index Scan on ab_a3_b1_a_idx (never executed) Index Cond: (a = (InitPlan 1).col1) + Index Searches: 0 -> Bitmap Heap Scan on ab_a3_b2 ab_8 (actual rows=0.00 loops=1) Recheck Cond: (a = 
(InitPlan 1).col1) Filter: (b = (InitPlan 2).col1) -> Bitmap Index Scan on ab_a3_b2_a_idx (actual rows=0.00 loops=1) Index Cond: (a = (InitPlan 1).col1) + Index Searches: 1 -> Bitmap Heap Scan on ab_a3_b3 ab_9 (never executed) Recheck Cond: (a = (InitPlan 1).col1) Filter: (b = (InitPlan 2).col1) -> Bitmap Index Scan on ab_a3_b3_a_idx (never executed) Index Cond: (a = (InitPlan 1).col1) -(52 rows) + Index Searches: 0 +(61 rows) -- Test run-time partition pruning with UNION ALL parents explain (analyze, costs off, summary off, timing off, buffers off) @@ -2742,16 +2755,19 @@ select * from (select * from ab where a = 1 union all select * from ab) ab where Filter: (b = (InitPlan 1).col1) -> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0.00 loops=1) Index Cond: (a = 1) + Index Searches: 1 -> Bitmap Heap Scan on ab_a1_b2 ab_12 (never executed) Recheck Cond: (a = 1) Filter: (b = (InitPlan 1).col1) -> Bitmap Index Scan on ab_a1_b2_a_idx (never executed) Index Cond: (a = 1) + Index Searches: 0 -> Bitmap Heap Scan on ab_a1_b3 ab_13 (never executed) Recheck Cond: (a = 1) Filter: (b = (InitPlan 1).col1) -> Bitmap Index Scan on ab_a1_b3_a_idx (never executed) Index Cond: (a = 1) + Index Searches: 0 -> Seq Scan on ab_a1_b1 ab_1 (actual rows=0.00 loops=1) Filter: (b = (InitPlan 1).col1) -> Seq Scan on ab_a1_b2 ab_2 (never executed) @@ -2770,7 +2786,7 @@ select * from (select * from ab where a = 1 union all select * from ab) ab where Filter: (b = (InitPlan 1).col1) -> Seq Scan on ab_a3_b3 ab_9 (never executed) Filter: (b = (InitPlan 1).col1) -(37 rows) +(40 rows) -- A case containing a UNION ALL with a non-partitioned child. 
explain (analyze, costs off, summary off, timing off, buffers off) @@ -2786,16 +2802,19 @@ select * from (select * from ab where a = 1 union all (values(10,5)) union all s Filter: (b = (InitPlan 1).col1) -> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0.00 loops=1) Index Cond: (a = 1) + Index Searches: 1 -> Bitmap Heap Scan on ab_a1_b2 ab_12 (never executed) Recheck Cond: (a = 1) Filter: (b = (InitPlan 1).col1) -> Bitmap Index Scan on ab_a1_b2_a_idx (never executed) Index Cond: (a = 1) + Index Searches: 0 -> Bitmap Heap Scan on ab_a1_b3 ab_13 (never executed) Recheck Cond: (a = 1) Filter: (b = (InitPlan 1).col1) -> Bitmap Index Scan on ab_a1_b3_a_idx (never executed) Index Cond: (a = 1) + Index Searches: 0 -> Result (actual rows=0.00 loops=1) One-Time Filter: (5 = (InitPlan 1).col1) -> Seq Scan on ab_a1_b1 ab_1 (actual rows=0.00 loops=1) @@ -2816,7 +2835,7 @@ select * from (select * from ab where a = 1 union all (values(10,5)) union all s Filter: (b = (InitPlan 1).col1) -> Seq Scan on ab_a3_b3 ab_9 (never executed) Filter: (b = (InitPlan 1).col1) -(39 rows) +(42 rows) -- Another UNION ALL test, but containing a mix of exec init and exec run-time pruning. 
create table xy_1 (x int, y int); @@ -2887,16 +2906,19 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;'); Recheck Cond: (a = 1) -> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0.00 loops=1) Index Cond: (a = 1) + Index Searches: 1 -> Bitmap Heap Scan on ab_a1_b2 ab_a1_2 (actual rows=1.00 loops=1) Recheck Cond: (a = 1) Heap Blocks: exact=1 -> Bitmap Index Scan on ab_a1_b2_a_idx (actual rows=1.00 loops=1) Index Cond: (a = 1) + Index Searches: 1 -> Bitmap Heap Scan on ab_a1_b3 ab_a1_3 (actual rows=0.00 loops=1) Recheck Cond: (a = 1) Heap Blocks: exact=1 -> Bitmap Index Scan on ab_a1_b3_a_idx (actual rows=1.00 loops=1) Index Cond: (a = 1) + Index Searches: 1 -> Materialize (actual rows=1.00 loops=1) Storage: Memory Maximum Storage: NkB -> Append (actual rows=1.00 loops=1) @@ -2904,17 +2926,20 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;'); Recheck Cond: (a = 1) -> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0.00 loops=1) Index Cond: (a = 1) + Index Searches: 1 -> Bitmap Heap Scan on ab_a1_b2 ab_2 (actual rows=1.00 loops=1) Recheck Cond: (a = 1) Heap Blocks: exact=1 -> Bitmap Index Scan on ab_a1_b2_a_idx (actual rows=1.00 loops=1) Index Cond: (a = 1) + Index Searches: 1 -> Bitmap Heap Scan on ab_a1_b3 ab_3 (actual rows=0.00 loops=1) Recheck Cond: (a = 1) Heap Blocks: exact=1 -> Bitmap Index Scan on ab_a1_b3_a_idx (actual rows=1.00 loops=1) Index Cond: (a = 1) -(37 rows) + Index Searches: 1 +(43 rows) table ab; a | b @@ -2990,17 +3015,23 @@ select * from tbl1 join tprt on tbl1.col1 > tprt.col1; -> Append (actual rows=3.00 loops=2) -> Index Scan using tprt1_idx on tprt_1 (actual rows=2.00 loops=2) Index Cond: (col1 < tbl1.col1) + Index Searches: 2 -> Index Scan using tprt2_idx on tprt_2 (actual rows=2.00 loops=1) Index Cond: (col1 < tbl1.col1) + Index Searches: 1 -> Index Scan using tprt3_idx on tprt_3 (never executed) Index Cond: (col1 < tbl1.col1) + Index Searches: 0 -> Index Scan using tprt4_idx on tprt_4 (never 
executed) Index Cond: (col1 < tbl1.col1) + Index Searches: 0 -> Index Scan using tprt5_idx on tprt_5 (never executed) Index Cond: (col1 < tbl1.col1) + Index Searches: 0 -> Index Scan using tprt6_idx on tprt_6 (never executed) Index Cond: (col1 < tbl1.col1) -(15 rows) + Index Searches: 0 +(21 rows) explain (analyze, costs off, summary off, timing off, buffers off) select * from tbl1 join tprt on tbl1.col1 = tprt.col1; @@ -3011,17 +3042,23 @@ select * from tbl1 join tprt on tbl1.col1 = tprt.col1; -> Append (actual rows=1.00 loops=2) -> Index Scan using tprt1_idx on tprt_1 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt2_idx on tprt_2 (actual rows=1.00 loops=2) Index Cond: (col1 = tbl1.col1) + Index Searches: 2 -> Index Scan using tprt3_idx on tprt_3 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt4_idx on tprt_4 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt5_idx on tprt_5 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt6_idx on tprt_6 (never executed) Index Cond: (col1 = tbl1.col1) -(15 rows) + Index Searches: 0 +(21 rows) select tbl1.col1, tprt.col1 from tbl1 inner join tprt on tbl1.col1 > tprt.col1 @@ -3056,17 +3093,23 @@ select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1; -> Append (actual rows=4.60 loops=5) -> Index Scan using tprt1_idx on tprt_1 (actual rows=2.00 loops=5) Index Cond: (col1 < tbl1.col1) + Index Searches: 5 -> Index Scan using tprt2_idx on tprt_2 (actual rows=2.75 loops=4) Index Cond: (col1 < tbl1.col1) + Index Searches: 4 -> Index Scan using tprt3_idx on tprt_3 (actual rows=1.00 loops=2) Index Cond: (col1 < tbl1.col1) + Index Searches: 2 -> Index Scan using tprt4_idx on tprt_4 (never executed) Index Cond: (col1 < tbl1.col1) + Index Searches: 0 -> Index Scan using tprt5_idx on tprt_5 (never executed) Index Cond: (col1 < tbl1.col1) + Index Searches: 0 -> Index 
Scan using tprt6_idx on tprt_6 (never executed) Index Cond: (col1 < tbl1.col1) -(15 rows) + Index Searches: 0 +(21 rows) explain (analyze, costs off, summary off, timing off, buffers off) select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1; @@ -3077,17 +3120,23 @@ select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1; -> Append (actual rows=0.60 loops=5) -> Index Scan using tprt1_idx on tprt_1 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt2_idx on tprt_2 (actual rows=1.00 loops=2) Index Cond: (col1 = tbl1.col1) + Index Searches: 2 -> Index Scan using tprt3_idx on tprt_3 (actual rows=0.33 loops=3) Index Cond: (col1 = tbl1.col1) + Index Searches: 3 -> Index Scan using tprt4_idx on tprt_4 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt5_idx on tprt_5 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt6_idx on tprt_6 (never executed) Index Cond: (col1 = tbl1.col1) -(15 rows) + Index Searches: 0 +(21 rows) select tbl1.col1, tprt.col1 from tbl1 inner join tprt on tbl1.col1 > tprt.col1 @@ -3141,17 +3190,23 @@ select * from tbl1 join tprt on tbl1.col1 < tprt.col1; -> Append (actual rows=1.00 loops=1) -> Index Scan using tprt1_idx on tprt_1 (never executed) Index Cond: (col1 > tbl1.col1) + Index Searches: 0 -> Index Scan using tprt2_idx on tprt_2 (never executed) Index Cond: (col1 > tbl1.col1) + Index Searches: 0 -> Index Scan using tprt3_idx on tprt_3 (never executed) Index Cond: (col1 > tbl1.col1) + Index Searches: 0 -> Index Scan using tprt4_idx on tprt_4 (never executed) Index Cond: (col1 > tbl1.col1) + Index Searches: 0 -> Index Scan using tprt5_idx on tprt_5 (never executed) Index Cond: (col1 > tbl1.col1) + Index Searches: 0 -> Index Scan using tprt6_idx on tprt_6 (actual rows=1.00 loops=1) Index Cond: (col1 > tbl1.col1) -(15 rows) + Index Searches: 1 +(21 rows) select tbl1.col1, tprt.col1 from tbl1 inner join tprt on 
tbl1.col1 < tprt.col1 @@ -3173,17 +3228,23 @@ select * from tbl1 join tprt on tbl1.col1 = tprt.col1; -> Append (actual rows=0.00 loops=1) -> Index Scan using tprt1_idx on tprt_1 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt2_idx on tprt_2 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt3_idx on tprt_3 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt4_idx on tprt_4 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt5_idx on tprt_5 (never executed) Index Cond: (col1 = tbl1.col1) + Index Searches: 0 -> Index Scan using tprt6_idx on tprt_6 (never executed) Index Cond: (col1 = tbl1.col1) -(15 rows) + Index Searches: 0 +(21 rows) select tbl1.col1, tprt.col1 from tbl1 inner join tprt on tbl1.col1 = tprt.col1 @@ -3513,10 +3574,12 @@ explain (analyze, costs off, summary off, timing off, buffers off) execute mt_q1 -> Index Scan using ma_test_p2_b_idx on ma_test_p2 ma_test_1 (actual rows=1.00 loops=1) Filter: ((a >= $1) AND ((a % 10) = 5)) Rows Removed by Filter: 9 + Index Searches: 1 -> Index Scan using ma_test_p3_b_idx on ma_test_p3 ma_test_2 (actual rows=1.00 loops=1) Filter: ((a >= $1) AND ((a % 10) = 5)) Rows Removed by Filter: 9 -(9 rows) + Index Searches: 1 +(11 rows) execute mt_q1(15); a @@ -3534,7 +3597,8 @@ explain (analyze, costs off, summary off, timing off, buffers off) execute mt_q1 -> Index Scan using ma_test_p3_b_idx on ma_test_p3 ma_test_1 (actual rows=1.00 loops=1) Filter: ((a >= $1) AND ((a % 10) = 5)) Rows Removed by Filter: 9 -(6 rows) + Index Searches: 1 +(7 rows) execute mt_q1(25); a @@ -3582,13 +3646,17 @@ explain (analyze, costs off, summary off, timing off, buffers off) select * from -> Limit (actual rows=1.00 loops=1) -> Index Scan using ma_test_p2_b_idx on ma_test_p2 (actual rows=1.00 loops=1) Index Cond: (b IS NOT NULL) + Index Searches: 1 -> Index Scan using 
ma_test_p1_b_idx on ma_test_p1 ma_test_1 (never executed) Filter: (a >= (InitPlan 2).col1) + Index Searches: 0 -> Index Scan using ma_test_p2_b_idx on ma_test_p2 ma_test_2 (actual rows=10.00 loops=1) Filter: (a >= (InitPlan 2).col1) + Index Searches: 1 -> Index Scan using ma_test_p3_b_idx on ma_test_p3 ma_test_3 (actual rows=10.00 loops=1) Filter: (a >= (InitPlan 2).col1) -(14 rows) + Index Searches: 1 +(18 rows) reset enable_seqscan; reset enable_sort; @@ -4159,13 +4227,17 @@ select * from rangep where b IN((select 1),(select 2)) order by a; Sort Key: rangep_2.a -> Index Scan using rangep_0_to_100_1_a_idx on rangep_0_to_100_1 rangep_2 (actual rows=0.00 loops=1) Filter: (b = ANY (ARRAY[(InitPlan 1).col1, (InitPlan 2).col1])) + Index Searches: 1 -> Index Scan using rangep_0_to_100_2_a_idx on rangep_0_to_100_2 rangep_3 (actual rows=0.00 loops=1) Filter: (b = ANY (ARRAY[(InitPlan 1).col1, (InitPlan 2).col1])) + Index Searches: 1 -> Index Scan using rangep_0_to_100_3_a_idx on rangep_0_to_100_3 rangep_4 (never executed) Filter: (b = ANY (ARRAY[(InitPlan 1).col1, (InitPlan 2).col1])) + Index Searches: 0 -> Index Scan using rangep_100_to_200_a_idx on rangep_100_to_200 rangep_5 (actual rows=0.00 loops=1) Filter: (b = ANY (ARRAY[(InitPlan 1).col1, (InitPlan 2).col1])) -(15 rows) + Index Searches: 1 +(19 rows) reset enable_sort; drop table rangep; diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out index cd79abc35db..bab0cc93ff5 100644 --- a/src/test/regress/expected/select.out +++ b/src/test/regress/expected/select.out @@ -764,7 +764,8 @@ select * from onek2 where unique2 = 11 and stringu1 = 'ATAAAA'; Index Scan using onek2_u2_prtl on onek2 (actual rows=1.00 loops=1) Index Cond: (unique2 = 11) Filter: (stringu1 = 'ATAAAA'::name) -(3 rows) + Index Searches: 1 +(4 rows) explain (costs off) select unique2 from onek2 where unique2 = 11 and stringu1 = 'ATAAAA'; diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql 
index d5aab4e5666..c0d47fa875a 100644 --- a/src/test/regress/sql/memoize.sql +++ b/src/test/regress/sql/memoize.sql @@ -23,8 +23,9 @@ begin ln := regexp_replace(ln, 'Evictions: 0', 'Evictions: Zero'); ln := regexp_replace(ln, 'Evictions: \d+', 'Evictions: N'); ln := regexp_replace(ln, 'Memory Usage: \d+', 'Memory Usage: N'); - ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N'); - ln := regexp_replace(ln, 'loops=\d+', 'loops=N'); + ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N'); + ln := regexp_replace(ln, 'loops=\d+', 'loops=N'); + ln := regexp_replace(ln, 'Index Searches: \d+', 'Index Searches: N'); return next ln; end loop; end; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 5f36d589b6b..4a2c74b0899 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -588,6 +588,10 @@ begin ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N'); ln := regexp_replace(ln, 'actual rows=\d+(?:\.\d+)? loops=\d+', 'actual rows=N loops=N'); ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N'); + perform regexp_matches(ln, 'Index Searches: \d+'); + if found then + continue; + end if; return next ln; end loop; end; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9840060997f..dfe2690bdd3 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1238,6 +1238,7 @@ IndexPath IndexRuntimeKeyInfo IndexScan IndexScanDesc +IndexScanInstrumentation IndexScanState IndexStateFlagsAction IndexStmt @@ -2666,6 +2667,7 @@ SharedExecutorInstrumentation SharedFileSet SharedHashInfo SharedIncrementalSortInfo +SharedIndexScanInstrumentation SharedInvalCatalogMsg SharedInvalCatcacheMsg SharedInvalRelcacheMsg