diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 54aecc1d1f1..f3b19d280c3 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -333,6 +333,7 @@ restartScanEntry:
 	entry->nlist = 0;
 	entry->matchBitmap = NULL;
 	entry->matchResult = NULL;
+	entry->matchNtuples = -1;
 	entry->reduceResult = false;
 	entry->predictNumberResult = 0;
 
@@ -828,7 +829,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 			 */
 			while (entry->matchResult == NULL ||
 				   (!entry->matchResult->lossy &&
-					entry->offset >= entry->matchResult->ntuples) ||
+					entry->offset >= entry->matchNtuples) ||
 				   entry->matchResult->blockno < advancePastBlk ||
 				   (ItemPointerIsLossyPage(&advancePast) &&
 					entry->matchResult->blockno == advancePastBlk))
@@ -845,9 +846,15 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 					break;
 				}
 
+				/* Exact pages need their tuple offsets extracted. */
+				if (!entry->matchResult->lossy)
+					entry->matchNtuples = tbm_extract_page_tuple(entry->matchResult,
+																 entry->matchOffsets,
+																 TBM_MAX_TUPLES_PER_PAGE);
+
 				/*
 				 * Reset counter to the beginning of entry->matchResult. Note:
-				 * entry->offset is still greater than matchResult->ntuples if
+				 * entry->offset is still greater than entry->matchNtuples if
 				 * matchResult is lossy. So, on next call we will get next
 				 * result from TIDBitmap.
 				 */
@@ -874,32 +881,35 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 			}
 
 			/*
-			 * Not a lossy page. Skip over any offsets <= advancePast, and
-			 * return that.
+			 * Not a lossy page. If tuple offsets were extracted,
+			 * entry->matchNtuples must be > -1
 			 */
+			Assert(entry->matchNtuples > -1);
+
+			/* Skip over any offsets <= advancePast, and return that. */
 			if (entry->matchResult->blockno == advancePastBlk)
 			{
-				Assert(entry->matchResult->ntuples > 0);
+				Assert(entry->matchNtuples > 0);
 
 				/*
 				 * First, do a quick check against the last offset on the
 				 * page. If that's > advancePast, so are all the other
 				 * offsets, so just go back to the top to get the next page.
 				 */
-				if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
+				if (entry->matchOffsets[entry->matchNtuples - 1] <= advancePastOff)
 				{
-					entry->offset = entry->matchResult->ntuples;
+					entry->offset = entry->matchNtuples;
 					continue;
 				}
 
 				/* Otherwise scan to find the first item > advancePast */
-				while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
+				while (entry->matchOffsets[entry->offset] <= advancePastOff)
 					entry->offset++;
 			}
 
 			ItemPointerSet(&entry->curItem,
 						   entry->matchResult->blockno,
-						   entry->matchResult->offsets[entry->offset]);
+						   entry->matchOffsets[entry->offset]);
 			entry->offset++;
 
 			/* Done unless we need to reduce the result */
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index 7d1e6615260..63ded6301e2 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -107,6 +107,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum,
 	scanEntry->matchBitmap = NULL;
 	scanEntry->matchIterator = NULL;
 	scanEntry->matchResult = NULL;
+	scanEntry->matchNtuples = -1;
 	scanEntry->list = NULL;
 	scanEntry->nlist = 0;
 	scanEntry->offset = InvalidOffsetNumber;
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 269d581c2ec..e78682c3cef 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2127,6 +2127,8 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	Snapshot	snapshot;
 	int			ntup;
 	TBMIterateResult *tbmres;
+	OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
+	int			noffsets = -1;
 
 	Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
 
@@ -2145,6 +2147,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 		if (tbmres == NULL)
 			return false;
 
+		/* Exact pages need their tuple offsets extracted. */
+		if (!tbmres->lossy)
+			noffsets = tbm_extract_page_tuple(tbmres, offsets,
+											  TBM_MAX_TUPLES_PER_PAGE);
+
 		/*
 		 * Ignore any claimed entries past what we think is the end of the
 		 * relation. It may have been extended after the start of our scan (we
@@ -2172,8 +2179,9 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 		/* can't be lossy in the skip_fetch case */
 		Assert(!tbmres->lossy);
 		Assert(bscan->rs_empty_tuples_pending >= 0);
+		Assert(noffsets > -1);
 
-		bscan->rs_empty_tuples_pending += tbmres->ntuples;
+		bscan->rs_empty_tuples_pending += noffsets;
 
 		return true;
 	}
@@ -2216,9 +2224,12 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 		 */
 		int			curslot;
 
-		for (curslot = 0; curslot < tbmres->ntuples; curslot++)
+		/* We must have extracted the tuple offsets by now */
+		Assert(noffsets > -1);
+
+		for (curslot = 0; curslot < noffsets; curslot++)
 		{
-			OffsetNumber offnum = tbmres->offsets[curslot];
+			OffsetNumber offnum = offsets[curslot];
 			ItemPointerData tid;
 			HeapTupleData heapTuple;
 
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index 3e0bca651f5..3d835024caa 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -40,7 +40,6 @@
 
 #include <limits.h>
 
-#include "access/htup_details.h"
 #include "common/hashfn.h"
 #include "common/int.h"
 #include "nodes/bitmapset.h"
@@ -48,14 +47,6 @@
 #include "storage/lwlock.h"
 #include "utils/dsa.h"
 
-/*
- * The maximum number of tuples per page is not large (typically 256 with
- * 8K pages, or 1024 with 32K pages).  So there's not much point in making
- * the per-page bitmaps variable size.  We just legislate that the size
- * is this:
- */
-#define MAX_TUPLES_PER_PAGE  MaxHeapTuplesPerPage
-
 /*
  * When we have to switch over to lossy storage, we use a data structure
  * with one bit per page, where all pages having the same number DIV
@@ -67,7 +58,7 @@
  * table, using identical data structures.  (This is because the memory
  * management for hashtables doesn't easily/efficiently allow space to be
  * transferred easily from one hashtable to another.)  Therefore it's best
- * if PAGES_PER_CHUNK is the same as MAX_TUPLES_PER_PAGE, or at least not
+ * if PAGES_PER_CHUNK is the same as TBM_MAX_TUPLES_PER_PAGE, or at least not
  * too different.  But we also want PAGES_PER_CHUNK to be a power of 2 to
  * avoid expensive integer remainder operations.  So, define it like this:
  */
@@ -79,7 +70,7 @@
 #define BITNUM(x)	((x) % BITS_PER_BITMAPWORD)
 
 /* number of active words for an exact page: */
-#define WORDS_PER_PAGE	((MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1)
+#define WORDS_PER_PAGE	((TBM_MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1)
 /* number of active words for a lossy chunk: */
 #define WORDS_PER_CHUNK  ((PAGES_PER_CHUNK - 1) / BITS_PER_BITMAPWORD + 1)
 
@@ -181,7 +172,7 @@ struct TBMPrivateIterator
 	int			spageptr;		/* next spages index */
 	int			schunkptr;		/* next schunks index */
 	int			schunkbit;		/* next bit to check in current schunk */
-	TBMIterateResult output;	/* MUST BE LAST (because variable-size) */
+	TBMIterateResult output;
 };
 
 /*
@@ -222,7 +213,7 @@ struct TBMSharedIterator
 	PTEntryArray *ptbase;		/* pagetable element array */
 	PTIterationArray *ptpages;	/* sorted exact page index list */
 	PTIterationArray *ptchunks; /* sorted lossy page index list */
-	TBMIterateResult output;	/* MUST BE LAST (because variable-size) */
+	TBMIterateResult output;
 };
 
 /* Local function prototypes */
@@ -390,7 +381,7 @@ tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids,
 					bitnum;
 
 		/* safety check to ensure we don't overrun bit array bounds */
-		if (off < 1 || off > MAX_TUPLES_PER_PAGE)
+		if (off < 1 || off > TBM_MAX_TUPLES_PER_PAGE)
 			elog(ERROR, "tuple offset out of range: %u", off);
 
 		/*
@@ -696,9 +687,7 @@ tbm_begin_private_iterate(TIDBitmap *tbm)
-	 * Create the TBMPrivateIterator struct, with enough trailing space to
-	 * serve the needs of the TBMIterateResult sub-struct.
+	 * Create the TBMPrivateIterator struct.  No trailing space is needed
+	 * now that TBMIterateResult no longer embeds an offsets array.
 	 */
-	iterator = (TBMPrivateIterator *) palloc(sizeof(TBMPrivateIterator) +
-											 MAX_TUPLES_PER_PAGE *
-											 sizeof(OffsetNumber));
+	iterator = (TBMPrivateIterator *) palloc(sizeof(TBMPrivateIterator));
 	iterator->tbm = tbm;
 
 	/*
@@ -906,11 +895,16 @@ tbm_prepare_shared_iterate(TIDBitmap *tbm)
 /*
  * tbm_extract_page_tuple - extract the tuple offsets from a page
  *
- * The extracted offsets are stored into TBMIterateResult.
+ * iteritem must be an exact (non-lossy) result; lossy ones carry no page.
+ * Stores at most max_offsets offsets into offsets[] and returns the total
+ * number of offsets on the page, which may exceed max_offsets.
  */
-static inline int
-tbm_extract_page_tuple(PagetableEntry *page, TBMIterateResult *output)
+int
+tbm_extract_page_tuple(TBMIterateResult *iteritem,
+					   OffsetNumber *offsets,
+					   uint32 max_offsets)
 {
+	PagetableEntry *page = iteritem->internal_page;
 	int			wordnum;
 	int			ntuples = 0;
 
@@ -925,7 +919,11 @@ tbm_extract_page_tuple(PagetableEntry *page, TBMIterateResult *output)
 			while (w != 0)
 			{
 				if (w & 1)
-					output->offsets[ntuples++] = (OffsetNumber) off;
+				{
+					if ((uint32) ntuples < max_offsets)
+						offsets[ntuples] = (OffsetNumber) off;
+					ntuples++;
+				}
 				off++;
 				w >>= 1;
 			}
@@ -1012,9 +1010,9 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
 		{
 			/* Return a lossy page indicator from the chunk */
 			output->blockno = chunk_blockno;
-			output->ntuples = -1;
 			output->lossy = true;
 			output->recheck = true;
+			output->internal_page = NULL;
 			iterator->schunkbit++;
 			return output;
 		}
@@ -1023,7 +1021,6 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
 	if (iterator->spageptr < tbm->npages)
 	{
 		PagetableEntry *page;
-		int			ntuples;
 
 		/* In TBM_ONE_PAGE state, we don't allocate an spages[] array */
 		if (tbm->status == TBM_ONE_PAGE)
@@ -1031,10 +1028,8 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
 		else
 			page = tbm->spages[iterator->spageptr];
 
-		/* scan bitmap to extract individual offset numbers */
-		ntuples = tbm_extract_page_tuple(page, output);
+		output->internal_page = page;
 		output->blockno = page->blockno;
-		output->ntuples = ntuples;
 		output->lossy = false;
 		output->recheck = page->recheck;
 		iterator->spageptr++;
@@ -1107,9 +1102,9 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
 		{
 			/* Return a lossy page indicator from the chunk */
 			output->blockno = chunk_blockno;
-			output->ntuples = -1;
 			output->lossy = true;
 			output->recheck = true;
+			output->internal_page = NULL;
 			istate->schunkbit++;
 
 			LWLockRelease(&istate->lock);
@@ -1120,12 +1115,9 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
 	if (istate->spageptr < istate->npages)
 	{
 		PagetableEntry *page = &ptbase[idxpages[istate->spageptr]];
-		int			ntuples;
 
-		/* scan bitmap to extract individual offset numbers */
-		ntuples = tbm_extract_page_tuple(page, output);
+		output->internal_page = page;
 		output->blockno = page->blockno;
-		output->ntuples = ntuples;
 		output->lossy = false;
 		output->recheck = page->recheck;
 		istate->spageptr++;
@@ -1473,8 +1465,7 @@ tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
-	 * Create the TBMSharedIterator struct, with enough trailing space to
-	 * serve the needs of the TBMIterateResult sub-struct.
+	 * Create the TBMSharedIterator struct.  No trailing space is needed
+	 * now that TBMIterateResult no longer embeds an offsets array.
 	 */
-	iterator = (TBMSharedIterator *) palloc0(sizeof(TBMSharedIterator) +
-											 MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));
+	iterator = (TBMSharedIterator *) palloc0(sizeof(TBMSharedIterator));
 
 	istate = (TBMSharedIteratorState *) dsa_get_address(dsa, dp);
 
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index dcd1ae3fc34..50478db9820 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -354,6 +354,8 @@ typedef struct GinScanEntryData
 	TIDBitmap  *matchBitmap;
 	TBMPrivateIterator *matchIterator;
 	TBMIterateResult *matchResult;
+	OffsetNumber matchOffsets[TBM_MAX_TUPLES_PER_PAGE];
+	int			matchNtuples;
 
 	/* used for Posting list and one page in Posting tree */
 	ItemPointerData *list;
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index 8cd93d90a86..e185635c10b 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -22,9 +22,17 @@
 #ifndef TIDBITMAP_H
 #define TIDBITMAP_H
 
+#include "access/htup_details.h"
 #include "storage/itemptr.h"
 #include "utils/dsa.h"
 
+/*
+ * The maximum number of tuples per page is not large (typically 256 with
+ * 8K pages, or 1024 with 32K pages).  So there's not much point in making
+ * the per-page bitmaps variable size.  We just legislate that the size
+ * is this:
+ */
+#define TBM_MAX_TUPLES_PER_PAGE  MaxHeapTuplesPerPage
 
 /*
  * Actual bitmap representation is private to tidbitmap.c.  Callers can
@@ -53,12 +61,22 @@ typedef struct TBMIterator
 /* Result structure for tbm_iterate */
 typedef struct TBMIterateResult
 {
-	BlockNumber blockno;		/* page number containing tuples */
-	int			ntuples;		/* -1 when lossy */
+	BlockNumber blockno;		/* block number containing tuples */
+
 	bool		lossy;
-	bool		recheck;		/* should the tuples be rechecked? */
-	/* Note: recheck is always true if lossy */
-	OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
+
+	/*
+	 * Whether or not the tuples should be rechecked. This is always true if
+	 * the page is lossy but may also be true if the query requires recheck.
+	 */
+	bool		recheck;
+
+	/*
+	 * Pointer to the page containing the bitmap for this block (NULL if
+	 * lossy).  It is a void * to avoid exposing the details of the tidbitmap
+	 * PagetableEntry to API users.
+	 */
+	void	   *internal_page;
 } TBMIterateResult;
 
 /* function prototypes in nodes/tidbitmap.c */
@@ -75,6 +93,10 @@ extern void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno);
 extern void tbm_union(TIDBitmap *a, const TIDBitmap *b);
 extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b);
 
+extern int	tbm_extract_page_tuple(TBMIterateResult *iteritem,
+								   OffsetNumber *offsets,
+								   uint32 max_offsets);
+
 extern bool tbm_is_empty(const TIDBitmap *tbm);
 
 extern TBMPrivateIterator *tbm_begin_private_iterate(TIDBitmap *tbm);