mirror of
https://github.com/postgres/postgres.git
synced 2025-05-02 11:44:50 +03:00
Teach TID store to skip bitmap for small numbers of offsets
The header portion of BlocktableEntry has enough padding space for an array of 3 offsets (1 on 32-bit platforms). Use this space instead of having a sparse bitmap array. This will take up a constant amount of space no matter what the offsets are.
Reviewed (in an earlier version) by Masahiko Sawada.
Discussion: https://postgr.es/m/CANWCAZYw+_KAaUNruhJfE=h6WgtBKeDG32St8vBJBEY82bGVRQ@mail.gmail.com
Discussion: https://postgr.es/m/CAD21AoBci3Hujzijubomo1tdwH3XtQ9F89cTNQ4bsQijOmqnEw@mail.gmail.com
This commit is contained in:
parent
dd1f6b0c17
commit
f35bd9bf35
@ -34,6 +34,9 @@
|
|||||||
/* number of active words for a page: */
|
/* number of active words for a page: */
|
||||||
#define WORDS_PER_PAGE(n) ((n) / BITS_PER_BITMAPWORD + 1)
|
#define WORDS_PER_PAGE(n) ((n) / BITS_PER_BITMAPWORD + 1)
|
||||||
|
|
||||||
|
/* number of offsets we can store in the header of a BlocktableEntry */
|
||||||
|
#define NUM_FULL_OFFSETS ((sizeof(bitmapword) - sizeof(uint16)) / sizeof(OffsetNumber))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is named similarly to PagetableEntry in tidbitmap.c
|
* This is named similarly to PagetableEntry in tidbitmap.c
|
||||||
* because the two have a similar function.
|
* because the two have a similar function.
|
||||||
@ -41,6 +44,13 @@
|
|||||||
typedef struct BlocktableEntry
|
typedef struct BlocktableEntry
|
||||||
{
|
{
|
||||||
uint16 nwords;
|
uint16 nwords;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can store a small number of offsets here to avoid wasting space with
|
||||||
|
* a sparse bitmap.
|
||||||
|
*/
|
||||||
|
OffsetNumber full_offsets[NUM_FULL_OFFSETS];
|
||||||
|
|
||||||
bitmapword words[FLEXIBLE_ARRAY_MEMBER];
|
bitmapword words[FLEXIBLE_ARRAY_MEMBER];
|
||||||
} BlocktableEntry;
|
} BlocktableEntry;
|
||||||
#define MaxBlocktableEntrySize \
|
#define MaxBlocktableEntrySize \
|
||||||
@ -331,33 +341,53 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
|
|||||||
for (int i = 1; i < num_offsets; i++)
|
for (int i = 1; i < num_offsets; i++)
|
||||||
Assert(offsets[i] > offsets[i - 1]);
|
Assert(offsets[i] > offsets[i - 1]);
|
||||||
|
|
||||||
for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD;
|
memset(page, 0, offsetof(BlocktableEntry, words));
|
||||||
wordnum <= WORDNUM(offsets[num_offsets - 1]);
|
|
||||||
wordnum++, next_word_threshold += BITS_PER_BITMAPWORD)
|
|
||||||
{
|
|
||||||
word = 0;
|
|
||||||
|
|
||||||
while (idx < num_offsets)
|
if (num_offsets <= NUM_FULL_OFFSETS)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < num_offsets; i++)
|
||||||
{
|
{
|
||||||
OffsetNumber off = offsets[idx];
|
OffsetNumber off = offsets[i];
|
||||||
|
|
||||||
/* safety check to ensure we don't overrun bit array bounds */
|
/* safety check to ensure we don't overrun bit array bounds */
|
||||||
if (!OffsetNumberIsValid(off))
|
if (!OffsetNumberIsValid(off))
|
||||||
elog(ERROR, "tuple offset out of range: %u", off);
|
elog(ERROR, "tuple offset out of range: %u", off);
|
||||||
|
|
||||||
if (off >= next_word_threshold)
|
page->full_offsets[i] = off;
|
||||||
break;
|
|
||||||
|
|
||||||
word |= ((bitmapword) 1 << BITNUM(off));
|
|
||||||
idx++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* write out offset bitmap for this wordnum */
|
page->nwords = 0;
|
||||||
page->words[wordnum] = word;
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD;
|
||||||
|
wordnum <= WORDNUM(offsets[num_offsets - 1]);
|
||||||
|
wordnum++, next_word_threshold += BITS_PER_BITMAPWORD)
|
||||||
|
{
|
||||||
|
word = 0;
|
||||||
|
|
||||||
page->nwords = wordnum;
|
while (idx < num_offsets)
|
||||||
Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
|
{
|
||||||
|
OffsetNumber off = offsets[idx];
|
||||||
|
|
||||||
|
/* safety check to ensure we don't overrun bit array bounds */
|
||||||
|
if (!OffsetNumberIsValid(off))
|
||||||
|
elog(ERROR, "tuple offset out of range: %u", off);
|
||||||
|
|
||||||
|
if (off >= next_word_threshold)
|
||||||
|
break;
|
||||||
|
|
||||||
|
word |= ((bitmapword) 1 << BITNUM(off));
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* write out offset bitmap for this wordnum */
|
||||||
|
page->words[wordnum] = word;
|
||||||
|
}
|
||||||
|
|
||||||
|
page->nwords = wordnum;
|
||||||
|
Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
|
||||||
|
}
|
||||||
|
|
||||||
if (TidStoreIsShared(ts))
|
if (TidStoreIsShared(ts))
|
||||||
shared_ts_set(ts->tree.shared, blkno, page);
|
shared_ts_set(ts->tree.shared, blkno, page);
|
||||||
@ -384,14 +414,27 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
|
|||||||
if (page == NULL)
|
if (page == NULL)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
wordnum = WORDNUM(off);
|
if (page->nwords == 0)
|
||||||
bitnum = BITNUM(off);
|
{
|
||||||
|
/* we have offsets in the header */
|
||||||
/* no bitmap for the off */
|
for (int i = 0; i < NUM_FULL_OFFSETS; i++)
|
||||||
if (wordnum >= page->nwords)
|
{
|
||||||
|
if (page->full_offsets[i] == off)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
wordnum = WORDNUM(off);
|
||||||
|
bitnum = BITNUM(off);
|
||||||
|
|
||||||
return (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
|
/* no bitmap for the off */
|
||||||
|
if (wordnum >= page->nwords)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -511,25 +554,37 @@ tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
|
|||||||
result->num_offsets = 0;
|
result->num_offsets = 0;
|
||||||
result->blkno = blkno;
|
result->blkno = blkno;
|
||||||
|
|
||||||
for (wordnum = 0; wordnum < page->nwords; wordnum++)
|
if (page->nwords == 0)
|
||||||
{
|
{
|
||||||
bitmapword w = page->words[wordnum];
|
/* we have offsets in the header */
|
||||||
int off = wordnum * BITS_PER_BITMAPWORD;
|
for (int i = 0; i < NUM_FULL_OFFSETS; i++)
|
||||||
|
|
||||||
/* Make sure there is enough space to add offsets */
|
|
||||||
if ((result->num_offsets + BITS_PER_BITMAPWORD) > result->max_offset)
|
|
||||||
{
|
{
|
||||||
result->max_offset *= 2;
|
if (page->full_offsets[i] != InvalidOffsetNumber)
|
||||||
result->offsets = repalloc(result->offsets,
|
result->offsets[result->num_offsets++] = page->full_offsets[i];
|
||||||
sizeof(OffsetNumber) * result->max_offset);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
while (w != 0)
|
else
|
||||||
|
{
|
||||||
|
for (wordnum = 0; wordnum < page->nwords; wordnum++)
|
||||||
{
|
{
|
||||||
if (w & 1)
|
bitmapword w = page->words[wordnum];
|
||||||
result->offsets[result->num_offsets++] = (OffsetNumber) off;
|
int off = wordnum * BITS_PER_BITMAPWORD;
|
||||||
off++;
|
|
||||||
w >>= 1;
|
/* Make sure there is enough space to add offsets */
|
||||||
|
if ((result->num_offsets + BITS_PER_BITMAPWORD) > result->max_offset)
|
||||||
|
{
|
||||||
|
result->max_offset *= 2;
|
||||||
|
result->offsets = repalloc(result->offsets,
|
||||||
|
sizeof(OffsetNumber) * result->max_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (w != 0)
|
||||||
|
{
|
||||||
|
if (w & 1)
|
||||||
|
result->offsets[result->num_offsets++] = (OffsetNumber) off;
|
||||||
|
off++;
|
||||||
|
w >>= 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -36,6 +36,20 @@ SELECT do_set_block_offsets(blk, array_agg(off)::int2[])
|
|||||||
(VALUES (0), (1), (:maxblkno / 2), (:maxblkno - 1), (:maxblkno)) AS blocks(blk),
|
(VALUES (0), (1), (:maxblkno / 2), (:maxblkno - 1), (:maxblkno)) AS blocks(blk),
|
||||||
(VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
|
(VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
|
||||||
GROUP BY blk;
|
GROUP BY blk;
|
||||||
|
-- Test offsets embedded in the bitmap header.
|
||||||
|
SELECT do_set_block_offsets(501, array[greatest((random() * :maxoffset)::int, 1)]::int2[]);
|
||||||
|
do_set_block_offsets
|
||||||
|
----------------------
|
||||||
|
501
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT do_set_block_offsets(502, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
|
||||||
|
FROM generate_series(1, 3);
|
||||||
|
do_set_block_offsets
|
||||||
|
----------------------
|
||||||
|
502
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- Add enough TIDs to cause the store to appear "full", compared
|
-- Add enough TIDs to cause the store to appear "full", compared
|
||||||
-- to the allocated memory it started out with. This is easier
|
-- to the allocated memory it started out with. This is easier
|
||||||
-- with memory contexts in local memory.
|
-- with memory contexts in local memory.
|
||||||
@ -73,6 +87,20 @@ SELECT test_create(true);
|
|||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- Test offsets embedded in the bitmap header.
|
||||||
|
SELECT do_set_block_offsets(501, array[greatest((random() * :maxoffset)::int, 1)]::int2[]);
|
||||||
|
do_set_block_offsets
|
||||||
|
----------------------
|
||||||
|
501
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT do_set_block_offsets(502, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
|
||||||
|
FROM generate_series(1, 3);
|
||||||
|
do_set_block_offsets
|
||||||
|
----------------------
|
||||||
|
502
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- Random TIDs test. The offset numbers are randomized and must be
|
-- Random TIDs test. The offset numbers are randomized and must be
|
||||||
-- unique and ordered.
|
-- unique and ordered.
|
||||||
INSERT INTO hideblocks (blockno)
|
INSERT INTO hideblocks (blockno)
|
||||||
|
@ -28,6 +28,11 @@ SELECT do_set_block_offsets(blk, array_agg(off)::int2[])
|
|||||||
(VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
|
(VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
|
||||||
GROUP BY blk;
|
GROUP BY blk;
|
||||||
|
|
||||||
|
-- Test offsets embedded in the bitmap header.
|
||||||
|
SELECT do_set_block_offsets(501, array[greatest((random() * :maxoffset)::int, 1)]::int2[]);
|
||||||
|
SELECT do_set_block_offsets(502, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
|
||||||
|
FROM generate_series(1, 3);
|
||||||
|
|
||||||
-- Add enough TIDs to cause the store to appear "full", compared
|
-- Add enough TIDs to cause the store to appear "full", compared
|
||||||
-- to the allocated memory it started out with. This is easier
|
-- to the allocated memory it started out with. This is easier
|
||||||
-- with memory contexts in local memory.
|
-- with memory contexts in local memory.
|
||||||
@ -49,6 +54,11 @@ SELECT test_destroy();
|
|||||||
-- because unused static functions would raise warnings there.
|
-- because unused static functions would raise warnings there.
|
||||||
SELECT test_create(true);
|
SELECT test_create(true);
|
||||||
|
|
||||||
|
-- Test offsets embedded in the bitmap header.
|
||||||
|
SELECT do_set_block_offsets(501, array[greatest((random() * :maxoffset)::int, 1)]::int2[]);
|
||||||
|
SELECT do_set_block_offsets(502, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
|
||||||
|
FROM generate_series(1, 3);
|
||||||
|
|
||||||
-- Random TIDs test. The offset numbers are randomized and must be
|
-- Random TIDs test. The offset numbers are randomized and must be
|
||||||
-- unique and ordered.
|
-- unique and ordered.
|
||||||
INSERT INTO hideblocks (blockno)
|
INSERT INTO hideblocks (blockno)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user