1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-24 01:29:19 +03:00

Cache hash index's metapage in rel->rd_amcache.

This avoids a very significant amount of buffer manager traffic and
contention when scanning hash indexes, because it's no longer
necessary to lock and pin the metapage for every scan.  We do need
some way of figuring out when the cache is too stale to use any more,
so that when we lock the primary bucket page to which the cached
metapage points us, we can tell whether a split has occurred since we
cached the metapage data.  To do that, we use the hash_prevblkno field
in the primary bucket page, which would otherwise always be set to
InvalidBuffer.

This patch contains code so that it will continue working (although
less efficiently) with hash indexes built before this change, but
perhaps we should consider bumping the hash version and ripping out
the compatibility code.  That decision can be made later, though.

Mithun Cy, reviewed by Jesper Pedersen, Amit Kapila, and by me.
Before committing, I made a number of cosmetic changes to the last
posted version of the patch, adjusted _hash_getcachedmetap to be more
careful about order of operation, and made some necessary updates to
the pageinspect documentation and regression tests.
This commit is contained in:
Robert Haas
2017-02-07 12:24:25 -05:00
parent 39c3ca5161
commit 293e24e507
8 changed files with 279 additions and 181 deletions

View File

@@ -61,10 +61,21 @@ typedef uint32 Bucket;
#define LH_PAGE_TYPE \
(LH_OVERFLOW_PAGE|LH_BUCKET_PAGE|LH_BITMAP_PAGE|LH_META_PAGE)
/*
* In an overflow page, hasho_prevblkno stores the block number of the previous
* page in the bucket chain; in a bucket page, hasho_prevblkno stores the
* hashm_maxbucket value as of the last time the bucket was last split, or
* else as of the time the bucket was created. The latter convention is used
* to determine whether a cached copy of the metapage is too stale to be used
* without needing to lock or pin the metapage.
*
* hasho_nextblkno is always the block number of the next page in the
* bucket chain, or InvalidBlockNumber if there are no more such pages.
*/
typedef struct HashPageOpaqueData
{
BlockNumber hasho_prevblkno; /* previous ovfl (or bucket) blkno */
BlockNumber hasho_nextblkno; /* next ovfl blkno */
BlockNumber hasho_prevblkno; /* see above */
BlockNumber hasho_nextblkno; /* see above */
Bucket hasho_bucket; /* bucket number this pg belongs to */
uint16 hasho_flag; /* page type code, see above */
uint16 hasho_page_id; /* for identification of hash indexes */
@@ -309,6 +320,11 @@ extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
int access, int flags);
extern Buffer _hash_getbuf_with_condlock_cleanup(Relation rel,
BlockNumber blkno, int flags);
extern HashMetaPage _hash_getcachedmetap(Relation rel, Buffer *metabuf,
bool force_refresh);
extern Buffer _hash_getbucketbuf_from_hashkey(Relation rel, uint32 hashkey,
int access,
HashMetaPage *cachedmetap);
extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno,
ForkNumber forkNum);