
Cache hash index's metapage in rel->rd_amcache.

This avoids a very significant amount of buffer manager traffic and
contention when scanning hash indexes, because it's no longer
necessary to lock and pin the metapage for every scan.  We do need
some way of figuring out when the cache is too stale to use any more,
so that when we lock the primary bucket page to which the cached
metapage points us, we can tell whether a split has occurred since we
cached the metapage data.  To do that, we use the hash_prevblkno field
in the primary bucket page, which would otherwise always be set to
InvalidBuffer.
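
In outline, the cached-lookup path this commit introduces behaves as in the sketch below. This is a simplified illustration based only on the description above; the committed helpers are _hash_getcachedmetap() and _hash_getbucketbuf_from_hashkey() (called from the hashinsert.c hunks further down), and their actual signatures and edge-case handling differ. The *_sketch names are invented for illustration only.

#include "postgres.h"

#include "access/hash.h"
#include "utils/rel.h"

/*
 * Sketch only: refresh the metapage copy cached in rel->rd_amcache and
 * return it.  The committed helper is _hash_getcachedmetap(); this version
 * is simplified for illustration.
 */
static HashMetaPage
cache_metapage_sketch(Relation rel)
{
    Buffer      metabuf;

    if (rel->rd_amcache == NULL)
        rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
                                             sizeof(HashMetaPageData));

    /* Lock the metapage only long enough to copy it into backend memory. */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
    memcpy(rel->rd_amcache, HashPageGetMeta(BufferGetPage(metabuf)),
           sizeof(HashMetaPageData));
    _hash_relbuf(rel, metabuf);

    return (HashMetaPage) rel->rd_amcache;
}

/*
 * Sketch only: find and lock the primary bucket page for a hash key using
 * the cached metapage, detecting concurrent splits via hasho_prevblkno.
 * The committed equivalent is _hash_getbucketbuf_from_hashkey().
 */
static Buffer
lock_bucket_for_hashkey_sketch(Relation rel, uint32 hashkey, int access)
{
    HashMetaPage metap;

    /* Use the cached copy if we have one; otherwise read and cache it. */
    if (rel->rd_amcache != NULL)
        metap = (HashMetaPage) rel->rd_amcache;
    else
        metap = cache_metapage_sketch(rel);

    for (;;)
    {
        Bucket      bucket;
        Buffer      buf;
        HashPageOpaque opaque;

        /* Map the key to a bucket, and the bucket to a block number. */
        bucket = _hash_hashkey2bucket(hashkey,
                                      metap->hashm_maxbucket,
                                      metap->hashm_highmask,
                                      metap->hashm_lowmask);

        /* Lock the primary bucket page the cached metapage points us at. */
        buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(metap, bucket),
                           access, LH_BUCKET_PAGE);
        opaque = (HashPageOpaque) PageGetSpecialPointer(BufferGetPage(buf));

        /*
         * A split stamps the then-current hashm_maxbucket into the primary
         * page's hasho_prevblkno (otherwise unused there).  If that stamp is
         * no newer than our cached hashm_maxbucket, no split affecting this
         * bucket has occurred since we cached the metapage, so this is the
         * right bucket.  (The compatibility path for indexes built before
         * this change, where the field still reads InvalidBlockNumber, is
         * omitted from this sketch.)
         */
        if (opaque->hasho_prevblkno <= metap->hashm_maxbucket)
            return buf;

        /* A split intervened: release the bucket, refresh the cache, retry. */
        _hash_relbuf(rel, buf);
        metap = cache_metapage_sketch(rel);
    }
}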

This patch contains code so that it will continue working (although
less efficiently) with hash indexes built before this change, but
perhaps we should consider bumping the hash version and ripping out
the compatibility code.  That decision can be made later, though.

Mithun Cy, reviewed by Jesper Pedersen, Amit Kapila, and by me.
Before committing, I made a number of cosmetic changes to the last
posted version of the patch, adjusted _hash_getcachedmetap to be more
careful about order of operation, and made some necessary updates to
the pageinspect documentation and regression tests.
Robert Haas
2017-02-07 12:24:25 -05:00
parent 39c3ca5161
commit 293e24e507
8 changed files with 279 additions and 181 deletions

src/backend/access/hash/hashinsert.c

@@ -32,9 +32,7 @@ _hash_doinsert(Relation rel, IndexTuple itup)
     Buffer      bucket_buf;
     Buffer      metabuf;
     HashMetaPage metap;
-    BlockNumber blkno;
-    BlockNumber oldblkno;
-    bool        retry;
+    HashMetaPage usedmetap = NULL;
     Page        metapage;
     Page        page;
     HashPageOpaque pageopaque;
@@ -42,9 +40,6 @@ _hash_doinsert(Relation rel, IndexTuple itup)
     bool        do_expand;
     uint32      hashkey;
     Bucket      bucket;
-    uint32      maxbucket;
-    uint32      highmask;
-    uint32      lowmask;
     /*
     * Get the hash key for the item (it's stored in the index tuple itself).
@@ -57,10 +52,14 @@ _hash_doinsert(Relation rel, IndexTuple itup)
      * need to be consistent */
 restart_insert:
-    /* Read the metapage */
-    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+    /*
+     * Read the metapage. We don't lock it yet; HashMaxItemSize() will
+     * examine pd_pagesize_version, but that can't change so we can examine
+     * it without a lock.
+     */
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
     metapage = BufferGetPage(metabuf);
-    metap = HashPageGetMeta(metapage);
     /*
     * Check whether the item can fit on a hash page at all. (Eventually, we
@@ -76,66 +75,17 @@ restart_insert:
                         itemsz, HashMaxItemSize(metapage)),
             errhint("Values larger than a buffer page cannot be indexed.")));
-    oldblkno = InvalidBlockNumber;
-    retry = false;
-    /*
-     * Loop until we get a lock on the correct target bucket.
-     */
-    for (;;)
-    {
-        /*
-         * Compute the target bucket number, and convert to block number.
-         */
-        bucket = _hash_hashkey2bucket(hashkey,
-                                      metap->hashm_maxbucket,
-                                      metap->hashm_highmask,
-                                      metap->hashm_lowmask);
-        blkno = BUCKET_TO_BLKNO(metap, bucket);
-        /*
-         * Copy bucket mapping info now; refer the comment in
-         * _hash_expandtable where we copy this information before calling
-         * _hash_splitbucket to see why this is okay.
-         */
-        maxbucket = metap->hashm_maxbucket;
-        highmask = metap->hashm_highmask;
-        lowmask = metap->hashm_lowmask;
-        /* Release metapage lock, but keep pin. */
-        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-        /*
-         * If the previous iteration of this loop locked the primary page of
-         * what is still the correct target bucket, we are done. Otherwise,
-         * drop any old lock before acquiring the new one.
-         */
-        if (retry)
-        {
-            if (oldblkno == blkno)
-                break;
-            _hash_relbuf(rel, buf);
-        }
-        /* Fetch and lock the primary bucket page for the target bucket */
-        buf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BUCKET_PAGE);
-        /*
-         * Reacquire metapage lock and check that no bucket split has taken
-         * place while we were awaiting the bucket lock.
-         */
-        LockBuffer(metabuf, BUFFER_LOCK_SHARE);
-        oldblkno = blkno;
-        retry = true;
-    }
+    /* Lock the primary bucket page for the target bucket. */
+    buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
+                                          &usedmetap);
+    Assert(usedmetap != NULL);
     /* remember the primary bucket buffer to release the pin on it at end. */
     bucket_buf = buf;
     page = BufferGetPage(buf);
     pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
-    Assert(pageopaque->hasho_bucket == bucket);
+    bucket = pageopaque->hasho_bucket;
     /*
     * If this bucket is in the process of being split, try to finish the
@@ -151,8 +101,10 @@ restart_insert:
         /* release the lock on bucket buffer, before completing the split. */
         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-        _hash_finish_split(rel, metabuf, buf, pageopaque->hasho_bucket,
-                           maxbucket, highmask, lowmask);
+        _hash_finish_split(rel, metabuf, buf, bucket,
+                           usedmetap->hashm_maxbucket,
+                           usedmetap->hashm_highmask,
+                           usedmetap->hashm_lowmask);
         /* release the pin on old and meta buffer. retry for insert. */
         _hash_dropbuf(rel, buf);
@@ -225,6 +177,7 @@ restart_insert:
     */
     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
+    metap = HashPageGetMeta(metapage);
     metap->hashm_ntuples += 1;
     /* Make sure this stays in sync with _hash_expandtable() */