Cache hash index's metapage in rel->rd_amcache.
This avoids a very significant amount of buffer manager traffic and contention when scanning hash indexes, because it's no longer necessary to lock and pin the metapage for every scan. We do need some way of figuring out when the cache is too stale to use, so that when we lock the primary bucket page to which the cached metapage points us, we can tell whether a split has occurred since we cached the metapage data. To do that, we use the hasho_prevblkno field in the primary bucket page, which would otherwise always be set to InvalidBlockNumber.

This patch contains code so that it will continue working (although less efficiently) with hash indexes built before this change, but perhaps we should consider bumping the hash version and ripping out the compatibility code. That decision can be made later, though.

Mithun Cy, reviewed by Jesper Pedersen, Amit Kapila, and by me. Before committing, I made a number of cosmetic changes to the last posted version of the patch, adjusted _hash_getcachedmetap to be more careful about order of operations, and made some necessary updates to the pageinspect documentation and regression tests.
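As a rough illustration of the caching scheme described above, here is a minimal C sketch of reading the metapage once and keeping a private copy in rel->rd_amcache. The helper name hash_get_cached_metap_sketch and its signature are invented for this example; the function the commit actually adds is _hash_getcachedmetap, whose interface and refresh handling differ.

/*
 * Minimal sketch (not the committed code): cache a copy of the hash
 * metapage in the relcache entry so later scans can reuse it without
 * locking and pinning the metapage.
 */
#include "postgres.h"
#include "access/hash.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

static HashMetaPage
hash_get_cached_metap_sketch(Relation rel, bool force_refresh)
{
    if (rel->rd_amcache == NULL || force_refresh)
    {
        Buffer      metabuf;
        Page        page;

        /* Read and share-lock the metapage just long enough to copy it. */
        metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
        page = BufferGetPage(metabuf);

        if (rel->rd_amcache == NULL)
            rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
                                                 sizeof(HashMetaPageData));
        memcpy(rel->rd_amcache, HashPageGetMeta(page),
               sizeof(HashMetaPageData));

        _hash_relbuf(rel, metabuf);     /* drop the lock and the pin */
    }

    /* Subsequent calls return the cached copy without any buffer access. */
    return (HashMetaPage) rel->rd_amcache;
}

A scan can then map a hash key to its bucket using the cached masks and only go back to the real metapage when that mapping turns out to be stale, which is what the validation against the primary bucket page (shown after the diff below) is for.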
@@ -32,9 +32,7 @@ _hash_doinsert(Relation rel, IndexTuple itup)
     Buffer      bucket_buf;
     Buffer      metabuf;
     HashMetaPage metap;
-    BlockNumber blkno;
-    BlockNumber oldblkno;
-    bool        retry;
+    HashMetaPage usedmetap = NULL;
     Page        metapage;
     Page        page;
     HashPageOpaque pageopaque;
@@ -42,9 +40,6 @@ _hash_doinsert(Relation rel, IndexTuple itup)
     bool        do_expand;
     uint32      hashkey;
     Bucket      bucket;
-    uint32      maxbucket;
-    uint32      highmask;
-    uint32      lowmask;
 
     /*
      * Get the hash key for the item (it's stored in the index tuple itself).
@@ -57,10 +52,14 @@ _hash_doinsert(Relation rel, IndexTuple itup)
      * need to be consistent */
 
 restart_insert:
-    /* Read the metapage */
-    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+
+    /*
+     * Read the metapage.  We don't lock it yet; HashMaxItemSize() will
+     * examine pd_pagesize_version, but that can't change so we can examine
+     * it without a lock.
+     */
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
     metapage = BufferGetPage(metabuf);
-    metap = HashPageGetMeta(metapage);
 
     /*
      * Check whether the item can fit on a hash page at all. (Eventually, we
@@ -76,66 +75,17 @@ restart_insert:
                    itemsz, HashMaxItemSize(metapage)),
              errhint("Values larger than a buffer page cannot be indexed.")));
 
-    oldblkno = InvalidBlockNumber;
-    retry = false;
-
-    /*
-     * Loop until we get a lock on the correct target bucket.
-     */
-    for (;;)
-    {
-        /*
-         * Compute the target bucket number, and convert to block number.
-         */
-        bucket = _hash_hashkey2bucket(hashkey,
-                                      metap->hashm_maxbucket,
-                                      metap->hashm_highmask,
-                                      metap->hashm_lowmask);
-
-        blkno = BUCKET_TO_BLKNO(metap, bucket);
-
-        /*
-         * Copy bucket mapping info now; refer the comment in
-         * _hash_expandtable where we copy this information before calling
-         * _hash_splitbucket to see why this is okay.
-         */
-        maxbucket = metap->hashm_maxbucket;
-        highmask = metap->hashm_highmask;
-        lowmask = metap->hashm_lowmask;
-
-        /* Release metapage lock, but keep pin. */
-        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-
-        /*
-         * If the previous iteration of this loop locked the primary page of
-         * what is still the correct target bucket, we are done.  Otherwise,
-         * drop any old lock before acquiring the new one.
-         */
-        if (retry)
-        {
-            if (oldblkno == blkno)
-                break;
-            _hash_relbuf(rel, buf);
-        }
-
-        /* Fetch and lock the primary bucket page for the target bucket */
-        buf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BUCKET_PAGE);
-
-        /*
-         * Reacquire metapage lock and check that no bucket split has taken
-         * place while we were awaiting the bucket lock.
-         */
-        LockBuffer(metabuf, BUFFER_LOCK_SHARE);
-        oldblkno = blkno;
-        retry = true;
-    }
+    /* Lock the primary bucket page for the target bucket. */
+    buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
+                                          &usedmetap);
+    Assert(usedmetap != NULL);
 
     /* remember the primary bucket buffer to release the pin on it at end. */
     bucket_buf = buf;
 
     page = BufferGetPage(buf);
     pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
-    Assert(pageopaque->hasho_bucket == bucket);
+    bucket = pageopaque->hasho_bucket;
 
     /*
      * If this bucket is in the process of being split, try to finish the
@@ -151,8 +101,10 @@ restart_insert:
         /* release the lock on bucket buffer, before completing the split. */
         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 
-        _hash_finish_split(rel, metabuf, buf, pageopaque->hasho_bucket,
-                           maxbucket, highmask, lowmask);
+        _hash_finish_split(rel, metabuf, buf, bucket,
+                           usedmetap->hashm_maxbucket,
+                           usedmetap->hashm_highmask,
+                           usedmetap->hashm_lowmask);
 
         /* release the pin on old and meta buffer. retry for insert. */
         _hash_dropbuf(rel, buf);
@@ -225,6 +177,7 @@ restart_insert:
      */
     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
 
+    metap = HashPageGetMeta(metapage);
     metap->hashm_ntuples += 1;
 
     /* Make sure this stays in sync with _hash_expandtable() */
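For context on the _hash_getbucketbuf_from_hashkey call introduced above, here is a hedged sketch of the lookup-and-validate loop it performs with the cached metapage: compute the bucket from the cached masks, lock that bucket's primary page, and reread the metapage if the page shows a split has happened since the copy was taken. The function name hash_lock_bucket_sketch is hypothetical, it builds on the caching sketch earlier, and for brevity it ignores the pre-patch compatibility case the commit message mentions.

/*
 * Sketch of validating the cached metapage against the primary bucket page.
 * The committed logic lives in _hash_getbucketbuf_from_hashkey().
 */
static Buffer
hash_lock_bucket_sketch(Relation rel, uint32 hashkey, int access,
                        HashMetaPage *cachedmetap)
{
    HashMetaPage metap = hash_get_cached_metap_sketch(rel, false);
    Buffer      buf = InvalidBuffer;

    for (;;)
    {
        Bucket      bucket;
        BlockNumber blkno;
        Page        page;
        HashPageOpaque opaque;

        /* Map the hash key to a bucket using the cached masks. */
        bucket = _hash_hashkey2bucket(hashkey,
                                      metap->hashm_maxbucket,
                                      metap->hashm_highmask,
                                      metap->hashm_lowmask);
        blkno = BUCKET_TO_BLKNO(metap, bucket);

        /* Lock the primary bucket page the cached mapping points at. */
        buf = _hash_getbuf(rel, blkno, access, LH_BUCKET_PAGE);
        page = BufferGetPage(buf);
        opaque = (HashPageOpaque) PageGetSpecialPointer(page);

        /*
         * Assumed convention: a primary bucket page stores in
         * hasho_prevblkno the hashm_maxbucket value in effect when the
         * bucket was created, so a cached maxbucket at least that large
         * means no split has invalidated the cached mapping.
         */
        if (opaque->hasho_prevblkno <= metap->hashm_maxbucket)
            break;

        /* Cached copy is stale: drop the lock and pin, refresh, retry. */
        _hash_relbuf(rel, buf);
        metap = hash_get_cached_metap_sketch(rel, true);
    }

    if (cachedmetap)
        *cachedmetap = metap;
    return buf;
}

The caller in the hunk above receives the metapage data that was actually used via the usedmetap pointer, so later steps such as _hash_finish_split can rely on the same maxbucket and mask values that chose the bucket.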