mirror of
https://github.com/postgres/postgres.git
synced 2025-10-22 14:32:25 +03:00
Expand hash indexes more gradually.
Since hash indexes typically have very few overflow pages, adding a
new splitpoint essentially doubles the on-disk size of the index,
which can lead to large and abrupt increases in disk usage (and
perhaps long delays on occasion). To mitigate this problem to some
degree, divide larger splitpoints into four equal phases. This means
that, for example, instead of growing from 4GB to 8GB all at once, a
hash index will now grow from 4GB to 5GB to 6GB to 7GB to 8GB, which
is perhaps still not as smooth as we'd like but certainly an
improvement.
This changes the on-disk format of the metapage, so bump HASH_VERSION
from 2 to 3. This will force a REINDEX of all existing hash indexes,
but that's probably a good idea anyway. First, hash indexes from
pre-10 versions of PostgreSQL could easily be corrupted, and we don't
want to confuse corruption carried over from an older release with any
corruption caused despite the new write-ahead logging in v10. Second,
it will let us remove some backward-compatibility code added by commit
293e24e507.
Mithun Cy, reviewed by Amit Kapila, Jesper Pedersen and me. Regression
test outputs updated by me.
Discussion: http://postgr.es/m/CAD__OuhG6F1gQLCgMQNnMNgoCvOLQZz9zKYJQNYvYmmJoM42gA@mail.gmail.com
Discussion: http://postgr.es/m/CA+TgmoYty0jCf-pa+m+vYUJ716+AxM7nv_syvyanyf5O-L_i2A@mail.gmail.com
This commit is contained in:
@@ -36,7 +36,7 @@ typedef uint32 Bucket;
|
||||
#define InvalidBucket ((Bucket) 0xFFFFFFFF)
|
||||
|
||||
#define BUCKET_TO_BLKNO(metap,B) \
|
||||
((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1)
|
||||
((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1)
|
||||
|
||||
/*
|
||||
* Special space for hash index pages.
|
||||
@@ -158,7 +158,8 @@ typedef HashScanOpaqueData *HashScanOpaque;
|
||||
#define HASH_METAPAGE 0 /* metapage is always block 0 */
|
||||
|
||||
#define HASH_MAGIC 0x6440640
|
||||
#define HASH_VERSION 2 /* 2 signifies only hash key value is stored */
|
||||
#define HASH_VERSION 3 /* 3 signifies multi-phased bucket allocation
|
||||
* to reduce doubling */
|
||||
|
||||
/*
|
||||
* spares[] holds the number of overflow pages currently allocated at or
|
||||
@@ -176,13 +177,28 @@ typedef HashScanOpaqueData *HashScanOpaque;
|
||||
*
|
||||
* The limitation on the size of spares[] comes from the fact that there's
|
||||
* no point in having more than 2^32 buckets with only uint32 hashcodes.
|
||||
* (Note: The value of HASH_MAX_SPLITPOINTS, which is the size of spares[], is
|
||||
* adjusted so as to accommodate multi-phased allocation of buckets
|
||||
* after HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE.)
|
||||
*
|
||||
* There is no particular upper limit on the size of mapp[], other than
|
||||
* needing to fit into the metapage. (With 8K block size, 128 bitmaps
|
||||
* limit us to 64 GB of overflow space...)
|
||||
*/
|
||||
#define HASH_MAX_SPLITPOINTS 32
|
||||
#define HASH_MAX_BITMAPS 128
|
||||
|
||||
#define HASH_SPLITPOINT_PHASE_BITS 2
|
||||
#define HASH_SPLITPOINT_PHASES_PER_GRP (1 << HASH_SPLITPOINT_PHASE_BITS)
|
||||
#define HASH_SPLITPOINT_PHASE_MASK (HASH_SPLITPOINT_PHASES_PER_GRP - 1)
|
||||
#define HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE 10
|
||||
|
||||
/* defines max number of splitpoint phases a hash index can have */
|
||||
#define HASH_MAX_SPLITPOINT_GROUP 32
|
||||
#define HASH_MAX_SPLITPOINTS \
|
||||
(((HASH_MAX_SPLITPOINT_GROUP - HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) * \
|
||||
HASH_SPLITPOINT_PHASES_PER_GRP) + \
|
||||
HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)
|
||||
|
||||
typedef struct HashMetaPageData
|
||||
{
|
||||
uint32 hashm_magic; /* magic no. for hash tables */
|
||||
@@ -382,6 +398,8 @@ extern uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype);
|
||||
extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
|
||||
uint32 highmask, uint32 lowmask);
|
||||
extern uint32 _hash_log2(uint32 num);
|
||||
extern uint32 _hash_spareindex(uint32 num_bucket);
|
||||
extern uint32 _hash_get_totalbuckets(uint32 splitpoint_phase);
|
||||
extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
|
||||
extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup);
|
||||
extern bool _hash_convert_tuple(Relation index,
|
||||
|
Reference in New Issue
Block a user