1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Expand hash indexes more gradually.

Since hash indexes typically have very few overflow pages, adding a
new splitpoint essentially doubles the on-disk size of the index,
which can lead to large and abrupt increases in disk usage (and
perhaps long delays on occasion).  To mitigate this problem to some
degree, divide larger splitpoints into four equal phases.  This means
that, for example, instead of growing from 4GB to 8GB all at once, a
hash index will now grow from 4GB to 5GB to 6GB to 7GB to 8GB, which
is perhaps still not as smooth as we'd like but certainly an
improvement.

This changes the on-disk format of the metapage, so bump HASH_VERSION
from 2 to 3.  This will force a REINDEX of all existing hash indexes,
but that's probably a good idea anyway.  First, hash indexes from
pre-10 versions of PostgreSQL could easily be corrupted, and we don't
want to confuse corruption carried over from an older release with any
corruption that occurs despite the new write-ahead logging in v10.  Second,
it will let us remove some backward-compatibility code added by commit
293e24e507.

Mithun Cy, reviewed by Amit Kapila, Jesper Pedersen and me.  Regression
test outputs updated by me.

Discussion: http://postgr.es/m/CAD__OuhG6F1gQLCgMQNnMNgoCvOLQZz9zKYJQNYvYmmJoM42gA@mail.gmail.com
Discussion: http://postgr.es/m/CA+TgmoYty0jCf-pa+m+vYUJ716+AxM7nv_syvyanyf5O-L_i2A@mail.gmail.com
This commit is contained in:
Robert Haas
2017-04-03 23:46:33 -04:00
parent 334bf9c77d
commit ea69a0dead
11 changed files with 218 additions and 86 deletions

View File

@@ -127,6 +127,7 @@
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "access/hash.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "commands/tablespace.h"
@@ -473,7 +474,9 @@ struct Tuplesortstate
bool enforceUnique; /* complain if we find duplicate tuples */
/* These are specific to the index_hash subcase: */
uint32 hash_mask; /* mask for sortable part of hash code */
uint32 high_mask; /* masks for sortable part of hash code */
uint32 low_mask;
uint32 max_buckets;
/*
* These variables are specific to the Datum case; they are set by
@@ -991,7 +994,9 @@ tuplesort_begin_index_btree(Relation heapRel,
Tuplesortstate *
tuplesort_begin_index_hash(Relation heapRel,
Relation indexRel,
uint32 hash_mask,
uint32 high_mask,
uint32 low_mask,
uint32 max_buckets,
int workMem, bool randomAccess)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess);
@@ -1002,8 +1007,11 @@ tuplesort_begin_index_hash(Relation heapRel,
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"begin index sort: hash_mask = 0x%x, workMem = %d, randomAccess = %c",
hash_mask,
"begin index sort: high_mask = 0x%x, low_mask = 0x%x, "
"max_buckets = 0x%x, workMem = %d, randomAccess = %c",
high_mask,
low_mask,
max_buckets,
workMem, randomAccess ? 't' : 'f');
#endif
@@ -1017,7 +1025,9 @@ tuplesort_begin_index_hash(Relation heapRel,
state->heapRel = heapRel;
state->indexRel = indexRel;
state->hash_mask = hash_mask;
state->high_mask = high_mask;
state->low_mask = low_mask;
state->max_buckets = max_buckets;
MemoryContextSwitchTo(oldcontext);
@@ -4157,8 +4167,8 @@ static int
comparetup_index_hash(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state)
{
uint32 hash1;
uint32 hash2;
Bucket bucket1;
Bucket bucket2;
IndexTuple tuple1;
IndexTuple tuple2;
@@ -4167,13 +4177,16 @@ comparetup_index_hash(const SortTuple *a, const SortTuple *b,
* that the first column of the index tuple is the hash key.
*/
Assert(!a->isnull1);
hash1 = DatumGetUInt32(a->datum1) & state->hash_mask;
bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1),
state->max_buckets, state->high_mask,
state->low_mask);
Assert(!b->isnull1);
hash2 = DatumGetUInt32(b->datum1) & state->hash_mask;
if (hash1 > hash2)
bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1),
state->max_buckets, state->high_mask,
state->low_mask);
if (bucket1 > bucket2)
return 1;
else if (hash1 < hash2)
else if (bucket1 < bucket2)
return -1;
/*