Expand hash indexes more gradually.

Since hash indexes typically have very few overflow pages, adding a new splitpoint essentially doubles the on-disk size of the index, which can lead to large and abrupt increases in disk usage (and perhaps long delays on occasion). To mitigate this problem to some degree, divide larger splitpoints into four equal phases. This means that, for example, instead of growing from 4GB to 8GB all at once, a hash index will now grow from 4GB to 5GB to 6GB to 7GB to 8GB, which is perhaps still not as smooth as we'd like but certainly an improvement. This changes the on-disk format of the metapage, so bump HASH_VERSION from 2 to 3. This will force a REINDEX of all existing hash indexes, but that's probably a good idea anyway. First, hash indexes from pre-10 versions of PostgreSQL could easily be corrupted, and we don't want to confuse corruption carried over from an older release with any corruption caused despite the new write-ahead logging in v10. Second, it will let us remove some backward-compatibility code added by commit 293e24e507. Mithun Cy, reviewed by Amit Kapila, Jesper Pedersen and me. Regression test outputs updated by me. Discussion: http://postgr.es/m/CAD__OuhG6F1gQLCgMQNnMNgoCvOLQZz9zKYJQNYvYmmJoM42gA@mail.gmail.com Discussion: http://postgr.es/m/CA+TgmoYty0jCf-pa+m+vYUJ716+AxM7nv_syvyanyf5O-L_i2A@mail.gmail.com
2025-11-10 17:42:29 +03:00 · 2017-04-03 23:46:33 -04:00
parent 334bf9c77d
commit ea69a0dead
11 changed files with 218 additions and 86 deletions
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -127,6 +127,7 @@

 #include "access/htup_details.h"
 #include "access/nbtree.h"
+#include "access/hash.h"
 #include "catalog/index.h"
 #include "catalog/pg_am.h"
 #include "commands/tablespace.h"
@@ -473,7 +474,9 @@ struct Tuplesortstate
 	bool		enforceUnique;	/* complain if we find duplicate tuples */

 	/* These are specific to the index_hash subcase: */
-	uint32		hash_mask;		/* mask for sortable part of hash code */
+	uint32		high_mask;		/* masks for sortable part of hash code */
+	uint32		low_mask;
+	uint32		max_buckets;

 	/*
 	 * These variables are specific to the Datum case; they are set by
@@ -991,7 +994,9 @@ tuplesort_begin_index_btree(Relation heapRel,
 Tuplesortstate *
 tuplesort_begin_index_hash(Relation heapRel,
 						   Relation indexRel,
-						   uint32 hash_mask,
+						   uint32 high_mask,
+						   uint32 low_mask,
+						   uint32 max_buckets,
 						   int workMem, bool randomAccess)
 {
 	Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess);
@@ -1002,8 +1007,11 @@ tuplesort_begin_index_hash(Relation heapRel,
 #ifdef TRACE_SORT
 	if (trace_sort)
 		elog(LOG,
-		"begin index sort: hash_mask = 0x%x, workMem = %d, randomAccess = %c",
-			 hash_mask,
+			 "begin index sort: high_mask = 0x%x, low_mask = 0x%x, "
+			 "max_buckets = 0x%x, workMem = %d, randomAccess = %c",
+			 high_mask,
+			 low_mask,
+			 max_buckets,
 			 workMem, randomAccess ? 't' : 'f');
 #endif

@@ -1017,7 +1025,9 @@ tuplesort_begin_index_hash(Relation heapRel,
 	state->heapRel = heapRel;
 	state->indexRel = indexRel;

-	state->hash_mask = hash_mask;
+	state->high_mask = high_mask;
+	state->low_mask = low_mask;
+	state->max_buckets = max_buckets;

 	MemoryContextSwitchTo(oldcontext);

@@ -4157,8 +4167,8 @@ static int
 comparetup_index_hash(const SortTuple *a, const SortTuple *b,
 					  Tuplesortstate *state)
 {
-	uint32		hash1;
-	uint32		hash2;
+	Bucket		bucket1;
+	Bucket		bucket2;
 	IndexTuple	tuple1;
 	IndexTuple	tuple2;

@@ -4167,13 +4177,16 @@ comparetup_index_hash(const SortTuple *a, const SortTuple *b,
 	 * that the first column of the index tuple is the hash key.
 	 */
 	Assert(!a->isnull1);
-	hash1 = DatumGetUInt32(a->datum1) & state->hash_mask;
+	bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1),
+								   state->max_buckets, state->high_mask,
+								   state->low_mask);
 	Assert(!b->isnull1);
-	hash2 = DatumGetUInt32(b->datum1) & state->hash_mask;
-
-	if (hash1 > hash2)
+	bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1),
+								   state->max_buckets, state->high_mask,
+								   state->low_mask);
+	if (bucket1 > bucket2)
 		return 1;
-	else if (hash1 < hash2)
+	else if (bucket1 < bucket2)
 		return -1;

 	/*