Mirror of https://github.com/postgres/postgres.git (synced 2025-05-06 19:59:18 +03:00)
Add regression test case exercising the sorting path for hash index build.
We've broken this code path at least twice in the past, so it's prudent to have a test case that covers it. To allow exercising the code path without creating a very large (and slow-to-run) test case, redefine the sort threshold to be bounded by maintenance_work_mem as well as the number of available buffers.

While at it, fix an ancient oversight that, when building a temp index, the number of available buffers is not NBuffers but NLocBuffer. Also, if assertions are enabled, apply a direct test that the sort actually does return the tuples in the expected order.

Peter Geoghegan

Patch: <CAM3SWZTBAo4hjbBd780+MrOKiKp_TMo1N3A0Rw9_im8gbD7fQA@mail.gmail.com>
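For context, the change means the sorted build path can be exercised cheaply by shrinking the threshold rather than by building a huge index. The snippet below is a sketch drawn from the regression test added in this commit; it assumes the default 8kB BLCKSZ, under which the 1MB setting works out to a threshold of roughly 128 pages.

-- Force the sorted hash index build on a small table by lowering the
-- sort threshold instead of loading millions of rows.
SET maintenance_work_mem = '1MB';   -- threshold ~128 buffers at BLCKSZ = 8192
CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops)
    WITH (fillfactor = 10);         -- low fillfactor inflates the bucket count
DROP INDEX hash_tuplesort_idx;
RESET maintenance_work_mem;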
Parent: 278148907a
Commit: 9563d5b5e4
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -22,6 +22,7 @@
 #include "access/relscan.h"
 #include "catalog/index.h"
 #include "commands/vacuum.h"
+#include "miscadmin.h"
 #include "optimizer/plancat.h"
 #include "utils/index_selfuncs.h"
 #include "utils/rel.h"
@@ -97,6 +98,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	double		reltuples;
 	double		allvisfrac;
 	uint32		num_buckets;
+	long		sort_threshold;
 	HashBuildState buildstate;
 
 	/*
@@ -120,12 +122,24 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	 * then we'll thrash horribly.  To prevent that scenario, we can sort the
 	 * tuples by (expected) bucket number.  However, such a sort is useless
 	 * overhead when the index does fit in RAM.  We choose to sort if the
-	 * initial index size exceeds NBuffers.
+	 * initial index size exceeds maintenance_work_mem, or the number of
+	 * buffers usable for the index, whichever is less.  (Limiting by the
+	 * number of buffers should reduce thrashing between PG buffers and kernel
+	 * buffers, which seems useful even if no physical I/O results.  Limiting
+	 * by maintenance_work_mem is useful to allow easy testing of the sort
+	 * code path, and may be useful to DBAs as an additional control knob.)
 	 *
 	 * NOTE: this test will need adjustment if a bucket is ever different from
-	 * one page.
+	 * one page.  Also, "initial index size" accounting does not include the
+	 * metapage, nor the first bitmap page.
 	 */
-	if (num_buckets >= (uint32) NBuffers)
+	sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
+	if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+		sort_threshold = Min(sort_threshold, NBuffers);
+	else
+		sort_threshold = Min(sort_threshold, NLocBuffer);
+
+	if (num_buckets >= (uint32) sort_threshold)
 		buildstate.spool = _h_spoolinit(heap, index, num_buckets);
 	else
 		buildstate.spool = NULL;
--- a/src/backend/access/hash/hashsort.c
+++ b/src/backend/access/hash/hashsort.c
@@ -37,6 +37,7 @@ struct HSpool
 {
 	Tuplesortstate *sortstate;	/* state data for tuplesort.c */
 	Relation	index;
+	uint32		hash_mask;		/* bitmask for hash codes */
 };
 
 
@@ -47,7 +48,6 @@ HSpool *
 _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
 {
 	HSpool	   *hspool = (HSpool *) palloc0(sizeof(HSpool));
-	uint32		hash_mask;
 
 	hspool->index = index;
 
@@ -60,7 +60,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
 	 * we could just compute num_buckets - 1.  We prefer not to assume that
 	 * here, though.
 	 */
-	hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
+	hspool->hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
 
 	/*
 	 * We size the sort area as maintenance_work_mem rather than work_mem to
@@ -69,7 +69,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
 	 */
 	hspool->sortstate = tuplesort_begin_index_hash(heap,
 												   index,
-												   hash_mask,
+												   hspool->hash_mask,
 												   maintenance_work_mem,
 												   false);
 
@@ -105,12 +105,29 @@ _h_indexbuild(HSpool *hspool)
 {
 	IndexTuple	itup;
 	bool		should_free;
+#ifdef USE_ASSERT_CHECKING
+	uint32		hashkey = 0;
+#endif
 
 	tuplesort_performsort(hspool->sortstate);
 
 	while ((itup = tuplesort_getindextuple(hspool->sortstate,
 										   true, &should_free)) != NULL)
 	{
+		/*
+		 * Technically, it isn't critical that hash keys be found in sorted
+		 * order, since this sorting is only used to increase locality of
+		 * access as a performance optimization.  It still seems like a good
+		 * idea to test tuplesort.c's handling of hash index tuple sorts
+		 * through an assertion, though.
+		 */
+#ifdef USE_ASSERT_CHECKING
+		uint32		lasthashkey = hashkey;
+
+		hashkey = _hash_get_indextuple_hashkey(itup) & hspool->hash_mask;
+		Assert(hashkey >= lasthashkey);
+#endif
+
 		_hash_doinsert(hspool->index, itup);
 		if (should_free)
 			pfree(itup);
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2346,6 +2346,13 @@ CREATE UNLOGGED TABLE unlogged_hash_table (id int4);
 CREATE INDEX unlogged_hash_index ON unlogged_hash_table USING hash (id int4_ops);
 DROP TABLE unlogged_hash_table;
 -- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
+-- Test hash index build tuplesorting.  Force hash tuplesort using low
+-- maintenance_work_mem setting and fillfactor:
+SET maintenance_work_mem = '1MB';
+CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
+WARNING:  hash indexes are not WAL-logged and their use is discouraged
+DROP INDEX hash_tuplesort_idx;
+RESET maintenance_work_mem;
 --
 -- Test functional index
 --
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -690,6 +690,13 @@ DROP TABLE unlogged_hash_table;
 
 -- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
 
+-- Test hash index build tuplesorting.  Force hash tuplesort using low
+-- maintenance_work_mem setting and fillfactor:
+SET maintenance_work_mem = '1MB';
+CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
+DROP INDEX hash_tuplesort_idx;
+RESET maintenance_work_mem;
+
 
 --
 -- Test functional index