1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-03 01:21:48 +03:00

Allow parallel CREATE INDEX for GIN indexes

Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in a significant speedup when
the build is CPU-bound.

The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.

The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.

Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).

Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.

Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
This commit is contained in:
Tomas Vondra 2025-03-03 16:53:03 +01:00
parent 3f1db99bfa
commit 8492feb98f
9 changed files with 1939 additions and 18 deletions

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,7 @@
#include "access/xloginsert.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
@ -55,7 +56,7 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->amclusterable = false;
amroutine->ampredlocks = true;
amroutine->amcanparallel = false;
amroutine->amcanbuildparallel = false;
amroutine->amcanbuildparallel = true;
amroutine->amcaninclude = false;
amroutine->amusemaintenanceworkmem = true;
amroutine->amsummarizing = false;
@ -74,7 +75,7 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->amgettreeheight = NULL;
amroutine->amoptions = ginoptions;
amroutine->amproperty = NULL;
amroutine->ambuildphasename = NULL;
amroutine->ambuildphasename = ginbuildphasename;
amroutine->amvalidate = ginvalidate;
amroutine->amadjustmembers = ginadjustmembers;
amroutine->ambeginscan = ginbeginscan;
@ -702,3 +703,28 @@ ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
END_CRIT_SECTION();
}
/*
 *	ginbuildphasename() -- Map a GIN index build phase number to its name.
 *
 * Returns the label reported for the given progress phase, or NULL when
 * phasenum is not one of the phases handled here.
 */
char *
ginbuildphasename(int64 phasenum)
{
	char	   *phase = NULL;

	switch (phasenum)
	{
		case PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE:
			phase = "initializing";
			break;
		case PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN:
			phase = "scanning table";
			break;
		case PROGRESS_GIN_PHASE_PERFORMSORT_1:
			phase = "sorting tuples (workers)";
			break;
		case PROGRESS_GIN_PHASE_MERGE_1:
			phase = "merging tuples (workers)";
			break;
		case PROGRESS_GIN_PHASE_PERFORMSORT_2:
			phase = "sorting tuples";
			break;
		case PROGRESS_GIN_PHASE_MERGE_2:
			phase = "merging tuples";
			break;
		default:
			/* unknown phase: leave the result as NULL */
			break;
	}

	return phase;
}

View File

@ -15,6 +15,7 @@
#include "postgres.h"
#include "access/brin.h"
#include "access/gin.h"
#include "access/nbtree.h"
#include "access/parallel.h"
#include "access/session.h"
@ -148,6 +149,9 @@ static const struct
{
"_brin_parallel_build_main", _brin_parallel_build_main
},
{
"_gin_parallel_build_main", _gin_parallel_build_main
},
{
"parallel_vacuum_main", parallel_vacuum_main
}

View File

@ -20,10 +20,12 @@
#include "postgres.h"
#include "access/brin_tuple.h"
#include "access/gin_tuple.h"
#include "access/hash.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
#include "executor/executor.h"
#include "pg_trace.h"
#include "utils/datum.h"
@ -46,6 +48,8 @@ static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups,
int count);
static void removeabbrev_index_brin(Tuplesortstate *state, SortTuple *stups,
int count);
static void removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups,
int count);
static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups,
int count);
static int comparetup_heap(const SortTuple *a, const SortTuple *b,
@ -74,6 +78,8 @@ static int comparetup_index_hash_tiebreak(const SortTuple *a, const SortTuple *b
Tuplesortstate *state);
static int comparetup_index_brin(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_index_gin(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_index(Tuplesortstate *state, SortTuple *stup,
@ -82,6 +88,10 @@ static void writetup_index_brin(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len);
static void writetup_index_gin(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len);
static int comparetup_datum(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_datum_tiebreak(const SortTuple *a, const SortTuple *b,
@ -568,6 +578,77 @@ tuplesort_begin_index_brin(int workMem,
return state;
}
/*
 * tuplesort_begin_index_gin
 *		Create a tuplesort state for sorting GinTuples.
 *
 * Sort support is prepared for every key attribute of indexRel, and the
 * GIN-specific compare/write/read callbacks are installed.  heapRel is
 * accepted but not referenced by this function.
 */
Tuplesortstate *
tuplesort_begin_index_gin(Relation heapRel,
						  Relation indexRel,
						  int workMem, SortCoordinate coordinate,
						  int sortopt)
{
	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
												   sortopt);
	TuplesortPublic *base = TuplesortstateGetPublic(state);
	TupleDesc	desc = RelationGetDescr(indexRel);
	MemoryContext oldcontext;

	oldcontext = MemoryContextSwitchTo(base->maincontext);

#ifdef TRACE_SORT
	if (trace_sort)
		elog(LOG,
			 "begin index sort: workMem = %d, randomAccess = %c",
			 workMem,
			 sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
#endif

	/*
	 * Multi-column GIN indexes expand the row into a separate index entry
	 * for each attribute, and that's what we write into the tuplesort. But
	 * we still need to initialize sortsupport for all the attributes.
	 */
	base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);

	/* One SortSupportData per key column, zero-initialized */
	base->sortKeys = (SortSupport) palloc0(base->nKeys *
										   sizeof(SortSupportData));

	for (int attidx = 0; attidx < base->nKeys; attidx++)
	{
		SortSupport ssup = &base->sortKeys[attidx];
		Form_pg_attribute att = TupleDescAttr(desc, attidx);
		Oid			collation = indexRel->rd_indcollation[attidx];
		TypeCacheEntry *typentry;

		ssup->ssup_cxt = CurrentMemoryContext;
		/* fall back to the default collation when the index has none */
		ssup->ssup_collation = OidIsValid(collation) ? collation :
			DEFAULT_COLLATION_OID;
		ssup->ssup_nulls_first = false;
		ssup->ssup_attno = attidx + 1;
		/* abbreviated keys are not used for GIN tuples */
		ssup->abbreviate = false;

		Assert(ssup->ssup_attno != 0);

		/*
		 * Look for an ordering operator for the index key data type, and
		 * then the sort support function.
		 *
		 * NOTE(review): this relies on the key type's default "<" operator
		 * rather than anything taken from the index's operator class --
		 * confirm that this ordering is what the GIN build expects.
		 */
		typentry = lookup_type_cache(att->atttypid, TYPECACHE_LT_OPR);
		PrepareSortSupportFromOrderingOp(typentry->lt_opr, ssup);
	}

	/* install the GIN-specific callbacks */
	base->removeabbrev = removeabbrev_index_gin;
	base->comparetup = comparetup_index_gin;
	base->writetup = writetup_index_gin;
	base->readtup = readtup_index_gin;
	base->haveDatum1 = false;
	base->arg = NULL;

	MemoryContextSwitchTo(oldcontext);

	return state;
}
Tuplesortstate *
tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
bool nullsFirstFlag, int workMem,
@ -803,6 +884,37 @@ tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
MemoryContextSwitchTo(oldcontext);
}
/*
 * Add one GinTuple to the sort.
 *
 * The first 'size' bytes at 'tuple' are copied into the tuplesort's tuple
 * memory context; the sort stores only that copy.
 */
void
tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size)
{
	TuplesortPublic *base = TuplesortstateGetPublic(state);
	MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
	GinTuple   *copy;
	SortTuple	stup;
	Size		space;
	bool		useAbbrev;

	/* copy the GinTuple into the right memory context */
	copy = palloc(size);
	memcpy(copy, tuple, size);

	/* no leading datum is kept for GIN tuples */
	stup.tuple = copy;
	stup.datum1 = (Datum) 0;
	stup.isnull1 = false;

	/* GetMemoryChunkSpace is not supported for bump contexts */
	space = TupleSortUseBumpTupleCxt(base->sortopt) ?
		MAXALIGN(size) : GetMemoryChunkSpace(copy);

	useAbbrev = base->sortKeys &&
		base->sortKeys->abbrev_converter &&
		!stup.isnull1;

	tuplesort_puttuple_common(state, &stup, useAbbrev, space);

	MemoryContextSwitchTo(oldcontext);
}
/*
* Accept one Datum while collecting input data for sort.
*
@ -975,6 +1087,29 @@ tuplesort_getbrintuple(Tuplesortstate *state, Size *len, bool forward)
return &btup->tuple;
}
/*
 * Fetch the next GinTuple in either forward or back direction.
 *
 * Returns NULL if there are no more tuples.  On success, *len is set to the
 * tuple's tuplen field; when NULL is returned, *len is left untouched.
 */
GinTuple *
tuplesort_getgintuple(Tuplesortstate *state, Size *len, bool forward)
{
	TuplesortPublic *base = TuplesortstateGetPublic(state);
	MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
	SortTuple	stup;
	GinTuple   *tup;

	if (!tuplesort_gettuple_common(state, forward, &stup))
		stup.tuple = NULL;

	MemoryContextSwitchTo(oldcontext);

	/* end of data: this function returns a pointer, so report NULL */
	if (!stup.tuple)
		return NULL;

	tup = (GinTuple *) stup.tuple;

	*len = tup->tuplen;

	return tup;
}
/*
* Fetch the next Datum in either forward or back direction.
* Returns false if no more datums.
@ -1763,6 +1898,69 @@ readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
stup->datum1 = tuple->tuple.bt_blkno;
}
/*
* Routines specialized for GIN case
*/
/*
 * removeabbrev callback for GIN sorts.
 *
 * This is a placeholder that always fails: it trips an assertion in
 * assert-enabled builds and raises an ERROR otherwise.  It is not expected
 * to be reached, since the GIN sort setup disables key abbreviation.
 */
static void
removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups, int count)
{
Assert(false);
elog(ERROR, "removeabbrev_index_gin not implemented");
}
/*
 * comparetup callback for GIN sorts.
 *
 * Hands both GinTuples, together with this sort's SortSupport array, to
 * _gin_compare_tuples() and returns its result.
 */
static int
comparetup_index_gin(const SortTuple *a, const SortTuple *b,
					 Tuplesortstate *state)
{
	TuplesortPublic *base = TuplesortstateGetPublic(state);
	GinTuple   *lhs = (GinTuple *) a->tuple;
	GinTuple   *rhs = (GinTuple *) b->tuple;

	/* GIN sorts never carry a leading datum in the SortTuple */
	Assert(!base->haveDatum1);

	return _gin_compare_tuples(lhs, rhs, base->sortKeys);
}
/*
 * writetup callback for GIN sorts.
 *
 * On-tape format: a length word (tuple length plus the size of the length
 * word itself), then the GinTuple bytes, then -- for random-access sorts
 * only -- the same length word again.
 */
static void
writetup_index_gin(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
{
	TuplesortPublic *base = TuplesortstateGetPublic(state);
	GinTuple   *gtup = (GinTuple *) stup->tuple;
	unsigned int lenword;

	/* the stored length counts the leading length word as well */
	lenword = (unsigned int) gtup->tuplen;
	lenword += sizeof(lenword);

	LogicalTapeWrite(tape, &lenword, sizeof(lenword));
	LogicalTapeWrite(tape, gtup, gtup->tuplen);

	/* need trailing length word? */
	if (base->sortopt & TUPLESORT_RANDOMACCESS)
		LogicalTapeWrite(tape, &lenword, sizeof(lenword));
}
/*
 * readtup callback for GIN sorts.
 *
 * 'len' is the length word already read from the tape; it includes the size
 * of the length word itself, so the tuple body is that much shorter.
 */
static void
readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
				  LogicalTape *tape, unsigned int len)
{
	TuplesortPublic *base = TuplesortstateGetPublic(state);
	unsigned int bodylen = len - sizeof(unsigned int);
	GinTuple   *gtup;

	/*
	 * Allocate space for the GIN sort tuple and read it in.  The length is
	 * stored into the header first; the bytes read from the tape then fill
	 * the whole tuple, header included.
	 */
	gtup = (GinTuple *) tuplesort_readtup_alloc(state, bodylen);
	gtup->tuplen = bodylen;
	LogicalTapeReadExact(tape, gtup, bodylen);

	/* need trailing length word? */
	if (base->sortopt & TUPLESORT_RANDOMACCESS)
		LogicalTapeReadExact(tape, &bodylen, sizeof(bodylen));

	stup->tuple = (void *) gtup;

	/* no abbreviations (FIXME maybe use attrnum for this?) */
	stup->datum1 = (Datum) 0;
}
/*
* Routines specialized for DatumTuple case
*/

View File

@ -12,6 +12,8 @@
#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "nodes/execnodes.h"
#include "storage/shm_toc.h"
#include "storage/block.h"
#include "utils/relcache.h"
@ -36,6 +38,17 @@
#define GIN_SEARCH_MODE_ALL 2
#define GIN_SEARCH_MODE_EVERYTHING 3 /* for internal use only */
/*
* Constant definition for progress reporting. Phase numbers must match
* ginbuildphasename.
*/
/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 (see progress.h) */
#define PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN 2
#define PROGRESS_GIN_PHASE_PERFORMSORT_1 3
#define PROGRESS_GIN_PHASE_MERGE_1 4
#define PROGRESS_GIN_PHASE_PERFORMSORT_2 5
#define PROGRESS_GIN_PHASE_MERGE_2 6
/*
* GinStatsData represents stats data for planner use
*/
@ -88,4 +101,6 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
extern void ginUpdateStats(Relation index, const GinStatsData *stats,
bool is_build);
extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc);
#endif /* GIN_H */

View File

@ -109,6 +109,7 @@ extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
GinNullCategory *category);
extern char *ginbuildphasename(int64 phasenum);
/* gininsert.c */
extern IndexBuildResult *ginbuild(Relation heap, Relation index,

View File

@ -0,0 +1,44 @@
/*--------------------------------------------------------------------------
* gin_tuple.h
* Definition of GinTuple, the GIN index entry format passed through tuplesort.
*
* Copyright (c) 2006-2024, PostgreSQL Global Development Group
*
* src/include/access/gin_tuple.h
*--------------------------------------------------------------------------
*/
/* Include guard; the name matches the comment on the closing #endif. */
#ifndef GIN_TUPLE_H
#define GIN_TUPLE_H
#include "access/ginblock.h"
#include "storage/itemptr.h"
#include "utils/sortsupport.h"
/*
* Data for one key in a GIN index.
*
* This is a variable-length struct: the fixed header is followed by "data".
* As read by GinTupleGetFirst, "data" starts with keylen bytes of key value,
* followed (at the next SHORTALIGN boundary) by a GinPostingList.
*/
typedef struct GinTuple
{
int tuplen; /* length of the whole tuple, in bytes (header included) */
OffsetNumber attrnum; /* attnum of index key */
uint16 keylen; /* bytes in data for key value */
int16 typlen; /* typlen for key */
bool typbyval; /* typbyval for key */
signed char category; /* category: normal or NULL? */
int nitems; /* number of TIDs in the data */
char data[FLEXIBLE_ARRAY_MEMBER]; /* key value, then the TID data */
} GinTuple;
/*
 * Return a pointer to the "first" item pointer of the posting list stored
 * in the tuple.
 *
 * The posting list begins at the first SHORTALIGN'ed address after the
 * keylen bytes of key data.
 */
static inline ItemPointer
GinTupleGetFirst(GinTuple *tup)
{
	char	   *keyend = tup->data + tup->keylen;
	GinPostingList *plist = (GinPostingList *) SHORTALIGN(keyend);

	return &plist->first;
}
extern int _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup);
#endif /* GIN_TUPLE_H */

View File

@ -22,6 +22,7 @@
#define TUPLESORT_H
#include "access/brin_tuple.h"
#include "access/gin_tuple.h"
#include "access/itup.h"
#include "executor/tuptable.h"
#include "storage/dsm.h"
@ -443,6 +444,10 @@ extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
int sortopt);
extern Tuplesortstate *tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate,
int sortopt);
extern Tuplesortstate *tuplesort_begin_index_gin(Relation heapRel,
Relation indexRel,
int workMem, SortCoordinate coordinate,
int sortopt);
extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
Oid sortOperator, Oid sortCollation,
bool nullsFirstFlag,
@ -456,6 +461,7 @@ extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
Relation rel, ItemPointer self,
const Datum *values, const bool *isnull);
extern void tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size);
extern void tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size);
extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
bool isNull);
@ -465,6 +471,8 @@ extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
extern BrinTuple *tuplesort_getbrintuple(Tuplesortstate *state, Size *len,
bool forward);
extern GinTuple *tuplesort_getgintuple(Tuplesortstate *state, Size *len,
bool forward);
extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy,
Datum *val, bool *isNull, Datum *abbrev);

View File

@ -1032,11 +1032,14 @@ GinBtreeData
GinBtreeDataLeafInsertData
GinBtreeEntryInsertData
GinBtreeStack
GinBuffer
GinBuildShared
GinBuildState
GinChkVal
GinEntries
GinEntryAccumulator
GinIndexStat
GinLeader
GinMetaPageData
GinNullCategory
GinOptions
@ -1052,6 +1055,7 @@ GinScanOpaqueData
GinState
GinStatsData
GinTernaryValue
GinTuple
GinTupleCollector
GinVacuumState
GistBuildMode