diff --git a/src/backend/access/heap/README.HOT b/src/backend/access/heap/README.HOT index 76ac83722f5..f1a1d9252c7 100644 --- a/src/backend/access/heap/README.HOT +++ b/src/backend/access/heap/README.HOT @@ -401,6 +401,12 @@ from the index, as well as ensuring that no one can see any inconsistent rows in a broken HOT chain (the first condition is stronger than the second). Finally, we can mark the index valid for searches. +Note that we do not need to set pg_index.indcheckxmin in this code path, +because we have outwaited any transactions that would need to avoid using +the index. (indcheckxmin is only needed because non-concurrent CREATE +INDEX doesn't want to wait; its stronger lock would create too much risk of +deadlock if it did.) + Limitations and Restrictions ---------------------------- diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index ff96bd92b31..108d859c0dc 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1022,7 +1022,7 @@ BuildIndexInfo(Relation index) /* other info */ ii->ii_Unique = indexStruct->indisunique; - ii->ii_ReadyForInserts = indexStruct->indisready; + ii->ii_ReadyForInserts = IndexIsReady(indexStruct); /* initialize index-build state to default */ ii->ii_Concurrent = false; @@ -1426,8 +1426,20 @@ index_build(Relation heapRelation, * index's usability horizon. Moreover, we *must not* try to change * the index's pg_index entry while reindexing pg_index itself, and this * optimization nicely prevents that. + * + * We also need not set indcheckxmin during a concurrent index build, + * because we won't set indisvalid true until all transactions that care + * about the broken HOT chains are gone. + * + * Therefore, this code path can only be taken during non-concurrent + * CREATE INDEX. Thus the fact that heap_update will set the pg_index + * tuple's xmin doesn't matter, because that tuple was created in the + * current transaction anyway. 
That also means we don't need to worry + * about any concurrent readers of the tuple; no other transaction can see + * it yet. */ - if (indexInfo->ii_BrokenHotChain && !isreindex) + if (indexInfo->ii_BrokenHotChain && !isreindex && + !indexInfo->ii_Concurrent) { Oid indexId = RelationGetRelid(indexRelation); Relation pg_index; @@ -2243,6 +2255,66 @@ validate_index_heapscan(Relation heapRelation, } +/* + * index_set_state_flags - adjust pg_index state flags + * + * This is used during CREATE INDEX CONCURRENTLY to adjust the pg_index + * flags that denote the index's state. We must use an in-place update of + * the pg_index tuple, because we do not have exclusive lock on the parent + * table and so other sessions might concurrently be doing SnapshotNow scans + * of pg_index to identify the table's indexes. A transactional update would + * risk somebody not seeing the index at all. Because the update is not + * transactional and will not roll back on error, this must only be used as + * the last step in a transaction that has not made any transactional catalog + * updates! + * + * Note that heap_inplace_update does send a cache inval message for the + * tuple, so other sessions will hear about the update as soon as we commit. 
+ */ +void +index_set_state_flags(Oid indexId, IndexStateFlagsAction action) +{ + Relation pg_index; + HeapTuple indexTuple; + Form_pg_index indexForm; + + /* Assert that current xact hasn't done any transactional updates */ + Assert(GetTopTransactionIdIfAny() == InvalidTransactionId); + + /* Open pg_index and fetch a writable copy of the index's tuple */ + pg_index = heap_open(IndexRelationId, RowExclusiveLock); + + indexTuple = SearchSysCacheCopy(INDEXRELID, + ObjectIdGetDatum(indexId), + 0, 0, 0); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", indexId); + indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + + /* Perform the requested state change on the copy */ + switch (action) + { + case INDEX_CREATE_SET_READY: + /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */ + Assert(!indexForm->indisready); + Assert(!indexForm->indisvalid); + indexForm->indisready = true; + break; + case INDEX_CREATE_SET_VALID: + /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */ + Assert(indexForm->indisready); + Assert(!indexForm->indisvalid); + indexForm->indisvalid = true; + break; + } + + /* ... and write it back in-place */ + heap_inplace_update(pg_index, indexTuple); + + heap_close(pg_index, RowExclusiveLock); +} + + /* * IndexGetRelation: given an index's relation OID, get the OID of the * relation it is an index on. Uses the system cache. @@ -2281,6 +2353,7 @@ reindex_index(Oid indexId) IndexInfo *indexInfo; HeapTuple indexTuple; Form_pg_index indexForm; + bool index_bad; /* * Open and lock the parent heap relation. 
ShareLock is sufficient since @@ -2396,17 +2469,28 @@ reindex_index(Oid indexId) elog(ERROR, "cache lookup failed for index %u", indexId); indexForm = (Form_pg_index) GETSTRUCT(indexTuple); - if (!indexForm->indisvalid || !indexForm->indisready || + index_bad = (!indexForm->indisvalid || + !indexForm->indisready); + if (index_bad || (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain)) { if (!indexInfo->ii_BrokenHotChain) indexForm->indcheckxmin = false; - else if (!indexForm->indisvalid || !indexForm->indisready) + else if (index_bad) indexForm->indcheckxmin = true; indexForm->indisvalid = true; indexForm->indisready = true; simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); CatalogUpdateIndexes(pg_index, indexTuple); + + /* + * Invalidate the relcache for the table, so that after we commit + * all sessions will refresh the table's index list. This ensures + * that if anyone misses seeing the pg_index row during this + * update, they'll refresh their list before attempting any update + * on the table. + */ + CacheInvalidateRelcache(heapRelation); } heap_close(pg_index, RowExclusiveLock); diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 720195ea4db..ff2a5fb60a4 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -443,7 +443,7 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck) * might put recently-dead tuples out-of-order in the new table, and there * is little harm in that.) */ - if (!OldIndex->rd_index->indisvalid) + if (!IndexIsValid(OldIndex->rd_index)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot cluster on invalid index \"%s\"", @@ -485,6 +485,11 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck) * mark_index_clustered: mark the specified index as the one clustered on * * With indexOid == InvalidOid, will mark all indexes of rel not-clustered. 
+ * + * Note: we do transactional updates of the pg_index rows, which are unsafe + * against concurrent SnapshotNow scans of pg_index. Therefore this is unsafe + * to execute with less than full exclusive lock on the parent table; + * otherwise concurrent executions of RelationGetIndexList could miss indexes. */ void mark_index_clustered(Relation rel, Oid indexOid) @@ -545,6 +550,9 @@ mark_index_clustered(Relation rel, Oid indexOid) } else if (thisIndexOid == indexOid) { + /* this was checked earlier, but let's be really sure */ + if (!IndexIsValid(indexForm)) + elog(ERROR, "cannot cluster on invalid index %u", indexOid); indexForm->indisclustered = true; simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); CatalogUpdateIndexes(pg_index, indexTuple); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 6ba5b5d36b8..a185a1c23ab 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -135,9 +135,6 @@ DefineIndex(RangeVar *heapRelation, LockRelId heaprelid; LOCKTAG heaplocktag; Snapshot snapshot; - Relation pg_index; - HeapTuple indexTuple; - Form_pg_index indexForm; int i; /* @@ -565,24 +562,7 @@ DefineIndex(RangeVar *heapRelation, * commit this transaction, any new transactions that open the table must * insert new entries into the index for insertions and non-HOT updates.
*/ - pg_index = heap_open(IndexRelationId, RowExclusiveLock); - - indexTuple = SearchSysCacheCopy(INDEXRELID, - ObjectIdGetDatum(indexRelationId), - 0, 0, 0); - if (!HeapTupleIsValid(indexTuple)) - elog(ERROR, "cache lookup failed for index %u", indexRelationId); - indexForm = (Form_pg_index) GETSTRUCT(indexTuple); - - Assert(!indexForm->indisready); - Assert(!indexForm->indisvalid); - - indexForm->indisready = true; - - simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); - CatalogUpdateIndexes(pg_index, indexTuple); - - heap_close(pg_index, RowExclusiveLock); + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); /* we can do away with our snapshot */ PopActiveSnapshot(); @@ -706,24 +686,7 @@ DefineIndex(RangeVar *heapRelation, /* * Index can now be marked valid -- update its pg_index entry */ - pg_index = heap_open(IndexRelationId, RowExclusiveLock); - - indexTuple = SearchSysCacheCopy(INDEXRELID, - ObjectIdGetDatum(indexRelationId), - 0, 0, 0); - if (!HeapTupleIsValid(indexTuple)) - elog(ERROR, "cache lookup failed for index %u", indexRelationId); - indexForm = (Form_pg_index) GETSTRUCT(indexTuple); - - Assert(indexForm->indisready); - Assert(!indexForm->indisvalid); - - indexForm->indisvalid = true; - - simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); - CatalogUpdateIndexes(pg_index, indexTuple); - - heap_close(pg_index, RowExclusiveLock); + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); /* * The pg_index update will cause backends (including this one) to update @@ -731,7 +694,7 @@ DefineIndex(RangeVar *heapRelation, * relcache inval on the parent table to force replanning of cached plans. * Otherwise existing sessions might fail to use the new index where it * would be useful. (Note that our earlier commits did not create reasons - * to replan; relcache flush on the index itself was sufficient.) + * to replan, so a relcache flush on the index itself was sufficient.)
*/ CacheInvalidateRelcacheByRelid(heaprelid.relId); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index becbe19627b..b0dac88471e 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -3763,6 +3763,8 @@ ATExecDropNotNull(Relation rel, const char *colName) /* * Check that the attribute is not in a primary key + * + * Note: we'll throw error even if the pkey index is not valid. */ /* Loop over all indexes on the relation */ @@ -4869,7 +4871,7 @@ transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid, /* * Get the list of index OIDs for the table from the relcache, and look up * each one in the pg_index syscache until we find one marked primary key - * (hopefully there isn't more than one such). + * (hopefully there isn't more than one such). Insist it's valid, too. */ *indexOid = InvalidOid; @@ -4885,7 +4887,7 @@ transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid, if (!HeapTupleIsValid(indexTuple)) elog(ERROR, "cache lookup failed for index %u", indexoid); indexStruct = (Form_pg_index) GETSTRUCT(indexTuple); - if (indexStruct->indisprimary) + if (indexStruct->indisprimary && IndexIsValid(indexStruct)) { *indexOid = indexoid; break; @@ -4973,10 +4975,12 @@ transformFkeyCheckAttrs(Relation pkrel, /* * Must have the right number of columns; must be unique and not a - * partial index; forget it if there are any expressions, too + * partial index; forget it if there are any expressions, too. Invalid + * indexes are out as well. 
*/ if (indexStruct->indnatts == numattrs && indexStruct->indisunique && + IndexIsValid(indexStruct) && heap_attisnull(indexTuple, Anum_pg_index_indpred) && heap_attisnull(indexTuple, Anum_pg_index_indexprs)) { diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index bdff22e451a..2569f28b80c 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -3853,9 +3853,16 @@ vac_cmp_vtlinks(const void *left, const void *right) /* - * Open all the indexes of the given relation, obtaining the specified kind - * of lock on each. Return an array of Relation pointers for the indexes - * into *Irel, and the number of indexes into *nindexes. + * Open all the vacuumable indexes of the given relation, obtaining the + * specified kind of lock on each. Return an array of Relation pointers for + * the indexes into *Irel, and the number of indexes into *nindexes. + * + * We consider an index vacuumable if it is marked insertable (IndexIsReady). + * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in + * execution, and what we have is too corrupt to be processable. We will + * vacuum even if the index isn't indisvalid; this is important because in a + * unique index, uniqueness checks will be performed anyway and had better not + * hit dangling index pointers. 
*/ void vac_open_indexes(Relation relation, LOCKMODE lockmode, @@ -3869,21 +3876,30 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode, indexoidlist = RelationGetIndexList(relation); - *nindexes = list_length(indexoidlist); + /* allocate enough memory for all indexes */ + i = list_length(indexoidlist); - if (*nindexes > 0) - *Irel = (Relation *) palloc(*nindexes * sizeof(Relation)); + if (i > 0) + *Irel = (Relation *) palloc(i * sizeof(Relation)); else *Irel = NULL; + /* collect just the ready indexes */ i = 0; foreach(indexoidscan, indexoidlist) { Oid indexoid = lfirst_oid(indexoidscan); + Relation indrel; - (*Irel)[i++] = index_open(indexoid, lockmode); + indrel = index_open(indexoid, lockmode); + if (IndexIsReady(indrel->rd_index)) + (*Irel)[i++] = indrel; + else + index_close(indrel, lockmode); } + *nindexes = i; + list_free(indexoidlist); } diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 5eb8e0bc6e5..3af803d9366 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -970,6 +970,9 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo) /* * For each index, open the index relation and save pg_index info. We * acquire RowExclusiveLock, signifying we will update the index. + * + * Note: we do this even if the index is not IndexIsReady; it's not worth + * the trouble to optimize for the case where it isn't. */ i = 0; foreach(l, indexoidlist) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index f7dd970e39e..f275b1df24c 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -158,9 +158,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * Ignore invalid indexes, since they can't safely be used for * queries. Note that this is OK because the data structure we * are constructing is only used by the planner --- the executor - * still needs to insert into "invalid" indexes! 
+ * still needs to insert into "invalid" indexes, if they're marked + * IndexIsReady. */ - if (!index->indisvalid) + if (!IndexIsValid(index)) { index_close(indexRelation, NoLock); continue; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 66d33d86ac2..7a7febf385c 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1704,9 +1704,20 @@ RelationReloadIndexInfo(Relation relation) RelationGetRelid(relation)); index = (Form_pg_index) GETSTRUCT(tuple); + /* + * Basically, let's just copy all the bool fields. There are one or + * two of these that can't actually change in the current code, but + * it's not worth it to track exactly which ones they are. None of + * the array fields are allowed to change, though. + */ + relation->rd_index->indisunique = index->indisunique; + relation->rd_index->indisprimary = index->indisprimary; + relation->rd_index->indisclustered = index->indisclustered; relation->rd_index->indisvalid = index->indisvalid; relation->rd_index->indcheckxmin = index->indcheckxmin; relation->rd_index->indisready = index->indisready; + + /* Copy xmin too, as that is needed to make sense of indcheckxmin */ HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data, HeapTupleHeaderGetXmin(tuple->t_data)); @@ -3085,7 +3096,8 @@ RelationGetIndexList(Relation relation) result = insert_ordered_oid(result, index->indexrelid); /* Check to see if it is a unique, non-partial btree index on OID */ - if (index->indnatts == 1 && + if (IndexIsValid(index) && + index->indnatts == 1 && index->indisunique && index->indkey.values[0] == ObjectIdAttributeNumber && index->indclass.values[0] == OID_BTREE_OPS_OID && @@ -3392,6 +3404,11 @@ RelationGetIndexAttrBitmap(Relation relation) /* * For each index, add referenced attributes to indexattrs. + * + * Note: we consider all indexes returned by RelationGetIndexList, even if + * they are not indisready or indisvalid. 
This is important because an + * index for which CREATE INDEX CONCURRENTLY has just started must be + * included in HOT-safety decisions (see README.HOT). */ indexattrs = NULL; foreach(l, indexoidlist) diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 5eca12b2ef5..a2c3fc04b32 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -27,6 +27,13 @@ typedef void (*IndexBuildCallback) (Relation index, bool tupleIsAlive, void *state); +/* Action code for index_set_state_flags */ +typedef enum +{ + INDEX_CREATE_SET_READY, + INDEX_CREATE_SET_VALID +} IndexStateFlagsAction; + extern Oid index_create(Oid heapRelationId, const char *indexRelationName, @@ -70,6 +77,8 @@ extern double IndexBuildHeapScan(Relation heapRelation, extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot); +extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action); + extern void reindex_index(Oid indexId); extern bool reindex_relation(Oid relid, bool toast_too); diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h index 1ec2e49a59a..efe5203004f 100644 --- a/src/include/catalog/pg_index.h +++ b/src/include/catalog/pg_index.h @@ -86,4 +86,12 @@ typedef FormData_pg_index *Form_pg_index; #define INDOPTION_DESC 0x0001 /* values are in reverse order */ #define INDOPTION_NULLS_FIRST 0x0002 /* NULLs are first instead of last */ +/* + * Use of these macros is recommended over direct examination of the state + * flag columns where possible; this allows source code compatibility with + * 9.2 and up. + */ +#define IndexIsValid(indexForm) ((indexForm)->indisvalid) +#define IndexIsReady(indexForm) ((indexForm)->indisready) + #endif /* PG_INDEX_H */