mirror of
https://github.com/postgres/postgres.git
synced 2025-12-10 14:22:35 +03:00
amcheck: Fix snapshot usage in bt_index_parent_check
We were using SnapshotAny to do some index checks, but that's wrong and
causes spurious errors when used on indexes created by CREATE INDEX
CONCURRENTLY. Fix it to use an MVCC snapshot, and add a test for it.
This problem came in with commit 5ae2087202, which introduced
uniqueness check. Backpatch to 17.
Author: Mihail Nikalayeu <mihailnikalayeu@gmail.com>
Reviewed-by: Andrey Borodin <x4mmm@yandex-team.ru>
Backpatch-through: 17
Discussion: https://postgr.es/m/CANtu0ojmVd27fEhfpST7RG2KZvwkX=dMyKUqg0KM87FkOSdz8Q@mail.gmail.com
This commit is contained in:
@@ -64,5 +64,28 @@ $node->pgbench(
|
|||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
# Test bt_index_parent_check() with indexes created with
|
||||||
|
# CREATE INDEX CONCURRENTLY.
|
||||||
|
$node->safe_psql('postgres', q(CREATE TABLE quebec(i int primary key)));
|
||||||
|
# Insert two rows into index
|
||||||
|
$node->safe_psql('postgres',
|
||||||
|
q(INSERT INTO quebec SELECT i FROM generate_series(1, 2) s(i);));
|
||||||
|
|
||||||
|
# start background transaction
|
||||||
|
my $in_progress_h = $node->background_psql('postgres');
|
||||||
|
$in_progress_h->query_safe(q(BEGIN; SELECT pg_current_xact_id();));
|
||||||
|
|
||||||
|
# delete one row from table, while background transaction is in progress
|
||||||
|
$node->safe_psql('postgres', q(DELETE FROM quebec WHERE i = 1;));
|
||||||
|
# create index concurrently, which will skip the deleted row
|
||||||
|
$node->safe_psql('postgres', q(CREATE INDEX CONCURRENTLY oscar ON quebec(i);));
|
||||||
|
|
||||||
|
# check index using bt_index_parent_check
|
||||||
|
my $result = $node->psql('postgres',
|
||||||
|
q(SELECT bt_index_parent_check('oscar', heapallindexed => true)));
|
||||||
|
is($result, '0', 'bt_index_parent_check for CIC after removed row');
|
||||||
|
|
||||||
|
$in_progress_h->quit;
|
||||||
|
|
||||||
$node->stop;
|
$node->stop;
|
||||||
done_testing();
|
done_testing();
|
||||||
|
|||||||
@@ -382,7 +382,6 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
|
|||||||
BTMetaPageData *metad;
|
BTMetaPageData *metad;
|
||||||
uint32 previouslevel;
|
uint32 previouslevel;
|
||||||
BtreeLevel current;
|
BtreeLevel current;
|
||||||
Snapshot snapshot = SnapshotAny;
|
|
||||||
|
|
||||||
if (!readonly)
|
if (!readonly)
|
||||||
elog(DEBUG1, "verifying consistency of tree structure for index \"%s\"",
|
elog(DEBUG1, "verifying consistency of tree structure for index \"%s\"",
|
||||||
@@ -433,54 +432,46 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
|
|||||||
state->heaptuplespresent = 0;
|
state->heaptuplespresent = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Register our own snapshot in !readonly case, rather than asking
|
* Register our own snapshot for heapallindexed, rather than asking
|
||||||
* table_index_build_scan() to do this for us later. This needs to
|
* table_index_build_scan() to do this for us later. This needs to
|
||||||
* happen before index fingerprinting begins, so we can later be
|
* happen before index fingerprinting begins, so we can later be
|
||||||
* certain that index fingerprinting should have reached all tuples
|
* certain that index fingerprinting should have reached all tuples
|
||||||
* returned by table_index_build_scan().
|
* returned by table_index_build_scan().
|
||||||
*/
|
*/
|
||||||
if (!state->readonly)
|
state->snapshot = RegisterSnapshot(GetTransactionSnapshot());
|
||||||
{
|
|
||||||
snapshot = RegisterSnapshot(GetTransactionSnapshot());
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GetTransactionSnapshot() always acquires a new MVCC snapshot in
|
* GetTransactionSnapshot() always acquires a new MVCC snapshot in
|
||||||
* READ COMMITTED mode. A new snapshot is guaranteed to have all
|
* READ COMMITTED mode. A new snapshot is guaranteed to have all the
|
||||||
* the entries it requires in the index.
|
* entries it requires in the index.
|
||||||
*
|
*
|
||||||
* We must defend against the possibility that an old xact
|
* We must defend against the possibility that an old xact snapshot
|
||||||
* snapshot was returned at higher isolation levels when that
|
* was returned at higher isolation levels when that snapshot is not
|
||||||
* snapshot is not safe for index scans of the target index. This
|
* safe for index scans of the target index. This is possible when
|
||||||
* is possible when the snapshot sees tuples that are before the
|
* the snapshot sees tuples that are before the index's indcheckxmin
|
||||||
* index's indcheckxmin horizon. Throwing an error here should be
|
* horizon. Throwing an error here should be very rare. It doesn't
|
||||||
* very rare. It doesn't seem worth using a secondary snapshot to
|
* seem worth using a secondary snapshot to avoid this.
|
||||||
* avoid this.
|
*/
|
||||||
*/
|
if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
|
||||||
if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
|
!TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
|
||||||
!TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
|
state->snapshot->xmin))
|
||||||
snapshot->xmin))
|
ereport(ERROR,
|
||||||
ereport(ERROR,
|
errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
|
||||||
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
|
errmsg("index \"%s\" cannot be verified using transaction snapshot",
|
||||||
errmsg("index \"%s\" cannot be verified using transaction snapshot",
|
RelationGetRelationName(rel)));
|
||||||
RelationGetRelationName(rel))));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need a snapshot to check the uniqueness of the index. For better
|
* We need a snapshot to check the uniqueness of the index. For better
|
||||||
* performance take it once per index check. If snapshot already taken
|
* performance, take it once per index check. If one was already taken
|
||||||
* reuse it.
|
* above, use that.
|
||||||
*/
|
*/
|
||||||
if (state->checkunique)
|
if (state->checkunique)
|
||||||
{
|
{
|
||||||
state->indexinfo = BuildIndexInfo(state->rel);
|
state->indexinfo = BuildIndexInfo(state->rel);
|
||||||
if (state->indexinfo->ii_Unique)
|
|
||||||
{
|
if (state->indexinfo->ii_Unique && state->snapshot == InvalidSnapshot)
|
||||||
if (snapshot != SnapshotAny)
|
state->snapshot = RegisterSnapshot(GetTransactionSnapshot());
|
||||||
state->snapshot = snapshot;
|
|
||||||
else
|
|
||||||
state->snapshot = RegisterSnapshot(GetTransactionSnapshot());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert(!state->rootdescend || state->readonly);
|
Assert(!state->rootdescend || state->readonly);
|
||||||
@@ -555,13 +546,12 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
|
|||||||
/*
|
/*
|
||||||
* Create our own scan for table_index_build_scan(), rather than
|
* Create our own scan for table_index_build_scan(), rather than
|
||||||
* getting it to do so for us. This is required so that we can
|
* getting it to do so for us. This is required so that we can
|
||||||
* actually use the MVCC snapshot registered earlier in !readonly
|
* actually use the MVCC snapshot registered earlier.
|
||||||
* case.
|
|
||||||
*
|
*
|
||||||
* Note that table_index_build_scan() calls heap_endscan() for us.
|
* Note that table_index_build_scan() calls heap_endscan() for us.
|
||||||
*/
|
*/
|
||||||
scan = table_beginscan_strat(state->heaprel, /* relation */
|
scan = table_beginscan_strat(state->heaprel, /* relation */
|
||||||
snapshot, /* snapshot */
|
state->snapshot, /* snapshot */
|
||||||
0, /* number of keys */
|
0, /* number of keys */
|
||||||
NULL, /* scan key */
|
NULL, /* scan key */
|
||||||
true, /* buffer access strategy OK */
|
true, /* buffer access strategy OK */
|
||||||
@@ -569,16 +559,15 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY
|
* Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY
|
||||||
* behaves in !readonly case.
|
* behaves.
|
||||||
*
|
*
|
||||||
* It's okay that we don't actually use the same lock strength for the
|
* It's okay that we don't actually use the same lock strength for the
|
||||||
* heap relation as any other ii_Concurrent caller would in !readonly
|
* heap relation as any other ii_Concurrent caller would. We have no
|
||||||
* case. We have no reason to care about a concurrent VACUUM
|
* reason to care about a concurrent VACUUM operation, since there
|
||||||
* operation, since there isn't going to be a second scan of the heap
|
* isn't going to be a second scan of the heap that needs to be sure
|
||||||
* that needs to be sure that there was no concurrent recycling of
|
* that there was no concurrent recycling of TIDs.
|
||||||
* TIDs.
|
|
||||||
*/
|
*/
|
||||||
indexinfo->ii_Concurrent = !state->readonly;
|
indexinfo->ii_Concurrent = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't wait for uncommitted tuple xact commit/abort when index is a
|
* Don't wait for uncommitted tuple xact commit/abort when index is a
|
||||||
@@ -602,14 +591,11 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
|
|||||||
state->heaptuplespresent, RelationGetRelationName(heaprel),
|
state->heaptuplespresent, RelationGetRelationName(heaprel),
|
||||||
100.0 * bloom_prop_bits_set(state->filter))));
|
100.0 * bloom_prop_bits_set(state->filter))));
|
||||||
|
|
||||||
if (snapshot != SnapshotAny)
|
|
||||||
UnregisterSnapshot(snapshot);
|
|
||||||
|
|
||||||
bloom_free(state->filter);
|
bloom_free(state->filter);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Be tidy: */
|
/* Be tidy: */
|
||||||
if (snapshot == SnapshotAny && state->snapshot != InvalidSnapshot)
|
if (state->snapshot != InvalidSnapshot)
|
||||||
UnregisterSnapshot(state->snapshot);
|
UnregisterSnapshot(state->snapshot);
|
||||||
MemoryContextDelete(state->targetcontext);
|
MemoryContextDelete(state->targetcontext);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -382,7 +382,7 @@ SET client_min_messages = DEBUG1;
|
|||||||
verification functions is <literal>true</literal>, an additional
|
verification functions is <literal>true</literal>, an additional
|
||||||
phase of verification is performed against the table associated with
|
phase of verification is performed against the table associated with
|
||||||
the target index relation. This consists of a <quote>dummy</quote>
|
the target index relation. This consists of a <quote>dummy</quote>
|
||||||
<command>CREATE INDEX</command> operation, which checks for the
|
<command>CREATE INDEX CONCURRENTLY</command> operation, which checks for the
|
||||||
presence of all hypothetical new index tuples against a temporary,
|
presence of all hypothetical new index tuples against a temporary,
|
||||||
in-memory summarizing structure (this is built when needed during
|
in-memory summarizing structure (this is built when needed during
|
||||||
the basic first phase of verification). The summarizing structure
|
the basic first phase of verification). The summarizing structure
|
||||||
|
|||||||
Reference in New Issue
Block a user