1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Teach contrib/amcheck to check the unique constraint violation

Add the 'checkunique' argument to bt_index_check() and bt_index_parent_check().
When the flag is specified, the procedures check unique indexes for unique
constraint violations: among all equal keys in the index (including posting
list entries), only one heap entry should be visible.  An error is reported
otherwise.

pg_amcheck called with the --checkunique option will do the same check for all
the indexes it checks.

Author: Anastasia Lubennikova <lubennikovaav@gmail.com>
Author: Pavel Borisov <pashkin.elfe@gmail.com>
Author: Maxim Orlov <orlovmg@gmail.com>
Reviewed-by: Mark Dilger <mark.dilger@enterprisedb.com>
Reviewed-by: Zhihong Yu <zyu@yugabyte.com>
Reviewed-by: Peter Geoghegan <pg@bowt.ie>
Reviewed-by: Aleksander Alekseev <aleksander@timescale.com>
Discussion: https://postgr.es/m/CALT9ZEHRn5xAM5boga0qnrCmPV52bScEK2QnQ1HmUZDD301JEg%40mail.gmail.com
This commit is contained in:
Alexander Korotkov
2023-10-28 00:21:23 +03:00
parent 8b62b441ff
commit 5ae2087202
13 changed files with 830 additions and 23 deletions

View File

@ -81,11 +81,19 @@ typedef struct BtreeCheckState
bool heapallindexed;
/* Also making sure non-pivot tuples can be found by new search? */
bool rootdescend;
/* Also check uniqueness constraint if index is unique */
bool checkunique;
/* Per-page context */
MemoryContext targetcontext;
/* Buffer access strategy */
BufferAccessStrategy checkstrategy;
/*
* Info for uniqueness checking. Fill these fields once per index check.
*/
IndexInfo *indexinfo;
Snapshot snapshot;
/*
* Mutable state, for verification of particular page:
*/
@ -140,19 +148,33 @@ PG_FUNCTION_INFO_V1(bt_index_check);
PG_FUNCTION_INFO_V1(bt_index_parent_check);
static void bt_index_check_internal(Oid indrelid, bool parentcheck,
bool heapallindexed, bool rootdescend);
bool heapallindexed, bool rootdescend,
bool checkunique);
static inline void btree_index_checkable(Relation rel);
static inline bool btree_index_mainfork_expected(Relation rel);
static void bt_check_every_level(Relation rel, Relation heaprel,
bool heapkeyspace, bool readonly, bool heapallindexed,
bool rootdescend);
bool rootdescend, bool checkunique);
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
BtreeLevel level);
static void bt_recheck_sibling_links(BtreeCheckState *state,
BlockNumber btpo_prev_from_target,
BlockNumber leftcurrent);
static bool heap_entry_is_visible(BtreeCheckState *state, ItemPointer tid);
static void bt_report_duplicate(BtreeCheckState *state, ItemPointer tid,
BlockNumber block, OffsetNumber offset,
int posting, ItemPointer nexttid,
BlockNumber nblock, OffsetNumber noffset,
int nposting);
static void bt_entry_unique_check(BtreeCheckState *state, IndexTuple itup,
BlockNumber targetblock,
OffsetNumber offset, int *lVis_i,
ItemPointer *lVis_tid,
OffsetNumber *lVis_offset,
BlockNumber *lVis_block);
static void bt_target_page_check(BtreeCheckState *state);
static BTScanInsert bt_right_page_check_scankey(BtreeCheckState *state);
static BTScanInsert bt_right_page_check_scankey(BtreeCheckState *state,
OffsetNumber *rightfirstoffset);
static void bt_child_check(BtreeCheckState *state, BTScanInsert targetkey,
OffsetNumber downlinkoffnum);
static void bt_child_highkey_check(BtreeCheckState *state,
@ -192,7 +214,7 @@ static inline ItemPointer BTreeTupleGetHeapTIDCareful(BtreeCheckState *state,
static inline ItemPointer BTreeTupleGetPointsToTID(IndexTuple itup);
/*
* bt_index_check(index regclass, heapallindexed boolean)
* bt_index_check(index regclass, heapallindexed boolean, checkunique boolean)
*
* Verify integrity of B-Tree index.
*
@ -205,17 +227,20 @@ bt_index_check(PG_FUNCTION_ARGS)
{
Oid indrelid = PG_GETARG_OID(0);
bool heapallindexed = false;
bool checkunique = false;
if (PG_NARGS() == 2)
if (PG_NARGS() >= 2)
heapallindexed = PG_GETARG_BOOL(1);
if (PG_NARGS() == 3)
checkunique = PG_GETARG_BOOL(2);
bt_index_check_internal(indrelid, false, heapallindexed, false);
bt_index_check_internal(indrelid, false, heapallindexed, false, checkunique);
PG_RETURN_VOID();
}
/*
* bt_index_parent_check(index regclass, heapallindexed boolean)
* bt_index_parent_check(index regclass, heapallindexed boolean, rootdescend boolean, checkunique boolean)
*
* Verify integrity of B-Tree index.
*
@ -229,13 +254,16 @@ bt_index_parent_check(PG_FUNCTION_ARGS)
Oid indrelid = PG_GETARG_OID(0);
bool heapallindexed = false;
bool rootdescend = false;
bool checkunique = false;
if (PG_NARGS() >= 2)
heapallindexed = PG_GETARG_BOOL(1);
if (PG_NARGS() == 3)
if (PG_NARGS() >= 3)
rootdescend = PG_GETARG_BOOL(2);
if (PG_NARGS() == 4)
checkunique = PG_GETARG_BOOL(3);
bt_index_check_internal(indrelid, true, heapallindexed, rootdescend);
bt_index_check_internal(indrelid, true, heapallindexed, rootdescend, checkunique);
PG_RETURN_VOID();
}
@ -245,7 +273,7 @@ bt_index_parent_check(PG_FUNCTION_ARGS)
*/
static void
bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed,
bool rootdescend)
bool rootdescend, bool checkunique)
{
Oid heapid;
Relation indrel;
@ -356,7 +384,7 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed,
/* Check index, possibly against table it is an index on */
bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck,
heapallindexed, rootdescend);
heapallindexed, rootdescend, checkunique);
}
/* Roll back any GUC changes executed by index functions */
@ -457,7 +485,8 @@ btree_index_mainfork_expected(Relation rel)
*/
static void
bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
bool readonly, bool heapallindexed, bool rootdescend)
bool readonly, bool heapallindexed, bool rootdescend,
bool checkunique)
{
BtreeCheckState *state;
Page metapage;
@ -489,6 +518,8 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
state->readonly = readonly;
state->heapallindexed = heapallindexed;
state->rootdescend = rootdescend;
state->checkunique = checkunique;
state->snapshot = InvalidSnapshot;
if (state->heapallindexed)
{
@ -546,6 +577,23 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
}
}
/*
* We need a snapshot to check the uniqueness of the index. For better
* performance take it once per index check. If snapshot already taken
* reuse it.
*/
if (state->checkunique)
{
state->indexinfo = BuildIndexInfo(state->rel);
if (state->indexinfo->ii_Unique)
{
if (snapshot != SnapshotAny)
state->snapshot = snapshot;
else
state->snapshot = RegisterSnapshot(GetTransactionSnapshot());
}
}
Assert(!state->rootdescend || state->readonly);
if (state->rootdescend && !state->heapkeyspace)
ereport(ERROR,
@ -672,6 +720,8 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
}
/* Be tidy: */
if (snapshot == SnapshotAny && state->snapshot != InvalidSnapshot)
UnregisterSnapshot(state->snapshot);
MemoryContextDelete(state->targetcontext);
}
@ -912,6 +962,161 @@ nextpage:
return nextleveldown;
}
/*
 * Check whether the heap tuple referenced by an nbtree index entry is
 * visible under the snapshot stored in state->snapshot.
 */
static bool
heap_entry_is_visible(BtreeCheckState *state, ItemPointer tid)
{
	TupleTableSlot *scanslot;
	bool		visible;

	scanslot = table_slot_create(state->heaprel, NULL);
	visible = table_tuple_fetch_row_version(state->heaprel, tid,
											state->snapshot, scanslot);
	if (scanslot != NULL)
		ExecDropSingleTupleTableSlot(scanslot);

	return visible;
}
/*
 * Build a descriptive message for a unique constraint violation found in a
 * btree index -- two visible heap entries with equal keys -- and report
 * ERROR with ERRCODE_INDEX_CORRUPTED.
 */
static void
bt_report_duplicate(BtreeCheckState *state,
					ItemPointer tid, BlockNumber block, OffsetNumber offset,
					int posting,
					ItemPointer nexttid, BlockNumber nblock, OffsetNumber noffset,
					int nposting)
{
	char	   *itid;
	char	   *htid;
	char	   *nhtid;
	char	   *nitid = "";
	char	   *pposting = "";
	char	   *pnposting = "";

	/* Index TID of the first entry; second one only if it differs */
	itid = psprintf("tid=(%u,%u)", block, offset);
	if (nblock != block || noffset != offset)
		nitid = psprintf(" tid=(%u,%u)", nblock, noffset);

	/* Heap TIDs the two conflicting entries point to */
	htid = psprintf("tid=(%u,%u)",
					ItemPointerGetBlockNumberNoCheck(tid),
					ItemPointerGetOffsetNumberNoCheck(tid));
	nhtid = psprintf("tid=(%u,%u)",
					 ItemPointerGetBlockNumberNoCheck(nexttid),
					 ItemPointerGetOffsetNumberNoCheck(nexttid));

	/* Posting list positions, when applicable (negative means none) */
	if (posting >= 0)
		pposting = psprintf(" posting %u", posting);
	if (nposting >= 0)
		pnposting = psprintf(" posting %u", nposting);

	ereport(ERROR,
			(errcode(ERRCODE_INDEX_CORRUPTED),
			 errmsg("index uniqueness is violated for index \"%s\"",
					RelationGetRelationName(state->rel)),
			 errdetail("Index %s%s and%s%s (point to heap %s and %s) page lsn=%X/%X.",
					   itid, pposting, nitid, pnposting, htid, nhtid,
					   LSN_FORMAT_ARGS(state->targetlsn))));
}
/*
 * Check if current nbtree leaf entry complies with UNIQUE constraint.
 *
 * The lVis_* in/out parameters track the "last visible" heap entry seen so
 * far among equal-keyed index tuples: its posting list position (lVis_i, -1
 * for a plain tuple), its heap TID (lVis_tid), and the index block/offset it
 * was found at (lVis_block/lVis_offset).  The caller resets them whenever the
 * index key changes, so finding another visible entry while lVis_tid is
 * still valid means two visible heap tuples share an equal key -- report
 * that as corruption via bt_report_duplicate() (which raises ERROR).
 */
static void
bt_entry_unique_check(BtreeCheckState *state, IndexTuple itup,
BlockNumber targetblock, OffsetNumber offset, int *lVis_i,
ItemPointer *lVis_tid, OffsetNumber *lVis_offset,
BlockNumber *lVis_block)
{
ItemPointer tid;
bool has_visible_entry = false;
Assert(targetblock != P_NONE);
/*
 * Current tuple has posting list. Report duplicate if TID of any posting
 * list entry is visible and lVis_tid is valid.
 */
if (BTreeTupleIsPosting(itup))
{
for (int i = 0; i < BTreeTupleGetNPosting(itup); i++)
{
tid = BTreeTupleGetPostingN(itup, i);
if (heap_entry_is_visible(state, tid))
{
has_visible_entry = true;
if (ItemPointerIsValid(*lVis_tid))
{
bt_report_duplicate(state,
*lVis_tid, *lVis_block,
*lVis_offset, *lVis_i,
tid, targetblock,
offset, i);
}
/*
 * Prevent double reporting unique constraint violation between
 * the posting list entries of the first tuple on the page after
 * cross-page check.
 */
if (*lVis_block != targetblock && ItemPointerIsValid(*lVis_tid))
return;
/* Remember this entry as the new last-visible candidate. */
*lVis_i = i;
*lVis_tid = tid;
*lVis_offset = offset;
*lVis_block = targetblock;
}
}
}
/*
 * Current tuple has no posting list. If TID is visible save info about
 * it for the next comparisons in the loop in bt_page_check(). Report
 * duplicate if lVis_tid is already valid.
 */
else
{
tid = BTreeTupleGetHeapTID(itup);
if (heap_entry_is_visible(state, tid))
{
has_visible_entry = true;
if (ItemPointerIsValid(*lVis_tid))
{
bt_report_duplicate(state,
*lVis_tid, *lVis_block,
*lVis_offset, *lVis_i,
tid, targetblock,
offset, -1);
}
/* -1 marks a plain (non-posting) tuple's position. */
*lVis_i = -1;
*lVis_tid = tid;
*lVis_offset = offset;
*lVis_block = targetblock;
}
}
/*
 * No visible entry in this tuple while the equal key came from a previous
 * page: uniqueness can't be conclusively checked here (the hint suggests
 * dead tuples not yet vacuumed are the expected cause), so emit a DEBUG1
 * message instead of an error.
 */
if (!has_visible_entry && *lVis_block != InvalidBlockNumber &&
*lVis_block != targetblock)
{
char *posting = "";
if (*lVis_i >= 0)
posting = psprintf(" posting %u", *lVis_i);
ereport(DEBUG1,
(errcode(ERRCODE_NO_DATA),
errmsg("index uniqueness can not be checked for index tid=(%u,%u) in index \"%s\"",
targetblock, offset,
RelationGetRelationName(state->rel)),
errdetail("It doesn't have visible heap tids and key is equal to the tid=(%u,%u)%s (points to heap tid=(%u,%u)).",
*lVis_block, *lVis_offset, posting,
ItemPointerGetBlockNumberNoCheck(*lVis_tid),
ItemPointerGetOffsetNumberNoCheck(*lVis_tid)),
errhint("VACUUM the table and repeat the check.")));
}
}
/*
* Raise an error when target page's left link does not point back to the
* previous target page, called leftcurrent here. The leftcurrent page's
@ -1066,6 +1271,9 @@ bt_recheck_sibling_links(BtreeCheckState *state,
* - Various checks on the structure of tuples themselves. For example, check
* that non-pivot tuples have no truncated attributes.
*
* - For index with unique constraint make sure that only one of table entries
* for equal keys is visible.
*
* Furthermore, when state passed shows ShareLock held, function also checks:
*
* - That all child pages respect strict lower bound from parent's pivot
@ -1088,6 +1296,13 @@ bt_target_page_check(BtreeCheckState *state)
OffsetNumber max;
BTPageOpaque topaque;
/* last visible entry info for checking indexes with unique constraint */
int lVis_i = -1; /* the position of last visible item for
* posting tuple. for non-posting tuple (-1) */
ItemPointer lVis_tid = NULL;
BlockNumber lVis_block = InvalidBlockNumber;
OffsetNumber lVis_offset = InvalidOffsetNumber;
topaque = BTPageGetOpaque(state->target);
max = PageGetMaxOffsetNumber(state->target);
@ -1478,6 +1693,45 @@ bt_target_page_check(BtreeCheckState *state)
LSN_FORMAT_ARGS(state->targetlsn))));
}
/*
* If the index is unique verify entries uniqueness by checking the heap
* tuples visibility.
*/
if (state->checkunique && state->indexinfo->ii_Unique &&
P_ISLEAF(topaque) && !skey->anynullkeys)
bt_entry_unique_check(state, itup, state->targetblock, offset,
&lVis_i, &lVis_tid, &lVis_offset,
&lVis_block);
if (state->checkunique && state->indexinfo->ii_Unique &&
P_ISLEAF(topaque) && OffsetNumberNext(offset) <= max)
{
/* Save current scankey tid */
scantid = skey->scantid;
/*
* Invalidate scankey tid to make _bt_compare compare only keys in
* the item to report equality even if heap TIDs are different
*/
skey->scantid = NULL;
/*
* If next key tuple is different, invalidate last visible entry
* data (whole index tuple or last posting in index tuple). Key
* containing null value does not violate unique constraint and
* treated as different to any other key.
*/
if (_bt_compare(state->rel, skey, state->target,
OffsetNumberNext(offset)) != 0 || skey->anynullkeys)
{
lVis_i = -1;
lVis_tid = NULL;
lVis_block = InvalidBlockNumber;
lVis_offset = InvalidOffsetNumber;
}
skey->scantid = scantid; /* Restore saved scan key state */
}
/*
* * Last item check *
*
@ -1495,12 +1749,16 @@ bt_target_page_check(BtreeCheckState *state)
* available from sibling for various reasons, though (e.g., target is
* the rightmost page on level).
*/
else if (offset == max)
if (offset == max)
{
BTScanInsert rightkey;
BlockNumber rightblock_number;
/* first offset on a right index page (log only) */
OffsetNumber rightfirstoffset = InvalidOffsetNumber;
/* Get item in next/right page */
rightkey = bt_right_page_check_scankey(state);
rightkey = bt_right_page_check_scankey(state, &rightfirstoffset);
if (rightkey &&
!invariant_g_offset(state, rightkey, max))
@ -1534,6 +1792,45 @@ bt_target_page_check(BtreeCheckState *state)
state->targetblock, offset,
LSN_FORMAT_ARGS(state->targetlsn))));
}
/*
* If index has unique constraint make sure that no more than one
* found equal items is visible.
*/
rightblock_number = topaque->btpo_next;
if (state->checkunique && state->indexinfo->ii_Unique &&
rightkey && P_ISLEAF(topaque) && rightblock_number != P_NONE)
{
elog(DEBUG2, "check cross page unique condition");
/*
* Make _bt_compare compare only index keys without heap TIDs.
* rightkey->scantid is modified destructively but it is ok
* for it is not used later.
*/
rightkey->scantid = NULL;
/* The first key on the next page is the same */
if (_bt_compare(state->rel, rightkey, state->target, max) == 0 && !rightkey->anynullkeys)
{
elog(DEBUG2, "cross page equal keys");
state->target = palloc_btree_page(state,
rightblock_number);
topaque = BTPageGetOpaque(state->target);
if (P_IGNORE(topaque) || !P_ISLEAF(topaque))
break;
itemid = PageGetItemIdCareful(state, rightblock_number,
state->target,
rightfirstoffset);
itup = (IndexTuple) PageGetItem(state->target, itemid);
bt_entry_unique_check(state, itup, rightblock_number, rightfirstoffset,
&lVis_i, &lVis_tid, &lVis_offset,
&lVis_block);
}
}
}
/*
@ -1579,9 +1876,11 @@ bt_target_page_check(BtreeCheckState *state)
*
* Note that !readonly callers must reverify that target page has not
* been concurrently deleted.
*
* Save rightfirstdataoffset for detailed error message.
*/
static BTScanInsert
bt_right_page_check_scankey(BtreeCheckState *state)
bt_right_page_check_scankey(BtreeCheckState *state, OffsetNumber *rightfirstoffset)
{
BTPageOpaque opaque;
ItemId rightitem;
@ -1748,6 +2047,7 @@ bt_right_page_check_scankey(BtreeCheckState *state)
/* Return first data item (if any) */
rightitem = PageGetItemIdCareful(state, targetnext, rightpage,
P_FIRSTDATAKEY(opaque));
*rightfirstoffset = P_FIRSTDATAKEY(opaque);
}
else if (!P_ISLEAF(opaque) &&
nline >= OffsetNumberNext(P_FIRSTDATAKEY(opaque)))