mirror of
https://github.com/postgres/postgres.git
synced 2025-05-03 22:24:49 +03:00
amcheck: Distinguish interrupted page deletion from corruption.
This prevents false-positive reports about "the first child of leftmost target page is not leftmost of its level", "block %u is not leftmost" and "left link/right link pair". They appeared if amcheck ran before VACUUM cleaned things, after a cluster exited recovery between the first-stage and second-stage WAL records of a deletion. Back-patch to v11 (all supported versions). Reviewed by Peter Geoghegan. Discussion: https://postgr.es/m/20231005025232.c7.nmisch@google.com
This commit is contained in:
parent
5f06918399
commit
6f81386a9c
@ -12,6 +12,7 @@ PGFILEDESC = "amcheck - function for verifying relation integrity"
|
|||||||
|
|
||||||
REGRESS = check check_btree check_heap
|
REGRESS = check check_btree check_heap
|
||||||
|
|
||||||
|
EXTRA_INSTALL = contrib/pg_walinspect
|
||||||
TAP_TESTS = 1
|
TAP_TESTS = 1
|
||||||
|
|
||||||
ifdef USE_PGXS
|
ifdef USE_PGXS
|
||||||
|
83
contrib/amcheck/t/005_pitr.pl
Normal file
83
contrib/amcheck/t/005_pitr.pl
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
# Copyright (c) 2021-2023, PostgreSQL Global Development Group

# Test integrity of intermediate states by PITR to those states
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

# Run $check_sql on $node in a session whose client_min_messages is debug1.
# Whatever psql wrote to stderr is echoed to this script's STDERR and is also
# handed back, so the caller can match on it.
#
# Returns the list (psql return value, captured stderr text).
sub run_check_with_debug
{
	my ($node, $check_sql) = @_;
	my $captured;

	# Only psql's scalar return value is wanted here; stderr comes back via
	# the reference passed in.
	my $status = $node->psql(
		'postgres',
		"SET client_min_messages = 'debug1'; $check_sql",
		stderr => \$captured);
	print STDERR $captured, "\n";

	return ($status, $captured);
}

# origin node: generate WAL records of interest.
my $origin = PostgreSQL::Test::Cluster->new('origin');
$origin->init(has_archiving => 1, allows_streaming => 1);
$origin->append_conf('postgresql.conf', 'autovacuum = off');
$origin->start;
$origin->backup('my_backup');

# Create a table with each of 6 PK values spanning 1/4 of a block.  Delete the
# first four, so one index leaf is eligible for deletion.  Make a replication
# slot just so pg_walinspect will always have access to later WAL.
#
# Nothing in this script is interpolated, hence the single-quoted heredoc.
my $setup_sql = <<'EOSQL';
BEGIN;
CREATE EXTENSION amcheck;
CREATE EXTENSION pg_walinspect;
CREATE TABLE not_leftmost (c text);
ALTER TABLE not_leftmost ALTER c SET STORAGE PLAIN;
INSERT INTO not_leftmost
SELECT repeat(n::text, database_block_size / 4)
FROM generate_series(1,6) t(n), pg_control_init();
ALTER TABLE not_leftmost ADD CONSTRAINT not_leftmost_pk PRIMARY KEY (c);
DELETE FROM not_leftmost WHERE c ~ '^[1-4]';
SELECT pg_create_physical_replication_slot('for_walinspect', true, false);
COMMIT;
EOSQL
$origin->safe_psql('postgres', $setup_sql);

# Remember where WAL stood before VACUUM, to bound the record search below.
my $pre_vacuum_lsn =
  $origin->safe_psql('postgres', "SELECT pg_current_wal_lsn()");

# VACUUM to delete the aforementioned leaf page.  Force an XLogFlush() by
# dropping a permanent table.  That way, the XLogReader infrastructure can
# always see VACUUM's records, even under synchronous_commit=off.  Finally,
# find the LSN of that VACUUM's last UNLINK_PAGE record.
my $vacuum_sql = <<"EOSQL";
SET synchronous_commit = off;
VACUUM (VERBOSE, INDEX_CLEANUP ON) not_leftmost;
CREATE TABLE XLogFlush ();
DROP TABLE XLogFlush;
SELECT max(start_lsn)
FROM pg_get_wal_records_info('$pre_vacuum_lsn', pg_current_wal_flush_lsn())
WHERE resource_manager = 'Btree' AND record_type = 'UNLINK_PAGE';
EOSQL
my $unlink_lsn = $origin->safe_psql('postgres', $vacuum_sql);
$origin->stop;
if (!$unlink_lsn)
{
	die "did not find UNLINK_PAGE record";
}

# replica node: amcheck at notable points in the WAL stream
my $replica = PostgreSQL::Test::Cluster->new('replica');
$replica->init_from_backup($origin, 'my_backup', has_restoring => 1);

# Recover up to, but not including, the UNLINK_PAGE record, then promote.
for my $setting (
	"recovery_target_lsn = '$unlink_lsn'",
	'recovery_target_inclusive = off',
	'recovery_target_action = promote')
{
	$replica->append_conf('postgresql.conf', $setting);
}
$replica->start;
unless (
	$replica->poll_query_until(
		'postgres', "SELECT pg_is_in_recovery() = 'f';"))
{
	die "Timed out while waiting for PITR promotion";
}

# recovery done; run amcheck
my ($parent_rc, $parent_stderr) = run_check_with_debug($replica,
	"SELECT bt_index_parent_check('not_leftmost_pk', true)");
is($parent_rc, 0, "bt_index_parent_check passes");
like(
	$parent_stderr,
	qr/interrupted page deletion detected/,
	"bt_index_parent_check: interrupted page deletion detected");

my ($check_rc) = run_check_with_debug($replica,
	"SELECT bt_index_check('not_leftmost_pk', true)");
is($check_rc, 0, "bt_index_check passes");

done_testing();
|
@ -147,6 +147,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
|
|||||||
bool rootdescend);
|
bool rootdescend);
|
||||||
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
|
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
|
||||||
BtreeLevel level);
|
BtreeLevel level);
|
||||||
|
static bool bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
|
||||||
|
BlockNumber start,
|
||||||
|
BTPageOpaque start_opaque);
|
||||||
static void bt_recheck_sibling_links(BtreeCheckState *state,
|
static void bt_recheck_sibling_links(BtreeCheckState *state,
|
||||||
BlockNumber btpo_prev_from_target,
|
BlockNumber btpo_prev_from_target,
|
||||||
BlockNumber leftcurrent);
|
BlockNumber leftcurrent);
|
||||||
@ -775,7 +778,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
|
|||||||
*/
|
*/
|
||||||
if (state->readonly)
|
if (state->readonly)
|
||||||
{
|
{
|
||||||
if (!P_LEFTMOST(opaque))
|
if (!bt_leftmost_ignoring_half_dead(state, current, opaque))
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||||
errmsg("block %u is not leftmost in index \"%s\"",
|
errmsg("block %u is not leftmost in index \"%s\"",
|
||||||
@ -829,8 +832,16 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
|
|||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sibling links should be in mutual agreement */
|
/*
|
||||||
if (opaque->btpo_prev != leftcurrent)
|
* Sibling links should be in mutual agreement. There arises
|
||||||
|
* leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
|
||||||
|
* of the parent's low-key downlink is half-dead. (A half-dead page
|
||||||
|
* has no downlink from its parent.) Under heavyweight locking, the
|
||||||
|
* last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
|
||||||
|
* Without heavyweight locking, validation of the P_NONE case remains
|
||||||
|
* unimplemented.
|
||||||
|
*/
|
||||||
|
if (opaque->btpo_prev != leftcurrent && leftcurrent != P_NONE)
|
||||||
bt_recheck_sibling_links(state, opaque->btpo_prev, leftcurrent);
|
bt_recheck_sibling_links(state, opaque->btpo_prev, leftcurrent);
|
||||||
|
|
||||||
/* Check level */
|
/* Check level */
|
||||||
@ -911,6 +922,66 @@ nextpage:
|
|||||||
return nextleveldown;
|
return nextleveldown;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
|
||||||
|
* half-dead, sibling-linked pages to the left. If a half-dead page appears
|
||||||
|
* under state->readonly, the database exited recovery between the first-stage
|
||||||
|
* and second-stage WAL records of a deletion.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
|
||||||
|
BlockNumber start,
|
||||||
|
BTPageOpaque start_opaque)
|
||||||
|
{
|
||||||
|
BlockNumber reached = start_opaque->btpo_prev,
|
||||||
|
reached_from = start;
|
||||||
|
bool all_half_dead = true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To handle the !readonly case, we'd need to accept BTP_DELETED pages and
|
||||||
|
* potentially observe nbtree/README "Page deletion and backwards scans".
|
||||||
|
*/
|
||||||
|
Assert(state->readonly);
|
||||||
|
|
||||||
|
while (reached != P_NONE && all_half_dead)
|
||||||
|
{
|
||||||
|
Page page = palloc_btree_page(state, reached);
|
||||||
|
BTPageOpaque reached_opaque = BTPageGetOpaque(page);
|
||||||
|
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
|
||||||
|
* writes that side-links will continue to point to the siblings.
|
||||||
|
* Check btpo_next for that property.
|
||||||
|
*/
|
||||||
|
all_half_dead = P_ISHALFDEAD(reached_opaque) &&
|
||||||
|
reached != start &&
|
||||||
|
reached != reached_from &&
|
||||||
|
reached_opaque->btpo_next == reached_from;
|
||||||
|
if (all_half_dead)
|
||||||
|
{
|
||||||
|
XLogRecPtr pagelsn = PageGetLSN(page);
|
||||||
|
|
||||||
|
/* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
|
||||||
|
ereport(DEBUG1,
|
||||||
|
(errcode(ERRCODE_NO_DATA),
|
||||||
|
errmsg_internal("harmless interrupted page deletion detected in index \"%s\"",
|
||||||
|
RelationGetRelationName(state->rel)),
|
||||||
|
errdetail_internal("Block=%u right block=%u page lsn=%X/%X.",
|
||||||
|
reached, reached_from,
|
||||||
|
LSN_FORMAT_ARGS(pagelsn))));
|
||||||
|
|
||||||
|
reached_from = reached;
|
||||||
|
reached = reached_opaque->btpo_prev;
|
||||||
|
}
|
||||||
|
|
||||||
|
pfree(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
return all_half_dead;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Raise an error when target page's left link does not point back to the
|
* Raise an error when target page's left link does not point back to the
|
||||||
* previous target page, called leftcurrent here. The leftcurrent page's
|
* previous target page, called leftcurrent here. The leftcurrent page's
|
||||||
@ -951,6 +1022,9 @@ bt_recheck_sibling_links(BtreeCheckState *state,
|
|||||||
BlockNumber btpo_prev_from_target,
|
BlockNumber btpo_prev_from_target,
|
||||||
BlockNumber leftcurrent)
|
BlockNumber leftcurrent)
|
||||||
{
|
{
|
||||||
|
/* passing metapage to BTPageGetOpaque() would give irrelevant findings */
|
||||||
|
Assert(leftcurrent != P_NONE);
|
||||||
|
|
||||||
if (!state->readonly)
|
if (!state->readonly)
|
||||||
{
|
{
|
||||||
Buffer lbuf;
|
Buffer lbuf;
|
||||||
@ -1934,7 +2008,8 @@ bt_child_highkey_check(BtreeCheckState *state,
|
|||||||
opaque = BTPageGetOpaque(page);
|
opaque = BTPageGetOpaque(page);
|
||||||
|
|
||||||
/* The first page we visit at the level should be leftmost */
|
/* The first page we visit at the level should be leftmost */
|
||||||
if (first && !BlockNumberIsValid(state->prevrightlink) && !P_LEFTMOST(opaque))
|
if (first && !BlockNumberIsValid(state->prevrightlink) &&
|
||||||
|
!bt_leftmost_ignoring_half_dead(state, blkno, opaque))
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||||
errmsg("the first child of leftmost target page is not leftmost of its level in index \"%s\"",
|
errmsg("the first child of leftmost target page is not leftmost of its level in index \"%s\"",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user