mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
Avoid deleting critical WAL segments during pg_rewind
Previously, in unlucky cases, it was possible for pg_rewind to remove
certain WAL segments from the rewound demoted primary. In particular,
this happens if those files have been marked for archival (i.e., their
.ready files were created) but not yet archived: the newly promoted node
no longer has such files because it has already recycled them, but they
are likely critical for recovery in the demoted node. If pg_rewind
removes them, recovery is no longer possible.
Fix this by maintaining a hash table of files in this situation in the
scan that looks for a checkpoint, which the decide_file_actions phase
can consult so that it knows to preserve them.
Backpatch to 14. The problem also exists in 13, but that branch was not
blessed with commit eb00f1d4bf, so this patch is difficult to apply
there. Users of older releases will just have to continue to be extra
careful when rewinding.
Co-authored-by: Полина Бунгина (Polina Bungina) <bungina@gmail.com>
Co-authored-by: Alexander Kukushkin <cyberdemn@gmail.com>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reviewed-by: Atsushi Torikoshi <torikoshia@oss.nttdata.com>
Discussion: https://postgr.es/m/CAAtGL4AhzmBRsEsaDdz7065T+k+BscNadfTqP1NcPmsqwA5HBw@mail.gmail.com
This commit is contained in:
@@ -39,14 +39,14 @@
|
|||||||
* appearing in source and target systems.
|
* appearing in source and target systems.
|
||||||
*/
|
*/
|
||||||
static uint32 hash_string_pointer(const char *s);
|
static uint32 hash_string_pointer(const char *s);
|
||||||
#define SH_PREFIX filehash
|
#define SH_PREFIX filehash
|
||||||
#define SH_ELEMENT_TYPE file_entry_t
|
#define SH_ELEMENT_TYPE file_entry_t
|
||||||
#define SH_KEY_TYPE const char *
|
#define SH_KEY_TYPE const char *
|
||||||
#define SH_KEY path
|
#define SH_KEY path
|
||||||
#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
|
#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
|
||||||
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
|
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
|
||||||
#define SH_SCOPE static inline
|
#define SH_SCOPE static inline
|
||||||
#define SH_RAW_ALLOCATOR pg_malloc0
|
#define SH_RAW_ALLOCATOR pg_malloc0
|
||||||
#define SH_DECLARE
|
#define SH_DECLARE
|
||||||
#define SH_DEFINE
|
#define SH_DEFINE
|
||||||
#include "lib/simplehash.h"
|
#include "lib/simplehash.h"
|
||||||
@@ -61,7 +61,36 @@ static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
|
|||||||
|
|
||||||
static file_entry_t *insert_filehash_entry(const char *path);
|
static file_entry_t *insert_filehash_entry(const char *path);
|
||||||
static file_entry_t *lookup_filehash_entry(const char *path);
|
static file_entry_t *lookup_filehash_entry(const char *path);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A separate hash table which tracks WAL files that must not be deleted.
* Entries are keyed by file path; mere presence of a path in the table is
* what marks the file as protected (see keepwal_entry_exists).
|
||||||
|
*/
|
||||||
|
typedef struct keepwal_entry
|
||||||
|
{
|
||||||
|
const char *path; /* hash key; a pg_strdup'd copy made by keepwal_add_entry */
|
||||||
|
uint32 status; /* presumably the slot-state field lib/simplehash.h expects -- confirm */
|
||||||
|
} keepwal_entry;
|
||||||
|
|
||||||
|
#define SH_PREFIX keepwal
|
||||||
|
#define SH_ELEMENT_TYPE keepwal_entry
|
||||||
|
#define SH_KEY_TYPE const char *
|
||||||
|
#define SH_KEY path
|
||||||
|
#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
|
||||||
|
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
|
||||||
|
#define SH_SCOPE static inline
|
||||||
|
#define SH_RAW_ALLOCATOR pg_malloc0
|
||||||
|
#define SH_DECLARE
|
||||||
|
#define SH_DEFINE
|
||||||
|
#include "lib/simplehash.h"
|
||||||
|
|
||||||
|
#define KEEPWAL_INITIAL_SIZE 1000
|
||||||
|
|
||||||
|
|
||||||
|
static keepwal_hash *keepwal = NULL;
|
||||||
|
static bool keepwal_entry_exists(const char *path);
|
||||||
|
|
||||||
static int final_filemap_cmp(const void *a, const void *b);
|
static int final_filemap_cmp(const void *a, const void *b);
|
||||||
|
|
||||||
static bool check_file_excluded(const char *path, bool is_source);
|
static bool check_file_excluded(const char *path, bool is_source);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -207,6 +236,39 @@ lookup_filehash_entry(const char *path)
|
|||||||
return filehash_lookup(filehash, path);
|
return filehash_lookup(filehash, path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize a hash table to store WAL file names that must be kept.
*
* Sets the file-level 'keepwal' pointer.  keepwal_add_entry() asserts that
* this has happened, so call this first.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
keepwal_init(void)
|
||||||
|
{
|
||||||
|
/* An initial hash size out of thin air (KEEPWAL_INITIAL_SIZE is arbitrary) */
|
||||||
|
keepwal = keepwal_create(KEEPWAL_INITIAL_SIZE, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Mark the given file to prevent its removal.
 *
 * 'path' is inserted into the keepwal table.  If it is already present,
 * nothing changes.  The table keeps its own copy of the string, so the
 * caller may reuse or discard its buffer afterwards.
 */
|
||||||
|
void
|
||||||
|
keepwal_add_entry(const char *path)
|
||||||
|
{
|
||||||
|
keepwal_entry *entry;
|
||||||
|
bool found;
|
||||||
|
|
||||||
|
/* Should only be called with keepwal initialized */
|
||||||
|
Assert(keepwal != NULL);
|
||||||
|
|
||||||
|
entry = keepwal_insert(keepwal, path, &found);
|
||||||
|
|
||||||
|
/* Newly inserted: give the entry a private copy of the path */
if (!found)
|
||||||
|
entry->path = pg_strdup(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Return true if file is marked as not to be removed, false otherwise.
 *
 * This is a pure lookup in the keepwal table; nothing is inserted.
 *
 * NOTE(review): unlike keepwal_add_entry(), there is no
 * Assert(keepwal != NULL) here, so this relies on keepwal_init() having
 * been called earlier -- confirm that holds for every caller.
 */
|
||||||
|
static bool
|
||||||
|
keepwal_entry_exists(const char *path)
|
||||||
|
{
|
||||||
|
return keepwal_lookup(keepwal, path) != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Callback for processing source file list.
|
* Callback for processing source file list.
|
||||||
*
|
*
|
||||||
@@ -686,7 +748,15 @@ decide_file_action(file_entry_t *entry)
|
|||||||
}
|
}
|
||||||
else if (entry->target_exists && !entry->source_exists)
|
else if (entry->target_exists && !entry->source_exists)
|
||||||
{
|
{
|
||||||
/* File exists in target, but not source. Remove it. */
|
/*
|
||||||
|
* For files that exist in target but not in source, we check the
|
||||||
|
* keepwal hash table; any files listed therein must not be removed.
|
||||||
|
*/
|
||||||
|
if (keepwal_entry_exists(path))
|
||||||
|
{
|
||||||
|
pg_log_debug("Not removing file \"%s\" because it is required for recovery", path);
|
||||||
|
return FILE_ACTION_NONE;
|
||||||
|
}
|
||||||
return FILE_ACTION_REMOVE;
|
return FILE_ACTION_REMOVE;
|
||||||
}
|
}
|
||||||
else if (!entry->target_exists && !entry->source_exists)
|
else if (!entry->target_exists && !entry->source_exists)
|
||||||
|
@@ -110,4 +110,7 @@ extern filemap_t *decide_file_actions(void);
|
|||||||
extern void calculate_totals(filemap_t *filemap);
|
extern void calculate_totals(filemap_t *filemap);
|
||||||
extern void print_filemap(filemap_t *filemap);
|
extern void print_filemap(filemap_t *filemap);
|
||||||
|
|
||||||
|
extern void keepwal_init(void);
|
||||||
|
extern void keepwal_add_entry(const char *path);
|
||||||
|
|
||||||
#endif /* FILEMAP_H */
|
#endif /* FILEMAP_H */
|
||||||
|
@@ -43,6 +43,7 @@ tests += {
|
|||||||
't/007_standby_source.pl',
|
't/007_standby_source.pl',
|
||||||
't/008_min_recovery_point.pl',
|
't/008_min_recovery_point.pl',
|
||||||
't/009_growing_files.pl',
|
't/009_growing_files.pl',
|
||||||
|
't/010_keep_recycled_wals.pl',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@@ -175,6 +175,8 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
|
|||||||
XLogReaderState *xlogreader;
|
XLogReaderState *xlogreader;
|
||||||
char *errormsg;
|
char *errormsg;
|
||||||
XLogPageReadPrivate private;
|
XLogPageReadPrivate private;
|
||||||
|
XLogSegNo current_segno = 0;
|
||||||
|
TimeLineID current_tli = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The given fork pointer points to the end of the last common record,
|
* The given fork pointer points to the end of the last common record,
|
||||||
@@ -217,6 +219,25 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
|
|||||||
LSN_FORMAT_ARGS(searchptr));
|
LSN_FORMAT_ARGS(searchptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Detect if a new WAL file has been opened */
|
||||||
|
if (xlogreader->seg.ws_tli != current_tli ||
|
||||||
|
xlogreader->seg.ws_segno != current_segno)
|
||||||
|
{
|
||||||
|
char xlogfname[MAXFNAMELEN];
|
||||||
|
|
||||||
|
snprintf(xlogfname, MAXFNAMELEN, XLOGDIR "/");
|
||||||
|
|
||||||
|
/* update current values */
|
||||||
|
current_tli = xlogreader->seg.ws_tli;
|
||||||
|
current_segno = xlogreader->seg.ws_segno;
|
||||||
|
|
||||||
|
XLogFileName(xlogfname + sizeof(XLOGDIR),
|
||||||
|
current_tli, current_segno, WalSegSz);
|
||||||
|
|
||||||
|
/* Track this filename as one to not remove */
|
||||||
|
keepwal_add_entry(xlogfname);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if it is a checkpoint record. This checkpoint record needs to
|
* Check if it is a checkpoint record. This checkpoint record needs to
|
||||||
* be the latest checkpoint before WAL forked and not the checkpoint
|
* be the latest checkpoint before WAL forked and not the checkpoint
|
||||||
|
@@ -446,6 +446,9 @@ main(int argc, char **argv)
|
|||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Initialize hashtable that tracks WAL files protected from removal */
|
||||||
|
keepwal_init();
|
||||||
|
|
||||||
findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
|
findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
|
||||||
&chkptrec, &chkpttli, &chkptredo, restore_command);
|
&chkptrec, &chkpttli, &chkptredo, restore_command);
|
||||||
pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
|
pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
|
||||||
|
62
src/bin/pg_rewind/t/010_keep_recycled_wals.pl
Normal file
62
src/bin/pg_rewind/t/010_keep_recycled_wals.pl
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
|
||||||
|
#
|
||||||
|
# Test situation where a target data directory contains
|
||||||
|
# WAL files that were already recycled by the new primary.
|
||||||
|
#
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings FATAL => 'all';
|
||||||
|
use PostgreSQL::Test::Utils;
|
||||||
|
use Test::More;
|
||||||
|
|
||||||
|
use FindBin;
|
||||||
|
use lib $FindBin::RealBin;
|
||||||
|
use RewindTest;
|
||||||
|
|
||||||
|
RewindTest::setup_cluster();
|
||||||
|
$node_primary->enable_archiving();
|
||||||
|
RewindTest::start_primary();
|
||||||
|
|
||||||
|
RewindTest::create_standby();
|
||||||
|
$node_standby->enable_restoring($node_primary, 0);
|
||||||
|
$node_standby->reload();
|
||||||
|
|
||||||
|
RewindTest::primary_psql("CHECKPOINT"); # last common checkpoint
|
||||||
|
|
||||||
|
# Replace the primary's archive_command with one that always fails, so that
# WAL written from here on never makes it to the archive.
# We use "perl -e 'exit(1)'" as an alternative to "false", because the latter
|
||||||
|
# might not be available on Windows.
|
||||||
|
my $false = "$^X -e 'exit(1)'";
|
||||||
|
$node_primary->append_conf(
|
||||||
|
'postgresql.conf', qq(
|
||||||
|
archive_command = '$false'
|
||||||
|
));
|
||||||
|
$node_primary->reload();
|
||||||
|
|
||||||
|
# advance WAL on primary; this WAL segment will never make it to the archive
|
||||||
|
RewindTest::primary_psql("CREATE TABLE t(a int)");
|
||||||
|
RewindTest::primary_psql("INSERT INTO t VALUES(0)");
|
||||||
|
RewindTest::primary_psql("SELECT pg_switch_wal()");
|
||||||
|
|
||||||
|
RewindTest::promote_standby;
|
||||||
|
|
||||||
|
# new primary loses diverging WAL segment
|
||||||
|
RewindTest::standby_psql("INSERT INTO t values(0)");
|
||||||
|
RewindTest::standby_psql("SELECT pg_switch_wal()");
|
||||||
|
|
||||||
|
$node_standby->stop();
|
||||||
|
$node_primary->stop();
|
||||||
|
|
||||||
|
my ($stdout, $stderr) = run_command(
|
||||||
|
[
|
||||||
|
'pg_rewind', '--debug',
|
||||||
|
'--source-pgdata', $node_standby->data_dir,
|
||||||
|
'--target-pgdata', $node_primary->data_dir,
|
||||||
|
'--no-sync',
|
||||||
|
]);
|
||||||
|
|
||||||
|
like(
|
||||||
|
$stderr,
|
||||||
|
qr/Not removing file .* because it is required for recovery/,
|
||||||
|
"some WAL files were skipped");
|
||||||
|
|
||||||
|
done_testing();
|
@@ -3430,6 +3430,8 @@ json_manifest_perwalrange_callback
|
|||||||
json_ofield_action
|
json_ofield_action
|
||||||
json_scalar_action
|
json_scalar_action
|
||||||
json_struct_action
|
json_struct_action
|
||||||
|
keepwal_entry
|
||||||
|
keepwal_hash
|
||||||
keyEntryData
|
keyEntryData
|
||||||
key_t
|
key_t
|
||||||
lclContext
|
lclContext
|
||||||
|
Reference in New Issue
Block a user