diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 858d8d9f2fa..f7f3b8227fd 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -45,7 +45,13 @@ static void digestControlFile(ControlFileData *ControlFile, const char *content, size_t size); static void getRestoreCommand(const char *argv0); static void sanityChecks(void); -static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex); +static TimeLineHistoryEntry *getTimelineHistory(TimeLineID tli, bool is_source, + int *nentries); +static void findCommonAncestorTimeline(TimeLineHistoryEntry *a_history, + int a_nentries, + TimeLineHistoryEntry *b_history, + int b_nentries, + XLogRecPtr *recptr, int *tliIndex); static void ensureCleanShutdown(const char *argv0); static void disconnect_atexit(void); @@ -134,6 +140,8 @@ main(int argc, char **argv) XLogRecPtr chkptrec; TimeLineID chkpttli; XLogRecPtr chkptredo; + TimeLineID source_tli; + TimeLineID target_tli; XLogRecPtr target_wal_endrec; size_t size; char *buffer; @@ -332,14 +340,28 @@ main(int argc, char **argv) sanityChecks(); + /* + * Usually, the TLI can be found in the latest checkpoint record. But if + * the source server is just being promoted (or it's a standby that's + * following a primary that's just being promoted), and the checkpoint + * requested by the promotion hasn't completed yet, the latest timeline is + * in minRecoveryPoint. So we check which is later, the TLI of the + * minRecoveryPoint or the latest checkpoint. + */ + source_tli = Max(ControlFile_source.minRecoveryPointTLI, + ControlFile_source.checkPointCopy.ThisTimeLineID); + + /* Similarly for the target. */ + target_tli = Max(ControlFile_target.minRecoveryPointTLI, + ControlFile_target.checkPointCopy.ThisTimeLineID); + /* * Find the common ancestor timeline between the clusters. * * If both clusters are already on the same timeline, there's nothing to * do. */ - if (ControlFile_target.checkPointCopy.ThisTimeLineID == - ControlFile_source.checkPointCopy.ThisTimeLineID) + if (target_tli == source_tli) { pg_log_info("source and target cluster are on the same timeline"); rewind_needed = false; @@ -348,12 +370,31 @@ main(int argc, char **argv) else { XLogRecPtr chkptendrec; + TimeLineHistoryEntry *sourceHistory; + int sourceNentries; + + /* + * Retrieve timelines for both source and target, and find the point + * where they diverged. + */ + sourceHistory = getTimelineHistory(source_tli, true, &sourceNentries); + targetHistory = getTimelineHistory(target_tli, false, &targetNentries); + + findCommonAncestorTimeline(sourceHistory, sourceNentries, + targetHistory, targetNentries, + &divergerec, &lastcommontliIndex); - findCommonAncestorTimeline(&divergerec, &lastcommontliIndex); pg_log_info("servers diverged at WAL location %X/%X on timeline %u", LSN_FORMAT_ARGS(divergerec), targetHistory[lastcommontliIndex].tli); + /* + * Don't need the source history anymore. The target history is still + * needed by the routines in parsexlog.c, when we read the target WAL. + */ + pfree(sourceHistory); + + /* * Determine the end-of-WAL on the target. * @@ -654,7 +695,8 @@ perform_rewind(filemap_t *filemap, rewind_source *source, pg_fatal("source system was in unexpected state at end of rewind"); endrec = source->get_current_wal_insert_lsn(source); - endtli = ControlFile_source_after.checkPointCopy.ThisTimeLineID; + endtli = Max(ControlFile_source_after.checkPointCopy.ThisTimeLineID, + ControlFile_source_after.minRecoveryPointTLI); } } else @@ -796,16 +838,12 @@ MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b) } /* - * Retrieve timeline history for given control file which should behold - * either source or target. + * Retrieve timeline history for the source or target system. */ static TimeLineHistoryEntry * -getTimelineHistory(ControlFileData *controlFile, int *nentries) +getTimelineHistory(TimeLineID tli, bool is_source, int *nentries) { TimeLineHistoryEntry *history; - TimeLineID tli; - - tli = controlFile->checkPointCopy.ThisTimeLineID; /* * Timeline 1 does not have a history file, so there is no need to check @@ -826,12 +864,10 @@ getTimelineHistory(ControlFileData *controlFile, int *nentries) TLHistoryFilePath(path, tli); /* Get history file from appropriate source */ - if (controlFile == &ControlFile_source) + if (is_source) histfile = source->fetch_file(source, path, NULL); - else if (controlFile == &ControlFile_target) - histfile = slurpFile(datadir_target, path, NULL); else - pg_fatal("invalid control file"); + histfile = slurpFile(datadir_target, path, NULL); history = rewind_parseTimeLineHistory(histfile, tli, nentries); pg_free(histfile); @@ -841,12 +877,10 @@ getTimelineHistory(ControlFileData *controlFile, int *nentries) { int i; - if (controlFile == &ControlFile_source) + if (is_source) pg_log_debug("Source timeline history:"); - else if (controlFile == &ControlFile_target) - pg_log_debug("Target timeline history:"); else - Assert(false); + pg_log_debug("Target timeline history:"); /* * Print the target timeline history. @@ -866,28 +900,19 @@ getTimelineHistory(ControlFileData *controlFile, int *nentries) } /* - * Determine the TLI of the last common timeline in the timeline history of the - * two clusters. targetHistory is filled with target timeline history and - * targetNentries is number of items in targetHistory. *tliIndex is set to the - * index of last common timeline in targetHistory array, and *recptr is set to - * the position where the timeline history diverged (ie. the first WAL record - * that's not the same in both clusters). - * - * Control files of both clusters must be read into ControlFile_target/source - * before calling this routine. + * Determine the TLI of the last common timeline in the timeline history of + * two clusters. *tliIndex is set to the index of last common timeline in + * the arrays, and *recptr is set to the position where the timeline history + * diverged (ie. the first WAL record that's not the same in both clusters). */ static void -findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex) +findCommonAncestorTimeline(TimeLineHistoryEntry *a_history, int a_nentries, + TimeLineHistoryEntry *b_history, int b_nentries, + XLogRecPtr *recptr, int *tliIndex) { - TimeLineHistoryEntry *sourceHistory; - int sourceNentries; int i, n; - /* Retrieve timelines for both source and target */ - sourceHistory = getTimelineHistory(&ControlFile_source, &sourceNentries); - targetHistory = getTimelineHistory(&ControlFile_target, &targetNentries); - /* * Trace the history forward, until we hit the timeline diverge. It may * still be possible that the source and target nodes used the same @@ -896,21 +921,19 @@ findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex) * recovery processes. Hence check the start position of the new timeline * as well and move down by one extra timeline entry if they do not match. */ - n = Min(sourceNentries, targetNentries); + n = Min(a_nentries, b_nentries); for (i = 0; i < n; i++) { - if (sourceHistory[i].tli != targetHistory[i].tli || - sourceHistory[i].begin != targetHistory[i].begin) + if (a_history[i].tli != b_history[i].tli || + a_history[i].begin != b_history[i].begin) break; } if (i > 0) { i--; - *recptr = MinXLogRecPtr(sourceHistory[i].end, targetHistory[i].end); + *recptr = MinXLogRecPtr(a_history[i].end, b_history[i].end); *tliIndex = i; - - pg_free(sourceHistory); return; } else diff --git a/src/bin/pg_rewind/t/007_standby_source.pl b/src/bin/pg_rewind/t/007_standby_source.pl index 35fe27888ed..3f813929a66 100644 --- a/src/bin/pg_rewind/t/007_standby_source.pl +++ b/src/bin/pg_rewind/t/007_standby_source.pl @@ -83,7 +83,6 @@ $node_b->wait_for_catchup('node_c', 'write', $lsn); # A (primary) <--- B (standby) C (primary) $node_c->promote; -$node_c->safe_psql('postgres', "checkpoint"); # Insert a row in A. This causes A/B and C to have "diverged", so that it's diff --git a/src/bin/pg_rewind/t/008_min_recovery_point.pl b/src/bin/pg_rewind/t/008_min_recovery_point.pl index af262f3338a..c753a64fdb1 100644 --- a/src/bin/pg_rewind/t/008_min_recovery_point.pl +++ b/src/bin/pg_rewind/t/008_min_recovery_point.pl @@ -76,13 +76,6 @@ $node_1->wait_for_catchup('node_3'); # $node_1->stop('fast'); $node_3->promote; -# Force a checkpoint after the promotion. pg_rewind looks at the control -# file to determine what timeline the server is on, and that isn't updated -# immediately at promotion, but only at the next checkpoint. When running -# pg_rewind in remote mode, it's possible that we complete the test steps -# after promotion so quickly that when pg_rewind runs, the standby has not -# performed a checkpoint after promotion yet. -$node_3->safe_psql('postgres', "checkpoint"); # reconfigure node_1 as a standby following node_3 my $node_3_connstr = $node_3->connstr; @@ -108,8 +101,6 @@ $node_2->restart(); $node_3->wait_for_catchup('node_1'); $node_1->promote; -# Force a checkpoint after promotion, like earlier. -$node_1->safe_psql('postgres', "checkpoint"); # # We now have a split-brain with two primaries. Insert a row on both to diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm index aa16a1b3f18..373f6dfbf7f 100644 --- a/src/bin/pg_rewind/t/RewindTest.pm +++ b/src/bin/pg_rewind/t/RewindTest.pm @@ -198,14 +198,6 @@ sub promote_standby # the primary out-of-sync with the standby. $node_standby->promote; - # Force a checkpoint after the promotion. pg_rewind looks at the control - # file to determine what timeline the server is on, and that isn't updated - # immediately at promotion, but only at the next checkpoint. When running - # pg_rewind in remote mode, it's possible that we complete the test steps - # after promotion so quickly that when pg_rewind runs, the standby has not - # performed a checkpoint after promotion yet. - standby_psql("checkpoint"); - return; }