From 7969dc48000ce1c2fb25fdc6b07865ca52bfede0 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 8 Oct 2020 11:28:10 +0900 Subject: [PATCH] Enable "node rejoin" to join a target with a lower timeline This has been possible since PostgreSQL 9.6, but the node rejoin/follow check did not consider this possibility. --- HISTORY | 1 + doc/appendix-release-notes.xml | 8 +++++ doc/repmgr-node-rejoin.xml | 5 +++ repmgr-client.c | 64 ++++++++++++++++++++++++++++------ 4 files changed, 68 insertions(+), 10 deletions(-) diff --git a/HISTORY b/HISTORY index 225756d2..a5de2e82 100644 --- a/HISTORY +++ b/HISTORY @@ -11,6 +11,7 @@ repmgr: report database connection error if the --optformat option was provided to "node check" (Ian) repmgr: improve "node rejoin" checks (Ian) + repmgr: enable "node rejoin" to join a target with a lower timeline (Ian) repmgr: improve output formatting for cluster matrix/crosscheck (Ian) repmgr: improve database connection failure error checking on the demotion candidate during "standby switchover" (Ian) diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml index 03fb890d..00589481 100644 --- a/doc/appendix-release-notes.xml +++ b/doc/appendix-release-notes.xml @@ -107,6 +107,14 @@ + + + repmgr node rejoin: + enable a node to attach to a target node even if the target node + has a lower timeline (PostgreSQL 9.6 and later). + + + repmgr node check: diff --git a/doc/repmgr-node-rejoin.xml b/doc/repmgr-node-rejoin.xml index 0427e422..8500db58 100644 --- a/doc/repmgr-node-rejoin.xml +++ b/doc/repmgr-node-rejoin.xml @@ -392,6 +392,11 @@ the current standby's PostgreSQL log will contain entries with the text "record with incorrect prev-link". + + In PostgreSQL 9.5 and earlier, it is not possible to use + pg_rewind to attach to a target node with a lower + timeline than the local node. + We strongly recommend running repmgr node rejoin with the option first. 
Additionally it might be a good idea diff --git a/repmgr-client.c b/repmgr-client.c index 737e6fa8..942dd46e 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -4044,7 +4044,7 @@ check_standby_join(PGconn *upstream_conn, t_node_info *upstream_node_record, t_n /* * Here we'll perform some timeline sanity checks to ensure the follow target - * can actually be followed. + * can actually be followed or rejoined. * * See also comment for check_node_can_follow() in repmgrd-physical.c . */ @@ -4130,20 +4130,64 @@ check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *fo action, follow_target_identification.timeline); - /* upstream's timeline is lower than ours - impossible case */ + /* + * The upstream's timeline is lower than ours - we cannot follow, and rejoin + * requires PostgreSQL 9.6 and later. + */ if (follow_target_identification.timeline < local_tli) { - log_error(_("this node's timeline is ahead of the %s target node's timeline"), action); - log_detail(_("this node's timeline is %i, %s target node's timeline is %i"), - local_tli, - action, - follow_target_identification.timeline); - PQfinish(follow_target_repl_conn); - return false; + /* + * "repmgr standby follow" is impossible in this case + */ + if (is_rejoin == false) + { + log_error(_("this node's timeline is ahead of the %s target node's timeline"), action); + log_detail(_("this node's timeline is %i, %s target node's timeline is %i"), + local_tli, + action, + follow_target_identification.timeline); + + if (PQserverVersion(follow_target_conn) >= 90600) + { + log_hint(_("use \"repmgr node rejoin --force-rewind\" to reattach this node")); + } + + PQfinish(follow_target_repl_conn); + return false; + } + + /* + * pg_rewind can only rejoin to a lower timeline from PostgreSQL 9.6 + */ + if (PQserverVersion(follow_target_conn) < 90600) + { + log_error(_("this node's timeline is ahead of the %s target node's timeline"), action); + log_detail(_("this node's timeline is %i, %s target node's 
timeline is %i"), + local_tli, + action, + follow_target_identification.timeline); + + if (runtime_options.force_rewind_used == true) + { + log_hint(_("pg_rewind can only be used to rejoin to a node with a lower timeline from PostgreSQL 9.6")); + } + + PQfinish(follow_target_repl_conn); + return false; + } + + if (runtime_options.force_rewind_used == false) + { + log_notice(_("pg_rewind execution required for this node to attach to rejoin target node %i"), + follow_target_node_record->node_id); + log_hint(_("provide --force-rewind")); + PQfinish(follow_target_repl_conn); + return false; + } } /* timelines are the same - check relative positions */ - if (follow_target_identification.timeline == local_tli) + else if (follow_target_identification.timeline == local_tli) { XLogRecPtr follow_target_xlogpos = get_node_current_lsn(follow_target_conn);