From 028c874f810f5dad91f4899352e25864db9a4528 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 11 Jan 2019 16:34:04 +0900 Subject: [PATCH] "standby follow": simplify check when follow target has higher timeline No need for a CHECKPOINT here, which simplifies things considerably. --- doc/repmgr-standby-follow.sgml | 43 ++++++++++++++++------------ repmgr-action-standby.c | 52 +++++++--------------------------- 2 files changed, 35 insertions(+), 60 deletions(-) diff --git a/doc/repmgr-standby-follow.sgml b/doc/repmgr-standby-follow.sgml index 86956568..fba841e1 100644 --- a/doc/repmgr-standby-follow.sgml +++ b/doc/repmgr-standby-follow.sgml @@ -22,6 +22,17 @@ default location; no additional arguments are required. + + By default &repmgr; will attempt to attach the standby to the current primary. + If --upstream-node-id is provided, &repmgr; will attempt + to attach the standby to the specified node, which can be another standby. + + + + This command will force a restart of the standby server, which must be + running. + + To re-add an inactive node to the replication cluster, use @@ -29,29 +40,25 @@ - - This command will force a restart of the standby server, which must be - running. Additionally, in order to be able to verify whether the standby - can attach to the upstream node, a CHECKPOINT will - be executed - this requires superuser privileges, and will be executed - even with the --dry-run option. - - - - - If the &repmgr; database user is not a superuser, it will not be possible - to execute CHECKPOINT, meaning &repmgr; may not be - able to determine whether the upstream node can be followed. - - - - repmgr standby follow will wait up to standby_follow_timeout seconds (default: 30) - to verify the standby has actually connected to the new primary. + to verify the standby has actually connected to the new upstream node. + + + If recovery_min_apply_delay is set for the standby, it + will not attach to the new upstream node until it has replayed available + WAL. 
+ + + Conversely, if the standby follows another standby + with recovery_min_apply_delay set, that standby's replay + state may actually be behind that of its new downstream node. + + + diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 0bd766c8..f2d218a4 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -2633,8 +2633,6 @@ do_standby_follow(void) else { XLogRecPtr local_xlogpos = get_current_lsn(local_conn); - bool can_follow = true; - XLogRecPtr local_min_recovery_location = InvalidXLogRecPtr; /* * upstream has higher timeline - check where it forked off from this node's timeline @@ -2650,8 +2648,6 @@ do_standby_follow(void) exit(ERR_FOLLOW_FAIL); } - local_min_recovery_location = get_min_recovery_location(config_file_options.data_directory); - /* * Local node has proceeded beyond the follow target's fork, so we * definitely can't attach. @@ -2660,59 +2656,31 @@ do_standby_follow(void) * not contain all changes which are being replayed to this standby. */ if (local_xlogpos > follow_target_history->end) - { - can_follow = false; - } - else - { - /* - * XXX can we establish what the window is where we *need* to execute - * a CHECKPOINT? - */ - - /* - * Execute CHECKPOINT on the local node - we'll need this to update - * the pg_control file so we can compare positions with the new upstream. - * There is no way of avoiding this for --dry-run. 
- */ - - if (is_superuser_connection(local_conn, NULL) == true) - { - log_notice(_("executing CHECKPOINT")); - checkpoint(local_conn); - } - else - { - log_warning(_("connection is not a superuser, unable to execute CHECKPOINT")); - log_detail(_("a CHECKPOINT is required in order to compare local and follow target states")); - } - - log_debug("upstream tli: %i; branch LSN: %X/%X", - follow_target_history->tli, format_lsn(follow_target_history->end)); - - if (follow_target_history->end < local_min_recovery_location) - can_follow = false; - } - - if (can_follow == false) { log_error(_("this node cannot attach to follow target node %i"), follow_target_node_id); log_detail(_("follow target server's timeline %i forked off current database system timeline %i before current recovery point %X/%X\n"), local_identification.timeline + 1, local_identification.timeline, - format_lsn(local_min_recovery_location)); + format_lsn(local_xlogpos)); PQfinish(follow_target_conn); - PQfinish(follow_target_repl_conn); PQfinish(local_conn); exit(ERR_FOLLOW_FAIL); } + if (runtime_options.dry_run == true) + { + log_info(_("local node %i can follow target node %i"), + config_file_options.node_id, + follow_target_node_id); + log_detail(_("local node's recovery point: %X/%X; follow target node's fork point: %X/%X"), + format_lsn(local_xlogpos), + format_lsn(follow_target_history->end)); + } } } PQfinish(local_conn); - PQfinish(follow_target_repl_conn); if (runtime_options.dry_run == true)