From 028c874f810f5dad91f4899352e25864db9a4528 Mon Sep 17 00:00:00 2001
From: Ian Barwick <barwick@gmail.com>
Date: Fri, 11 Jan 2019 16:34:04 +0900
Subject: [PATCH] "standby follow": simplify check when follow target has
 higher timeline

No need for a CHECKPOINT here, which simplifies things considerably.
---
 doc/repmgr-standby-follow.sgml | 43 ++++++++++++++++------------
 repmgr-action-standby.c        | 52 +++++++---------------------------
 2 files changed, 35 insertions(+), 60 deletions(-)
diff --git a/doc/repmgr-standby-follow.sgml b/doc/repmgr-standby-follow.sgml
index 86956568..fba841e1 100644
--- a/doc/repmgr-standby-follow.sgml
+++ b/doc/repmgr-standby-follow.sgml
@@ -22,6 +22,17 @@
       default location; no additional arguments are required.
     </para>
 
+	<para>
+	  By default &repmgr; will attempt to attach the standby to the current primary.
+	  If <option>--upstream-node-id</option> is provided, &repmgr; will attempt
+	  to attach the standby to the specified node, which can be another standby.
+	</para>
+
+    <para>
+      This command will force a restart of the standby server, which must be
+      running.
+    </para>
+
 	<tip>
       <para>
 		To re-add an inactive node to the replication cluster, use
@@ -29,29 +40,25 @@
       </para>
 	</tip>
 
-    <para>
-      This command will force a restart of the standby server, which must be
-      running. Additionally, in order to be able to verify whether the standby
-      can attach to the upstream node, a <command>CHECKPOINT</command> will
-      be executed - this requires superuser privileges, and will be executed
-      even with the <option>--dry-run</option> option.
-    </para>
-
-    <important>
-      <para>
-        If the &repmgr; database user is not a superuser, it will not be possible
-        to execute <command>CHECKPOINT</command>, meaning &repmgr; may not be
-        able to determine whether the upstream node can be followed.
-      </para>
-    </important>
-
-
 	<para>
 	  <command>repmgr standby follow</command> will wait up to
 	  <varname>standby_follow_timeout</varname> seconds (default: <literal>30</literal>)
-	  to verify the standby has actually connected to the new primary.
+	  to verify the standby has actually connected to the new upstream node.
 	</para>
 
+	<note>
+	  <para>
+		If <option>recovery_min_apply_delay</option> is set for the standby, it
+		will not attach to the new upstream node until it has replayed available
+		WAL.
+	  </para>
+	  <para>
+		Conversely, if the standby follows another standby
+		with <option>recovery_min_apply_delay</option> set, that standby's replay
+		state may actually be behind that of its new downstream node.
+	  </para>
+	</note>
+
   </refsect1>
 
   <refsect1>
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index 0bd766c8..f2d218a4 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -2633,8 +2633,6 @@ do_standby_follow(void)
 		else
 		{
 			XLogRecPtr local_xlogpos = get_current_lsn(local_conn);
-			bool can_follow = true;
-			XLogRecPtr local_min_recovery_location = InvalidXLogRecPtr;
 
 			/*
 			 * upstream has higher timeline - check where it forked off from this node's timeline
@@ -2650,8 +2648,6 @@ do_standby_follow(void)
 				exit(ERR_FOLLOW_FAIL);
 			}
 
-			local_min_recovery_location = get_min_recovery_location(config_file_options.data_directory);
-
 			/*
 			 * Local node has proceeded beyond the follow target's fork, so we
 			 * definitely can't attach.
@@ -2660,59 +2656,31 @@ do_standby_follow(void)
 			 * not contain all changes which are being replayed to this standby.
 			 */
 			if (local_xlogpos > follow_target_history->end)
-			{
-				can_follow = false;
-			}
-			else
-			{
-				/*
-				 * XXX can we establish what the window is where we *need* to execute
-				 * a CHECKPOINT?
-				 */
-
-				/*
-				 * Execute CHECKPOINT on the local node - we'll need this to update
-				 * the pg_control file so we can compare positions with the new upstream.
-				 * There is no way of avoiding this for --dry-run.
-				 */
-
-				if (is_superuser_connection(local_conn, NULL) == true)
-				{
-					log_notice(_("executing CHECKPOINT"));
-					checkpoint(local_conn);
-				}
-				else
-				{
-					log_warning(_("connection is not a superuser, unable to execute CHECKPOINT"));
-					log_detail(_("a CHECKPOINT is required in order to compare local and follow target states"));
-				}
-
-				log_debug("upstream tli: %i; branch LSN: %X/%X",
-						  follow_target_history->tli, format_lsn(follow_target_history->end));
-
-				if (follow_target_history->end < local_min_recovery_location)
-					can_follow = false;
-			}
-
-			if (can_follow == false)
 			{
 				log_error(_("this node cannot attach to follow target node %i"),
 						  follow_target_node_id);
 				log_detail(_("follow target server's timeline %i forked off current database system timeline %i before current recovery point %X/%X\n"),
 						   local_identification.timeline + 1,
 						   local_identification.timeline,
-						   format_lsn(local_min_recovery_location));
+						   format_lsn(local_xlogpos));
 
 				PQfinish(follow_target_conn);
-				PQfinish(follow_target_repl_conn);
 				PQfinish(local_conn);
 				exit(ERR_FOLLOW_FAIL);
 			}
+			if (runtime_options.dry_run == true)
+			{
+				log_info(_("local node %i can follow target node %i"),
+						 config_file_options.node_id,
+						 follow_target_node_id);
+				log_detail(_("local node's recovery point: %X/%X; follow target node's fork point: %X/%X"),
+						   format_lsn(local_xlogpos),
+						   format_lsn(follow_target_history->end));
+			}
 		}
 	}
 
 	PQfinish(local_conn);
-
 	PQfinish(follow_target_repl_conn);
 
 	if (runtime_options.dry_run == true)