From 09979eaa91dfe457a7f750a363757e0112dc0789 Mon Sep 17 00:00:00 2001
From: Ian Barwick <barwick@gmail.com>
Date: Tue, 11 Jun 2019 15:14:17 +0900
Subject: [PATCH] note that "standby follow" requires a primary to be available

While it's technically possible to have a standby follow another
standby while the primary is not available, repmgr will not be able
to update its metadata, which will leave the node records out of step
with the actual replication topology.

Update the documentation to make this clear, and provide a more helpful
error message if this situation occurs. The operation previously
failed anyway, but with an unhelpful message about not being able to
find a node record.
---
 HISTORY                        |  1 +
 doc/appendix-release-notes.xml |  9 ++++-
 doc/repmgr-standby-follow.xml  | 73 ++++++++++++++++++----------------
 repmgr-action-standby.c        | 24 ++++++++---
 4 files changed, 66 insertions(+), 41 deletions(-)
diff --git a/HISTORY b/HISTORY
index 9cc1ea56..a327f4b2 100644
--- a/HISTORY
+++ b/HISTORY
@@ -24,6 +24,7 @@
         repmgr: ensure BDR2-specific functionality cannot be used on
           BDR3 and later (Ian)
         repmgr: canonicalize the data directory path (Ian)
+        repmgr: note that "standby follow" requires a primary to be available (Ian)
         repmgrd: monitor standbys attached to primary (Ian)
         repmgrd: add "primary visibility consensus" functionality (Ian)
         repmgrd: fix memory leak which occurs while the monitored PostgreSQL
diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml
index cb08a81e..68f7290e 100644
--- a/doc/appendix-release-notes.xml
+++ b/doc/appendix-release-notes.xml
@@ -43,6 +43,14 @@
             </para>
 		  </listitem>
 
+		  <listitem>
+            <para>
+              <link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>:
+              note that an active, reachable cluster primary is required for this command,
+              and provide a more helpful error message if no reachable primary could be found.
+            </para>
+		  </listitem>
+
           <listitem>
             <para>
               &repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
@@ -75,7 +83,6 @@
             </para>
           </listitem>
 
-
           <listitem>
             <para>
               <link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>:
diff --git a/doc/repmgr-standby-follow.xml b/doc/repmgr-standby-follow.xml
index 6ff8e306..b0f4a6b6 100644
--- a/doc/repmgr-standby-follow.xml
+++ b/doc/repmgr-standby-follow.xml
@@ -20,49 +20,54 @@
       (&quot;follow target&quot;). Typically this will be the primary, but this
       command can also be used to attach the standby to another standby.
     </para>
+
     <para>
-      This command requires a valid
-      <filename>repmgr.conf</filename> file for the standby, either specified
-      explicitly with <literal>-f/--config-file</literal> or located in a
+      This command requires a valid <filename>repmgr.conf</filename> file for the standby,
+      either specified explicitly with <literal>-f/--config-file</literal> or located in a
       default location; no additional arguments are required.
     </para>
 
-	<para>
-	  By default &repmgr; will attempt to attach the standby to the current primary.
-	  If <option>--upstream-node-id</option> is provided, &repmgr; will attempt
-	  to attach the standby to the specified node, which can be another standby.
-	</para>
-
-    <para>
-      This command will force a restart of the standby server, which must be
-      running.
+    <para>The standby node (&quot;follow candidate&quot;) <emphasis>must</emphasis>
+      be running. If the new upstream (&quot;follow target&quot;) is not the primary,
+      the cluster primary <emphasis>must</emphasis> be running and accessible from the
+      standby node.
     </para>
 
-	<tip>
+    <tip>
       <para>
-		To re-add an inactive node to the replication cluster, use
-		<xref linkend="repmgr-node-rejoin"/>.
+        To re-add an inactive node to the replication cluster, use
+        <xref linkend="repmgr-node-rejoin"/>.
       </para>
-	</tip>
+    </tip>
 
-	<para>
-	  <command>repmgr standby follow</command> will wait up to
-	  <varname>standby_follow_timeout</varname> seconds (default: <literal>30</literal>)
-	  to verify the standby has actually connected to the new upstream node.
-	</para>
+    <para>
+      By default &repmgr; will attempt to attach the standby to the current primary.
+      If <option>--upstream-node-id</option> is provided, &repmgr; will attempt
+      to attach the standby to the specified node, which can be another standby.
+    </para>
 
-	<note>
-	  <para>
-	    If <option>recovery_min_apply_delay</option> is set for the standby, it
-	    will not attach to the new upstream node until it has replayed available
-	    WAL.
-	  </para>
-	  <para>
-	    Conversely, if the standby is attached to an upstream standby
-	    which has <option>recovery_min_apply_delay</option> set, the upstream
-            standby's replay state may actually be behind that of its new downstream node.
-	  </para>
-	</note>
+    <para>
+      This command will force a restart of PostgreSQL on the standby node.
+    </para>
+
+    <para>
+      <command>repmgr standby follow</command> will wait up to
+      <varname>standby_follow_timeout</varname> seconds (default: <literal>30</literal>)
+      to verify the standby has actually connected to the new upstream node.
+    </para>
+
+    <note>
+      <para>
+        If <option>recovery_min_apply_delay</option> is set for the standby, it
+        will not attach to the new upstream node until it has replayed available
+        WAL.
+      </para>
+      <para>
+        Conversely, if the standby is attached to an upstream standby
+        which has <option>recovery_min_apply_delay</option> set, the upstream
+        standby's replay state may actually be behind that of its new downstream node.
+      </para>
+    </note>
 
   </refsect1>
 
@@ -124,7 +129,7 @@
           <para>
             Note that when using &repmgrd;, <option>--upstream-node-id</option>
             should always be configured;
-			see <link linkend="repmgrd-automatic-failover-configuration">Automatic failover configuration</link>
+            see <link linkend="repmgrd-automatic-failover-configuration">Automatic failover configuration</link>
             for details.
           </para>
         </listitem>
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index f13f658f..304750c1 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -2784,12 +2784,6 @@ do_standby_follow(void)
 
 	PQfinish(local_conn);
 
-	if (runtime_options.dry_run == true)
-	{
-		log_info(_("prerequisites for executing STANDBY FOLLOW are met"));
-		exit(SUCCESS);
-	}
-
 	/*
 	 * Here we'll need a connection to the primary, if the upstream is not a primary.
 	 */
@@ -2802,12 +2796,30 @@ do_standby_follow(void)
 		primary_conn = get_primary_connection_quiet(follow_target_conn,
 													&primary_node_id,
 													NULL);
+
+		/*
+		 * If the follow target is not the primary and no other primary could
+		 * be found, abort because we won't be able to update the node record.
+		 */
+		if (PQstatus(primary_conn) != CONNECTION_OK)
+		{
+			log_error(_("unable to determine the cluster primary"));
+			log_detail(_("an active primary node is required for \"repmgr standby follow\""));
+			PQfinish(follow_target_conn);
+			exit(ERR_FOLLOW_FAIL);
+		}
 	}
 	else
 	{
 		primary_conn = follow_target_conn;
 	}
 
+	if (runtime_options.dry_run == true)
+	{
+		log_info(_("prerequisites for executing STANDBY FOLLOW are met"));
+		exit(SUCCESS);
+	}
+
 	initPQExpBuffer(&follow_output);
 
 	success = do_standby_follow_internal(