From b9ba97a36db3e0110f85f7e63bb244a9dceadf8b Mon Sep 17 00:00:00 2001
From: Ian Barwick <barwick@gmail.com>
Date: Fri, 1 Feb 2019 15:23:24 +0900
Subject: [PATCH] "standby switchover": check replication connection to
 upstream

Ensure repmgr checks the standby (promotion candidate) is currently
attached to the primary (demotion candidate).

Addresses issue reported in GitHub #519.
---
 HISTORY                         |  2 ++
 doc/appendix-release-notes.sgml | 24 ++++++++++++++++--------
 repmgr-action-standby.c         | 23 +++++++++++++++++------
 3 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/HISTORY b/HISTORY
index bf057b9e..429efc17 100644
--- a/HISTORY
+++ b/HISTORY
@@ -8,6 +8,8 @@
           when checking received WAL location; GitHub #518 (Ian)
         repmgr: ensure "standby switchover" verifies repmgr can read the
           data directory on the demotion candidate; GitHub #523 (Ian)
+        repmgr: ensure "standby switchover" verifies replication connection
+          exists; GitHub #519 (Ian)
         repmgr: when executing "standby follow" and "node rejoin", check that
           it will actually be possible to stream from the target node (Ian)
         repmgr: "standby switchover": improve handling of connection URIs when
diff --git a/doc/appendix-release-notes.sgml b/doc/appendix-release-notes.sgml
index b86c960a..56df7c60 100644
--- a/doc/appendix-release-notes.sgml
+++ b/doc/appendix-release-notes.sgml
@@ -105,14 +105,6 @@
             </para>
           </listitem>
 
-         <listitem>
-            <para>
-              Add check <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>
-              when comparing received WAL on the standby to the primary's shutdown location to avoid a potential
-              race condition if the standby's walreceiver has not yet flushed all received WAL to disk.
-              GitHub #518.
-            </para>
-          </listitem>
 
        </itemizedlist>
       </para>
@@ -131,6 +123,22 @@
             </para>
           </listitem>
 
+         <listitem>
+            <para>
+              &repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
+              avoid a potential race condition when comparing received WAL on the standby to the primary's shutdown location,
+              as the standby's walreceiver may not yet have flushed all received WAL to disk. GitHub #518.
+            </para>
+          </listitem>
+
+
+         <listitem>
+            <para>
+              &repmgr;: when executing <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
+              verify that the standby (promotion candidate) is currently attached to the primary (demotion candidate). GitHub #519.
+            </para>
+          </listitem>
+
           <listitem>
             <para>
               <application>repmgrd</application>:  on a cascaded standby, don't fail over if
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index dea953fa..48c11a19 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -3124,7 +3124,7 @@ do_standby_switchover(void)
 
 	if (record_status != RECORD_FOUND)
 	{
-		log_error(_("unable to retrieve node record for currentr primary (node %i)"),
+		log_error(_("unable to retrieve node record for current primary (node %i)"),
 				  remote_node_id);
 
 		PQfinish(local_conn);
@@ -3154,8 +3154,6 @@ do_standby_switchover(void)
 
 	/*
 	 * Check this standby is attached to the demotion candidate
-	 * TODO:
-	 *  - check application_name in pg_stat_replication
 	 */
 
 	if (local_node_record.upstream_node_id != remote_node_record.node_id)
@@ -3170,6 +3168,20 @@ do_standby_switchover(void)
 		exit(ERR_BAD_CONFIG);
 	}
 
+	if (is_downstream_node_attached(remote_conn, local_node_record.node_name) == false)
+	{
+		log_error(_("local node \"%s\" (ID: %i) is not attached to demotion candidate \"%s\" (ID: %i)"),
+				  local_node_record.node_name,
+				  local_node_record.node_id,
+				  remote_node_record.node_name,
+				  remote_node_record.node_id);
+
+		PQfinish(local_conn);
+		PQfinish(remote_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
 	log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name);
 
 	/* this will fill the %p event notification parameter */
@@ -3352,6 +3364,8 @@ do_standby_switchover(void)
 		exit(ERR_BAD_CONFIG);
 	}
 
+	/* check remote repmgr has the data directory correctly configured */
+
 	if (parse_data_directory_config(command_output.data) == false)
 	{
 		log_error(_("\"data_directory\" parameter in repmgr.conf on \"%s\" is incorrectly configured"),
@@ -3376,9 +3390,6 @@ do_standby_switchover(void)
 	}
 
 
-	/* check remote repmgr has the data directory correctly configured */
-
-	// - add repmgr node check --data-directory
 
 	/*
 	 * populate local node record with current state of various replication-related