From 23045846792f31e15a0b3ae10c9fa9fa60f44527 Mon Sep 17 00:00:00 2001
From: Ian Barwick <ian@2ndquadrant.com>
Date: Mon, 14 Oct 2019 09:31:47 +0900
Subject: [PATCH] Fix handling of upstream node change check

repmgrd has a check to see if the upstream node has unexpectedly
changed, e.g. if the repmgrd service is paused and the PostgreSQL
instance has been pointed to another node.

However this check was relying on the node record on the local node
being up-to-date, which may not be the case immediately after a
failover, when the node is still replaying records updated prior
to the node's own record being updated. In this case it will
mistakenly assume the node is following the original primary
and attempt to restart monitoring, which will fail as the original
primary is no longer available.

To prevent this, we check against the node's record on the upstream
node.

Addresses issue noted in GitHub #587 and #588.
---
 HISTORY                        |  3 ++-
 doc/appendix-release-notes.xml | 26 ++++++++++++++++++++++++
 repmgrd-physical.c             | 36 ++++++++++++++++++++++++++--------
 3 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/HISTORY b/HISTORY
index 95169f2f..ae297e79 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,10 +1,11 @@
-5.0     2019-09-??
+5.0     2019-10-??
         general: add PostgreSQL 12 support (Ian)
         general: parse configuration file using flex (Ian)
         repmgr: rename "repmgr daemon ..." commands to "repmgr service ..." (Ian)
         repmgr: improve data directory check (Ian)
         repmgr: improve extension check during "standby clone" (Ian)
 		repmgr: pass provided log level when executing repmgr remotely (Ian)
+        repmgrd: fix handling of upstream node change check (Ian)
 
 4.4     2019-06-27
         repmgr: improve "daemon status" output (Ian)
diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml
index e1dc3eab..6fa3a86a 100644
--- a/doc/appendix-release-notes.xml
+++ b/doc/appendix-release-notes.xml
@@ -228,8 +228,34 @@ conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'</programlistin
               or <literal>pg_monitor</literal> are permitted to do this as well.
             </para>
           </listitem>
+
+          <listitem>
+            <para>
+              &repmgrd;: Fix handling of upstream node change check.
+            </para>
+            <para>
+              &repmgrd; has a check to see if the upstream node has unexpectedly
+              changed, e.g. if the repmgrd service is paused and the PostgreSQL
+              instance has been pointed to another node.
+            </para>
+            <para>
+              However this check was relying on the node record on the local node
+              being up-to-date, which may not be the case immediately after a
+              failover, when the node is still replaying records updated prior
+              to the node's own record being updated. In this case it will
+              mistakenly assume the node is following the original primary
+              and attempt to restart monitoring, which will fail as the original
+              primary is no longer available.
+            </para>
+            <para>
+              To prevent this, the node's record on the upstream node is checked
+              to see if the reported upstream <literal>node_id</literal> matches
+              the expected <literal>node_id</literal>. GitHub #587/#588.
+            </para>
+          </listitem>
         </itemizedlist>
       </para>
+
     </sect2>
   </sect1>
 
diff --git a/repmgrd-physical.c b/repmgrd-physical.c
index b51b0df9..ee3a8c56 100644
--- a/repmgrd-physical.c
+++ b/repmgrd-physical.c
@@ -2086,7 +2086,6 @@ loop:
 			}
 		}
 
-
 		if (got_SIGHUP)
 		{
 			handle_sighup(&local_conn, STANDBY);
@@ -2096,13 +2095,34 @@ loop:
 
 		if (local_monitoring_state == MS_NORMAL && last_known_upstream_node_id != local_node_info.upstream_node_id)
 		{
-			log_notice(_("local node %i's upstream appears to have changed, restarting monitoring"),
-					   local_node_info.node_id);
-			log_detail(_("currently monitoring upstream %i; new upstream is %i"),
-					   last_known_upstream_node_id,
-					   local_node_info.upstream_node_id);
-			close_connection(&upstream_conn);
-			return;
+			/*
+			 * It's possible that after a change of upstream, the local node record will not
+			 * yet have been updated with the new upstream node ID. Therefore we check the
+			 * node record on the upstream, and if that matches "last_known_upstream_node_id",
+			 * take that as the correct value.
+			 */
+
+			if (monitoring_state == MS_NORMAL)
+			{
+				t_node_info node_info_on_upstream = T_NODE_INFO_INITIALIZER;
+				record_status = get_node_record(primary_conn, config_file_options.node_id, &node_info_on_upstream);
+
+				if (last_known_upstream_node_id == node_info_on_upstream.upstream_node_id)
+				{
+					local_node_info.upstream_node_id = last_known_upstream_node_id;
+				}
+			}
+
+			if (last_known_upstream_node_id != local_node_info.upstream_node_id)
+			{
+				log_notice(_("local node %i's upstream appears to have changed, restarting monitoring"),
+						   local_node_info.node_id);
+				log_detail(_("currently monitoring upstream %i; new upstream is %i"),
+						   last_known_upstream_node_id,
+						   local_node_info.upstream_node_id);
+				close_connection(&upstream_conn);
+				return;
+			}
 		}
 
 		log_verbose(LOG_DEBUG, "sleeping %i seconds (parameter \"monitor_interval_secs\")",