From f471316504fbe8c3d1253dd0a44b61b55fe6eef7 Mon Sep 17 00:00:00 2001
From: Ian Barwick <ian@2ndquadrant.com>
Date: Tue, 16 Oct 2018 11:27:40 +0900
Subject: [PATCH] repmgrd: improve promotion script failure handling

While scanning for a new primary following a promotion script failure,
repmgrd was treating a witness server as a potential new primary
and would attempt to "follow" it. Fortunately, "repmgr standby follow"
would do the right thing: choose the actual primary if one was
available, and otherwise do nothing. The cluster would therefore
eventually end up in the correct state, albeit for the wrong reason.

By skipping the witness server when looking for a potential new primary,
repmgrd will do the right thing if the original primary does come
back online, i.e. resume monitoring as before.
---
 repmgrd-physical.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/repmgrd-physical.c b/repmgrd-physical.c
index 0a2c808c..efec0493 100644
--- a/repmgrd-physical.c
+++ b/repmgrd-physical.c
@@ -1008,6 +1008,13 @@ monitor_streaming_standby(void)
 								continue;
 							}
 
+							/* skip witness node - we can't possibly "follow" that */
+
+							if (cell->node_info->type == WITNESS)
+							{
+								continue;
+							}
+
 							cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
 
 							if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
@@ -1030,6 +1037,7 @@ monitor_streaming_standby(void)
 							follow_new_primary(follow_node_id);
 						}
 					}
+
 					clear_node_info_list(&sibling_nodes);
 				}
 			}
@@ -2386,6 +2394,8 @@ follow_new_primary(int new_primary_id)
 	RecordStatus record_status = RECORD_NOT_FOUND;
 	bool		new_primary_ok = false;
 
+	log_verbose(LOG_DEBUG, "follow_new_primary(): new primary id is %i", new_primary_id);
+
 	record_status = get_node_record(local_conn, new_primary_id, &new_primary);
 
 	if (record_status != RECORD_FOUND)