From 1a0049f086b775ff0f660a3f504b5c26ff3ecc43 Mon Sep 17 00:00:00 2001
From: Ian Barwick <ian@2ndquadrant.com>
Date: Mon, 8 Aug 2016 14:20:28 +0900
Subject: [PATCH] repmgrd: prevent endless loops in failover with manual node

The LSN reported by the shared memory function defaults to "0/0"
(InvalidXLogRecPtr) - this indicates that the repmgrd on that node
hasn't been able to update it yet. However during failover several
places in the code assumed this is an error, which would cause
an endless loop waiting for updates which would never come.

To get around this without changing function definitions, we can
store an explicit message in the shared memory location field so the
caller can tell whether the other node hasn't yet updated the field,
or encountered situation which means it should not be considered
as a promotion candidate (which in most cases will be because
`failover` is set to `manual`.

Resolves GitHub #222.
---
 HISTORY   |  5 ++++
 repmgrd.c | 87 +++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/HISTORY b/HISTORY
index 72508957..f51c6e4b 100644
--- a/HISTORY
+++ b/HISTORY
@@ -2,6 +2,11 @@
         repmgr: suppress connection error display in `repmgr cluster show`
           unless `--verbose` supplied (Ian)
 
+3.1.5   2016-08-
+        repmgrd: in a failover situation, prevent endless looping when
+        attempting to establish the status of a node with
+        `failover=manual` (Ian)
+
 3.1.4   2016-07-12
         repmgr: new configuration option for setting "restore_command"
           in the recovery.conf file generated by repmgr (Martín)
diff --git a/repmgrd.c b/repmgrd.c
index d4bc19ae..a12c9674 100644
--- a/repmgrd.c
+++ b/repmgrd.c
@@ -41,7 +41,10 @@
 #include "access/xlogdefs.h"
 #include "pqexpbuffer.h"
 
+/* Message strings passed in repmgrSharedState->location */
 
+#define PASSIVE_NODE "PASSIVE_NODE"
+#define LSN_QUERY_ERROR "LSN_QUERY_ERROR"
 
 /* Local info */
 t_configuration_options local_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
@@ -784,6 +787,13 @@ standby_monitor(void)
 		{
 			log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), upstream_node_type);
 
+			/*
+			 * Set the location string in shared memory to indicate to other
+			 * repmgrd instances that we're *not* a promotion candidate
+			 */
+
+			update_shared_memory(PASSIVE_NODE);
+
 			for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
 			{
 				master_conn = get_master_connection(my_local_conn,
@@ -800,6 +810,10 @@ standby_monitor(void)
 				}
 				else
 				{
+					/*
+					 * XXX this is the only place where `retry_promote_interval_secs`
+					 * is used - this parameter should be renamed or possibly be replaced
+					 */
 					log_err(
 					    _("no new master found, waiting %i seconds before retry...\n"),
 					    local_options.retry_promote_interval_secs
@@ -829,6 +843,9 @@ standby_monitor(void)
 
 				terminate(ERR_DB_CON);
 			}
+
+			/*
+			 */
 		}
 		else if (local_options.failover == AUTOMATIC_FAILOVER)
 		{
@@ -1170,8 +1187,6 @@ do_master_failover(void)
 	XLogRecPtr	xlog_recptr;
 	bool		lsn_format_ok;
 
-	char		last_xlog_replay_location[MAXLEN];
-
 	PGconn	   *node_conn = NULL;
 
 	/*
@@ -1352,8 +1367,8 @@ do_master_failover(void)
 				  " considered as new master and exit.\n"),
 				PQerrorMessage(my_local_conn));
 		PQclear(res);
-		sprintf(last_xlog_replay_location, "'%X/%X'", 0, 0);
-		update_shared_memory(last_xlog_replay_location);
+
+		update_shared_memory(LSN_QUERY_ERROR);
 		terminate(ERR_DB_QUERY);
 	}
 	/* write last location in shared memory */
@@ -1403,6 +1418,7 @@ do_master_failover(void)
 
 		while (!nodes[i].is_ready)
 		{
+			char location_value[MAXLEN];
 
 			sqlquery_snprintf(sqlquery,
 							  "SELECT %s.repmgr_get_last_standby_location()",
@@ -1418,7 +1434,11 @@ do_master_failover(void)
 				terminate(ERR_DB_QUERY);
 			}
 
-			xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok);
+			/* Copy the returned value as we'll need to reference it a few times */
+			strncpy(location_value, PQgetvalue(res, 0, 0), MAXLEN);
+			PQclear(res);
+
+			xlog_recptr = lsn_to_xlogrecptr(location_value, &lsn_format_ok);
 
 			/* If position reported as "invalid", check for format error or
 			 * empty string; otherwise position is 0/0 and we need to continue
@@ -1426,10 +1446,36 @@ do_master_failover(void)
 			 */
 			if (xlog_recptr == InvalidXLogRecPtr)
 			{
+				bool continue_loop = true;
+
 				if (lsn_format_ok == false)
 				{
+
+					/*
+					 * The node is indicating it is not a promotion candidate -
+					 * in this case we can store its invalid LSN to ensure it
+					 * can't be a promotion candidate when comparing locations
+					 */
+					if (strcmp(location_value, PASSIVE_NODE) == 0)
+					{
+						log_debug("node %i is passive mode\n", nodes[i].node_id);
+						log_info(_("node %i will not be considered for promotion\n"), nodes[i].node_id);
+						nodes[i].xlog_location = InvalidXLogRecPtr;
+						continue_loop = false;
+					}
+					/*
+					 * This should probably never happen but if it does, rule the
+					 * node out as a promotion candidate
+					 */
+					else if (strcmp(location_value, LSN_QUERY_ERROR) == 0)
+					{
+						log_warning(_("node %i is unable to update its shared memory and will not be considered for promotion\n"), nodes[i].node_id);
+						nodes[i].xlog_location = InvalidXLogRecPtr;
+						continue_loop = false;
+					}
+
 					/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
-					if (*PQgetvalue(res, 0, 0) == '\0')
+					else if (*location_value == '\0')
 					{
 						log_crit(
 							_("unable to obtain LSN from node %i"), nodes[i].node_id
@@ -1438,8 +1484,8 @@ do_master_failover(void)
 							_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n")
 							);
 
-						PQclear(res);
 						PQfinish(node_conn);
+						/* XXX shouldn't we just ignore this node? */
 						exit(ERR_BAD_CONFIG);
 					}
 
@@ -1447,25 +1493,29 @@ do_master_failover(void)
 					 * Very unlikely to happen; in the absence of any better
 					 * strategy keep checking
 					 */
-					log_warning(_("unable to parse LSN \"%s\"\n"),
-								PQgetvalue(res, 0, 0));
+					else {
+						log_warning(_("unable to parse LSN \"%s\"\n"),
+									location_value);
+					}
 				}
 				else
 				{
 					log_debug(
 						_("invalid LSN returned from node %i: '%s'\n"),
 						nodes[i].node_id,
-						PQgetvalue(res, 0, 0)
-						);
+						location_value);
 				}
 
-				PQclear(res);
-
-				/* If position is 0/0, keep checking */
-				/* XXX we should add a timeout here to prevent infinite looping
+				/*
+				 * If the node is still reporting an InvalidXLogRecPtr, it means
+				 * its repmgrd hasn't yet had time to update it (either with a valid
+				 * XLogRecPtr or a message) so we continue looping.
+				 *
+				 * XXX we should add a timeout here to prevent infinite looping
 				 * if the other node's repmgrd is not up
 				 */
-				continue;
+				if (continue_loop == true)
+					continue;
 			}
 
 			if (nodes[i].xlog_location < xlog_recptr)
@@ -1473,8 +1523,7 @@ do_master_failover(void)
 				nodes[i].xlog_location = xlog_recptr;
 			}
 
-			log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, PQgetvalue(res, 0, 0));
-			PQclear(res);
+			log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, location_value);
 
 			ready_nodes++;
 			nodes[i].is_ready = true;
@@ -2150,7 +2199,7 @@ lsn_to_xlogrecptr(char *lsn, bool *format_ok)
 	{
 		if (format_ok != NULL)
 			*format_ok = false;
-		log_err(_("incorrect log location format: %s\n"), lsn);
+		log_warning(_("incorrect log location format: %s\n"), lsn);
 		return 0;
 	}