repmgrd: improve detection of status change from primary to standby

If repmgrd is running in degraded mode on a primary which has been stopped, then manually brought back online as a standby (e.g. by creating recovery.conf and starting the server), ensure it not only detects the change but also automatically updates the node record so it can resume monitoring the node as a standby. Previously, repmgrd looped while waiting for the record to be updated (as is done transparently when executing "repmgr node rejoin"), but if the record was not updated within the timeout period (e.g. by "repmgr standby register"), it would fail to resume monitoring as a standby. It seems reasonable to have repmgrd automatically update the node record, as this will restore failover capability as quickly as possible. If this is not desired, then the onus is on the user to shut down repmgrd while making the desired changes.
2026-07-16 06:19:05 +00:00 · 2018-02-22 11:19:37 +09:00
parent 98af51da03
commit 22b3a74fa0
5 changed files with 111 additions and 40 deletions
@@ -1,5 +1,6 @@
-4.0.4   2018-??-??
+4.0.4   2018-03-??
        repmgr: ensure "node rejoin" honours "--dry-run" option; GitHub #383 (Ian)
+        repmgrd: improve detection of status change from primary to standby (Ian)

 4.0.3   2018-02-15
        repmgr: improve switchover handling when "pg_ctl" used to control the
@@ -2344,8 +2344,7 @@ update_node_record_set_active(PGconn *conn, int this_node_id, bool active)

 	initPQExpBuffer(&query);

-	appendPQExpBuffer(
-					  &query,
+	appendPQExpBuffer(&query,
 					  "UPDATE repmgr.nodes SET active = %s "
 					  " WHERE node_id = %i",
 					  active == true ? "TRUE" : "FALSE",
@@ -2370,6 +2369,40 @@ update_node_record_set_active(PGconn *conn, int this_node_id, bool active)
 }


+bool
+update_node_record_set_active_standby(PGconn *conn, int this_node_id)
+{
+	PQExpBufferData query;
+	PGresult   *res = NULL;
+
+	initPQExpBuffer(&query);
+
+	appendPQExpBuffer(&query,
+					  "UPDATE repmgr.nodes "
+					  "   SET type = 'standby', "
+					  "       active = TRUE "
+					  " WHERE node_id = %i",
+					  this_node_id);
+
+	log_verbose(LOG_DEBUG, "update_node_record_set_active_standby():\n  %s", query.data);
+
+	res = PQexec(conn, query.data);
+	termPQExpBuffer(&query);
+
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		log_error(_("unable to update node record:\n  %s"),
+				  PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+
+	PQclear(res);
+
+	return true;
+}
+
+
 bool
 update_node_record_set_primary(PGconn *conn, int this_node_id)
 {
@@ -428,6 +428,7 @@ bool		truncate_node_records(PGconn *conn);

 bool		update_node_record_set_active(PGconn *conn, int this_node_id, bool active);
 bool		update_node_record_set_primary(PGconn *conn, int this_node_id);
+bool		update_node_record_set_active_standby(PGconn *conn, int this_node_id);
 bool		update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
 bool		update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active);
 bool		update_node_record_conn_priority(PGconn *conn, t_configuration_options *options);
@@ -40,7 +40,7 @@
   </listitem>

   <listitem>
-    <simpara>repmgrd is monitoring the primary node, but it is not available</simpara>
+    <simpara>repmgrd is monitoring the primary node, but it is not available (and no other node has been promoted as primary)</simpara>
   </listitem>
  </itemizedlist>
 </para>
@@ -69,7 +69,15 @@
  By default, <literal>repmgrd</literal> will continue in degraded monitoring mode indefinitely.
  However a timeout (in seconds) can be set with <varname>degraded_monitoring_timeout</varname>,
  after which <application>repmgrd</application> will terminate.
-
 </para>

+ <note>
+   <para>
+     If <application>repmgrd</application> is monitoring a primary node which has been stopped
+     and manually restarted as a standby attached to a new primary, it will automatically detect
+     the status change and update the node record to reflect the node's new status
+     as an active standby. It will then resume monitoring the node as a standby.
+   </para>
+ </note>
+
 </chapter>
@@ -325,7 +325,6 @@ monitor_streaming_primary(void)
 				else
 				{
 					local_node_info.node_status = NODE_STATUS_UP;
-					monitoring_state = MS_NORMAL;

 					initPQExpBuffer(&event_details);

@@ -353,54 +352,83 @@ monitor_streaming_primary(void)
 						else
 						{
 							RecordStatus record_status;
-							int i = 0;

 							log_debug("primary node id is now %i", primary_node_id);

-							/*
-							 * poll for a while until record type is returned as "STANDBY" - it's possible
-							 * that there's a gap between the server being restarted and the record
-							 * being updated
-							 */
-							for (i = 0; i < 30; i++)
-							{
-								/*
-								 * try and refresh the local node record from the primary, as the updated
-								 * local node record may not have been replicated yet
-								 */
-
-								record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);
-
-								if (record_status == RECORD_FOUND)
-								{
-									log_debug("type = %s", get_node_type_string(local_node_info.type));
-
-									if (local_node_info.type == STANDBY)
-									{
-										PQfinish(new_primary_conn);
-
-										/* XXX add event notification */
-										return;
-									}
-								}
-								sleep(1);
-							}
-
-							PQfinish(new_primary_conn);
+							record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);

 							if (record_status == RECORD_FOUND)
 							{
-								log_warning(_("repmgr node record is still %s"), get_node_type_string(local_node_info.type));
+								bool resume_monitoring = true;
+
+								log_debug("node %i is registered with type = %s",
+										  config_file_options.node_id,
+										  get_node_type_string(local_node_info.type));
+
+								/*
+								 * node has recovered as a standby but its metadata record still says primary - we can update that ourselves
+								 */
+								if (local_node_info.type == PRIMARY)
+								{
+									log_notice(_("node \"%s\" (ID: %i) still registered as primary, setting to standby"),
+											   config_file_options.node_name,
+											   config_file_options.node_id);
+
+									if (update_node_record_set_active_standby(new_primary_conn, config_file_options.node_id) == false)
+									{
+										resume_monitoring = false;
+									}
+									else
+									{
+										record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);
+
+										if (record_status != RECORD_FOUND)
+										{
+											resume_monitoring = false;
+										}
+									}
+								}
+
+								if (resume_monitoring == true)
+								{
+									monitoring_state = MS_NORMAL;
+
+									appendPQExpBuffer(&event_details,
+													  _("former primary has been restored as standby after %i seconds, updating node record and resuming monitoring"),
+													  degraded_monitoring_elapsed);
+
+									create_event_notification(new_primary_conn,
+															  &config_file_options,
+															  config_file_options.node_id,
+															  "repmgrd_standby_reconnect",
+															  true,
+															  event_details.data);
+									log_notice("%s", event_details.data);
+									termPQExpBuffer(&event_details);
+
+									PQfinish(new_primary_conn);
+
+									/* restart monitoring as standby */
+									return;
+								}
 							}
-							else
+							else if (record_status == RECORD_NOT_FOUND)
 							{
-								log_error(_("no metadata record found for this node"));
+								log_error(_("no metadata record found for this node on current primary %i"), primary_node_id);
 								log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node"));
+
+								PQfinish(new_primary_conn);
+
+								/* TODO: add event notification */
+								terminate(ERR_BAD_CONFIG);
 							}
+
 						}
 					}
 					else
 					{
+						monitoring_state = MS_NORMAL;
+
 						appendPQExpBuffer(&event_details,
 										  _("reconnected to primary node after %i seconds, resuming monitoring"),
 										  degraded_monitoring_elapsed);