repmgrd: restrict BDR monitoring to two node setup

It's not safe to have more than two nodes with this kind of "failover", so we don't need to select alternative nodes by priority.
2026-03-22 22:56:29 +00:00 · 2017-07-14 12:56:11 +09:00
parent 91c7013024
commit e3b3fb65f0
12 changed files with 96 additions and 212 deletions
--- a/repmgrd-bdr.c
+++ b/repmgrd-bdr.c
@@ -14,6 +14,9 @@

 static volatile sig_atomic_t got_SIGHUP = false;

+static void do_bdr_failover(NodeInfoList *nodes);
+
+
 void
 do_bdr_node_check(void)
 {
@@ -25,12 +28,8 @@ void
 monitor_bdr(void)
 {
 	NodeInfoList    nodes = T_NODE_INFO_LIST_INITIALIZER;
-	PGconn		   *monitoring_conn = NULL;
-	t_node_info	   *monitored_node = NULL;
 	t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER;
-
 	RecordStatus  record_status;
-	bool failover_done = false;

 	/* sanity check local database */
 	log_info(_("connecting to local database '%s'"),
@@ -118,7 +117,6 @@ monitor_bdr(void)
 	}

 	/* Log startup event */
-
 	create_event_record(local_conn,
 						&config_file_options,
 						config_file_options.node_id,
@@ -128,158 +126,72 @@ monitor_bdr(void)

 	/*
 	 * retrieve list of nodes - we'll need these if the DB connection goes away,
-	 * or if we're monitoring a non-local node
 	 */
-	get_node_records_by_priority(local_conn, &nodes);
+	get_all_node_records(local_conn, &nodes);

-		/* decided which node to monitor */
+	log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id);

-	if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_LOCAL)
-	{
-		// if local, reuse local_conn and node info
-		//record_status = get_node_record(local_conn, config_file_options.node_id, &monitored_node);
-		monitored_node = &local_node_info;
-
-		monitoring_conn = establish_db_connection(monitored_node->conninfo, false);
-		log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id);
-	}
-	else
-	{
-		NodeInfoListCell *cell;
-
-		for (cell = nodes.head; cell; cell = cell->next)
-		{
-			log_debug("main_loop_bdr() checking node %s %i", cell->node_info->node_name, cell->node_info->priority);
-
-			monitoring_conn = establish_db_connection(cell->node_info->conninfo, false);
-			if (PQstatus(monitoring_conn) == CONNECTION_OK)
-			{
-				log_debug("main_loop_bdr() monitoring node '%s' (ID %i, priority %i)",
-						  cell->node_info->node_name, cell->node_info->node_id, cell->node_info->priority);
-				/* fetch the record again, as the node list is transient */
-				monitored_node = get_node_record_pointer(monitoring_conn, cell->node_info->node_id);
-
-				break;
-			}
-		}
-	}
-
-	// check monitored_node not null!
+	log_info(_("starting continuous bdr node monitoring"));

 	while (true)
 	{
-		/* normal state - connection active */
-		if (PQstatus(monitoring_conn) == CONNECTION_OK)
+
+		/* monitoring loop */
+		log_verbose(LOG_DEBUG, "bdr check loop...");
+
+		switch (monitoring_state)
 		{
-			// XXX detail
-			log_info(_("starting continuous bdr node monitoring"));
-
-			/* monitoring loop */
-			do
+			case MS_NORMAL:
 			{
-				log_verbose(LOG_DEBUG, "bdr check loop...");
-
+				if (is_server_available(local_node_info.conninfo) == false)
 				{
-					NodeInfoListCell *cell;
-
-					for (cell = nodes.head; cell; cell = cell->next)
-					{
-						log_debug("bdr_monitor() %s", cell->node_info->node_name);
-					}
-				}
-
-				if (is_server_available(monitored_node->conninfo) == false)
-				{
-					t_node_info  *new_monitored_node;
-
 					// XXX improve
 					log_warning("connection problem!");
-					new_monitored_node = do_bdr_failover(&nodes, monitored_node);
-
-					if (new_monitored_node != NULL)
-					{
-						pfree(monitored_node);
-						monitored_node = new_monitored_node;
-					}
-					log_notice(_("monitored_node->node_name is now '%s' \n"), monitored_node->node_name);
+					do_bdr_failover(&nodes);
 				}
 				else
 				{
+					log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
+								config_file_options.monitor_interval_secs);
 					sleep(config_file_options.monitor_interval_secs);
 				}
-
-				if (got_SIGHUP)
-				{
-					/*
-					 * if we can reload, then could need to change
-					 * local_conn
-					 */
-					if (reload_config(&config_file_options))
-					{
-						PQfinish(local_conn);
-						local_conn = establish_db_connection(config_file_options.conninfo, true);
-						update_registration(local_conn);
-					}
-
-					/* reload node list */
-					get_node_records_by_priority(local_conn, &nodes);
-
-					got_SIGHUP = false;
-				}
-
-			} while (!failover_done);
-		}
-		/* local connection inactive - periodically try and connect */
-		/* TODO: make this an option */
-		else
-		{
-
-			monitoring_conn = establish_db_connection(monitored_node->conninfo, false);
-
-			if (PQstatus(monitoring_conn) == CONNECTION_OK)
+			}
+			case MS_DEGRADED:
 			{
-				// XXX event bdr_node_recovered -> if monitored == local node
-
-				if (monitored_node->node_id == config_file_options.node_id)
+				/* degraded monitoring */
+				if (is_server_available(local_node_info.conninfo) == true)
 				{
-					log_notice(_("local connection has returned, resuming monitoring"));
+					log_notice(_("monitored node %i has recovered"), local_node_info.node_id);
+					// do_bdr_recovery()
 				}
 				else
 				{
-					log_notice(_("connection to '%s' has returned, resuming monitoring"), monitored_node->node_name);
+					log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
+								config_file_options.monitor_interval_secs);
+					sleep(config_file_options.monitor_interval_secs);
 				}
 			}
-			else
-			{
-				sleep(config_file_options.monitor_interval_secs);
-			}
-
-
-			if (got_SIGHUP)
-			{
-				/*
-				 * if we can reload, then could need to change
-				 * local_conn
-				 */
-				if (reload_config(&config_file_options))
-				{
-					if (PQstatus(local_conn) == CONNECTION_OK)
-					{
-						PQfinish(local_conn);
-						local_conn = establish_db_connection(config_file_options.conninfo, true);
-						update_registration(local_conn);
-					}
-				}
-
-				/* reload node list */
-				if (PQstatus(local_conn) == CONNECTION_OK)
-					get_node_records_by_priority(local_conn, &nodes);
-
-				got_SIGHUP = false;
-			}
 		}

-		failover_done = false;
+		if (got_SIGHUP)
+		{
+			/*
+			 * if we can reload, then could need to change
+			 * local_conn
+			 */
+			if (reload_config(&config_file_options))
+			{
+				PQfinish(local_conn);
+				local_conn = establish_db_connection(config_file_options.conninfo, true);
+				update_registration(local_conn);
+			}
+
+			/* reload node list */
+			get_all_node_records(local_conn, &nodes);
+
+			got_SIGHUP = false;
+		}
+
 	}

 	return;
@@ -294,43 +206,44 @@ monitor_bdr(void)
 * we'll do the following:
 *
 *  - attempt to find another node, to set our node record as inactive
+ *    (there should be only one other node)
 *  - generate an event log record on that node
 *  - optionally execute `bdr_failover_command`, passing the conninfo string
 *    of that node to the command; this can be used for e.g. reconfiguring
 *    pgbouncer.
- *  - if mode is 'BDR_MONITORING_PRIORITY', redirect monitoring to that node.
 *
 */
-t_node_info *
-do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
+
+void
+do_bdr_failover(NodeInfoList *nodes)
 {
 	PGconn	   *next_node_conn = NULL;
 	NodeInfoListCell *cell;
 	bool	    failover_success = false;
 	PQExpBufferData event_details;
+	RecordStatus  record_status;
 	t_event_info event_info = T_EVENT_INFO_INITIALIZER;
-	t_node_info *new_monitored_node = NULL;
+	t_node_info target_node = T_NODE_INFO_INITIALIZER;

 	initPQExpBuffer(&event_details);

-	/* get next active priority node */
+	/* get next active node */

 	for (cell = nodes->head; cell; cell = cell->next)
 	{
 		log_debug("do_bdr_failover() %s", cell->node_info->node_name);

 		/* don't attempt to connect to the current monitored node, as that's the one which has failed  */
-		if (cell->node_info->node_id == monitored_node->node_id)
+		if (cell->node_info->node_id == local_node_info.node_id)
 			continue;

 		/* XXX skip inactive node? */
-
 		next_node_conn = establish_db_connection(cell->node_info->conninfo, false);

 		if (PQstatus(next_node_conn) == CONNECTION_OK)
 		{
 			// XXX check if record returned
-			new_monitored_node = get_node_record_pointer(next_node_conn, cell->node_info->node_id);
+			record_status = get_node_record(next_node_conn, cell->node_info->node_id, &target_node);

 			break;
 		}
@@ -345,40 +258,34 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)

 		log_error("%s", event_details.data);

-
 		// no other nodes found
 		// continue degraded monitoring until node is restored?
 	}
 	else
 	{
-		log_info(_("connecting to target node %s"), cell->node_info->node_name);
+		log_info(_("connecting to target node %s"), target_node.node_name);

 		failover_success = true;

-		event_info.conninfo_str = cell->node_info->conninfo;
-		event_info.node_name = cell->node_info->node_name;
+		event_info.conninfo_str = target_node.conninfo;
+		event_info.node_name = target_node.node_name;

 		/* update our own record on the other node */
-		if (monitored_node->node_id == config_file_options.node_id)
-		{
-			update_node_record_set_active(next_node_conn, monitored_node->node_id, false);
-		}
-
-		if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY)
-		{
-			log_notice(_("monitoring next available node by prioriy: %s (ID %i)"),
-					   new_monitored_node->node_name,
-					   new_monitored_node->node_id);
-		}
+		update_node_record_set_active(next_node_conn, local_node_info.node_id, false);

 		appendPQExpBuffer(&event_details,
 						  _("node '%s' (ID: %i) detected as failed; next available node is '%s' (ID: %i)"),
-						  monitored_node->node_name,
-						  monitored_node->node_id,
-						  cell->node_info->node_name,
-						  cell->node_info->node_id);
+						  local_node_info.node_name,
+						  local_node_info.node_id,
+						  target_node.node_name,
+						  target_node.node_id);
 	}

+	monitoring_state = MS_DEGRADED;
+	INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
+
+	// check here that the node hasn't come back up...
+
 	/*
 	 * Create an event record
 	 *
@@ -400,11 +307,7 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)

 	termPQExpBuffer(&event_details);

-	//failover_done = true;
-
-	if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY)
-		return new_monitored_node;

 	/* local monitoring mode - there's no new node to monitor */
-	return NULL;
+	return;
 }