repmgrd: refactor primary failover code into separate function

2026-06-01 11:49:06 +00:00 · 2017-07-04 20:42:22 +09:00
parent f7f49ae85e
commit e1f4384f7e
1 changed files with 215 additions and 177 deletions
@@ -93,6 +93,10 @@ static void handle_sigint(SIGNAL_ARGS);
 #endif
 static PGconn *try_reconnect(const char *conninfo, NodeStatus *node_status);
 static bool do_primary_failover(void);
 static bool do_upstream_standby_failover(void);
 static ElectionResult do_election(void);
 static const char *_print_voting_status(NodeVotingStatus voting_status);
 static const char *_print_election_result(ElectionResult result);
@@ -699,9 +703,95 @@ monitor_streaming_standby(void)
 					goto loop;
 				}
-				/* still down after reconnect attempt(s) - */
+				/* still down after reconnect attempt(s) */
 				if (upstream_node_status == NODE_STATUS_DOWN)
 				{
 					bool failover_done = false;
 					if (upstream_node_info.type == PRIMARY)
 					{
 						failover_done = do_primary_failover();
 					}
 					else if (upstream_node_info.type == STANDBY)
 					{
 						failover_done = do_upstream_standby_failover();
 					}
 					// it's possible it will make sense to return in
 					// all cases to restart monitoring
 					if (failover_done == true)
 						return;
 				}
 			}
 		}
 	loop:
 		/* emit "still alive" log message at regular intervals, if requested */
 		if (config_file_options.log_status_interval > 0)
 		{
 			double		log_status_interval_elapsed = 0;
 			instr_time	log_status_interval_current;
 			INSTR_TIME_SET_CURRENT(log_status_interval_current);
 			INSTR_TIME_SUBTRACT(log_status_interval_current, log_status_interval_start);
 			log_status_interval_elapsed = INSTR_TIME_GET_DOUBLE(log_status_interval_current);
 			if ((int) log_status_interval_elapsed >= config_file_options.log_status_interval)
 			{
 				log_info(_("node \"%s\" (node ID: %i) monitoring upstream node \"%s\" (node ID: %i)"),
 						 local_node_info.node_name,
 						 local_node_info.node_id,
 						 upstream_node_info.node_name,
 						 upstream_node_info.node_id);
 				//log_debug(
 				INSTR_TIME_SET_CURRENT(log_status_interval_start);
 			}
 		}
 		/*
 		 * handle local node failure
 		 *
 		 * currently we'll just check the connection, and try to reconnect
 		 *
 		 * TODO: add timeout, after which we run in degraded state
 		 */
 		if (is_server_available(local_node_info.conninfo) == false)
 		{
 			log_warning(_("connection to local node %i lost"), local_node_info.node_id);
 			if (local_conn != NULL)
 			{
 				PQfinish(local_conn);
 				local_conn = NULL;
 			}
 		}
 		if (PQstatus(local_conn) != CONNECTION_OK)
 		{
 			log_info(_("attempting to reconnect"));
 			local_conn = establish_db_connection(config_file_options.conninfo, false);
 			if (PQstatus(local_conn) != CONNECTION_OK)
 			{
 				log_warning(_("reconnection failed"));
 			}
 			else
 			{
 				log_info(_("reconnected"));
 			}
 		}
 		sleep(1);
 	}
 }
 static bool
 do_primary_failover(void)
 {
 	/* attempt to initiate voting process */
 	ElectionResult election_result = do_election();
@@ -834,7 +924,7 @@ monitor_streaming_standby(void)
 			log_info(_("switching to primary monitoring mode"));
 			failover_state = FAILOVER_STATE_NONE;
-							return;
+			return true;
 		case FAILOVER_STATE_PRIMARY_REAPPEARED:
 			log_debug("failover state is PRIMARY_REAPPEARED");
@@ -851,11 +941,8 @@ monitor_streaming_standby(void)
 					   upstream_node_info.node_name, upstream_node_info.node_id);
 			failover_state = FAILOVER_STATE_NONE;
-							return;
+			return true;
 						case FAILOVER_STATE_PROMOTION_FAILED:
 							log_debug("failover state is PROMOTION FAILED");
 							break;
 		case FAILOVER_STATE_FOLLOWED_NEW_PRIMARY:
 			log_info(_("resuming standby monitoring mode"));
@@ -863,7 +950,7 @@ monitor_streaming_standby(void)
 					   upstream_node_info.node_name, upstream_node_info.node_id);
 			failover_state = FAILOVER_STATE_NONE;
-							return;
+			return true;
 		case FAILOVER_STATE_FOLLOWING_ORIGINAL_PRIMARY:
 			log_info(_("resuming standby monitoring mode"));
@@ -871,86 +958,37 @@ monitor_streaming_standby(void)
 					   upstream_node_info.node_name, upstream_node_info.node_id);
 			failover_state = FAILOVER_STATE_NONE;
-							return;
+			return true;
 		case FAILOVER_STATE_PROMOTION_FAILED:
 			log_debug("failover state is PROMOTION FAILED");
 			return false;
 		case FAILOVER_STATE_NO_NEW_PRIMARY:
 		case FAILOVER_STATE_WAITING_NEW_PRIMARY:
 			/* pass control back down to start_monitoring() */
 			// -> should kick off new election
-							return;
+			return false;
 		case FAILOVER_STATE_LOCAL_NODE_FAILURE:
 		case FAILOVER_STATE_UNKNOWN:
 		case FAILOVER_STATE_NONE:
 			log_debug("failover state is %i", failover_state);
-							break;
+			return false;
 					}
 	}
-			}
+	// should never reach here
-		}
+	return false;
 	loop:
 		/* emit "still alive" log message at regular intervals, if requested */
 		if (config_file_options.log_status_interval > 0)
 		{
 			double		log_status_interval_elapsed = 0;
 			instr_time	log_status_interval_current;
 			INSTR_TIME_SET_CURRENT(log_status_interval_current);
 			INSTR_TIME_SUBTRACT(log_status_interval_current, log_status_interval_start);
 			log_status_interval_elapsed = INSTR_TIME_GET_DOUBLE(log_status_interval_current);
 			if ((int) log_status_interval_elapsed >= config_file_options.log_status_interval)
 			{
 				log_info(_("node \"%s\" (node ID: %i) monitoring upstream node \"%s\" (node ID: %i)"),
 						 local_node_info.node_name,
 						 local_node_info.node_id,
 						 upstream_node_info.node_name,
 						 upstream_node_info.node_id);
 				//log_debug(
 				INSTR_TIME_SET_CURRENT(log_status_interval_start);
 			}
 		}
 		/*
 		 * handle local node failure
 		 *
 		 * currently we'll just check the connection, and try to reconnect
 		 *
 		 * TODO: add timeout, after which we run in degraded state
 		 */
 		if (is_server_available(local_node_info.conninfo) == false)
 		{
 			log_warning(_("connection to local node %i lost"), local_node_info.node_id);
 			if (local_conn != NULL)
 			{
 				PQfinish(local_conn);
 				local_conn = NULL;
 			}
 		}
 		if (PQstatus(local_conn) != CONNECTION_OK)
 		{
 			log_info(_("attempting to reconnect"));
 			local_conn = establish_db_connection(config_file_options.conninfo, false);
 			if (PQstatus(local_conn) != CONNECTION_OK)
 			{
 				log_warning(_("reconnection failed"));
 			}
 			else
 			{
 				log_info(_("reconnected"));
 			}
 		}
 		sleep(1);
 	}
 }
 /*
  * do_upstream_standby_failover()
  *
  * Placeholder for the failover handling invoked by the standby monitoring
  * code when the unreachable upstream node is of type STANDBY (as opposed to
  * PRIMARY, which is handled by do_primary_failover()).
  *
  * Returns true if a failover was carried out, false otherwise. No failover
  * logic exists yet, so this always returns false; the calling code only
  * leaves its monitoring loop when true is returned, so monitoring simply
  * continues.
  */
 static bool
 do_upstream_standby_failover(void)
 {
 	// not implemented yet
 	return false;
 }
 static FailoverState
 promote_self(void)
 {