mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 00:26:30 +00:00
repmgrd: finalize witness failover handling
This commit is contained in:
@@ -867,8 +867,8 @@ monitor_streaming_standby(void)
|
|||||||
follow_new_primary(follow_node_id);
|
follow_new_primary(follow_node_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
clear_node_info_list(&standby_nodes);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1183,7 +1183,93 @@ monitor_streaming_witness(void)
|
|||||||
|
|
||||||
if (monitoring_state == MS_DEGRADED)
|
if (monitoring_state == MS_DEGRADED)
|
||||||
{
|
{
|
||||||
// XXX
|
int degraded_monitoring_elapsed = calculate_elapsed(degraded_monitoring_start);
|
||||||
|
|
||||||
|
log_debug("monitoring node %i in degraded state for %i seconds",
|
||||||
|
upstream_node_info.node_id,
|
||||||
|
degraded_monitoring_elapsed);
|
||||||
|
|
||||||
|
if (is_server_available(upstream_node_info.conninfo) == true)
|
||||||
|
{
|
||||||
|
primary_conn = establish_db_connection(upstream_node_info.conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||||
|
{
|
||||||
|
upstream_node_info.node_status = NODE_STATUS_UP;
|
||||||
|
monitoring_state = MS_NORMAL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("reconnected to upstream node %i after %i seconds, resuming monitoring"),
|
||||||
|
upstream_node_info.node_id,
|
||||||
|
degraded_monitoring_elapsed);
|
||||||
|
|
||||||
|
create_event_notification(primary_conn,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"repmgrd_upstream_reconnect",
|
||||||
|
true,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
|
log_notice("%s", event_details.data);
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
goto loop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* unable to connect to former primary - check if another node
|
||||||
|
* has been promoted
|
||||||
|
*/
|
||||||
|
|
||||||
|
NodeInfoListCell *cell;
|
||||||
|
int follow_node_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
|
get_active_sibling_node_records(local_conn,
|
||||||
|
local_node_info.node_id,
|
||||||
|
local_node_info.upstream_node_id,
|
||||||
|
&standby_nodes);
|
||||||
|
|
||||||
|
if (standby_nodes.node_count > 0)
|
||||||
|
{
|
||||||
|
log_debug("scanning %i node records to detect new primary...", standby_nodes.node_count);
|
||||||
|
for (cell = standby_nodes.head; cell; cell = cell->next)
|
||||||
|
{
|
||||||
|
/* skip local node check, we did that above */
|
||||||
|
if (cell->node_info->node_id == local_node_info.node_id)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_debug("unable to connect to %i ... ", cell->node_info->node_id);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (get_recovery_type(cell->node_info->conn) == RECTYPE_PRIMARY)
|
||||||
|
{
|
||||||
|
follow_node_id = cell->node_info->node_id;
|
||||||
|
PQfinish(cell->node_info->conn);
|
||||||
|
cell->node_info->conn = NULL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
PQfinish(cell->node_info->conn);
|
||||||
|
cell->node_info->conn = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (follow_node_id != UNKNOWN_NODE_ID)
|
||||||
|
{
|
||||||
|
witness_follow_new_primary(follow_node_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clear_node_info_list(&standby_nodes);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
loop:
|
loop:
|
||||||
|
|
||||||
@@ -1229,6 +1315,31 @@ loop:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (got_SIGHUP)
|
||||||
|
{
|
||||||
|
log_debug("SIGHUP received");
|
||||||
|
|
||||||
|
if (reload_config(&config_file_options))
|
||||||
|
{
|
||||||
|
PQfinish(local_conn);
|
||||||
|
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
|
|
||||||
|
if (*config_file_options.log_file)
|
||||||
|
{
|
||||||
|
FILE *fd;
|
||||||
|
|
||||||
|
fd = freopen(config_file_options.log_file, "a", stderr);
|
||||||
|
if (fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "error reopening stderr to \"%s\": %s",
|
||||||
|
config_file_options.log_file, strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
got_SIGHUP = false;
|
||||||
|
}
|
||||||
|
|
||||||
sleep(config_file_options.monitor_interval_secs);
|
sleep(config_file_options.monitor_interval_secs);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -2491,8 +2602,8 @@ bool do_witness_failover(void)
|
|||||||
failover_state = FAILOVER_STATE_NONE;
|
failover_state = FAILOVER_STATE_NONE;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
case FAILOVER_STATE_FOLLOW_FAIL:
|
|
||||||
|
|
||||||
|
case FAILOVER_STATE_FOLLOW_FAIL:
|
||||||
/*
|
/*
|
||||||
* for whatever reason we were unable to follow the new primary -
|
* for whatever reason we were unable to follow the new primary -
|
||||||
* continue monitoring in degraded state
|
* continue monitoring in degraded state
|
||||||
@@ -2503,6 +2614,9 @@ bool do_witness_failover(void)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
monitoring_state = MS_DEGRADED;
|
||||||
|
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
/* should never reach here */
|
/* should never reach here */
|
||||||
|
|||||||
Reference in New Issue
Block a user