repmgrd: improve node role change detection

This commit is contained in:
Ian Barwick
2018-10-19 11:25:11 +09:00
parent 15bbe04a6f
commit 77c9092794

View File

@@ -71,6 +71,8 @@ static void notify_followers(NodeInfoList *standby_nodes, int follow_node_id);
static void check_connection(t_node_info *node_info, PGconn **conn); static void check_connection(t_node_info *node_info, PGconn **conn);
static bool check_primary_status(int degraded_monitoring_elapsed);
static bool wait_primary_notification(int *new_primary_id); static bool wait_primary_notification(int *new_primary_id);
static FailoverState follow_new_primary(int new_primary_id); static FailoverState follow_new_primary(int new_primary_id);
static FailoverState witness_follow_new_primary(int new_primary_id); static FailoverState witness_follow_new_primary(int new_primary_id);
@@ -341,6 +343,13 @@ monitor_streaming_primary(void)
repmgrd_set_pid(local_conn, getpid(), pid_file); repmgrd_set_pid(local_conn, getpid(), pid_file);
} }
/*
* check that the local node is still primary, otherwise switch
* to standby monitoring
*/
if (check_primary_status(-1) == false)
return;
goto loop; goto loop;
} }
@@ -393,6 +402,71 @@ monitor_streaming_primary(void)
{ {
local_node_info.node_status = NODE_STATUS_UP; local_node_info.node_status = NODE_STATUS_UP;
if (check_primary_status(degraded_monitoring_elapsed) == false)
return;
goto loop;
}
}
/*
* Possibly attempt to find another node from the cached list and check
* whether there's a new primary - if so, add a hook for fencing? Otherwise
* keep looping; if the node starts up, check its status and switch
* monitoring mode.
*/
}
loop:
/* check node is still primary, if not restart monitoring */
if (check_primary_status(-1) == false)
return;
/* emit "still alive" log message at regular intervals, if requested */
if (config_file_options.log_status_interval > 0)
{
int log_status_interval_elapsed = calculate_elapsed(log_status_interval_start);
if (log_status_interval_elapsed >= config_file_options.log_status_interval)
{
log_info(_("monitoring primary node \"%s\" (node ID: %i) in %s state"),
local_node_info.node_name,
local_node_info.node_id,
print_monitoring_state(monitoring_state));
if (monitoring_state == MS_DEGRADED)
{
log_detail(_("waiting for the node to become available"));
}
INSTR_TIME_SET_CURRENT(log_status_interval_start);
}
}
if (got_SIGHUP)
{
handle_sighup(&local_conn, PRIMARY);
}
log_verbose(LOG_DEBUG, "sleeping %i seconds (parameter \"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs);
}
}
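/*
* If monitoring a primary, it's possible that after an outage of the local node
* (due to e.g. a switchover), the node has come back as a standby. We therefore
* need to verify its status and, if everything looks OK, restart monitoring in
* standby mode.
*
* "degraded_monitoring_elapsed" is the number of seconds reported in the
* "reconnected after %i seconds" log and event messages; callers performing a
* routine check pass -1, in which case messages without an elapsed time are
* emitted instead.
*
* Returns false if the node was found to be a standby and the caller should
* leave primary monitoring so it can be restarted in standby mode; returns
* true if primary monitoring should continue.
*/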
bool
check_primary_status(int degraded_monitoring_elapsed)
{
PQExpBufferData event_details;
/* check to see if the node has been restored as a standby */ /* check to see if the node has been restored as a standby */
if (get_recovery_type(local_conn) == RECTYPE_STANDBY) if (get_recovery_type(local_conn) == RECTYPE_STANDBY)
{ {
@@ -400,9 +474,18 @@ monitor_streaming_primary(void)
initPQExpBuffer(&event_details); initPQExpBuffer(&event_details);
if (degraded_monitoring_elapsed > 0)
{
appendPQExpBuffer(&event_details, appendPQExpBuffer(&event_details,
_("reconnected to node after %i seconds, node is now a standby, switching to standby monitoring"), _("reconnected to node after %i seconds, node is now a standby, switching to standby monitoring"),
degraded_monitoring_elapsed); degraded_monitoring_elapsed);
}
else
{
appendPQExpBufferStr(&event_details,
_("node is now a standby, switching to standby monitoring"));
}
log_notice("%s", event_details.data); log_notice("%s", event_details.data);
termPQExpBuffer(&event_details); termPQExpBuffer(&event_details);
@@ -456,17 +539,28 @@ monitor_streaming_primary(void)
} }
if (resume_monitoring == true) if (resume_monitoring == true)
{
initPQExpBuffer(&event_details);
if (degraded_monitoring_elapsed > 0)
{ {
monitoring_state = MS_NORMAL; monitoring_state = MS_NORMAL;
log_notice(_("former primary has been restored as standby after %i seconds, updating node record and resuming monitoring"), log_notice(_("former primary has been restored as standby after %i seconds, updating node record and resuming monitoring"),
degraded_monitoring_elapsed); degraded_monitoring_elapsed);
initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details, appendPQExpBuffer(&event_details,
_("node restored as standby after %i seconds, monitoring connection to upstream node %i"), _("node restored as standby after %i seconds, monitoring connection to upstream node %i"),
degraded_monitoring_elapsed, degraded_monitoring_elapsed,
local_node_info.upstream_node_id); local_node_info.upstream_node_id);
}
else
{
appendPQExpBuffer(&event_details,
_("node has become a standby, monitoring connection to upstream node %i"),
local_node_info.upstream_node_id);
}
create_event_notification(new_primary_conn, create_event_notification(new_primary_conn,
&config_file_options, &config_file_options,
@@ -481,12 +575,11 @@ monitor_streaming_primary(void)
close_connection(&new_primary_conn); close_connection(&new_primary_conn);
/* restart monitoring as standby */ /* restart monitoring as standby */
return; return false;
} }
} }
else if (record_status == RECORD_NOT_FOUND) else if (record_status == RECORD_NOT_FOUND)
{ {
PQExpBufferData event_details;
initPQExpBuffer(&event_details); initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details, appendPQExpBuffer(&event_details,
@@ -511,6 +604,8 @@ monitor_streaming_primary(void)
} }
} }
else else
{
if (degraded_monitoring_elapsed > 0)
{ {
monitoring_state = MS_NORMAL; monitoring_state = MS_NORMAL;
@@ -528,53 +623,14 @@ monitor_streaming_primary(void)
log_notice("%s", event_details.data); log_notice("%s", event_details.data);
termPQExpBuffer(&event_details); termPQExpBuffer(&event_details);
goto loop;
}
}
}
/*
* Possibly attempt to find another node from the cached list and check
* whether there's a new primary - if so, add a hook for fencing? Otherwise
* keep looping; if the node starts up, check its status and switch
* monitoring mode.
*/
}
loop:
/* emit "still alive" log message at regular intervals, if requested */
if (config_file_options.log_status_interval > 0)
{
int log_status_interval_elapsed = calculate_elapsed(log_status_interval_start);
if (log_status_interval_elapsed >= config_file_options.log_status_interval)
{
log_info(_("monitoring primary node \"%s\" (node ID: %i) in %s state"),
local_node_info.node_name,
local_node_info.node_id,
print_monitoring_state(monitoring_state));
if (monitoring_state == MS_DEGRADED)
{
log_detail(_("waiting for the node to become available"));
}
INSTR_TIME_SET_CURRENT(log_status_interval_start);
} }
} }
if (got_SIGHUP) return true;
{
handle_sighup(&local_conn, PRIMARY);
}
log_verbose(LOG_DEBUG, "sleeping %i seconds (parameter \"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs);
}
} }
void void
monitor_streaming_standby(void) monitor_streaming_standby(void)
{ {
@@ -595,7 +651,7 @@ monitor_streaming_standby(void)
*/ */
if (local_node_info.upstream_node_id == UNKNOWN_NODE_ID) if (local_node_info.upstream_node_id == UNKNOWN_NODE_ID)
{ {
local_node_info.upstream_node_id = get_primary_node_id(local_conn); upstream_conn = get_primary_connection(local_conn, &local_node_info.upstream_node_id, NULL);
/* /*
* Terminate if there doesn't appear to be an active cluster primary. * Terminate if there doesn't appear to be an active cluster primary.
@@ -608,8 +664,12 @@ monitor_streaming_standby(void)
log_error(_("unable to determine an active primary for this cluster, terminating")); log_error(_("unable to determine an active primary for this cluster, terminating"));
terminate(ERR_BAD_CONFIG); terminate(ERR_BAD_CONFIG);
} }
}
(void) get_node_record(upstream_conn, local_node_info.upstream_node_id, &upstream_node_info);
}
else
{
record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info); record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info);
/* /*
@@ -636,6 +696,8 @@ monitor_streaming_standby(void)
log_debug("connecting to upstream node %i: \"%s\"", upstream_node_info.node_id, upstream_node_info.conninfo); log_debug("connecting to upstream node %i: \"%s\"", upstream_node_info.node_id, upstream_node_info.conninfo);
upstream_conn = establish_db_connection(upstream_node_info.conninfo, false); upstream_conn = establish_db_connection(upstream_node_info.conninfo, false);
}
/* /*
* Upstream node must be running at repmgrd startup. * Upstream node must be running at repmgrd startup.
@@ -652,6 +714,15 @@ monitor_streaming_standby(void)
terminate(ERR_DB_CONN); terminate(ERR_DB_CONN);
} }
record_status = get_node_record(upstream_conn, local_node_info.node_id, &local_node_info);
if (upstream_node_info.node_id == local_node_info.node_id)
{
PQfinish(upstream_conn);
upstream_conn = NULL;
return;
}
/* /*
* refresh upstream node record from upstream node, so it's as up-to-date * refresh upstream node record from upstream node, so it's as up-to-date
* as possible * as possible
@@ -682,6 +753,23 @@ monitor_streaming_standby(void)
primary_conn = upstream_conn; primary_conn = upstream_conn;
} }
/*
* It's possible monitoring has been restarted after some outage which
* resulted in the local node being marked as inactive; if so mark it
* as active again.
*/
if (local_node_info.active == false)
{
if (update_node_record_set_active(primary_conn, local_node_info.node_id, true) == true)
{
PQExpBufferData event_details;
initPQExpBuffer(&event_details);
local_node_info.active = true;
}
}
primary_node_id = get_primary_node_id(primary_conn); primary_node_id = get_primary_node_id(primary_conn);
/* Log startup event */ /* Log startup event */
@@ -766,6 +854,7 @@ monitor_streaming_standby(void)
if (PQstatus(local_conn) != CONNECTION_OK) if (PQstatus(local_conn) != CONNECTION_OK)
{ {
check_connection(&local_node_info, &local_conn); check_connection(&local_node_info, &local_conn);
log_debug("YYY here");
} }
try_reconnect(&upstream_conn, &upstream_node_info); try_reconnect(&upstream_conn, &upstream_node_info);
@@ -781,11 +870,38 @@ monitor_streaming_standby(void)
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY) if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
{ {
ExecStatusType ping_result;
log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"), log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"),
upstream_node_info.node_name, upstream_node_info.node_id); upstream_node_info.node_name, upstream_node_info.node_id);
PQfinish(upstream_conn); PQfinish(upstream_conn);
upstream_conn = NULL; upstream_conn = NULL;
termPQExpBuffer(&event_details); termPQExpBuffer(&event_details);
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
/* check local connection */
ping_result = connection_ping(local_conn);
if (ping_result != PGRES_TUPLES_OK)
{
int i;
PQfinish(local_conn);
for (i = 0; i < config_file_options.repmgrd_standby_startup_timeout; i++)
{
local_conn = establish_db_connection(local_node_info.conninfo, false);
if (PQstatus(local_conn) == CONNECTION_OK)
break;
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
i + 1,
config_file_options.repmgrd_standby_startup_timeout);
sleep(1);
}
}
return; return;
} }
} }
@@ -1150,6 +1266,7 @@ loop:
check_connection(&local_node_info, &local_conn); check_connection(&local_node_info, &local_conn);
if (PQstatus(local_conn) != CONNECTION_OK) if (PQstatus(local_conn) != CONNECTION_OK)
{ {
if (local_node_info.active == true) if (local_node_info.active == true)
@@ -1190,11 +1307,39 @@ loop:
} }
else else
{ {
int stored_local_node_id = repmgrd_get_local_node_id(local_conn);
/*
* If the local node was restarted, we'll need to reinitialise values
* stored in shared memory.
*/
if (stored_local_node_id == UNKNOWN_NODE_ID)
{
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
repmgrd_set_pid(local_conn, getpid(), pid_file);
}
if (PQstatus(primary_conn) == CONNECTION_OK)
{
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
{
log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"),
upstream_node_info.node_name, upstream_node_info.node_id);
PQfinish(primary_conn);
primary_conn = NULL;
termPQExpBuffer(&event_details);
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
return;
}
}
log_debug("YYY here3 active ? %c", local_node_info.active ? 't' : 'f');
/* we've reconnected to the local node after an outage */ /* we've reconnected to the local node after an outage */
if (local_node_info.active == false) if (local_node_info.active == false)
{ {
int stored_local_node_id = UNKNOWN_NODE_ID;
if (PQstatus(primary_conn) == CONNECTION_OK) if (PQstatus(primary_conn) == CONNECTION_OK)
{ {
if (update_node_record_set_active(primary_conn, local_node_info.node_id, true) == true) if (update_node_record_set_active(primary_conn, local_node_info.node_id, true) == true)
@@ -1204,7 +1349,6 @@ loop:
initPQExpBuffer(&event_details); initPQExpBuffer(&event_details);
local_node_info.active = true; local_node_info.active = true;
appendPQExpBuffer(&event_details, appendPQExpBuffer(&event_details,
_("reconnected to local node \"%s\" (ID: %i), marking active"), _("reconnected to local node \"%s\" (ID: %i), marking active"),
local_node_info.node_name, local_node_info.node_name,
@@ -1222,18 +1366,6 @@ loop:
termPQExpBuffer(&event_details); termPQExpBuffer(&event_details);
} }
} }
/*
* If the local node was restarted, we'll need to reinitialise values
* stored in shared memory.
*/
stored_local_node_id = repmgrd_get_local_node_id(local_conn);
if (stored_local_node_id == UNKNOWN_NODE_ID)
{
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
repmgrd_set_pid(local_conn, getpid(), pid_file);
}
} }
} }
@@ -1634,14 +1766,28 @@ loop:
} }
/* refresh repmgr.nodes after "witness_sync_interval" seconds */ /*
* Refresh repmgr.nodes after "witness_sync_interval" seconds, and check if primary
* has changed
*/
{ {
int witness_sync_interval_elapsed = calculate_elapsed(witness_sync_interval_start); int witness_sync_interval_elapsed = calculate_elapsed(witness_sync_interval_start);
if (witness_sync_interval_elapsed >= config_file_options.witness_sync_interval) if (witness_sync_interval_elapsed >= config_file_options.witness_sync_interval)
{ {
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
{
log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"),
upstream_node_info.node_name, upstream_node_info.node_id);
PQfinish(primary_conn);
primary_conn = NULL;
return;
}
log_debug("synchronising witness node records"); log_debug("synchronising witness node records");
witness_copy_node_records(primary_conn, local_conn); witness_copy_node_records(primary_conn, local_conn);
INSTR_TIME_SET_CURRENT(witness_sync_interval_start); INSTR_TIME_SET_CURRENT(witness_sync_interval_start);
} }
} }
@@ -3132,6 +3278,8 @@ check_connection(t_node_info *node_info, PGconn **conn)
if (is_server_available(node_info->conninfo) == false) if (is_server_available(node_info->conninfo) == false)
{ {
log_warning(_("connection to node %i lost"), node_info->node_id); log_warning(_("connection to node %i lost"), node_info->node_id);
PQfinish(*conn);
*conn = NULL;
} }
if (PQstatus(*conn) != CONNECTION_OK) if (PQstatus(*conn) != CONNECTION_OK)