repmgrd: catch corner case in standby connection handle check

If repmgrd marks the local node as unavailable while it is actually
restarting, and a failover event occurs before the next local node
check, failover will continue with the stale connection handle.

Add a final local node check just before starting the failover
process, so repmgrd can reconnect if it wasn't able to before.
This commit is contained in:
Ian Barwick
2018-04-24 21:49:55 +09:00
parent 4455ded935
commit 7822aa784f
2 changed files with 27 additions and 11 deletions

View File

@@ -2390,7 +2390,9 @@ update_node_record_set_primary(PGconn *conn, int this_node_id)
" UPDATE repmgr.nodes "
" SET active = FALSE "
" WHERE type = 'primary' "
" AND active IS TRUE ");
" AND active IS TRUE "
" AND node_id != %i ",
this_node_id);
res = PQexec(conn, query.data);
termPQExpBuffer(&query);
@@ -2412,7 +2414,8 @@ update_node_record_set_primary(PGconn *conn, int this_node_id)
appendPQExpBuffer(&query,
" UPDATE repmgr.nodes"
" SET type = 'primary', "
" upstream_node_id = NULL "
" upstream_node_id = NULL, "
" active = TRUE "
" WHERE node_id = %i ",
this_node_id);
@@ -3856,6 +3859,8 @@ connection_ping(PGconn *conn)
{
PGresult *res = PQexec(conn, "SELECT TRUE");
log_verbose(LOG_DEBUG, "connection_ping(): result is %s", PQresStatus(PQresultStatus(res)));
PQclear(res);
return;
}

View File

@@ -750,6 +750,17 @@ monitor_streaming_standby(void)
termPQExpBuffer(&event_details);
close_connection(&upstream_conn);
/*
* if local node is unreachable, make a last-minute attempt to reconnect
* before continuing with the failover process
*/
if (PQstatus(local_conn) != CONNECTION_OK)
{
check_connection(&local_node_info, &local_conn);
}
upstream_conn = try_reconnect(&upstream_node_info);
/* Node has recovered - log and continue */
@@ -985,6 +996,15 @@ loop:
}
}
if (PQstatus(primary_conn) == CONNECTION_OK && config_file_options.monitoring_history == true)
{
update_monitoring_history();
}
else
{
connection_ping(local_conn);
}
/*
* handle local node failure
*
@@ -1069,15 +1089,6 @@ loop:
}
if (PQstatus(primary_conn) == CONNECTION_OK && config_file_options.monitoring_history == true)
{
update_monitoring_history();
}
else
{
connection_ping(local_conn);
}
if (got_SIGHUP)
{
log_debug("SIGHUP received");