mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 16:16:29 +00:00
repmgrd: catch corner case in standby connection handle check
If repmgrd marks the local node as unavailable, and it was actually restarting but a failover event occurred before the next local node check, failover will continue with the stale connection handle. Add a final local node check just before starting the failover process, so repmgrd can reconnect if it wasn't able to before.
This commit is contained in:
@@ -2390,7 +2390,9 @@ update_node_record_set_primary(PGconn *conn, int this_node_id)
|
|||||||
" UPDATE repmgr.nodes "
|
" UPDATE repmgr.nodes "
|
||||||
" SET active = FALSE "
|
" SET active = FALSE "
|
||||||
" WHERE type = 'primary' "
|
" WHERE type = 'primary' "
|
||||||
" AND active IS TRUE ");
|
" AND active IS TRUE "
|
||||||
|
" AND node_id != %i ",
|
||||||
|
this_node_id);
|
||||||
|
|
||||||
res = PQexec(conn, query.data);
|
res = PQexec(conn, query.data);
|
||||||
termPQExpBuffer(&query);
|
termPQExpBuffer(&query);
|
||||||
@@ -2412,7 +2414,8 @@ update_node_record_set_primary(PGconn *conn, int this_node_id)
|
|||||||
appendPQExpBuffer(&query,
|
appendPQExpBuffer(&query,
|
||||||
" UPDATE repmgr.nodes"
|
" UPDATE repmgr.nodes"
|
||||||
" SET type = 'primary', "
|
" SET type = 'primary', "
|
||||||
" upstream_node_id = NULL "
|
" upstream_node_id = NULL, "
|
||||||
|
" active = TRUE "
|
||||||
" WHERE node_id = %i ",
|
" WHERE node_id = %i ",
|
||||||
this_node_id);
|
this_node_id);
|
||||||
|
|
||||||
@@ -3856,6 +3859,8 @@ connection_ping(PGconn *conn)
|
|||||||
{
|
{
|
||||||
PGresult *res = PQexec(conn, "SELECT TRUE");
|
PGresult *res = PQexec(conn, "SELECT TRUE");
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "connection_ping(): result is %s", PQresStatus(PQresultStatus(res)));
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -750,6 +750,17 @@ monitor_streaming_standby(void)
|
|||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
|
|
||||||
close_connection(&upstream_conn);
|
close_connection(&upstream_conn);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if local node is unreachable, make a last-minute attempt to reconnect
|
||||||
|
* before continuing with the failover process
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
check_connection(&local_node_info, &local_conn);
|
||||||
|
}
|
||||||
|
|
||||||
upstream_conn = try_reconnect(&upstream_node_info);
|
upstream_conn = try_reconnect(&upstream_node_info);
|
||||||
|
|
||||||
/* Node has recovered - log and continue */
|
/* Node has recovered - log and continue */
|
||||||
@@ -985,6 +996,15 @@ loop:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (PQstatus(primary_conn) == CONNECTION_OK && config_file_options.monitoring_history == true)
|
||||||
|
{
|
||||||
|
update_monitoring_history();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
connection_ping(local_conn);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* handle local node failure
|
* handle local node failure
|
||||||
*
|
*
|
||||||
@@ -1069,15 +1089,6 @@ loop:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (PQstatus(primary_conn) == CONNECTION_OK && config_file_options.monitoring_history == true)
|
|
||||||
{
|
|
||||||
update_monitoring_history();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
connection_ping(local_conn);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (got_SIGHUP)
|
if (got_SIGHUP)
|
||||||
{
|
{
|
||||||
log_debug("SIGHUP received");
|
log_debug("SIGHUP received");
|
||||||
|
|||||||
Reference in New Issue
Block a user