mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 00:26:30 +00:00
repmgrd: improve reconnection handling
Previously, if the server being monitored was not available, repmgrd would always close the existing connection handle and open a new one. However, in some cases (e.g. a brief network outage) the existing connection handle is still good and does not need to be reopened. Always reopening the connection could be particularly problematic if monitoring_history is on, as it risks leaving orphaned sessions on the primary which, given a sufficiently unstable network, could lead to all available backends being occupied. Instead, during an outage we now open a new connection to verify that the server is accessible. If the old connection is still usable (e.g. following a short network interruption), we continue using it and close the new one; if not (e.g. the server was restarted), we close the old one and use the new one.
This commit is contained in:
48
repmgrd.c
48
repmgrd.c
@@ -770,10 +770,10 @@ show_help(void)
|
||||
}
|
||||
|
||||
|
||||
PGconn *
|
||||
try_reconnect(t_node_info *node_info)
|
||||
void
|
||||
try_reconnect(PGconn **conn, t_node_info *node_info)
|
||||
{
|
||||
PGconn *conn;
|
||||
PGconn *our_conn;
|
||||
t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||
|
||||
int i;
|
||||
@@ -782,7 +782,6 @@ try_reconnect(t_node_info *node_info)
|
||||
|
||||
initialize_conninfo_params(&conninfo_params, false);
|
||||
|
||||
|
||||
/* we assume by now the conninfo string is parseable */
|
||||
(void) parse_conninfo_string(node_info->conninfo, &conninfo_params, NULL, false);
|
||||
|
||||
@@ -805,18 +804,47 @@ try_reconnect(t_node_info *node_info)
|
||||
* degraded monitoring? - make that configurable
|
||||
*/
|
||||
|
||||
conn = establish_db_connection_by_params(&conninfo_params, false);
|
||||
our_conn = establish_db_connection_by_params(&conninfo_params, false);
|
||||
|
||||
if (PQstatus(conn) == CONNECTION_OK)
|
||||
if (PQstatus(our_conn) == CONNECTION_OK)
|
||||
{
|
||||
free_conninfo_params(&conninfo_params);
|
||||
|
||||
log_info(_("connection to node %i succeeded"), node_info.node_id);
|
||||
|
||||
if (PQstatus(*conn) == CONNECTION_BAD)
|
||||
{
|
||||
log_verbose(LOG_INFO, "original connection handle returned CONNECTION_BAD, using new connection");
|
||||
close_connection(conn);
|
||||
*conn = our_conn;
|
||||
}
|
||||
else
|
||||
{
|
||||
ExecStatusType ping_result;
|
||||
|
||||
ping_result = connection_ping(*conn);
|
||||
|
||||
if (ping_result != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_info("original connnection no longer available, using new connection");
|
||||
close_connection(conn);
|
||||
*conn = our_conn;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("original connection is still available"));
|
||||
|
||||
PQfinish(our_conn);
|
||||
}
|
||||
}
|
||||
|
||||
node_info->node_status = NODE_STATUS_UP;
|
||||
return conn;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
close_connection(&conn);
|
||||
log_notice(_("unable to reconnect to node"));
|
||||
close_connection(&our_conn);
|
||||
log_notice(_("unable to reconnect to node %i"), node_info.node_id);
|
||||
}
|
||||
|
||||
if (i + 1 < max_attempts)
|
||||
@@ -835,7 +863,7 @@ try_reconnect(t_node_info *node_info)
|
||||
|
||||
free_conninfo_params(&conninfo_params);
|
||||
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user