mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
repmgrd: additional check for the upstream connection
It's possible the upstream server was intermittently unavailable in the interval between checks, invalidating the upstream connection. With check types "ping" and "connection", the connection would not be restored, so if the availability check was successful, additionally verify the upstream connection and restore if necessary. Addresses GitHub #633.
This commit is contained in:
@@ -1443,6 +1443,7 @@ monitor_streaming_standby(void)
|
|||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_DEBUG, "checking %s", upstream_node_info.conninfo);
|
log_verbose(LOG_DEBUG, "checking %s", upstream_node_info.conninfo);
|
||||||
|
|
||||||
if (check_upstream_connection(&upstream_conn, upstream_node_info.conninfo) == true)
|
if (check_upstream_connection(&upstream_conn, upstream_node_info.conninfo) == true)
|
||||||
{
|
{
|
||||||
set_upstream_last_seen(local_conn, upstream_node_info.node_id);
|
set_upstream_last_seen(local_conn, upstream_node_info.node_id);
|
||||||
@@ -3088,8 +3089,6 @@ do_primary_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
update_monitoring_history(void)
|
update_monitoring_history(void)
|
||||||
{
|
{
|
||||||
|
|||||||
60
repmgrd.c
60
repmgrd.c
@@ -813,32 +813,65 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
|
|||||||
/* Check the connection status twice in case it changes after reset */
|
/* Check the connection status twice in case it changes after reset */
|
||||||
bool twice = false;
|
bool twice = false;
|
||||||
|
|
||||||
if (config_file_options.connection_check_type == CHECK_PING)
|
|
||||||
return is_server_available(conninfo);
|
|
||||||
|
|
||||||
if (config_file_options.connection_check_type == CHECK_CONNECTION)
|
log_debug("connection check type is \"%s\"",
|
||||||
|
print_connection_check_type(config_file_options.connection_check_type));
|
||||||
|
/*
|
||||||
|
* For the check types which do not involve using the existing database
|
||||||
|
* connection, we'll perform the actual check, then as an additional
|
||||||
|
* safeguard verify that the connection is still valid (as it might have
|
||||||
|
* gone away during a brief outage between checks).
|
||||||
|
*/
|
||||||
|
if (config_file_options.connection_check_type != CHECK_QUERY)
|
||||||
{
|
{
|
||||||
bool success = true;
|
bool success = true;
|
||||||
PGconn *test_conn = PQconnectdb(conninfo);
|
|
||||||
|
|
||||||
log_debug("check_upstream_connection(): attempting to connect to \"%s\"", conninfo);
|
if (config_file_options.connection_check_type == CHECK_PING)
|
||||||
|
|
||||||
if (PQstatus(test_conn) != CONNECTION_OK)
|
|
||||||
{
|
{
|
||||||
log_warning(_("unable to connect to \"%s\""), conninfo);
|
success = is_server_available(conninfo);
|
||||||
log_detail("\n%s", PQerrorMessage(test_conn));
|
|
||||||
success = false;
|
|
||||||
}
|
}
|
||||||
PQfinish(test_conn);
|
else if (config_file_options.connection_check_type == CHECK_CONNECTION)
|
||||||
|
{
|
||||||
|
PGconn *test_conn = PQconnectdb(conninfo);
|
||||||
|
|
||||||
return success;
|
log_debug("check_upstream_connection(): attempting to connect to \"%s\"", conninfo);
|
||||||
|
|
||||||
|
if (PQstatus(test_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_warning(_("unable to connect to \"%s\""), conninfo);
|
||||||
|
log_detail("\n%s", PQerrorMessage(test_conn));
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
PQfinish(test_conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (success == false)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (PQstatus(*conn) == CONNECTION_OK)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Checks have succeeded, but the open connection to the primary has gone away,
|
||||||
|
* possibly due to a brief outage between monitoring intervals - attempt to
|
||||||
|
* reset it.
|
||||||
|
*/
|
||||||
|
log_notice(_("upstream is available but upstream connection has gone away, resetting"));
|
||||||
|
|
||||||
|
PQfinish(*conn);
|
||||||
|
*conn = PQconnectdb(conninfo);
|
||||||
|
|
||||||
|
if (PQstatus(*conn) == CONNECTION_OK)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
if (PQstatus(*conn) != CONNECTION_OK)
|
if (PQstatus(*conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_debug("check_upstream_connection(): connection not OK");
|
log_debug("check_upstream_connection(): upstream connection has gone away, resetting");
|
||||||
if (twice)
|
if (twice)
|
||||||
return false;
|
return false;
|
||||||
/* reconnect */
|
/* reconnect */
|
||||||
@@ -873,6 +906,7 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* reconnect */
|
/* reconnect */
|
||||||
|
log_debug("check_upstream_connection(): upstream connection not available, resetting");
|
||||||
PQfinish(*conn);
|
PQfinish(*conn);
|
||||||
*conn = PQconnectdb(conninfo);
|
*conn = PQconnectdb(conninfo);
|
||||||
twice = true;
|
twice = true;
|
||||||
|
|||||||
Reference in New Issue
Block a user