Better detect completion of demotion candidate shutdown

If a connection attempt fails, keep pinging the server until it
finally away, or the timeout kicks in.

Addresses issue reported in GitHub #188 and previously noted in
repmgr.c
This commit is contained in:
Ian Barwick
2016-06-30 21:27:52 +09:00
parent c30447ac90
commit 746c9793ed

View File

@@ -2769,8 +2769,8 @@ do_standby_switchover(void)
char repmgr_db_cli_params[MAXLEN] = "";
int query_result;
t_node_info remote_node_record;
bool connection_success;
bool connection_success,
shutdown_success;
/*
* If --remote_pg_bindir supplied, use that to build the path on the
@@ -3187,37 +3187,42 @@ do_standby_switchover(void)
termPQExpBuffer(&command_output);
connection_success = false;
shutdown_success = false;
/* loop for timeout waiting for current primary to stop */
for(i = 0; i < options.reconnect_attempts; i++)
for (i = 0; i < options.reconnect_attempts; i++)
{
/* Check whether primary is available */
remote_conn = test_db_connection(remote_conninfo, false); /* don't fail on error */
/* XXX failure to connect doesn't mean the server is necessarily
* completely stopped - we need to better detect the reason for
* connection failure ("server not listening" vs "shutting down")
*
* -> check is_pgup()
/*
* If we're unable to connect, keep PQping-ing the server until it
* finally goes away
*/
if (PQstatus(remote_conn) != CONNECTION_OK)
{
connection_success = true;
PGPing ping_res = PQping(remote_conninfo);
log_notice(_("current master has been stopped\n"));
break;
/* database server could not be contacted */
if (ping_res == PQPING_NO_RESPONSE)
{
/* XXX we should double-check access to the physical server here */
shutdown_success = true;
log_notice(_("current master has been stopped\n"));
break;
}
}
PQfinish(remote_conn);
// configurable?
/* XXX make configurable? */
sleep(options.reconnect_interval);
i++;
}
if (connection_success == false)
if (shutdown_success == false)
{
log_err(_("master server did not shut down\n"));
log_hint(_("check the master server status before performing any further actions"));