repmgr: move demoted primary check to the final step during switchover

This gives the demoted primary more time to start up as a standby;
in the meantime, "standby follow" can be executed on the sibling nodes,
if --siblings-follow was specified.
This commit is contained in:
Ian Barwick
2018-03-27 16:41:13 +09:00
parent 6f9a1f975e
commit 3e1f0ec168

View File

@@ -3667,61 +3667,6 @@ do_standby_switchover(void)
termPQExpBuffer(&command_output);
/*
* Clean up remote node. It's possible that the standby is still starting up,
* so poll for a while until we get a connection.
*/
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
{
remote_conn = establish_db_connection(remote_node_record.conninfo, false);
if (PQstatus(remote_conn) == CONNECTION_OK)
break;
log_info(_("sleeping 1 second; %i of %i attempts (\"standby_reconnect_timeout\") to reconnect to demoted primary"),
i + 1,
config_file_options.standby_reconnect_timeout);
sleep(1);
}
/* check new standby (old primary) is reachable */
if (PQstatus(remote_conn) != CONNECTION_OK)
{
switchover_success = false;
/* TODO: double-check whether new standby has attached */
log_warning(_("switchover did not fully complete"));
log_detail(_("node \"%s\" is now primary but node \"%s\" is not reachable"),
local_node_record.node_name,
remote_node_record.node_name);
if (config_file_options.use_replication_slots == true)
{
log_hint(_("any inactive replication slots on the old primary will need to be dropped manually"));
}
}
else
{
if (config_file_options.use_replication_slots == true)
{
drop_replication_slot_if_exists(remote_conn,
remote_node_record.node_id,
local_node_record.slot_name);
}
/* TODO warn about any inactive replication slots */
log_notice(_("switchover was successful"));
log_detail(_("node \"%s\" is now primary and node \"%s\" is attached as standby"),
local_node_record.node_name,
remote_node_record.node_name);
}
PQfinish(remote_conn);
/*
* If --siblings-follow specified, attempt to make them follow the new
* primary
@@ -3797,6 +3742,61 @@ do_standby_switchover(void)
PQfinish(local_conn);
/*
* Clean up remote node. It's possible that the standby is still starting up,
* so poll for a while until we get a connection.
*/
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
{
remote_conn = establish_db_connection(remote_node_record.conninfo, false);
if (PQstatus(remote_conn) == CONNECTION_OK)
break;
log_info(_("sleeping 1 second; %i of %i attempts (\"standby_reconnect_timeout\") to reconnect to demoted primary"),
i + 1,
config_file_options.standby_reconnect_timeout);
sleep(1);
}
/* check new standby (old primary) is reachable */
if (PQstatus(remote_conn) != CONNECTION_OK)
{
switchover_success = false;
/* TODO: double-check whether new standby has attached */
log_warning(_("switchover did not fully complete"));
log_detail(_("node \"%s\" is now primary but node \"%s\" is not reachable"),
local_node_record.node_name,
remote_node_record.node_name);
if (config_file_options.use_replication_slots == true)
{
log_hint(_("any inactive replication slots on the old primary will need to be dropped manually"));
}
}
else
{
if (config_file_options.use_replication_slots == true)
{
drop_replication_slot_if_exists(remote_conn,
remote_node_record.node_id,
local_node_record.slot_name);
}
/* TODO warn about any inactive replication slots */
log_notice(_("switchover was successful"));
log_detail(_("node \"%s\" is now primary and node \"%s\" is attached as standby"),
local_node_record.node_name,
remote_node_record.node_name);
}
PQfinish(remote_conn);
if (switchover_success == true)
{
log_notice(_("STANDBY SWITCHOVER has completed successfully"));