diff --git a/HISTORY b/HISTORY index b4db0431..74c92fc2 100644 --- a/HISTORY +++ b/HISTORY @@ -1,6 +1,8 @@ 4.0.5 2018-??-?? repmgr: fix display of conninfo parsing error messages (Ian) repmgr: fix "repmgr cluster crosscheck" output; GitHub #389 (Ian) + repmgr: poll demoted primary after restart as a standby during a + switchover operation; GitHub #408 (Ian) repmgrd: fix memory leaks in witness code (AndrzejNowicki, Martín) repmgrd: set "connect_timeout=2" when pinging a server (Ian) diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml index acb8e0d6..bec28cce 100644 --- a/doc/repmgr-standby-switchover.sgml +++ b/doc/repmgr-standby-switchover.sgml @@ -115,6 +115,48 @@ + + Configuration file settings + + + Note that the following parameters in repmgr.conf are relevant to the + switchover operation: + + + + reconnect_attempts: number of times to check the original primary + for a clean shutdown after executing the shutdown command, before aborting + + + + + reconnect_interval: interval (in seconds) to check the original + primary for a clean shutdown after executing the shutdown command (up to a maximum + of reconnect_attempts tries) + + + + + replication_lag_critical: + if replication lag (in seconds) on the standby exceeds this value, the + switchover will be aborted (unless the -F/--force option + is provided) + + + + + + standby_reconnect_timeout: + number of seconds to attempt to reconnect to the demoted primary + once it has been restarted. 
+ + + + + + + + Execution diff --git a/doc/switchover.sgml b/doc/switchover.sgml index 13c83ece..f34c5416 100644 --- a/doc/switchover.sgml +++ b/doc/switchover.sgml @@ -170,34 +170,16 @@ - - Note that following parameters in repmgr.conf are relevant to the - switchover operation: - - - - reconnect_attempts: number of times to check the original primary - for a clean shutdown after executing the shutdown command, before aborting - - - - - reconnect_interval: interval (in seconds) to check the original - primary for a clean shutdown after executing the shutdown command (up to a maximum - of reconnect_attempts tries) - - - - - replication_lag_critical: - if replication lag (in seconds) on the standby exceeds this value, the - switchover will be aborted (unless the -F/--force option - is provided) - - - - + + + See repmgr standby switchover for a full list of available + command line options and repmgr.conf settings relevant + to performing a switchover. + + + + diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index c46a8ff2..2ab0dae6 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -3570,8 +3570,23 @@ x */ termPQExpBuffer(&command_output); - /* clean up remote node */ - remote_conn = establish_db_connection(remote_node_record.conninfo, false); + /* + * Clean up remote node. It's possible that the standby is still starting up, + * so poll for a while until we get a connection. 
+ */ + + for (i = 0; i < config_file_options.standby_reconnect_timeout; i++) + { + remote_conn = establish_db_connection(remote_node_record.conninfo, false); + + if (PQstatus(remote_conn) == CONNECTION_OK) + break; + + log_info(_("sleeping 1 second; %i of %i attempts (\"standby_reconnect_timeout\") to reconnect to demoted primary"), + i + 1, + config_file_options.standby_reconnect_timeout); + sleep(1); + } /* check new standby (old primary) is reachable */ if (PQstatus(remote_conn) != CONNECTION_OK) @@ -3584,6 +3599,11 @@ x */ log_detail(_("node \"%s\" is now primary but node \"%s\" is not reachable"), local_node_record.node_name, remote_node_record.node_name); + + if (config_file_options.use_replication_slots == true) + { + log_hint(_("any inactive replication slots on the old primary will need to be dropped manually")); + } } else {