diff --git a/HISTORY b/HISTORY
index db7118c0..d5039b8c 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,8 +1,10 @@
4.0.3 2018-02-
repmgr: improve switchover handling when "pg_ctl" used to control the
server and logging output is not explicitly redirected (Ian)
+ repmgr: improve switchover log messages and exit code when old primary could
+ not be shut down cleanly (Ian)
-4.0.2 2018-01-
+4.0.2 2018-01-18
repmgr: add missing -W option to getopt_long() invocation; GitHub #350 (Ian)
repmgr: automatically create slot name if missing; GitHub #343 (Ian)
repmgr: fixes to parsing output of remote repmgr invocations; GitHub #349 (Ian)
diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml
index dada6a6b..e551ce04 100644
--- a/doc/repmgr-standby-switchover.sgml
+++ b/doc/repmgr-standby-switchover.sgml
@@ -22,9 +22,17 @@
If other standbys are connected to the demotion candidate, &repmgr; can instruct
- these to follow the new primary if the option --siblings-follow
+ these to follow the new primary if the option --siblings-follow
is specified.
+
+
+ Performing a switchover is a non-trivial operation. In particular it
+ relies on the current primary being able to shut down cleanly and quickly.
+ &repmgr; will attempt to check for potential issues but cannot guarantee
+ a successful switchover.
+
+
@@ -47,6 +55,13 @@
Check prerequisites but don't actually execute a switchover.
+
+
+ Success of --dry-run does not imply the switchover will
+ complete successfully, only that
+ the prerequisites for performing the operation are met.
+
+
@@ -57,6 +72,12 @@
Ignore warnings and continue anyway.
+
+ Specifically, if a problem is encountered when shutting down the current primary,
+ using --force will cause &repmgr; to continue by promoting
+ the standby to be the new primary, and if --siblings-follow is
+ specified, attach any other standbys to the new primary.
+
@@ -103,6 +124,11 @@
repmgrd should not be active on any nodes while a switchover is being
executed. This restriction may be lifted in a later version.
+
+ External database connections, e.g. from an application, should not be permitted while
+ the switchover is taking place. In particular, active transactions on the primary
+ can potentially disrupt the shutdown process.
+
@@ -119,6 +145,44 @@
+
+ Exit codes
+
+ The following exit codes can be emitted by repmgr standby switchover:
+
+
+
+
+
+
+
+ The switchover completed successfully.
+
+
+
+
+
+
+
+
+ The switchover could not be executed.
+
+
+
+
+
+
+
+ ERR_SWITCHOVER_INCOMPLETE (22)
+ The switchover was executed but a problem was encountered.
+ Typically this means the former primary could not be reattached
+ as a standby.
+
+
+
+
+
+ See also
diff --git a/errcode.h b/errcode.h
index 4d137603..c152967a 100644
--- a/errcode.h
+++ b/errcode.h
@@ -43,6 +43,6 @@
#define ERR_BARMAN 19
#define ERR_REGISTRATION_SYNC 20
#define ERR_OUT_OF_MEMORY 21
-#define ERR_REJOIN_FAIL 22
+#define ERR_SWITCHOVER_INCOMPLETE 22
#endif /* _ERRCODE_H_ */
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index 465e2510..fe82bbc7 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -2036,6 +2036,10 @@ do_standby_switchover(void)
i;
bool command_success = false;
bool shutdown_success = false;
+
+ /* this flag will be used to determine the final log message and exit code */
+ bool switchover_success = true;
+
XLogRecPtr remote_last_checkpoint_lsn = InvalidXLogRecPtr;
ReplInfo replication_info = T_REPLINFO_INTIALIZER;
@@ -2894,12 +2898,17 @@ do_standby_switchover(void)
/* clean up remote node */
remote_conn = establish_db_connection(remote_node_record.conninfo, false);
- /* check replication status */
+ /* check new standby (old primary) is reachable */
if (PQstatus(remote_conn) != CONNECTION_OK)
{
- log_error(_("unable to reestablish connection to remote node \"%s\""),
- remote_node_record.node_name);
- /* log_hint(_("")); // depends on replication status */
+ switchover_success = false;
+
+ /* TODO: double-check whether new standby has attached */
+
+ log_warning(_("switchover did not fully complete"));
+ log_detail(_("node \"%s\" is now primary but node \"%s\" is not reachable"),
+ local_node_record.node_name,
+ remote_node_record.node_name);
}
else
{
@@ -2910,17 +2919,20 @@ do_standby_switchover(void)
local_node_record.slot_name);
}
/* TODO warn about any inactive replication slots */
+
+ log_notice(_("switchover was successful"));
+ log_detail(_("node \"%s\" is now primary and node \"%s\" is attached as standby"),
+ local_node_record.node_name,
+ remote_node_record.node_name);
+
}
PQfinish(remote_conn);
- log_notice(_("switchover was successful"));
- log_detail(_("node \"%s\" is now primary"),
- local_node_record.node_name);
/*
* If --siblings-follow specified, attempt to make them follow the new
- * standby
+ * primary
*/
if (runtime_options.siblings_follow == true && sibling_nodes.node_count > 0)
@@ -2993,7 +3005,17 @@ do_standby_switchover(void)
PQfinish(local_conn);
- log_notice(_("STANDBY SWITCHOVER is complete"));
+ if (switchover_success == true)
+ {
+ log_notice(_("STANDBY SWITCHOVER has completed successfully"));
+ }
+ else
+ {
+ log_notice(_("STANDBY SWITCHOVER has completed with issues"));
+ log_hint(_("see preceding log message(s) for details"));
+ exit(ERR_SWITCHOVER_INCOMPLETE);
+ }
+
return;
}