repmgrd: fix upstream role check

Only take action if it's confirmed as a standby.
"standby switchover": close all connections used to check repmgrd status
2026-03-23 07:06:30 +00:00 · 2018-10-23 12:50:04 +09:00 · 2018-10-23 10:59:24 +09:00 · 2018-10-23 09:28:46 +09:00 · 2018-10-23 09:24:17 +09:00 · 2018-10-23 09:22:04 +09:00
77 changed files with 5900 additions and 1805 deletions
--- a/31
+++ b/31
@@ -1,4 +1,33 @@
-4.1.0   2018-??-??
+4.2.0   2018-??-??
        repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
          GitHub #504 (Ian)
        repmgr: add "--node-id" option to "repmgr cluster cleanup"; GitHub #493 (Ian)
        repmgr: report unreachable nodes when running "repmgr cluster (matrix|crosscheck)";
          GitHub #246 (Ian)
        repmgr: add configuration file parameter "repmgr_bindir"; GitHub #246 (Ian)
        repmgr: fix "Missing replication slots" label in "node check"; GitHub #507 (Ian)
        repmgrd: fix parsing of -d/--daemonize option (Ian)
        repmgrd: support "pausing" of repmgrd (Ian)
 4.1.1   2018-09-05
        logging: explicitly log the text of failed queries as ERRORs to
          assist logfile analysis; GitHub #498
        repmgr: truncate version string, if necessary; GitHub #490 (Ian)
        repmgr: improve messages emitted during "standby promote" (Ian)
        repmgr: "standby clone" - don't copy external config files in --dry-run
          mode; GitHub #491 (Ian)
        repmgr: add "cluster_cleanup" event; GitHub #492 (Ian)
        repmgr: (standby switchover) improve detection of free walsenders;
          GitHub #495 (Ian)
        repmgr: (node rejoin) improve replication slot handling; GitHub #499 (Ian)
        repmgrd: ensure that sending SIGHUP always results in the log file
          being reopened; GitHub #485 (Ian)
        repmgrd: report version number *after* logger initialisation; GitHub #487 (Ian)
        repmgrd: fix startup on witness node when local data is stale; GitHub #488/#489 (Ian)
        repmgrd: improve cascaded standby failover handling; GitHub #480 (Ian)
        repmgrd: improve reconnection handling (Ian)
 4.1.0   2018-07-31
        repmgr: change default log_level to INFO, add documentation; GitHub #470 (Ian)
        repmgr: add "--missing-slots" check to "repmgr node check" (Ian)
        repmgr: improve command line error handling; GitHub #464 (Ian)
--- a/Makefile.in
+++ b/Makefile.in
@@ -13,8 +13,9 @@ DATA = \
  repmgr--unpackaged--4.0.sql \
  repmgr--4.0.sql \
  repmgr--4.0--4.1.sql \
-  repmgr--4.1.sql
+  repmgr--4.1.sql \
-
+  repmgr--4.1--4.2.sql \
  repmgr--4.2.sql
 REGRESS = repmgr_extension
@@ -29,19 +30,24 @@ all: \
 PG_CPPFLAGS = -std=gnu89 -I$(includedir_internal) -I$(libpq_srcdir) -Wall -Wmissing-prototypes -Wmissing-declarations $(EXTRA_CFLAGS)
 SHLIB_LINK = $(libpq)
-HEADERS = $(wildcard *.h)
+
 OBJS = \
 	repmgr.o
 include Makefile.global
 ifeq ($(vpath_build),yes)
 	HEADERS = $(wildcard *.h)
 else
 	HEADERS_built = $(wildcard *.h)
 endif
 $(info Building against PostgreSQL $(MAJORVERSION))
 REPMGR_CLIENT_OBJS = repmgr-client.o \
 	repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
-	repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
+	repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-daemon.o \
 	configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o
 REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o
 DATE=$(shell date "+%Y-%m-%d")
@@ -85,6 +91,7 @@ additional-clean:
 	rm -f repmgr-action-bdr.o
 	rm -f repmgr-action-node.o
 	rm -f repmgr-action-cluster.o
 	rm -f repmgr-action-daemon.o
 	rm -f repmgrd.o
 	rm -f repmgrd-physical.o
 	rm -f repmgrd-bdr.o
--- a/configfile.c
+++ b/configfile.c
@@ -28,6 +28,7 @@ char		config_file_path[MAXPGPATH] = "";
 static bool config_file_provided = false;
 bool		config_file_found = false;
 static void parse_config(t_configuration_options *options, bool terse);
 static void _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *warning_list);
 static void _parse_line(char *buf, char *name, char *value);
@@ -87,8 +88,7 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
 			if (pwd != NULL)
 			{
-				appendPQExpBuffer(&fullpath,
+				appendPQExpBufferStr(&fullpath, pwd);
 								  "%s", pwd);
 			}
 			else
 			{
@@ -104,9 +104,7 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
 					exit(ERR_BAD_CONFIG);
 				}
-				appendPQExpBuffer(&fullpath,
+				appendPQExpBufferStr(&fullpath, cwd);
 								  "%s",
 								  cwd);
 			}
 			appendPQExpBuffer(&fullpath,
@@ -238,7 +236,7 @@ end_search:
 }
-void
+static void
 parse_config(t_configuration_options *options, bool terse)
 {
 	/* Collate configuration file errors here for friendlier reporting */
@@ -287,6 +285,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	memset(options->data_directory, 0, sizeof(options->data_directory));
 	memset(options->config_directory, 0, sizeof(options->data_directory));
 	memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
 	memset(options->repmgr_bindir, 0, sizeof(options->repmgr_bindir));
 	options->replication_type = REPLICATION_TYPE_PHYSICAL;
 	/*-------------
@@ -334,6 +333,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	 * standby switchover settings
 	 *------------------------
 	 */
 	options->shutdown_check_timeout = DEFAULT_SHUTDOWN_CHECK_TIMEOUT;
 	options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
 	/*-----------------
@@ -488,6 +488,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 		}
 		else if (strcmp(name, "pg_bindir") == 0)
 			strncpy(options->pg_bindir, value, MAXPGPATH);
 		else if (strcmp(name, "repmgr_bindir") == 0)
 			strncpy(options->repmgr_bindir, value, MAXPGPATH);
 		else if (strcmp(name, "replication_type") == 0)
 		{
@@ -544,6 +546,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 			options->standby_follow_timeout = repmgr_atoi(value, name, error_list, 0);
 		/* standby switchover settings */
 		else if (strcmp(name, "shutdown_check_timeout") == 0)
 			options->shutdown_check_timeout = repmgr_atoi(value, name, error_list, 0);
 		else if (strcmp(name, "standby_reconnect_timeout") == 0)
 			options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
@@ -785,7 +789,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 		PQconninfoFree(conninfo_options);
 	}
 	/* set values for parameters which default to other parameters */
 	/*
@@ -1052,11 +1055,13 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
 * - repmgrd_standby_startup_timeout
 * - retry_promote_interval_secs
 *
- * non-changeable options
+ * non-changeable options (repmgrd references these from the "repmgr.nodes"
 * table, not the configuration file)
 *
 * - node_id
 * - node_name
 * - data_directory
 * - location
 * - priority
 * - replication_type
 *
@@ -1065,7 +1070,7 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
 */
 bool
-reload_config(t_configuration_options *orig_options)
+reload_config(t_configuration_options *orig_options, t_server_type server_type)
 {
 	PGconn	   *conn;
 	t_configuration_options new_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
@@ -1081,6 +1086,20 @@ reload_config(t_configuration_options *orig_options)
 	_parse_config(&new_options, &config_errors, &config_warnings);
 	if (server_type == PRIMARY || server_type == STANDBY)
 	{
 		if (new_options.promote_command[0] == '\0')
 		{
 			item_list_append(&config_errors, _("\"promote_command\": required parameter was not found"));
 		}
 		if (new_options.follow_command[0] == '\0')
 		{
 			item_list_append(&config_errors, _("\"follow_command\": required parameter was not found"));
 		}
 	}
 	if (config_errors.head != NULL)
 	{
 		ItemListCell *cell = NULL;
@@ -1089,8 +1108,8 @@ reload_config(t_configuration_options *orig_options)
 		initPQExpBuffer(&errors);
-		appendPQExpBuffer(&errors,
+		appendPQExpBufferStr(&errors,
-						  "following errors were detected:\n");
+							 "following errors were detected:\n");
 		for (cell = config_errors.head; cell; cell = cell->next)
 		{
@@ -1258,7 +1277,7 @@ reload_config(t_configuration_options *orig_options)
 		config_changed = true;
 	}
-	/* promote_delay */
+	/* promote_delay (for testing use only; not documented) */
 	if (orig_options->promote_delay != new_options.promote_delay)
 	{
 		orig_options->promote_delay = new_options.promote_delay;
@@ -1512,6 +1531,9 @@ parse_bool(const char *s, const char *config_item, ItemList *error_list)
 {
 	PQExpBufferData errors;
 	if (s == NULL)
 		return true;
 	if (strcasecmp(s, "0") == 0)
 		return false;
--- a/configfile.h
+++ b/configfile.h
@@ -75,6 +75,7 @@ typedef struct
 	char		data_directory[MAXPGPATH];
 	char		config_directory[MAXPGPATH];
 	char		pg_bindir[MAXPGPATH];
 	char		repmgr_bindir[MAXPGPATH];
 	int			replication_type;
 	/* log settings */
@@ -103,6 +104,7 @@ typedef struct
 	int			standby_follow_timeout;
 	/* standby switchover settings */
 	int			shutdown_check_timeout;
 	int			standby_reconnect_timeout;
 	/* node rejoin settings */
@@ -170,7 +172,7 @@ typedef struct
 #define T_CONFIGURATION_OPTIONS_INITIALIZER { \
 		/* node information */ \
-		UNKNOWN_NODE_ID, "", "", "", "", "", "", REPLICATION_TYPE_PHYSICAL,	\
+		UNKNOWN_NODE_ID, "", "", "", "", "", "", "", REPLICATION_TYPE_PHYSICAL,	\
 		/* log settings */ \
 		"", "", "", DEFAULT_LOG_STATUS_INTERVAL,	\
 		/* standby clone settings */ \
@@ -181,6 +183,7 @@ typedef struct
 		DEFAULT_PRIMARY_FOLLOW_TIMEOUT,	\
 		DEFAULT_STANDBY_FOLLOW_TIMEOUT,	\
 		/* standby switchover settings */ \
 		DEFAULT_SHUTDOWN_CHECK_TIMEOUT, \
 		DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
 		/* node rejoin settings */ \
 		DEFAULT_NODE_REJOIN_TIMEOUT, \
@@ -273,13 +276,13 @@ typedef struct
 	"", "", "", "" \
 }
 #include "dbutils.h"
 void		set_progname(const char *argv0);
 const char *progname(void);
 void		load_config(const char *config_file, bool verbose, bool terse, t_configuration_options *options, char *argv0);
-void		parse_config(t_configuration_options *options, bool terse);
+bool		reload_config(t_configuration_options *orig_options, t_server_type server_type);
 bool		reload_config(t_configuration_options *orig_options);
 bool		parse_recovery_conf(const char *data_dir, t_recovery_conf *conf);
--- a/18
+++ b/18
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for repmgr 4.1.
+# Generated by GNU Autoconf 2.69 for repmgr 4.2.
 #
 # Report bugs to <pgsql-bugs@postgresql.org>.
 #
@@ -582,8 +582,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='repmgr'
 PACKAGE_TARNAME='repmgr'
-PACKAGE_VERSION='4.1'
+PACKAGE_VERSION='4.2'
-PACKAGE_STRING='repmgr 4.1'
+PACKAGE_STRING='repmgr 4.2'
 PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
 PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'
@@ -1178,7 +1178,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures repmgr 4.1 to adapt to many kinds of systems.
+\`configure' configures repmgr 4.2 to adapt to many kinds of systems.
 Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1239,7 +1239,7 @@ fi
 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of repmgr 4.1:";;
+     short | recursive ) echo "Configuration of repmgr 4.2:";;
   esac
  cat <<\_ACEOF
@@ -1313,7 +1313,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-repmgr configure 4.1
+repmgr configure 4.2
 generated by GNU Autoconf 2.69
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1332,7 +1332,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
-It was created by repmgr $as_me 4.1, which was
+It was created by repmgr $as_me 4.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
  $ $0 $@
@@ -2359,7 +2359,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by repmgr $as_me 4.1, which was
+This file was extended by repmgr $as_me 4.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
  CONFIG_FILES    = $CONFIG_FILES
@@ -2422,7 +2422,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-repmgr config.status 4.1
+repmgr config.status 4.2
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"
--- a/configure.in
+++ b/configure.in
@@ -1,4 +1,4 @@
-AC_INIT([repmgr], [4.1], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
+AC_INIT([repmgr], [4.2], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
 AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])
--- a/controldata.c
+++ b/controldata.c
@@ -227,7 +227,15 @@ get_controlfile(const char *DataDir)
 	control_file_info->control_file_processed = true;
-	if (version_num >= 90500)
+	if (version_num >= 110000)
 	{
 		ControlFileData11 *ptr = (struct ControlFileData11 *)ControlFileDataPtr;
 		control_file_info->system_identifier = ptr->system_identifier;
 		control_file_info->state = ptr->state;
 		control_file_info->checkPoint = ptr->checkPoint;
 		control_file_info->data_checksum_version = ptr->data_checksum_version;
 	}
 	else if (version_num >= 90500)
 	{
 		ControlFileData95 *ptr = (struct ControlFileData95 *)ControlFileDataPtr;
 		control_file_info->system_identifier = ptr->system_identifier;
--- a/controldata.h
+++ b/controldata.h
@@ -265,6 +265,71 @@ typedef struct ControlFileData95
 } ControlFileData95;
 /*
 * Following field removed in 11:
 *
 *  XLogRecPtr	prevCheckPoint;
 *
 * In 10, following field appended *after* "data_checksum_version":
 *
 * 	char		mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];
 *
 * (but we don't care about that)
 */
 typedef struct ControlFileData11	/* layout get_controlfile() casts to when version_num >= 110000 */
 {
 	uint64		system_identifier;	/* copied into control_file_info by get_controlfile() */
 	uint32		pg_control_version;		/* PG_CONTROL_VERSION */
 	uint32		catalog_version_no;		/* see catversion.h */
 	DBState		state;			/* see enum above; copied into control_file_info by get_controlfile() */
 	pg_time_t	time;			/* time stamp of last pg_control update */
 	XLogRecPtr	checkPoint;		/* last check point record ptr; copied into control_file_info by get_controlfile() */
 	CheckPoint95	checkPointCopy; /* copy of last check point record */
 	XLogRecPtr	unloggedLSN;	/* current fake LSN value, for unlogged rels */
 	XLogRecPtr	minRecoveryPoint;
 	TimeLineID	minRecoveryPointTLI;
 	XLogRecPtr	backupStartPoint;
 	XLogRecPtr	backupEndPoint;
 	bool		backupEndRequired;
 	int			wal_level;
 	bool		wal_log_hints;
 	int			MaxConnections;
 	int			max_worker_processes;
 	int			max_prepared_xacts;
 	int			max_locks_per_xact;
 	bool		track_commit_timestamp;
 	uint32		maxAlign;		/* alignment requirement for tuples */
 	double		floatFormat;	/* constant 1234567.0 */
 	uint32		blcksz;			/* data block size for this DB */
 	uint32		relseg_size;	/* blocks per segment of large relation */
 	uint32		xlog_blcksz;	/* block size within WAL files */
 	uint32		xlog_seg_size;	/* size of each WAL segment */
 	uint32		nameDataLen;	/* catalog name field width */
 	uint32		indexMaxKeys;	/* max number of columns in an index */
 	uint32		toast_max_chunk_size;	/* chunk size in TOAST tables */
 	uint32		loblksize;		/* chunk size in pg_largeobject */
 	bool		enableIntTimes; /* int64 storage enabled? */
 	bool		float4ByVal;	/* float4 pass-by-value? */
 	bool		float8ByVal;	/* float8, int8, etc pass-by-value? */
 	uint32		data_checksum_version;	/* copied into control_file_info by get_controlfile() */
 } ControlFileData11;
 extern DBState get_db_state(const char *data_directory);
--- a/dbutils.c
+++ b/dbutils.c
--- a/dbutils.h
+++ b/dbutils.h
@@ -327,6 +327,21 @@ typedef struct
    UNKNOWN_TIMELINE_ID, \
 	InvalidXLogRecPtr \
 }
 typedef struct RepmgrdInfo {	/* repmgrd status fields for one node; NOTE(review): the code that fills this in is not visible here - confirm field meanings */
 	int node_id;
 	int pid;	/* presumably the repmgrd process ID (cf. repmgrd_get_pid()) - TODO confirm */
 	char pid_text[MAXLEN];	/* presumably a printable form of pid - TODO confirm */
 	char pid_file[MAXLEN];	/* presumably the pidfile path (cf. repmgrd_set_pid()) - TODO confirm */
 	bool pg_running;
 	char pg_running_text[MAXLEN];	/* presumably a printable form of pg_running - TODO confirm */
 	bool running;	/* presumably whether repmgrd is running (cf. repmgrd_is_running()) - TODO confirm */
 	char repmgrd_running[MAXLEN];	/* presumably a printable form of running - TODO confirm */
 	bool paused;	/* presumably whether repmgrd is paused (cf. repmgrd_is_paused()) - TODO confirm */
 } RepmgrdInfo;
 /* global variables */
 extern int	server_version_num;
@@ -399,6 +414,11 @@ bool		identify_system(PGconn *repl_conn, t_system_identification *identification
 bool		repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
 int			repmgrd_get_local_node_id(PGconn *conn);
 BackupState	server_in_exclusive_backup_mode(PGconn *conn);
 void		repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile);
 pid_t		repmgrd_get_pid(PGconn *conn);
 bool		repmgrd_is_running(PGconn *conn);
 bool		repmgrd_is_paused(PGconn *conn);
 bool		repmgrd_pause(PGconn *conn, bool pause);
 /* extension functions */
 ExtensionStatus get_repmgr_extension_status(PGconn *conn);
@@ -421,7 +441,7 @@ t_node_info *get_node_record_pointer(PGconn *conn, int node_id);
 bool		get_local_node_record(PGconn *conn, int node_id, t_node_info *node_info);
 bool		get_primary_node_record(PGconn *conn, t_node_info *node_info);
-void		get_all_node_records(PGconn *conn, NodeInfoList *node_list);
+bool		get_all_node_records(PGconn *conn, NodeInfoList *node_list);
 void		get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *nodes);
 void		get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
 void		get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
@@ -475,7 +495,7 @@ int			wait_connection_availability(PGconn *conn, long long timeout);
 /* node availability functions */
 bool		is_server_available(const char *conninfo);
 bool		is_server_available_params(t_conninfo_param_list *param_list);
-void		connection_ping(PGconn *conn);
+ExecStatusType	connection_ping(PGconn *conn);
 /* monitoring functions  */
 void
@@ -491,8 +511,8 @@ add_monitoring_record(PGconn *primary_conn,
 					  long long unsigned int apply_lag_bytes
 );
-int			get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history);
+int			get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history, int node_id);
-bool		delete_monitoring_records(PGconn *primary_conn, int keep_history);
+bool		delete_monitoring_records(PGconn *primary_conn, int keep_history, int node_id);
--- a/doc/appendix-faq.sgml
+++ b/doc/appendix-faq.sgml
@@ -108,6 +108,14 @@
     is not possible, contact your vendor for assistance.
   </para>
  </sect2>
  <sect2 id="faq-old-packages">
   <title>How can I obtain old versions of &repmgr; packages?</title>
   <para>
     See appendix <xref linkend="packages-old-versions"> for details.
   </para>
  </sect2>
 </sect1>
 <sect1 id="faq-repmgr" xreflabel="repmgr">
@@ -239,11 +247,22 @@
     Under some circumstances event notifications can be generated for servers
     which have not yet been registered; it's also useful to retain a record
     of events which includes servers removed from the replication cluster
-     which no longer have an entry in the <literal>repmrg.nodes</literal> table.
+     which no longer have an entry in the <literal>repmgr.nodes</literal> table.
   </para>
  </sect2>
-
+  <sect2 id="faq-repmgr-recovery-conf-quoted-values" xreflabel="Quoted values in recovery.conf">
    <title>Why are some values in <filename>recovery.conf</filename> surrounded by pairs of single quotes?</title>
    <para>
      This is to ensure that user-supplied values which are written as parameter values in <filename>recovery.conf</filename>
      are escaped correctly and do not cause errors when <filename>recovery.conf</filename> is parsed.
    </para>
    <para>
      The escaping is performed by an internal PostgreSQL routine, which leaves strings consisting
      of digits and alphabetical characters only as-is, but wraps everything else in pairs of single quotes,
      even if the string does not contain any characters which need escaping.
    </para>
  </sect2>
 </sect1>
@@ -255,7 +274,7 @@
  <sect2 id="faq-repmgrd-prevent-promotion" xreflabel="Prevent standby from being promoted to primary">
   <title>How can I prevent a node from ever being promoted to primary?</title>
   <para>
-    In `repmgr.conf`, set its priority to a value of 0 or less; apply the changed setting with
+     In <filename>repmgr.conf</filename>, set its priority to a value of <literal>0</literal>; apply the changed setting with
    <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>.
   </para>
   <para>
@@ -303,5 +322,36 @@
   </para>
  </sect2>
  <sect2 id="faq-repmgrd-pg-bindir" xreflabel="repmgrd does not apply pg_bindir to promote_command or follow_command">
    <title>
      <application>repmgrd</application> ignores pg_bindir when executing <varname>promote_command</varname> or <varname>follow_command</varname>
    </title>
    <para>
      <varname>promote_command</varname> or <varname>follow_command</varname> can be user-defined scripts,
      so &repmgr; will not apply <option>pg_bindir</option> even if executing &repmgr;. Always provide the full
      path; see <xref linkend="repmgrd-automatic-failover-configuration"> for more details.
    </para>
  </sect2>
  <sect2 id="faq-repmgrd-startup-no-upstream" xreflabel="repmgrd does not start if upstream node is not running">
    <title>
      <application>repmgrd</application> aborts startup with the error "<literal>upstream node must be running before repmgrd can start</literal>"
    </title>
    <para>
      <application>repmgrd</application> does this to avoid starting up on a replication cluster
      which is not in a healthy state. If the upstream is unavailable, <application>repmgrd</application>
      may initiate a failover immediately after starting up, which could have unintended side-effects,
      particularly if <application>repmgrd</application> is not running on other nodes.
    </para>
    <para>
      In particular, it's possible that the node's local copy of the <literal>repmgr.nodes</literal> table
      is out-of-date, which may lead to incorrect failover behaviour.
    </para>
    <para>
      The onus is therefore on the administrator to manually set the cluster to a stable, healthy state before
      starting <application>repmgrd</application>.
    </para>
  </sect2>
 </sect1>
 </appendix>
--- a/doc/appendix-packages.sgml
+++ b/doc/appendix-packages.sgml
@@ -12,10 +12,17 @@
  <sect1 id="packages-centos" xreflabel="CentOS packages">
    <title>CentOS Packages</title>
    <indexterm>
      <primary>packages</primary>
      <secondary>CentOS packages</secondary>
    </indexterm>
    <indexterm>
      <primary>CentOS</primary>
      <secondary>package information</secondary>
    </indexterm>
    <para>
      Currently, &repmgr; RPM packages are provided for versions 6.x and 7.x of CentOS. These should also
      work on matching versions of Red Hat Enterprise Linux, Scientific Linux and Oracle Enterprise Linux;
@@ -53,11 +60,11 @@
          <tbody>
            <row>
              <entry>Repository URL:</entry>
-              <entry><ulink url="https://rpm.2ndquadrant.com/">https://rpm.2ndquadrant.com/</ulink></entry>
+              <entry><ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink></entry>
            </row>
            <row>
              <entry>Repository documentation:</entry>
-              <entry><ulink url="https://repmgr.org/docs/4.0/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ">https://repmgr.org/docs/4.0/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ</ulink></entry>
+              <entry><ulink url="https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ">https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ</ulink></entry>
            </row>
          </tbody>
        </tgroup>
@@ -237,6 +244,12 @@
      <primary>packages</primary>
      <secondary>Debian/Ubuntu packages</secondary>
    </indexterm>
    <indexterm>
      <primary>Debian/Ubuntu</primary>
      <secondary>package information</secondary>
    </indexterm>
    <para>
      &repmgr; <literal>.deb</literal> packages are provided via the
      PostgreSQL Community APT repository, and are available for each community-supported
@@ -253,6 +266,23 @@
      </para>
      <table id="apt-2ndquadrant-repository">
        <title>2ndQuadrant public repository</title>
        <tgroup cols="2">
          <tbody>
            <row>
              <entry>Repository URL:</entry>
              <entry><ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink></entry>
            </row>
            <row>
              <entry>Repository documentation:</entry>
              <entry><ulink url="https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN">https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN</ulink></entry>
            </row>
          </tbody>
        </tgroup>
      </table>
      <table id="apt-repository">
        <title>PostgreSQL Community APT repository (PGDG)</title>
        <tgroup cols="2">
@@ -365,6 +395,127 @@
  </sect1>
  <sect1 id="packages-snapshot" xreflabel="Snapshot packages">
    <title>Snapshot packages</title>
    <indexterm>
      <primary>snapshot packages</primary>
    </indexterm>
    <indexterm>
      <primary>packages</primary>
      <secondary>snapshots</secondary>
    </indexterm>
    <para>
      For testing new features and bug fixes, from time to time 2ndQuadrant provides
      so-called &quot;snapshot packages&quot; via its public repository. These packages
      are built from the &repmgr; source at a particular point in time, and are not formal
      releases.
    </para>
    <note>
      <para>
        We do not recommend installing these packages in a production environment
        unless specifically advised.
      </para>
    </note>
    <para>
      To install a snapshot package, it's necessary to install the 2ndQuadrant public snapshot repository,
      following the instructions here: <ulink url="https://dl.2ndquadrant.com/default/release/site/">https://dl.2ndquadrant.com/default/release/site/</ulink> but replace <literal>release</literal> with <literal>snapshot</literal>
      in the appropriate URL.
    </para>
    <para>
      For example, to install the snapshot RPM repository for PostgreSQL 9.6, execute (as <literal>root</literal>):
      <programlisting>
 curl https://dl.2ndquadrant.com/default/snapshot/get/9.6/rpm | bash</programlisting>
      or as a normal user with root sudo access:
      <programlisting>
 curl https://dl.2ndquadrant.com/default/snapshot/get/9.6/rpm | sudo bash</programlisting>
    </para>
    <para>
      Alternatively you can browse the repository here:
      <ulink url="https://dl.2ndquadrant.com/default/snapshot/browse/">https://dl.2ndquadrant.com/default/snapshot/browse/</ulink>.
    </para>
    <para>
      Once the repository is installed, installing or updating &repmgr; will result in the latest snapshot
      package being installed.
    </para>
    <para>
      The package name will be formatted like this:
      <programlisting>
 repmgr96-4.1.1-0.0git320.g5113ab0.1.el7.x86_64.rpm</programlisting>
      containing the snapshot build number (here: <literal>320</literal>) and the hash
      of the <application>git</application> commit it was built from (here: <literal>g5113ab0</literal>).
    </para>
    <para>
      Note that the next formal release (in the above example <literal>4.1.1</literal>), once available,
      will install in place of any snapshot builds.
    </para>
  </sect1>
  <sect1 id="packages-old-versions" xreflabel="Installing old package versions">
    <title>Installing old package versions</title>
    <indexterm>
      <primary>old packages</primary>
    </indexterm>
    <indexterm>
      <primary>packages</primary>
      <secondary>old versions</secondary>
    </indexterm>
    <sect2 id="packages-old-versions-debian" xreflabel="old Debian package versions">
      <title>Debian/Ubuntu</title>
      <para>
        An archive of old packages (<literal>3.3.2</literal> and later) for Debian/Ubuntu-based systems is available here:
        <ulink url="http://atalia.postgresql.org/morgue/r/repmgr/">http://atalia.postgresql.org/morgue/r/repmgr/</ulink>
      </para>
    </sect2>
    <sect2 id="packages-old-versions-rhel-centos" xreflabel="old RHEL/CentOS package versions">
      <title>RHEL/CentOS</title>
      <para>
        Old RPM packages (<literal>3.2</literal> and later) can be retrieved from the
        (deprecated) 2ndQuadrant repository at
        <ulink url="http://packages.2ndquadrant.com/">http://packages.2ndquadrant.com/</ulink>
        by installing the appropriate repository RPM:
      </para>
      <itemizedlist spacing="compact" mark="bullet">
        <listitem>
          <simpara>
            <ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
          </simpara>
        </listitem>
      </itemizedlist>
      <para>
        Old versions can be located with e.g.:
        <programlisting>
          yum --showduplicates list repmgr96</programlisting>
        (substitute the appropriate package name; see <xref linkend="packages-centos">) and installed with:
        <programlisting>
          yum install {package_name}-{version}</programlisting>
        where <literal>{package_name}</literal> is the base package name (e.g. <literal>repmgr96</literal>)
        and <literal>{version}</literal> is the version listed by the
        <command> yum --showduplicates list ...</command> command, e.g. <literal>4.0.6-1.rhel6</literal>.
      </para>
      <para>For example:
        <programlisting>
          yum install repmgr96-4.0.6-1.rhel6</programlisting>
      </para>
    </sect2>
  </sect1>
  <sect1 id="packages-packager-info" xreflabel="Information for packagers">
    <title>Information for packagers</title>
@@ -373,7 +524,7 @@
      <secondary>information for packagers</secondary>
    </indexterm>
    <para>
-      We recommend patching the following  parameters when
+      We recommend patching the following parameters when
      building the package as built-in default values for user convenience.
      These values can nevertheless be overridden by the user, if desired.
    </para>
--- a/doc/appendix-release-notes.sgml
+++ b/doc/appendix-release-notes.sgml
@@ -15,9 +15,294 @@
    See also: <xref linkend="upgrading-repmgr">
  </para>
  <sect1 id="release-4.2">
    <title>Release 4.2</title>
    <para><emphasis>???, 2018</emphasis></para>
    <para>
    </para>
    <sect2>
      <title>Configuration file changes</title>
      <para>
        <itemizedlist>
          <listitem>
            <para>
              New parameter <varname>shutdown_check_timeout</varname> (default: 60 seconds) added;
              this provides an explicit timeout for
              <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>
              to check that the demotion candidate (current primary) has shut down. Previously, the parameters
              <literal>reconnect_attempts</literal> and <literal>reconnect_interval</literal>
              were used to calculate a timeout, but these are actually
              intended for primary failure detection. (GitHub #504).
            </para>
          </listitem>
        </itemizedlist>
        <itemizedlist>
          <listitem>
            <para>
              New parameter <varname>repmgr_bindir</varname> added, to facilitate remote invocation of repmgr
              when the repmgr binary is located somewhere other than the PostgreSQL binary directory, as it
              cannot be assumed all package maintainers will install &repmgr; there.
            </para>
            <para>
              This parameter is optional; if not set (the default), &repmgr; will fall back
              to <option>pg_bindir</option> (if set).
            </para>
            <para>
              (GitHub #246).
            </para>
          </listitem>
        </itemizedlist>
      </para>
    </sect2>
    <sect2>
      <title>repmgr enhancements</title>
      <para>
       <itemizedlist>
          <listitem>
            <para>
              <command><link linkend="repmgr-cluster-cleanup">repmgr cluster cleanup</link></command>
              now accepts the <option>--node-id</option> option to delete records for only one
              node. (GitHub #493).
            </para>
          </listitem>
          <listitem>
            <para>
              When running
              <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command> and
              <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>,
              &repmgr; will report nodes unreachable via SSH, and emit return code <literal>ERR_BAD_SSH</literal>.
              (GitHub #246).
            </para>
            <note>
              <para>
                Users relying on
                <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
                to return a non-zero return code as a way of detecting connectivity errors should be aware
                that <literal>ERR_BAD_SSH</literal> will be returned if there is an SSH connection error
                from the node where the command is executed, even if the command is able to establish
                that PostgreSQL connectivity is fine. Therefore the exact return code should be checked
                to determine what kind of connectivity error has been detected.
              </para>
            </note>
          </listitem>
        </itemizedlist>
      </para>
    </sect2>
    <sect2>
      <title>repmgrd enhancements</title>
      <para>
       <itemizedlist>
          <listitem>
            <para>
              <application>repmgrd</application> can now be &quot;paused&quot;, i.e. instructed
              not to take any action such as a failover, even if the prerequisites for such an
              action are detected.
            </para>
            <para>
              This removes the need to stop <application>repmgrd</application> on all nodes when
              performing a planned operation such as a switchover.
            </para>
            <para>
              For further details, see <link linkend="repmgrd-pausing">Pausing repmgrd</link>.
            </para>
          </listitem>
       </itemizedlist>
      </para>
    </sect2>
    <sect2>
      <title>Bug fixes</title>
      <para>
        <itemizedlist>
          <listitem>
            <para>
              &repmgr;: fix &quot;Missing replication slots&quot; label in
              <command><link linkend="repmgr-node-check">repmgr node check</link></command>.  (GitHub #507)
            </para>
          </listitem>
          <listitem>
            <para>
              <application>repmgrd</application>: fix parsing of <option>-d/--daemonize</option> option.
            </para>
          </listitem>
        </itemizedlist>
      </para>
    </sect2>
  </sect1>
  <sect1 id="release-4.1.1">
    <title>Release 4.1.1</title>
    <para><emphasis>Wed September 5, 2018</emphasis></para>
    <para>
      repmgr 4.1.1 contains a number of usability enhancements and bug fixes.
    </para>
    <para>
 	  We recommend upgrading to this version as soon as possible.
 	  This release can be installed as a simple package upgrade from repmgr 4.0 ~ 4.1.0;
      <application>repmgrd</application> (if running) should be restarted.
      See <xref linkend="upgrading-repmgr"> for more details.
 	</para>
    <sect2>
      <title>repmgr enhancements</title>
      <para>
        <itemizedlist>
          <listitem>
            <para>
              <command><link linkend="repmgr-standby-switchover">repmgr standby switchover --dry-run</link></command>
              no longer copies external configuration files to test they can be copied; this avoids making
              any changes to the target system. (GitHub #491).
            </para>
          </listitem>
          <listitem>
            <para>
              <command><link linkend="repmgr-cluster-cleanup">repmgr cluster cleanup</link></command>:
              add <literal>cluster_cleanup</literal> event. (GitHub #492).
            </para>
          </listitem>
          <listitem>
            <para>
              <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>:
              improve detection of free walsenders. (GitHub #495).
            </para>
          </listitem>
          <listitem>
            <para>
              Improve messages emitted during
              <command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command>.
            </para>
          </listitem>
        </itemizedlist>
      </para>
   </sect2>
    <sect2>
      <title>repmgrd enhancements</title>
      <para>
        <itemizedlist>
          <listitem>
            <para>
              Always reopen the log file after
              receiving <literal>SIGHUP</literal>. Previously this only happened if
              a configuration file change was detected.
              (GitHub #485).
            </para>
          </listitem>
          <listitem>
            <para>
              Report version number <emphasis>after</emphasis>
              logger initialisation. (GitHub #487).
            </para>
          </listitem>
          <listitem>
            <para>
              Improve cascaded standby failover handling. (GitHub #480).
            </para>
          </listitem>
          <listitem>
            <para>
              Improve reconnection handling after brief network outages; if
              monitoring data was being collected, this could lead to orphaned
              sessions on the primary. (GitHub #480).
            </para>
          </listitem>
          <listitem>
            <para>
              Check <varname>promote_command</varname> and <varname>follow_command</varname>
              are defined when reloading configuration. These were checked on startup but
              not reload by <application>repmgrd</application>, which made it possible to
              run <application>repmgrd</application> with invalid values. It's unlikely
              anyone would want to do this, but we should make it impossible anyway.
              (GitHub #486).
            </para>
          </listitem>
        </itemizedlist>
      </para>
    </sect2>
    <sect2>
      <title>Other</title>
      <para>
        <itemizedlist>
          <listitem>
            <para>
              Text of any failed queries will now be logged as <literal>ERROR</literal> to assist
              logfile analysis at log levels higher than <literal>DEBUG</literal>.
              (GitHub #498).
            </para>
          </listitem>
        </itemizedlist>
      </para>
    </sect2>
    <sect2>
      <title>Bug fixes</title>
      <para>
        <itemizedlist>
          <listitem>
            <para>
              <command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>:
              remove new upstream's replication slot if it still exists on the rejoined
              standby. (GitHub #499).
            </para>
          </listitem>
          <listitem>
            <para>
              <application>repmgrd</application>: fix startup on witness node when local data is stale. (GitHub #488, #489).
            </para>
          </listitem>
          <listitem>
            <para>
              Truncate version string reported by PostgreSQL if necessary; some
              distributions insert additional detail after the actual version.
              (GitHub #490).
            </para>
          </listitem>
        </itemizedlist>
      </para>
    </sect2>
  </sect1>
  <sect1 id="release-4.1.0">
    <title>Release 4.1.0</title>
-    <para><emphasis>???? ??, 2018</emphasis></para>
+    <para><emphasis>Tue July 31, 2018</emphasis></para>
    <para>
      &repmgr; 4.1.0 introduces some changes to <application>repmgrd</application>
      behaviour and some additional configuration parameters.
@@ -29,19 +314,20 @@
       <itemizedlist>
          <listitem>
            <para>
-              <application>repmgrd</application> (if running) must be restarted.
+              Execute <command>ALTER EXTENSION repmgr UPDATE</command>
              on the primary server in the database where &repmgr; is installed.
            </para>
          </listitem>
          <listitem>
            <para>
-              Execute <command>ALTER EXTENSION repmgr UPGRADE</command>
+              <application>repmgrd</application> must be restarted on all nodes where it is running.
              on the primary server in the database where &repmgr; is installed.
            </para>
          </listitem>
       </itemizedlist>
       A restart of the PostgreSQL server is <emphasis>not</emphasis> required
-       for this release.
+       for this release (unless upgrading from repmgr 3.x).
    </para>
    <para>
       See <xref linkend="upgrading-repmgr-extension"> for more details.
@@ -53,6 +339,17 @@
      review the changes listed below.
    </para>
    <note>
      <para>
        <emphasis>Repository changes</emphasis>
      </para>
      <para>
        Coinciding with this release, the 2ndQuadrant repository structure has changed.
        See section <xref linkend="installation-packages"> for details, particularly
        if you are using an RPM-based system.
      </para>
    </note>
    <sect2>
      <title>Configuration file changes</title>
@@ -214,7 +511,7 @@
  <sect1 id="release-4.0.6">
    <title>Release 4.0.6</title>
-    <para><emphasis>June 14, 2018</emphasis></para>
+    <para><emphasis>Thu June 14, 2018</emphasis></para>
    <para>
 	  &repmgr; 4.0.6 contains a number of bug fixes and usability enhancements.
    </para>
--- a/doc/appendix-signatures.sgml
+++ b/doc/appendix-signatures.sgml
@@ -5,14 +5,14 @@
   <title>repmgr source code signing key</title>
   <para>
     The signing key ID used for <application>repmgr</application> source code bundles is:
-     <ulink url="http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr">
+     <ulink url="https://repmgr.org/download/SOURCE-GPG-KEY-repmgr">
       <literal>0x297F1DCC</literal></ulink>.
   </para>
   <para>
     To download the <application>repmgr</application> source key to your computer:
     <programlisting>
-       curl -s http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr | gpg --import
+       curl -s https://repmgr.org/download/SOURCE-GPG-KEY-repmgr | gpg --import
       gpg --fingerprint 0x297F1DCC
     </programlisting>
     then verify that the fingerprint is the expected value:
--- a/doc/cloning-standbys.sgml
+++ b/doc/cloning-standbys.sgml
@@ -352,10 +352,12 @@
      provide additional parameters for <command>pg_basebackup</command> to customise the
      cloning process.
    </para>
    <para>
     By default, <command>pg_basebackup</command> performs a checkpoint before beginning the backup
     process. However, a normal checkpoint may take some time to complete;
-     a fast checkpoint can be forced with the <literal>-c/--fast-checkpoint</literal> option.
+     a fast checkpoint can be forced with <command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>'s
     <literal>-c/--fast-checkpoint</literal> option.
     Note that this may impact performance of the server being cloned from (typically the primary)
     so should be used with care.
    </para>
@@ -370,6 +372,18 @@
      Other options can be passed to <command>pg_basebackup</command> by including them
      in the <filename>repmgr.conf</filename> setting <varname>pg_basebackup_options</varname>.
    </para>
    <para>
      Note that by default, &repmgr; executes <command>pg_basebackup</command> with <option>-X/--wal-method</option>
      (PostgreSQL 9.6 and earlier: <option>-X/--xlog-method</option>) set to <literal>stream</literal>.
      From PostgreSQL 9.6, if replication slots are in use, it will also create a replication slot before
      running the base backup, and execute <command>pg_basebackup</command> with the
      <option>-S/--slot</option> option set to the name of the previously created replication slot.
    </para>
    <para>
      These parameters can be set by the user in <varname>pg_basebackup_options</varname>, in which case they
      will override the &repmgr; default values. However normally there's no reason to do this.
    </para>
    <para>
      If using a separate directory to store WAL files, provide the option <literal>--waldir</literal>
      (<literal>--xlogdir</literal> in PostgreSQL 9.6 and earlier) with the absolute path to the
--- a/doc/configuration-file-service-commands.sgml
+++ b/doc/configuration-file-service-commands.sgml
@@ -17,15 +17,15 @@
    <link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>.
  </para>
  <para>
-    By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> to control the PostgreSQL
+    By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> utility to control the PostgreSQL
    server. However this can lead to various problems, particularly when PostgreSQL has been
-    installed from packages, and expecially so if <application>systemd</application> is in use.
+    installed from packages, and especially so if <application>systemd</application> is in use.
  </para>
  <note>
    <para>
-      If using <application>systemd</application>, ensure you have <varname>RemoteIPC</varname> set to <literal>off</literal>.
+      If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
      See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
      entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
    </para>
@@ -48,6 +48,13 @@
    service_reload_command</programlisting>
  </para>
  <note>
    <para>
      &repmgr; will not apply <option>pg_bindir</option> when executing any of these commands;
      these can be user-defined scripts so must always be specified with the full path.
    </para>
  </note>
  <note>
    <para>
      It's also possible to specify a <varname>service_promote_command</varname>.
@@ -92,7 +99,7 @@
      Defaults:postgres !requiretty
      postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.6, \
        /usr/bin/systemctl start postgresql-9.6, \
-        /usr/bin/systemctl restart postgresql-9.6 \
+        /usr/bin/systemctl restart postgresql-9.6, \
        /usr/bin/systemctl reload postgresql-9.6</programlisting>
  </para>
--- a/doc/configuring-witness-server.sgml
+++ b/doc/configuring-witness-server.sgml
@@ -16,15 +16,22 @@
 <para>
   A typical use case for a witness server is a two-node streaming replication
   setup, where the primary and standby are in different locations (data centres).
-   By creating a witness server in the same location as the primary, if the primary
+   By creating a witness server in the same location (data centre) as the primary,
-   becomes unavailable  it's possible for the standby to decide whether it can
+   if the primary becomes unavailable it's possible for the standby to decide whether
-   promote itself without risking a "split brain" scenario: if it can't see either the
+   it can promote itself without risking a "split brain" scenario: if it can't see either the
   witness or the primary server, it's likely there's a network-level interruption
   and it should not promote itself. If it can see the witness but not the primary,
   this proves there is no network interruption and the primary itself is unavailable,
   and it can therefore promote itself (and ideally take action to fence the
   former primary).
 </para>
 <note>
   <para>
     <emphasis>Never</emphasis> install a witness server on the same physical host
     as another node in the replication cluster managed by &repmgr; - it's essential
     the witness is not affected in any way by failure of another node.
   </para>
 </note>
 <para>
   For more complex replication scenarios, e.g. with multiple datacentres, it may
   be preferable to use location-based failover, which ensures that only nodes
--- a/doc/event-notifications.sgml
+++ b/doc/event-notifications.sgml
@@ -147,58 +147,76 @@
 <para>
  By default, all notification types will be passed to the designated script;
  the notification types can be filtered to explicitly named ones using the
-  <varname>event_notifications</varname> parameter:
+  <varname>event_notifications</varname> parameter.
 </para>
 <para>
   Events generated by the &repmgr; command:
  <itemizedlist spacing="compact" mark="bullet">
   <listitem>
-    <simpara><literal>primary_register</literal></simpara>
+     <simpara><literal><link linkend="repmgr-primary-register-events">cluster_created</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>primary_unregister</literal></simpara>
+     <simpara><literal><link linkend="repmgr-primary-register-events">primary_register</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_register</literal></simpara>
+     <simpara><literal><link linkend="repmgr-primary-unregister-events">primary_unregister</link></literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal><link linkend="repmgr-standby-clone-events">standby_clone</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_register_sync</literal></simpara>
+    <simpara><literal><link linkend="repmgr-standby-register-events">standby_register</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_unregister</literal></simpara>
+    <simpara><literal><link linkend="repmgr-standby-register-events">standby_register_sync</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_clone</literal></simpara>
+    <simpara><literal><link linkend="repmgr-standby-unregister-events">standby_unregister</link></literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal><link linkend="repmgr-standby-promote-events">standby_promote</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_promote</literal></simpara>
+    <simpara><literal><link linkend="repmgr-standby-follow-events">standby_follow</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_follow</literal></simpara>
+     <simpara><literal><link linkend="repmgr-standby-switchover-events">standby_switchover</link></literal></simpara>
   </listitem>
   <listitem>
     <simpara><literal><link linkend="repmgr-witness-register-events">witness_register</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_disconnect_manual</literal></simpara>
+    <simpara><literal><link linkend="repmgr-witness-unregister-events">witness_unregister</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_failure</literal></simpara>
+    <simpara><literal><link linkend="repmgr-node-rejoin-events">node_rejoin</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_recovery</literal></simpara>
+    <simpara><literal><link linkend="repmgr-cluster-cleanup-events">cluster_cleanup</link></literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>witness_register</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>witness_unregister</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>node_rejoin</literal></simpara>
   </listitem>
  </itemizedlist>
 </para>
 <para>
   Events generated by <application>repmgrd</application> (streaming replication mode):
   <itemizedlist spacing="compact" mark="bullet">
   <listitem>
    <simpara><literal>repmgrd_start</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_shutdown</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_reload</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_failover_promote</literal></simpara>
   </listitem>
@@ -208,15 +226,41 @@
   <listitem>
    <simpara><literal>repmgrd_failover_aborted</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_standby_reconnect</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_promote_error</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_local_disconnect</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_local_reconnect</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_upstream_disconnect</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_upstream_reconnect</literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>repmgrd_promote_error</literal></simpara>
+    <simpara><literal>standby_disconnect_manual</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>standby_failure</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>standby_recovery</literal></simpara>
   </listitem>
   </itemizedlist>
 </para>
  <para>
   Events generated by <application>repmgrd</application> (BDR mode):
   <itemizedlist spacing="compact" mark="bullet">
   <listitem>
    <simpara><literal>bdr_failover</literal></simpara>
   </listitem>
--- a/doc/filelist.sgml
+++ b/doc/filelist.sgml
@@ -58,6 +58,7 @@
 <!ENTITY repmgrd-cascading-replication SYSTEM "repmgrd-cascading-replication.sgml">
 <!ENTITY repmgrd-network-split SYSTEM "repmgrd-network-split.sgml">
 <!ENTITY repmgrd-witness-server SYSTEM "repmgrd-witness-server.sgml">
 <!ENTITY repmgrd-pausing SYSTEM "repmgrd-pausing.sgml">
 <!ENTITY repmgrd-bdr SYSTEM "repmgrd-bdr.sgml">
 <!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.sgml">
@@ -78,6 +79,9 @@
 <!ENTITY repmgr-cluster-crosscheck SYSTEM "repmgr-cluster-crosscheck.sgml">
 <!ENTITY repmgr-cluster-event SYSTEM "repmgr-cluster-event.sgml">
 <!ENTITY repmgr-cluster-cleanup SYSTEM "repmgr-cluster-cleanup.sgml">
 <!ENTITY repmgr-daemon-status SYSTEM "repmgr-daemon-status.sgml">
 <!ENTITY repmgr-daemon-pause SYSTEM "repmgr-daemon-pause.sgml">
 <!ENTITY repmgr-daemon-unpause SYSTEM "repmgr-daemon-unpause.sgml">
 <!ENTITY appendix-release-notes  SYSTEM "appendix-release-notes.sgml">
 <!ENTITY appendix-faq      SYSTEM "appendix-faq.sgml">
--- a/doc/install-packages.sgml
+++ b/doc/install-packages.sgml
@@ -16,7 +16,7 @@
  <para>
 	&repmgr; RPM packages for RedHat/CentOS variants and Fedora are available from the
 	<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
-	<ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink>; see following
+	<ulink url="https://dl.2ndquadrant.com/">public repository</ulink>; see following
 	section for details.
  </para>
  <para>
@@ -46,26 +46,15 @@
  <sect3 id="installation-packages-redhat-2ndq">
    <title>2ndQuadrant public RPM yum repository</title>
 	<note>
 	  <para>
 		<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink> previously provided a dedicated
        &repmgr; repository at
        <ulink url="http://packages.2ndquadrant.com/repmgr/">http://packages.2ndquadrant.com/repmgr/</ulink>.
 		This repository will be deprecated in a future release as it is now replaced by
 		the <ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink>
 		documented below.
 	  </para>
 	</note>
    <para>
-      Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
+      Beginning with <ulink url="https://repmgr.org/docs/4.1/release-4.0.5.html">repmgr 4.0.5</ulink>,
      <ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
-	  <ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink> for 2ndQuadrant software,
+	  <ulink url="https://dl.2ndquadrant.com/">public repository</ulink> for 2ndQuadrant software,
 	  including &repmgr;. We recommend using this for all future &repmgr; releases.
 	</para>
 	<para>
 	  General instructions for using this repository can be found on its
-	  <ulink url="https://rpm.2ndquadrant.com/">homepage</ulink>. Specific instructions
+	  <ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
 	  for installing &repmgr; follow below.
 	</para>
    <para>
@@ -75,20 +64,19 @@
 		<listitem>
 		  <para>
 			Locate the repository RPM for your PostgreSQL version from the list at:
-			<ulink url="https://rpm.2ndquadrant.com/">https://rpm.2ndquadrant.com/</ulink>
+			<ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink>
 		  </para>
 		</listitem>
        <listitem>
          <para>
-            Install the repository RPM for your distribution and PostgreSQL version
+            Install the repository definition for your distribution and PostgreSQL version
 			(this enables the 2ndQuadrant repository as a source of &repmgr; packages).
 		  </para>
 		  <para>
 			For example, for PostgreSQL 10 on CentOS, execute:
 			<programlisting>
-sudo yum install https://rpm.2ndquadrant.com/site/content/2ndquadrant-repo-10-1-1.el7.noarch.rpm
+curl https://dl.2ndquadrant.com/default/release/get/10/rpm | sudo bash
 			</programlisting>
 		  </para>
 		  <para>
 			Verify that the repository is installed with:
@@ -96,8 +84,8 @@ sudo yum install https://rpm.2ndquadrant.com/site/content/2ndquadrant-repo-10-1-
 sudo yum repolist</programlisting>
 			The output should contain two entries like this:
 			<programlisting>
-2ndquadrant-repo-10/7/x86_64         2ndQuadrant packages for PG10 for rhel 7 - x86_64           1
+2ndquadrant-dl-default-release-pg10/7/x86_64        2ndQuadrant packages (PG10) for 7 - x86_64          4
-2ndquadrant-repo-10-debug/7/x86_64   2ndQuadrant packages for PG10 for rhel 7 - x86_64 - Debug   1</programlisting>
+2ndquadrant-dl-default-release-pg10-debug/7/x86_64  2ndQuadrant packages (PG10) for 7 - x86_64 - Debug  3</programlisting>
 		  </para>
 		</listitem>
@@ -177,52 +165,43 @@ $ yum install repmgr10</programlisting>
    <para>
      Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
      <ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a
-	  <ulink url="https://apt.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
+	  <ulink url="https://dl.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
 	  including &repmgr;.
 	</para>
 	<para>
 	  General instructions for using this repository can be found on its
-	  <ulink url="https://apt.2ndquadrant.com/">homepage</ulink>. Specific instructions
+	  <ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
 	  for installing &repmgr; follow below.
 	</para>
    <para>
      <emphasis>Installation</emphasis>
      <itemizedlist>
 		<listitem>
 		  <para>
-			If not already present, install the  <application>apt-transport-https</application> package:
+            Install the repository definition for your distribution and PostgreSQL version
-			<programlisting>
+			(this enables the 2ndQuadrant repository as a source of &repmgr; packages) by executing:
-sudo apt-get install apt-transport-https</programlisting>
+            <programlisting>
 curl https://dl.2ndquadrant.com/default/release/get/deb | sudo bash</programlisting>
 		  </para>
-		</listitem>
+          <note>
            <para>
              This will automatically install the following additional packages, if not already present:
              <itemizedlist spacing="compact" mark="bullet">
                <listitem>
                  <simpara><literal>lsb-release</literal></simpara>
                </listitem>
                <listitem>
                  <simpara><literal>apt-transport-https</literal></simpara>
                </listitem>
              </itemizedlist>
            </para>
          </note>
        </listitem>
 		<listitem>
 		  <para>
 			Create <filename>/etc/apt/sources.list.d/2ndquadrant.list</filename> as follows:
 			<programlisting>
 sudo sh -c 'echo "deb https://apt.2ndquadrant.com/ $(lsb_release -cs)-2ndquadrant main" > /etc/apt/sources.list.d/2ndquadrant.list'</programlisting>
 		  </para>
 		</listitem>
 		<listitem>
 		  <para>
 			Install the 2ndQuadrant <ulink url="https://apt.2ndquadrant.com/site/keys/9904CD4BD6BAF0C3.asc">repository key</ulink>:
 			<programlisting>
 sudo apt-get install curl ca-certificates
 curl https://apt.2ndquadrant.com/site/keys/9904CD4BD6BAF0C3.asc | sudo apt-key add -</programlisting>
 		  </para>
 		</listitem>
 		<listitem>
 		  <para>
 			Update the package list
 			<programlisting>
 sudo apt-get update</programlisting>
 		  </para>
 		</listitem>
 		<listitem>
 		  <para>
--- a/doc/install-requirements.sgml
+++ b/doc/install-requirements.sgml
@@ -36,7 +36,7 @@
  </para>
  <para>
-   A dedicated system user for &repmgr; is *not* required; as many &repmgr; and
+   A dedicated system user for &repmgr; is <emphasis>not</emphasis> required; as many &repmgr; and
   <application>repmgrd</application> actions require direct access to the PostgreSQL data directory,
   these commands should be executed by the <literal>postgres</literal> user.
  </para>
@@ -61,6 +61,13 @@
   </itemizedlist>
  </para>
  <tip>
    <simpara>
      Consider setting <varname>ConnectTimeout</varname> to a low value in your SSH configuration.
      This will make it faster to detect any SSH connection errors.
    </simpara>
  </tip>
  <tip>
   <simpara>
    We recommend using a session multiplexer utility such as <command>screen</command> or
--- a/doc/install-source.sgml
+++ b/doc/install-source.sgml
@@ -12,8 +12,8 @@
   To install &repmgr; the prerequisites for compiling
   &postgres; must be installed. These are described in &postgres;'s
   documentation
-   on <ulink url="https://www.postgresql.org/docs/current/install-requirements.html">build requirements</ulink>
+   on <ulink url="https://www.postgresql.org/docs/current/static/install-requirements.html">build requirements</ulink>
-   and <ulink url="https://www.postgresql.org/docs/current/docguide-toolsets.html">build requirements for documentation</ulink>.
+   and <ulink url="https://www.postgresql.org/docs/current/static/docguide-toolsets.html">build requirements for documentation</ulink>.
  </para>
  <para>
--- a/doc/quickstart.sgml
+++ b/doc/quickstart.sgml
@@ -237,14 +237,42 @@
   server. See sections <xref linkend="configuration"> and <xref linkend="configuration-file">
   for further details about <filename>repmgr.conf</filename>.
  </para>
  <note>
    <para>
      &repmgr; only uses <option>pg_bindir</option> when it executes
      PostgreSQL binaries directly.
    </para>
    <para>
      For user-defined scripts such as <option>promote_command</option> and the
      various <option>service_*_command</option>s, you <emphasis>must</emphasis>
      always explicitly provide the full path to the binary or script being
      executed, even if it is &repmgr; itself.
    </para>
    <para>
      This is because these options can contain user-defined scripts in arbitrary
      locations, so prepending <option>pg_bindir</option> may break them.
    </para>
  </note>
  <tip>
   <simpara>
    For Debian-based distributions we recommend explicitly setting
-    <literal>pg_bindir</literal> to the directory where <command>pg_ctl</command> and other binaries
+    <option>pg_bindir</option> to the directory where <command>pg_ctl</command> and other binaries
    not in the standard path are located. For PostgreSQL 9.6 this would be <filename>/usr/lib/postgresql/9.6/bin/</filename>.
   </simpara>
  </tip>
  <tip>
    <simpara>
      If your distribution places the &repmgr; binaries in a location other than the
      PostgreSQL installation directory, specify this with <option>repmgr_bindir</option>
      to enable &repmgr; to perform operations (e.g.
      <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>)
      on other nodes.
    </simpara>
  </tip>
  <para>
   See the file
   <ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</>
--- a/doc/repmgr-cluster-cleanup.sgml
+++ b/doc/repmgr-cluster-cleanup.sgml
@@ -15,9 +15,14 @@
    <title>Description</title>
    <para>
      Purges monitoring history from the <literal>repmgr.monitoring_history</literal> table to
-      prevent excessive table growth. Use the <literal>-k/--keep-history</literal> to specify the
+      prevent excessive table growth.
-      number of days of monitoring history to retain. This command can be used
+    </para>
-      manually or as a cronjob.
+    <para>
      By default <emphasis>all</emphasis> data will be removed; use the <option>-k/--keep-history</option>
      option to specify the number of days of monitoring history to retain.
    </para>
    <para>
      This command can be executed manually or as a cronjob.
    </para>
  </refsect1>
@@ -38,4 +43,35 @@
      <filename>repmgr.conf</filename>.
    </para>
  </refsect1>
  <refsect1 id="repmgr-cluster-cleanup-events">
    <title>Event notifications</title>
    <para>
      A <literal>cluster_cleanup</literal> <link linkend="event-notifications">event notification</link> will be generated.
    </para>
  </refsect1>
  <refsect1>
    <title>Options</title>
    <variablelist>
      <varlistentry>
        <term><option>--node-id</option></term>
        <listitem>
          <para>
            Only delete monitoring records for the specified node.
          </para>
        </listitem>
      </varlistentry>
    </variablelist>
  </refsect1>
  <refsect1>
    <title>See also</title>
    <para>
      For more details see the sections <xref linkend="repmgrd-monitoring"> and
      <xref linkend="repmgrd-monitoring-configuration">.
    </para>
  </refsect1>
 </refentry>
--- a/doc/repmgr-cluster-crosscheck.sgml
+++ b/doc/repmgr-cluster-crosscheck.sgml
@@ -55,12 +55,37 @@
        </listitem>
      </varlistentry>
      <varlistentry>
        <term><option>ERR_BAD_SSH (12)</option></term>
        <listitem>
          <para>
            One or more nodes could not be accessed via SSH.
          </para>
          <note>
            <simpara>
              This only applies to nodes unreachable from the node where
              this command is executed.
            </simpara>
            <simpara>
              It's also possible that the crosscheck establishes that
              connections between PostgreSQL on all nodes are functioning,
              even if SSH access between some nodes is not possible.
            </simpara>
          </note>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term><option>ERR_NODE_STATUS (25)</option></term>
        <listitem>
          <para>
-            One or more nodes could not be reached.
+            PostgreSQL on one or more nodes could not be reached.
          </para>
          <note>
            <simpara>
              This error code overrides <option>ERR_BAD_SSH</option>.
            </simpara>
          </note>
        </listitem>
      </varlistentry>
--- a/doc/repmgr-cluster-matrix.sgml
+++ b/doc/repmgr-cluster-matrix.sgml
@@ -115,12 +115,26 @@
        </listitem>
      </varlistentry>
      <varlistentry>
        <term><option>ERR_BAD_SSH (12)</option></term>
        <listitem>
          <para>
            One or more nodes could not be accessed via SSH.
          </para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term><option>ERR_NODE_STATUS (25)</option></term>
        <listitem>
          <para>
-            One or more nodes could not be reached.
+            PostgreSQL on one or more nodes could not be reached.
          </para>
          <note>
            <simpara>
              This error code overrides <option>ERR_BAD_SSH</option>.
            </simpara>
          </note>
        </listitem>
      </varlistentry>
--- a/doc/repmgr-cluster-show.sgml
+++ b/doc/repmgr-cluster-show.sgml
@@ -81,36 +81,56 @@
  <refsect1>
    <title>Options</title>
-    <para>
+
-      <command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
+    <variablelist>
-      outputs the replication cluster's status in a simple CSV format, suitable for
+
-      parsing by scripts:
+      <varlistentry>
-      <programlisting>
+        <term><option>--csv</option></term>
        <listitem>
 		  <para>
 			<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
 			outputs the replication cluster's status in a simple CSV format, suitable for
 			parsing by scripts, e.g.:
 			<programlisting>
    $ repmgr -f /etc/repmgr.conf cluster show --csv
    1,-1,-1
    2,0,0
    3,0,1</programlisting>
-    </para>
+		  </para>
-    <para>
+		  <para>
-      The columns have following meanings:
+			The columns have following meanings:
-      <itemizedlist spacing="compact" mark="bullet">
+			<itemizedlist spacing="compact" mark="bullet">
-        <listitem>
+			  <listitem>
-          <simpara>
+				<simpara>
-            node ID
+				  node ID
-          </simpara>
+				</simpara>
-        </listitem>
+			  </listitem>
-        <listitem>
+			  <listitem>
-          <simpara>
+				<simpara>
            availability (0 = available, -1 = unavailable)
-          </simpara>
+				</simpara>
-        </listitem>
+			  </listitem>
 			  <listitem>
 				<simpara>
 				  recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
 				</simpara>
 			  </listitem>
 			</itemizedlist>
 		  </para>
 		</listitem>
 	  </varlistentry>
      <varlistentry>
        <term><option>--verbose</option></term>
        <listitem>
-          <simpara>
+          <para>
-            recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
+			Display the full text of any database connection error messages
-          </simpara>
+          </para>
        </listitem>
-      </itemizedlist>
+      </varlistentry>
-    </para>
+
 	</variablelist>
  </refsect1>
@@ -145,7 +165,7 @@
  <refsect1>
    <title>See also</title>
    <para>
-     <xref linkend="repmgr-node-status">, <xref linkend="repmgr-node-check">
+     <xref linkend="repmgr-node-status">, <xref linkend="repmgr-node-check">, <xref linkend="repmgr-daemon-status">
    </para>
  </refsect1>
--- a/doc/repmgr-daemon-pause.sgml
+++ b/doc/repmgr-daemon-pause.sgml
@@ -0,0 +1,109 @@
 <refentry id="repmgr-daemon-pause">
  <indexterm>
    <primary>repmgr daemon pause</primary>
  </indexterm>
  <refmeta>
    <refentrytitle>repmgr daemon pause</refentrytitle>
  </refmeta>
  <refnamediv>
    <refname>repmgr daemon pause</refname>
    <refpurpose>Instruct all <application>repmgrd</application> instances in the replication cluster to pause failover operations</refpurpose>
  </refnamediv>
  <refsect1>
    <title>Description</title>
    <para>
      This command can be run on any active node in the replication cluster to instruct all
      running <application>repmgrd</application> instances to &quot;pause&quot; themselves, i.e. take no
      action (such as promoting themselves or following a new primary) if a failover event is detected.
    </para>
    <para>
      This functionality is useful for performing maintenance operations, such as switchovers
      or upgrades, which might otherwise trigger a failover if <application>repmgrd</application>
      is running normally.
    </para>
    <note>
      <para>
        It's important to wait a few seconds after restarting PostgreSQL on any node before running
        <command>repmgr daemon pause</command>, as the <application>repmgrd</application> instance
        on the restarted node will take a second or two before it has updated its status.
      </para>
    </note>
    <para>
      <xref linkend="repmgr-daemon-unpause"> will instruct all previously paused <application>repmgrd</application>
      instances to resume normal failover operation.
    </para>
  </refsect1>
  <refsect1>
    <title>Execution</title>
    <para>
      <command>repmgr daemon pause</command> can be executed on any active node in the
      replication cluster. A valid <filename>repmgr.conf</filename> file is required.
      It will have no effect on previously paused nodes.
    </para>
  </refsect1>
  <refsect1>
    <title>Example</title>
    <para>
    <programlisting>
 $ repmgr -f /etc/repmgr.conf daemon pause
 NOTICE: node 1 (node1) paused
 NOTICE: node 2 (node2) paused
 NOTICE: node 3 (node3) paused</programlisting>
    </para>
  </refsect1>
  <refsect1>
    <title>Options</title>
    <variablelist>
      <varlistentry>
        <term><option>--dry-run</option></term>
        <listitem>
          <para>
            Check if nodes are reachable but don't pause <application>repmgrd</application>.
          </para>
        </listitem>
      </varlistentry>
    </variablelist>
  </refsect1>
  <refsect1>
    <title>Exit codes</title>
    <para>
      The following exit codes can be emitted by <command>repmgr daemon pause</command>:
    </para>
    <variablelist>
      <varlistentry>
        <term><option>SUCCESS (0)</option></term>
        <listitem>
          <para>
            <application>repmgrd</application> could be paused on all nodes.
          </para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term><option>ERR_REPMGRD_PAUSE (26)</option></term>
        <listitem>
          <para>
           <application>repmgrd</application> could not be paused on one or more nodes.
          </para>
        </listitem>
      </varlistentry>
    </variablelist>
  </refsect1>
  <refsect1>
    <title>See also</title>
    <para>
      <xref linkend="repmgr-daemon-unpause">, <xref linkend="repmgr-daemon-status">
    </para>
  </refsect1>
 </refentry>
--- a/doc/repmgr-daemon-status.sgml
+++ b/doc/repmgr-daemon-status.sgml
@@ -0,0 +1,165 @@
 <refentry id="repmgr-daemon-status">
  <indexterm>
    <primary>repmgr daemon status</primary>
  </indexterm>
  <refmeta>
    <refentrytitle>repmgr daemon status</refentrytitle>
  </refmeta>
  <refnamediv>
    <refname>repmgr daemon status</refname>
    <refpurpose>display information about the status of <application>repmgrd</application> on each node in the cluster</refpurpose>
  </refnamediv>
  <refsect1>
    <title>Description</title>
    <para>
      This command provides an overview of all active nodes in the cluster and the state
      of each node's <application>repmgrd</application> instance. It can be used to check
      the result of <xref linkend="repmgr-daemon-pause"> and <xref linkend="repmgr-daemon-unpause">
      operations.
    </para>
  </refsect1>
  <refsect1>
    <title>Execution</title>
    <para>
      <command>repmgr daemon status</command> can be executed on any active node in the
      replication cluster. A valid <filename>repmgr.conf</filename> file is required.
    </para>
    <note>
      <para>
        After restarting PostgreSQL on any node, the <application>repmgrd</application> instance
        will take a second or two before it is able to update its status. Until then,
        <application>repmgrd</application> will be shown as not running.
      </para>
    </note>
  </refsect1>
  <refsect1>
    <title>Examples</title>
    <para>
      <application>repmgrd</application> running normally on all nodes:
    <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
 ID | Name  | Role    | Status  | repmgrd | PID  | Paused?
 ----+-------+---------+---------+---------+------+---------
 1  | node1 | primary | running | running | 7851 | no
 2  | node2 | standby | running | running | 7889 | no
 3  | node3 | standby | running | running | 7918 | no</programlisting>
    </para>
    <para>
      <application>repmgrd</application> paused on all nodes (using <xref linkend="repmgr-daemon-pause">):
    <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
 ID | Name  | Role    | Status  | repmgrd | PID  | Paused?
 ----+-------+---------+---------+---------+------+---------
 1  | node1 | primary | running | running | 7851 | yes
 2  | node2 | standby | running | running | 7889 | yes
 3  | node3 | standby | running | running | 7918 | yes</programlisting>
    </para>
    <para>
      <application>repmgrd</application> not running on one node:
    <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
 ID | Name  | Role    | Status  | repmgrd     | PID  | Paused?
 ----+-------+---------+---------+-------------+------+---------
 1  | node1 | primary | running | running     | 7851 | yes
 2  | node2 | standby | running | not running | n/a  | n/a
 3  | node3 | standby | running | running     | 7918 | yes</programlisting>
    </para>
  </refsect1>
  <refsect1>
    <title>Options</title>
    <variablelist>
      <varlistentry>
        <term><option>--csv</option></term>
        <listitem>
 		  <para>
 			<command>repmgr daemon status</command> accepts an optional parameter <literal>--csv</literal>, which
 			outputs the replication cluster's status in a simple CSV format, suitable for
 			parsing by scripts, e.g.:
 			<programlisting>
    $ repmgr -f /etc/repmgr.conf daemon status --csv
    1,node1,primary,1,1,10204,1
    2,node2,standby,1,0,-1,1
    3,node3,standby,1,1,10225,1</programlisting>
 		  </para>
 		  <para>
 			The columns have the following meanings:
 			<itemizedlist spacing="compact" mark="bullet">
 			  <listitem>
 				<simpara>
 				  node ID
 				</simpara>
 			  </listitem>
 			  <listitem>
 				<simpara>
                  node name
 				</simpara>
 			  </listitem>
 			  <listitem>
 				<simpara>
                  node type (primary or standby)
 				</simpara>
 			  </listitem>
 			  <listitem>
 				<simpara>
                  PostgreSQL server running
 				</simpara>
 			  </listitem>
 			  <listitem>
 				<simpara>
                  <application>repmgrd</application> running (1 = running, 0 = not running)
 				</simpara>
 			  </listitem>
 			  <listitem>
 				<simpara>
                  <application>repmgrd</application> PID (-1 if not running)
 				</simpara>
 			  </listitem>
 			  <listitem>
 				<simpara>
                  <application>repmgrd</application> paused (1 = paused, 0 = not paused)
 				</simpara>
 			  </listitem>
 			</itemizedlist>
 		  </para>
 		</listitem>
 	  </varlistentry>
      <varlistentry>
        <term><option>--verbose</option></term>
        <listitem>
          <para>
 			Display the full text of any database connection error messages
          </para>
        </listitem>
      </varlistentry>
 	</variablelist>
  </refsect1>
  <refsect1>
    <title>See also</title>
    <para>
      <xref linkend="repmgr-daemon-pause">, <xref linkend="repmgr-daemon-unpause">, <xref linkend="repmgr-cluster-show">
    </para>
  </refsect1>
 </refentry>
--- a/doc/repmgr-daemon-unpause.sgml
+++ b/doc/repmgr-daemon-unpause.sgml
@@ -0,0 +1,103 @@
 <refentry id="repmgr-daemon-unpause">
  <indexterm>
    <primary>repmgr daemon unpause</primary>
  </indexterm>
  <refmeta>
    <refentrytitle>repmgr daemon unpause</refentrytitle>
  </refmeta>
  <refnamediv>
    <refname>repmgr daemon unpause</refname>
    <refpurpose>Instruct all <application>repmgrd</application> instances in the replication cluster to resume failover operations</refpurpose>
  </refnamediv>
  <refsect1>
    <title>Description</title>
    <para>
      This command can be run on any active node in the replication cluster to instruct all
      running <application>repmgrd</application> instances to &quot;unpause&quot;
      (following a previous execution of <xref linkend="repmgr-daemon-pause">)
      and resume normal failover/monitoring operation.
    </para>
    <note>
      <para>
        It's important to wait a few seconds after restarting PostgreSQL on any node before running
        <command>repmgr daemon unpause</command>, as the <application>repmgrd</application> instance
        on the restarted node will take a second or two before it has updated its status.
      </para>
    </note>
  </refsect1>
  <refsect1>
    <title>Execution</title>
    <para>
     <command>repmgr daemon unpause</command> can be executed on any active node in the
      replication cluster. A valid <filename>repmgr.conf</filename> file is required.
      It will have no effect on nodes which are not already paused.
    </para>
  </refsect1>
  <refsect1>
    <title>Example</title>
    <para>
    <programlisting>
 $ repmgr -f /etc/repmgr.conf daemon unpause
 NOTICE: node 1 (node1) unpaused
 NOTICE: node 2 (node2) unpaused
 NOTICE: node 3 (node3) unpaused</programlisting>
    </para>
  </refsect1>
  <refsect1>
    <title>Options</title>
    <variablelist>
      <varlistentry>
        <term><option>--dry-run</option></term>
        <listitem>
          <para>
            Check if nodes are reachable but don't unpause <application>repmgrd</application>.
          </para>
        </listitem>
      </varlistentry>
    </variablelist>
  </refsect1>
  <refsect1>
    <title>Exit codes</title>
    <para>
      Following exit codes can be emitted by <command>repmgr daemon unpause</command>:
    </para>
    <variablelist>
      <varlistentry>
        <term><option>SUCCESS (0)</option></term>
        <listitem>
          <para>
            <application>repmgrd</application> could be unpaused on all nodes.
          </para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term><option>ERR_REPMGRD_PAUSE (26)</option></term>
        <listitem>
          <para>
           <application>repmgrd</application> could not be unpaused on one or more nodes.
          </para>
        </listitem>
      </varlistentry>
    </variablelist>
  </refsect1>
  <refsect1>
    <title>See also</title>
    <para>
      <xref linkend="repmgr-daemon-pause">, <xref linkend="repmgr-daemon-status">
    </para>
  </refsect1>
 </refentry>
--- a/doc/repmgr-node-check.sgml
+++ b/doc/repmgr-node-check.sgml
@@ -30,7 +30,8 @@
            Replication lag: OK (N/A - node is primary)
            WAL archiving: OK (0 pending files)
            Downstream servers: OK (2 of 2 downstream nodes attached)
-            Replication slots: OK (node has no replication slots)</programlisting>
+            Replication slots: OK (node has no replication slots)
            Missing replication slots: OK (node has no missing replication slots)</programlisting>
    </para>
  </refsect1>
  <refsect1>
--- a/doc/repmgr-node-rejoin.sgml
+++ b/doc/repmgr-node-rejoin.sgml
@@ -28,6 +28,10 @@
        If the node is running and needs to be attached to the current primary, use
        <xref linkend="repmgr-standby-follow">.
      </para>
      <para>
        Note that <xref linkend="repmgr-standby-follow"> can only be used for standbys which have not diverged
        from the rest of the cluster.
      </para>
    </tip>
  </refsect1>
@@ -63,10 +67,10 @@
        <term><option>--force-rewind[=/path/to/pg_rewind]</option></term>
        <listitem>
          <para>
-            Execute <application>pg_rewind</application> if necessary.
+            Execute <application>pg_rewind</application>.
          </para>
          <para>
-            It is only necessary to provide the <application>pg_rewind</application>
+            It is only necessary to provide the <application>pg_rewind</application> path
            if using PostgreSQL 9.3 or 9.4, and <application>pg_rewind</application>
            is not installed in the PostgreSQL <filename>bin</filename> directory.
          </para>
@@ -115,7 +119,7 @@
    </variablelist>
  </refsect1>
- <refsect1>
+  <refsect1>
    <title>Configuration file settings</title>
    <para>
@@ -132,8 +136,9 @@
 	  </itemizedlist>
 	</para>
- </refsect1>
+  </refsect1>
-  <refsect1>
+
  <refsect1 id="repmgr-node-rejoin-events">
    <title>Event notifications</title>
    <para>
      A <literal>node_rejoin</literal> <link linkend="event-notifications">event notification</link> will be generated.
@@ -188,7 +193,7 @@
    </note>
    <para>
-      To have <command>repmgr node rejoin</command> use <command>pg_rewind</command> if required,
+      To have <command>repmgr node rejoin</command> use <command>pg_rewind</command>,
      pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
      to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
    </para>
@@ -221,6 +226,15 @@
    INFO: pg_rewind would now be executed
    DETAIL: pg_rewind command is:
      pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
    <note>
      <para>
        If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
        this checks the prerequisites for using <application>pg_rewind</application>, but cannot
        predict the outcome of actually executing <application>pg_rewind</application>.
      </para>
    </note>
    <programlisting>
    $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
         --force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
--- a/doc/repmgr-primary-register.sgml
+++ b/doc/repmgr-primary-register.sgml
@@ -75,10 +75,18 @@
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-primary-register-events">
    <title>Event notifications</title>
    <para>
-      A <literal>primary_register</literal> <link linkend="event-notifications">event notification</link> will be generated.
+      The following <link linkend="event-notifications">event notifications</link> will be generated:
      <itemizedlist spacing="compact" mark="bullet">
        <listitem>
          <simpara><literal>cluster_created</literal></simpara>
        </listitem>
        <listitem>
          <simpara><literal>primary_register</literal></simpara>
        </listitem>
      </itemizedlist>
    </para>
  </refsect1>
--- a/doc/repmgr-primary-unregister.sgml
+++ b/doc/repmgr-primary-unregister.sgml
@@ -64,7 +64,7 @@
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-primary-unregister-events">
    <title>Event notifications</title>
    <para>
      A <literal>primary_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-standby-clone.sgml
+++ b/doc/repmgr-standby-clone.sgml
@@ -49,7 +49,7 @@
    not be copied by default. &repmgr; can copy these files, either to the same
    location on the standby server (provided appropriate directory and file permissions
    are available), or into the standby's data directory. This requires passwordless
-    SSH access to the primary server. Add the option <literal>--copy-external-config-files</literal>
+    SSH access to the primary server. Add the option <option>--copy-external-config-files</option>
    to the <command>repmgr standby clone</command> command; by default files will be copied to
    the same path as on the upstream server. Note that the user executing <command>repmgr</command>
    must have write access to those directories.
@@ -59,12 +59,29 @@
    <literal>--copy-external-config-files=pgdata</literal>, but note that
    any include directives in the copied files may need to be updated.
   </para>
   <note>
 	 <para>
 	   When executing <command>repmgr standby clone</command> with the
 	   <option>--copy-external-config-files</option> and <option>--dry-run</option>
 	   options, &repmgr; will check the SSH connection to the source node, but
 	   will not verify whether the files can actually be copied.
 	 </para>
 	 <para>
 	   During the actual clone operation, a check will be made before the database itself
 	   is cloned to determine whether the files can actually be copied; if any problems are
 	   encountered, the clone operation will be aborted, enabling the user to fix
 	   any issues before retrying the clone operation.
 	 </para>
   </note>
   <tip>
    <simpara>
     For reliable configuration file management we recommend using a
     configuration management tool such as Ansible, Chef, Puppet or Salt.
    </simpara>
   </tip>
  </refsect1>
  <refsect1 id="repmgr-standby-clone-recovery-conf">
@@ -333,7 +350,7 @@
    </variablelist>
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-standby-clone-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_clone</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-standby-follow.sgml
+++ b/doc/repmgr-standby-follow.sgml
@@ -94,7 +94,7 @@
    </variablelist>
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-standby-follow-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_follow</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-standby-promote.sgml
+++ b/doc/repmgr-standby-promote.sgml
@@ -50,7 +50,7 @@
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-standby-promote-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_promote</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-standby-register.sgml
+++ b/doc/repmgr-standby-register.sgml
@@ -159,7 +159,7 @@
    </variablelist>
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-standby-register-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_register</literal> <link linkend="event-notifications">event notification</link>
--- a/doc/repmgr-standby-switchover.sgml
+++ b/doc/repmgr-standby-switchover.sgml
@@ -35,6 +35,10 @@
        &repmgr; will attempt to check for potential issues but cannot guarantee
        a successful switchover.
      </para>
      <para>
        &repmgr; will refuse to perform the switchover if an exclusive backup is running on
        the current primary.
      </para>
    </note>
    <para>
      For more details on performing a switchover, including preparation and configuration,
@@ -43,11 +47,14 @@
    <note>
      <para>
-        <application>repmgrd</application> should not be active on any nodes while a switchover is being
+        From <link linkend="release-4.2">repmgr 4.2</link>, &repmgr; will instruct any running
-        executed. This restriction may be lifted in a later version.
+        <application>repmgrd</application> instances to pause operations while the switchover
        is being carried out, to prevent <application>repmgrd</application> from
        unintentionally promoting a node. For more details, see <xref linkend="repmgrd-pausing">.
      </para>
      <para>
-        &repmgr; will not perform the switchover if an exclusive backup is running on the current primary.
+        Users of &repmgr; versions prior to 4.2 should ensure that <application>repmgrd</application>
        is not running on any nodes while a switchover is being executed.
      </para>
    </note>
@@ -61,8 +68,9 @@
        <term><option>--always-promote</option></term>
        <listitem>
          <para>
-            Promote standby to primary, even if it is behind original primary
+            Promote standby to primary, even if it is behind or has diverged
-            (original primary will be shut down in any case).
+            from the original primary. The original primary will be shut down in any case,
            and will need to be manually reintegrated into the replication cluster.
          </para>
        </listitem>
      </varlistentry>
@@ -122,6 +130,21 @@
        </listitem>
      </varlistentry>
      <varlistentry>
        <term><option>--repmgrd-no-pause</option></term>
        <listitem>
          <para>
            Don't pause <application>repmgrd</application> while executing a switchover.
          </para>
          <para>
            This option should not be used unless you take steps by other means
            to ensure <application>repmgrd</application> is paused or not
            running on all nodes.
          </para>
        </listitem>
      </varlistentry>
     <varlistentry>
        <term><option>--siblings-follow</option></term>
        <listitem>
@@ -141,19 +164,7 @@
     Note that the following parameters in <filename>repmgr.conf</filename> are relevant to the
     switchover operation:
     <itemizedlist spacing="compact" mark="bullet">
-       <listitem>
+
         <simpara>
           <literal>reconnect_attempts</literal>: number of times to check the original primary
           for a clean shutdown after executing the shutdown command, before aborting
         </simpara>
       </listitem>
       <listitem>
         <simpara>
           <literal>reconnect_interval</literal>: interval (in seconds) to check the original
           primary for a clean shutdown after executing the shutdown command (up to a maximum
           of <literal>reconnect_attempts</literal> tries)
         </simpara>
       </listitem>
       <listitem>
         <simpara>
           <literal>replication_lag_critical</literal>:
@@ -163,10 +174,29 @@
         </simpara>
       </listitem>
       <listitem>
         <simpara>
           <literal>shutdown_check_timeout</literal>: maximum number of seconds to wait for the
           demotion candidate (current primary) to shut down, before aborting the switchover.
         </simpara>
         <simpara>
           Note that this parameter is set on the node where <command>repmgr standby switchover</command>
           is executed (promotion candidate); setting it on the demotion candidate (former primary) will
           have no effect.
         </simpara>
         <note>
           <para>
             In versions prior to <link linkend="release-4.2">&repmgr; 4.2</link>, <command>repmgr standby switchover</command> would
             use the values defined in <literal>reconnect_attempts</literal> and <literal>reconnect_interval</literal>
             to determine the timeout for demotion candidate shutdown.
           </para>
         </note>
       </listitem>
       <listitem>
         <simpara>
           <literal>standby_reconnect_timeout</literal>:
-           number of seconds to attempt to wait for the demoted primary
+           maximum number of seconds to attempt to wait for the demotion candidate (former primary)
           to reconnect to the promoted primary (default: 60 seconds)
         </simpara>
       </listitem>
@@ -196,7 +226,7 @@
    </para>
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-standby-switchover-events">
    <title>Event notifications</title>
    <para>
      <literal>standby_switchover</literal> and <literal>standby_promote</literal>
--- a/doc/repmgr-standby-unregister.sgml
+++ b/doc/repmgr-standby-unregister.sgml
@@ -59,7 +59,7 @@
    </variablelist>
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-standby-unregister-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-witness-register.sgml
+++ b/doc/repmgr-witness-register.sgml
@@ -50,7 +50,7 @@
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-witness-register-events">
    <title>Event notifications</title>
    <para>
      A <literal>witness_register</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-witness-unregister.sgml
+++ b/doc/repmgr-witness-unregister.sgml
@@ -92,7 +92,7 @@
  </refsect1>
-  <refsect1>
+  <refsect1 id="repmgr-witness-unregister-events">
    <title>Event notifications</title>
    <para>
      A <literal>witness_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr.sgml
+++ b/doc/repmgr.sgml
@@ -86,6 +86,7 @@
  &repmgrd-cascading-replication;
  &repmgrd-network-split;
  &repmgrd-witness-server;
  &repmgrd-pausing;
  &repmgrd-degraded-monitoring;
  &repmgrd-monitoring;
  &repmgrd-bdr;
@@ -112,6 +113,9 @@
  &repmgr-cluster-crosscheck;
  &repmgr-cluster-event;
  &repmgr-cluster-cleanup;
  &repmgr-daemon-status;
  &repmgr-daemon-pause;
  &repmgr-daemon-unpause;
 </part>
 &appendix-release-notes;
--- a/doc/repmgrd-bdr.sgml
+++ b/doc/repmgrd-bdr.sgml
@@ -15,7 +15,7 @@
  </para>
  <note>
    <simpara>
-      Due to the nature of BDR, it's only safe to use this solution for
+      Due to the nature of BDR 1.x/2.x, it's only safe to use this solution for
      a two-node scenario. Introducing additional nodes will create an inherent
      risk of node desynchronisation if a node goes down without being cleanly
      removed from the cluster.
--- a/doc/repmgrd-configuration.sgml
+++ b/doc/repmgrd-configuration.sgml
@@ -34,24 +34,6 @@
      the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
    </para>
    <para>
      To apply configuration file changes to a running <application>repmgrd</application>
      daemon, execute the operating system's r<application>repmgrd</application> service reload command
      (see <xref linkend="appendix-packages"> for examples),
      or for instances  which were manually started, execute <command>kill -HUP</command>, e.g.
      <command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
    </para>
    <note>
      <para>
        Check the <application>repmgrd</application> log to see what changes were
        applied, or if any issues were encountered when reloading the configuration.
      </para>
    </note>
    <para>
      Note that only a subset of configuration file parameters can be changed on a
      running <application>repmgrd</application> daemon.
    </para>
    <sect2 id="repmgrd-automatic-failover-configuration">
      <title>automatic failover configuration</title>
@@ -64,8 +46,17 @@
          follow_command='/usr/bin/repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'</programlisting>
      </para>
      <para>
-        Adjust file paths as appropriate; we recomment specifying the full path to the &repmgr; binary.
+        Adjust file paths as appropriate; always specify the full path to the &repmgr; binary.
      </para>
      <note>
        <para>
          &repmgr; will not apply <option>pg_bindir</option> when executing <option>promote_command</option>
          or <option>follow_command</option>; these can be user-defined scripts so must always be
          specified with the full path.
        </para>
      </note>
      <para>
        Note that the <literal>--log-to-file</literal> option will cause
        output generated by the &repmgr; command, when executed by <application>repmgrd</application>,
@@ -135,7 +126,7 @@
      </para>
    </sect2>
-    <sect2 id="repmgrd-monitoring-configuration">
+    <sect2 id="repmgrd-monitoring-configuration" xreflabel="repmgrd monitoring configuration">
      <indexterm>
        <primary>repmgrd</primary>
        <secondary>monitoring configuration</secondary>
@@ -158,6 +149,203 @@
      </para>
    </sect2>
    <sect2 id="repmgrd-reloading-configuration" xreflabel="reloading repmgrd configuration">
      <indexterm>
        <primary>repmgrd</primary>
        <secondary>applying configuration changes</secondary>
      </indexterm>
      <title>Applying configuration changes to repmgrd</title>
      <para>
        To apply configuration file changes to a running <application>repmgrd</application>
        daemon, execute the operating system's <application>repmgrd</application> service reload command
        (see <xref linkend="appendix-packages"> for examples),
          or for instances which were manually started, execute <command>kill -HUP</command>, e.g.
          <command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
      </para>
      <tip>
        <para>
          Check the <application>repmgrd</application> log to see what changes were
          applied, or if any issues were encountered when reloading the configuration.
        </para>
      </tip>
      <para>
        Note that only the following subset of configuration file parameters can be changed on a
        running <application>repmgrd</application> daemon:
      </para>
      <itemizedlist spacing="compact" mark="bullet">
        <listitem>
          <simpara>
            <varname>async_query_timeout</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>bdr_local_monitoring_only</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>bdr_recovery_timeout</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>conninfo</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>degraded_monitoring_timeout</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>event_notification_command</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>event_notifications</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>failover</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>follow_command</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>log_facility</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>log_file</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>log_level</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>log_status_interval</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>monitor_interval_secs</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>monitoring_history</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>primary_notification_timeout</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>promote_command</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>reconnect_attempts</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>reconnect_interval</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>repmgrd_standby_startup_timeout</varname>
          </simpara>
        </listitem>
      </itemizedlist>
      <para>
        The following set of configuration file parameters must be updated via
        <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>,
        as they require changes to the <literal>repmgr.nodes</literal> table so they are visible to
        all nodes in the replication cluster:
      </para>
      <itemizedlist spacing="compact" mark="bullet">
        <listitem>
          <simpara>
            <varname>node_id</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>node_name</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>data_directory</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>location</varname>
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            <varname>priority</varname>
          </simpara>
        </listitem>
      </itemizedlist>
      <note>
        <para>
          After executing <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>,
          <application>repmgrd</application> <emphasis>must</emphasis> be restarted for the changes to take effect.
        </para>
      </note>
    </sect2>
  </sect1>
  <sect1 id="repmgrd-daemon">
@@ -266,7 +454,7 @@ REPMGRD_ENABLED=no
 #REPMGRD_CONF="/path/to/repmgr.conf"
 # additional options
-#REPMGRD_OPTS=""
+REPMGRD_OPTS="--daemonize=false"
 # user to run repmgrd as
 #REPMGRD_USER=postgres
@@ -281,6 +469,19 @@ REPMGRD_ENABLED=no
        Set <varname>REPMGRD_ENABLED</varname> to <literal>yes</literal>, and <varname>REPMGRD_CONF</varname>
        to the <filename>repmgr.conf</filename> file you are using.
      </para>
      <tip>
        <para>
          See <xref linkend="packages-debian-ubuntu"> for details of the Debian/Ubuntu packages and
          typical file locations (including <filename>repmgr.conf</filename>).
        </para>
      </tip>
      <para>
        From <application>repmgrd</application> 4.1, ensure <varname>REPMGRD_OPTS</varname> includes
        <option>--daemonize=false</option>, as daemonization is handled by the service command.
        We recommend setting <varname>repmgrd_pid_file</varname> in <filename>repmgr.conf</filename> to the
        same value set in <varname>REPMGRD_PIDFILE</varname> to prevent another <application>repmgrd</application>
        instance from being started manually.
      </para>
      <para>
        If using <application>systemd</application>, you may need to execute <command>systemctl daemon-reload</command>.
        Also, if you attempted to start <application>repmgrd</application> using <command>systemctl start repmgrd</command>,
@@ -323,25 +524,34 @@ REPMGRD_ENABLED=no
     <secondary>repmgrd</secondary>
   </indexterm>
   <indexterm>
     <primary>repmgrd</primary>
     <secondary>log rotation</secondary>
   </indexterm>
  <title>repmgrd log rotation</title>
  <para>
   To ensure the current <application>repmgrd</application> logfile
   (specified in <filename>repmgr.conf</filename> with the parameter
-   <option>log_file</option> does not grow indefinitely, configure your
+   <option>log_file</option>) does not grow indefinitely, configure your
   system's <command>logrotate</command> to regularly rotate it.
  </para>
  <para>
   Sample configuration to rotate logfiles weekly with retention for
   up to 52 weeks and rotation forced if a file grows beyond 100Mb:
   <programlisting>
-    /var/log/postgresql/repmgr-9.6.log {
+    /var/log/repmgr/repmgrd.log {
        missingok
        compress
        rotate 52
        maxsize 100M
        weekly
        create 0600 postgres postgres
        postrotate
            /usr/bin/killall -HUP repmgrd
        endscript
    }</programlisting>
  </para>
 </sect1>
 </chapter>
--- a/doc/repmgrd-degraded-monitoring.sgml
+++ b/doc/repmgrd-degraded-monitoring.sgml
@@ -1,4 +1,4 @@
-<chapter id="repmgrd-degraded-monitoring">
+<chapter id="repmgrd-degraded-monitoring" xreflabel="repmgrd degraded monitoring">
 <indexterm>
   <primary>repmgrd</primary>
   <secondary>degraded monitoring</secondary>
@@ -7,8 +7,8 @@
 <title>"degraded monitoring" mode</title>
 <para>
  In certain circumstances, <application>repmgrd</application> is not able to fulfill its primary mission
-  of monitoring the nodes' upstream server. In these cases it enters "degraded
+  of monitoring the node's upstream server. In these cases it enters &quot;degraded monitoring&quot;
-  monitoring" mode, where <application>repmgrd</application> remains active but is waiting for the situation
+  mode, where <application>repmgrd</application> remains active but is waiting for the situation
  to be resolved.
 </para>
 <para>
--- a/doc/repmgrd-monitoring.sgml
+++ b/doc/repmgrd-monitoring.sgml
@@ -1,4 +1,4 @@
-<chapter id="repmgrd-monitoring">
+<chapter id="repmgrd-monitoring" xreflabel="Monitoring with repmgrd">
 <indexterm>
   <primary>repmgrd</primary>
   <secondary>monitoring</secondary>
--- a/doc/repmgrd-network-split.sgml
+++ b/doc/repmgrd-network-split.sgml
@@ -40,8 +40,8 @@
  In a failover situation, <application>repmgrd</application> will check if any servers in the
  same location as the current primary node are visible.  If not, <application>repmgrd</application>
  will assume a network interruption and not promote any node in any
-  other location (it will however enter <xref linkend="repmgrd-degraded-monitoring"> mode until
+  other location (it will however enter <link linkend="repmgrd-degraded-monitoring">degraded monitoring</link>
-  a primary becomes visible).
+  mode until a primary becomes visible).
 </para>
 </chapter>
--- a/doc/repmgrd-pausing.sgml
+++ b/doc/repmgrd-pausing.sgml
@@ -0,0 +1,169 @@
 <chapter id="repmgrd-pausing" xreflabel="Pausing repmgrd">
  <indexterm>
    <primary>repmgrd</primary>
    <secondary>pausing</secondary>
  </indexterm>
  <indexterm>
    <primary>pausing repmgrd</primary>
  </indexterm>
  <title>Pausing repmgrd</title>
  <para>
    In normal operation, <application>repmgrd</application> monitors the state of the
    PostgreSQL node it is running on, and will take appropriate action if problems
    are detected, e.g. (if so configured) promote the node to primary, if the existing
    primary has been determined as failed.
  </para>
  <para>
    However, <application>repmgrd</application> is unable to distinguish between
    planned outages (such as performing a <link linkend="performing-switchover">switchover</link>
    or upgrading a server), and an actual server outage. In versions prior to &repmgr; 4.2
    it was necessary to stop <application>repmgrd</application> on all nodes (or at least
    on all nodes where <application>repmgrd</application> is
    <link linkend="repmgrd-automatic-failover">configured for automatic failover</link>)
    to prevent <application>repmgrd</application> from making changes to the
    replication cluster.
  </para>
  <para>
    From <link linkend="release-4.2">&repmgr; 4.2</link>, <application>repmgrd</application>
    can now be &quot;paused&quot;, i.e. instructed not to take any action such as performing a failover.
    This can be done from any node in the cluster, removing the need to stop/restart
    each <application>repmgrd</application> individually.
  </para>
  <sect1 id="repmgrd-pausing-prerequisites">
    <title>Prerequisites for pausing <application>repmgrd</application></title>
    <para>
      In order to be able to pause/unpause <application>repmgrd</application>, the following
      prerequisites must be met:
      <itemizedlist spacing="compact" mark="bullet">
        <listitem>
          <simpara><link linkend="release-4.2">&repmgr; 4.2</link> or later must be installed on all nodes.</simpara>
        </listitem>
        <listitem>
          <simpara>The same major &repmgr; version (e.g. 4.2) must be installed on all nodes (and preferably the same minor version).</simpara>
        </listitem>
        <listitem>
          <simpara>
            PostgreSQL on all nodes must be accessible from the node where the
            <literal>pause</literal>/<literal>unpause</literal> operation is executed, using the
            <varname>conninfo</varname> string shown by <link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>.
          </simpara>
        </listitem>
      </itemizedlist>
    </para>
    <note>
      <para>
        These conditions are required for normal &repmgr; operation in any case.
      </para>
    </note>
  </sect1>
  <sect1 id="repmgrd-pausing-execution">
    <title>Pausing/unpausing <application>repmgrd</application></title>
    <para>
      To pause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link>, e.g.:
   <programlisting>
 $ repmgr -f /etc/repmgr.conf daemon pause
 NOTICE: node 1 (node1) paused
 NOTICE: node 2 (node2) paused
 NOTICE: node 3 (node3) paused</programlisting>
    </para>
    <para>
      The state of <application>repmgrd</application> on each node can be checked with
      <link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>, e.g.:
    <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
 ID | Name  | Role    | Status  | repmgrd | PID  | Paused?
 ----+-------+---------+---------+---------+------+---------
 1  | node1 | primary | running | running | 7851 | yes
 2  | node2 | standby | running | running | 7889 | yes
 3  | node3 | standby | running | running | 7918 | yes</programlisting>
    </para>
    <note>
      <para>
        If executing a switchover with  <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
 		&repmgr; will automatically pause/unpause <application>repmgrd</application> as part of the switchover process.
      </para>
    </note>
    <para>
      If the primary (in this example, <literal>node1</literal>) is stopped, <application>repmgrd</application>
      running on one of the standbys (here: <literal>node2</literal>) will react like this:
      <programlisting>
 [2018-09-20 12:22:21] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
 [2018-09-20 12:22:21] [INFO] checking state of node 1, 1 of 5 attempts
 [2018-09-20 12:22:21] [INFO] sleeping 1 seconds until next reconnection attempt
 ...
 [2018-09-20 12:22:24] [INFO] sleeping 1 seconds until next reconnection attempt
 [2018-09-20 12:22:25] [INFO] checking state of node 1, 5 of 5 attempts
 [2018-09-20 12:22:25] [WARNING] unable to reconnect to node 1 after 5 attempts
 [2018-09-20 12:22:25] [NOTICE] node is paused
 [2018-09-20 12:22:33] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state
 [2018-09-20 12:22:33] [DETAIL] repmgrd paused by administrator
 [2018-09-20 12:22:33] [HINT] execute "repmgr daemon unpause" to resume normal failover mode</programlisting>
    </para>
    <para>
      If the primary becomes available again (e.g. following a software upgrade), <application>repmgrd</application>
      will automatically reconnect, e.g.:
      <programlisting>
 [2018-09-20 13:12:41] [NOTICE] reconnected to upstream node 1 after 8 seconds, resuming monitoring</programlisting>
    </para>
    <para>
      To unpause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>, e.g.:
   <programlisting>
 $ repmgr -f /etc/repmgr.conf daemon unpause
 NOTICE: node 1 (node1) unpaused
 NOTICE: node 2 (node2) unpaused
 NOTICE: node 3 (node3) unpaused</programlisting>
    </para>
    <note>
      <para>
        If the previous primary is no longer accessible when <application>repmgrd</application>
        is unpaused, no failover action will be taken. Instead, a new primary must be manually promoted using
        <link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>,
 		and any standbys attached to the new primary with
 		<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>.
      </para>
      <para>
        This is to prevent <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
        resulting in the automatic promotion of a new primary, which may be a problem particularly
        in larger clusters, where <application>repmgrd</application> could select a different promotion
        candidate to the one intended by the administrator.
      </para>
    </note>
  <sect2 id="repmgrd-pausing-details">
    <title>Details on the <application>repmgrd</application> pausing mechanism</title>
    <para>
      The pause state of each node will be preserved across a PostgreSQL restart.
    </para>
 	<para>
 	  <link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
 	  <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link> can be
 	  executed even if <application>repmgrd</application> is not running; in this case,
 	  <application>repmgrd</application> will start up in whichever pause state has been set.
 	</para>
    <note>
      <para>
 		<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
 		<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
 		<emphasis>do not</emphasis> stop/start <application>repmgrd</application>.
      </para>
    </note>
  </sect2>
  </sect1>
 </chapter>
--- a/doc/switchover.sgml
+++ b/doc/switchover.sgml
@@ -19,9 +19,10 @@
 </para>
 <para>
  <command>repmgr standby switchover</command> differs from other &repmgr;
-  actions in that it also performs actions on another server (the demotion
+  actions in that it also performs actions on other servers (the demotion
-  candidate), which means passwordless SSH access is required to that server
+  candidate, and optionally any other servers which are to follow the new primary),
-  from the one where <command>repmgr standby switchover</command> is executed.
+  which means passwordless SSH access is required to those servers from the one where
  <command>repmgr standby switchover</command> is executed.
 </para>
 <note>
  <simpara>
@@ -60,6 +61,13 @@
    &repmgr; being able to shut down the current primary server quickly and cleanly.
   </para>
   <para>
     Ensure that the promotion candidate has sufficient free walsenders available
     (PostgreSQL configuration item <varname>max_wal_senders</varname>), and if replication
     slots are in use, at least one free slot is available for the demotion candidate
     (PostgreSQL configuration item <varname>max_replication_slots</varname>).
   </para>
   <para>
     Ensure that a passwordless SSH connection is possible from the promotion candidate
     (standby) to the demotion candidate (current primary). If <literal>--siblings-follow</literal>
@@ -146,12 +154,18 @@
    manually with <command>repmgr node check --archive-ready</command>.
   </para>
-   <note>
+    <note>
-     <para>
+      <para>
-       Ensure that <application>repmgrd</application> is *not* running anywhere to prevent it unintentionally
+        From <link linkend="release-4.2">&repmgr; 4.2</link>, &repmgr; will instruct any running
-       promoting a node. This restriction will be removed in a future &repmgr; version.
+        <application>repmgrd</application> instances to pause operations while the switchover
-     </para>
+        is being carried out, to prevent <application>repmgrd</application> from
-   </note>
+        unintentionally promoting a node. For more details, see <xref linkend="repmgrd-pausing">.
      </para>
      <para>
        Users of &repmgr; versions prior to 4.2 should ensure that <application>repmgrd</application>
        is not running on any nodes while a switchover is being executed.
      </para>
    </note>
   <para>
@@ -296,7 +310,21 @@
     2  | node2 | primary | * running |          | default  | host=node2 dbname=repmgr user=repmgr
   </programlisting>
  </para>
  <para>
    If <application>repmgrd</application> is in use, it's worth double-checking that
    all nodes are unpaused by executing <command><link linkend="repmgr-daemon-status">repmgr daemon status</link></command>.
  </para>
   <note>
     <para>
       Users of &repmgr; versions prior to 4.2 will need to manually restart <application>repmgrd</application>
       on all nodes after the switchover is completed.
     </para>
    </note>
 </sect1>
 <sect1 id="switchover-caveats" xreflabel="Caveats">
  <indexterm>
   <primary>switchover</primary>
@@ -322,17 +350,76 @@
      for details.
     </simpara>
    </listitem>
    <listitem>
     <simpara>
      <application>repmgrd</application> should not be running with setting <varname>failover=automatic</varname>
      in <filename>repmgr.conf</filename> when a switchover is carried out, otherwise the
      <application>repmgrd</application> daemon may try and promote a standby by itself.
     </simpara>
    </listitem>
   </itemizedlist>
  </para>
  <para>
   We hope to remove some of these restrictions in future versions of &repmgr;.
  </para>
 </sect1>
 <sect1 id="switchover-troubleshooting" xreflabel="Troubleshooting">
   <indexterm>
     <primary>switchover</primary>
     <secondary>troubleshooting</secondary>
   </indexterm>
   <title>Troubleshooting switchover issues</title>
   <para>
     As <link linkend="performing-switchover">emphasised previously</link>, performing a switchover
     is a non-trivial operation and there are a number of potential issues which can occur.
     While &repmgr; attempts to perform sanity checks, there's no guaranteed way of determining the success of
     a switchover without actually carrying it out.
   </para>
   <sect2 id="switchover-troubleshooting-primary-shutdown">
     <title>Demotion candidate (old primary) does not shut down</title>
     <para>
       &repmgr; may abort a switchover with a message like:
       <programlisting>
 ERROR: shutdown of the primary server could not be confirmed
 HINT: check the primary server status before performing any further actions</programlisting>
     </para>
     <para>
       This means the shutdown of the old primary has taken longer than &repmgr; expected,
       and it has given up waiting.
     </para>
     <para>
       In this case, check the PostgreSQL log on the primary server to see what is going
       on. It's entirely possible the shutdown process is just taking longer than the
       timeout set by the configuration parameter <varname>shutdown_check_timeout</varname>
       (default: 60 seconds), in which case you may need to adjust this parameter.
     </para>
     <note>
       <para>
         Note that <varname>shutdown_check_timeout</varname> is set on the node where
         <command>repmgr standby switchover</command> is executed (promotion candidate); setting it on the
         demotion candidate (former primary) will have no effect.
       </para>
     </note>
     <para>
       If the primary server has shut down cleanly, and no other node has been promoted,
       it is safe to restart it, in which case the replication cluster will be restored
       to its original configuration.
     </para>
   </sect2>
   <sect2 id="switchover-troubleshooting-exclusive-backup">
     <title>Switchover aborts with an &quot;exclusive backup&quot; error</title>
     <para>
       &repmgr; may abort a switchover with a message like:
       <programlisting>
 ERROR: unable to perform a switchover while primary server is in exclusive backup mode
 HINT: stop backup before attempting the switchover</programlisting>
     </para>
     <para>
       This means an exclusive backup is running on the current primary; interrupting this
       will not only abort the backup, but potentially leave the primary with an ambiguous
       backup state.
     </para>
     <para>
       To proceed, either wait until the backup has finished, or cancel it with the command
       <command>SELECT pg_stop_backup()</command>. For more details see the PostgreSQL
       documentation section
       <ulink url="https://www.postgresql.org/docs/current/static/continuous-archiving.html#BACKUP-LOWLEVEL-BASE-BACKUP-EXCLUSIVE">Making an exclusive low level backup</ulink>.
     </para>
   </sect2>
 </sect1>
 </chapter>
--- a/doc/upgrading-repmgr.sgml
+++ b/doc/upgrading-repmgr.sgml
@@ -7,9 +7,9 @@
 <title>Upgrading repmgr</title>
 <para>
-  &repmgr; is updated regularly with point releases (e.g. 4.0.1 to 4.0.2)
+  &repmgr; is updated regularly with minor releases (e.g. 4.0.1 to 4.0.2)
  containing bugfixes and other minor improvements. Any substantial new
-  functionality will be included in a feature release (e.g. 4.0.x to 4.1.x).
+  functionality will be included in a major release (e.g. 4.0 to 4.1).
 </para>
 <sect1 id="upgrading-repmgr-extension" xreflabel="Upgrading repmgr 4.x and later">
@@ -19,43 +19,201 @@
  </indexterm>
  <title>Upgrading repmgr 4.x and later</title>
  <para>
-    &repmgr; 4.x is implemented as a PostgreSQL extension; normally the upgrade consists
+    From version 4, &repmgr; consists of three elements:
-    of the two following steps:
+     <itemizedlist spacing="compact" mark="bullet">
-    <orderedlist>
+
-      <listitem>
+       <listitem>
-        <simpara>
+         <simpara>
-          Install the updated package (or compile the updated source)
+           the <application>repmgr</application> and <application>repmgrd</application> executables
-        </simpara>
+         </simpara>
-      </listitem>
+       </listitem>
-      <listitem>
+
-        <simpara>
+       <listitem>
-          <application>repmgrd</application> (if running) must be restarted.
+         <simpara>
-        </simpara>
+           the objects for the &repmgr; PostgreSQL extension (SQL files for creating/updating
-      </listitem>
+           repmgr metadata, and the extension control file)
-      <listitem>
+         </simpara>
-        <simpara>
+       </listitem>
-          For major releases, e.g. from <literal>4.0.x</literal> to <literal>4.1</literal>,
+
-          execute <command>ALTER EXTENSION repmgr UPDATE</command>
+       <listitem>
-          on the primary node in the database where the &repmgr; extension is installed.
+         <simpara>
-        </simpara>
+           the shared library module used by <application>repmgrd</application> which
-        <simpara>
+           is resident in the PostgreSQL backend
-          This will update the extension metadata and, if necessary, apply
+         </simpara>
-          changes to the &repmgr; extension objects.
+       </listitem>
-        </simpara>
+     </itemizedlist>
-      </listitem>
+  </para>
-    </orderedlist>
+  <para>
    With <emphasis>minor releases</emphasis>, usually changes are only made to the <application>repmgr</application>
    and <application>repmgrd</application> executables. In this case, the upgrade is quite straightforward,
    and is simply a case of installing the new version, and restarting <application>repmgrd</application>
    (if running).
  </para>
  <para>
-    Always check the <link linkend="appendix-release-notes">release notes</link> for every
+    For <emphasis>major releases</emphasis>, the &repmgr; PostgreSQL extension will need to be updated
-    release as they may contain upgrade instructions particular to individual versions.
+    to the latest version. Additionally, if the shared library module has been updated (this is sometimes,
    but not always the case), PostgreSQL itself will need to be restarted on each node.
  </para>
  <important>
    <para>
      Always check the <link linkend="appendix-release-notes">release notes</link> for every
      release as they may contain upgrade instructions particular to individual versions.
    </para>
  </important>
-  <para>
+  <sect2 id="upgrading-minor-version" xreflabel="Upgrading a minor version release">
-    Note that it may be necessary to restart the PostgreSQL server if the upgrade contains
+	<indexterm>
-    changes to the shared object file used by <application>repmgrd</application>; check the
+	  <primary>upgrading</primary>
-    release notes for details.
+	  <secondary>minor release</secondary>
-  </para>
+	</indexterm>
 	<title>Upgrading a minor version release</title>
    <para>
      The process for installing minor version upgrades is quite straightforward:
      <itemizedlist spacing="compact" mark="bullet">
        <listitem>
          <simpara>
            install the new &repmgr; version
          </simpara>
        </listitem>
        <listitem>
          <simpara>
            restart <application>repmgrd</application> on all nodes where it is running
          </simpara>
        </listitem>
      </itemizedlist>
    </para>
    <note>
 	  <para>
        Some packaging systems (e.g. <link linkend="packages-debian-ubuntu">Debian/Ubuntu</link>)
        may restart <application>repmgrd</application> as part of the package upgrade process.
      </para>
    </note>
 	<para>
 	  Minor version upgrades can be performed in any order on the nodes in the replication
 	  cluster.
 	</para>
 	<para>
 	  A PostgreSQL restart is <emphasis>not</emphasis> required for minor version upgrades.
 	</para>
    <note>
 	  <para>
 	    The same &repmgr; &quot;major version&quot; (e.g. <literal>4.2</literal>) must be
 	    installed on all nodes in the replication cluster. While it's possible to have differing
 	    &repmgr; &quot;minor versions&quot; (e.g. <literal>4.2.1</literal>)  on different nodes,
 	    we strongly recommend updating all nodes to the latest minor version.
 	  </para>
    </note>
  </sect2>
  <sect2 id="upgrading-major-version" xreflabel="Upgrading a major version release">
 	<indexterm>
 	  <primary>upgrading</primary>
 	  <secondary>major release</secondary>
 	</indexterm>
 	<title>Upgrading a major version release</title>
 	<para>
 	  &quot;Major version&quot; upgrades need to be planned more carefully, as they may include
 	  changes to the &repmgr; metadata (which need to be propagated from the primary to all
 	  standbys) and/or changes to the shared object file used by <application>repmgrd</application>
 	  (which require a PostgreSQL restart).
 	</para>
 	<para>
 	  With this in mind, the recommended procedure for a major version upgrade is as follows:
 	</para>
 	<para>
      <orderedlist>
 		<listitem>
          <simpara>
 			Stop <application>repmgrd</application> (if in use) on all nodes where it is running.
          </simpara>
 		</listitem>
 		<listitem>
          <simpara>
 			Disable the <application>repmgrd</application> service on all nodes where it is in use;
            this is to prevent packages from prematurely restarting <application>repmgrd</application>.
          </simpara>
 		</listitem>
 		<listitem>
          <simpara>
 			Install the updated package (or compile the updated source) on all nodes.
          </simpara>
 		</listitem>
        <listitem>
          <para>
            If running a <literal>systemd</literal>-based Linux distribution, execute (as <literal>root</literal>,
            or with appropriate <literal>sudo</literal> permissions):
            <programlisting>
 systemctl daemon-reload</programlisting>
          </para>
        </listitem>
 		<listitem>
          <simpara>
 			If the &repmgr; shared library module has been updated (check the <link linkend="appendix-release-notes">release notes</link>!),
            restart PostgreSQL, then <application>repmgrd</application> (if in use) on each node.
            The order in which this is applied to individual nodes is not critical,
 			and it's also fine to restart PostgreSQL on all nodes first before starting <application>repmgrd</application>.
 		  </simpara>
 		  <simpara>
 			Note that if the upgrade requires a PostgreSQL restart, <application>repmgrd</application>
 			will only function correctly once all nodes have been restarted.
          </simpara>
 		</listitem>
 		<listitem>
          <para>
 			On the primary node, execute
 			<programlisting>
 ALTER EXTENSION repmgr UPDATE</programlisting>
 			in the database where &repmgr; is installed.
          </para>
 		</listitem>
 		<listitem>
          <simpara>
 			Reenable the <application>repmgrd</application> service on all nodes where it is in use.
          </simpara>
 		</listitem>
 	  </orderedlist>
 	</para>
 	<tip>
 	  <para>
 		If the &repmgr; upgrade requires a PostgreSQL restart, combine the &repmgr; upgrade
 		with a PostgreSQL minor version upgrade, which will require a restart in any case.
 		New PostgreSQL minor versions are usually released every couple of months.
 	  </para>
 	</tip>
  </sect2>
  <sect2 id="upgrading-check-repmgrd" xreflabel="Checking repmgrd status after an upgrade">
 	<indexterm>
 	  <primary>upgrading</primary>
 	  <secondary>checking repmgrd status</secondary>
 	</indexterm>
 	<title>Checking repmgrd status after an upgrade</title>
 	<para>
      From &repmgr; 4.2, once the upgrade is complete, execute the <command><link linkend="repmgr-daemon-status">repmgr daemon status</link></command>
      command (on any node) to show an overview of the status of <application>repmgrd</application> on all nodes.
    </para>
  </sect2>
 </sect1>
 <sect1 id="upgrading-and-pg-upgrade" xreflabel="pg_upgrade and repmgr">
@@ -95,6 +253,13 @@
    be recreated by <application>pg_upgrade</application>. These will need to
    be recreated manually.
  </para>
  <tip>
 	<para>
 	  Use <command><link linkend="repmgr-node-check">repmgr node check</link></command>
 	  to determine which replication slots need to be recreated.
 	</para>
  </tip>
 </sect1>
--- a/doc/version.sgml
+++ b/doc/version.sgml
@@ -1 +1 @@
-<!ENTITY repmgrversion "4.1dev">
+<!ENTITY repmgrversion "4.2">
--- a/errcode.h
+++ b/errcode.h
@@ -47,5 +47,6 @@
 #define ERR_FOLLOW_FAIL 23
 #define ERR_REJOIN_FAIL 24
 #define ERR_NODE_STATUS 25
 #define ERR_REPMGRD_PAUSE 26
 #endif							/* _ERRCODE_H_ */
--- a/repmgr--4.1--4.2.sql
+++ b/repmgr--4.1--4.2.sql
@@ -0,0 +1,32 @@
 -- complain if script is sourced in psql, rather than via ALTER EXTENSION
 \echo Use "ALTER EXTENSION repmgr UPDATE" to load this file. \quit
 CREATE FUNCTION get_repmgrd_pid()
  RETURNS INT
  AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
  LANGUAGE C STRICT;
 CREATE FUNCTION get_repmgrd_pidfile()
  RETURNS TEXT
  AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
  LANGUAGE C STRICT;
 CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
  LANGUAGE C STRICT;
 CREATE FUNCTION repmgrd_is_running()
  RETURNS BOOL
  AS 'MODULE_PATHNAME', 'repmgrd_is_running'
  LANGUAGE C STRICT;
 CREATE FUNCTION repmgrd_pause(BOOL)
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'repmgrd_pause'
  LANGUAGE C STRICT;
 CREATE FUNCTION repmgrd_is_paused()
  RETURNS BOOL
  AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
  LANGUAGE C STRICT;
--- a/repmgr--4.1.sql
+++ b/repmgr--4.1.sql
@@ -145,7 +145,6 @@ CREATE FUNCTION unset_bdr_failover_handler()
  AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
  LANGUAGE C STRICT;
 CREATE VIEW repmgr.replication_status AS
  SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
 	     n.type AS node_type, n.active, last_monitor_time,
--- a/repmgr--4.2.sql
+++ b/repmgr--4.2.sql
@@ -0,0 +1,197 @@
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION repmgr" to load this file. \quit
 CREATE TABLE repmgr.nodes (
  node_id          INTEGER     PRIMARY KEY,
  upstream_node_id INTEGER     NULL REFERENCES nodes (node_id) DEFERRABLE,
  active           BOOLEAN     NOT NULL DEFAULT TRUE,
  node_name        TEXT        NOT NULL,
  type             TEXT        NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
  location         TEXT        NOT NULL DEFAULT 'default',
  priority         INT         NOT NULL DEFAULT 100,
  conninfo         TEXT        NOT NULL,
  repluser         VARCHAR(63) NOT NULL,
  slot_name        TEXT        NULL,
  config_file      TEXT        NOT NULL
 );
 CREATE TABLE repmgr.events (
  node_id          INTEGER NOT NULL,
  event            TEXT NOT NULL,
  successful       BOOLEAN NOT NULL DEFAULT TRUE,
  event_timestamp  TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
  details          TEXT NULL
 );
 DO $repmgr$
 DECLARE
  DECLARE server_version_num INT;
 BEGIN
  SELECT setting
    FROM pg_catalog.pg_settings
   WHERE name = 'server_version_num'
    INTO server_version_num;
  IF server_version_num >= 90400 THEN
    EXECUTE $repmgr_func$
 CREATE TABLE repmgr.monitoring_history (
  primary_node_id                INTEGER NOT NULL,
  standby_node_id                INTEGER NOT NULL,
  last_monitor_time              TIMESTAMP WITH TIME ZONE NOT NULL,
  last_apply_time                TIMESTAMP WITH TIME ZONE,
  last_wal_primary_location      PG_LSN NOT NULL,
  last_wal_standby_location      PG_LSN,
  replication_lag                BIGINT NOT NULL,
  apply_lag                      BIGINT NOT NULL
 )
    $repmgr_func$;
  ELSE
    EXECUTE $repmgr_func$
 CREATE TABLE repmgr.monitoring_history (
  primary_node_id                INTEGER NOT NULL,
  standby_node_id                INTEGER NOT NULL,
  last_monitor_time              TIMESTAMP WITH TIME ZONE NOT NULL,
  last_apply_time                TIMESTAMP WITH TIME ZONE,
  last_wal_primary_location      TEXT NOT NULL,
  last_wal_standby_location      TEXT,
  replication_lag                BIGINT NOT NULL,
  apply_lag                      BIGINT NOT NULL
 )
    $repmgr_func$;
  END IF;
 END$repmgr$;
 CREATE INDEX idx_monitoring_history_time
          ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
 CREATE VIEW repmgr.show_nodes AS
   SELECT n.node_id,
          n.node_name,
          n.active,
          n.upstream_node_id,
          un.node_name AS upstream_node_name,
          n.type,
          n.priority,
          n.conninfo
     FROM repmgr.nodes n
 LEFT JOIN repmgr.nodes un
       ON un.node_id = n.upstream_node_id;
 /* XXX update upgrade scripts! */
 CREATE TABLE repmgr.voting_term (
  term INT NOT NULL
 );
 CREATE UNIQUE INDEX voting_term_restrict
 ON repmgr.voting_term ((TRUE));
 CREATE RULE voting_term_delete AS
   ON DELETE TO repmgr.voting_term
   DO INSTEAD NOTHING;
 /* ================= */
 /* repmgrd functions */
 /* ================= */
 /* monitoring functions */
 CREATE FUNCTION set_local_node_id(INT)
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'set_local_node_id'
  LANGUAGE C STRICT;
 CREATE FUNCTION get_local_node_id()
  RETURNS INT
  AS 'MODULE_PATHNAME', 'get_local_node_id'
  LANGUAGE C STRICT;
 CREATE FUNCTION standby_set_last_updated()
  RETURNS TIMESTAMP WITH TIME ZONE
  AS 'MODULE_PATHNAME', 'standby_set_last_updated'
  LANGUAGE C STRICT;
 CREATE FUNCTION standby_get_last_updated()
  RETURNS TIMESTAMP WITH TIME ZONE
  AS 'MODULE_PATHNAME', 'standby_get_last_updated'
  LANGUAGE C STRICT;
 /* failover functions */
 CREATE FUNCTION notify_follow_primary(INT)
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'notify_follow_primary'
  LANGUAGE C STRICT;
 CREATE FUNCTION get_new_primary()
  RETURNS INT
  AS 'MODULE_PATHNAME', 'get_new_primary'
  LANGUAGE C STRICT;
 CREATE FUNCTION reset_voting_status()
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'reset_voting_status'
  LANGUAGE C STRICT;
 CREATE FUNCTION am_bdr_failover_handler(INT)
  RETURNS BOOL
  AS 'MODULE_PATHNAME', 'am_bdr_failover_handler'
  LANGUAGE C STRICT;
 CREATE FUNCTION unset_bdr_failover_handler()
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
  LANGUAGE C STRICT;
 CREATE FUNCTION get_repmgrd_pid()
  RETURNS INT
  AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
  LANGUAGE C STRICT;
 CREATE FUNCTION get_repmgrd_pidfile()
  RETURNS TEXT
  AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
  LANGUAGE C STRICT;
 CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
  LANGUAGE C STRICT;
 CREATE FUNCTION repmgrd_is_running()
  RETURNS BOOL
  AS 'MODULE_PATHNAME', 'repmgrd_is_running'
  LANGUAGE C STRICT;
 CREATE FUNCTION repmgrd_pause(BOOL)
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'repmgrd_pause'
  LANGUAGE C STRICT;
 CREATE FUNCTION repmgrd_is_paused()
  RETURNS BOOL
  AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
  LANGUAGE C STRICT;
 CREATE VIEW repmgr.replication_status AS
  SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
 	     n.type AS node_type, n.active, last_monitor_time,
         CASE WHEN n.type='standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
         m.last_wal_standby_location,
         CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
         CASE WHEN n.type='standby' THEN
           CASE WHEN replication_lag > 0 THEN age(now(), m.last_apply_time) ELSE '0'::INTERVAL END
           ELSE NULL
         END AS replication_time_lag,
         CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
         AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN repmgr.standby_get_last_updated() ELSE m.last_monitor_time END) AS communication_time_lag
    FROM repmgr.monitoring_history m
    JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
   WHERE (m.standby_node_id, m.last_monitor_time) IN (
 	          SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
 			    FROM repmgr.monitoring_history m1 GROUP BY 1
         );
--- a/repmgr-action-bdr.c
+++ b/repmgr-action-bdr.c
@@ -191,7 +191,7 @@ do_bdr_register(void)
 	{
 		NodeInfoList local_node_records = T_NODE_INFO_LIST_INITIALIZER;
-		get_all_node_records(conn, &local_node_records);
+		(void) get_all_node_records(conn, &local_node_records);
 		if (local_node_records.node_count == 0)
 		{
@@ -239,7 +239,7 @@ do_bdr_register(void)
 					continue;
 				}
-				get_all_node_records(bdr_node_conn, &existing_nodes);
+				(void) get_all_node_records(bdr_node_conn, &existing_nodes);
 				for (cell = existing_nodes.head; cell; cell = cell->next)
 				{
--- a/repmgr-action-cluster.c
+++ b/repmgr-action-cluster.c
@@ -26,7 +26,6 @@
 #define SHOW_HEADER_COUNT 7
 typedef enum
 {
 	SHOW_ID = 0,
@@ -51,21 +50,13 @@ typedef enum
 }			EventHeader;
 struct ColHeader
 {
 	char		title[MAXLEN];
 	int			max_length;
 	int			cur_length;
 };
 struct ColHeader headers_show[SHOW_HEADER_COUNT];
 struct ColHeader headers_event[EVENT_HEADER_COUNT];
-static int	build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length);
+static int	build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, ItemList *warnings, int *error_code);
-static int	build_cluster_crosscheck(t_node_status_cube ***cube_dest, int *name_length);
+static int	build_cluster_crosscheck(t_node_status_cube ***cube_dest, int *name_length, ItemList *warnings, int *error_code);
 static void cube_set_node_status(t_node_status_cube **cube, int n, int node_id, int matrix_node_id, int connection_node_id, int connection_status);
 /*
@@ -84,6 +75,7 @@ do_cluster_show(void)
 	ItemList	warnings = {NULL, NULL};
 	bool		success = false;
 	bool		error_found = false;
 	bool		connection_error_found = false;
 	/* Connect to local database to obtain cluster connection data */
 	log_verbose(LOG_INFO, _("connecting to database"));
@@ -141,14 +133,26 @@ do_cluster_show(void)
 		}
 		else
 		{
 			char		error[MAXLEN];
 			strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
 			cell->node_info->node_status = NODE_STATUS_DOWN;
 			cell->node_info->recovery_type = RECTYPE_UNKNOWN;
-			item_list_append_format(&warnings,
+
-									"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
+			connection_error_found = true;
-									cell->node_info->node_name, cell->node_info->node_id, trim(error));
+
 			if (runtime_options.verbose)
 			{
 				char		error[MAXLEN];
 				strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
 				item_list_append_format(&warnings,
 										"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
 										cell->node_info->node_name, cell->node_info->node_id, trim(error));
 			}
 			else
 			{
 				item_list_append_format(&warnings,
 										"unable to connect to node \"%s\" (ID: %i)",
 										cell->node_info->node_name, cell->node_info->node_id);
 			}
 		}
 		initPQExpBuffer(&details);
@@ -170,16 +174,16 @@ do_cluster_show(void)
 							switch (cell->node_info->recovery_type)
 							{
 								case RECTYPE_PRIMARY:
-									appendPQExpBuffer(&details, "* running");
+									appendPQExpBufferStr(&details, "* running");
 									break;
 								case RECTYPE_STANDBY:
-									appendPQExpBuffer(&details, "! running as standby");
+									appendPQExpBufferStr(&details, "! running as standby");
 									item_list_append_format(&warnings,
 															"node \"%s\" (ID: %i) is registered as primary but running as standby",
 															cell->node_info->node_name, cell->node_info->node_id);
 									break;
 								case RECTYPE_UNKNOWN:
-									appendPQExpBuffer(&details, "! unknown");
+									appendPQExpBufferStr(&details, "! unknown");
 									item_list_append_format(&warnings,
 															"node \"%s\" (ID: %i) has unknown replication status",
 															cell->node_info->node_name, cell->node_info->node_id);
@@ -190,14 +194,14 @@ do_cluster_show(void)
 						{
 							if (cell->node_info->recovery_type == RECTYPE_PRIMARY)
 							{
-								appendPQExpBuffer(&details, "! running");
+								appendPQExpBufferStr(&details, "! running");
 								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
 														cell->node_info->node_name, cell->node_info->node_id);
 							}
 							else
 							{
-								appendPQExpBuffer(&details, "! running as standby");
+								appendPQExpBufferStr(&details, "! running as standby");
 								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is registered as an inactive primary but running as standby",
 														cell->node_info->node_name, cell->node_info->node_id);
@@ -210,7 +214,7 @@ do_cluster_show(void)
 						/* node is unreachable but marked active */
 						if (cell->node_info->active == true)
 						{
-							appendPQExpBuffer(&details, "? unreachable");
+							appendPQExpBufferStr(&details, "? unreachable");
 							item_list_append_format(&warnings,
 													"node \"%s\" (ID: %i) is registered as an active primary but is unreachable",
 													cell->node_info->node_name, cell->node_info->node_id);
@@ -218,7 +222,7 @@ do_cluster_show(void)
 						/* node is unreachable and marked as inactive */
 						else
 						{
-							appendPQExpBuffer(&details, "- failed");
+							appendPQExpBufferStr(&details, "- failed");
 							error_found = true;
 						}
 					}
@@ -234,16 +238,16 @@ do_cluster_show(void)
 							switch (cell->node_info->recovery_type)
 							{
 								case RECTYPE_STANDBY:
-									appendPQExpBuffer(&details, "  running");
+									appendPQExpBufferStr(&details, "  running");
 									break;
 								case RECTYPE_PRIMARY:
-									appendPQExpBuffer(&details, "! running as primary");
+									appendPQExpBufferStr(&details, "! running as primary");
 									item_list_append_format(&warnings,
 															"node \"%s\" (ID: %i) is registered as standby but running as primary",
 															cell->node_info->node_name, cell->node_info->node_id);
 									break;
 								case RECTYPE_UNKNOWN:
-									appendPQExpBuffer(&details, "! unknown");
+									appendPQExpBufferStr(&details, "! unknown");
 									item_list_append_format(
 															&warnings,
 															"node \"%s\" (ID: %i) has unknown replication status",
@@ -255,14 +259,14 @@ do_cluster_show(void)
 						{
 							if (cell->node_info->recovery_type == RECTYPE_STANDBY)
 							{
-								appendPQExpBuffer(&details, "! running");
+								appendPQExpBufferStr(&details, "! running");
 								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
 														cell->node_info->node_name, cell->node_info->node_id);
 							}
 							else
 							{
-								appendPQExpBuffer(&details, "! running as primary");
+								appendPQExpBufferStr(&details, "! running as primary");
 								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive",
 														cell->node_info->node_name, cell->node_info->node_id);
@@ -275,14 +279,14 @@ do_cluster_show(void)
 						/* node is unreachable but marked active */
 						if (cell->node_info->active == true)
 						{
-							appendPQExpBuffer(&details, "? unreachable");
+							appendPQExpBufferStr(&details, "? unreachable");
 							item_list_append_format(&warnings,
 													"node \"%s\" (ID: %i) is registered as an active standby but is unreachable",
 													cell->node_info->node_name, cell->node_info->node_id);
 						}
 						else
 						{
-							appendPQExpBuffer(&details, "- failed");
+							appendPQExpBufferStr(&details, "- failed");
 							error_found = true;
 						}
 					}
@@ -296,11 +300,11 @@ do_cluster_show(void)
 					{
 						if (cell->node_info->active == true)
 						{
-							appendPQExpBuffer(&details, "* running");
+							appendPQExpBufferStr(&details, "* running");
 						}
 						else
 						{
-							appendPQExpBuffer(&details, "! running");
+							appendPQExpBufferStr(&details, "! running");
 							error_found = true;
 						}
 					}
@@ -309,11 +313,11 @@ do_cluster_show(void)
 					{
 						if (cell->node_info->active == true)
 						{
-							appendPQExpBuffer(&details, "? unreachable");
+							appendPQExpBufferStr(&details, "? unreachable");
 						}
 						else
 						{
-							appendPQExpBuffer(&details, "- failed");
+							appendPQExpBufferStr(&details, "- failed");
 							error_found = true;
 						}
 					}
@@ -322,7 +326,7 @@ do_cluster_show(void)
 			case UNKNOWN:
 				{
 					/* this should never happen */
-					appendPQExpBuffer(&details, "? unknown node type");
+					appendPQExpBufferStr(&details, "? unknown node type");
 						error_found = true;
 				}
 				break;
@@ -351,36 +355,10 @@ do_cluster_show(void)
 	}
 	/* Print column header row (text mode only) */
 	if (runtime_options.output_mode == OM_TEXT)
 	{
-		for (i = 0; i < SHOW_HEADER_COUNT; i++)
+		print_status_header(SHOW_HEADER_COUNT, headers_show);
 		{
 			if (i == 0)
 				printf(" ");
 			else
 				printf(" | ");
 			printf("%-*s",
 				   headers_show[i].max_length,
 				   headers_show[i].title);
 		}
 		printf("\n");
 		printf("-");
 		for (i = 0; i < SHOW_HEADER_COUNT; i++)
 		{
 			int			j;
 			for (j = 0; j < headers_show[i].max_length; j++)
 				printf("-");
 			if (i < (SHOW_HEADER_COUNT - 1))
 				printf("-+-");
 			else
 				printf("-");
 		}
 		printf("\n");
 	}
 	for (cell = nodes.head; cell; cell = cell->next)
@@ -437,6 +415,11 @@ do_cluster_show(void)
 		{
 			printf(_("  - %s\n"), cell->string);
 		}
 		if (runtime_options.verbose == false && connection_error_found == true)
 		{
 			log_hint(_("execute with --verbose option to see connection error messages"));
 		}
 	}
 	/*
@@ -619,9 +602,12 @@ do_cluster_crosscheck(void)
 	t_node_status_cube **cube;
-	bool		error_found = false;
+	bool		connection_error_found = false;
 	int			error_code = SUCCESS;
 	ItemList	warnings = {NULL, NULL};
 	n = build_cluster_crosscheck(&cube, &name_length, &warnings, &error_code);
 	n = build_cluster_crosscheck(&cube, &name_length);
 	if (runtime_options.output_mode == OM_CSV)
 	{
 		for (i = 0; i < n; i++)
@@ -643,6 +629,11 @@ do_cluster_crosscheck(void)
 					   cube[i]->node_id,
 					   cube[j]->node_id,
 					   max_node_status);
 				if (max_node_status == -1)
 				{
 					connection_error_found = true;
 				}
 			}
 		}
@@ -700,16 +691,16 @@ do_cluster_crosscheck(void)
 				{
 					case -2:
 						c = '?';
 						error_found = true;
 						break;
 					case -1:
 						c = 'x';
-						error_found = true;
+						connection_error_found = true;
 						break;
 					case 0:
 						c = '*';
 						break;
 					default:
 						log_error("unexpected node status value %i", max_node_status);
 						exit(ERR_INTERNAL);
 				}
@@ -718,6 +709,13 @@ do_cluster_crosscheck(void)
 			printf("\n");
 		}
 		if (warnings.head != NULL && runtime_options.terse == false)
 		{
 			log_warning(_("following problems detected:"));
 			print_item_list(&warnings);
 		}
 	}
 	/* clean up allocated cube array */
@@ -744,13 +742,23 @@ do_cluster_crosscheck(void)
 		free(cube);
 	}
-	if (error_found == true)
+	/* errors detected by build_cluster_crosscheck() have priority */
 	if (connection_error_found == true)
 	{
-		exit(ERR_NODE_STATUS);
+		error_code = ERR_NODE_STATUS;
 	}
 	exit(error_code);
 }
 /*
 * CLUSTER MATRIX
 *
 * Parameters:
 *   --csv
 */
 void
 do_cluster_matrix()
 {
@@ -763,18 +771,30 @@ do_cluster_matrix()
 	t_node_matrix_rec **matrix_rec_list;
-	bool		error_found = false;
+	bool		connection_error_found = false;
 	int			error_code = SUCCESS;
 	ItemList	warnings = {NULL, NULL};
-	n = build_cluster_matrix(&matrix_rec_list, &name_length);
+	n = build_cluster_matrix(&matrix_rec_list, &name_length, &warnings, &error_code);
 	if (runtime_options.output_mode == OM_CSV)
 	{
 		for (i = 0; i < n; i++)
 		{
 			for (j = 0; j < n; j++)
 			{
 				printf("%d,%d,%d\n",
 					   matrix_rec_list[i]->node_id,
 					   matrix_rec_list[i]->node_status_list[j]->node_id,
 					   matrix_rec_list[i]->node_status_list[j]->node_status);
 				if (matrix_rec_list[i]->node_status_list[j]->node_status == -2
 					|| matrix_rec_list[i]->node_status_list[j]->node_status == -1)
 				{
 					connection_error_found = true;
 				}
 			}
 		}
 	}
 	else
 	{
@@ -803,16 +823,16 @@ do_cluster_matrix()
 				{
 					case -2:
 						c = '?';
 						error_found = true;
 						break;
 					case -1:
 						c = 'x';
-						error_found = true;
+						connection_error_found = true;
 						break;
 					case 0:
 						c = '*';
 						break;
 					default:
 						log_error("unexpected node status value %i", matrix_rec_list[i]->node_status_list[j]->node_status);
 						exit(ERR_INTERNAL);
 				}
@@ -820,6 +840,13 @@ do_cluster_matrix()
 			}
 			printf("\n");
 		}
 		if (warnings.head != NULL && runtime_options.terse == false)
 		{
 			log_warning(_("following problems detected:"));
 			print_item_list(&warnings);
 		}
 	}
 	for (i = 0; i < n; i++)
@@ -834,10 +861,13 @@ do_cluster_matrix()
 	free(matrix_rec_list);
-	if (error_found == true)
+	/* actual database connection errors have priority */
 	if (connection_error_found == true)
 	{
-		exit(ERR_NODE_STATUS);
+		error_code = ERR_NODE_STATUS;
 	}
 	exit(error_code);
 }
@@ -866,7 +896,7 @@ matrix_set_node_status(t_node_matrix_rec **matrix_rec_list, int n, int node_id,
 static int
-build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
+build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, ItemList *warnings, int *error_code)
 {
 	PGconn	   *conn = NULL;
 	int			i = 0,
@@ -895,7 +925,12 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
 		local_node_id = runtime_options.node_id;
 	}
-	get_all_node_records(conn, &nodes);
+	if (get_all_node_records(conn, &nodes) == false)
 	{
 		/* get_all_node_records() will display the error */
 		PQfinish(conn);
 		exit(ERR_BAD_CONFIG);
 	}
 	PQfinish(conn);
 	conn = NULL;
@@ -909,7 +944,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
 	/*
 	 * Allocate an empty matrix record list
 	 *
-	 * -2 == NULL  ? -1 == Error x 0 == OK    *
+	 * -2 == NULL  ? -1 == Error x 0 == OK
 	 */
 	matrix_rec_list = (t_node_matrix_rec **) pg_malloc0(sizeof(t_node_matrix_rec) * nodes.node_count);
@@ -972,7 +1007,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
 		host = param_get(&remote_conninfo, "host");
-		node_conn = establish_db_connection(cell->node_info->conninfo, false);
+		node_conn = establish_db_connection_quiet(cell->node_info->conninfo);
 		connection_status =
 			(PQstatus(node_conn) == CONNECTION_OK) ? 0 : -1;
@@ -1009,24 +1044,12 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
 		 * remote repmgr - those are the only values it needs to work, and
 		 * saves us making assumptions about the location of repmgr.conf
 		 */
-		appendPQExpBuffer(&command,
+		appendPQExpBufferChar(&command, '"');
 						  "\"%s -d '%s' ",
 						  make_pg_path(progname()),
 						  cell->node_info->conninfo);
 		make_remote_repmgr_path(&command, cell->node_info);
-		if (strlen(pg_bindir))
+		appendPQExpBufferStr(&command,
-		{
+							 " cluster show --csv -L NOTICE --terse\"");
 			appendPQExpBuffer(&command,
 							  "--pg_bindir=");
 			appendShellString(&command,
 							  pg_bindir);
 			appendPQExpBuffer(&command,
 							  " ");
 		}
 		appendPQExpBuffer(&command,
 						  " cluster show --csv\"");
 		log_verbose(LOG_DEBUG, "build_cluster_matrix(): executing:\n  %s", command.data);
@@ -1041,32 +1064,50 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
 		termPQExpBuffer(&command);
-		for (j = 0; j < nodes.node_count; j++)
+		/* no output returned - probably SSH error */
 		if (p[0] == '\0' || p[0] == '\n')
 		{
-			if (sscanf(p, "%d,%d", &x, &y) != 2)
+			item_list_append_format(warnings,
 									"node %i inaccessible via SSH",
 									connection_node_id);
 			*error_code = ERR_BAD_SSH;
 		}
 		else
 		{
 			for (j = 0; j < nodes.node_count; j++)
 			{
-				fprintf(stderr, _("cannot parse --csv output: %s\n"), p);
+				if (sscanf(p, "%d,%d", &x, &y) != 2)
-				PQfinish(node_conn);
+				{
-				exit(ERR_INTERNAL);
+					matrix_set_node_status(matrix_rec_list,
 										   nodes.node_count,
 										   connection_node_id,
 										   x,
 										   -2);
 					item_list_append_format(warnings,
 											"unable to parse --csv output for node %i; output returned was:\n\"%s\"",
 											connection_node_id, p);
 					*error_code = ERR_INTERNAL;
 				}
 				else
 				{
 					matrix_set_node_status(matrix_rec_list,
 										   nodes.node_count,
 										   connection_node_id,
 										   x,
 										   (y == -1) ? -1 : 0);
 				}
 				while (*p && (*p != '\n'))
 					p++;
 				if (*p == '\n')
 					p++;
 			}
 			matrix_set_node_status(matrix_rec_list,
 								   nodes.node_count,
 								   connection_node_id,
 								   x,
 								   (y == -1) ? -1 : 0);
 			while (*p && (*p != '\n'))
 				p++;
 			if (*p == '\n')
 				p++;
 		}
 		termPQExpBuffer(&command_output);
 		PQfinish(node_conn);
 		free_conninfo_params(&remote_conninfo);
 		node_conn = NULL;
 	}
 	*matrix_rec_dest = matrix_rec_list;
@@ -1079,7 +1120,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
 static int
-build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
+build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, ItemList *warnings, int *error_code)
 {
 	PGconn	   *conn = NULL;
 	int			h,
@@ -1100,7 +1141,12 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
 	else
 		conn = establish_db_connection_by_params(&source_conninfo, true);
-	get_all_node_records(conn, &nodes);
+	if (get_all_node_records(conn, &nodes) == false)
 	{
 		/* get_all_node_records() will display the error */
 		PQfinish(conn);
 		exit(ERR_BAD_CONFIG);
 	}
 	PQfinish(conn);
 	conn = NULL;
@@ -1187,28 +1233,13 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
 		initPQExpBuffer(&command);
-		appendPQExpBuffer(&command,
+		make_remote_repmgr_path(&command, cell->node_info);
 						  "%s -d '%s' --node-id=%i ",
 						  make_pg_path(progname()),
 						  cell->node_info->conninfo,
 						  remote_node_id);
-		if (strlen(pg_bindir))
+		appendPQExpBufferStr(&command,
-		{
+							 " cluster matrix --csv -L NOTICE --terse");
 			appendPQExpBuffer(&command,
 							  "--pg_bindir=");
 			appendShellString(&command,
 							  pg_bindir);
 			appendPQExpBuffer(&command,
 							  " ");
 		}
 		appendPQExpBuffer(&command,
 						  "cluster matrix --csv 2>/dev/null");
 		initPQExpBuffer(&command_output);
 		/* fix to work with --node-id */
 		if (cube[i]->node_id == config_file_options.node_id)
 		{
 			(void) local_command_simple(command.data,
@@ -1249,9 +1280,13 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
 		p = command_output.data;
-		if (!strlen(command_output.data))
+		if (p[0] == '\0' || p[0] == '\n')
 		{
 			item_list_append_format(warnings,
 									"node %i inaccessible via SSH",
 									remote_node_id);
 			termPQExpBuffer(&command_output);
 			*error_code = ERR_BAD_SSH;
 			continue;
 		}
@@ -1263,16 +1298,23 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
 			if (sscanf(p, "%d,%d,%d", &matrix_rec_node_id, &node_status_node_id, &node_status) != 3)
 			{
-				fprintf(stderr, _("cannot parse --csv output: %s\n"), p);
+				cube_set_node_status(cube,
-				exit(ERR_INTERNAL);
+									 nodes.node_count,
 									 remote_node_id,
 									 matrix_rec_node_id,
 									 node_status_node_id,
 									 -2);
 				*error_code = ERR_INTERNAL;
 			}
 			else
 			{
 				cube_set_node_status(cube,
 									 nodes.node_count,
 									 remote_node_id,
 									 matrix_rec_node_id,
 									 node_status_node_id,
 									 node_status);
 			}
 			cube_set_node_status(cube,
 								 nodes.node_count,
 								 remote_node_id,
 								 matrix_rec_node_id,
 								 node_status_node_id,
 								 node_status);
 			while (*p && (*p != '\n'))
 				p++;
@@ -1332,6 +1374,7 @@ do_cluster_cleanup(void)
 	PGconn	   *conn = NULL;
 	PGconn	   *primary_conn = NULL;
 	int			entries_to_delete = 0;
 	PQExpBufferData event_details;
 	conn = establish_db_connection(config_file_options.conninfo, true);
@@ -1343,9 +1386,17 @@ do_cluster_cleanup(void)
 	log_debug(_("number of days of monitoring history to retain: %i"), runtime_options.keep_history);
-	entries_to_delete = get_number_of_monitoring_records_to_delete(primary_conn, runtime_options.keep_history);
+	entries_to_delete = get_number_of_monitoring_records_to_delete(primary_conn,
 																   runtime_options.keep_history,
 																   runtime_options.node_id);
-	if (entries_to_delete == 0)
+	if (entries_to_delete < 0)
 	{
 		log_error(_("unable to query number of monitoring records to clean up"));
 		PQfinish(primary_conn);
 		exit(ERR_DB_QUERY);
 	}
 	else if (entries_to_delete == 0)
 	{
 		log_info(_("no monitoring records to delete"));
 		PQfinish(primary_conn);
@@ -1355,10 +1406,23 @@ do_cluster_cleanup(void)
 	log_debug("at least %i monitoring records for deletion",
 			  entries_to_delete);
-	if (delete_monitoring_records(primary_conn, runtime_options.keep_history) == false)
+	initPQExpBuffer(&event_details);
 	if (delete_monitoring_records(primary_conn, runtime_options.keep_history, runtime_options.node_id) == false)
 	{
-		log_error(_("unable to delete monitoring records"));
+		appendPQExpBufferStr(&event_details,
 						  _("unable to delete monitoring records"));
 		log_error("%s", event_details.data);
 		log_detail("%s", PQerrorMessage(primary_conn));
 		create_event_notification(primary_conn,
 								  &config_file_options,
 								  config_file_options.node_id,
 								  "cluster_cleanup",
 								  false,
 								  event_details.data);
 		PQfinish(primary_conn);
 		exit(ERR_DB_QUERY);
 	}
@@ -1370,19 +1434,40 @@ do_cluster_cleanup(void)
 		log_detail("%s", PQerrorMessage(primary_conn));
 	}
-
+	if (runtime_options.keep_history == 0)
 	PQfinish(primary_conn);
 	if (runtime_options.keep_history > 0)
 	{
-		log_notice(_("monitoring records older than %i day(s) deleted"),
+		appendPQExpBufferStr(&event_details,
-				   runtime_options.keep_history);
+						  _("all monitoring records deleted"));
 	}
 	else
 	{
-		log_info(_("all monitoring records deleted"));
+		appendPQExpBufferStr(&event_details,
 						  _("monitoring records deleted"));
 	}
 	if (runtime_options.node_id != UNKNOWN_NODE_ID)
 		appendPQExpBuffer(&event_details,
 						  _(" for node %i"),
 						  runtime_options.node_id);
 	if (runtime_options.keep_history > 0)
 		appendPQExpBuffer(&event_details,
 						  _("; records newer than %i day(s) retained"),
 						  runtime_options.keep_history);
 	create_event_notification(primary_conn,
 							  &config_file_options,
 							  config_file_options.node_id,
 							  "cluster_cleanup",
 							  true,
 							  event_details.data);
 	log_notice("%s", event_details.data);
 	termPQExpBuffer(&event_details);
 	PQfinish(primary_conn);
 	return;
 }
--- a/repmgr-action-daemon.c
+++ b/repmgr-action-daemon.c
@@ -0,0 +1,420 @@
 /*
 * repmgr-action-daemon.c
 *
 * Implements repmgrd actions for the repmgr command line utility
 * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #include "repmgr.h"
 #include "repmgr-client-global.h"
 #include "repmgr-action-daemon.h"
 /*
 * Possibly also show:
 *  - repmgrd start time?
 *  - repmgrd mode
 *  - priority
 *  - whether promotion candidate (due to zero priority/different location)
 */
 /*
  * Column identifiers for the "daemon status" table; each value is used
  * as an index into headers_status[].
  */
 typedef enum
 {
 	STATUS_ID = 0,
 	STATUS_NAME,
 	STATUS_ROLE,
 	STATUS_PG,
 	STATUS_RUNNING,
 	STATUS_PID,
 	STATUS_PAUSED
 } StatusHeader;
 /* Number of columns; must be kept in sync with the StatusHeader members */
 #define STATUS_HEADER_COUNT 7
 /* Title and width bookkeeping for each column of the status table */
 struct ColHeader headers_status[STATUS_HEADER_COUNT];
 /* File-local helpers, defined further down in this file */
 static void fetch_node_records(PGconn *conn, NodeInfoList *node_list);
 static void _do_repmgr_pause(bool pause);
 /*
  * do_daemon_status()
  *
  * Print one row per registered node showing whether PostgreSQL is
  * reachable, whether repmgrd is running, its PID and whether it is
  * paused. Output is either an aligned text table or CSV, depending on
  * runtime_options.output_mode.
  *
  * Nodes which cannot be connected to are reported as "not running" and
  * a warning is collected; warnings are printed after the table unless
  * --terse or CSV output was requested.
  */
 void
 do_daemon_status(void)
 {
 	PGconn	   *conn = NULL;
 	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
 	NodeInfoListCell *cell = NULL;
 	int i;
 	RepmgrdInfo **repmgrd_info;
 	ItemList	warnings = {NULL, NULL};
 	/* Connect to local database to obtain cluster connection data */
 	log_verbose(LOG_INFO, _("connecting to database"));
 	if (strlen(config_file_options.conninfo))
 		conn = establish_db_connection(config_file_options.conninfo, true);
 	else
 		conn = establish_db_connection_by_params(&source_conninfo, true);
 	/* exits (after closing "conn") if no node records can be retrieved */
 	fetch_node_records(conn, &nodes);
 	/*
 	 * The local connection is only needed to retrieve the node records;
 	 * release it now rather than leaving it open for the rest of the run.
 	 */
 	PQfinish(conn);
 	conn = NULL;
 	repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * nodes.node_count);
 	if (repmgrd_info == NULL)
 	{
 		log_error(_("unable to allocate memory"));
 		exit(ERR_OUT_OF_MEMORY);
 	}
 	strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN);
 	strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN);
 	strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN);
 	strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN);
 	strncpy(headers_status[STATUS_RUNNING].title, _("repmgrd"), MAXLEN);
 	strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN);
 	strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN);
 	/* each column starts out as wide as its title */
 	for (i = 0; i < STATUS_HEADER_COUNT; i++)
 	{
 		headers_status[i].max_length = strlen(headers_status[i].title);
 	}
 	/* First pass: collect per-node status and work out column widths */
 	i = 0;
 	for (cell = nodes.head; cell; cell = cell->next)
 	{
 		int j;
 		repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
 		repmgrd_info[i]->node_id = cell->node_info->node_id;
 		repmgrd_info[i]->pid = UNKNOWN_PID;
 		repmgrd_info[i]->paused = false;
 		repmgrd_info[i]->running = false;
 		repmgrd_info[i]->pg_running = true;
 		cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
 		if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
 		{
 			if (runtime_options.verbose)
 			{
 				char		error[MAXLEN];
 				/*
 				 * snprintf() always NUL-terminates; strncpy() would leave
 				 * "error" unterminated if the message filled the buffer.
 				 */
 				snprintf(error, sizeof(error), "%s", PQerrorMessage(cell->node_info->conn));
 				item_list_append_format(&warnings,
 										"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
 										cell->node_info->node_name, cell->node_info->node_id, trim(error));
 			}
 			else
 			{
 				item_list_append_format(&warnings,
 										"unable to  connect to node \"%s\" (ID: %i)",
 										cell->node_info->node_name, cell->node_info->node_id);
 			}
 			repmgrd_info[i]->pg_running = false;
 			maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("not running"));
 			maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("n/a"));
 			maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a"));
 		}
 		else
 		{
 			maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("running"));
 			repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn);
 			repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn);
 			if (repmgrd_info[i]->running == true)
 			{
 				maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("running"));
 			}
 			else
 			{
 				maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("not running"));
 			}
 			if (repmgrd_info[i]->pid == UNKNOWN_PID)
 			{
 				maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a"));
 			}
 			else
 			{
 				maxlen_snprintf(repmgrd_info[i]->pid_text, "%i", repmgrd_info[i]->pid);
 			}
 			repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);
 		}
 		/*
 		 * A PGconn must be released with PQfinish() even when the connection
 		 * attempt failed; clear the pointer so it cannot be reused.
 		 */
 		PQfinish(cell->node_info->conn);
 		cell->node_info->conn = NULL;
 		headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name);
 		headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type));
 		headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text);
 		headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running);
 		headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text);
 		/* widen any column whose value for this node is the longest so far */
 		for (j = 0; j < STATUS_HEADER_COUNT; j++)
 		{
 			if (headers_status[j].cur_length > headers_status[j].max_length)
 			{
 				headers_status[j].max_length = headers_status[j].cur_length;
 			}
 		}
 		i++;
 	}
 	/* Print column header row (text mode only) */
 	if (runtime_options.output_mode == OM_TEXT)
 	{
 		print_status_header(STATUS_HEADER_COUNT, headers_status);
 	}
 	/* Second pass: emit one row per node, releasing each record as we go */
 	i = 0;
 	for (cell = nodes.head; cell; cell = cell->next)
 	{
 		if (runtime_options.output_mode == OM_CSV)
 		{
 			printf("%i,%s,%s,%i,%i,%i,%i\n",
 				   cell->node_info->node_id,
 				   cell->node_info->node_name,
 				   get_node_type_string(cell->node_info->type),
 				   repmgrd_info[i]->pg_running ? 1 : 0,
 				   repmgrd_info[i]->running ? 1 : 0,
 				   repmgrd_info[i]->pid,
 				   repmgrd_info[i]->paused ? 1 : 0);
 		}
 		else
 		{
 			printf(" %-*i ",  headers_status[STATUS_ID].max_length, cell->node_info->node_id);
 			printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name);
 			printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type));
 			printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text);
 			printf("| %-*s ", headers_status[STATUS_RUNNING].max_length, repmgrd_info[i]->repmgrd_running);
 			printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text);
 			/* without a known PID the "paused" flag is not meaningful */
 			if (repmgrd_info[i]->pid == UNKNOWN_PID)
 				printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, "n/a");
 			else
 				printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, repmgrd_info[i]->paused ? "yes" : "no");
 			printf("\n");
 		}
 		free(repmgrd_info[i]);
 		i++;
 	}
 	free(repmgrd_info);
 	/* emit any warnings */
 	if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode != OM_CSV)
 	{
 		ItemListCell *warning_cell = NULL;
 		printf(_("\nWARNING: following issues were detected\n"));
 		for (warning_cell = warnings.head; warning_cell; warning_cell = warning_cell->next)
 		{
 			printf(_("  - %s\n"), warning_cell->string);
 		}
 		if (runtime_options.verbose == false)
 		{
 			log_hint(_("execute with --verbose option to see connection error messages"));
 		}
 	}
 }
 /*
  * do_daemon_pause()
  *
  * Entry point for "repmgr daemon pause"; delegates to _do_repmgr_pause()
  * with pause = true.
  */
 void
 do_daemon_pause(void)
 {
 	_do_repmgr_pause(true);
 }
 /*
  * do_daemon_unpause()
  *
  * Entry point for "repmgr daemon unpause"; delegates to _do_repmgr_pause()
  * with pause = false.
  */
 void
 do_daemon_unpause(void)
 {
 	_do_repmgr_pause(false);
 }
 /*
  * _do_repmgr_pause()
  *
  * Shared implementation of "daemon pause" (pause = true) and
  * "daemon unpause" (pause = false).
  *
  * Retrieves all node records via the local node, then connects to each
  * node in turn and calls repmgrd_pause() on it. With --dry-run only the
  * connection is attempted and the intended action is logged.
  *
  * This function does not return: it exits with ERR_REPMGRD_PAUSE if any
  * node could not be reached or updated, otherwise with SUCCESS.
  */
 static void
 _do_repmgr_pause(bool pause)
 {
 	PGconn	   *conn = NULL;
 	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
 	NodeInfoListCell *cell = NULL;
 	int error_nodes = 0;
 	/*
 	 * NOTE: an earlier version allocated a RepmgrdInfo array here, sized
 	 * from nodes.node_count before the node list had been fetched (i.e.
 	 * zero entries), and then wrote one element per node into it. The
 	 * array was never read, so it has been removed rather than resized.
 	 */
 	/* Connect to local database to obtain cluster connection data */
 	log_verbose(LOG_INFO, _("connecting to database"));
 	if (strlen(config_file_options.conninfo))
 		conn = establish_db_connection(config_file_options.conninfo, true);
 	else
 		conn = establish_db_connection_by_params(&source_conninfo, true);
 	/* exits (after closing "conn") if no node records can be retrieved */
 	fetch_node_records(conn, &nodes);
 	/* node records are now held locally; the local connection is done with */
 	PQfinish(conn);
 	conn = NULL;
 	for (cell = nodes.head; cell; cell = cell->next)
 	{
 		log_verbose(LOG_DEBUG, "%s node %i (%s)",
 					pause == true ? "pausing" : "unpausing",
 					cell->node_info->node_id,
 					cell->node_info->node_name);
 		cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
 		if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
 		{
 			log_warning(_("unable to connect to node %i"),
 						cell->node_info->node_id);
 			error_nodes++;
 		}
 		else if (runtime_options.dry_run == true)
 		{
 			if (pause == true)
 			{
 				log_info(_("would pause node %i (%s) "),
 						 cell->node_info->node_id,
 						 cell->node_info->node_name);
 			}
 			else
 			{
 				log_info(_("would unpause node %i (%s) "),
 						 cell->node_info->node_id,
 						 cell->node_info->node_name);
 			}
 		}
 		else
 		{
 			bool success = repmgrd_pause(cell->node_info->conn, pause);
 			if (success == false)
 				error_nodes++;
 			log_notice(_("node %i (%s) %s"),
 					   cell->node_info->node_id,
 					   cell->node_info->node_name,
 					   success == true
 							? (pause == true ? "paused" : "unpaused")
 							: (pause == true ? "not paused" : "not unpaused"));
 		}
 		/*
 		 * A PGconn must be released with PQfinish() even when the connection
 		 * attempt failed; clear the pointer so it cannot be reused.
 		 */
 		PQfinish(cell->node_info->conn);
 		cell->node_info->conn = NULL;
 	}
 	if (error_nodes > 0)
 	{
 		if (pause == true)
 		{
 			log_error(_("unable to pause %i node(s)"), error_nodes);
 		}
 		else
 		{
 			log_error(_("unable to unpause %i node(s)"), error_nodes);
 		}
 		log_hint(_("execute \"repmgr daemon status\" to view current status"));
 		exit(ERR_REPMGRD_PAUSE);
 	}
 	exit(SUCCESS);
 }
 /*
  * fetch_node_records()
  *
  * Populate "node_list" with all node records retrievable via "conn".
  *
  * Does not return on failure: if the records cannot be retrieved, or no
  * records exist, "conn" is closed and the program exits with
  * ERR_BAD_CONFIG. On success "conn" is left open; the caller remains
  * responsible for closing it.
  */
 static void
 fetch_node_records(PGconn *conn, NodeInfoList *node_list)
 {
 	bool success = get_all_node_records(conn, node_list);
 	if (success == false)
 	{
 		/* get_all_node_records() will display any error message */
 		PQfinish(conn);
 		exit(ERR_BAD_CONFIG);
 	}
 	/* a successful query returning nothing is still unusable for callers */
 	if (node_list->node_count == 0)
 	{
 		log_error(_("no node records were found"));
 		log_hint(_("ensure at least one node is registered"));
 		PQfinish(conn);
 		exit(ERR_BAD_CONFIG);
 	}
 }
 /*
  * do_daemon_help()
  *
  * Print usage information for the "daemon status", "daemon pause" and
  * "daemon unpause" actions to stdout.
  */
 void
 do_daemon_help(void)
 {
 	print_help_header();
 	printf(_("Usage:\n"));
 	printf(_("    %s [OPTIONS] daemon status\n"),  progname());
 	printf(_("    %s [OPTIONS] daemon pause\n"),   progname());
 	printf(_("    %s [OPTIONS] daemon unpause\n"), progname());
 	puts("");
 	printf(_("DAEMON STATUS\n"));
 	puts("");
 	printf(_("  \"daemon status\" shows the status of repmgrd on each node in the cluster\n"));
 	puts("");
 	printf(_("    --csv                     emit output as CSV\n"));
 	printf(_("    --verbose                 show text of database connection error messages\n"));
 	puts("");
 	printf(_("DAEMON PAUSE\n"));
 	puts("");
 	printf(_("  \"daemon pause\" instructs repmgrd on each node to pause failover detection\n"));
 	puts("");
 	printf(_("    --dry-run               check if nodes are reachable but don't pause repmgrd\n"));
 	puts("");
 	/* this section describes "daemon unpause", so the heading must say so */
 	printf(_("DAEMON UNPAUSE\n"));
 	puts("");
 	printf(_("  \"daemon unpause\"  instructs repmgrd on each node to resume failover detection\n"));
 	puts("");
 	printf(_("    --dry-run               check if nodes are reachable but don't unpause repmgrd\n"));
 	puts("");
 	puts("");
 }
--- a/repmgr-action-daemon.h
+++ b/repmgr-action-daemon.h
@@ -0,0 +1,28 @@
 /*
 * repmgr-action-daemon.h
 * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #ifndef _REPMGR_ACTION_DAEMON_H_
 #define _REPMGR_ACTION_DAEMON_H_
 extern void do_daemon_status(void);
 extern void do_daemon_pause(void);
 extern void do_daemon_unpause(void);
 extern void do_daemon_help(void);
 #endif
--- a/repmgr-action-node.c
+++ b/repmgr-action-node.c
@@ -189,16 +189,16 @@ do_node_status(void)
 		if (enabled == true)
 		{
-			appendPQExpBuffer(&archiving_status, "enabled");
+			appendPQExpBufferStr(&archiving_status, "enabled");
 		}
 		else
 		{
-			appendPQExpBuffer(&archiving_status, "disabled");
+			appendPQExpBufferStr(&archiving_status, "disabled");
 		}
 		if (enabled == false && recovery_type == RECTYPE_STANDBY)
 		{
-			appendPQExpBuffer(&archiving_status, " (on standbys \"archive_mode\" must be set to \"always\" to be effective)");
+			appendPQExpBufferStr(&archiving_status, " (on standbys \"archive_mode\" must be set to \"always\" to be effective)");
 		}
 		key_value_list_set(&node_status,
@@ -506,7 +506,7 @@ do_node_status(void)
 		/* output missing slot information */
-		appendPQExpBuffer(&output, "\n");
+		appendPQExpBufferChar(&output, '\n');
 		appendPQExpBuffer(&output,
 						  "\"missing_replication_slots\",%i",
 						  missing_slots.node_count);
@@ -590,13 +590,13 @@ _do_node_status_is_shutdown_cleanly(void)
 	initPQExpBuffer(&output);
-	appendPQExpBuffer(&output,
+	appendPQExpBufferStr(&output,
 					  "--state=");
 	/* sanity-check we're dealing with a PostgreSQL directory */
 	if (is_pg_dir(config_file_options.data_directory) == false)
 	{
-		appendPQExpBuffer(&output, "UNKNOWN");
+		appendPQExpBufferStr(&output, "UNKNOWN");
 		printf("%s\n", output.data);
 		termPQExpBuffer(&output);
 		return;
@@ -659,10 +659,10 @@ _do_node_status_is_shutdown_cleanly(void)
 	switch (node_status)
 	{
 		case NODE_STATUS_UP:
-			appendPQExpBuffer(&output, "RUNNING");
+			appendPQExpBufferStr(&output, "RUNNING");
 			break;
 		case NODE_STATUS_SHUTTING_DOWN:
-			appendPQExpBuffer(&output, "SHUTTING_DOWN");
+			appendPQExpBufferStr(&output, "SHUTTING_DOWN");
 			break;
 		case NODE_STATUS_DOWN:
 			appendPQExpBuffer(&output,
@@ -670,10 +670,10 @@ _do_node_status_is_shutdown_cleanly(void)
 							  format_lsn(checkPoint));
 			break;
 		case NODE_STATUS_UNCLEAN_SHUTDOWN:
-			appendPQExpBuffer(&output, "UNCLEAN_SHUTDOWN");
+			appendPQExpBufferStr(&output, "UNCLEAN_SHUTDOWN");
 			break;
 		case NODE_STATUS_UNKNOWN:
-			appendPQExpBuffer(&output, "UNKNOWN");
+			appendPQExpBufferStr(&output, "UNKNOWN");
 			break;
 	}
@@ -847,7 +847,7 @@ do_node_check(void)
 								  ",\"%s\"",
 								  cell->details);
 			}
-			appendPQExpBuffer(&output, "\n");
+			appendPQExpBufferChar(&output, '\n');
 		}
 	}
 	else
@@ -869,7 +869,7 @@ do_node_check(void)
 								  " (%s)",
 								  cell->details);
 			}
-			appendPQExpBuffer(&output, "\n");
+			appendPQExpBufferChar(&output, '\n');
 		}
 	}
@@ -899,12 +899,12 @@ do_node_check_replication_connection(void)
 	initPQExpBuffer(&output);
-	appendPQExpBuffer(&output,
+	appendPQExpBufferStr(&output,
-					  "--connection=");
+						 "--connection=");
 	if (runtime_options.remote_node_id == UNKNOWN_NODE_ID)
 	{
-		appendPQExpBuffer(&output, "UNKNOWN");
+		appendPQExpBufferStr(&output, "UNKNOWN");
 		printf("%s\n", output.data);
 		termPQExpBuffer(&output);
 		return;
@@ -918,7 +918,7 @@ do_node_check_replication_connection(void)
 	if (record_status != RECORD_FOUND)
 	{
-		appendPQExpBuffer(&output, "UNKNOWN");
+		appendPQExpBufferStr(&output, "UNKNOWN");
 		printf("%s\n", output.data);
 		termPQExpBuffer(&output);
 		return;
@@ -938,7 +938,7 @@ do_node_check_replication_connection(void)
 	if (PQstatus(repl_conn) != CONNECTION_OK)
 	{
-		appendPQExpBuffer(&output, "BAD");
+		appendPQExpBufferStr(&output, "BAD");
 		printf("%s\n", output.data);
 		termPQExpBuffer(&output);
 		return;
@@ -946,7 +946,7 @@ do_node_check_replication_connection(void)
 	PQfinish(repl_conn);
-	appendPQExpBuffer(&output, "OK");
+	appendPQExpBufferStr(&output, "OK");
 	printf("%s\n", output.data);
 	termPQExpBuffer(&output);
@@ -1042,9 +1042,8 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
 				break;
 			case OM_NAGIOS:
 			case OM_TEXT:
-				appendPQExpBuffer(
+				appendPQExpBufferStr(&details,
-								  &details,
+									 "unable to check archive_status directory");
 								  "unable to check archive_status directory");
 				break;
 			default:
@@ -1172,8 +1171,8 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
 	if (missing_nodes_count == 0)
 	{
 		if (expected_nodes_count == 0)
-			appendPQExpBuffer(&details,
+			appendPQExpBufferStr(&details,
-							  "this node has no downstream nodes");
+								 "this node has no downstream nodes");
 		else
 			appendPQExpBuffer(&details,
 							  "%i of %i downstream nodes attached",
@@ -1194,20 +1193,18 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
 		if (mode != OM_NAGIOS)
 		{
-			appendPQExpBuffer(&details, "; missing: ");
+			appendPQExpBufferStr(&details, "; missing: ");
 			for (missing_cell = missing_nodes.head; missing_cell; missing_cell = missing_cell->next)
 			{
 				if (first == false)
-					appendPQExpBuffer(&details,
+					appendPQExpBufferStr(&details,
-									  ", ");
+										 ", ");
 				else
 					first = false;
 				if (first == false)
-					appendPQExpBuffer(
+					appendPQExpBufferStr(&details, missing_cell->string);
 									  &details,
 									  "%s", missing_cell->string);
 			}
 		}
 	}
@@ -1307,8 +1304,8 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
 		switch (mode)
 		{
 			case OM_OPTFORMAT:
-				appendPQExpBuffer(&details,
+				appendPQExpBufferStr(&details,
-								  "--lag=0");
+									 "--lag=0");
 				break;
 			case OM_NAGIOS:
 				appendPQExpBuffer(&details,
@@ -1319,13 +1316,13 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
 			case OM_TEXT:
 				if (node_info->type == WITNESS)
 				{
-					appendPQExpBuffer(&details,
+					appendPQExpBufferStr(&details,
-									  "N/A - node is witness");
+										 "N/A - node is witness");
 				}
 				else
 				{
-					appendPQExpBuffer(&details,
+					appendPQExpBufferStr(&details,
-									  "N/A - node is primary");
+										 "N/A - node is primary");
 				}
 				break;
 			default:
@@ -1406,9 +1403,8 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
 					break;
 				case OM_NAGIOS:
 				case OM_TEXT:
-					appendPQExpBuffer(
+					appendPQExpBufferStr(&details,
-									  &details,
+										 "unable to query replication lag");
 									  "unable to query replication lag");
 					break;
 				default:
@@ -1508,39 +1504,39 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
 			if (recovery_type == RECTYPE_STANDBY)
 			{
 				status = CHECK_STATUS_CRITICAL;
-				appendPQExpBuffer(&details,
+				appendPQExpBufferStr(&details,
-								  _("node is registered as primary but running as standby"));
+									 _("node is registered as primary but running as standby"));
 			}
 			else
 			{
-				appendPQExpBuffer(&details,
+				appendPQExpBufferStr(&details,
-								  _("node is primary"));
+									 _("node is primary"));
 			}
 			break;
 		case STANDBY:
 			if (recovery_type == RECTYPE_PRIMARY)
 			{
 				status = CHECK_STATUS_CRITICAL;
-				appendPQExpBuffer(&details,
+				appendPQExpBufferStr(&details,
-								  _("node is registered as standby but running as primary"));
+									 _("node is registered as standby but running as primary"));
 			}
 			else
 			{
-				appendPQExpBuffer(&details,
+				appendPQExpBufferStr(&details,
-								  _("node is standby"));
+									 _("node is standby"));
 			}
 			break;
 		case WITNESS:
 			if (recovery_type == RECTYPE_STANDBY)
 			{
 				status = CHECK_STATUS_CRITICAL;
-				appendPQExpBuffer(&details,
+				appendPQExpBufferStr(&details,
-								  _("node is registered as witness but running as standby"));
+									 _("node is registered as witness but running as standby"));
 			}
 			else
 			{
-				appendPQExpBuffer(&details,
+				appendPQExpBufferStr(&details,
-								  _("node is witness"));
+									 _("node is witness"));
 			}
 			break;
 		case BDR:
@@ -1551,8 +1547,8 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
 				if (is_bdr_db(conn, &output) == false)
 				{
 					status = CHECK_STATUS_CRITICAL;
-					appendPQExpBuffer(&details,
+					appendPQExpBufferStr(&details,
-									  "%s", output.data);
+										 output.data);
 				}
 				termPQExpBuffer(&output);
@@ -1561,13 +1557,13 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
 					if (is_active_bdr_node(conn, node_info->node_name) == false)
 					{
 						status = CHECK_STATUS_CRITICAL;
-						appendPQExpBuffer(&details,
+						appendPQExpBufferStr(&details,
-										  _("node is not an active BDR node"));
+											 _("node is not an active BDR node"));
 					}
 					else
 					{
-						appendPQExpBuffer(&details,
+						appendPQExpBufferStr(&details,
-										  _("node is an active BDR node"));
+											 _("node is an active BDR node"));
 					}
 				}
 			}
@@ -1624,13 +1620,13 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
 	if (server_version_num < 90400)
 	{
-		appendPQExpBuffer(&details,
+		appendPQExpBufferStr(&details,
-						  _("replication slots not available for this PostgreSQL version"));
+							 _("replication slots not available for this PostgreSQL version"));
 	}
 	else if (node_info->total_replication_slots == 0)
 	{
-		appendPQExpBuffer(&details,
+		appendPQExpBufferStr(&details,
-						  _("node has no replication slots"));
+							 _("node has no replication slots"));
 	}
 	else if (node_info->inactive_replication_slots == 0)
 	{
@@ -1700,8 +1696,8 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
 	if (server_version_num < 90400)
 	{
-		appendPQExpBuffer(&details,
+		appendPQExpBufferStr(&details,
-						  _("replication slots not available for this PostgreSQL version"));
+							 _("replication slots not available for this PostgreSQL version"));
 	}
 	else
 	{
@@ -1711,9 +1707,8 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
 		if (missing_slots.node_count == 0)
 		{
-			appendPQExpBuffer(&details,
+			appendPQExpBufferStr(&details,
-						  _("node has no missing replication slots"));
+								 _("node has no missing replication slots"));
 		}
 		else
 		{
@@ -1728,7 +1723,7 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
 			if (missing_slots.node_count)
 			{
-				appendPQExpBuffer(&details, ": ");
+				appendPQExpBufferStr(&details, ": ");
 				for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next)
 				{
@@ -1738,10 +1733,10 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
 					}
 					else
 					{
-						appendPQExpBuffer(&details, ", ");
+						appendPQExpBufferStr(&details, ", ");
 					}
-					appendPQExpBuffer(&details, "%s", missing_slot_cell->node_info->slot_name);
+					appendPQExpBufferStr(&details, missing_slot_cell->node_info->slot_name);
 				}
 			}
 		}
@@ -1784,7 +1779,7 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
 			if (list_output != NULL)
 			{
 				check_status_list_set(list_output,
-									  "Replication slots",
+									  "Missing replication slots",
 									  status,
 									  details.data);
 			}
@@ -2137,8 +2132,8 @@ do_node_rejoin(void)
 			exit(ERR_BAD_CONFIG);
 		}
-		appendPQExpBuffer(&msg,
+		appendPQExpBufferStr(&msg,
-						  _("prerequisites for using pg_rewind are met"));
+							 _("prerequisites for using pg_rewind are met"));
 		if (runtime_options.dry_run == true)
 		{
@@ -2417,6 +2412,54 @@ do_node_rejoin(void)
 		success = is_downstream_node_attached(upstream_conn, config_file_options.node_name);
 	}
 	/*
 	 * Handle replication slots:
 	 *  - if a slot for the new upstream exists, delete that
 	 *  - warn about any other inactive replication slots
 	 */
 	if (runtime_options.force_rewind_used == false && config_file_options.use_replication_slots)
 	{
 		PGconn	   *local_conn = NULL;
 		local_conn = establish_db_connection(config_file_options.conninfo, false);
 		if (PQstatus(local_conn) != CONNECTION_OK)
 		{
 			log_warning(_("unable to connect to local node to check replication slot status"));
 			log_hint(_("execute \"repmgr node check\" to check inactive slots and drop manually if necessary"));
 		}
 		else
 		{
 			KeyValueList inactive_replication_slots = {NULL, NULL};
 			KeyValueListCell *cell = NULL;
 			int inactive_count = 0;
 			PQExpBufferData slotinfo;
 			drop_replication_slot_if_exists(local_conn,
 											config_file_options.node_id,
 											primary_node_record.slot_name);
 			(void) get_inactive_replication_slots(local_conn, &inactive_replication_slots);
 			initPQExpBuffer(&slotinfo);
 			for (cell = inactive_replication_slots.head; cell; cell = cell->next)
 			{
 				appendPQExpBuffer(&slotinfo,
 								  "  - %s (%s)", cell->key, cell->value);
 				inactive_count++;
 			}
 			if (inactive_count > 0)
 			{
 				log_warning(_("%i inactive replication slots detected"), inactive_count);
 				log_detail(_("inactive replication slots:\n%s"), slotinfo.data);
 				log_hint(_("these replication slots may need to be removed manually"));
 			}
 			termPQExpBuffer(&slotinfo);
 			PQfinish(local_conn);
 		}
 	}
 	if (success == true)
 	{
@@ -2426,7 +2469,8 @@ do_node_rejoin(void)
 	else
 	{
 		/*
-		 * if we reach here, no record found in upstream node's pg_stat_replication */
+		 * if we reach here, no record found in upstream node's pg_stat_replication
 		 */
 		log_notice(_("NODE REJOIN has completed but node is not yet reattached to upstream"));
 		log_hint(_("you will need to manually check the node's replication status"));
 	}
--- a/repmgr-action-primary.c
+++ b/repmgr-action-primary.c
@@ -64,12 +64,10 @@ do_primary_register(void)
 			PQfinish(conn);
 			exit(ERR_BAD_CONFIG);
 		}
-		else
+
-		{
+		log_error(_("unable to determine server's recovery type"));
-			log_error(_("connection to node lost"));
+		PQfinish(conn);
-			PQfinish(conn);
+		exit(ERR_DB_CONN);
 			exit(ERR_DB_CONN);
 		}
 	}
 	log_verbose(LOG_INFO, _("server is not in recovery"));
@@ -172,8 +170,8 @@ do_primary_register(void)
 											&node_info);
 		if (record_created == true)
 		{
-			appendPQExpBuffer(&event_description,
+			appendPQExpBufferStr(&event_description,
-							  "existing primary record updated");
+								 "existing primary record updated");
 		}
 		else
 		{
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
--- a/repmgr-action-witness.c
+++ b/repmgr-action-witness.c
@@ -218,7 +218,13 @@ do_witness_register(void)
 	 * if repmgr.nodes contains entries, delete if -F/--force provided,
 	 * otherwise exit with error
 	 */
-	get_all_node_records(witness_conn, &nodes);
+	if (get_all_node_records(witness_conn, &nodes) == false)
 	{
 		/* get_all_node_records() will display the error */
 		PQfinish(witness_conn);
 		PQfinish(primary_conn);
 		exit(ERR_BAD_CONFIG);
 	}
 	log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
--- a/repmgr-client-global.h
+++ b/repmgr-client-global.h
@@ -97,6 +97,7 @@ typedef struct
 	bool		force_rewind_used;
 	char		force_rewind_path[MAXPGPATH];
 	bool		siblings_follow;
 	bool		repmgrd_no_pause;
 	/* "node status" options */
 	bool		is_shutdown_cleanly;
@@ -156,7 +157,7 @@ typedef struct
 		/* "standby register" options */ \
 		false, -1, DEFAULT_WAIT_START,   \
 		/* "standby switchover" options */ \
-		false, false, "", false,		   \
+		false, false, "", false, false,	\
 		/* "node status" options */ \
 		false, \
 		/* "node check" options */ \
@@ -193,6 +194,14 @@ typedef enum
 } t_server_action;
 /*
  * Describes a single column of tabular output, as consumed by
  * print_status_header().
  */
 typedef struct ColHeader
 {
 	char		title[MAXLEN];	/* heading text printed for the column */
 	int			max_length;	/* width the column is padded to; also the length of its separator rule */
 	int			cur_length;	/* NOTE(review): not referenced by print_status_header(); presumably scratch space for callers measuring values - confirm */
 } ColHeader;
 /* global configuration structures */
 extern t_runtime_options runtime_options;
@@ -228,7 +237,10 @@ extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGc
 extern bool remote_command(const char *host, const char *user, const char *command, PQExpBufferData *outputbuf);
 extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record);
 /* display functions */
 extern void print_help_header(void);
 extern void print_status_header(int cols, ColHeader *headers);
 /* server control functions */
 extern void get_server_action(t_server_action action, char *script, char *data_dir);
@@ -237,5 +249,6 @@ extern void get_node_config_directory(char *config_dir_buf);
 extern void get_node_data_directory(char *data_dir_buf);
 extern void init_node_record(t_node_info *node_record);
 extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
 extern void drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
 #endif							/* _REPMGR_CLIENT_GLOBAL_H_ */
--- a/repmgr-client.c
+++ b/repmgr-client.c
@@ -30,10 +30,15 @@
 * NODE STATUS
 * NODE CHECK
 *
 * DAEMON STATUS
 * DAEMON PAUSE
 * DAEMON UNPAUSE
 *
 * For internal use:
 * NODE REJOIN
 * NODE SERVICE
 *
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
@@ -62,6 +67,7 @@
 #include "repmgr-action-bdr.h"
 #include "repmgr-action-node.h"
 #include "repmgr-action-cluster.h"
 #include "repmgr-action-daemon.h"
 #include <storage/fd.h>			/* for PG_TEMP_FILE_PREFIX */
@@ -438,6 +444,10 @@ main(int argc, char **argv)
 				runtime_options.siblings_follow = true;
 				break;
 			case OPT_REPMGRD_NO_PAUSE:
 				runtime_options.repmgrd_no_pause = true;
 				break;
 				/*----------------------
 				 * "node status" options
 				 *----------------------
@@ -900,6 +910,21 @@ main(int argc, char **argv)
 			else if (strcasecmp(repmgr_action, "CLEANUP") == 0)
 				action = CLUSTER_CLEANUP;
 		}
 		else if (strcasecmp(repmgr_command, "DAEMON") == 0)
 		{
 			if (help_option == true)
 			{
 				do_daemon_help();
 				exit(SUCCESS);
 			}
 			if (strcasecmp(repmgr_action, "STATUS") == 0)
 				action = DAEMON_STATUS;
 			else if (strcasecmp(repmgr_action, "PAUSE") == 0)
 				action = DAEMON_PAUSE;
 			else if (strcasecmp(repmgr_action, "UNPAUSE") == 0)
 				action = DAEMON_UNPAUSE;
 		}
 		else
 		{
 			valid_repmgr_command_found = false;
@@ -1298,6 +1323,17 @@ main(int argc, char **argv)
 			do_cluster_cleanup();
 			break;
 			/* DAEMON */
 		case DAEMON_STATUS:
 			do_daemon_status();
 			break;
 		case DAEMON_PAUSE:
 			do_daemon_pause();
 			break;
 		case DAEMON_UNPAUSE:
 			do_daemon_unpause();
 			break;
 		default:
 			/* An action will have been determined by this point  */
 			break;
@@ -1520,6 +1556,7 @@ check_cli_parameters(const int action)
 			case PRIMARY_UNREGISTER:
 			case STANDBY_UNREGISTER:
 			case WITNESS_UNREGISTER:
 			case CLUSTER_CLEANUP:
 			case CLUSTER_EVENT:
 			case CLUSTER_MATRIX:
 			case CLUSTER_CROSSCHECK:
@@ -1743,6 +1780,18 @@ check_cli_parameters(const int action)
 		}
 	}
 	if (runtime_options.repmgrd_no_pause == true)
 	{
 		switch (action)
 		{
 			case STANDBY_SWITCHOVER:
 				break;
 			default:
 				item_list_append_format(&cli_warnings,
 										_("--repmgrd-no-pause will be ignored when executing %s"),
 										action_name(action));
 		}
 	}
 	if (runtime_options.config_files[0] != '\0')
 	{
@@ -1771,6 +1820,8 @@ check_cli_parameters(const int action)
 			case WITNESS_UNREGISTER:
 			case NODE_REJOIN:
 			case NODE_SERVICE:
 			case DAEMON_PAUSE:
 			case DAEMON_UNPAUSE:
 				break;
 			default:
 				item_list_append_format(&cli_warnings,
@@ -1850,6 +1901,14 @@ action_name(const int action)
 			return "CLUSTER MATRIX";
 		case CLUSTER_CROSSCHECK:
 			return "CLUSTER CROSSCHECK";
 		case DAEMON_STATUS:
 			return "DAEMON STATUS";
 		case DAEMON_PAUSE:
 			return "DAEMON PAUSE";
 		case DAEMON_UNPAUSE:
 			return "DAEMON UNPAUSE";
 	}
 	return "UNKNOWN ACTION";
@@ -1877,6 +1936,42 @@ print_error_list(ItemList *error_list, int log_level)
 }
 /*
  * Print a table header to stdout: a row containing each column's title
  * left-justified to its "max_length", followed by a rule of dashes with
  * "-+-" between columns.
  *
  * cols:    number of entries in "headers"
  * headers: array of column descriptors
  */
 void
 print_status_header(int cols, ColHeader *headers)
 {
 	int			col;
 	/* title row: a single leading space, then " | " between columns */
 	for (col = 0; col < cols; col++)
 	{
 		printf("%s%-*s",
 			   (col == 0) ? " " : " | ",
 			   headers[col].max_length,
 			   headers[col].title);
 	}
 	printf("\n");
 	/* separator row: one dash per character of column width */
 	printf("-");
 	for (col = 0; col < cols; col++)
 	{
 		int			remaining = headers[col].max_length;
 		while (remaining-- > 0)
 			printf("-");
 		/* the last column is closed with a single dash rather than a junction */
 		printf("%s", (col < (cols - 1)) ? "-+-" : "-");
 	}
 	printf("\n");
 }
 void
 print_help_header(void)
 {
@@ -1905,6 +2000,7 @@ do_help(void)
 	printf(_("    %s [OPTIONS] node    {status|check|rejoin|service}\n"), progname());
 	printf(_("    %s [OPTIONS] cluster {show|event|matrix|crosscheck|cleanup}\n"), progname());
 	printf(_("    %s [OPTIONS] witness {register|unregister}\n"), progname());
 	printf(_("    %s [OPTIONS] daemon  {status|pause|unpause}\n"), progname());
 	puts("");
@@ -2576,11 +2672,29 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf
 /*
  * Append to "output_buf" the leading part of a repmgr command line intended
  * for a remote node: an optional directory prefix, the program name, and
  * "-f <config_file> " using the config file recorded in
  * "remote_node_record".
  */
 void
 make_remote_repmgr_path(PQExpBufferData *output_buf, t_node_info *remote_node_record)
 {
 	/* "repmgr_bindir", when set, takes precedence over "pg_bindir" */
 	if (config_file_options.repmgr_bindir[0] != '\0')
 	{
 		int			len = strlen(config_file_options.repmgr_bindir);
 		appendPQExpBufferStr(output_buf,
 							 config_file_options.repmgr_bindir);
 		/* Add trailing slash */
 		if (config_file_options.repmgr_bindir[len - 1] != '/')
 		{
 			appendPQExpBufferChar(output_buf, '/');
 		}
 	}
 	/* NOTE(review): no slash is appended in this branch; presumably "pg_bindir" already ends with one - confirm */
 	else if (pg_bindir[0] != '\0')
 	{
 		appendPQExpBufferStr(output_buf,
 							 pg_bindir);
 	}
 	/* the trailing space lets the caller append the action directly */
 	appendPQExpBuffer(output_buf,
 					  "%s -f %s ",
-					  make_pg_path(progname()),
+					  progname(),
 					  remote_node_record->config_file);
 }
@@ -2978,3 +3092,45 @@ can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *rea
 	return can_use;
 }
 /*
  * Look up replication slot "slot_name" via "conn" and drop it if it exists
  * and is inactive. A missing slot is reported at verbose INFO level; an
  * active slot is left in place and a warning is logged. "node_id" is only
  * used in log output.
  */
 void
 drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
 {
 	t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
 	RecordStatus record_status = get_slot_record(conn, slot_name, &slot_info);
 	log_verbose(LOG_DEBUG, "attempting to delete slot \"%s\" on node %i",
 				slot_name, node_id);
 	/* no slot record: this is the desired state, nothing to do */
 	if (record_status != RECORD_FOUND)
 	{
 		log_verbose(LOG_INFO,
 					_("slot \"%s\" does not exist on node %i, nothing to remove"),
 					slot_name, node_id);
 		return;
 	}
 	/* inactive slot: attempt removal and report the outcome */
 	if (slot_info.active == false)
 	{
 		if (drop_replication_slot(conn, slot_name) == true)
 			log_notice(_("replication slot \"%s\" deleted on node %i"), slot_name, node_id);
 		else
 			log_error(_("unable to delete replication slot \"%s\" on node %i"), slot_name, node_id);
 		return;
 	}
 	/* the slot is still in use, so it is not dropped; just warn */
 	log_warning(_("replication slot \"%s\" is still active on node %i"), slot_name, node_id);
 }
--- a/repmgr-client.h
+++ b/repmgr-client.h
@@ -45,6 +45,9 @@
 #define CLUSTER_MATRIX		   19
 #define CLUSTER_CROSSCHECK	   20
 #define CLUSTER_EVENT		   21
 #define DAEMON_STATUS		   22
 #define DAEMON_PAUSE		   23
 #define DAEMON_UNPAUSE		   24
 /* command line options without short versions */
 #define OPT_HELP						   1001
@@ -88,6 +91,7 @@
 #define OPT_RECOVERY_CONF_ONLY             1039
 #define OPT_NO_WAIT                        1040
 #define OPT_MISSING_SLOTS                  1041
 #define OPT_REPMGRD_NO_PAUSE               1042
 /* deprecated since 3.3 */
 #define OPT_DATA_DIR						999
@@ -156,6 +160,7 @@ static struct option long_options[] =
 */
 	{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE},
 	{"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW},
 	{"repmgrd-no-pause", no_argument, NULL, OPT_REPMGRD_NO_PAUSE},
 /* "node status" options */
 	{"is-shutdown-cleanly", no_argument, NULL, OPT_IS_SHUTDOWN_CLEANLY},
--- a/repmgr.c
+++ b/repmgr.c
@@ -26,6 +26,7 @@
 #include "access/xlog.h"
 #include "miscadmin.h"
 #include "replication/walreceiver.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
 #include "storage/procarray.h"
@@ -43,14 +44,21 @@
 #include "lib/stringinfo.h"
 #include "access/xact.h"
 #include "utils/snapmgr.h"
 #include "pgstat.h"
 #if (PG_VERSION_NUM >= 90400)
 #include "pgstat.h"
 #else
 #define PGSTAT_STAT_PERMANENT_DIRECTORY             "pg_stat"
 #endif
 #include "voting.h"
 #define UNKNOWN_NODE_ID		-1
 #define UNKNOWN_PID			-1
 #define TRANCHE_NAME "repmgrd"
 #define REPMGRD_STATE_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/repmgrd_state.txt"
 PG_MODULE_MAGIC;
@@ -66,6 +74,9 @@ typedef struct repmgrdSharedState
 	LWLockId	lock;			/* protects search/modification */
 	TimestampTz last_updated;
 	int			local_node_id;
 	int			repmgrd_pid;
 	char		repmgrd_pidfile[MAXPGPATH];
 	bool		repmgrd_paused;
 	/* streaming failover */
 	NodeVotingStatus voting_status;
 	int			current_electoral_term;
@@ -112,6 +123,25 @@ PG_FUNCTION_INFO_V1(am_bdr_failover_handler);
 Datum		unset_bdr_failover_handler(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(unset_bdr_failover_handler);
 Datum		set_repmgrd_pid(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(set_repmgrd_pid);
 Datum		get_repmgrd_pid(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(get_repmgrd_pid);
 Datum		get_repmgrd_pidfile(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(get_repmgrd_pidfile);
 Datum		repmgrd_is_running(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(repmgrd_is_running);
 Datum		repmgrd_pause(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(repmgrd_pause);
 Datum		repmgrd_is_paused(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(repmgrd_is_paused);
 /*
 * Module load callback
@@ -185,6 +215,9 @@ repmgr_shmem_startup(void)
 #endif
 		shared_state->local_node_id = UNKNOWN_NODE_ID;
 		shared_state->repmgrd_pid = UNKNOWN_PID;
 		memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH);
 		shared_state->repmgrd_paused = false;
 		shared_state->current_electoral_term = 0;
 		shared_state->voting_status = VS_NO_VOTE;
 		shared_state->candidate_node_id = UNKNOWN_NODE_ID;
@@ -204,6 +237,8 @@ Datum
 set_local_node_id(PG_FUNCTION_ARGS)
 {
 	int			local_node_id = UNKNOWN_NODE_ID;
 	int			stored_node_id = UNKNOWN_NODE_ID;
 	int			paused = -1;
 	if (!shared_state)
 		PG_RETURN_NULL();
@@ -213,6 +248,34 @@ set_local_node_id(PG_FUNCTION_ARGS)
 	local_node_id = PG_GETARG_INT32(0);
 	/* read state file and if exists/valid, update "repmgrd_paused" */
 	{
 		FILE	   *file = NULL;
 		file = AllocateFile(REPMGRD_STATE_FILE, PG_BINARY_R);
 		if (file != NULL)
 		{
 			int			buffer_size = 128;
 			char		buffer[buffer_size];
 			if (fgets(buffer, buffer_size, file) != NULL)
 			{
 				if (sscanf(buffer, "%i:%i", &stored_node_id, &paused) != 2)
 				{
 					elog(WARNING, "unable to parse repmgrd state file");
 				}
 				else
 				{
 					elog(DEBUG1, "node_id: %i; paused: %i", stored_node_id, paused);
 				}
 			}
 			FreeFile(file);
 		}
 	}
 	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
 	/* only set local_node_id once, as it should never change */
@@ -221,6 +284,19 @@ set_local_node_id(PG_FUNCTION_ARGS)
 		shared_state->local_node_id = local_node_id;
 	}
 	/* only update if state file valid */
 	if (stored_node_id == shared_state->local_node_id)
 	{
 		if (paused == 0)
 		{
 			shared_state->repmgrd_paused = false;
 		}
 		else if (paused == 1)
 		{
 			shared_state->repmgrd_paused = true;
 		}
 	}
 	LWLockRelease(shared_state->lock);
 	PG_RETURN_VOID();
@@ -416,9 +492,191 @@ unset_bdr_failover_handler(PG_FUNCTION_ARGS)
 		LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
 		shared_state->bdr_failover_handler = UNKNOWN_NODE_ID;
 		LWLockRelease(shared_state->lock);
 	}
 	LWLockRelease(shared_state->lock);
 	PG_RETURN_VOID();
 }
 /*
  * Returns the repmgrd PID recorded in shared state.
  *
  * Returns SQL NULL only if the shared state is not available. If no PID has
  * been recorded, the stored value is UNKNOWN_PID (-1) and that is what is
  * returned. No check is made here that the process is actually running
  * (TODO).
  */
 Datum
 get_repmgrd_pid(PG_FUNCTION_ARGS)
 {
 	int repmgrd_pid = UNKNOWN_PID;
 	if (!shared_state)
 		PG_RETURN_NULL();
 	/* a shared lock is sufficient as the value is only read */
 	LWLockAcquire(shared_state->lock, LW_SHARED);
 	repmgrd_pid = shared_state->repmgrd_pid;
 	LWLockRelease(shared_state->lock);
 	PG_RETURN_INT32(repmgrd_pid);
 }
 /*
  * Returns the repmgrd pidfile path recorded in shared state as text.
  *
  * Returns SQL NULL if the shared state is not available or if no pidfile
  * path has been recorded.
  */
 Datum
 get_repmgrd_pidfile(PG_FUNCTION_ARGS)
 {
 	char		repmgrd_pidfile[MAXPGPATH];
 	if (!shared_state)
 		PG_RETURN_NULL();
 	memset(repmgrd_pidfile, 0, MAXPGPATH);
 	LWLockAcquire(shared_state->lock, LW_SHARED);
 	/*
 	 * strncpy() does not terminate the destination when the source fills it;
 	 * copy at most MAXPGPATH - 1 bytes so the zeroed final byte always
 	 * terminates the string handed to cstring_to_text().
 	 */
 	strncpy(repmgrd_pidfile, shared_state->repmgrd_pidfile, MAXPGPATH - 1);
 	LWLockRelease(shared_state->lock);
 	/* an empty path means no pidfile was registered */
 	if (repmgrd_pidfile[0] == '\0')
 		PG_RETURN_NULL();
 	PG_RETURN_TEXT_P(cstring_to_text(repmgrd_pidfile));
 }
 /*
  * Records the repmgrd PID (argument 0) and pidfile path (argument 1) in
  * shared state.
  *
  * A NULL PID is stored as UNKNOWN_PID. The pidfile path is only stored when
  * a PID was provided and the path argument is not NULL; otherwise the stored
  * path is cleared. Does nothing if the shared state is not available.
  */
 Datum
 set_repmgrd_pid(PG_FUNCTION_ARGS)
 {
 	int			repmgrd_pid = UNKNOWN_PID;
 	char	   *repmgrd_pidfile = NULL;
 	if (!shared_state)
 		PG_RETURN_VOID();
 	if (!PG_ARGISNULL(0))
 	{
 		repmgrd_pid = PG_GETARG_INT32(0);
 	}
 	elog(DEBUG3, "set_repmgrd_pid(): provided pid is %i", repmgrd_pid);
 	if (repmgrd_pid != UNKNOWN_PID && !PG_ARGISNULL(1))
 	{
 		repmgrd_pidfile = text_to_cstring(PG_GETARG_TEXT_PP(1));
 		elog(INFO, "set_repmgrd_pid(): provided pidfile is %s", repmgrd_pidfile);
 	}
 	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
 	shared_state->repmgrd_pid = repmgrd_pid;
 	/* always clear the previous path so a stale value cannot survive */
 	memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH);
 	if (repmgrd_pidfile != NULL)
 	{
 		/*
 		 * Copy at most MAXPGPATH - 1 bytes: strncpy() does not terminate the
 		 * destination when the source is too long, so the zeroed final byte
 		 * must be left intact for readers of the shared buffer.
 		 */
 		strncpy(shared_state->repmgrd_pidfile, repmgrd_pidfile, MAXPGPATH - 1);
 	}
 	LWLockRelease(shared_state->lock);
 	PG_RETURN_VOID();
 }
 /*
  * Reports whether the repmgrd process whose PID is recorded in shared state
  * can be signalled.
  *
  * Returns SQL NULL if the shared state is not available, false if no PID is
  * registered or kill() fails, and true otherwise.
  */
 Datum
 repmgrd_is_running(PG_FUNCTION_ARGS)
 {
 	int			pid;
 	if (!shared_state)
 		PG_RETURN_NULL();
 	LWLockAcquire(shared_state->lock, LW_SHARED);
 	pid = shared_state->repmgrd_pid;
 	LWLockRelease(shared_state->lock);
 	/*
 	 * With no PID registered, assume repmgrd is not running; otherwise probe
 	 * the process with signal 0, which is only evaluated for a known PID.
 	 */
 	PG_RETURN_BOOL(pid != UNKNOWN_PID && kill(pid, 0) == 0);
 }
 Datum
 repmgrd_pause(PG_FUNCTION_ARGS)
 {
 	bool		pause;
 	FILE	   *file = NULL;
 	StringInfoData buf;
 	if (!shared_state)
 		PG_RETURN_NULL();
 	if (PG_ARGISNULL(0))
 		PG_RETURN_NULL();
 	pause = PG_GETARG_BOOL(0);
 	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
 	shared_state->repmgrd_paused = pause;
 	LWLockRelease(shared_state->lock);
 	/* write state to file */
 	file = AllocateFile(REPMGRD_STATE_FILE, PG_BINARY_W);
 	if (file == NULL)
 	{
 		elog(DEBUG1, "unable to allocate %s", REPMGRD_STATE_FILE);
 		// XXX anything else we can do? log?
 		PG_RETURN_VOID();
 	}
 	elog(DEBUG1, "allocated");
 	initStringInfo(&buf);
 	LWLockAcquire(shared_state->lock, LW_SHARED);
 	appendStringInfo(&buf, "%i:%i",
 					 shared_state->local_node_id,
 					 pause ? 1 : 0);
 	LWLockRelease(shared_state->lock);
 	// XXX check success
 	fwrite(buf.data, strlen(buf.data) + 1, 1, file);
 	resetStringInfo(&buf);
 	FreeFile(file);
 	PG_RETURN_VOID();
 }
 /*
  * Returns the current value of the repmgrd "paused" flag held in shared
  * state, or SQL NULL if the shared state is not available.
  */
 Datum
 repmgrd_is_paused(PG_FUNCTION_ARGS)
 {
 	bool is_paused;
 	if (!shared_state)
 		PG_RETURN_NULL();
 	/* a shared lock is sufficient as the flag is only read */
 	LWLockAcquire(shared_state->lock, LW_SHARED);
 	is_paused = shared_state->repmgrd_paused;
 	LWLockRelease(shared_state->lock);
 	PG_RETURN_BOOL(is_paused);
 }
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -143,6 +143,15 @@
 					# Debian/Ubuntu users: you will probably need to
 					# set this to the directory where `pg_ctl` is located,
 					# e.g. /usr/lib/postgresql/9.6/bin/
 					#
 					# *NOTE* "pg_bindir" is only used when repmgr directly
 					# executes PostgreSQL binaries; any user-defined scripts
 					# *must* be specified with the full path
 #repmgr_bindir=''			# Path to repmgr binary directory (location of the repmgr
 					# binary). Only needed if the repmgr executable is not in
 					# the system $PATH or the path defined in "pg_bindir".
 #use_primary_conninfo_password=false	# explicitly set "password" in recovery.conf's
 					# "primary_conninfo" parameter using the value contained
 					# in the environment variable PGPASSWORD
@@ -156,7 +165,7 @@
 # Examples:
 #
 #   pg_ctl_options='-s'
-#   pg_basebackup_options='--label=repmgr_backup
+#   pg_basebackup_options='--label=repmgr_backup'
 #   rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
 #   ssh_options=-o "StrictHostKeyChecking no"
@@ -183,11 +192,11 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 					# parameter can be provided multiple times.
 #restore_command=''			# This will be placed in the recovery.conf file generated
-                                        # by repmgr.
+					# by repmgr.
 #archive_cleanup_command=''		# This will be placed in the recovery.conf file generated
-                                        # by repmgr. Note we recommend using Barman for managing
+					# by repmgr. Note we recommend using Barman for managing
-                                        # WAL archives (see: https://www.pgbarman.org )
+					# WAL archives (see: https://www.pgbarman.org )
 #recovery_min_apply_delay=		# If provided, "recovery_min_apply_delay" in recovery.conf
 					# will be set to this value (PostgreSQL 9.4 and later).
@@ -226,6 +235,8 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 # These settings apply when switching roles between a primary and a standby
 # ("repmgr standby switchover").
 #shutdown_check_timeout=60		# The max length of time (in seconds) to wait for the demotion
 					# candidate (current primary) to shut down
 #standby_reconnect_timeout=60		# The max length of time (in seconds) to wait
 					# for the demoted standby to reconnect to the promoted
 					# primary (note: this value should be equal to or greater
@@ -259,10 +270,10 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 # are defaults.
 #repmgrd_pid_file=			# Path of PID file to use for repmgrd; if not set, a PID file will
-                                        # be generated in a temporary directory specified by the environment
+					# be generated in a temporary directory specified by the environment
-                                        # variable $TMPDIR, or if not set, in "/tmp". This value can be overridden
+					# variable $TMPDIR, or if not set, in "/tmp". This value can be overridden
-                                        # by the command line option "-p/--pid-file"; the command line option
+					# by the command line option "-p/--pid-file"; the command line option
-                                        # "--no-pid-file" will force PID file creation to be skipped.
+					# "--no-pid-file" will force PID file creation to be skipped.
 #failover=manual			# one of 'automatic', 'manual'.
 					# determines what action to take in the event of upstream failure
 					#
@@ -276,9 +287,9 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 					# a value of zero prevents the node being promoted to primary
 					# (default: 100)
-#reconnect_attempts=6			# Number attempts which will be made to reconnect to an unreachable
+#reconnect_attempts=6			# Number of attempts which will be made to reconnect to an unreachable
 					# primary (or other upstream node)
-#reconnect_interval=10			# Interval between attempts  to reconnect to an unreachable
+#reconnect_interval=10			# Interval between attempts to reconnect to an unreachable
 					# primary (or other upstream node)
 #promote_command=			# command repmgrd executes when promoting a new primary; use something like:
 					#
@@ -332,7 +343,7 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 #
 # Debian/Ubuntu users: use "sudo pg_ctlcluster" to execute service control commands.
 #
-# For more details, see: https://repmgr.org/docs/4.0/configuration-service-commands.html
+# For more details, see: https://repmgr.org/docs/4.1/configuration-service-commands.html
 #service_start_command = ''
 #service_stop_command = ''
@@ -376,7 +387,7 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 #------------------------------------------------------------------------------
 #bdr_local_monitoring_only=false         # Only monitor the local node; no checks will be
-                                         # performed on the other node
+					 # performed on the other node
 #bdr_recovery_timeout                    # If a BDR node was offline and has become available
-                                         # maximum length of time in seconds to wait for the
+					 # maximum length of time in seconds to wait for the
-                                         # node to reconnect to the cluster
+					 # node to reconnect to the cluster
--- a/repmgr.control
+++ b/repmgr.control
@@ -1,6 +1,6 @@
 # repmgr extension
 comment = 'Replication manager for PostgreSQL'
-default_version = '4.1'
+default_version = '4.2'
 module_pathname = '$libdir/repmgr'
 relocatable = false
 schema = repmgr
--- a/repmgr.h
+++ b/repmgr.h
@@ -53,6 +53,7 @@
 #define UNKNOWN_TIMELINE_ID -1
 #define UNKNOWN_SYSTEM_IDENTIFIER 0
 #define UNKNOWN_PID			-1
 #define NODE_NOT_FOUND		-1
 #define NO_UPSTREAM_NODE	-1
@@ -84,6 +85,7 @@
 #define DEFAULT_WAIT_START                   30  /* seconds */
 #define DEFAULT_PROMOTE_CHECK_TIMEOUT        60  /* seconds */
 #define DEFAULT_PROMOTE_CHECK_INTERVAL       1   /* seconds */
 #define DEFAULT_SHUTDOWN_CHECK_TIMEOUT       60  /* seconds */
 #define DEFAULT_STANDBY_RECONNECT_TIMEOUT    60  /* seconds */
 #define DEFAULT_NODE_REJOIN_TIMEOUT          60  /* seconds */
--- a/repmgr_version.h.in
+++ b/repmgr_version.h.in
@@ -1,2 +1,2 @@
 #define REPMGR_VERSION_DATE ""
-#define REPMGR_VERSION "4.1dev"
+#define REPMGR_VERSION "4.2"
--- a/repmgrd-bdr.c
+++ b/repmgrd-bdr.c
@@ -150,7 +150,13 @@ monitor_bdr(void)
 	 * retrieve list of all nodes - we'll need these if the DB connection goes
 	 * away
 	 */
-	get_all_node_records(local_conn, &nodes);
+	if (get_all_node_records(local_conn, &nodes) == false)
 	{
 		/* get_all_node_records() will display the error */
 		PQfinish(local_conn);
 		exit(ERR_BAD_CONFIG);
 	}
 	/* we're expecting all (both) nodes to be up */
 	for (cell = nodes.head; cell; cell = cell->next)
@@ -214,7 +220,8 @@ monitor_bdr(void)
 								log_warning(_("unable to connect to node %s (ID %i)"),
 											cell->node_info->node_name, cell->node_info->node_id);
-								cell->node_info->conn = try_reconnect(cell->node_info);
+								//cell->node_info->conn = try_reconnect(cell->node_info);
 								try_reconnect(&cell->node_info->conn, cell->node_info);
 								/* node has recovered - log and continue */
 								if (cell->node_info->node_status == NODE_STATUS_UP)
@@ -293,7 +300,7 @@ loop:
 			/*
 			 * if we can reload, then could need to change local_conn
 			 */
-			if (reload_config(&config_file_options))
+			if (reload_config(&config_file_options, BDR))
 			{
 				PQfinish(local_conn);
 				local_conn = establish_db_connection(config_file_options.conninfo, true);
@@ -303,11 +310,12 @@ loop:
 			got_SIGHUP = false;
 		}
 		/* XXX this looks like it will never be called */
 		if (got_SIGHUP)
 		{
 			log_debug("SIGHUP received");
-			if (reload_config(&config_file_options))
+			if (reload_config(&config_file_options, BDR))
 			{
 				PQfinish(local_conn);
 				local_conn = establish_db_connection(config_file_options.conninfo, true);
--- a/repmgrd-physical.c
+++ b/repmgrd-physical.c
--- a/repmgrd.c
+++ b/repmgrd.c
@@ -35,7 +35,7 @@
 static char *config_file = NULL;
 static bool verbose = false;
-static char pid_file[MAXPGPATH];
+char pid_file[MAXPGPATH];
 static bool daemonize = true;
 static bool show_pid_file = false;
 static bool no_pid_file = false;
@@ -101,7 +101,8 @@ main(int argc, char **argv)
 		{"config-file", required_argument, NULL, 'f'},
 /* daemon options */
-		{"daemonize", optional_argument, NULL, 'd'},
+		{"daemonize-short", optional_argument, NULL, 'd'},
 		{"daemonize", optional_argument, NULL, OPT_DAEMONIZE},
 		{"pid-file", required_argument, NULL, 'p'},
 		{"show-pid-file", no_argument, NULL, 's'},
 		{"no-pid-file", no_argument, NULL, OPT_NO_PID_FILE},
@@ -175,10 +176,11 @@ main(int argc, char **argv)
 				/* daemon options */
 			case 'd':
-				if (optarg != NULL)
+				daemonize = true;
-				{
+				break;
-					daemonize = parse_bool(optarg, "-d/--daemonize", &cli_errors);
+
-				}
+			case OPT_DAEMONIZE:
 				daemonize = parse_bool(optarg, "-d/--daemonize", &cli_errors);
 				break;
 			case 'p':
@@ -320,8 +322,6 @@ main(int argc, char **argv)
 		strncpy(config_file_options.log_level, cli_log_level, MAXLEN);
 	}
 	log_notice(_("repmgrd (repmgr %s) starting up"), REPMGR_VERSION);
 	/*
 	 * -m/--monitoring-history, if provided, will override repmgr.conf's
 	 * monitoring_history; this is for backwards compatibility as it's
@@ -349,6 +349,8 @@ main(int argc, char **argv)
 	logger_init(&config_file_options, progname());
 	log_notice(_("repmgrd (%s %s) starting up"), progname(), REPMGR_VERSION);
 	if (verbose)
 		logger_set_verbose();
@@ -488,6 +490,9 @@ main(int argc, char **argv)
 		check_and_create_pid_file(pid_file);
 	}
 	repmgrd_set_pid(local_conn, getpid(), pid_file);
 #ifndef WIN32
 	setup_event_handlers();
 #endif
@@ -760,7 +765,8 @@ show_help(void)
 	puts("");
 	printf(_("Daemon configuration options:\n"));
-	printf(_("  -d, --daemonize[=true/false]\n"));
+	printf(_("  -d\n"));
 	printf(_("  --daemonize[=true/false]\n"));
 	printf(_("                            detach process from foreground (default: true)\n"));
 	printf(_("  -p, --pid-file=PATH       use the specified PID file\n"));
 	printf(_("  -s, --show-pid-file       show PID file which would be used by the current configuration\n"));
@@ -770,10 +776,10 @@ show_help(void)
 }
-PGconn *
+void
-try_reconnect(t_node_info *node_info)
+try_reconnect(PGconn **conn, t_node_info *node_info)
 {
-	PGconn	   *conn;
+	PGconn	   *our_conn;
 	t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
 	int			i;
@@ -782,7 +788,6 @@ try_reconnect(t_node_info *node_info)
 	initialize_conninfo_params(&conninfo_params, false);
 	/* we assume by now the conninfo string is parseable */
 	(void) parse_conninfo_string(node_info->conninfo, &conninfo_params, NULL, false);
@@ -805,18 +810,47 @@ try_reconnect(t_node_info *node_info)
 			 * degraded monitoring? - make that configurable
 			 */
-			conn = establish_db_connection_by_params(&conninfo_params, false);
+			our_conn = establish_db_connection_by_params(&conninfo_params, false);
-			if (PQstatus(conn) == CONNECTION_OK)
+			if (PQstatus(our_conn) == CONNECTION_OK)
 			{
 				free_conninfo_params(&conninfo_params);
 				log_info(_("connection to node %i succeeded"), node_info->node_id);
 				if (PQstatus(*conn) == CONNECTION_BAD)
 				{
 					log_verbose(LOG_INFO, "original connection handle returned CONNECTION_BAD, using new connection");
 					close_connection(conn);
 					*conn = our_conn;
 				}
 				else
 				{
 					ExecStatusType ping_result;
 					ping_result = connection_ping(*conn);
 					if (ping_result != PGRES_TUPLES_OK)
 					{
 						log_info("original connnection no longer available, using new connection");
 						close_connection(conn);
 						*conn = our_conn;
 					}
 					else
 					{
 						log_info(_("original connection is still available"));
 						PQfinish(our_conn);
 					}
 				}
 				node_info->node_status = NODE_STATUS_UP;
-				return conn;
+
 				return;
 			}
-			close_connection(&conn);
+			close_connection(&our_conn);
-			log_notice(_("unable to reconnect to node"));
+			log_notice(_("unable to reconnect to node %i"), node_info->node_id);
 		}
 		if (i + 1 < max_attempts)
@@ -835,7 +869,7 @@ try_reconnect(t_node_info *node_info)
 	free_conninfo_params(&conninfo_params);
-	return NULL;
+	return;
 }
@@ -873,6 +907,9 @@ print_monitoring_state(MonitoringState monitoring_state)
 void
 terminate(int retval)
 {
 	if (PQstatus(local_conn)  == CONNECTION_OK)
 		repmgrd_set_pid(local_conn, UNKNOWN_PID, NULL);
 	logger_shutdown();
 	if (pid_file[0] != '\0')
--- a/repmgrd.h
+++ b/repmgrd.h
@@ -11,6 +11,7 @@
 #include "portability/instr_time.h"
 #define OPT_NO_PID_FILE                  1000
 #define OPT_DAEMONIZE                    1001
 extern volatile sig_atomic_t got_SIGHUP;
 extern MonitoringState monitoring_state;
@@ -20,8 +21,9 @@ extern t_configuration_options config_file_options;
 extern t_node_info local_node_info;
 extern PGconn *local_conn;
 extern bool startup_event_logged;
 extern char pid_file[MAXPGPATH];
-PGconn	   *try_reconnect(t_node_info *node_info);
+void		try_reconnect(PGconn **conn, t_node_info *node_info);
 int			calculate_elapsed(instr_time start_time);
 const char *print_monitoring_state(MonitoringState monitoring_state);
--- a/strutil.c
+++ b/strutil.c
@@ -87,17 +87,17 @@ append_where_clause(PQExpBufferData *where_clause, const char *format,...)
 	if (where_clause->data[0] == '\0')
 	{
-		appendPQExpBuffer(where_clause,
+		appendPQExpBufferStr(where_clause,
-						  " WHERE ");
+							 " WHERE ");
 	}
 	else
 	{
-		appendPQExpBuffer(where_clause,
+		appendPQExpBufferStr(where_clause,
-						  " AND ");
+							 " AND ");
 	}
-	appendPQExpBuffer(where_clause,
+	appendPQExpBufferStr(where_clause,
-					  "%s", stringbuf);
+						 stringbuf);
 }
`@@ -1,4 +1,4 @@`
	`AC_INIT([repmgr], [4.1], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])`	`AC_INIT([repmgr], [4.2], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])`

	`AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])`	`AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])`
`@@ -1 +1 @@`
	`<!ENTITY repmgrversion "4.1dev">`	`<!ENTITY repmgrversion "4.2">`
`@@ -1,2 +1,2 @@`
	`#define REPMGR_VERSION_DATE ""`	`#define REPMGR_VERSION_DATE ""`
	`#define REPMGR_VERSION "4.1dev"`	`#define REPMGR_VERSION "4.2"`