doc: emphasise that BDR2 support is for BDR2 only

doc: add a link to the current documentation from the contents page
doc: clarify "cluster show" error codes
2026-03-23 07:06:30 +00:00 · 2019-04-05 11:16:14 +09:00 · 2019-04-03 10:45:26 +09:00 · 2019-03-18 10:51:04 +09:00 · 2019-03-15 15:08:19 +09:00 · 2019-03-15 14:02:59 +09:00
85 changed files with 7084 additions and 2024 deletions
--- a/FAQ.md
+++ b/FAQ.md
@@ -1,8 +1,10 @@
 FAQ - Frequently Asked Questions about repmgr
 =============================================

-The repmgr 4 FAQ is located here: [repmgr FAQ (Frequently Asked Questions)](https://repmgr.org/docs/4.0/appendix-faq.html "repmgr FAQ")
+The repmgr 4 FAQ is located here: [repmgr FAQ (Frequently Asked Questions)](https://repmgr.org/docs/current/appendix-faq.html "repmgr FAQ")

 The repmgr 3.x FAQ can be found here:

    https://github.com/2ndQuadrant/repmgr/blob/REL3_3_STABLE/FAQ.md
+
+Note that repmgr 3.x is no longer supported.
--- a/31
+++ b/31
@@ -1,4 +1,33 @@
-4.1.0   2018-??-??
+4.2     2018-10-24
+        repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
+          GitHub #504 (Ian)
+        repmgr: add "--node-id" option to "repmgr cluster cleanup"; GitHub #493 (Ian)
+        repmgr: report unreachable nodes when running "repmgr cluster (matrix|crosscheck)";
+          GitHub #246 (Ian)
+        repmgr: add configuration file parameter "repmgr_bindir"; GitHub #246 (Ian)
+        repmgr: fix "Missing replication slots" label in "node check"; GitHub #507 (Ian)
+        repmgrd: fix parsing of -d/--daemonize option (Ian)
+        repmgrd: support "pausing" of repmgrd (Ian)
+
+4.1.1   2018-09-05
+        logging: explicitly log the text of failed queries as ERRORs to
+          assist logfile analysis; GitHub #498
+        repmgr: truncate version string, if necessary; GitHub #490 (Ian)
+        repmgr: improve messages emitted during "standby promote" (Ian)
+        repmgr: "standby clone" - don't copy external config files in --dry-run
+          mode; GitHub #491 (Ian)
+        repmgr: add "cluster_cleanup" event; GitHub #492 (Ian)
+        repmgr: (standby switchover) improve detection of free walsenders;
+          GitHub #495 (Ian)
+        repmgr: (node rejoin) improve replication slot handling; GitHub #499 (Ian)
+        repmgrd: ensure that sending SIGHUP always results in the log file
+          being reopened; GitHub #485 (Ian)
+        repmgrd: report version number *after* logger initialisation; GitHub #487 (Ian)
+        repmgrd: fix startup on witness node when local data is stale; GitHub #488/#489 (Ian)
+        repmgrd: improve cascaded standby failover handling; GitHub #480 (Ian)
+        repmgrd: improve reconnection handling (Ian)
+
+4.1.0   2018-07-31
        repmgr: change default log_level to INFO, add documentation; GitHub #470 (Ian)
        repmgr: add "--missing-slots" check to "repmgr node check" (Ian)
        repmgr: improve command line error handling; GitHub #464 (Ian)
--- a/Makefile.in
+++ b/Makefile.in
@@ -13,8 +13,9 @@ DATA = \
  repmgr--unpackaged--4.0.sql \
  repmgr--4.0.sql \
  repmgr--4.0--4.1.sql \
-  repmgr--4.1.sql
-
+  repmgr--4.1.sql \
+  repmgr--4.1--4.2.sql \
+  repmgr--4.2.sql

 REGRESS = repmgr_extension

@@ -29,19 +30,24 @@ all: \
 PG_CPPFLAGS = -std=gnu89 -I$(includedir_internal) -I$(libpq_srcdir) -Wall -Wmissing-prototypes -Wmissing-declarations $(EXTRA_CFLAGS)
 SHLIB_LINK = $(libpq)

-HEADERS = $(wildcard *.h)
+

 OBJS = \
 	repmgr.o

 include Makefile.global

+ifeq ($(vpath_build),yes)
+	HEADERS = $(wildcard *.h)
+else
+	HEADERS_built = $(wildcard *.h)
+endif

 $(info Building against PostgreSQL $(MAJORVERSION))

 REPMGR_CLIENT_OBJS = repmgr-client.o \
 	repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
-	repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
+	repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-daemon.o \
 	configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o
 REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o
 DATE=$(shell date "+%Y-%m-%d")
@@ -85,6 +91,7 @@ additional-clean:
 	rm -f repmgr-action-bdr.o
 	rm -f repmgr-action-node.o
 	rm -f repmgr-action-cluster.o
+	rm -f repmgr-action-daemon.o
 	rm -f repmgrd.o
 	rm -f repmgrd-physical.o
 	rm -f repmgrd-bdr.o
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ operations.
 `repmgr 4` is a complete rewrite of the existing `repmgr` codebase, allowing
 the use of all of the latest features in PostgreSQL replication.

-PostgreSQL 10, 9.6 and 9.5 are fully supported.
+PostgreSQL 11, 10, 9.6 and 9.5 are fully supported.
 PostgreSQL 9.4 and 9.3 are supported, with some restrictions.

 `repmgr` is distributed under the GNU GPL 3 and maintained by 2ndQuadrant.
@@ -19,7 +19,7 @@ PostgreSQL 9.4 and 9.3 are supported, with some restrictions.

 `repmgr 4` supports monitoring of a two-node BDR 2.0 cluster on PostgreSQL 9.6
 only. Note that BDR 2.0 is not publicly available; please contact 2ndQuadrant
-for details. `repmgr 4` will support future public BDR releases.
+for details.


 Documentation
@@ -27,7 +27,7 @@ Documentation

 The main `repmgr` documentation is available here:

-> [repmgr 4 documentation](https://repmgr.org/docs/4.0/index.html)
+> [repmgr 4 documentation](https://repmgr.org/docs/4.2/index.html)

 The `README` file for `repmgr` 3.x is available here:

--- a/configfile.c
+++ b/configfile.c
@@ -28,6 +28,7 @@ char		config_file_path[MAXPGPATH] = "";
 static bool config_file_provided = false;
 bool		config_file_found = false;

+static void parse_config(t_configuration_options *options, bool terse);
 static void _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *warning_list);

 static void _parse_line(char *buf, char *name, char *value);
@@ -87,8 +88,7 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o

 			if (pwd != NULL)
 			{
-				appendPQExpBuffer(&fullpath,
-								  "%s", pwd);
+				appendPQExpBufferStr(&fullpath, pwd);
 			}
 			else
 			{
@@ -104,9 +104,7 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
 					exit(ERR_BAD_CONFIG);
 				}

-				appendPQExpBuffer(&fullpath,
-								  "%s",
-								  cwd);
+				appendPQExpBufferStr(&fullpath, cwd);
 			}

 			appendPQExpBuffer(&fullpath,
@@ -238,7 +236,7 @@ end_search:
 }


-void
+static void
 parse_config(t_configuration_options *options, bool terse)
 {
 	/* Collate configuration file errors here for friendlier reporting */
@@ -287,6 +285,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	memset(options->data_directory, 0, sizeof(options->data_directory));
 	memset(options->config_directory, 0, sizeof(options->data_directory));
 	memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
+	memset(options->repmgr_bindir, 0, sizeof(options->repmgr_bindir));
 	options->replication_type = REPLICATION_TYPE_PHYSICAL;

 	/*-------------
@@ -334,6 +333,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	 * standby switchover settings
 	 *------------------------
 	 */
+	options->shutdown_check_timeout = DEFAULT_SHUTDOWN_CHECK_TIMEOUT;
 	options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;

 	/*-----------------
@@ -488,6 +488,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 		}
 		else if (strcmp(name, "pg_bindir") == 0)
 			strncpy(options->pg_bindir, value, MAXPGPATH);
+		else if (strcmp(name, "repmgr_bindir") == 0)
+			strncpy(options->repmgr_bindir, value, MAXPGPATH);

 		else if (strcmp(name, "replication_type") == 0)
 		{
@@ -544,6 +546,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 			options->standby_follow_timeout = repmgr_atoi(value, name, error_list, 0);

 		/* standby switchover settings */
+		else if (strcmp(name, "shutdown_check_timeout") == 0)
+			options->shutdown_check_timeout = repmgr_atoi(value, name, error_list, 0);
 		else if (strcmp(name, "standby_reconnect_timeout") == 0)
 			options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);

@@ -785,7 +789,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 		PQconninfoFree(conninfo_options);
 	}

-
 	/* set values for parameters which default to other parameters */

 	/*
@@ -813,13 +816,13 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	if (options->archive_ready_warning >= options->archive_ready_critical)
 	{
 		item_list_append(error_list,
-						 _("\archive_ready_critical\" must be greater than  \"archive_ready_warning\""));
+						 _("\"archive_ready_critical\" must be greater than  \"archive_ready_warning\""));
 	}

 	if (options->replication_lag_warning >= options->replication_lag_critical)
 	{
 		item_list_append(error_list,
-						 _("\replication_lag_critical\" must be greater than  \"replication_lag_warning\""));
+						 _("\"replication_lag_critical\" must be greater than  \"replication_lag_warning\""));
 	}

 	if (options->standby_reconnect_timeout < options->node_rejoin_timeout)
@@ -1052,11 +1055,13 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
 * - repmgrd_standby_startup_timeout
 * - retry_promote_interval_secs
 *
- * non-changeable options
+ * non-changeable options (repmgrd references these from the "repmgr.nodes"
+ * table, not the configuration file)
 *
 * - node_id
 * - node_name
 * - data_directory
+ * - location
 * - priority
 * - replication_type
 *
@@ -1065,7 +1070,7 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL

 */
 bool
-reload_config(t_configuration_options *orig_options)
+reload_config(t_configuration_options *orig_options, t_server_type server_type)
 {
 	PGconn	   *conn;
 	t_configuration_options new_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
@@ -1081,6 +1086,20 @@ reload_config(t_configuration_options *orig_options)

 	_parse_config(&new_options, &config_errors, &config_warnings);

+
+	if (server_type == PRIMARY || server_type == STANDBY)
+	{
+		if (new_options.promote_command[0] == '\0')
+		{
+			item_list_append(&config_errors, _("\"promote_command\": required parameter was not found"));
+		}
+
+		if (new_options.follow_command[0] == '\0')
+		{
+			item_list_append(&config_errors, _("\"follow_command\": required parameter was not found"));
+		}
+	}
+
 	if (config_errors.head != NULL)
 	{
 		ItemListCell *cell = NULL;
@@ -1089,8 +1108,8 @@ reload_config(t_configuration_options *orig_options)

 		initPQExpBuffer(&errors);

-		appendPQExpBuffer(&errors,
-						  "following errors were detected:\n");
+		appendPQExpBufferStr(&errors,
+							 "following errors were detected:\n");

 		for (cell = config_errors.head; cell; cell = cell->next)
 		{
@@ -1258,7 +1277,7 @@ reload_config(t_configuration_options *orig_options)
 		config_changed = true;
 	}

-	/* promote_delay */
+	/* promote_delay (for testing use only; not documented) */
 	if (orig_options->promote_delay != new_options.promote_delay)
 	{
 		orig_options->promote_delay = new_options.promote_delay;
@@ -1512,6 +1531,9 @@ parse_bool(const char *s, const char *config_item, ItemList *error_list)
 {
 	PQExpBufferData errors;

+	if (s == NULL)
+		return true;
+
 	if (strcasecmp(s, "0") == 0)
 		return false;

--- a/configfile.h
+++ b/configfile.h
@@ -75,6 +75,7 @@ typedef struct
 	char		data_directory[MAXPGPATH];
 	char		config_directory[MAXPGPATH];
 	char		pg_bindir[MAXPGPATH];
+	char		repmgr_bindir[MAXPGPATH];
 	int			replication_type;

 	/* log settings */
@@ -103,6 +104,7 @@ typedef struct
 	int			standby_follow_timeout;

 	/* standby switchover settings */
+	int			shutdown_check_timeout;
 	int			standby_reconnect_timeout;

 	/* node rejoin settings */
@@ -170,7 +172,7 @@ typedef struct

 #define T_CONFIGURATION_OPTIONS_INITIALIZER { \
 		/* node information */ \
-		UNKNOWN_NODE_ID, "", "", "", "", "", "", REPLICATION_TYPE_PHYSICAL,	\
+		UNKNOWN_NODE_ID, "", "", "", "", "", "", "", REPLICATION_TYPE_PHYSICAL,	\
 		/* log settings */ \
 		"", "", "", DEFAULT_LOG_STATUS_INTERVAL,	\
 		/* standby clone settings */ \
@@ -181,6 +183,7 @@ typedef struct
 		DEFAULT_PRIMARY_FOLLOW_TIMEOUT,	\
 		DEFAULT_STANDBY_FOLLOW_TIMEOUT,	\
 		/* standby switchover settings */ \
+		DEFAULT_SHUTDOWN_CHECK_TIMEOUT, \
 		DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
 		/* node rejoin settings */ \
 		DEFAULT_NODE_REJOIN_TIMEOUT, \
@@ -273,13 +276,13 @@ typedef struct
 	"", "", "", "" \
 }

+#include "dbutils.h"

 void		set_progname(const char *argv0);
 const char *progname(void);

 void		load_config(const char *config_file, bool verbose, bool terse, t_configuration_options *options, char *argv0);
-void		parse_config(t_configuration_options *options, bool terse);
-bool		reload_config(t_configuration_options *orig_options);
+bool		reload_config(t_configuration_options *orig_options, t_server_type server_type);

 bool		parse_recovery_conf(const char *data_dir, t_recovery_conf *conf);

--- a/18
+++ b/18
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for repmgr 4.1.
+# Generated by GNU Autoconf 2.69 for repmgr 4.2.
 #
 # Report bugs to <pgsql-bugs@postgresql.org>.
 #
@@ -582,8 +582,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='repmgr'
 PACKAGE_TARNAME='repmgr'
-PACKAGE_VERSION='4.1'
-PACKAGE_STRING='repmgr 4.1'
+PACKAGE_VERSION='4.2'
+PACKAGE_STRING='repmgr 4.2'
 PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
 PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'

@@ -1178,7 +1178,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures repmgr 4.1 to adapt to many kinds of systems.
+\`configure' configures repmgr 4.2 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1239,7 +1239,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of repmgr 4.1:";;
+     short | recursive ) echo "Configuration of repmgr 4.2:";;
   esac
  cat <<\_ACEOF

@@ -1313,7 +1313,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-repmgr configure 4.1
+repmgr configure 4.2
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1332,7 +1332,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by repmgr $as_me 4.1, which was
+It was created by repmgr $as_me 4.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  $ $0 $@
@@ -2359,7 +2359,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by repmgr $as_me 4.1, which was
+This file was extended by repmgr $as_me 4.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -2422,7 +2422,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-repmgr config.status 4.1
+repmgr config.status 4.2
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"

--- a/configure.in
+++ b/configure.in
@@ -1,4 +1,4 @@
-AC_INIT([repmgr], [4.1], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
+AC_INIT([repmgr], [4.2], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])

 AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])

--- a/controldata.c
+++ b/controldata.c
@@ -227,7 +227,15 @@ get_controlfile(const char *DataDir)

 	control_file_info->control_file_processed = true;

-	if (version_num >= 90500)
+	if (version_num >= 110000)
+	{
+		ControlFileData11 *ptr = (struct ControlFileData11 *)ControlFileDataPtr;
+		control_file_info->system_identifier = ptr->system_identifier;
+		control_file_info->state = ptr->state;
+		control_file_info->checkPoint = ptr->checkPoint;
+		control_file_info->data_checksum_version = ptr->data_checksum_version;
+	}
+	else if (version_num >= 90500)
 	{
 		ControlFileData95 *ptr = (struct ControlFileData95 *)ControlFileDataPtr;
 		control_file_info->system_identifier = ptr->system_identifier;
--- a/controldata.h
+++ b/controldata.h
@@ -265,6 +265,71 @@ typedef struct ControlFileData95

 } ControlFileData95;

+/*
+ * Following field removed in 11:
+ *
+ *  XLogRecPtr	prevCheckPoint;
+ *
+ * In 10, following field appended *after* "data_checksum_version":
+ *
+ * 	char		mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];
+ *
+ * (but we don't care about that)
+ */
+
+typedef struct ControlFileData11
+{
+	uint64		system_identifier;
+
+	uint32		pg_control_version;		/* PG_CONTROL_VERSION */
+	uint32		catalog_version_no;		/* see catversion.h */
+
+	DBState		state;			/* see enum above */
+	pg_time_t	time;			/* time stamp of last pg_control update */
+	XLogRecPtr	checkPoint;		/* last check point record ptr */
+
+	CheckPoint95	checkPointCopy; /* copy of last check point record */
+
+	XLogRecPtr	unloggedLSN;	/* current fake LSN value, for unlogged rels */
+
+	XLogRecPtr	minRecoveryPoint;
+	TimeLineID	minRecoveryPointTLI;
+	XLogRecPtr	backupStartPoint;
+	XLogRecPtr	backupEndPoint;
+	bool		backupEndRequired;
+
+	int			wal_level;
+	bool		wal_log_hints;
+	int			MaxConnections;
+	int			max_worker_processes;
+	int			max_prepared_xacts;
+	int			max_locks_per_xact;
+	bool		track_commit_timestamp;
+
+	uint32		maxAlign;		/* alignment requirement for tuples */
+	double		floatFormat;	/* constant 1234567.0 */
+
+	uint32		blcksz;			/* data block size for this DB */
+	uint32		relseg_size;	/* blocks per segment of large relation */
+
+	uint32		xlog_blcksz;	/* block size within WAL files */
+	uint32		xlog_seg_size;	/* size of each WAL segment */
+
+	uint32		nameDataLen;	/* catalog name field width */
+	uint32		indexMaxKeys;	/* max number of columns in an index */
+
+	uint32		toast_max_chunk_size;	/* chunk size in TOAST tables */
+	uint32		loblksize;		/* chunk size in pg_largeobject */
+
+	bool		enableIntTimes; /* int64 storage enabled? */
+
+	bool		float4ByVal;	/* float4 pass-by-value? */
+	bool		float8ByVal;	/* float8, int8, etc pass-by-value? */
+
+	uint32		data_checksum_version;
+
+} ControlFileData11;
+


 extern DBState get_db_state(const char *data_directory);
--- a/dbutils.c
+++ b/dbutils.c
--- a/dbutils.h
+++ b/dbutils.h
@@ -47,6 +47,7 @@ typedef enum
 typedef enum
 {
 	REPMGR_INSTALLED = 0,
+	REPMGR_OLD_VERSION_INSTALLED,
 	REPMGR_AVAILABLE,
 	REPMGR_UNAVAILABLE,
 	REPMGR_UNKNOWN
@@ -104,6 +105,20 @@ typedef enum
 } BackupState;


+/*
+ * Struct to store extension version information
+ */
+
+typedef struct s_extension_versions {
+	char		default_version[8];
+	char		installed_version[8];
+} t_extension_versions;
+
+#define T_EXTENSION_VERSIONS_INITIALIZER { \
+	"", \
+	"", \
+}
+
 /*
 * Struct to store node information
 */
@@ -327,6 +342,21 @@ typedef struct
    UNKNOWN_TIMELINE_ID, \
 	InvalidXLogRecPtr \
 }
+
+
+typedef struct RepmgrdInfo {
+	int node_id;
+	int pid;
+	char pid_text[MAXLEN];
+	char pid_file[MAXLEN];
+	bool pg_running;
+	char pg_running_text[MAXLEN];
+	bool running;
+	char repmgrd_running[MAXLEN];
+	bool paused;
+} RepmgrdInfo;
+
+
 /* global variables */

 extern int	server_version_num;
@@ -346,15 +376,13 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
 bool		atobool(const char *value);

 /* connection functions */
-PGconn *establish_db_connection(const char *conninfo,
+PGconn	   *establish_db_connection(const char *conninfo,
 						const bool exit_on_error);
 PGconn	   *establish_db_connection_quiet(const char *conninfo);
-
-PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
+PGconn	   *establish_db_connection_by_params(t_conninfo_param_list *param_list,
 								  const bool exit_on_error);
-PGconn *establish_primary_db_connection(PGconn *conn,
+PGconn	   *establish_primary_db_connection(PGconn *conn,
 								const bool exit_on_error);
-
 PGconn	   *get_primary_connection(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
 PGconn	   *get_primary_connection_quiet(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);

@@ -380,7 +408,6 @@ bool		has_passfile(void);
 bool		begin_transaction(PGconn *conn);
 bool		commit_transaction(PGconn *conn);
 bool		rollback_transaction(PGconn *conn);
-bool		check_cluster_schema(PGconn *conn);

 /* GUC manipulation functions */
 bool		set_config(PGconn *conn, const char *config_param, const char *config_value);
@@ -399,9 +426,14 @@ bool		identify_system(PGconn *repl_conn, t_system_identification *identification
 bool		repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
 int			repmgrd_get_local_node_id(PGconn *conn);
 BackupState	server_in_exclusive_backup_mode(PGconn *conn);
+void		repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile);
+pid_t		repmgrd_get_pid(PGconn *conn);
+bool		repmgrd_is_running(PGconn *conn);
+bool		repmgrd_is_paused(PGconn *conn);
+bool		repmgrd_pause(PGconn *conn, bool pause);

 /* extension functions */
-ExtensionStatus get_repmgr_extension_status(PGconn *conn);
+ExtensionStatus get_repmgr_extension_status(PGconn *conn, t_extension_versions *extversions);

 /* node management functions */
 void		checkpoint(PGconn *conn);
@@ -421,7 +453,7 @@ t_node_info *get_node_record_pointer(PGconn *conn, int node_id);
 bool		get_local_node_record(PGconn *conn, int node_id, t_node_info *node_info);
 bool		get_primary_node_record(PGconn *conn, t_node_info *node_info);

-void		get_all_node_records(PGconn *conn, NodeInfoList *node_list);
+bool		get_all_node_records(PGconn *conn, NodeInfoList *node_list);
 void		get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *nodes);
 void		get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
 void		get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
@@ -475,7 +507,7 @@ int			wait_connection_availability(PGconn *conn, long long timeout);
 /* node availability functions */
 bool		is_server_available(const char *conninfo);
 bool		is_server_available_params(t_conninfo_param_list *param_list);
-void		connection_ping(PGconn *conn);
+ExecStatusType	connection_ping(PGconn *conn);

 /* monitoring functions  */
 void
@@ -491,8 +523,8 @@ add_monitoring_record(PGconn *primary_conn,
 					  long long unsigned int apply_lag_bytes
 );

-int			get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history);
-bool		delete_monitoring_records(PGconn *primary_conn, int keep_history);
+int			get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history, int node_id);
+bool		delete_monitoring_records(PGconn *primary_conn, int keep_history, int node_id);



--- a/doc/appendix-faq.sgml
+++ b/doc/appendix-faq.sgml
@@ -21,13 +21,17 @@
      in PostgreSQL 9.3, as well as improved automated failover support
      via <application>repmgrd</application>, and is not compatible with PostgreSQL 9.2
      and earlier. We recommend upgrading to &repmgr; 4, as the &repmgr; 3.x
-      series will no longer be actively maintained.
+      series is no longer maintained.
     </para>
     <para>
      &repmgr; 2.x supports PostgreSQL 9.0 ~ 9.3. While it is compatible
      with PostgreSQL 9.3, we recommend using repmgr 4.x. &repmgr; 2.x is
      no longer maintained.
     </para>
+     <para>
+       See also <link linkend="install-compatibility-matrix">&repmgr; compatibility matrix</link>
+       and <link linkend="faq-upgrade-repmgr">Should I upgrade &repmgr;?</link>.
+     </para>
  </sect2>

  <sect2 id="faq-replication-slots-advantage" xreflabel="Advantages of replication slots">
@@ -35,15 +39,25 @@
   <para>
    Replication slots, introduced in PostgreSQL 9.4, ensure that the
    primary server will retain WAL files until they have been consumed
-    by all standby servers. This makes WAL file management much easier,
-    and if used &repmgr; will no longer insist on a fixed minimum number
-    (default: 5000) of WAL files being retained.
+    by all standby servers. This means standby servers should never
+    fail due to not being able to retrieve required WAL files from the
+    primary.
   </para>
   <para>
    However this does mean that if a standby is no longer connected to the
    primary, the presence of the replication slot will cause WAL files
-    to be retained indefinitely.
+    to be retained indefinitely, and eventually lead to disk space
+    exhaustion.
   </para>
+
+   <tip>
+     <para>
+       2ndQuadrant's recommended configuration is to configure
+       <ulink url="https://www.pgbarman.org/">Barman</ulink> as a fallback
+       source of WAL files, rather than maintain replication slots for
+       each standby. See also: <link linkend="cloning-from-barman-restore-command">Using Barman as a WAL file source</link>.
+     </para>
+   </tip>
  </sect2>

  <sect2 id="faq-replication-slots-number" xreflabel="Number of replication slots">
@@ -108,6 +122,82 @@
     is not possible, contact your vendor for assistance.
   </para>
  </sect2>
+
+  <sect2 id="faq-old-packages">
+   <title>How can I obtain old versions of &repmgr; packages?</title>
+   <para>
+     See appendix <xref linkend="packages-old-versions"> for details.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgr-required-for-replication">
+    <title>Is &repmgr; required for streaming replication?</title>
+    <para>
+      No.
+    </para>
+    <para>
+     &repmgr; (together with <application>repmgrd</application>) assists with
+     <emphasis>managing</emphasis> replication. It does not actually perform replication, which
+     is part of the core PostgreSQL functionality.
+    </para>
+  </sect2>
+
+  <sect2 id="faq-what-if-repmgr-uninstalled">
+   <title>Will replication stop working if &repmgr; is uninstalled?</title>
+   <para>
+     No. See preceding question.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-version-mix">
+   <title>Does it matter if different &repmgr; versions are present in the replication cluster?</title>
+   <para>
+     Yes. If different &quot;major&quot; &repmgr; versions (e.g. 3.3.x and 4.1.x) are present,
+     &repmgr; (in particular <application>repmgrd</application>)
+     may not run, or run properly, or in the worst case (if different <application>repmgrd</application>
+     versions are running and there are differences in the failover implementation) break
+     your replication cluster.
+   </para>
+   <para>
+     If different &quot;minor&quot; &repmgr; versions (e.g. 4.1.1 and 4.1.6) are installed,
+     &repmgr; will function, but we strongly recommend always running the same version
+     to ensure there are no unexpected surprises, e.g. a newer version behaving slightly
+     differently to the older version.
+   </para>
+   <para>
+     See also <link linkend="faq-upgrade-repmgr">Should I upgrade &repmgr;?</link>.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-upgrade-repmgr">
+    <title>Should I upgrade &repmgr;?</title>
+    <para>
+      Yes.
+    </para>
+    <para>
+      We don't release new versions for fun, you know. Upgrading may require a little effort,
+      but running an older &repmgr; version with bugs which have since been fixed may end up
+      costing you more effort. The same applies to PostgreSQL itself.
+    </para>
+
+  </sect2>
+
+  <sect2 id="faq-repmgr-conf-data-directory">
+    <title>Why do I need to specify the data directory location in repmgr.conf?</title>
+    <para>
+      In some circumstances &repmgr; may need to access a PostgreSQL data
+      directory while the PostgreSQL server is not running, e.g. to confirm
+      it shut down cleanly during a <link linkend="performing-switchover">switchover</link>.
+    </para>
+    <para>
+      Additionally, this provides support when using &repmgr; on PostgreSQL 9.6 and
+      earlier, where the <literal>repmgr</literal> user is not a superuser; in that
+      case the <literal>repmgr</literal> user will not be able to access the
+      <literal>data_directory</literal> configuration setting, access to which is restricted
+      to superusers. (In PostgreSQL 10 and later, non-superusers can be added to the
+      group <option>pg_read_all_settings</option> which will enable them to read this setting).
+    </para>
+  </sect2>
 </sect1>

 <sect1 id="faq-repmgr" xreflabel="repmgr">
@@ -239,11 +329,22 @@
     Under some circumstances event notifications can be generated for servers
     which have not yet been registered; it's also useful to retain a record
     of events which includes servers removed from the replication cluster
-     which no longer have an entry in the <literal>repmrg.nodes</literal> table.
+     which no longer have an entry in the <literal>repmgr.nodes</literal> table.
   </para>
  </sect2>

-
+  <sect2 id="faq-repmgr-recovery-conf-quoted-values" xreflabel="Quoted values in recovery.conf">
+    <title>Why are some values in <filename>recovery.conf</filename> surrounded by pairs of single quotes?</title>
+    <para>
+      This is to ensure that user-supplied values which are written as parameter values in <filename>recovery.conf</filename>
+      are escaped correctly and do not cause errors when <filename>recovery.conf</filename> is parsed.
+    </para>
+    <para>
+      The escaping is performed by an internal PostgreSQL routine, which leaves strings consisting
+      of digits and alphabetical characters only as-is, but wraps everything else in pairs of single quotes,
+      even if the string does not contain any characters which need escaping.
+    </para>
+  </sect2>


 </sect1>
@@ -255,7 +356,7 @@
  <sect2 id="faq-repmgrd-prevent-promotion" xreflabel="Prevent standby from being promoted to primary">
   <title>How can I prevent a node from ever being promoted to primary?</title>
   <para>
-    In `repmgr.conf`, set its priority to a value of 0 or less; apply the changed setting with
+     In <filename>repmgr.conf</filename>, set its priority to a value of <literal>0</literal>; apply the changed setting with
    <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>.
   </para>
   <para>
@@ -303,5 +404,36 @@
   </para>
  </sect2>

+  <sect2 id="faq-repmgrd-pg-bindir" xreflabel="repmgrd does not apply pg_bindir to promote_command or follow_command">
+    <title>
+      <application>repmgrd</application> ignores pg_bindir when executing <varname>promote_command</varname> or <varname>follow_command</varname>
+    </title>
+    <para>
+      <varname>promote_command</varname> or <varname>follow_command</varname> can be user-defined scripts,
+      so &repmgr; will not apply <option>pg_bindir</option> even if executing &repmgr;. Always provide the full
+      path; see <xref linkend="repmgrd-automatic-failover-configuration"> for more details.
+    </para>
+  </sect2>
+
+  <sect2 id="faq-repmgrd-startup-no-upstream" xreflabel="repmgrd does not start if upstream node is not running">
+    <title>
+      <application>repmgrd</application> aborts startup with the error "<literal>upstream node must be running before repmgrd can start</literal>"
+    </title>
+    <para>
+      <application>repmgrd</application> does this to avoid starting up on a replication cluster
+      which is not in a healthy state. If the upstream is unavailable, <application>repmgrd</application>
+      may initiate a failover immediately after starting up, which could have unintended side-effects,
+      particularly if <application>repmgrd</application> is not running on other nodes.
+    </para>
+    <para>
+      In particular, it's possible that the node's local copy of the <literal>repmgr.nodes</literal> table
+      is out-of-date, which may lead to incorrect failover behaviour.
+    </para>
+    <para>
+      The onus is therefore on the administrator to manually set the cluster to a stable, healthy state before
+      starting <application>repmgrd</application>.
+    </para>
+  </sect2>
+
 </sect1>
 </appendix>
--- a/doc/appendix-packages.sgml
+++ b/doc/appendix-packages.sgml
@@ -12,10 +12,17 @@

  <sect1 id="packages-centos" xreflabel="CentOS packages">
    <title>CentOS Packages</title>
+
    <indexterm>
      <primary>packages</primary>
      <secondary>CentOS packages</secondary>
    </indexterm>
+
+    <indexterm>
+      <primary>CentOS</primary>
+      <secondary>package information</secondary>
+    </indexterm>
+
    <para>
      Currently, &repmgr; RPM packages are provided for versions 6.x and 7.x of CentOS. These should also
      work on matching versions of Red Hat Enterprise Linux, Scientific Linux and Oracle Enterprise Linux;
@@ -53,11 +60,11 @@
          <tbody>
            <row>
              <entry>Repository URL:</entry>
-              <entry><ulink url="https://rpm.2ndquadrant.com/">https://rpm.2ndquadrant.com/</ulink></entry>
+              <entry><ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink></entry>
            </row>
            <row>
              <entry>Repository documentation:</entry>
-              <entry><ulink url="https://repmgr.org/docs/4.0/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ">https://repmgr.org/docs/4.0/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ</ulink></entry>
+              <entry><ulink url="https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ">https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ</ulink></entry>
            </row>
          </tbody>
        </tgroup>
@@ -237,6 +244,12 @@
      <primary>packages</primary>
      <secondary>Debian/Ubuntu packages</secondary>
    </indexterm>
+
+    <indexterm>
+      <primary>Debian/Ubuntu</primary>
+      <secondary>package information</secondary>
+    </indexterm>
+
    <para>
      &repmgr; <literal>.deb</literal> packages are provided via the
      PostgreSQL Community APT repository, and are available for each community-supported
@@ -253,6 +266,23 @@
      </para>


+      <table id="apt-2ndquadrant-repository">
+        <title>2ndQuadrant public repository</title>
+        <tgroup cols="2">
+          <tbody>
+            <row>
+              <entry>Repository URL:</entry>
+              <entry><ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink></entry>
+            </row>
+            <row>
+              <entry>Repository documentation:</entry>
+              <entry><ulink url="https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN">https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN</ulink></entry>
+            </row>
+          </tbody>
+        </tgroup>
+      </table>
+
+
      <table id="apt-repository">
        <title>PostgreSQL Community APT repository (PGDG)</title>
        <tgroup cols="2">
@@ -365,6 +395,127 @@

  </sect1>

+  <sect1 id="packages-snapshot" xreflabel="Snapshot packages">
+    <title>Snapshot packages</title>
+    <indexterm>
+      <primary>snapshot packages</primary>
+    </indexterm>
+    <indexterm>
+      <primary>packages</primary>
+      <secondary>snapshots</secondary>
+    </indexterm>
+
+    <para>
+      For testing new features and bug fixes, from time to time 2ndQuadrant provides
+      so-called &quot;snapshot packages&quot; via its public repository. These packages
+      are built from the &repmgr; source at a particular point in time, and are not formal
+      releases.
+    </para>
+    <note>
+      <para>
+        We do not recommend installing these packages in a production environment
+        unless specifically advised.
+      </para>
+    </note>
+    <para>
+      To install a snapshot package, it's necessary to install the 2ndQuadrant public snapshot repository,
+      following the instructions here: <ulink url="https://dl.2ndquadrant.com/default/release/site/">https://dl.2ndquadrant.com/default/release/site/</ulink> but replace <literal>release</literal> with <literal>snapshot</literal>
+      in the appropriate URL.
+    </para>
+    <para>
+      For example, to install the snapshot RPM repository for PostgreSQL 9.6, execute (as <literal>root</literal>):
+      <programlisting>
+curl https://dl.2ndquadrant.com/default/snapshot/get/9.6/rpm | bash</programlisting>
+
+      or as a normal user with root sudo access:
+      <programlisting>
+curl https://dl.2ndquadrant.com/default/snapshot/get/9.6/rpm | sudo bash</programlisting>
+    </para>
+    <para>
+      Alternatively you can browse the repository here:
+      <ulink url="https://dl.2ndquadrant.com/default/snapshot/browse/">https://dl.2ndquadrant.com/default/snapshot/browse/</ulink>.
+    </para>
+    <para>
+      Once the repository is installed, installing or updating &repmgr; will result in the latest snapshot
+      package being installed.
+    </para>
+    <para>
+      The package name will be formatted like this:
+      <programlisting>
+repmgr96-4.1.1-0.0git320.g5113ab0.1.el7.x86_64.rpm</programlisting>
+      containing the snapshot build number (here: <literal>320</literal>) and the hash
+      of the <application>git</application> commit it was built from (here: <literal>g5113ab0</literal>).
+    </para>
+
+    <para>
+      Note that the next formal release (in the above example <literal>4.1.1</literal>), once available,
+      will install in place of any snapshot builds.
+    </para>
+
+  </sect1>
+
+  <sect1 id="packages-old-versions" xreflabel="Installing old package versions">
+    <title>Installing old package versions</title>
+    <indexterm>
+      <primary>old packages</primary>
+    </indexterm>
+    <indexterm>
+      <primary>packages</primary>
+      <secondary>old versions</secondary>
+    </indexterm>
+
+    <sect2 id="packages-old-versions-debian" xreflabel="old Debian package versions">
+      <title>Debian/Ubuntu</title>
+      <para>
+        An archive of old packages (<literal>3.3.2</literal> and later) for Debian/Ubuntu-based systems is available here:
+        <ulink url="http://atalia.postgresql.org/morgue/r/repmgr/">http://atalia.postgresql.org/morgue/r/repmgr/</ulink>
+      </para>
+    </sect2>
+
+    <sect2 id="packages-old-versions-rhel-centos" xreflabel="old RHEL/CentOS package versions">
+      <title>RHEL/CentOS</title>
+      <para>
+        Old RPM packages (<literal>3.2</literal> and later) can be retrieved from the
+        (deprecated) 2ndQuadrant repository at
+        <ulink url="http://packages.2ndquadrant.com/">http://packages.2ndquadrant.com/</ulink>
+        by installing the appropriate repository RPM:
+      </para>
+
+      <itemizedlist spacing="compact" mark="bullet">
+
+        <listitem>
+          <simpara>
+            <ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
+          </simpara>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        Old versions can be located with e.g.:
+        <programlisting>
+          yum --showduplicates list repmgr96</programlisting>
+        (substitute the appropriate package name; see <xref linkend="packages-centos">) and installed with:
+        <programlisting>
+          yum install {package_name}-{version}</programlisting>
+        where <literal>{package_name}</literal> is the base package name (e.g. <literal>repmgr96</literal>)
+        and <literal>{version}</literal> is the version listed by the
+        <command>yum --showduplicates list ...</command> command, e.g. <literal>4.0.6-1.rhel6</literal>.
+      </para>
+      <para>For example:
+        <programlisting>
+          yum install repmgr96-4.0.6-1.rhel6</programlisting>
+      </para>
+
+    </sect2>
+  </sect1>
+

  <sect1 id="packages-packager-info" xreflabel="Information for packagers">
    <title>Information for packagers</title>
@@ -373,7 +524,7 @@
      <secondary>information for packagers</secondary>
    </indexterm>
    <para>
-      We recommend patching the following  parameters when
+      We recommend patching the following parameters when
      building the package as built-in default values for user convenience.
      These values can nevertheless be overridden by the user, if desired.
    </para>
--- a/doc/appendix-release-notes.sgml
+++ b/doc/appendix-release-notes.sgml
@@ -15,9 +15,304 @@
    See also: <xref linkend="upgrading-repmgr">
  </para>

+  <sect1 id="release-4.2">
+    <title>Release 4.2</title>
+    <para><emphasis>Wed October 24, 2018</emphasis></para>
+
+    <para>
+      &repmgr; 4.2 is a major release, with the main new feature being the
+      ability to <link linkend="repmgrd-pausing">pause repmgrd</link>, e.g. during planned maintenance
+      operations. Various other usability enhancements and a couple of bug fixes are also included;
+      see notes below for details.
+    </para>
+    <para>
+      A restart of the PostgreSQL server <emphasis>is</emphasis> required
+      for this release. For detailed upgrade instructions, see
+      <link linkend="upgrading-major-version">Upgrading a major version release</link>.
+    </para>
+
+    <sect2>
+      <title>Configuration file changes</title>
+      <para>
+        <itemizedlist>
+          <listitem>
+            <para>
+              New parameter <varname>shutdown_check_timeout</varname> (default: 60 seconds) added;
+              this provides an explicit timeout for
+              <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>
+              to check that the demotion candidate (current primary) has shut down. Previously, the parameters
+              <literal>reconnect_attempts</literal> and <literal>reconnect_interval</literal>
+              were used to calculate a timeout, but these are actually
+              intended for primary failure detection. (GitHub #504).
+            </para>
+          </listitem>
+        </itemizedlist>
+
+        <itemizedlist>
+          <listitem>
+            <para>
+              New parameter <varname>repmgr_bindir</varname> added, to facilitate remote invocation of repmgr
+              when the repmgr binary is located somewhere other than the PostgreSQL binary directory, as it
+              cannot be assumed all package maintainers will install &repmgr; there.
+            </para>
+            <para>
+              This parameter is optional; if not set (the default), &repmgr; will fall back
+              to <option>pg_bindir</option> (if set).
+            </para>
+            <para>
+              (GitHub #246).
+            </para>
+          </listitem>
+        </itemizedlist>
+      </para>
+
+    </sect2>
+
+    <sect2>
+      <title>repmgr enhancements</title>
+      <para>
+       <itemizedlist>
+
+          <listitem>
+            <para>
+              <command><link linkend="repmgr-cluster-cleanup">repmgr cluster cleanup</link></command>
+              now accepts the <option>--node-id</option> option to delete records for only one
+              node. (GitHub #493).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              When running
+              <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command> and
+              <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>,
+              &repmgr; will report nodes unreachable via SSH, and emit return code <literal>ERR_BAD_SSH</literal>.
+              (GitHub #246).
+            </para>
+            <note>
+              <para>
+                Users relying on
+                <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
+                to return a non-zero return code as a way of detecting connectivity errors should be aware
+                that <literal>ERR_BAD_SSH</literal> will be returned if there is an SSH connection error
+                from the node where the command is executed, even if the command is able to establish
+                that PostgreSQL connectivity is fine. Therefore the exact return code should be checked
+                to determine what kind of connectivity error has been detected.
+              </para>
+            </note>
+          </listitem>
+
+        </itemizedlist>
+      </para>
+    </sect2>
+
+
+    <sect2>
+      <title>repmgrd enhancements</title>
+      <para>
+       <itemizedlist>
+
+          <listitem>
+            <para>
+              <application>repmgrd</application> can now be &quot;paused&quot;, i.e. instructed
+              not to take any action such as a failover, even if the prerequisites for such an
+              action are detected.
+            </para>
+            <para>
+              This removes the need to stop <application>repmgrd</application> on all nodes when
+              performing a planned operation such as a switchover.
+            </para>
+            <para>
+              For further details, see <link linkend="repmgrd-pausing">Pausing repmgrd</link>.
+            </para>
+          </listitem>
+
+       </itemizedlist>
+      </para>
+    </sect2>
+
+    <sect2>
+      <title>Bug fixes</title>
+      <para>
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              &repmgr;: fix &quot;Missing replication slots&quot; label in
+              <command><link linkend="repmgr-node-check">repmgr node check</link></command>.  (GitHub #507)
+            </para>
+          </listitem>
+
+
+          <listitem>
+            <para>
+              <application>repmgrd</application>: fix parsing of <option>-d/--daemonize</option> option.
+            </para>
+          </listitem>
+
+        </itemizedlist>
+      </para>
+    </sect2>
+  </sect1>
+
+  <sect1 id="release-4.1.1">
+    <title>Release 4.1.1</title>
+    <para><emphasis>Wed September 5, 2018</emphasis></para>
+    <para>
+      repmgr 4.1.1 contains a number of usability enhancements and bug fixes.
+    </para>
+    <para>
+	  We recommend upgrading to this version as soon as possible.
+	  This release can be installed as a simple package upgrade from repmgr 4.0 ~ 4.1.0;
+      <application>repmgrd</application> (if running) should be restarted.
+      See <xref linkend="upgrading-repmgr"> for more details.
+	</para>
+
+    <sect2>
+      <title>repmgr enhancements</title>
+      <para>
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              <command><link linkend="repmgr-standby-switchover">repmgr standby switchover --dry-run</link></command>
+              no longer copies external configuration files to test they can be copied; this avoids making
+              any changes to the target system. (GitHub #491).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <command><link linkend="repmgr-cluster-cleanup">repmgr cluster cleanup</link></command>:
+              add <literal>cluster_cleanup</literal> event. (GitHub #492).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>:
+              improve detection of free walsenders. (GitHub #495).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Improve messages emitted during
+              <command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command>.
+            </para>
+          </listitem>
+
+        </itemizedlist>
+      </para>
+   </sect2>
+
+
+    <sect2>
+      <title>repmgrd enhancements</title>
+      <para>
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              Always reopen the log file after
+              receiving <literal>SIGHUP</literal>. Previously this only happened if
+              a configuration file change was detected.
+              (GitHub #485).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Report version number <emphasis>after</emphasis>
+              logger initialisation. (GitHub #487).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Improve cascaded standby failover handling. (GitHub #480).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Improve reconnection handling after brief network outages; if
+              monitoring data was being collected, this could lead to orphaned
+              sessions on the primary. (GitHub #480).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Check <varname>promote_command</varname> and <varname>follow_command</varname>
+              are defined when reloading configuration. These were checked on startup but
+              not reload by <application>repmgrd</application>, which made it possible to
+              make <application>repmgrd</application> run with invalid values. It's unlikely
+              anyone would want to do this, but we should make it impossible anyway.
+              (GitHub #486).
+            </para>
+          </listitem>
+
+        </itemizedlist>
+      </para>
+    </sect2>
+
+    <sect2>
+      <title>Other</title>
+      <para>
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              Text of any failed queries will now be logged as <literal>ERROR</literal> to assist
+              logfile analysis at log levels higher than <literal>DEBUG</literal>.
+              (GitHub #498).
+            </para>
+          </listitem>
+        </itemizedlist>
+      </para>
+    </sect2>
+
+    <sect2>
+      <title>Bug fixes</title>
+      <para>
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              <command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>:
+              remove new upstream's replication slot if it still exists on the rejoined
+              standby. (GitHub #499).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <application>repmgrd</application>: fix startup on witness node when local data is stale. (GitHub #488, #489).
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Truncate version string reported by PostgreSQL if necessary; some
+              distributions insert additional detail after the actual version.
+              (GitHub #490).
+            </para>
+          </listitem>
+
+
+        </itemizedlist>
+      </para>
+    </sect2>
+
+
+  </sect1>
+
+
+
  <sect1 id="release-4.1.0">
    <title>Release 4.1.0</title>
-    <para><emphasis>???? ??, 2018</emphasis></para>
+    <para><emphasis>Tue July 31, 2018</emphasis></para>
    <para>
      &repmgr; 4.1.0 introduces some changes to <application>repmgrd</application>
      behaviour and some additional configuration parameters.
@@ -29,19 +324,20 @@
       <itemizedlist>
          <listitem>
            <para>
-              <application>repmgrd</application> (if running) must be restarted.
+              Execute <command>ALTER EXTENSION repmgr UPDATE</command>
+              on the primary server in the database where &repmgr; is installed.
            </para>
          </listitem>
          <listitem>
            <para>
-              Execute <command>ALTER EXTENSION repmgr UPGRADE</command>
-              on the primary server in the database where &repmgr; is installed.
+              <application>repmgrd</application> must be restarted on all nodes where it is running.
            </para>
          </listitem>
+
       </itemizedlist>

       A restart of the PostgreSQL server is <emphasis>not</emphasis> required
-       for this release.
+       for this release (unless upgrading from repmgr 3.x).
    </para>
    <para>
       See <xref linkend="upgrading-repmgr-extension"> for more details.
@@ -53,6 +349,17 @@
      review the changes listed below.
    </para>

+    <note>
+      <para>
+        <emphasis>Repository changes</emphasis>
+      </para>
+      <para>
+        Coinciding with this release, the 2ndQuadrant repository structure has changed.
+        See section <xref linkend="installation-packages"> for details, particularly
+        if you are using an RPM-based system.
+      </para>
+    </note>
+
    <sect2>
      <title>Configuration file changes</title>

@@ -214,7 +521,7 @@

  <sect1 id="release-4.0.6">
    <title>Release 4.0.6</title>
-    <para><emphasis>June 14, 2018</emphasis></para>
+    <para><emphasis>Thu June 14, 2018</emphasis></para>
    <para>
 	  &repmgr; 4.0.6 contains a number of bug fixes and usability enhancements.
    </para>
--- a/doc/appendix-signatures.sgml
+++ b/doc/appendix-signatures.sgml
@@ -5,14 +5,14 @@
   <title>repmgr source code signing key</title>
   <para>
     The signing key ID used for <application>repmgr</application> source code bundles is:
-     <ulink url="http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr">
+     <ulink url="https://repmgr.org/download/SOURCE-GPG-KEY-repmgr">
       <literal>0x297F1DCC</literal></ulink>.
   </para>

   <para>
     To download the <application>repmgr</application> source key to your computer:
     <programlisting>
-       curl -s http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr | gpg --import
+       curl -s https://repmgr.org/download/SOURCE-GPG-KEY-repmgr | gpg --import
       gpg --fingerprint 0x297F1DCC
     </programlisting>
     then verify that the fingerprint is the expected value:
--- a/doc/bdr-failover.md
+++ b/doc/bdr-failover.md
@@ -4,5 +4,5 @@ BDR failover with repmgrd
 This document has been integrated into the main `repmgr` documentation
 and is now located here:

-> [BDR failover with repmgrd](https://repmgr.org/docs/4.0/repmgrd-bdr.html)
+> [BDR failover with repmgrd](https://repmgr.org/docs/current/repmgrd-bdr.html)

--- a/doc/changes-in-repmgr4.md
+++ b/doc/changes-in-repmgr4.md
@@ -4,4 +4,4 @@ Changes in repmgr 4
 This document has been integrated into the main `repmgr` documentation
 and is now located here:

-> [Release notes](https://repmgr.org/docs/4.0/release-4.0.html)
+> [Release notes](https://repmgr.org/docs/current/release-4.0.html)
--- a/doc/cloning-standbys.sgml
+++ b/doc/cloning-standbys.sgml
@@ -243,8 +243,8 @@
    </simpara>
    <simpara>
     As an alternative we recommend using 2ndQuadrant's <ulink url="https://www.pgbarman.org/">Barman</ulink>,
-     which offloads WAL management to a separate server, negating the need to use replication
-     slots to reserve WAL. See section <xref linkend="cloning-from-barman">
+     which offloads WAL management to a separate server, removing the requirement to use a replication
+     slot for each individual standby to reserve WAL. See section <xref linkend="cloning-from-barman">
     for more details on using &repmgr; together with Barman.
    </simpara>
   </tip>
@@ -352,10 +352,12 @@
      provide additional parameters for <command>pg_basebackup</command> to customise the
      cloning process.
    </para>
+
    <para>
     By default, <command>pg_basebackup</command> performs a checkpoint before beginning the backup
     process. However, a normal checkpoint may take some time to complete;
-     a fast checkpoint can be forced with the <literal>-c/--fast-checkpoint</literal> option.
+     a fast checkpoint can be forced with <command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>'s
+     <literal>-c/--fast-checkpoint</literal> option.
     Note that this may impact performance of the server being cloned from (typically the primary)
     so should be used with care.
    </para>
@@ -370,6 +372,18 @@
      Other options can be passed to <command>pg_basebackup</command> by including them
      in the <filename>repmgr.conf</filename> setting <varname>pg_basebackup_options</varname>.
    </para>
+
+    <para>
+      Note that by default, &repmgr; executes <command>pg_basebackup</command> with <option>-X/--wal-method</option>
+      (PostgreSQL 9.6 and earlier: <option>-X/--xlog-method</option>) set to <literal>stream</literal>.
+      From PostgreSQL 9.6, if replication slots are in use, it will also create a replication slot before
+      running the base backup, and execute <command>pg_basebackup</command> with the
+      <option>-S/--slot</option> option set to the name of the previously created replication slot.
+    </para>
+    <para>
+      These parameters can be set by the user in <varname>pg_basebackup_options</varname>, in which case they
+      will override the &repmgr; default values. However normally there's no reason to do this.
+    </para>
    <para>
      If using a separate directory to store WAL files, provide the option <literal>--waldir</literal>
      (<literal>--xlogdir</literal> in PostgreSQL 9.6 and earlier) with the absolute path to the
--- a/doc/configuration-file-service-commands.sgml
+++ b/doc/configuration-file-service-commands.sgml
@@ -17,15 +17,15 @@
    <link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>.
  </para>
  <para>
-    By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> to control the PostgreSQL
+    By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> utility to control the PostgreSQL
    server. However this can lead to various problems, particularly when PostgreSQL has been
-    installed from packages, and expecially so if <application>systemd</application> is in use.
+    installed from packages, and especially so if <application>systemd</application> is in use.
  </para>


  <note>
    <para>
-      If using <application>systemd</application>, ensure you have <varname>RemoteIPC</varname> set to <literal>off</literal>.
+      If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
      See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
      entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
    </para>
@@ -48,6 +48,13 @@
    service_reload_command</programlisting>
  </para>

+  <note>
+    <para>
+      &repmgr; will not apply <option>pg_bindir</option> when executing any of these commands;
+      these can be user-defined scripts so must always be specified with the full path.
+    </para>
+  </note>
+
  <note>
    <para>
      It's also possible to specify a <varname>service_promote_command</varname>.
@@ -67,12 +74,12 @@

  <para>
    To confirm which command &repmgr; will execute for each action, use
-    <command>repmgr node service --list --action=...</command>, e.g.:
+    <command><link linkend="repmgr-node-service">repmgr node service --list-actions --action=...</link></command>, e.g.:
    <programlisting>
-      repmgr -f /etc/repmgr.conf node service --list --action=stop
-      repmgr -f /etc/repmgr.conf node service --list --action=start
-      repmgr -f /etc/repmgr.conf node service --list --action=restart
-      repmgr -f /etc/repmgr.conf node service --list --action=reload</programlisting>
+      repmgr -f /etc/repmgr.conf node service --list-actions --action=stop
+      repmgr -f /etc/repmgr.conf node service --list-actions --action=start
+      repmgr -f /etc/repmgr.conf node service --list-actions --action=restart
+      repmgr -f /etc/repmgr.conf node service --list-actions --action=reload</programlisting>
  </para>

  <para>
@@ -92,7 +99,7 @@
      Defaults:postgres !requiretty
      postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.6, \
        /usr/bin/systemctl start postgresql-9.6, \
-        /usr/bin/systemctl restart postgresql-9.6 \
+        /usr/bin/systemctl restart postgresql-9.6, \
        /usr/bin/systemctl reload postgresql-9.6</programlisting>
  </para>

--- a/doc/configuration-file.sgml
+++ b/doc/configuration-file.sgml
@@ -1,15 +1,15 @@
-<sect1 id="configuration-file" xreflabel="configuration file location">
+<sect1 id="configuration-file" xreflabel="configuration file">
  <indexterm>
    <primary>repmgr.conf</primary>
-    <secondary>location</secondary>
  </indexterm>

  <indexterm>
    <primary>configuration</primary>
-    <secondary>repmgr.conf location</secondary>
+    <secondary>repmgr.conf</secondary>
  </indexterm>

-  <title>Configuration file location</title>
+  <title>Configuration file</title>
+
  <para>
    <application>repmgr</application> and <application>repmgrd</application>
    use a common configuration file, by default called
@@ -21,6 +21,55 @@
    for more details.
  </para>

+  <sect2 id="configuration-file-format" xreflabel="configuration file format">
+
+    <indexterm>
+      <primary>repmgr.conf</primary>
+      <secondary>format</secondary>
+    </indexterm>
+
+    <title>Configuration file format</title>
+
+    <para>
+      <filename>repmgr.conf</filename> is a plain text file with one parameter/value
+      combination per line.
+    </para>
+    <para>
+      Whitespace is insignificant (except within a quoted parameter value) and blank lines are ignored.
+      Hash marks (#) designate the remainder of the line as a comment. Parameter values that are not simple
+      identifiers or numbers should be single-quoted. Note that single quotes cannot be embedded
+      in a parameter value.
+    </para>
+    <important>
+      <para>
+        &repmgr; will interpret double-quotes as being part of a string value; only use single quotes
+        to quote parameter values.
+      </para>
+    </important>
+
+    <para>
+      Example of a valid <filename>repmgr.conf</filename> file:
+      <programlisting>
+# repmgr.conf
+
+node_id=1
+node_name= node1
+conninfo ='host=node1 dbname=repmgr user=repmgr connect_timeout=2'
+data_directory = /var/lib/pgsql/11/data</programlisting>
+
+    </para>
+  </sect2>
+
+
+
+  <sect2 id="configuration-file-location" xreflabel="configuration file location">
+  <indexterm>
+    <primary>repmgr.conf</primary>
+    <secondary>location</secondary>
+  </indexterm>
+
+  <title>Configuration file location</title>
+
  <para>
   The configuration file will be searched for in the following locations:
   <itemizedlist spacing="compact" mark="bullet">
@@ -50,7 +99,7 @@
   Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
   an error will be raised if it is not found or not readable, and no attempt will be made to
   check default locations; this is to prevent <application>repmgr</application> unexpectedly
-   reading the wrong configuraton file.
+   reading the wrong configuration file.
  </para>

  <note>
@@ -65,5 +114,7 @@
      to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
      <filename>/path/to/repmgr.conf</filename>).
    </para>
-  </note>
-</sect1>
+   </note>
+
+   </sect2>
+ </sect1>
--- a/doc/configuration.sgml
+++ b/doc/configuration.sgml
@@ -1,6 +1,292 @@
 <chapter id="configuration" xreflabel="Configuration">
  <title>repmgr configuration</title>

+  <sect1 id="configuration-prerequisites" xreflabel="Prerequisites for configuration">
+    <indexterm>
+      <primary>configuration</primary>
+      <secondary>prerequisites</secondary>
+    </indexterm>
+
+    <indexterm>
+      <primary>configuration</primary>
+      <secondary>ssh</secondary>
+    </indexterm>
+
+    <title>Prerequisites for configuration</title>
+    <para>
+     The following software must be installed on both servers:
+     <itemizedlist spacing="compact" mark="bullet">
+      <listitem>
+       <simpara><application>PostgreSQL</application></simpara>
+      </listitem>
+      <listitem>
+       <simpara>
+        <application>repmgr</application>
+       </simpara>
+      </listitem>
+     </itemizedlist>
+    </para>
+
+    <para>
+      At network level, connections to the PostgreSQL port (default: <literal>5432</literal>)
+      must be possible between all nodes.
+    </para>
+
+    <para>
+      Passwordless <command>SSH</command> connectivity between all servers in the replication cluster
+      is not required, but is necessary in the following cases:
+      <itemizedlist>
+        <listitem>
+          <simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
+            data directory (as is the case with e.g. <link linkend="packages-debian-ubuntu">Debian packages</link>);
+            in this case <command>rsync</command> must also be installed on all servers.
+          </simpara>
+        </listitem>
+        <listitem>
+          <simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
+        </listitem>
+        <listitem>
+          <simpara>
+            when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
+            and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
+          </simpara>
+        </listitem>
+      </itemizedlist>
+    </para>
+
+    <tip>
+      <simpara>
+        Consider setting <varname>ConnectTimeout</varname> to a low value in your SSH configuration.
+        This will make it faster to detect any SSH connection errors.
+      </simpara>
+    </tip>
+
+  <sect2 id="configuration-postgresql" xreflabel="PostgreSQL configuration">
+    <indexterm>
+      <primary>configuration</primary>
+      <secondary>PostgreSQL</secondary>
+    </indexterm>
+
+    <indexterm>
+      <primary>PostgreSQL configuration</primary>
+    </indexterm>
+
+    <title>PostgreSQL configuration for &repmgr;</title>
+    <para>
+      The following PostgreSQL configuration parameters may need to be changed in order
+      for &repmgr; (and replication itself) to function correctly.
+    </para>
+    <variablelist>
+
+
+      <varlistentry>
+
+        <indexterm>
+          <primary>hot_standby</primary>
+          <secondary>PostgreSQL configuration</secondary>
+        </indexterm>
+
+        <term><option>hot_standby</option></term>
+        <listitem>
+          <para>
+            <option>hot_standby</option> must always be set to <literal>on</literal>, as &repmgr; needs
+            to be able to connect to each server it manages.
+          </para>
+          <para>
+            Note that <option>hot_standby</option> defaults to <literal>on</literal> in PostgreSQL 10
+            and later; in PostgreSQL 9.6 and earlier, the default was <literal>off</literal>.
+          </para>
+          <para>
+            PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-HOT-STANDBY">hot_standby</ulink>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+
+        <indexterm>
+          <primary>wal_level</primary>
+          <secondary>PostgreSQL configuration</secondary>
+        </indexterm>
+
+        <term><option>wal_level</option></term>
+        <listitem>
+          <para>
+            <option>wal_level</option> must be one of <option>replica</option> or <option>logical</option>
+            (PostgreSQL 9.5 and earlier: one of <option>hot_standby</option> or <option>logical</option>).
+          </para>
+          <para>
+            PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL">wal_level</ulink>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+
+        <indexterm>
+          <primary>max_wal_senders</primary>
+          <secondary>PostgreSQL configuration</secondary>
+        </indexterm>
+
+        <term><option>max_wal_senders</option></term>
+        <listitem>
+          <para>
+            <option>max_wal_senders</option> must be set to a value of <literal>2</literal> or greater.
+            In general you will need one WAL sender for each standby which will attach to the PostgreSQL
+            instance; additionally &repmgr; will require two free WAL senders in order to clone further
+            standbys.
+          </para>
+          <para>
+            <option>max_wal_senders</option> should be set to an appropriate value on all PostgreSQL
+            instances in the replication cluster which may potentially become a primary server or
+            (in cascading replication) the upstream server of a standby.
+          </para>
+          <para>
+            PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-WAL-SENDERS">max_wal_senders</ulink>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+
+        <indexterm>
+          <primary>max_replication_slots</primary>
+          <secondary>PostgreSQL configuration</secondary>
+        </indexterm>
+
+        <term><option>max_replication_slots</option></term>
+        <listitem>
+          <para>
+            If you are intending to use replication slots, <option>max_replication_slots</option>
+            must be set to a non-zero value.
+          </para>
+          <para>
+            <option>max_replication_slots</option> should be set to an appropriate value on all PostgreSQL
+            instances in the replication cluster which may potentially become a primary server or
+            (in cascading replication) the upstream server of a standby.
+          </para>
+          <para>
+            PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-REPLICATION-SLOTS">max_replication_slots</ulink>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+      <varlistentry>
+
+        <indexterm>
+          <primary>wal_log_hints</primary>
+          <secondary>PostgreSQL configuration</secondary>
+        </indexterm>
+
+        <term><option>wal_log_hints</option></term>
+        <listitem>
+          <para>If you are intending to use <application>pg_rewind</application>,
+            and the cluster was not initialised using data checksums, you may want to consider enabling
+            <option>wal_log_hints</option>.
+          </para>
+          <para>
+            For more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
+          </para>
+          <para>
+            PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LOG-HINTS">wal_log_hints</ulink>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+
+        <indexterm>
+          <primary>archive_mode</primary>
+          <secondary>PostgreSQL configuration</secondary>
+        </indexterm>
+
+        <term><option>archive_mode</option></term>
+        <listitem>
+          <para>
+            We suggest setting <option>archive_mode</option> to <literal>on</literal> (and
+            <option>archive_command</option> to <literal>/bin/true</literal>; see below)
+            even if you are currently not planning to use WAL file archiving.
+          </para>
+          <para>
+            This will make it simpler to set up WAL file archiving if it is ever required,
+            as changes to <option>archive_mode</option> require a full PostgreSQL server
+            restart, while <option>archive_command</option> changes can be applied via a normal
+            configuration reload.
+          </para>
+          <para>
+            However, &repmgr; itself does not require WAL file archiving.
+          </para>
+          <para>
+            PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-MODE">archive_mode</ulink>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+
+        <indexterm>
+          <primary>archive_command</primary>
+          <secondary>PostgreSQL configuration</secondary>
+        </indexterm>
+
+        <term><option>archive_command</option></term>
+        <listitem>
+          <para>
+            If you have set  <option>archive_mode</option> to <literal>on</literal> but are not currently planning
+            to use WAL file archiving, set <option>archive_command</option> to a command which does nothing but returns
+            <literal>true</literal>, such as <command>/bin/true</command>. See above for details.
+          </para>
+          <para>
+            PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-COMMAND">archive_command</ulink>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+      <varlistentry>
+
+        <indexterm>
+          <primary>wal_keep_segments</primary>
+          <secondary>PostgreSQL configuration</secondary>
+        </indexterm>
+
+        <term><option>wal_keep_segments</option></term>
+        <listitem>
+          <para>
+            Normally there is no need to set <option>wal_keep_segments</option> (default: <literal>0</literal>), as it
+            is <emphasis>not</emphasis> a reliable way of ensuring that all required WAL segments are available to standbys.
+            Replication slots and/or an archiving solution such as Barman are recommended to ensure standbys have a reliable
+            source of WAL segments at all times.
+          </para>
+          <para>
+            The only reason ever to set <option>wal_keep_segments</option> is if
+            you have configured <option>pg_basebackup_options</option>
+            in <filename>repmgr.conf</filename> to include the setting <literal>--wal-method=fetch</literal>
+            (PostgreSQL 9.6 and earlier: <literal>--xlog-method=fetch</literal>)
+            <emphasis>and</emphasis> you have <emphasis>not</emphasis> set <option>restore_command</option>
+            in <filename>repmgr.conf</filename> to fetch WAL files from a reliable source such as Barman,
+            in which case you'll need to set <option>wal_keep_segments</option>
+            to a sufficiently high number to ensure that all WAL files required by the standby
+            are retained. However we do not recommend managing replication in this way.
+          </para>
+          <para>
+            PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-WAL-KEEP-SEGMENTS">wal_keep_segments</ulink>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+    <para>
+      See also the <link linkend="quickstart-postgresql-configuration">PostgreSQL configuration</link> section in the
+      <link linkend="quickstart">Quick-start guide</link>.
+    </para>
+  </sect2>
+
+
+  </sect1>
+
+
  &configuration-file;
  &configuration-file-required-settings;
  &configuration-file-log-settings;
--- a/doc/configuring-witness-server.sgml
+++ b/doc/configuring-witness-server.sgml
@@ -16,15 +16,22 @@
 <para>
   A typical use case for a witness server is a two-node streaming replication
   setup, where the primary and standby are in different locations (data centres).
-   By creating a witness server in the same location as the primary, if the primary
-   becomes unavailable  it's possible for the standby to decide whether it can
-   promote itself without risking a "split brain" scenario: if it can't see either the
+   By creating a witness server in the same location (data centre) as the primary,
+   if the primary becomes unavailable it's possible for the standby to decide whether
+   it can promote itself without risking a "split brain" scenario: if it can't see either the
   witness or the primary server, it's likely there's a network-level interruption
   and it should not promote itself. If it can see the witness but not the primary,
   this proves there is no network interruption and the primary itself is unavailable,
   and it can therefore promote itself (and ideally take action to fence the
   former primary).
 </para>
+ <note>
+   <para>
+     <emphasis>Never</emphasis> install a witness server on the same physical host
+     as another node in the replication cluster managed by &repmgr; - it's essential
+     the witness is not affected in any way by failure of another node.
+   </para>
+ </note>
 <para>
   For more complex replication scenarios, e.g. with multiple datacentres, it may
   be preferable to use location-based failover, which ensures that only nodes
--- a/doc/event-notifications.sgml
+++ b/doc/event-notifications.sgml
@@ -147,58 +147,76 @@
 <para>
  By default, all notification types will be passed to the designated script;
  the notification types can be filtered to explicitly named ones using the
-  <varname>event_notifications</varname> parameter:
+  <varname>event_notifications</varname> parameter.
+ </para>
+
+ <para>
+   Events generated by the &repmgr; command:

  <itemizedlist spacing="compact" mark="bullet">

   <listitem>
-    <simpara><literal>primary_register</literal></simpara>
+     <simpara><literal><link linkend="repmgr-primary-register-events">cluster_created</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>primary_unregister</literal></simpara>
+     <simpara><literal><link linkend="repmgr-primary-register-events">primary_register</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_register</literal></simpara>
+     <simpara><literal><link linkend="repmgr-primary-unregister-events">primary_unregister</link></literal></simpara>
+   </listitem>
+
+   <listitem>
+    <simpara><literal><link linkend="repmgr-standby-clone-events">standby_clone</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_register_sync</literal></simpara>
+    <simpara><literal><link linkend="repmgr-standby-register-events">standby_register</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_unregister</literal></simpara>
+    <simpara><literal><link linkend="repmgr-standby-register-events">standby_register_sync</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_clone</literal></simpara>
+    <simpara><literal><link linkend="repmgr-standby-unregister-events">standby_unregister</link></literal></simpara>
+   </listitem>
+
+   <listitem>
+    <simpara><literal><link linkend="repmgr-standby-promote-events">standby_promote</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_promote</literal></simpara>
+    <simpara><literal><link linkend="repmgr-standby-follow-events">standby_follow</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_follow</literal></simpara>
+     <simpara><literal><link linkend="repmgr-standby-switchover-events">standby_switchover</link></literal></simpara>
+   </listitem>
+
+   <listitem>
+     <simpara><literal><link linkend="repmgr-witness-register-events">witness_register</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_disconnect_manual</literal></simpara>
+    <simpara><literal><link linkend="repmgr-witness-unregister-events">witness_unregister</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_failure</literal></simpara>
+    <simpara><literal><link linkend="repmgr-node-rejoin-events">node_rejoin</link></literal></simpara>
   </listitem>
   <listitem>
-    <simpara><literal>standby_recovery</literal></simpara>
-   </listitem>
-   <listitem>
-    <simpara><literal>witness_register</literal></simpara>
-   </listitem>
-   <listitem>
-    <simpara><literal>witness_unregister</literal></simpara>
-   </listitem>
-   <listitem>
-    <simpara><literal>node_rejoin</literal></simpara>
+    <simpara><literal><link linkend="repmgr-cluster-cleanup-events">cluster_cleanup</link></literal></simpara>
   </listitem>
+
+  </itemizedlist>
+ </para>
+
+ <para>
+   Events generated by <application>repmgrd</application> (streaming replication mode):
+
+   <itemizedlist spacing="compact" mark="bullet">
   <listitem>
    <simpara><literal>repmgrd_start</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_shutdown</literal></simpara>
   </listitem>
+   <listitem>
+    <simpara><literal>repmgrd_reload</literal></simpara>
+   </listitem>
   <listitem>
    <simpara><literal>repmgrd_failover_promote</literal></simpara>
   </listitem>
@@ -208,15 +226,41 @@
   <listitem>
    <simpara><literal>repmgrd_failover_aborted</literal></simpara>
   </listitem>
+   <listitem>
+    <simpara><literal>repmgrd_standby_reconnect</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>repmgrd_promote_error</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>repmgrd_local_disconnect</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>repmgrd_local_reconnect</literal></simpara>
+   </listitem>
   <listitem>
    <simpara><literal>repmgrd_upstream_disconnect</literal></simpara>
   </listitem>
   <listitem>
    <simpara><literal>repmgrd_upstream_reconnect</literal></simpara>
   </listitem>
+
   <listitem>
-    <simpara><literal>repmgrd_promote_error</literal></simpara>
+    <simpara><literal>standby_disconnect_manual</literal></simpara>
   </listitem>
+   <listitem>
+    <simpara><literal>standby_failure</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>standby_recovery</literal></simpara>
+   </listitem>
+
+   </itemizedlist>
+ </para>
+
+  <para>
+   Events generated by <application>repmgrd</application> (BDR mode):
+   <itemizedlist spacing="compact" mark="bullet">
   <listitem>
    <simpara><literal>bdr_failover</literal></simpara>
   </listitem>
--- a/doc/filelist.sgml
+++ b/doc/filelist.sgml
@@ -58,6 +58,7 @@
 <!ENTITY repmgrd-cascading-replication SYSTEM "repmgrd-cascading-replication.sgml">
 <!ENTITY repmgrd-network-split SYSTEM "repmgrd-network-split.sgml">
 <!ENTITY repmgrd-witness-server SYSTEM "repmgrd-witness-server.sgml">
+<!ENTITY repmgrd-pausing SYSTEM "repmgrd-pausing.sgml">
 <!ENTITY repmgrd-bdr SYSTEM "repmgrd-bdr.sgml">

 <!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.sgml">
@@ -73,11 +74,15 @@
 <!ENTITY repmgr-node-status SYSTEM "repmgr-node-status.sgml">
 <!ENTITY repmgr-node-check SYSTEM "repmgr-node-check.sgml">
 <!ENTITY repmgr-node-rejoin SYSTEM "repmgr-node-rejoin.sgml">
+<!ENTITY repmgr-node-service SYSTEM "repmgr-node-service.sgml">
 <!ENTITY repmgr-cluster-show SYSTEM "repmgr-cluster-show.sgml">
 <!ENTITY repmgr-cluster-matrix SYSTEM "repmgr-cluster-matrix.sgml">
 <!ENTITY repmgr-cluster-crosscheck SYSTEM "repmgr-cluster-crosscheck.sgml">
 <!ENTITY repmgr-cluster-event SYSTEM "repmgr-cluster-event.sgml">
 <!ENTITY repmgr-cluster-cleanup SYSTEM "repmgr-cluster-cleanup.sgml">
+<!ENTITY repmgr-daemon-status SYSTEM "repmgr-daemon-status.sgml">
+<!ENTITY repmgr-daemon-pause SYSTEM "repmgr-daemon-pause.sgml">
+<!ENTITY repmgr-daemon-unpause SYSTEM "repmgr-daemon-unpause.sgml">

 <!ENTITY appendix-release-notes  SYSTEM "appendix-release-notes.sgml">
 <!ENTITY appendix-faq      SYSTEM "appendix-faq.sgml">
--- a/doc/install-packages.sgml
+++ b/doc/install-packages.sgml
@@ -16,7 +16,7 @@
  <para>
 	&repmgr; RPM packages for RedHat/CentOS variants and Fedora are available from the
 	<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
-	<ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink>; see following
+	<ulink url="https://dl.2ndquadrant.com/">public repository</ulink>; see following
 	section for details.
  </para>
  <para>
@@ -29,9 +29,10 @@
  </para>
  <note>
    <para>
-      &repmgr; packages are designed to be compatible with the community-provided PostgreSQL packages.
+      &repmgr; RPM packages are designed to be compatible with the community-provided PostgreSQL packages
+      and 2ndQuadrant's <ulink url="https://www.2ndquadrant.com/en/resources/2ndqpostgres/">2ndQPostgres</ulink>.
      They may not work with vendor-specific packages such as those provided by RedHat for RHEL
-      customers, as the filesystem layout may be different to the community RPMs.
+      customers, as the PostgreSQL filesystem layout may be different to the community RPMs.
      Please contact your support vendor for assistance.
    </para>
  </note>
@@ -46,67 +47,77 @@
  <sect3 id="installation-packages-redhat-2ndq">
    <title>2ndQuadrant public RPM yum repository</title>

-	<note>
-	  <para>
-		<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink> previously provided a dedicated
-        &repmgr; repository at
-        <ulink url="http://packages.2ndquadrant.com/repmgr/">http://packages.2ndquadrant.com/repmgr/</ulink>.
-		This repository will be deprecated in a future release as it is now replaced by
-		the <ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink>
-		documented below.
-	  </para>
-	</note>
-
    <para>
-      Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
      <ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
-	  <ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink> for 2ndQuadrant software,
-	  including &repmgr;. We recommend using this for all future &repmgr; releases.
-	</para>
-	<para>
-	  General instructions for using this repository can be found on its
-	  <ulink url="https://rpm.2ndquadrant.com/">homepage</ulink>. Specific instructions
-	  for installing &repmgr; follow below.
-	</para>
+      <ulink url="https://dl.2ndquadrant.com/">public repository</ulink> for 2ndQuadrant software,
+      including &repmgr;. We recommend using this for all future &repmgr; releases.
+    </para>
+    <para>
+      General instructions for using this repository can be found on its
+      <ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
+      for installing &repmgr; follow below.
+    </para>
    <para>
      <emphasis>Installation</emphasis>

      <itemizedlist>
-		<listitem>
-		  <para>
-			Locate the repository RPM for your PostgreSQL version from the list at:
-			<ulink url="https://rpm.2ndquadrant.com/">https://rpm.2ndquadrant.com/</ulink>
-		  </para>
-		</listitem>
+	<listitem>
+	  <para>
+	    Locate the repository RPM for your PostgreSQL version from the list at:
+	    <ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink>
+	  </para>
+	</listitem>

        <listitem>
          <para>
-            Install the repository RPM for your distribution and PostgreSQL version
-			(this enables the 2ndQuadrant repository as a source of &repmgr; packages).
-		  </para>
-		  <para>
-			For example, for PostgreSQL 10 on CentOS, execute:
-			<programlisting>
-sudo yum install https://rpm.2ndquadrant.com/site/content/2ndquadrant-repo-10-1-1.el7.noarch.rpm
-			</programlisting>
-		  </para>
-		  <para>
-			Verify that the repository is installed with:
-			<programlisting>
+            Install the repository definition for your distribution and PostgreSQL version
+	    (this enables the 2ndQuadrant repository as a source of &repmgr; packages).
+	  </para>
+	  <para>
+	    For example, for PostgreSQL 10 on CentOS, execute:
+	    <programlisting>
+curl https://dl.2ndquadrant.com/default/release/get/10/rpm | sudo bash</programlisting>
+	  </para>
+
+	  <para>
+	    For PostgreSQL 9.6 on CentOS, execute:
+	    <programlisting>
+curl https://dl.2ndquadrant.com/default/release/get/9.6/rpm | sudo bash</programlisting>
+	  </para>
+
+
+	  <para>
+	    Verify that the repository is installed with:
+	    <programlisting>
 sudo yum repolist</programlisting>
-			The output should contain two entries like this:
-			<programlisting>
-2ndquadrant-repo-10/7/x86_64         2ndQuadrant packages for PG10 for rhel 7 - x86_64           1
-2ndquadrant-repo-10-debug/7/x86_64   2ndQuadrant packages for PG10 for rhel 7 - x86_64 - Debug   1</programlisting>
-		  </para>
-		</listitem>
+	    The output should contain two entries like this:
+	    <programlisting>
+2ndquadrant-dl-default-release-pg10/7/x86_64        2ndQuadrant packages (PG10) for 7 - x86_64          4
+2ndquadrant-dl-default-release-pg10-debug/7/x86_64  2ndQuadrant packages (PG10) for 7 - x86_64 - Debug  3</programlisting>
+	  </para>
+	</listitem>

        <listitem>
          <para>
            Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
            <programlisting>
-$ yum install repmgr10</programlisting>
+sudo yum install repmgr10</programlisting>
          </para>
+          <note>
+            <para>
+              For packages for PostgreSQL 9.6 and earlier, the package name does not contain
+              a period between major and minor version numbers, e.g.
+              <literal>repmgr96</literal>.
+            </para>
+          </note>
+          <tip>
+            <para>
+              To determine the names of available packages, execute:
+              <programlisting>
+yum search repmgr</programlisting>
+            </para>
+          </tip>
+
        </listitem>
      </itemizedlist>
    </para>
@@ -175,61 +186,51 @@ $ yum install repmgr10</programlisting>
    <title>2ndQuadrant public apt repository for Debian/Ubuntu</title>

    <para>
-      Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
      <ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a
-	  <ulink url="https://apt.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
-	  including &repmgr;.
-	</para>
-	<para>
-	  General instructions for using this repository can be found on its
-	  <ulink url="https://apt.2ndquadrant.com/">homepage</ulink>. Specific instructions
-	  for installing &repmgr; follow below.
-	</para>
+      <ulink url="https://dl.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
+      including &repmgr;.
+    </para>
+    <para>
+      General instructions for using this repository can be found on its
+      <ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
+      for installing &repmgr; follow below.
+    </para>

    <para>
+
      <emphasis>Installation</emphasis>

      <itemizedlist>

-		<listitem>
-		  <para>
-			If not already present, install the  <application>apt-transport-https</application> package:
-			<programlisting>
-sudo apt-get install apt-transport-https</programlisting>
-		  </para>
-		</listitem>
+	<listitem>
+	  <para>
+            Install the repository definition for your distribution and PostgreSQL version
+	    (this enables the 2ndQuadrant repository as a source of &repmgr; packages) by executing:
+            <programlisting>
+curl https://dl.2ndquadrant.com/default/release/get/deb | sudo bash</programlisting>
+	  </para>
+          <note>
+            <para>
+              This will automatically install the following additional packages, if not already present:
+              <itemizedlist spacing="compact" mark="bullet">
+                <listitem>
+                  <simpara><literal>lsb-release</literal></simpara>
+                </listitem>
+                <listitem>
+                  <simpara><literal>apt-transport-https</literal></simpara>
+                </listitem>
+              </itemizedlist>
+            </para>
+          </note>
+        </listitem>

-		<listitem>
-		  <para>
-			Create <filename>/etc/apt/sources.list.d/2ndquadrant.list</filename> as follows:
-			<programlisting>
-sudo sh -c 'echo "deb https://apt.2ndquadrant.com/ $(lsb_release -cs)-2ndquadrant main" > /etc/apt/sources.list.d/2ndquadrant.list'</programlisting>
-		  </para>
-		</listitem>

-		<listitem>
-		  <para>
-			Install the 2ndQuadrant <ulink url="https://apt.2ndquadrant.com/site/keys/9904CD4BD6BAF0C3.asc">repository key</ulink>:
-			<programlisting>
-sudo apt-get install curl ca-certificates
-curl https://apt.2ndquadrant.com/site/keys/9904CD4BD6BAF0C3.asc | sudo apt-key add -</programlisting>
-		  </para>
-		</listitem>
-
-		<listitem>
-		  <para>
-			Update the package list
-			<programlisting>
- sudo apt-get update</programlisting>
-		  </para>
-		</listitem>
-
-		<listitem>
-		  <para>
+	<listitem>
+	  <para>
            Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
            <programlisting>
-$ apt-get install postgresql-10-repmgr</programlisting>
-		  </para>
+sudo apt-get install postgresql-10-repmgr</programlisting>
+	  </para>
          <note>
            <para>
            For packages for PostgreSQL 9.6 and earlier, the package name includes
@@ -237,11 +238,11 @@ $ apt-get install postgresql-10-repmgr</programlisting>
            <literal>postgresql-9.6-repmgr</literal>.
            </para>
          </note>
-		</listitem>
+	</listitem>

-	  </itemizedlist>
+      </itemizedlist>

-	</para>
+    </para>

  </sect3>
 </sect2>
--- a/doc/install-requirements.sgml
+++ b/doc/install-requirements.sgml
@@ -13,8 +13,9 @@
  </para>

  <para>
-   From version 4.0, repmgr is compatible with all PostgreSQL versions from 9.3, including PostgreSQL 10.
-   Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
+   &repmgr; 4.x is compatible with all PostgreSQL versions from 9.3. See
+   section <link linkend="install-compatibility-matrix">&repmgr; compatibility matrix</link>
+   for an overview of version compatibility.
  </para>

  <note>
@@ -31,34 +32,33 @@
  <para>
   &repmgr; must be installed on each server in the replication cluster.
   If installing repmgr from packages, the package version must match the PostgreSQL
-   version. If installing from source, repmgr must be compiled against the same
+   version. If installing from source, &repmgr; must be compiled against the same
   major version.
  </para>

+  <note>
+   <simpara>
+     The same &quot;major&quot; &repmgr; version (e.g. <literal>4.2.x</literal>) <emphasis>must</emphasis>
+     be installed on all nodes in the replication cluster. We strongly recommend keeping all
+     nodes on the same (preferably latest) &quot;minor&quot; &repmgr; version to minimize the risk
+     of incompatibilities.
+   </simpara>
+   <simpara>
+     If different &quot;major&quot; &repmgr; versions (e.g. 3.3.x and 4.1.x)
+     are installed on different nodes, in the best case &repmgr; (in particular <application>repmgrd</application>)
+     will not run. In the worst case, you will end up with a broken cluster.
+   </simpara>
+  </note>
+
  <para>
-   A dedicated system user for &repmgr; is *not* required; as many &repmgr; and
+   A dedicated system user for &repmgr; is <emphasis>not</emphasis> required; as many &repmgr; and
   <application>repmgrd</application> actions require direct access to the PostgreSQL data directory,
   these commands should be executed by the <literal>postgres</literal> user.
  </para>

  <para>
-   Passwordless <command>ssh</command> connectivity between all servers in the replication cluster
-   is not required, but is necessary in the following cases:
-   <itemizedlist>
-     <listitem>
-       <simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
-       data directory (in which case <command>rsync</command> is also required)</simpara>
-     </listitem>
-     <listitem>
-       <simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
-     </listitem>
-     <listitem>
-       <simpara>
-        when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
-        and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
-       </simpara>
-     </listitem>
-   </itemizedlist>
+    See also <link linkend="configuration-prerequisites">Prerequisites for configuration</link>
+    for information on networking requirements.
  </para>

  <tip>
@@ -69,4 +69,111 @@
    terminated if your <command>ssh</command> session to the server is interrupted or closed.
    </simpara>
  </tip>
+
+  <sect2 id="install-compatibility-matrix">
+
+    <indexterm>
+      <primary>repmgr</primary>
+      <secondary>compatibility matrix</secondary>
+    </indexterm>
+
+    <indexterm>
+      <primary>compatibility matrix</primary>
+    </indexterm>
+
+    <title>&repmgr; compatibility matrix</title>
+    <para>
+      The following table provides an overview of which &repmgr; version supports
+      which PostgreSQL version.
+    </para>
+
+
+    <table id="repmgr-compatibility-matrix">
+      <title>&repmgr; compatibility matrix</title>
+
+      <tgroup cols="3">
+        <thead>
+          <row>
+            <entry>
+              &repmgr; version
+            </entry>
+            <entry>
+              Latest release
+            </entry>
+            <entry>
+              Supported PostgreSQL versions
+            </entry>
+          </row>
+        </thead>
+
+        <tbody>
+          <row>
+            <entry>
+              &repmgr; 4.x
+            </entry>
+            <entry>
+              <link linkend="release-4.2">4.2</link> (2018-10-24)
+            </entry>
+            <entry>
+              9.3, 9.4, 9.5, 9.6, 10, 11
+            </entry>
+          </row>
+
+          <row>
+            <entry>
+              &repmgr; 3.x
+            </entry>
+            <entry>
+              <ulink url="https://repmgr.org/release-notes-3.3.2.html">3.3.2</ulink> (2017-05-30)
+            </entry>
+            <entry>
+              9.3, 9.4, 9.5, 9.6
+            </entry>
+          </row>
+
+          <row>
+            <entry>
+              &repmgr; 2.x
+            </entry>
+            <entry>
+              <ulink url="https://repmgr.org/release-notes-2.0.3.html">2.0.3</ulink> (2015-04-16)
+            </entry>
+            <entry>
+              9.0, 9.1, 9.2, 9.3, 9.4
+            </entry>
+          </row>
+        </tbody>
+
+      </tgroup>
+    </table>
+
+    <important>
+      <para>
+        The &repmgr; 2.x and 3.x series are no longer maintained or supported.
+        We strongly recommend upgrading to the latest &repmgr; version.
+      </para>
+    </important>
+
+
+    <para>
+      Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
+    </para>
+    <itemizedlist spacing="compact" mark="bullet">
+
+      <listitem>
+        <para>
+          PostgreSQL 9.3 does not support replication slots, so corresponding &repmgr; functionality
+          is not available.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          In PostgreSQL 9.3 and PostgreSQL 9.4, <command>pg_rewind</command> is not part of the core
+          distribution. <command>pg_rewind</command> will need to be compiled separately to be able
+          to use any &repmgr; functionality which takes advantage of it.
+        </para>
+      </listitem>
+    </itemizedlist>
+
+  </sect2>
 </sect1>
--- a/doc/install-source.sgml
+++ b/doc/install-source.sgml
@@ -12,8 +12,8 @@
   To install &repmgr; the prerequisites for compiling
   &postgres; must be installed. These are described in &postgres;'s
   documentation
-   on <ulink url="https://www.postgresql.org/docs/current/install-requirements.html">build requirements</ulink>
-   and <ulink url="https://www.postgresql.org/docs/current/docguide-toolsets.html">build requirements for documentation</ulink>.
+   on <ulink url="https://www.postgresql.org/docs/current/static/install-requirements.html">build requirements</ulink>
+   and <ulink url="https://www.postgresql.org/docs/current/static/docguide-toolsets.html">build requirements for documentation</ulink>.
  </para>

  <para>
@@ -26,12 +26,68 @@
      add the <ulink
      url="http://apt.postgresql.org/">apt.postgresql.org</ulink>
      repository to your <filename>sources.list</filename> if you
-      have not already done so. Then install the pre-requisites for
-      building PostgreSQL with:
+      have not already done so, and ensure the source repository is enabled.
+     </para>
+     <tip>
+       <para>
+         If not configured, the source repository can be added by including
+         a <literal>deb-src</literal> line as a copy of the existing <literal>deb</literal>
+         line in the repository file, which is usually
+         <filename>/etc/apt/sources.list.d/pgdg.list</filename>, e.g.:
+         <programlisting>
+deb http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main
+deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisting>
+       </para>
+     </tip>
+     <para>
+      Then install the prerequisites for
+      building PostgreSQL with e.g.:
      <programlisting>
       sudo apt-get update
       sudo apt-get build-dep postgresql-9.6</programlisting>
      </para>
+
+     <important>
+       <simpara>
+         Select the appropriate PostgreSQL version for your target repmgr version.
+       </simpara>
+     </important>
+
+     <note>
+       <para>
+       If using <command>apt-get build-dep</command> is not possible, the
+       following packages may need to be installed manually:
+
+         <itemizedlist spacing="compact" mark="bullet">
+
+           <listitem>
+             <simpara><literal>libedit-dev</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>libkrb5-dev</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>libpam0g-dev</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>libreadline-dev</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>libselinux1-dev</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>libssl-dev</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>libxml2-dev</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>libxslt1-dev</literal></simpara>
+           </listitem>
+         </itemizedlist>
+       </para>
+     </note>
+
    </listitem>
    <listitem>
     <para>
@@ -45,15 +101,45 @@
       sudo yum install yum-utils openjade docbook-dtds docbook-style-dsssl docbook-style-xsl
       sudo yum-builddep postgresql96</programlisting>
     </para>
+
+     <important>
+       <simpara>
+         Select the appropriate PostgreSQL version for your target repmgr version.
+       </simpara>
+     </important>
+     <note>
+       <para>
+         If using <command>yum-builddep</command> is not possible, the
+         following packages may need to be installed manually:
+
+         <itemizedlist spacing="compact" mark="bullet">
+
+           <listitem>
+             <simpara><literal>libselinux-devel</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>libxml2-devel</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>libxslt-devel</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>openssl-devel</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>pam-devel</literal></simpara>
+           </listitem>
+           <listitem>
+             <simpara><literal>readline-devel</literal></simpara>
+           </listitem>
+
+         </itemizedlist>
+       </para>
+     </note>
    </listitem>
   </itemizedlist>
  </para>

-  <note>
-    <simpara>
-      Select the appropriate PostgreSQL versions for your target repmgr version.
-    </simpara>
-  </note>
 </sect2>


@@ -80,7 +166,7 @@
   </para>

   <para>
-    There are also tags for each &repmgr; release, e.g. <filename>4.0.5</filename>.
+    There are also tags for each &repmgr; release, e.g. <literal>v4.2.0</literal>.
   </para>

   <para>
@@ -165,7 +251,7 @@
   <note>
     <simpara>
       Due to changes in PostgreSQL's documentation build system from PostgreSQL 10,
-       the documentation can currently only be built agains PostgreSQL 9.6 or earlier.
+       the documentation can currently only be built against PostgreSQL 9.6 or earlier.
       This limitation will be fixed when time and resources permit.
     </simpara>
   </note>
--- a/doc/quickstart.sgml
+++ b/doc/quickstart.sgml
@@ -1,6 +1,10 @@
 <chapter id="quickstart" xreflabel="Quick-start guide">
 <title>Quick-start guide</title>

+ <indexterm>
+   <primary>quickstart</primary>
+ </indexterm>
+
 <para>
  This section gives a quick introduction to &repmgr;, including setting up a
  sample &repmgr; installation and a basic replication cluster.
@@ -50,7 +54,8 @@
    </para>
    <para>
      If you want <application>repmgr</application> to copy configuration files which are
-      located outside the PostgreSQL data directory, and/or to test <command>switchover</command>
+      located outside the PostgreSQL data directory, and/or to test
+      <command><link linkend="repmgr-standby-switchover">switchover</link></command>
      functionality, you will also need passwordless SSH connections between both servers, and
      <application>rsync</application> should be installed.
    </para>
@@ -63,7 +68,7 @@
    </tip>
 </sect1>

- <sect1 id="quickstart-postgresql-configuration">
+ <sect1 id="quickstart-postgresql-configuration" xreflabel="PostgreSQL configuration">
   <title>PostgreSQL configuration</title>
   <para>
    On the primary server, a PostgreSQL instance must be initialised and running.
@@ -78,6 +83,13 @@

    max_wal_senders = 10

+    # Enable replication slots; set this figure to at least one more
+    # than the number of standbys which will connect to this server.
+    # Note that repmgr will only make use of replication slots if
+    # "use_replication_slots" is set to "true" in repmgr.conf
+
+    max_replication_slots = 0
+
    # Ensure WAL files contain enough information to enable read-only queries
    # on the standby.
    #
@@ -102,16 +114,6 @@
    # you WALs in a secure place. /bin/true is an example of a command that
    # ignores archiving. Use something more sensible.
    archive_command = '/bin/true'
-
-    # If you have configured "pg_basebackup_options"
-    # in "repmgr.conf" to include the setting "--xlog-method=fetch" (from
-    # PostgreSQL 10 "--wal-method=fetch"), *and* you have not set
-    # "restore_command" in "repmgr.conf"to fetch WAL files from another
-    # source such as Barman, you'll need to set "wal_keep_segments" to a
-    # high enough value to ensure that all WAL files generated while
-    # the standby is being cloned are retained until the standby starts up.
-    #
-    # wal_keep_segments = 5000
   </programlisting>
   <tip>
    <simpara>
@@ -126,6 +128,9 @@
     and the cluster was not initialised using data checksums, you may want to consider enabling
     <varname>wal_log_hints</varname>; for more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
   </para>
+    <para>
+      See also the <link linkend="configuration-postgresql">PostgreSQL configuration</link> section in the <link linkend="configuration">repmgr configuration guide</link>.
+    </para>
 </sect1>

 <sect1 id="quickstart-repmgr-user-database">
@@ -196,11 +201,20 @@
 <sect1 id="quickstart-standby-preparation">
  <title>Preparing the standby</title>
  <para>
-   On the standby, do not create a PostgreSQL instance, but do ensure the destination
+   On the standby, do <emphasis>not</emphasis> create a PostgreSQL instance (i.e.
+   do not execute <application>initdb</application> or any database creation
+   scripts provided by packages), but do ensure the destination
   data directory (and any other directories which you want PostgreSQL to use)
   exist and are owned by the <literal>postgres</literal> system user. Permissions
   must be set to <literal>0700</literal> (<literal>drwx------</literal>).
  </para>
+  <tip>
+    <simpara>
+      &repmgr; will place a copy of the primary's database files in this directory.
+      It will however refuse to run if a PostgreSQL instance has already been
+      created there.
+    </simpara>
+  </tip>
  <para>
   Check the primary database is reachable from the standby using <application>psql</application>:
  </para>
@@ -237,14 +251,42 @@
   server. See sections <xref linkend="configuration"> and <xref linkend="configuration-file">
   for further details about <filename>repmgr.conf</filename>.
  </para>
+
+  <note>
+    <para>
+      &repmgr; only uses <option>pg_bindir</option> when it executes
+      PostgreSQL binaries directly.
+    </para>
+    <para>
+      For user-defined scripts such as <option>promote_command</option> and the
+      various <option>service_*_command</option>s, you <emphasis>must</emphasis>
+      always explicitly provide the full path to the binary or script being
+      executed, even if it is &repmgr; itself.
+    </para>
+    <para>
+      This is because these options can contain user-defined scripts in arbitrary
+      locations, so prepending <option>pg_bindir</option> may break them.
+    </para>
+  </note>
+
  <tip>
   <simpara>
     For Debian-based distributions we recommend explicitly setting
-    <literal>pg_bindir</literal> to the directory where <command>pg_ctl</command> and other binaries
+    <option>pg_bindir</option> to the directory where <command>pg_ctl</command> and other binaries
    not in the standard path are located. For PostgreSQL 9.6 this would be <filename>/usr/lib/postgresql/9.6/bin/</filename>.
   </simpara>
  </tip>

+  <tip>
+    <simpara>
+      If your distribution places the &repmgr; binaries in a location other than the
+      PostgreSQL installation directory, specify this with <option>repmgr_bindir</option>
+      to enable &repmgr; to perform operations (e.g.
+      <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>)
+      on other nodes.
+    </simpara>
+  </tip>
+
  <para>
   See the file
   <ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</>
--- a/doc/repmgr-cluster-cleanup.sgml
+++ b/doc/repmgr-cluster-cleanup.sgml
@@ -15,9 +15,14 @@
    <title>Description</title>
    <para>
      Purges monitoring history from the <literal>repmgr.monitoring_history</literal> table to
-      prevent excessive table growth. Use the <literal>-k/--keep-history</literal> to specify the
-      number of days of monitoring history to retain. This command can be used
-      manually or as a cronjob.
+      prevent excessive table growth.
+    </para>
+    <para>
+      By default <emphasis>all</emphasis> data will be removed; use the <option>-k/--keep-history</option>
+      option to specify the number of days of monitoring history to retain.
+    </para>
+    <para>
+      This command can be executed manually or as a cronjob.
    </para>
  </refsect1>

@@ -38,4 +43,35 @@
      <filename>repmgr.conf</filename>.
    </para>
  </refsect1>
+
+  <refsect1 id="repmgr-cluster-cleanup-events">
+    <title>Event notifications</title>
+    <para>
+      A <literal>cluster_cleanup</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--node-id</option></term>
+        <listitem>
+          <para>
+            Only delete monitoring records for the specified node.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>See also</title>
+    <para>
+      For more details see the sections <xref linkend="repmgrd-monitoring"> and
+      <xref linkend="repmgrd-monitoring-configuration">.
+    </para>
+  </refsect1>
+
 </refentry>
--- a/doc/repmgr-cluster-crosscheck.sgml
+++ b/doc/repmgr-cluster-crosscheck.sgml
@@ -55,12 +55,37 @@
        </listitem>
      </varlistentry>

+      <varlistentry>
+        <term><option>ERR_BAD_SSH (12)</option></term>
+        <listitem>
+          <para>
+            One or more nodes could not be accessed via SSH.
+          </para>
+          <note>
+            <simpara>
+              This only applies to nodes unreachable from the node where
+              this command is executed.
+            </simpara>
+            <simpara>
+              It's also possible that the crosscheck establishes that
+              connections between PostgreSQL on all nodes are functioning,
+              even if SSH access between some nodes is not possible.
+            </simpara>
+          </note>
+        </listitem>
+      </varlistentry>
+
      <varlistentry>
        <term><option>ERR_NODE_STATUS (25)</option></term>
        <listitem>
          <para>
-            One or more nodes could not be reached.
+            PostgreSQL on one or more nodes could not be reached.
          </para>
+          <note>
+            <simpara>
+              This error code overrides <option>ERR_BAD_SSH</option>.
+            </simpara>
+          </note>
        </listitem>
      </varlistentry>

--- a/doc/repmgr-cluster-matrix.sgml
+++ b/doc/repmgr-cluster-matrix.sgml
@@ -115,12 +115,26 @@
        </listitem>
      </varlistentry>

+      <varlistentry>
+        <term><option>ERR_BAD_SSH (12)</option></term>
+        <listitem>
+          <para>
+            One or more nodes could not be accessed via SSH.
+          </para>
+        </listitem>
+      </varlistentry>
+
      <varlistentry>
        <term><option>ERR_NODE_STATUS (25)</option></term>
        <listitem>
          <para>
-            One or more nodes could not be reached.
+            PostgreSQL on one or more nodes could not be reached.
          </para>
+          <note>
+            <simpara>
+              This error code overrides <option>ERR_BAD_SSH</option>.
+            </simpara>
+          </note>
        </listitem>
      </varlistentry>

--- a/doc/repmgr-cluster-show.sgml
+++ b/doc/repmgr-cluster-show.sgml
@@ -81,36 +81,56 @@

  <refsect1>
    <title>Options</title>
-    <para>
-      <command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
-      outputs the replication cluster's status in a simple CSV format, suitable for
-      parsing by scripts:
-      <programlisting>
+
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--csv</option></term>
+        <listitem>
+		  <para>
+			<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
+			outputs the replication cluster's status in a simple CSV format, suitable for
+			parsing by scripts, e.g.:
+			<programlisting>
    $ repmgr -f /etc/repmgr.conf cluster show --csv
    1,-1,-1
    2,0,0
    3,0,1</programlisting>
-    </para>
-    <para>
-      The columns have following meanings:
-      <itemizedlist spacing="compact" mark="bullet">
-        <listitem>
-          <simpara>
-            node ID
-          </simpara>
-        </listitem>
-        <listitem>
-          <simpara>
+		  </para>
+		  <para>
+			The columns have the following meanings:
+			<itemizedlist spacing="compact" mark="bullet">
+			  <listitem>
+				<simpara>
+				  node ID
+				</simpara>
+			  </listitem>
+			  <listitem>
+				<simpara>
            availability (0 = available, -1 = unavailable)
-          </simpara>
-        </listitem>
+				</simpara>
+			  </listitem>
+			  <listitem>
+				<simpara>
+				  recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
+				</simpara>
+			  </listitem>
+			</itemizedlist>
+		  </para>
+		</listitem>
+	  </varlistentry>
+
+      <varlistentry>
+        <term><option>--verbose</option></term>
        <listitem>
-          <simpara>
-            recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
-          </simpara>
+          <para>
+			Display the full text of any database connection error messages
+          </para>
        </listitem>
-      </itemizedlist>
-    </para>
+      </varlistentry>
+
+	</variablelist>
+
  </refsect1>


@@ -130,11 +150,31 @@
        </listitem>
      </varlistentry>

+      <varlistentry>
+        <term><option>ERR_BAD_CONFIG (1)</option></term>
+        <listitem>
+          <para>
+            An issue was encountered while attempting to retrieve
+            &repmgr; metadata.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>ERR_DB_CONN (6)</option></term>
+        <listitem>
+          <para>
+            &repmgr; was unable to connect to the local PostgreSQL instance.
+          </para>
+        </listitem>
+      </varlistentry>
+
      <varlistentry>
        <term><option>ERR_NODE_STATUS (25)</option></term>
        <listitem>
          <para>
-            One or more issues were detected.
+            One or more issues were detected with the replication configuration,
+            e.g. a node was not in its expected state.
          </para>
        </listitem>
      </varlistentry>
@@ -145,7 +185,7 @@
  <refsect1>
    <title>See also</title>
    <para>
-     <xref linkend="repmgr-node-status">, <xref linkend="repmgr-node-check">
+     <xref linkend="repmgr-node-status">, <xref linkend="repmgr-node-check">, <xref linkend="repmgr-daemon-status">
    </para>
  </refsect1>

--- a/doc/repmgr-daemon-pause.sgml
+++ b/doc/repmgr-daemon-pause.sgml
@@ -0,0 +1,109 @@
+<refentry id="repmgr-daemon-pause">
+  <indexterm>
+    <primary>repmgr daemon pause</primary>
+  </indexterm>
+
+  <refmeta>
+    <refentrytitle>repmgr daemon pause</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr daemon pause</refname>
+    <refpurpose>Instruct all <application>repmgrd</application> instances in the replication cluster to pause failover operations</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      This command can be run on any active node in the replication cluster to instruct all
+      running <application>repmgrd</application> instances to &quot;pause&quot; themselves, i.e. take no
+      action (such as promoting themselves or following a new primary) if a failover event is detected.
+    </para>
+    <para>
+      This functionality is useful for performing maintenance operations, such as switchovers
+      or upgrades, which might otherwise trigger a failover if <application>repmgrd</application>
+      is running normally.
+    </para>
+    <note>
+      <para>
+        It's important to wait a few seconds after restarting PostgreSQL on any node before running
+        <command>repmgr daemon pause</command>, as the <application>repmgrd</application> instance
+        on the restarted node will take a second or two before it has updated its status.
+      </para>
+    </note>
+    <para>
+      <xref linkend="repmgr-daemon-unpause"> will instruct all previously paused <application>repmgrd</application>
+      instances to resume normal failover operation.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Execution</title>
+    <para>
+      <command>repmgr daemon pause</command> can be executed on any active node in the
+      replication cluster. A valid <filename>repmgr.conf</filename> file is required.
+      It will have no effect on previously paused nodes.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Example</title>
+    <para>
+    <programlisting>
+$ repmgr -f /etc/repmgr.conf daemon pause
+NOTICE: node 1 (node1) paused
+NOTICE: node 2 (node2) paused
+NOTICE: node 3 (node3) paused</programlisting>
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <variablelist>
+      <varlistentry>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+            Check if nodes are reachable but don't pause <application>repmgrd</application>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Exit codes</title>
+    <para>
+      The following exit codes can be emitted by <command>repmgr daemon pause</command>:
+    </para>
+    <variablelist>
+
+      <varlistentry>
+        <term><option>SUCCESS (0)</option></term>
+        <listitem>
+          <para>
+            <application>repmgrd</application> could be paused on all nodes.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>ERR_REPMGRD_PAUSE (26)</option></term>
+        <listitem>
+          <para>
+           <application>repmgrd</application> could not be paused on one or more nodes.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>See also</title>
+    <para>
+      <xref linkend="repmgr-daemon-unpause">, <xref linkend="repmgr-daemon-status">
+    </para>
+  </refsect1>
+</refentry>
+
--- a/doc/repmgr-daemon-status.sgml
+++ b/doc/repmgr-daemon-status.sgml
@@ -0,0 +1,165 @@
+<refentry id="repmgr-daemon-status">
+  <indexterm>
+    <primary>repmgr daemon status</primary>
+  </indexterm>
+
+  <refmeta>
+    <refentrytitle>repmgr daemon status</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr daemon status</refname>
+    <refpurpose>display information about the status of <application>repmgrd</application> on each node in the cluster</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      This command provides an overview of all active nodes in the cluster and the state
+      of each node's <application>repmgrd</application> instance. It can be used to check
+      the result of <xref linkend="repmgr-daemon-pause"> and <xref linkend="repmgr-daemon-unpause">
+      operations.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Execution</title>
+    <para>
+      <command>repmgr daemon status</command> can be executed on any active node in the
+      replication cluster. A valid <filename>repmgr.conf</filename> file is required.
+    </para>
+
+    <note>
+      <para>
+        After restarting PostgreSQL on any node, the <application>repmgrd</application> instance
+        will take a second or two before it is able to update its status. Until then,
+        <application>repmgrd</application> will be shown as not running.
+      </para>
+    </note>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Examples</title>
+    <para>
+      <application>repmgrd</application> running normally on all nodes:
+    <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
+ ID | Name  | Role    | Status  | repmgrd | PID  | Paused?
+----+-------+---------+---------+---------+------+---------
+ 1  | node1 | primary | running | running | 7851 | no
+ 2  | node2 | standby | running | running | 7889 | no
+ 3  | node3 | standby | running | running | 7918 | no</programlisting>
+    </para>
+
+    <para>
+      <application>repmgrd</application> paused on all nodes (using <xref linkend="repmgr-daemon-pause">):
+    <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
+ ID | Name  | Role    | Status  | repmgrd | PID  | Paused?
+----+-------+---------+---------+---------+------+---------
+ 1  | node1 | primary | running | running | 7851 | yes
+ 2  | node2 | standby | running | running | 7889 | yes
+ 3  | node3 | standby | running | running | 7918 | yes</programlisting>
+    </para>
+
+    <para>
+      <application>repmgrd</application> not running on one node:
+    <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
+ ID | Name  | Role    | Status  | repmgrd     | PID  | Paused?
+----+-------+---------+---------+-------------+------+---------
+ 1  | node1 | primary | running | running     | 7851 | yes
+ 2  | node2 | standby | running | not running | n/a  | n/a
+ 3  | node3 | standby | running | running     | 7918 | yes</programlisting>
+    </para>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--csv</option></term>
+        <listitem>
+		  <para>
+			<command>repmgr daemon status</command> accepts an optional parameter <literal>--csv</literal>, which
+			outputs the replication cluster's status in a simple CSV format, suitable for
+			parsing by scripts, e.g.:
+			<programlisting>
+    $ repmgr -f /etc/repmgr.conf daemon status --csv
+    1,node1,primary,1,1,10204,1
+    2,node2,standby,1,0,-1,1
+    3,node3,standby,1,1,10225,1</programlisting>
+		  </para>
+		  <para>
+			The columns have the following meanings:
+			<itemizedlist spacing="compact" mark="bullet">
+			  <listitem>
+				<simpara>
+				  node ID
+				</simpara>
+			  </listitem>
+
+			  <listitem>
+				<simpara>
+                  node name
+				</simpara>
+			  </listitem>
+
+			  <listitem>
+				<simpara>
+                  node type (primary or standby)
+				</simpara>
+			  </listitem>
+
+			  <listitem>
+				<simpara>
+                  PostgreSQL server running
+				</simpara>
+			  </listitem>
+
+			  <listitem>
+				<simpara>
+                  <application>repmgrd</application> running (1 = running, 0 = not running)
+				</simpara>
+			  </listitem>
+
+			  <listitem>
+				<simpara>
+                  <application>repmgrd</application> PID (-1 if not running)
+				</simpara>
+			  </listitem>
+
+			  <listitem>
+				<simpara>
+                  <application>repmgrd</application> paused (1 = paused, 0 = not paused)
+				</simpara>
+			  </listitem>
+
+			</itemizedlist>
+		  </para>
+		</listitem>
+	  </varlistentry>
+
+      <varlistentry>
+        <term><option>--verbose</option></term>
+        <listitem>
+          <para>
+			Display the full text of any database connection error messages.
+          </para>
+        </listitem>
+      </varlistentry>
+
+	</variablelist>
+
+  </refsect1>
+
+
+
+  <refsect1>
+    <title>See also</title>
+    <para>
+      <xref linkend="repmgr-daemon-pause">, <xref linkend="repmgr-daemon-unpause">, <xref linkend="repmgr-cluster-show">
+    </para>
+  </refsect1>
+</refentry>
--- a/doc/repmgr-daemon-unpause.sgml
+++ b/doc/repmgr-daemon-unpause.sgml
@@ -0,0 +1,103 @@
+<refentry id="repmgr-daemon-unpause">
+  <indexterm>
+    <primary>repmgr daemon unpause</primary>
+  </indexterm>
+
+  <refmeta>
+    <refentrytitle>repmgr daemon unpause</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr daemon unpause</refname>
+    <refpurpose>Instruct all <application>repmgrd</application> instances in the replication cluster to resume failover operations</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      This command can be run on any active node in the replication cluster to instruct all
+      running <application>repmgrd</application> instances to &quot;unpause&quot;
+      (following a previous execution of <xref linkend="repmgr-daemon-pause">)
+      and resume normal failover/monitoring operation.
+    </para>
+
+    <note>
+      <para>
+        It's important to wait a few seconds after restarting PostgreSQL on any node before running
+        <command>repmgr daemon unpause</command>, as the <application>repmgrd</application> instance
+        on the restarted node will take a second or two before it has updated its status.
+      </para>
+    </note>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Execution</title>
+    <para>
+     <command>repmgr daemon unpause</command> can be executed on any active node in the
+      replication cluster. A valid <filename>repmgr.conf</filename> file is required.
+      It will have no effect on nodes which are not already paused.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Example</title>
+    <para>
+    <programlisting>
+$ repmgr -f /etc/repmgr.conf daemon unpause
+NOTICE: node 1 (node1) unpaused
+NOTICE: node 2 (node2) unpaused
+NOTICE: node 3 (node3) unpaused</programlisting>
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <variablelist>
+      <varlistentry>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+            Check if nodes are reachable but don't unpause <application>repmgrd</application>.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Exit codes</title>
+    <para>
+      The following exit codes can be emitted by <command>repmgr daemon unpause</command>:
+    </para>
+    <variablelist>
+
+      <varlistentry>
+        <term><option>SUCCESS (0)</option></term>
+        <listitem>
+          <para>
+            <application>repmgrd</application> could be unpaused on all nodes.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>ERR_REPMGRD_PAUSE (26)</option></term>
+        <listitem>
+          <para>
+           <application>repmgrd</application> could not be unpaused on one or more nodes.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>See also</title>
+    <para>
+      <xref linkend="repmgr-daemon-pause">, <xref linkend="repmgr-daemon-status">
+    </para>
+  </refsect1>
+</refentry>
+
--- a/doc/repmgr-node-check.sgml
+++ b/doc/repmgr-node-check.sgml
@@ -30,7 +30,8 @@
            Replication lag: OK (N/A - node is primary)
            WAL archiving: OK (0 pending files)
            Downstream servers: OK (2 of 2 downstream nodes attached)
-            Replication slots: OK (node has no replication slots)</programlisting>
+            Replication slots: OK (node has no replication slots)
+            Missing replication slots: OK (node has no missing replication slots)</programlisting>
    </para>
  </refsect1>
  <refsect1>
--- a/doc/repmgr-node-rejoin.sgml
+++ b/doc/repmgr-node-rejoin.sgml
@@ -28,6 +28,10 @@
        If the node is running and needs to be attached to the current primary, use
        <xref linkend="repmgr-standby-follow">.
      </para>
+      <para>
+        Note that <xref linkend="repmgr-standby-follow"> can only be used for standbys which have not diverged
+        from the rest of the cluster.
+      </para>
    </tip>
  </refsect1>

@@ -63,10 +67,10 @@
        <term><option>--force-rewind[=/path/to/pg_rewind]</option></term>
        <listitem>
          <para>
-            Execute <application>pg_rewind</application> if necessary.
+            Execute <application>pg_rewind</application>.
          </para>
          <para>
-            It is only necessary to provide the <application>pg_rewind</application>
+            It is only necessary to provide the <application>pg_rewind</application> path
            if using PostgreSQL 9.3 or 9.4, and <application>pg_rewind</application>
            is not installed in the PostgreSQL <filename>bin</filename> directory.
          </para>
@@ -115,7 +119,7 @@

    </variablelist>
  </refsect1>
- <refsect1>
+  <refsect1>
    <title>Configuration file settings</title>

    <para>
@@ -132,8 +136,9 @@
 	  </itemizedlist>
 	</para>

- </refsect1>
-  <refsect1>
+  </refsect1>
+
+  <refsect1 id="repmgr-node-rejoin-events">
    <title>Event notifications</title>
    <para>
      A <literal>node_rejoin</literal> <link linkend="event-notifications">event notification</link> will be generated.
@@ -188,7 +193,7 @@
    </note>

    <para>
-      To have <command>repmgr node rejoin</command> use <command>pg_rewind</command> if required,
+      To have <command>repmgr node rejoin</command> use <command>pg_rewind</command>,
      pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
      to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
    </para>
@@ -221,6 +226,15 @@
    INFO: pg_rewind would now be executed
    DETAIL: pg_rewind command is:
      pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
+
+    <note>
+      <para>
+        If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
+        this checks the prerequisites for using <application>pg_rewind</application>, but cannot
+        predict the outcome of actually executing <application>pg_rewind</application>.
+      </para>
+    </note>
+
    <programlisting>
    $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
         --force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
--- a/doc/repmgr-node-service.sgml
+++ b/doc/repmgr-node-service.sgml
@@ -0,0 +1,151 @@
+<refentry id="repmgr-node-service">
+  <indexterm>
+    <primary>repmgr node service</primary>
+  </indexterm>
+
+  <refmeta>
+    <refentrytitle>repmgr node service</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr node service</refname>
+    <refpurpose>show or execute the system service command to stop/start/restart/reload/promote a node</refpurpose>
+  </refnamediv>
+
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      Shows or executes the system service command to stop/start/restart/reload/promote a node.
+    </para>
+    <para>
+      This command is mainly meant for internal &repmgr; usage, but is useful for
+      confirming the command configuration.
+    </para>
+  </refsect1>
+
+  <refsect1>
+
+    <title>Options</title>
+
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+            Log the steps which would be taken, including displaying the command which would be executed.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--action</option></term>
+        <listitem>
+          <para>
+            The action to perform. One of <literal>start</literal>, <literal>stop</literal>,
+            <literal>restart</literal>, <literal>reload</literal> or <literal>promote</literal>.
+          </para>
+          <para>
+            If the parameter <option>--list-actions</option> is provided together with
+            <option>--action</option>, the command which would be executed will be printed.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+      <varlistentry>
+        <term><option>--list-actions</option></term>
+        <listitem>
+          <para>
+            List all configured commands.
+          </para>
+          <para>
+            If the parameter <option>--action</option> is provided together with
+            <option>--list-actions</option>, the command which would be executed for that
+            particular action will be printed.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+     <varlistentry>
+        <term><option>--checkpoint</option></term>
+        <listitem>
+          <para>
+            Issue a <command>CHECKPOINT</command> before stopping or restarting the node.
+          </para>
+        </listitem>
+     </varlistentry>
+    </variablelist>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Exit codes</title>
+    <para>
+      The following exit codes can be emitted by <command>repmgr node service</command>:
+    </para>
+    <variablelist>
+
+      <varlistentry>
+        <term><option>SUCCESS (0)</option></term>
+        <listitem>
+          <para>
+            No issues were detected.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>ERR_LOCAL_COMMAND (5)</option></term>
+        <listitem>
+          <para>
+            Execution of the system service command failed.
+          </para>
+        </listitem>
+      </varlistentry>
+
+   </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Examples</title>
+    <para>
+      See what action would be taken for a restart:
+      <programlisting>
+[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --action=restart --checkpoint --dry-run
+INFO: a CHECKPOINT would be issued here
+INFO: would execute server command "sudo service postgresql-11 restart"</programlisting>
+    </para>
+
+    <para>
+      Restart the PostgreSQL instance:
+      <programlisting>
+[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --action=restart --checkpoint
+NOTICE: issuing CHECKPOINT
+DETAIL: executing server command "sudo service postgresql-11 restart"
+Redirecting to /bin/systemctl restart postgresql-11.service</programlisting>
+    </para>
+
+    <para>
+      List all commands:
+      <programlisting>
+[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --list-actions
+Following commands would be executed for each action:
+
+    start: "sudo service postgresql-11 start"
+     stop: "sudo service postgresql-11 stop"
+  restart: "sudo service postgresql-11 restart"
+   reload: "sudo service postgresql-11 reload"
+  promote: "/usr/pgsql-11/bin/pg_ctl  -w -D '/var/lib/pgsql/11/data' promote"</programlisting>
+    </para>
+
+    <para>
+      List a single command:
+      <programlisting>
+[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --list-actions --action=promote
+/usr/pgsql-11/bin/pg_ctl  -w -D '/var/lib/pgsql/11/data' promote      </programlisting>
+    </para>
+  </refsect1>
+</refentry>
--- a/doc/repmgr-primary-register.sgml
+++ b/doc/repmgr-primary-register.sgml
@@ -75,10 +75,18 @@
  </refsect1>


-  <refsect1>
+  <refsect1 id="repmgr-primary-register-events">
    <title>Event notifications</title>
    <para>
-      A <literal>primary_register</literal> <link linkend="event-notifications">event notification</link> will be generated.
+      The following <link linkend="event-notifications">event notifications</link> will be generated:
+      <itemizedlist spacing="compact" mark="bullet">
+        <listitem>
+          <simpara><literal>cluster_created</literal></simpara>
+        </listitem>
+        <listitem>
+          <simpara><literal>primary_register</literal></simpara>
+        </listitem>
+      </itemizedlist>
    </para>
  </refsect1>

--- a/doc/repmgr-primary-unregister.sgml
+++ b/doc/repmgr-primary-unregister.sgml
@@ -64,7 +64,7 @@

  </refsect1>

-  <refsect1>
+  <refsect1 id="repmgr-primary-unregister-events">
    <title>Event notifications</title>
    <para>
      A <literal>primary_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-standby-clone.sgml
+++ b/doc/repmgr-standby-clone.sgml
@@ -49,7 +49,7 @@
    not be copied by default. &repmgr; can copy these files, either to the same
    location on the standby server (provided appropriate directory and file permissions
    are available), or into the standby's data directory. This requires passwordless
-    SSH access to the primary server. Add the option <literal>--copy-external-config-files</literal>
+    SSH access to the primary server. Add the option <option>--copy-external-config-files</option>
    to the <command>repmgr standby clone</command> command; by default files will be copied to
    the same path as on the upstream server. Note that the user executing <command>repmgr</command>
    must have write access to those directories.
@@ -59,12 +59,29 @@
    <literal>--copy-external-config-files=pgdata</literal>, but note that
    any include directives in the copied files may need to be updated.
   </para>
+
+   <note>
+	 <para>
+	   When executing <command>repmgr standby clone</command> with the
+	   <option>--copy-external-config-files</option> and <option>--dry-run</option>
+	   options, &repmgr; will check the SSH connection to the source node, but
+	   will not verify whether the files can actually be copied.
+	 </para>
+	 <para>
+	   During the actual clone operation, a check will be made before the database itself
+	   is cloned to determine whether the files can actually be copied; if any problems are
+	   encountered, the clone operation will be aborted, enabling the user to fix
+	   any issues before retrying the clone operation.
+	 </para>
+   </note>
+
   <tip>
    <simpara>
     For reliable configuration file management we recommend using a
     configuration management tool such as Ansible, Chef, Puppet or Salt.
    </simpara>
   </tip>
+
  </refsect1>

  <refsect1 id="repmgr-standby-clone-recovery-conf">
@@ -333,7 +350,7 @@
    </variablelist>
  </refsect1>

-  <refsect1>
+  <refsect1 id="repmgr-standby-clone-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_clone</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-standby-follow.sgml
+++ b/doc/repmgr-standby-follow.sgml
@@ -94,7 +94,7 @@
    </variablelist>
  </refsect1>

-  <refsect1>
+  <refsect1 id="repmgr-standby-follow-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_follow</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-standby-promote.sgml
+++ b/doc/repmgr-standby-promote.sgml
@@ -50,7 +50,7 @@
  </refsect1>


-  <refsect1>
+  <refsect1 id="repmgr-standby-promote-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_promote</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-standby-register.sgml
+++ b/doc/repmgr-standby-register.sgml
@@ -159,7 +159,7 @@
    </variablelist>
  </refsect1>

-  <refsect1>
+  <refsect1 id="repmgr-standby-register-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_register</literal> <link linkend="event-notifications">event notification</link>
--- a/doc/repmgr-standby-switchover.sgml
+++ b/doc/repmgr-standby-switchover.sgml
@@ -35,6 +35,10 @@
        &repmgr; will attempt to check for potential issues but cannot guarantee
        a successful switchover.
      </para>
+      <para>
+        &repmgr; will refuse to perform the switchover if an exclusive backup is running on
+        the current primary.
+      </para>
    </note>
    <para>
      For more details on performing a switchover, including preparation and configuration,
@@ -43,11 +47,14 @@

    <note>
      <para>
-        <application>repmgrd</application> should not be active on any nodes while a switchover is being
-        executed. This restriction may be lifted in a later version.
+        From <link linkend="release-4.2">repmgr 4.2</link>, &repmgr; will instruct any running
+        <application>repmgrd</application> instances to pause operations while the switchover
+        is being carried out, to prevent <application>repmgrd</application> from
+        unintentionally promoting a node. For more details, see <xref linkend="repmgrd-pausing">.
      </para>
      <para>
-        &repmgr; will not perform the switchover if an exclusive backup is running on the current primary.
+        Users of &repmgr; versions prior to 4.2 should ensure that <application>repmgrd</application>
+        is not running on any nodes while a switchover is being executed.
      </para>
    </note>

@@ -61,8 +68,9 @@
        <term><option>--always-promote</option></term>
        <listitem>
          <para>
-            Promote standby to primary, even if it is behind original primary
-            (original primary will be shut down in any case).
+            Promote standby to primary, even if it is behind or has diverged
+            from the original primary. The original primary will be shut down in any case,
+            and will need to be manually reintegrated into the replication cluster.
          </para>
        </listitem>
      </varlistentry>
@@ -122,6 +130,21 @@
        </listitem>
      </varlistentry>

+      <varlistentry>
+        <term><option>--repmgrd-no-pause</option></term>
+        <listitem>
+          <para>
+            Don't pause <application>repmgrd</application> while executing a switchover.
+          </para>
+          <para>
+            This option should not be used unless you take steps by other means
+            to ensure <application>repmgrd</application> is paused or not
+            running on all nodes.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
     <varlistentry>
        <term><option>--siblings-follow</option></term>
        <listitem>
@@ -141,19 +164,7 @@
     Note that following parameters in <filename>repmgr.conf</filename> are relevant to the
     switchover operation:
     <itemizedlist spacing="compact" mark="bullet">
-       <listitem>
-         <simpara>
-           <literal>reconnect_attempts</literal>: number of times to check the original primary
-           for a clean shutdown after executing the shutdown command, before aborting
-         </simpara>
-       </listitem>
-       <listitem>
-         <simpara>
-           <literal>reconnect_interval</literal>: interval (in seconds) to check the original
-           primary for a clean shutdown after executing the shutdown command (up to a maximum
-           of <literal>reconnect_attempts</literal> tries)
-         </simpara>
-       </listitem>
+
       <listitem>
         <simpara>
           <literal>replication_lag_critical</literal>:
@@ -163,10 +174,29 @@
         </simpara>
       </listitem>

+       <listitem>
+         <simpara>
+           <literal>shutdown_check_timeout</literal>: maximum number of seconds to wait for the
+           demotion candidate (current primary) to shut down, before aborting the switchover.
+         </simpara>
+         <simpara>
+           Note that this parameter is set on the node where <command>repmgr standby switchover</command>
+           is executed (promotion candidate); setting it on the demotion candidate (former primary) will
+           have no effect.
+         </simpara>
+         <note>
+           <para>
+             In versions prior to <link linkend="release-4.2">&repmgr; 4.2</link>, <command>repmgr standby switchover</command> would
+             use the values defined in <literal>reconnect_attempts</literal> and <literal>reconnect_interval</literal>
+             to determine the timeout for demotion candidate shutdown.
+           </para>
+         </note>
+       </listitem>
+
       <listitem>
         <simpara>
           <literal>standby_reconnect_timeout</literal>:
-           number of seconds to attempt to wait for the demoted primary
+           maximum number of seconds to attempt to wait for the demotion candidate (former primary)
           to reconnect to the promoted primary (default: 60 seconds)
         </simpara>
       </listitem>
@@ -183,12 +213,7 @@
      Execute with the <literal>--dry-run</literal> option to test the switchover as far as
      possible without actually changing the status of either node.
    </para>
-    <important>
-      <para>
-        <application>repmgrd</application> must be shut down on all nodes while a switchover is being
-        executed. This restriction will be removed in a future &repmgr; version.
-      </para>
-    </important>
+
    <para>
      External database connections, e.g. from an application, should not be permitted while
      the switchover is taking place. In particular, active transactions on the primary
@@ -196,7 +221,7 @@
    </para>
  </refsect1>

-  <refsect1>
+  <refsect1 id="repmgr-standby-switchover-events">
    <title>Event notifications</title>
    <para>
      <literal>standby_switchover</literal> and <literal>standby_promote</literal>
--- a/doc/repmgr-standby-unregister.sgml
+++ b/doc/repmgr-standby-unregister.sgml
@@ -59,7 +59,7 @@
    </variablelist>
  </refsect1>

-  <refsect1>
+  <refsect1 id="repmgr-standby-unregister-events">
    <title>Event notifications</title>
    <para>
      A <literal>standby_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-witness-register.sgml
+++ b/doc/repmgr-witness-register.sgml
@@ -23,14 +23,19 @@
      use of the witness server with <application>repmgrd</application>.
    </para>
    <para>
-      When executing <command>repmgr witness register</command>, connection information
-      for the cluster primary server must also be provided. &repmgr; will automatically
-      use the <varname>user</varname> and <varname>dbname</varname> values defined
-      in the <varname>conninfo</varname> string defined in the  witness node's
-      <filename>repmgr.conf</filename>, if these are not explicitly provided.
+      When executing <command>repmgr witness register</command>, database connection
+      information for the cluster primary server must also be provided.
    </para>
    <para>
-      Execute with the <literal>--dry-run</literal> option to check what would happen
+      In most cases it's only necessary to provide the primary's hostname with
+      the <option>-h</option>/<option>--host</option> option; &repmgr; will
+      automatically use the <varname>user</varname> and <varname>dbname</varname>
+      values defined in the <varname>conninfo</varname> string defined in the
+      witness node's <filename>repmgr.conf</filename>, unless these are explicitly
+      provided as command line options.
+    </para>
+    <para>
+      Execute with the <option>--dry-run</option> option to check what would happen
      without actually registering the witness server.
    </para>
  </refsect1>
@@ -50,7 +55,7 @@
  </refsect1>


-  <refsect1>
+  <refsect1 id="repmgr-witness-register-events">
    <title>Event notifications</title>
    <para>
      A <literal>witness_register</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr-witness-unregister.sgml
+++ b/doc/repmgr-witness-unregister.sgml
@@ -92,7 +92,7 @@
  </refsect1>


-  <refsect1>
+  <refsect1 id="repmgr-witness-unregister-events">
    <title>Event notifications</title>
    <para>
      A <literal>witness_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
--- a/doc/repmgr.sgml
+++ b/doc/repmgr.sgml
@@ -24,8 +24,14 @@
  <abstract>
   <para>
   This is the official documentation of &repmgr; &repmgrversion; for
-   use with PostgreSQL 9.3 - PostgreSQL 10.
-   It describes the functionality supported by the current version of &repmgr;.
+   use with PostgreSQL 9.3 - PostgreSQL 11.
+   </para>
+   <para>
+     &repmgr; is being continually developed and we strongly recommend using the
+     latest version. Please check the
+     <ulink url="https://repmgr.org/">repmgr website</ulink> for details
+     about the current &repmgr; version as well as the
+     <ulink url="https://repmgr.org/docs/current/index.html">current documentation</ulink>.
   </para>

   <para>
@@ -86,6 +92,7 @@
  &repmgrd-cascading-replication;
  &repmgrd-network-split;
  &repmgrd-witness-server;
+  &repmgrd-pausing;
  &repmgrd-degraded-monitoring;
  &repmgrd-monitoring;
  &repmgrd-bdr;
@@ -107,11 +114,15 @@
  &repmgr-node-status;
  &repmgr-node-check;
  &repmgr-node-rejoin;
+  &repmgr-node-service;
  &repmgr-cluster-show;
  &repmgr-cluster-matrix;
  &repmgr-cluster-crosscheck;
  &repmgr-cluster-event;
  &repmgr-cluster-cleanup;
+  &repmgr-daemon-status;
+  &repmgr-daemon-pause;
+  &repmgr-daemon-unpause;
 </part>

 &appendix-release-notes;
--- a/doc/repmgrd-bdr.sgml
+++ b/doc/repmgrd-bdr.sgml
@@ -10,12 +10,12 @@

  <title>BDR failover with repmgrd</title>
  <para>
-    &repmgr; 4.x provides support for monitoring BDR nodes and taking action in
+    &repmgr; 4.x provides support for monitoring a pair of BDR 2.x nodes and taking action in
    case one of the nodes fails.
  </para>
  <note>
    <simpara>
-      Due to the nature of BDR, it's only safe to use this solution for
+      Due to the nature of BDR 1.x/2.x, it's only safe to use this solution for
      a two-node scenario. Introducing additional nodes will create an inherent
      risk of node desynchronisation if a node goes down without being cleanly
      removed from the cluster.
@@ -31,8 +31,21 @@
    reconfigure a proxy server/connection pooler such as <application>PgBouncer</application>.
  </para>

+  <note>
+    <simpara>
+      This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6.
+      It is <emphasis>not</emphasis> required for later BDR versions.
+    </simpara>
+  </note>
+
  <sect1 id="bdr-prerequisites" xreflabel="BDR prequisites">
    <title>Prerequisites</title>
+    <important>
+      <para>
+        This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6.
+        It is <emphasis>not</emphasis> required for later BDR versions.
+      </para>
+    </important>
    <para>
      &repmgr; 4 requires PostgreSQL 9.4 or 9.6 with the BDR 2 extension
      enabled and configured for a two-node BDR network. &repmgr; 4 packages
--- a/doc/repmgrd-configuration.sgml
+++ b/doc/repmgrd-configuration.sgml
@@ -34,24 +34,6 @@
      the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
    </para>

-    <para>
-      To apply configuration file changes to a running <application>repmgrd</application>
-      daemon, execute the operating system's r<application>repmgrd</application> service reload command
-      (see <xref linkend="appendix-packages"> for examples),
-      or for instances  which were manually started, execute <command>kill -HUP</command>, e.g.
-      <command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
-    </para>
-    <note>
-      <para>
-        Check the <application>repmgrd</application> log to see what changes were
-        applied, or if any issues were encountered when reloading the configuration.
-      </para>
-    </note>
-    <para>
-      Note that only a subset of configuration file parameters can be changed on a
-      running <application>repmgrd</application> daemon.
-    </para>
-

    <sect2 id="repmgrd-automatic-failover-configuration">
      <title>automatic failover configuration</title>
@@ -64,8 +46,17 @@
          follow_command='/usr/bin/repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'</programlisting>
      </para>
      <para>
-        Adjust file paths as appropriate; we recomment specifying the full path to the &repmgr; binary.
+        Adjust file paths as appropriate; always specify the full path to the &repmgr; binary.
      </para>
+
+      <note>
+        <para>
+          &repmgr; will not apply <option>pg_bindir</option> when executing <option>promote_command</option>
+          or <option>follow_command</option>; these can be user-defined scripts so must always be
+          specified with the full path.
+        </para>
+      </note>
+
      <para>
        Note that the <literal>--log-to-file</literal> option will cause
        output generated by the &repmgr; command, when executed by <application>repmgrd</application>,
@@ -135,7 +126,7 @@
      </para>
    </sect2>

-    <sect2 id="repmgrd-monitoring-configuration">
+    <sect2 id="repmgrd-monitoring-configuration" xreflabel="repmgrd monitoring configuration">
      <indexterm>
        <primary>repmgrd</primary>
        <secondary>monitoring configuration</secondary>
@@ -158,6 +149,203 @@
      </para>
    </sect2>

+    <sect2 id="repmgrd-reloading-configuration" xreflabel="reloading repmgrd configuration">
+      <indexterm>
+        <primary>repmgrd</primary>
+        <secondary>applying configuration changes</secondary>
+      </indexterm>
+      <title>Applying configuration changes to repmgrd</title>
+      <para>
+        To apply configuration file changes to a running <application>repmgrd</application>
+        daemon, execute the operating system's <application>repmgrd</application> service reload command
+        (see <xref linkend="appendix-packages"> for examples),
+          or for instances  which were manually started, execute <command>kill -HUP</command>, e.g.
+          <command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
+      </para>
+      <tip>
+        <para>
+          Check the <application>repmgrd</application> log to see what changes were
+          applied, or if any issues were encountered when reloading the configuration.
+        </para>
+      </tip>
+      <para>
+        Note that only the following subset of configuration file parameters can be changed on a
+        running <application>repmgrd</application> daemon:
+      </para>
+      <itemizedlist spacing="compact" mark="bullet">
+
+        <listitem>
+          <simpara>
+            <varname>async_query_timeout</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>bdr_local_monitoring_only</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>bdr_recovery_timeout</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>conninfo</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>degraded_monitoring_timeout</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>event_notification_command</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>event_notifications</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>failover</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>follow_command</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>log_facility</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>log_file</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>log_level</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>log_status_interval</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>monitor_interval_secs</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>monitoring_history</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>primary_notification_timeout</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>promote_command</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>reconnect_attempts</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>reconnect_interval</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>repmgrd_standby_startup_timeout</varname>
+          </simpara>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        The following set of configuration file parameters must be updated via
+        <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>,
+        as they require changes to the <literal>repmgr.nodes</literal> table so they are visible to
+        all nodes in the replication cluster:
+      </para>
+      <itemizedlist spacing="compact" mark="bullet">
+
+        <listitem>
+          <simpara>
+            <varname>node_id</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>node_name</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>data_directory</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>location</varname>
+          </simpara>
+        </listitem>
+
+
+        <listitem>
+          <simpara>
+            <varname>priority</varname>
+          </simpara>
+        </listitem>
+
+      </itemizedlist>
+
+      <note>
+        <para>
+          After executing <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>,
+          <application>repmgrd</application> <emphasis>must</emphasis> be restarted for the changes to take effect.
+        </para>
+      </note>
+
+    </sect2>
+
  </sect1>

  <sect1 id="repmgrd-daemon">
@@ -266,7 +454,7 @@ REPMGRD_ENABLED=no
 #REPMGRD_CONF="/path/to/repmgr.conf"

 # additional options
-#REPMGRD_OPTS=""
+REPMGRD_OPTS="--daemonize=false"

 # user to run repmgrd as
 #REPMGRD_USER=postgres
@@ -281,6 +469,16 @@ REPMGRD_ENABLED=no
        Set <varname>REPMGRD_ENABLED</varname> to <literal>yes</literal>, and <varname>REPMGRD_CONF</varname>
        to the <filename>repmgr.conf</filename> file you are using.
      </para>
+      <tip>
+        <para>
+          See <xref linkend="packages-debian-ubuntu"> for details of the Debian/Ubuntu packages and
+          typical file locations (including <filename>repmgr.conf</filename>).
+        </para>
+      </tip>
+      <para>
+        From <application>repmgrd</application> 4.1, ensure <varname>REPMGRD_OPTS</varname> includes
+        <option>--daemonize=false</option>, as daemonization is handled by the service command.
+      </para>
      <para>
        If using <application>systemd</application>, you may need to execute <command>systemctl daemon-reload</command>.
        Also, if you attempted to start <application>repmgrd</application> using <command>systemctl start repmgrd</command>,
@@ -323,25 +521,34 @@ REPMGRD_ENABLED=no
     <secondary>repmgrd</secondary>
   </indexterm>

+   <indexterm>
+     <primary>repmgrd</primary>
+     <secondary>log rotation</secondary>
+   </indexterm>
+
  <title>repmgrd log rotation</title>
  <para>
   To ensure the current <application>repmgrd</application> logfile
   (specified in <filename>repmgr.conf</filename> with the parameter
-   <option>log_file</option> does not grow indefinitely, configure your
+   <option>log_file</option>) does not grow indefinitely, configure your
   system's <command>logrotate</command> to regularly rotate it.
  </para>
  <para>
   Sample configuration to rotate logfiles weekly with retention for
   up to 52 weeks and rotation forced if a file grows beyond 100Mb:
   <programlisting>
-    /var/log/postgresql/repmgr-9.6.log {
+    /var/log/repmgr/repmgrd.log {
        missingok
        compress
        rotate 52
        maxsize 100M
        weekly
        create 0600 postgres postgres
+        postrotate
+            /usr/bin/killall -HUP repmgrd
+        endscript
    }</programlisting>
  </para>
+
 </sect1>
 </chapter>
--- a/doc/repmgrd-degraded-monitoring.sgml
+++ b/doc/repmgrd-degraded-monitoring.sgml
@@ -1,4 +1,4 @@
-<chapter id="repmgrd-degraded-monitoring">
+<chapter id="repmgrd-degraded-monitoring" xreflabel="repmgrd degraded monitoring">
 <indexterm>
   <primary>repmgrd</primary>
   <secondary>degraded monitoring</secondary>
@@ -7,8 +7,8 @@
 <title>"degraded monitoring" mode</title>
 <para>
  In certain circumstances, <application>repmgrd</application> is not able to fulfill its primary mission
-  of monitoring the nodes' upstream server. In these cases it enters "degraded
-  monitoring" mode, where <application>repmgrd</application> remains active but is waiting for the situation
+  of monitoring the node's upstream server. In these cases it enters &quot;degraded monitoring&quot;
+  mode, where <application>repmgrd</application> remains active but is waiting for the situation
  to be resolved.
 </para>
 <para>
--- a/doc/repmgrd-monitoring.sgml
+++ b/doc/repmgrd-monitoring.sgml
@@ -1,4 +1,4 @@
-<chapter id="repmgrd-monitoring">
+<chapter id="repmgrd-monitoring" xreflabel="Monitoring with repmgrd">
 <indexterm>
   <primary>repmgrd</primary>
   <secondary>monitoring</secondary>
--- a/doc/repmgrd-network-split.sgml
+++ b/doc/repmgrd-network-split.sgml
@@ -40,8 +40,8 @@
  In a failover situation, <application>repmgrd</application> will check if any servers in the
  same location as the current primary node are visible.  If not, <application>repmgrd</application>
  will assume a network interruption and not promote any node in any
-  other location (it will however enter <xref linkend="repmgrd-degraded-monitoring"> mode until
-  a primary becomes visible).
+  other location (it will however enter <link linkend="repmgrd-degraded-monitoring">degraded monitoring</link>
+  mode until a primary becomes visible).
 </para>

 </chapter>
--- a/doc/repmgrd-pausing.sgml
+++ b/doc/repmgrd-pausing.sgml
@@ -0,0 +1,178 @@
+<chapter id="repmgrd-pausing" xreflabel="Pausing repmgrd">
+
+  <indexterm>
+    <primary>repmgrd</primary>
+    <secondary>pausing</secondary>
+  </indexterm>
+
+  <indexterm>
+    <primary>pausing repmgrd</primary>
+  </indexterm>
+
+  <title>Pausing repmgrd</title>
+
+  <para>
+    In normal operation, <application>repmgrd</application> monitors the state of the
+    PostgreSQL node it is running on, and will take appropriate action if problems
+    are detected, e.g. (if so configured) promote the node to primary, if the existing
+    primary has been determined as failed.
+  </para>
+
+  <para>
+    However, <application>repmgrd</application> is unable to distinguish between
+    planned outages (such as performing a <link linkend="performing-switchover">switchover</link>
+    or installing PostgreSQL maintenance releases), and an actual server outage. In versions prior to
+    &repmgr; 4.2 it was necessary to stop <application>repmgrd</application> on all nodes (or at least
+    on all nodes where <application>repmgrd</application> is
+    <link linkend="repmgrd-automatic-failover">configured for automatic failover</link>)
+    to prevent <application>repmgrd</application> from making unintentional changes to the
+    replication cluster.
+  </para>
+
+  <para>
+    From <link linkend="release-4.2">&repmgr; 4.2</link>, <application>repmgrd</application>
+    can now be &quot;paused&quot;, i.e. instructed not to take any action such as performing a failover.
+    This can be done from any node in the cluster, removing the need to stop/restart
+    each <application>repmgrd</application> individually.
+  </para>
+
+  <note>
+    <para>
+      For major PostgreSQL upgrades, e.g. from PostgreSQL 10 to PostgreSQL 11,
+      <application>repmgrd</application> should be shut down completely and only started up
+      once the &repmgr; packages for the new PostgreSQL major version have been installed.
+    </para>
+  </note>
+
+  <sect1 id="repmgrd-pausing-prerequisites">
+    <title>Prerequisites for pausing <application>repmgrd</application></title>
+    <para>
+      In order to be able to pause/unpause <application>repmgrd</application>, the following
+      prerequisites must be met:
+      <itemizedlist spacing="compact" mark="bullet">
+
+        <listitem>
+          <simpara><link linkend="release-4.2">&repmgr; 4.2</link> or later must be installed on all nodes.</simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>The same major &repmgr; version (e.g. 4.2) must be installed on all nodes (and preferably the same minor version).</simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            PostgreSQL on all nodes must be accessible from the node where the
+            <literal>pause</literal>/<literal>unpause</literal> operation is executed, using the
+            <varname>conninfo</varname> string shown by <link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>.
+          </simpara>
+        </listitem>
+      </itemizedlist>
+    </para>
+    <note>
+      <para>
+        These conditions are required for normal &repmgr; operation in any case.
+      </para>
+    </note>
+
+  </sect1>
+
+  <sect1 id="repmgrd-pausing-execution">
+    <title>Pausing/unpausing <application>repmgrd</application></title>
+    <para>
+      To pause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link>, e.g.:
+   <programlisting>
+$ repmgr -f /etc/repmgr.conf daemon pause
+NOTICE: node 1 (node1) paused
+NOTICE: node 2 (node2) paused
+NOTICE: node 3 (node3) paused</programlisting>
+    </para>
+    <para>
+      The state of <application>repmgrd</application> on each node can be checked with
+      <link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>, e.g.:
+    <programlisting>$ repmgr -f /etc/repmgr.conf daemon status
+ ID | Name  | Role    | Status  | repmgrd | PID  | Paused?
+----+-------+---------+---------+---------+------+---------
+ 1  | node1 | primary | running | running | 7851 | yes
+ 2  | node2 | standby | running | running | 7889 | yes
+ 3  | node3 | standby | running | running | 7918 | yes</programlisting>
+    </para>
+
+    <note>
+      <para>
+        If executing a switchover with  <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
+		&repmgr; will automatically pause/unpause <application>repmgrd</application> as part of the switchover process.
+      </para>
+    </note>
+
+    <para>
+      If the primary (in this example, <literal>node1</literal>) is stopped, <application>repmgrd</application>
+      running on one of the standbys (here: <literal>node2</literal>) will react like this:
+      <programlisting>
+[2018-09-20 12:22:21] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
+[2018-09-20 12:22:21] [INFO] checking state of node 1, 1 of 5 attempts
+[2018-09-20 12:22:21] [INFO] sleeping 1 seconds until next reconnection attempt
+...
+[2018-09-20 12:22:24] [INFO] sleeping 1 seconds until next reconnection attempt
+[2018-09-20 12:22:25] [INFO] checking state of node 1, 5 of 5 attempts
+[2018-09-20 12:22:25] [WARNING] unable to reconnect to node 1 after 5 attempts
+[2018-09-20 12:22:25] [NOTICE] node is paused
+[2018-09-20 12:22:33] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state
+[2018-09-20 12:22:33] [DETAIL] repmgrd paused by administrator
+[2018-09-20 12:22:33] [HINT] execute "repmgr daemon unpause" to resume normal failover mode</programlisting>
+    </para>
+    <para>
+      If the primary becomes available again (e.g. following a software upgrade), <application>repmgrd</application>
+      will automatically reconnect, e.g.:
+      <programlisting>
+[2018-09-20 13:12:41] [NOTICE] reconnected to upstream node 1 after 8 seconds, resuming monitoring</programlisting>
+    </para>
+
+    <para>
+      To unpause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>, e.g.:
+   <programlisting>
+$ repmgr -f /etc/repmgr.conf daemon unpause
+NOTICE: node 1 (node1) unpaused
+NOTICE: node 2 (node2) unpaused
+NOTICE: node 3 (node3) unpaused</programlisting>
+    </para>
+
+    <note>
+      <para>
+        If the previous primary is no longer accessible when <application>repmgrd</application>
+        is unpaused, no failover action will be taken. Instead, a new primary must be manually promoted using
+        <link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>,
+		and any standbys attached to the new primary with
+		<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>.
+      </para>
+      <para>
+        This is to prevent <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
+        resulting in the automatic promotion of a new primary, which may be a problem particularly
+        in larger clusters, where <application>repmgrd</application> could select a different promotion
+        candidate to the one intended by the administrator.
+      </para>
+    </note>
+
+  <sect2 id="repmgrd-pausing-details">
+    <title>Details on the <application>repmgrd</application> pausing mechanism</title>
+
+    <para>
+      The pause state of each node will be preserved across a PostgreSQL restart.
+    </para>
+
+	<para>
+	  <link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
+	  <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link> can be
+	  executed even if <application>repmgrd</application> is not running; in this case,
+	  <application>repmgrd</application> will start up in whichever pause state has been set.
+	</para>
+    <note>
+      <para>
+		<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
+		<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
+		<emphasis>do not</emphasis> stop/start <application>repmgrd</application>.
+      </para>
+    </note>
+  </sect2>
+  </sect1>
+</chapter>
+
--- a/doc/switchover.sgml
+++ b/doc/switchover.sgml
@@ -19,9 +19,10 @@
 </para>
 <para>
  <command>repmgr standby switchover</command> differs from other &repmgr;
-  actions in that it also performs actions on another server (the demotion
-  candidate), which means passwordless SSH access is required to that server
-  from the one where <command>repmgr standby switchover</command> is executed.
+  actions in that it also performs actions on other servers (the demotion
+  candidate, and optionally any other servers which are to follow the new primary),
+  which means passwordless SSH access is required to those servers from the one where
+  <command>repmgr standby switchover</command> is executed.
 </para>
 <note>
  <simpara>
@@ -60,6 +61,13 @@
    &repmgr; being able to shut down the current primary server quickly and cleanly.
   </para>

+   <para>
+     Ensure that the promotion candidate has sufficient free walsenders available
+     (PostgreSQL configuration item <varname>max_wal_senders</varname>), and if replication
+     slots are in use, at least one free slot is available for the demotion candidate
+     (PostgreSQL configuration item <varname>max_replication_slots</varname>).
+   </para>
+
   <para>
     Ensure that a passwordless SSH connection is possible from the promotion candidate
     (standby) to the demotion candidate (current primary). If <literal>--siblings-follow</literal>
@@ -76,11 +84,12 @@

   <para>
    Double-check which commands will be used to stop/start/restart the current
-    primary; on the current primary execute:
+    primary; this can be done by e.g. executing <command><link linkend="repmgr-node-service">repmgr node service</link></command>
+    on the current primary:
    <programlisting>
-     repmgr -f /etc/repmgr.conf node service --list --action=stop
-     repmgr -f /etc/repmgr.conf node service --list --action=start
-     repmgr -f /etc/repmgr.conf node service --list --action=restart</programlisting>
+     repmgr -f /etc/repmgr.conf node service --list-actions --action=stop
+     repmgr -f /etc/repmgr.conf node service --list-actions --action=start
+     repmgr -f /etc/repmgr.conf node service --list-actions --action=restart</programlisting>

   </para>

@@ -146,12 +155,18 @@
    manually with <command>repmgr node check --archive-ready</command>.
   </para>

-   <note>
-     <para>
-       Ensure that <application>repmgrd</application> is *not* running anywhere to prevent it unintentionally
-       promoting a node. This restriction will be removed in a future &repmgr; version.
-     </para>
-   </note>
+    <note>
+      <para>
+        From <link linkend="release-4.2">repmgr 4.2</link>, &repmgr; will instruct any running
+        <application>repmgrd</application> instances to pause operations while the switchover
+        is being carried out, to prevent <application>repmgrd</application> from
+        unintentionally promoting a node. For more details, see <xref linkend="repmgrd-pausing">.
+      </para>
+      <para>
+        Users of &repmgr; versions prior to 4.2 should ensure that <application>repmgrd</application>
+        is not running on any nodes while a switchover is being executed.
+      </para>
+    </note>


   <para>
@@ -296,7 +311,21 @@
     2  | node2 | primary | * running |          | default  | host=node2 dbname=repmgr user=repmgr
   </programlisting>
  </para>
+  <para>
+    If <application>repmgrd</application> is in use, it's worth double-checking that
+    all nodes are unpaused by executing <command><link linkend="repmgr-daemon-status">repmgr daemon status</link></command>.
+  </para>
+
+   <note>
+     <para>
+       Users of &repmgr; versions prior to 4.2 will need to manually restart <application>repmgrd</application>
+       on all nodes after the switchover is completed.
+     </para>
+    </note>
+
 </sect1>
+
+
 <sect1 id="switchover-caveats" xreflabel="Caveats">
  <indexterm>
   <primary>switchover</primary>
@@ -322,17 +351,76 @@
      for details.
     </simpara>
    </listitem>
-    <listitem>
-     <simpara>
-      <application>repmgrd</application> should not be running with setting <varname>failover=automatic</varname>
-      in <filename>repmgr.conf</filename> when a switchover is carried out, otherwise the
-      <application>repmgrd</application> daemon may try and promote a standby by itself.
-     </simpara>
-    </listitem>
   </itemizedlist>
  </para>
-  <para>
-   We hope to remove some of these restrictions in future versions of &repmgr;.
-  </para>
 </sect1>
+
+ <sect1 id="switchover-troubleshooting" xreflabel="Troubleshooting">
+   <indexterm>
+     <primary>switchover</primary>
+     <secondary>troubleshooting</secondary>
+   </indexterm>
+   <title>Troubleshooting switchover issues</title>
+
+   <para>
+     As <link linkend="performing-switchover">emphasised previously</link>, performing a switchover
+     is a non-trivial operation and there are a number of potential issues which can occur.
+     While &repmgr; attempts to perform sanity checks, there's no guaranteed way of determining the success of
+     a switchover without actually carrying it out.
+   </para>
+
+   <sect2 id="switchover-troubleshooting-primary-shutdown">
+     <title>Demotion candidate (old primary) does not shut down</title>
+     <para>
+       &repmgr; may abort a switchover with a message like:
+       <programlisting>
+ERROR: shutdown of the primary server could not be confirmed
+HINT: check the primary server status before performing any further actions</programlisting>
+     </para>
+     <para>
+       This means the shutdown of the old primary has taken longer than &repmgr; expected,
+       and it has given up waiting.
+     </para>
+     <para>
+       In this case, check the PostgreSQL log on the primary server to see what is going
+       on. It's entirely possible the shutdown process is just taking longer than the
+       timeout set by the configuration parameter <varname>shutdown_check_timeout</varname>
+       (default: 60 seconds), in which case you may need to adjust this parameter.
+     </para>
+     <note>
+       <para>
+         Note that <varname>shutdown_check_timeout</varname> is set on the node where
+         <command>repmgr standby switchover</command> is executed (promotion candidate); setting it on the
+         demotion candidate (former primary) will have no effect.
+       </para>
+     </note>
+     <para>
+       If the primary server has shut down cleanly, and no other node has been promoted,
+       it is safe to restart it, in which case the replication cluster will be restored
+       to its original configuration.
+     </para>
+   </sect2>
+
+   <sect2 id="switchover-troubleshooting-exclusive-backup">
+     <title>Switchover aborts with an &quot;exclusive backup&quot; error</title>
+     <para>
+       &repmgr; may abort a switchover with a message like:
+       <programlisting>
+ERROR: unable to perform a switchover while primary server is in exclusive backup mode
+HINT: stop backup before attempting the switchover</programlisting>
+     </para>
+     <para>
+       This means an exclusive backup is running on the current primary; interrupting this
+       will not only abort the backup, but potentially leave the primary with an ambiguous
+       backup state.
+     </para>
+     <para>
+       To proceed, either wait until the backup has finished, or cancel it with the command
+       <command>SELECT pg_stop_backup()</command>. For more details see the PostgreSQL
+       documentation section
+       <ulink url="https://www.postgresql.org/docs/current/static/continuous-archiving.html#BACKUP-LOWLEVEL-BASE-BACKUP-EXCLUSIVE">Making an exclusive low level backup</ulink>.
+     </para>
+   </sect2>
+ </sect1>
+
 </chapter>
--- a/doc/upgrading-from-repmgr3.md
+++ b/doc/upgrading-from-repmgr3.md
@@ -4,6 +4,6 @@ Upgrading from repmgr 3
 This document has been integrated into the main `repmgr` documentation
 and is now located here:

-> [Upgrading from repmgr 3.x](https://repmgr.org/docs/4.0/upgrading-from-repmgr-3.html)
+> [Upgrading from repmgr 3.x](https://repmgr.org/docs/current/upgrading-from-repmgr-3.html)


--- a/doc/upgrading-repmgr.sgml
+++ b/doc/upgrading-repmgr.sgml
@@ -7,9 +7,9 @@
 <title>Upgrading repmgr</title>

 <para>
-  &repmgr; is updated regularly with point releases (e.g. 4.0.1 to 4.0.2)
+  &repmgr; is updated regularly with minor releases (e.g. 4.0.1 to 4.0.2)
  containing bugfixes and other minor improvements. Any substantial new
-  functionality will be included in a feature release (e.g. 4.0.x to 4.1.x).
+  functionality will be included in a major release (e.g. 4.0 to 4.1).
 </para>

 <sect1 id="upgrading-repmgr-extension" xreflabel="Upgrading repmgr 4.x and later">
@@ -19,43 +19,202 @@
  </indexterm>
  <title>Upgrading repmgr 4.x and later</title>
  <para>
-    &repmgr; 4.x is implemented as a PostgreSQL extension; normally the upgrade consists
-    of the two following steps:
-    <orderedlist>
-      <listitem>
-        <simpara>
-          Install the updated package (or compile the updated source)
-        </simpara>
-      </listitem>
-      <listitem>
-        <simpara>
-          <application>repmgrd</application> (if running) must be restarted.
-        </simpara>
-      </listitem>
-      <listitem>
-        <simpara>
-          For major releases, e.g. from <literal>4.0.x</literal> to <literal>4.1</literal>,
-          execute <command>ALTER EXTENSION repmgr UPDATE</command>
-          on the primary node in the database where the &repmgr; extension is installed.
-        </simpara>
-        <simpara>
-          This will update the extension metadata and, if necessary, apply
-          changes to the &repmgr; extension objects.
-        </simpara>
-      </listitem>
-    </orderedlist>
+    From version 4, &repmgr; consists of three elements:
+     <itemizedlist spacing="compact" mark="bullet">
+
+       <listitem>
+         <simpara>
+           the <application>repmgr</application> and <application>repmgrd</application> executables
+         </simpara>
+       </listitem>
+
+       <listitem>
+         <simpara>
+           the objects for the &repmgr; PostgreSQL extension (SQL files for creating/updating
+           repmgr metadata, and the extension control file)
+         </simpara>
+       </listitem>
+
+       <listitem>
+         <simpara>
+           the shared library module used by <application>repmgrd</application> which
+           is resident in the PostgreSQL backend
+         </simpara>
+       </listitem>
+     </itemizedlist>
+  </para>
+  <para>
+    With <emphasis>minor releases</emphasis>, usually changes are only made to the <application>repmgr</application>
+    and <application>repmgrd</application> executables. In this case, the upgrade is quite straightforward,
+    and is simply a case of installing the new version, and restarting <application>repmgrd</application>
+    (if running).
  </para>

  <para>
-    Always check the <link linkend="appendix-release-notes">release notes</link> for every
-    release as they may contain upgrade instructions particular to individual versions.
+    For <emphasis>major releases</emphasis>, the &repmgr; PostgreSQL extension will need to be updated
+    to the latest version. Additionally, if the shared library module has been updated (this is sometimes,
+    but not always the case), PostgreSQL itself will need to be restarted on each node.
  </para>
+  <important>
+    <para>
+      Always check the <link linkend="appendix-release-notes">release notes</link> for every
+      release as they may contain upgrade instructions particular to individual versions.
+    </para>
+  </important>

-  <para>
-    Note that it may be necessary to restart the PostgreSQL server if the upgrade contains
-    changes to the shared object file used by <application>repmgrd</application>; check the
-    release notes for details.
-  </para>
+  <sect2 id="upgrading-minor-version" xreflabel="Upgrading a minor version release">
+	<indexterm>
+	  <primary>upgrading</primary>
+	  <secondary>minor release</secondary>
+	</indexterm>
+	<title>Upgrading a minor version release</title>
+
+    <para>
+      The process for installing minor version upgrades is quite straightforward:
+
+      <itemizedlist spacing="compact" mark="bullet">
+
+        <listitem>
+          <simpara>
+            install the new &repmgr; version
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            restart <application>repmgrd</application> on all nodes where it is running
+          </simpara>
+        </listitem>
+
+      </itemizedlist>
+
+    </para>
+
+    <note>
+	  <para>
+        Some packaging systems (e.g. <link linkend="packages-debian-ubuntu">Debian/Ubuntu</link>)
+        may restart <application>repmgrd</application> as part of the package upgrade process.
+      </para>
+    </note>
+
+	<para>
+	  Minor version upgrades can be performed in any order on the nodes in the replication
+	  cluster.
+	</para>
+
+	<para>
+	  A PostgreSQL restart is <emphasis>not</emphasis> required for minor version upgrades.
+	</para>
+
+    <note>
+	  <para>
+	    The same &repmgr; &quot;major version&quot; (e.g. <literal>4.2</literal>) must be
+	    installed on all nodes in the replication cluster. While it's possible to have differing
+	    &repmgr; &quot;minor versions&quot; (e.g. <literal>4.2.1</literal>)  on different nodes,
+	    we strongly recommend updating all nodes to the latest minor version.
+	  </para>
+    </note>
+
+  </sect2>
+
+  <sect2 id="upgrading-major-version" xreflabel="Upgrading a major version release">
+	<indexterm>
+	  <primary>upgrading</primary>
+	  <secondary>major release</secondary>
+	</indexterm>
+	<title>Upgrading a major version release</title>
+	<para>
+	  &quot;major version&quot; upgrades need to be planned more carefully, as they may include
+	  changes to the &repmgr; metadata (which need to be propagated from the primary to all
+	  standbys) and/or changes to the shared object file used by <application>repmgrd</application>
+	  (which require a PostgreSQL restart).
+	</para>
+	<para>
+	  With this in mind, the recommended procedure for a major version upgrade is as follows:
+	</para>
+
+	<para>
+      <orderedlist>
+
+		<listitem>
+          <simpara>
+			Stop <application>repmgrd</application> (if in use) on all nodes where it is running.
+          </simpara>
+		</listitem>
+
+		<listitem>
+          <simpara>
+			Disable the <application>repmgrd</application> service on all nodes where it is in use;
+            this is to prevent packages from prematurely restarting <application>repmgrd</application>.
+          </simpara>
+		</listitem>
+
+		<listitem>
+          <simpara>
+			Install the updated package (or compile the updated source) on all nodes.
+          </simpara>
+		</listitem>
+
+        <listitem>
+          <para>
+            If running a <literal>systemd</literal>-based Linux distribution, execute (as <literal>root</literal>,
+            or with appropriate <literal>sudo</literal> permissions):
+            <programlisting>
+systemctl daemon-reload</programlisting>
+          </para>
+        </listitem>
+
+		<listitem>
+          <simpara>
+			If the &repmgr; shared library module has been updated (check the <link linkend="appendix-release-notes">release notes</link>!),
+            restart PostgreSQL, then <application>repmgrd</application> (if in use) on each node.
+            The order in which this is applied to individual nodes is not critical,
+			and it's also fine to restart PostgreSQL on all nodes first before starting <application>repmgrd</application>.
+		  </simpara>
+		  <simpara>
+			Note that if the upgrade requires a PostgreSQL restart, <application>repmgrd</application>
+			will only function correctly once all nodes have been restarted.
+          </simpara>
+		</listitem>
+
+		<listitem>
+          <para>
+			On the primary node, execute
+			<programlisting>
+ALTER EXTENSION repmgr UPDATE</programlisting>
+			in the database where &repmgr; is installed.
+          </para>
+		</listitem>
+
+		<listitem>
+          <simpara>
+			Reenable the <application>repmgrd</application> service on all nodes where it is in use, and
+            ensure it is running.
+          </simpara>
+		</listitem>
+
+	  </orderedlist>
+	</para>
+	<tip>
+	  <para>
+		If the &repmgr; upgrade requires a PostgreSQL restart, combine the &repmgr; upgrade
+		with a PostgreSQL minor version upgrade, which will require a restart in any case.
+		New PostgreSQL minor versions are usually released every couple of months.
+	  </para>
+	</tip>
+  </sect2>
+
+  <sect2 id="upgrading-check-repmgrd" xreflabel="Checking repmgrd status after an upgrade">
+	<indexterm>
+	  <primary>upgrading</primary>
+	  <secondary>checking repmgrd status</secondary>
+	</indexterm>
+	<title>Checking repmgrd status after an upgrade</title>
+	<para>
+      From <link linkend="release-4.2">repmgr 4.2</link>, once the upgrade is complete, execute the <command><link linkend="repmgr-daemon-status">repmgr daemon status</link></command>
+      command (on any node) to show an overview of the status of <application>repmgrd</application> on all nodes.
+    </para>
+  </sect2>
 </sect1>

 <sect1 id="upgrading-and-pg-upgrade" xreflabel="pg_upgrade and repmgr">
@@ -95,6 +254,13 @@
    be recreated by <application>pg_upgrade</application>. These will need to
    be recreated manually.
  </para>
+  <tip>
+	<para>
+	  Use <command><link linkend="repmgr-node-check">repmgr node check</link></command>
+	  to determine which replication slots need to be recreated.
+	</para>
+  </tip>
+
 </sect1>


--- a/doc/version.sgml
+++ b/doc/version.sgml
@@ -1 +1 @@
-<!ENTITY repmgrversion "4.1dev">
+<!ENTITY repmgrversion "4.2">
--- a/errcode.h
+++ b/errcode.h
@@ -47,5 +47,6 @@
 #define ERR_FOLLOW_FAIL 23
 #define ERR_REJOIN_FAIL 24
 #define ERR_NODE_STATUS 25
+#define ERR_REPMGRD_PAUSE 26

 #endif							/* _ERRCODE_H_ */
--- a/repmgr--4.1--4.2.sql
+++ b/repmgr--4.1--4.2.sql
@@ -0,0 +1,32 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION repmgr UPDATE TO '4.2'" to load this file. \quit
+
+CREATE FUNCTION get_repmgrd_pid()
+  RETURNS INT
+  AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION get_repmgrd_pidfile()
+  RETURNS TEXT
+  AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
+  RETURNS VOID
+  AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION repmgrd_is_running()
+  RETURNS BOOL
+  AS 'MODULE_PATHNAME', 'repmgrd_is_running'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION repmgrd_pause(BOOL)
+  RETURNS VOID
+  AS 'MODULE_PATHNAME', 'repmgrd_pause'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION repmgrd_is_paused()
+  RETURNS BOOL
+  AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
+  LANGUAGE C STRICT;
--- a/repmgr--4.1.sql
+++ b/repmgr--4.1.sql
@@ -145,7 +145,6 @@ CREATE FUNCTION unset_bdr_failover_handler()
  AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
  LANGUAGE C STRICT;

-
 CREATE VIEW repmgr.replication_status AS
  SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
 	     n.type AS node_type, n.active, last_monitor_time,
--- a/repmgr--4.2.sql
+++ b/repmgr--4.2.sql
@@ -0,0 +1,197 @@
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
+
+CREATE TABLE repmgr.nodes (
+  node_id          INTEGER     PRIMARY KEY,
+  upstream_node_id INTEGER     NULL REFERENCES nodes (node_id) DEFERRABLE,
+  active           BOOLEAN     NOT NULL DEFAULT TRUE,
+  node_name        TEXT        NOT NULL,
+  type             TEXT        NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
+  location         TEXT        NOT NULL DEFAULT 'default',
+  priority         INT         NOT NULL DEFAULT 100,
+  conninfo         TEXT        NOT NULL,
+  repluser         VARCHAR(63) NOT NULL,
+  slot_name        TEXT        NULL,
+  config_file      TEXT        NOT NULL
+);
+
+CREATE TABLE repmgr.events (
+  node_id          INTEGER NOT NULL,
+  event            TEXT NOT NULL,
+  successful       BOOLEAN NOT NULL DEFAULT TRUE,
+  event_timestamp  TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  details          TEXT NULL
+);
+
+DO $repmgr$
+DECLARE
+  DECLARE server_version_num INT;
+BEGIN
+  SELECT setting
+    FROM pg_catalog.pg_settings
+   WHERE name = 'server_version_num'
+    INTO server_version_num;
+  IF server_version_num >= 90400 THEN
+    EXECUTE $repmgr_func$
+CREATE TABLE repmgr.monitoring_history (
+  primary_node_id                INTEGER NOT NULL,
+  standby_node_id                INTEGER NOT NULL,
+  last_monitor_time              TIMESTAMP WITH TIME ZONE NOT NULL,
+  last_apply_time                TIMESTAMP WITH TIME ZONE,
+  last_wal_primary_location      PG_LSN NOT NULL,
+  last_wal_standby_location      PG_LSN,
+  replication_lag                BIGINT NOT NULL,
+  apply_lag                      BIGINT NOT NULL
+)
+    $repmgr_func$;
+  ELSE
+    EXECUTE $repmgr_func$
+CREATE TABLE repmgr.monitoring_history (
+  primary_node_id                INTEGER NOT NULL,
+  standby_node_id                INTEGER NOT NULL,
+  last_monitor_time              TIMESTAMP WITH TIME ZONE NOT NULL,
+  last_apply_time                TIMESTAMP WITH TIME ZONE,
+  last_wal_primary_location      TEXT NOT NULL,
+  last_wal_standby_location      TEXT,
+  replication_lag                BIGINT NOT NULL,
+  apply_lag                      BIGINT NOT NULL
+)
+    $repmgr_func$;
+  END IF;
+END$repmgr$;
+
+
+
+CREATE INDEX idx_monitoring_history_time
+          ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
+
+CREATE VIEW repmgr.show_nodes AS
+   SELECT n.node_id,
+          n.node_name,
+          n.active,
+          n.upstream_node_id,
+          un.node_name AS upstream_node_name,
+          n.type,
+          n.priority,
+          n.conninfo
+     FROM repmgr.nodes n
+LEFT JOIN repmgr.nodes un
+       ON un.node_id = n.upstream_node_id;
+
+
+/* XXX update upgrade scripts! */
+CREATE TABLE repmgr.voting_term (
+  term INT NOT NULL
+);
+
+CREATE UNIQUE INDEX voting_term_restrict
+ON repmgr.voting_term ((TRUE));
+
+CREATE RULE voting_term_delete AS
+   ON DELETE TO repmgr.voting_term
+   DO INSTEAD NOTHING;
+
+
+/* ================= */
+/* repmgrd functions */
+/* ================= */
+
+/* monitoring functions */
+
+CREATE FUNCTION set_local_node_id(INT)
+  RETURNS VOID
+  AS 'MODULE_PATHNAME', 'set_local_node_id'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION get_local_node_id()
+  RETURNS INT
+  AS 'MODULE_PATHNAME', 'get_local_node_id'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION standby_set_last_updated()
+  RETURNS TIMESTAMP WITH TIME ZONE
+  AS 'MODULE_PATHNAME', 'standby_set_last_updated'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION standby_get_last_updated()
+  RETURNS TIMESTAMP WITH TIME ZONE
+  AS 'MODULE_PATHNAME', 'standby_get_last_updated'
+  LANGUAGE C STRICT;
+
+/* failover functions */
+
+CREATE FUNCTION notify_follow_primary(INT)
+  RETURNS VOID
+  AS 'MODULE_PATHNAME', 'notify_follow_primary'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION get_new_primary()
+  RETURNS INT
+  AS 'MODULE_PATHNAME', 'get_new_primary'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION reset_voting_status()
+  RETURNS VOID
+  AS 'MODULE_PATHNAME', 'reset_voting_status'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION am_bdr_failover_handler(INT)
+  RETURNS BOOL
+  AS 'MODULE_PATHNAME', 'am_bdr_failover_handler'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION unset_bdr_failover_handler()
+  RETURNS VOID
+  AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION get_repmgrd_pid()
+  RETURNS INT
+  AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION get_repmgrd_pidfile()
+  RETURNS TEXT
+  AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
+  RETURNS VOID
+  AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION repmgrd_is_running()
+  RETURNS BOOL
+  AS 'MODULE_PATHNAME', 'repmgrd_is_running'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION repmgrd_pause(BOOL)
+  RETURNS VOID
+  AS 'MODULE_PATHNAME', 'repmgrd_pause'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION repmgrd_is_paused()
+  RETURNS BOOL
+  AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
+  LANGUAGE C STRICT;
+
+
+CREATE VIEW repmgr.replication_status AS
+  SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
+ 	     n.type AS node_type, n.active, last_monitor_time,
+         CASE WHEN n.type='standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
+         m.last_wal_standby_location,
+         CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
+         CASE WHEN n.type='standby' THEN
+           CASE WHEN replication_lag > 0 THEN age(now(), m.last_apply_time) ELSE '0'::INTERVAL END
+           ELSE NULL
+         END AS replication_time_lag,
+         CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
+         AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN repmgr.standby_get_last_updated() ELSE m.last_monitor_time END) AS communication_time_lag
+    FROM repmgr.monitoring_history m
+    JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
+   WHERE (m.standby_node_id, m.last_monitor_time) IN (
+	          SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
+			    FROM repmgr.monitoring_history m1 GROUP BY 1
+         );
+
--- a/repmgr-action-bdr.c
+++ b/repmgr-action-bdr.c
@@ -126,7 +126,7 @@ do_bdr_register(void)
 	}

 	/* check whether repmgr extension exists, and there are no non-BDR nodes registered */
-	extension_status = get_repmgr_extension_status(conn);
+	extension_status = get_repmgr_extension_status(conn, NULL);

 	if (extension_status == REPMGR_UNKNOWN)
 	{
@@ -191,7 +191,7 @@ do_bdr_register(void)
 	{
 		NodeInfoList local_node_records = T_NODE_INFO_LIST_INITIALIZER;

-		get_all_node_records(conn, &local_node_records);
+		(void) get_all_node_records(conn, &local_node_records);

 		if (local_node_records.node_count == 0)
 		{
@@ -232,14 +232,14 @@ do_bdr_register(void)
 				}

 				/* check repmgr schema exists, skip if not */
-				other_node_extension_status = get_repmgr_extension_status(bdr_node_conn);
+				other_node_extension_status = get_repmgr_extension_status(bdr_node_conn, NULL);

 				if (other_node_extension_status != REPMGR_INSTALLED)
 				{
 					continue;
 				}

-				get_all_node_records(bdr_node_conn, &existing_nodes);
+				(void) get_all_node_records(bdr_node_conn, &existing_nodes);

 				for (cell = existing_nodes.head; cell; cell = cell->next)
 				{
@@ -442,7 +442,7 @@ do_bdr_unregister(void)
 		exit(ERR_BAD_CONFIG);
 	}

-	extension_status = get_repmgr_extension_status(conn);
+	extension_status = get_repmgr_extension_status(conn, NULL);
 	if (extension_status != REPMGR_INSTALLED)
 	{
 		log_error(_("repmgr is not installed on database \"%s\""), dbname);
--- a/repmgr-action-cluster.c
+++ b/repmgr-action-cluster.c
@@ -26,7 +26,6 @@

 #define SHOW_HEADER_COUNT 7

-
 typedef enum
 {
 	SHOW_ID = 0,
@@ -51,21 +50,13 @@ typedef enum
 }			EventHeader;


-
-struct ColHeader
-{
-	char		title[MAXLEN];
-	int			max_length;
-	int			cur_length;
-};
-
 struct ColHeader headers_show[SHOW_HEADER_COUNT];
 struct ColHeader headers_event[EVENT_HEADER_COUNT];



-static int	build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length);
-static int	build_cluster_crosscheck(t_node_status_cube ***cube_dest, int *name_length);
+static int	build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, ItemList *warnings, int *error_code);
+static int	build_cluster_crosscheck(t_node_status_cube ***cube_dest, int *name_length, ItemList *warnings, int *error_code);
 static void cube_set_node_status(t_node_status_cube **cube, int n, int node_id, int matrix_node_id, int connection_node_id, int connection_status);

 /*
@@ -84,6 +75,7 @@ do_cluster_show(void)
 	ItemList	warnings = {NULL, NULL};
 	bool		success = false;
 	bool		error_found = false;
+	bool		connection_error_found = false;

 	/* Connect to local database to obtain cluster connection data */
 	log_verbose(LOG_INFO, _("connecting to database"));
@@ -141,14 +133,26 @@ do_cluster_show(void)
 		}
 		else
 		{
-			char		error[MAXLEN];
-
-			strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
 			cell->node_info->node_status = NODE_STATUS_DOWN;
 			cell->node_info->recovery_type = RECTYPE_UNKNOWN;
-			item_list_append_format(&warnings,
-									"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
-									cell->node_info->node_name, cell->node_info->node_id, trim(error));
+
+			connection_error_found = true;
+
+			if (runtime_options.verbose)
+			{
+				char		error[MAXLEN];
+
+				strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
+				item_list_append_format(&warnings,
+										"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
+										cell->node_info->node_name, cell->node_info->node_id, trim(error));
+			}
+			else
+			{
+				item_list_append_format(&warnings,
+										"unable to connect to node \"%s\" (ID: %i)",
+										cell->node_info->node_name, cell->node_info->node_id);
+			}
 		}

 		initPQExpBuffer(&details);
@@ -170,16 +174,16 @@ do_cluster_show(void)
 							switch (cell->node_info->recovery_type)
 							{
 								case RECTYPE_PRIMARY:
-									appendPQExpBuffer(&details, "* running");
+									appendPQExpBufferStr(&details, "* running");
 									break;
 								case RECTYPE_STANDBY:
-									appendPQExpBuffer(&details, "! running as standby");
+									appendPQExpBufferStr(&details, "! running as standby");
 									item_list_append_format(&warnings,
 															"node \"%s\" (ID: %i) is registered as primary but running as standby",
 															cell->node_info->node_name, cell->node_info->node_id);
 									break;
 								case RECTYPE_UNKNOWN:
-									appendPQExpBuffer(&details, "! unknown");
+									appendPQExpBufferStr(&details, "! unknown");
 									item_list_append_format(&warnings,
 															"node \"%s\" (ID: %i) has unknown replication status",
 															cell->node_info->node_name, cell->node_info->node_id);
@@ -190,14 +194,14 @@ do_cluster_show(void)
 						{
 							if (cell->node_info->recovery_type == RECTYPE_PRIMARY)
 							{
-								appendPQExpBuffer(&details, "! running");
+								appendPQExpBufferStr(&details, "! running");
 								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
 														cell->node_info->node_name, cell->node_info->node_id);
 							}
 							else
 							{
-								appendPQExpBuffer(&details, "! running as standby");
+								appendPQExpBufferStr(&details, "! running as standby");
 								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is registered as an inactive primary but running as standby",
 														cell->node_info->node_name, cell->node_info->node_id);
@@ -210,7 +214,7 @@ do_cluster_show(void)
 						/* node is unreachable but marked active */
 						if (cell->node_info->active == true)
 						{
-							appendPQExpBuffer(&details, "? unreachable");
+							appendPQExpBufferStr(&details, "? unreachable");
 							item_list_append_format(&warnings,
 													"node \"%s\" (ID: %i) is registered as an active primary but is unreachable",
 													cell->node_info->node_name, cell->node_info->node_id);
@@ -218,7 +222,7 @@ do_cluster_show(void)
 						/* node is unreachable and marked as inactive */
 						else
 						{
-							appendPQExpBuffer(&details, "- failed");
+							appendPQExpBufferStr(&details, "- failed");
 							error_found = true;
 						}
 					}
@@ -234,16 +238,16 @@ do_cluster_show(void)
 							switch (cell->node_info->recovery_type)
 							{
 								case RECTYPE_STANDBY:
-									appendPQExpBuffer(&details, "  running");
+									appendPQExpBufferStr(&details, "  running");
 									break;
 								case RECTYPE_PRIMARY:
-									appendPQExpBuffer(&details, "! running as primary");
+									appendPQExpBufferStr(&details, "! running as primary");
 									item_list_append_format(&warnings,
 															"node \"%s\" (ID: %i) is registered as standby but running as primary",
 															cell->node_info->node_name, cell->node_info->node_id);
 									break;
 								case RECTYPE_UNKNOWN:
-									appendPQExpBuffer(&details, "! unknown");
+									appendPQExpBufferStr(&details, "! unknown");
 									item_list_append_format(
 															&warnings,
 															"node \"%s\" (ID: %i) has unknown replication status",
@@ -255,14 +259,14 @@ do_cluster_show(void)
 						{
 							if (cell->node_info->recovery_type == RECTYPE_STANDBY)
 							{
-								appendPQExpBuffer(&details, "! running");
+								appendPQExpBufferStr(&details, "! running");
 								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
 														cell->node_info->node_name, cell->node_info->node_id);
 							}
 							else
 							{
-								appendPQExpBuffer(&details, "! running as primary");
+								appendPQExpBufferStr(&details, "! running as primary");
 								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive",
 														cell->node_info->node_name, cell->node_info->node_id);
@@ -275,14 +279,14 @@ do_cluster_show(void)
 						/* node is unreachable but marked active */
 						if (cell->node_info->active == true)
 						{
-							appendPQExpBuffer(&details, "? unreachable");
+							appendPQExpBufferStr(&details, "? unreachable");
 							item_list_append_format(&warnings,
 													"node \"%s\" (ID: %i) is registered as an active standby but is unreachable",
 													cell->node_info->node_name, cell->node_info->node_id);
 						}
 						else
 						{
-							appendPQExpBuffer(&details, "- failed");
+							appendPQExpBufferStr(&details, "- failed");
 							error_found = true;
 						}
 					}
@@ -296,11 +300,11 @@ do_cluster_show(void)
 					{
 						if (cell->node_info->active == true)
 						{
-							appendPQExpBuffer(&details, "* running");
+							appendPQExpBufferStr(&details, "* running");
 						}
 						else
 						{
-							appendPQExpBuffer(&details, "! running");
+							appendPQExpBufferStr(&details, "! running");
 							error_found = true;
 						}
 					}
@@ -309,11 +313,11 @@ do_cluster_show(void)
 					{
 						if (cell->node_info->active == true)
 						{
-							appendPQExpBuffer(&details, "? unreachable");
+							appendPQExpBufferStr(&details, "? unreachable");
 						}
 						else
 						{
-							appendPQExpBuffer(&details, "- failed");
+							appendPQExpBufferStr(&details, "- failed");
 							error_found = true;
 						}
 					}
@@ -322,7 +326,7 @@ do_cluster_show(void)
 			case UNKNOWN:
 				{
 					/* this should never happen */
-					appendPQExpBuffer(&details, "? unknown node type");
+					appendPQExpBufferStr(&details, "? unknown node type");
 						error_found = true;
 				}
 				break;
@@ -351,36 +355,10 @@ do_cluster_show(void)

 	}

+	/* Print column header row (text mode only) */
 	if (runtime_options.output_mode == OM_TEXT)
 	{
-		for (i = 0; i < SHOW_HEADER_COUNT; i++)
-		{
-			if (i == 0)
-				printf(" ");
-			else
-				printf(" | ");
-
-			printf("%-*s",
-				   headers_show[i].max_length,
-				   headers_show[i].title);
-		}
-		printf("\n");
-		printf("-");
-
-		for (i = 0; i < SHOW_HEADER_COUNT; i++)
-		{
-			int			j;
-
-			for (j = 0; j < headers_show[i].max_length; j++)
-				printf("-");
-
-			if (i < (SHOW_HEADER_COUNT - 1))
-				printf("-+-");
-			else
-				printf("-");
-		}
-
-		printf("\n");
+		print_status_header(SHOW_HEADER_COUNT, headers_show);
 	}

 	for (cell = nodes.head; cell; cell = cell->next)
@@ -437,6 +415,11 @@ do_cluster_show(void)
 		{
 			printf(_("  - %s\n"), cell->string);
 		}
+
+		if (runtime_options.verbose == false && connection_error_found == true)
+		{
+			log_hint(_("execute with --verbose option to see connection error messages"));
+		}
 	}

 	/*
@@ -619,9 +602,12 @@ do_cluster_crosscheck(void)

 	t_node_status_cube **cube;

-	bool		error_found = false;
+	bool		connection_error_found = false;
+	int			error_code = SUCCESS;
+	ItemList	warnings = {NULL, NULL};
+
+	n = build_cluster_crosscheck(&cube, &name_length, &warnings, &error_code);

-	n = build_cluster_crosscheck(&cube, &name_length);
 	if (runtime_options.output_mode == OM_CSV)
 	{
 		for (i = 0; i < n; i++)
@@ -643,6 +629,11 @@ do_cluster_crosscheck(void)
 					   cube[i]->node_id,
 					   cube[j]->node_id,
 					   max_node_status);
+
+				if (max_node_status == -1)
+				{
+					connection_error_found = true;
+				}
 			}

 		}
@@ -700,16 +691,16 @@ do_cluster_crosscheck(void)
 				{
 					case -2:
 						c = '?';
-						error_found = true;
 						break;
 					case -1:
 						c = 'x';
-						error_found = true;
+						connection_error_found = true;
 						break;
 					case 0:
 						c = '*';
 						break;
 					default:
+						log_error("unexpected node status value %i", max_node_status);
 						exit(ERR_INTERNAL);
 				}

@@ -718,6 +709,13 @@ do_cluster_crosscheck(void)

 			printf("\n");
 		}
+
+		if (warnings.head != NULL && runtime_options.terse == false)
+		{
+			log_warning(_("following problems detected:"));
+			print_item_list(&warnings);
+		}
+
 	}

 	/* clean up allocated cube array */
@@ -744,13 +742,23 @@ do_cluster_crosscheck(void)
 		free(cube);
 	}

-	if (error_found == true)
+	/* connection errors take priority over errors reported by build_cluster_crosscheck() */
+	if (connection_error_found == true)
 	{
-		exit(ERR_NODE_STATUS);
+		error_code = ERR_NODE_STATUS;
 	}
+
+	exit(error_code);
+
 }


+/*
+ * CLUSTER MATRIX
+ *
+ * Parameters:
+ *   --csv
+ */
 void
 do_cluster_matrix()
 {
@@ -763,18 +771,30 @@ do_cluster_matrix()

 	t_node_matrix_rec **matrix_rec_list;

-	bool		error_found = false;
+	bool		connection_error_found = false;
+	int			error_code = SUCCESS;
+	ItemList	warnings = {NULL, NULL};

-	n = build_cluster_matrix(&matrix_rec_list, &name_length);
+	n = build_cluster_matrix(&matrix_rec_list, &name_length, &warnings, &error_code);

 	if (runtime_options.output_mode == OM_CSV)
 	{
 		for (i = 0; i < n; i++)
+		{
 			for (j = 0; j < n; j++)
+			{
 				printf("%d,%d,%d\n",
 					   matrix_rec_list[i]->node_id,
 					   matrix_rec_list[i]->node_status_list[j]->node_id,
 					   matrix_rec_list[i]->node_status_list[j]->node_status);
+
+				if (matrix_rec_list[i]->node_status_list[j]->node_status == -2
+					|| matrix_rec_list[i]->node_status_list[j]->node_status == -1)
+				{
+					connection_error_found = true;
+				}
+			}
+		}
 	}
 	else
 	{
@@ -803,16 +823,16 @@ do_cluster_matrix()
 				{
 					case -2:
 						c = '?';
-						error_found = true;
 						break;
 					case -1:
 						c = 'x';
-						error_found = true;
+						connection_error_found = true;
 						break;
 					case 0:
 						c = '*';
 						break;
 					default:
+						log_error("unexpected node status value %i", matrix_rec_list[i]->node_status_list[j]->node_status);
 						exit(ERR_INTERNAL);
 				}

@@ -820,6 +840,13 @@ do_cluster_matrix()
 			}
 			printf("\n");
 		}
+
+		if (warnings.head != NULL && runtime_options.terse == false)
+		{
+			log_warning(_("following problems detected:"));
+			print_item_list(&warnings);
+		}
+
 	}

 	for (i = 0; i < n; i++)
@@ -834,10 +861,13 @@ do_cluster_matrix()

 	free(matrix_rec_list);

-	if (error_found == true)
+	/* actual database connection errors have priority */
+	if (connection_error_found == true)
 	{
-		exit(ERR_NODE_STATUS);
+		error_code = ERR_NODE_STATUS;
 	}
+
+	exit(error_code);
 }


@@ -866,7 +896,7 @@ matrix_set_node_status(t_node_matrix_rec **matrix_rec_list, int n, int node_id,


 static int
-build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
+build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, ItemList *warnings, int *error_code)
 {
 	PGconn	   *conn = NULL;
 	int			i = 0,
@@ -895,7 +925,12 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
 		local_node_id = runtime_options.node_id;
 	}

-	get_all_node_records(conn, &nodes);
+	if (get_all_node_records(conn, &nodes) == false)
+	{
+		/* get_all_node_records() will display the error */
+		PQfinish(conn);
+		exit(ERR_BAD_CONFIG);
+	}

 	PQfinish(conn);
 	conn = NULL;
@@ -909,7 +944,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
 	/*
 	 * Allocate an empty matrix record list
 	 *
-	 * -2 == NULL  ? -1 == Error x 0 == OK    *
+	 * -2 == NULL  ? -1 == Error x 0 == OK
 	 */

 	matrix_rec_list = (t_node_matrix_rec **) pg_malloc0(sizeof(t_node_matrix_rec) * nodes.node_count);
@@ -972,7 +1007,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)

 		host = param_get(&remote_conninfo, "host");

-		node_conn = establish_db_connection(cell->node_info->conninfo, false);
+		node_conn = establish_db_connection_quiet(cell->node_info->conninfo);

 		connection_status =
 			(PQstatus(node_conn) == CONNECTION_OK) ? 0 : -1;
@@ -1009,24 +1044,12 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)
 		 * remote repmgr - those are the only values it needs to work, and
 		 * saves us making assumptions about the location of repmgr.conf
 		 */
-		appendPQExpBuffer(&command,
-						  "\"%s -d '%s' ",
-						  make_pg_path(progname()),
-						  cell->node_info->conninfo);
+		appendPQExpBufferChar(&command, '"');

+		make_remote_repmgr_path(&command, cell->node_info);

-		if (strlen(pg_bindir))
-		{
-			appendPQExpBuffer(&command,
-							  "--pg_bindir=");
-			appendShellString(&command,
-							  pg_bindir);
-			appendPQExpBuffer(&command,
-							  " ");
-		}
-
-		appendPQExpBuffer(&command,
-						  " cluster show --csv\"");
+		appendPQExpBufferStr(&command,
+							 " cluster show --csv -L NOTICE --terse\"");

 		log_verbose(LOG_DEBUG, "build_cluster_matrix(): executing:\n  %s", command.data);

@@ -1041,32 +1064,50 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)

 		termPQExpBuffer(&command);

-		for (j = 0; j < nodes.node_count; j++)
+		/* no output returned - probably SSH error */
+		if (p[0] == '\0' || p[0] == '\n')
 		{
-			if (sscanf(p, "%d,%d", &x, &y) != 2)
+			item_list_append_format(warnings,
+									"node %i inaccessible via SSH",
+									connection_node_id);
+			*error_code = ERR_BAD_SSH;
+		}
+		else
+		{
+			for (j = 0; j < nodes.node_count; j++)
 			{
-				fprintf(stderr, _("cannot parse --csv output: %s\n"), p);
-				PQfinish(node_conn);
-				exit(ERR_INTERNAL);
+				if (sscanf(p, "%d,%d", &x, &y) != 2)
+				{
+					matrix_set_node_status(matrix_rec_list,
+										   nodes.node_count,
+										   connection_node_id,
+										   x,
+										   -2);
+
+					item_list_append_format(warnings,
+											"unable to parse --csv output for node %i; output returned was:\n\"%s\"",
+											connection_node_id, p);
+					*error_code = ERR_INTERNAL;
+				}
+				else
+				{
+					matrix_set_node_status(matrix_rec_list,
+										   nodes.node_count,
+										   connection_node_id,
+										   x,
+										   (y == -1) ? -1 : 0);
+				}
+
+				while (*p && (*p != '\n'))
+					p++;
+				if (*p == '\n')
+					p++;
 			}
-
-			matrix_set_node_status(matrix_rec_list,
-								   nodes.node_count,
-								   connection_node_id,
-								   x,
-								   (y == -1) ? -1 : 0);
-
-			while (*p && (*p != '\n'))
-				p++;
-			if (*p == '\n')
-				p++;
 		}

 		termPQExpBuffer(&command_output);
 		PQfinish(node_conn);
 		free_conninfo_params(&remote_conninfo);
-
-		node_conn = NULL;
 	}

 	*matrix_rec_dest = matrix_rec_list;
@@ -1079,7 +1120,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length)


 static int
-build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
+build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, ItemList *warnings, int *error_code)
 {
 	PGconn	   *conn = NULL;
 	int			h,
@@ -1100,7 +1141,12 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
 	else
 		conn = establish_db_connection_by_params(&source_conninfo, true);

-	get_all_node_records(conn, &nodes);
+	if (get_all_node_records(conn, &nodes) == false)
+	{
+		/* get_all_node_records() will display the error */
+		PQfinish(conn);
+		exit(ERR_BAD_CONFIG);
+	}

 	PQfinish(conn);
 	conn = NULL;
@@ -1187,28 +1233,13 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)

 		initPQExpBuffer(&command);

-		appendPQExpBuffer(&command,
-						  "%s -d '%s' --node-id=%i ",
-						  make_pg_path(progname()),
-						  cell->node_info->conninfo,
-						  remote_node_id);
+		make_remote_repmgr_path(&command, cell->node_info);

-		if (strlen(pg_bindir))
-		{
-			appendPQExpBuffer(&command,
-							  "--pg_bindir=");
-			appendShellString(&command,
-							  pg_bindir);
-			appendPQExpBuffer(&command,
-							  " ");
-		}
-
-		appendPQExpBuffer(&command,
-						  "cluster matrix --csv 2>/dev/null");
+		appendPQExpBufferStr(&command,
+							 " cluster matrix --csv -L NOTICE --terse");

 		initPQExpBuffer(&command_output);

-		/* fix to work with --node-id */
 		if (cube[i]->node_id == config_file_options.node_id)
 		{
 			(void) local_command_simple(command.data,
@@ -1249,9 +1280,13 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)

 		p = command_output.data;

-		if (!strlen(command_output.data))
+		if (p[0] == '\0' || p[0] == '\n')
 		{
+			item_list_append_format(warnings,
+									"node %i inaccessible via SSH",
+									remote_node_id);
 			termPQExpBuffer(&command_output);
+			*error_code = ERR_BAD_SSH;
 			continue;
 		}

@@ -1263,16 +1298,23 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)

 			if (sscanf(p, "%d,%d,%d", &matrix_rec_node_id, &node_status_node_id, &node_status) != 3)
 			{
-				fprintf(stderr, _("cannot parse --csv output: %s\n"), p);
-				exit(ERR_INTERNAL);
+				cube_set_node_status(cube,
+									 nodes.node_count,
+									 remote_node_id,
+									 matrix_rec_node_id,
+									 node_status_node_id,
+									 -2);
+				*error_code = ERR_INTERNAL;
+			}
+			else
+			{
+				cube_set_node_status(cube,
+									 nodes.node_count,
+									 remote_node_id,
+									 matrix_rec_node_id,
+									 node_status_node_id,
+									 node_status);
 			}
-
-			cube_set_node_status(cube,
-								 nodes.node_count,
-								 remote_node_id,
-								 matrix_rec_node_id,
-								 node_status_node_id,
-								 node_status);

 			while (*p && (*p != '\n'))
 				p++;
@@ -1332,6 +1374,7 @@ do_cluster_cleanup(void)
 	PGconn	   *conn = NULL;
 	PGconn	   *primary_conn = NULL;
 	int			entries_to_delete = 0;
+	PQExpBufferData event_details;

 	conn = establish_db_connection(config_file_options.conninfo, true);

@@ -1343,9 +1386,17 @@ do_cluster_cleanup(void)

 	log_debug(_("number of days of monitoring history to retain: %i"), runtime_options.keep_history);

-	entries_to_delete = get_number_of_monitoring_records_to_delete(primary_conn, runtime_options.keep_history);
+	entries_to_delete = get_number_of_monitoring_records_to_delete(primary_conn,
+																   runtime_options.keep_history,
+																   runtime_options.node_id);

-	if (entries_to_delete == 0)
+	if (entries_to_delete < 0)
+	{
+		log_error(_("unable to query number of monitoring records to clean up"));
+		PQfinish(primary_conn);
+		exit(ERR_DB_QUERY);
+	}
+	else if (entries_to_delete == 0)
 	{
 		log_info(_("no monitoring records to delete"));
 		PQfinish(primary_conn);
@@ -1355,10 +1406,23 @@ do_cluster_cleanup(void)
 	log_debug("at least %i monitoring records for deletion",
 			  entries_to_delete);

-	if (delete_monitoring_records(primary_conn, runtime_options.keep_history) == false)
+	initPQExpBuffer(&event_details);
+
+	if (delete_monitoring_records(primary_conn, runtime_options.keep_history, runtime_options.node_id) == false)
 	{
-		log_error(_("unable to delete monitoring records"));
+		appendPQExpBufferStr(&event_details,
+						  _("unable to delete monitoring records"));
+
+		log_error("%s", event_details.data);
 		log_detail("%s", PQerrorMessage(primary_conn));
+
+		create_event_notification(primary_conn,
+								  &config_file_options,
+								  config_file_options.node_id,
+								  "cluster_cleanup",
+								  false,
+								  event_details.data);
+
 		PQfinish(primary_conn);
 		exit(ERR_DB_QUERY);
 	}
@@ -1370,19 +1434,40 @@ do_cluster_cleanup(void)
 		log_detail("%s", PQerrorMessage(primary_conn));
 	}

-
-	PQfinish(primary_conn);
-
-	if (runtime_options.keep_history > 0)
+	if (runtime_options.keep_history == 0)
 	{
-		log_notice(_("monitoring records older than %i day(s) deleted"),
-				   runtime_options.keep_history);
+		appendPQExpBufferStr(&event_details,
+						  _("all monitoring records deleted"));
 	}
 	else
 	{
-		log_info(_("all monitoring records deleted"));
+		appendPQExpBufferStr(&event_details,
+						  _("monitoring records deleted"));
 	}

+	if (runtime_options.node_id != UNKNOWN_NODE_ID)
+		appendPQExpBuffer(&event_details,
+						  _(" for node %i"),
+						  runtime_options.node_id);
+
+	if (runtime_options.keep_history > 0)
+		appendPQExpBuffer(&event_details,
+						  _("; records newer than %i day(s) retained"),
+						  runtime_options.keep_history);
+
+	create_event_notification(primary_conn,
+							  &config_file_options,
+							  config_file_options.node_id,
+							  "cluster_cleanup",
+							  true,
+							  event_details.data);
+
+	log_notice("%s", event_details.data);
+
+	termPQExpBuffer(&event_details);
+	PQfinish(primary_conn);
+
+
 	return;
 }

@@ -1442,7 +1527,7 @@ do_cluster_help(void)

 	printf(_("CLUSTER CLEANUP\n"));
 	puts("");
-	printf(_("  \"cluster cleanup\" purges records from the \"repmgr.monitor\" table.\n"));
+	printf(_("  \"cluster cleanup\" purges records from the \"repmgr.monitoring_history\" table.\n"));
 	puts("");
 	printf(_("    -k, --keep-history=VALUE  retain indicated number of days of history (default: 0)\n"));
 	puts("");
--- a/repmgr-action-daemon.c
+++ b/repmgr-action-daemon.c
@@ -0,0 +1,420 @@
+/*
+ * repmgr-action-daemon.c
+ *
+ * Implements repmgrd actions for the repmgr command line utility
+ * Copyright (c) 2ndQuadrant, 2010-2018
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "repmgr.h"
+
+#include "repmgr-client-global.h"
+#include "repmgr-action-daemon.h"
+
+
+
+/*
+ * Possibly also show:
+ *  - repmgrd start time?
+ *  - repmgrd mode
+ *  - priority
+ *  - whether promotion candidate (due to zero priority/different location)
+ */
+
+/* Column indexes into headers_status[] for the "daemon status" table */
+typedef enum
+{
+	STATUS_ID = 0,
+	STATUS_NAME,
+	STATUS_ROLE,
+	STATUS_PG,
+	STATUS_RUNNING,
+	STATUS_PID,
+	STATUS_PAUSED
+} StatusHeader;
+
+/* Number of StatusHeader values; must be kept in step with the enum above */
+#define STATUS_HEADER_COUNT 7
+
+/* Titles and measured widths of the "daemon status" columns */
+struct ColHeader headers_status[STATUS_HEADER_COUNT];
+
+/* helpers private to this file */
+static void fetch_node_records(PGconn *conn, NodeInfoList *node_list);
+static void _do_repmgr_pause(bool pause);
+
+
+/*
+ * Display the status of PostgreSQL and repmgrd on each registered node.
+ *
+ * Node records are fetched using the configured conninfo (or, if that is
+ * empty, the connection parameters in "source_conninfo"); each node is then
+ * contacted in turn. Output is a table, or one CSV line per node in CSV
+ * mode. Nodes which could not be reached are listed as warnings after the
+ * table (not in CSV mode, and not if terse output was requested).
+ */
+void
+do_daemon_status(void)
+{
+	PGconn	   *conn = NULL;
+	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
+	NodeInfoListCell *cell = NULL;
+	int			i;
+	RepmgrdInfo **repmgrd_info;
+	ItemList	warnings = {NULL, NULL};
+
+	/* Connect to local database to obtain cluster connection data */
+	log_verbose(LOG_INFO, _("connecting to database"));
+
+	if (strlen(config_file_options.conninfo))
+		conn = establish_db_connection(config_file_options.conninfo, true);
+	else
+		conn = establish_db_connection_by_params(&source_conninfo, true);
+
+	fetch_node_records(conn, &nodes);
+
+	/* the node records are now held in "nodes"; release the connection */
+	PQfinish(conn);
+
+	repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * nodes.node_count);
+
+	if (repmgrd_info == NULL)
+	{
+		log_error(_("unable to allocate memory"));
+		exit(ERR_OUT_OF_MEMORY);
+	}
+
+	strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN);
+	strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN);
+	strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN);
+	strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN);
+	strncpy(headers_status[STATUS_RUNNING].title, _("repmgrd"), MAXLEN);
+	strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN);
+	strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN);
+
+	/* each column starts out as wide as its title */
+	for (i = 0; i < STATUS_HEADER_COUNT; i++)
+	{
+		headers_status[i].max_length = strlen(headers_status[i].title);
+	}
+
+	/*
+	 * First pass: contact each node, collect its status and widen the
+	 * columns where a value is longer than anything seen so far.
+	 */
+	i = 0;
+
+	for (cell = nodes.head; cell; cell = cell->next)
+	{
+		int			j;
+
+		repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
+		repmgrd_info[i]->node_id = cell->node_info->node_id;
+		repmgrd_info[i]->pid = UNKNOWN_PID;
+		repmgrd_info[i]->paused = false;
+		repmgrd_info[i]->running = false;
+		repmgrd_info[i]->pg_running = true;
+
+		cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
+
+		if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
+		{
+			if (runtime_options.verbose)
+			{
+				char		error[MAXLEN];
+
+				/* snprintf() guarantees termination even if the message is truncated */
+				snprintf(error, sizeof(error), "%s", PQerrorMessage(cell->node_info->conn));
+
+				item_list_append_format(&warnings,
+										"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
+										cell->node_info->node_name, cell->node_info->node_id, trim(error));
+			}
+			else
+			{
+				item_list_append_format(&warnings,
+										"unable to connect to node \"%s\" (ID: %i)",
+										cell->node_info->node_name, cell->node_info->node_id);
+			}
+
+			repmgrd_info[i]->pg_running = false;
+			maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("not running"));
+			maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("n/a"));
+			maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a"));
+		}
+		else
+		{
+			maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("running"));
+
+			repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn);
+
+			repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn);
+
+			if (repmgrd_info[i]->running == true)
+			{
+				maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("running"));
+			}
+			else
+			{
+				maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("not running"));
+			}
+
+			if (repmgrd_info[i]->pid == UNKNOWN_PID)
+			{
+				maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a"));
+			}
+			else
+			{
+				maxlen_snprintf(repmgrd_info[i]->pid_text, "%i", repmgrd_info[i]->pid);
+			}
+
+			repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);
+		}
+
+		/*
+		 * The connection object must be released whether or not the
+		 * connection attempt succeeded.
+		 */
+		PQfinish(cell->node_info->conn);
+		cell->node_info->conn = NULL;
+
+		/* snprintf() with a zero-size buffer returns the number of characters needed */
+		headers_status[STATUS_ID].cur_length = snprintf(NULL, 0, "%i", cell->node_info->node_id);
+		headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name);
+		headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type));
+		headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text);
+		headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running);
+		headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text);
+
+		for (j = 0; j < STATUS_HEADER_COUNT; j++)
+		{
+			if (headers_status[j].cur_length > headers_status[j].max_length)
+			{
+				headers_status[j].max_length = headers_status[j].cur_length;
+			}
+		}
+
+		i++;
+	}
+
+	/* Print column header row (text mode only) */
+	if (runtime_options.output_mode == OM_TEXT)
+	{
+		print_status_header(STATUS_HEADER_COUNT, headers_status);
+	}
+
+	/* Second pass: emit one row per node, releasing its status record afterwards */
+	i = 0;
+
+	for (cell = nodes.head; cell; cell = cell->next)
+	{
+		if (runtime_options.output_mode == OM_CSV)
+		{
+			printf("%i,%s,%s,%i,%i,%i,%i\n",
+				   cell->node_info->node_id,
+				   cell->node_info->node_name,
+				   get_node_type_string(cell->node_info->type),
+				   repmgrd_info[i]->pg_running ? 1 : 0,
+				   repmgrd_info[i]->running ? 1 : 0,
+				   repmgrd_info[i]->pid,
+				   repmgrd_info[i]->paused ? 1 : 0);
+		}
+		else
+		{
+			printf(" %-*i ",  headers_status[STATUS_ID].max_length, cell->node_info->node_id);
+			printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name);
+			printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type));
+
+			printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text);
+			printf("| %-*s ", headers_status[STATUS_RUNNING].max_length, repmgrd_info[i]->repmgrd_running);
+			printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text);
+
+			/* without a known repmgrd PID the pause state is shown as "n/a" */
+			if (repmgrd_info[i]->pid == UNKNOWN_PID)
+				printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, "n/a");
+			else
+				printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, repmgrd_info[i]->paused ? "yes" : "no");
+
+			printf("\n");
+		}
+
+		free(repmgrd_info[i]);
+		i++;
+	}
+
+	free(repmgrd_info);
+
+	/* emit any warnings */
+
+	if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode != OM_CSV)
+	{
+		ItemListCell *warning_cell = NULL;
+
+		printf(_("\nWARNING: following issues were detected\n"));
+		for (warning_cell = warnings.head; warning_cell; warning_cell = warning_cell->next)
+		{
+			printf(_("  - %s\n"), warning_cell->string);
+		}
+
+		if (runtime_options.verbose == false)
+		{
+			log_hint(_("execute with --verbose option to see connection error messages"));
+		}
+	}
+}
+
+/* Handler for "daemon pause": delegates to _do_repmgr_pause() with pause enabled */
+void
+do_daemon_pause(void)
+{
+	const bool	pause = true;
+
+	_do_repmgr_pause(pause);
+}
+
+/* Handler for "daemon unpause": delegates to _do_repmgr_pause() with pause disabled */
+void
+do_daemon_unpause(void)
+{
+	const bool	pause = false;
+
+	_do_repmgr_pause(pause);
+}
+
+
+/*
+ * Pause (pause == true) or unpause (pause == false) repmgrd on every
+ * registered node.
+ *
+ * Node records are fetched using the configured conninfo (or, if that is
+ * empty, the connection parameters in "source_conninfo"); each node is then
+ * contacted in turn. In dry-run mode the nodes are only checked for
+ * reachability and the action which would be taken is logged.
+ *
+ * This function does not return: it exits with SUCCESS, or with
+ * ERR_REPMGRD_PAUSE if any node could not be reached or (un)paused.
+ */
+static void
+_do_repmgr_pause(bool pause)
+{
+	PGconn	   *conn = NULL;
+	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
+	NodeInfoListCell *cell = NULL;
+	int			error_nodes = 0;
+
+	/* Connect to local database to obtain cluster connection data */
+	log_verbose(LOG_INFO, _("connecting to database"));
+
+	if (strlen(config_file_options.conninfo))
+		conn = establish_db_connection(config_file_options.conninfo, true);
+	else
+		conn = establish_db_connection_by_params(&source_conninfo, true);
+
+	fetch_node_records(conn, &nodes);
+
+	/* the node records are now held in "nodes"; release the connection */
+	PQfinish(conn);
+
+	for (cell = nodes.head; cell; cell = cell->next)
+	{
+		log_verbose(LOG_DEBUG, "%s node %i (%s)",
+					pause == true ? "pausing" : "unpausing",
+					cell->node_info->node_id,
+					cell->node_info->node_name);
+
+		cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
+
+		if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
+		{
+			log_warning(_("unable to connect to node %i"),
+						cell->node_info->node_id);
+			error_nodes++;
+		}
+		else if (runtime_options.dry_run == true)
+		{
+			if (pause == true)
+			{
+				log_info(_("would pause node %i (%s) "),
+						 cell->node_info->node_id,
+						 cell->node_info->node_name);
+			}
+			else
+			{
+				log_info(_("would unpause node %i (%s) "),
+						 cell->node_info->node_id,
+						 cell->node_info->node_name);
+			}
+		}
+		else
+		{
+			bool		success = repmgrd_pause(cell->node_info->conn, pause);
+			const char *outcome = NULL;
+
+			if (success == true)
+			{
+				outcome = (pause == true) ? "paused" : "unpaused";
+			}
+			else
+			{
+				outcome = (pause == true) ? "not paused" : "not unpaused";
+				error_nodes++;
+			}
+
+			log_notice(_("node %i (%s) %s"),
+					   cell->node_info->node_id,
+					   cell->node_info->node_name,
+					   outcome);
+		}
+
+		/*
+		 * The connection object must be released whether or not the
+		 * connection attempt succeeded.
+		 */
+		PQfinish(cell->node_info->conn);
+		cell->node_info->conn = NULL;
+	}
+
+	if (error_nodes > 0)
+	{
+		if (pause == true)
+		{
+			log_error(_("unable to pause %i node(s)"), error_nodes);
+		}
+		else
+		{
+			log_error(_("unable to unpause %i node(s)"), error_nodes);
+		}
+
+		log_hint(_("execute \"repmgr daemon status\" to view current status"));
+
+		exit(ERR_REPMGRD_PAUSE);
+	}
+
+	exit(SUCCESS);
+}
+
+
+
+/*
+ * Populate "node_list" with all node records retrievable via "conn".
+ *
+ * If the records cannot be retrieved, or no records exist, the connection
+ * is closed and the program exits with ERR_BAD_CONFIG; the function
+ * therefore only returns with a non-empty list. On success "conn" is left
+ * open and remains the caller's responsibility.
+ */
+static void
+fetch_node_records(PGconn *conn, NodeInfoList *node_list)
+{
+	if (get_all_node_records(conn, node_list) == false)
+	{
+		/* get_all_node_records() will display any error message */
+		PQfinish(conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	if (node_list->node_count == 0)
+	{
+		log_error(_("no node records were found"));
+		log_hint(_("ensure at least one node is registered"));
+		PQfinish(conn);
+		exit(ERR_BAD_CONFIG);
+	}
+}
+
+
+/*
+ * Print usage information for the "daemon status", "daemon pause" and
+ * "daemon unpause" commands to standard output.
+ */
+void
+do_daemon_help(void)
+{
+	print_help_header();
+
+	printf(_("Usage:\n"));
+	printf(_("    %s [OPTIONS] daemon status\n"),  progname());
+	printf(_("    %s [OPTIONS] daemon pause\n"),   progname());
+	printf(_("    %s [OPTIONS] daemon unpause\n"), progname());
+	puts("");
+
+	printf(_("DAEMON STATUS\n"));
+	puts("");
+	printf(_("  \"daemon status\" shows the status of repmgrd on each node in the cluster\n"));
+	puts("");
+	printf(_("    --csv                     emit output as CSV\n"));
+	printf(_("    --verbose                 show text of database connection error messages\n"));
+	puts("");
+
+	printf(_("DAEMON PAUSE\n"));
+	puts("");
+	printf(_("  \"daemon pause\" instructs repmgrd on each node to pause failover detection\n"));
+	puts("");
+	printf(_("    --dry-run               check if nodes are reachable but don't pause repmgrd\n"));
+	puts("");
+
+	printf(_("DAEMON UNPAUSE\n"));
+	puts("");
+	printf(_("  \"daemon unpause\"  instructs repmgrd on each node to resume failover detection\n"));
+	puts("");
+	printf(_("    --dry-run               check if nodes are reachable but don't unpause repmgrd\n"));
+	puts("");
+
+
+	puts("");
+}
--- a/repmgr-action-daemon.h
+++ b/repmgr-action-daemon.h
@@ -0,0 +1,28 @@
+/*
+ * repmgr-action-daemon.h
+ * Copyright (c) 2ndQuadrant, 2010-2018
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _REPMGR_ACTION_DAEMON_H_
+#define _REPMGR_ACTION_DAEMON_H_
+
+
+/* handlers for the "daemon status", "daemon pause" and "daemon unpause" actions */
+extern void do_daemon_status(void);
+extern void do_daemon_pause(void);
+extern void do_daemon_unpause(void);
+
+/* prints usage information for the "daemon" actions */
+extern void do_daemon_help(void);
+#endif							/* _REPMGR_ACTION_DAEMON_H_ */
--- a/repmgr-action-node.c
+++ b/repmgr-action-node.c
@@ -189,16 +189,16 @@ do_node_status(void)

 		if (enabled == true)
 		{
-			appendPQExpBuffer(&archiving_status, "enabled");
+			appendPQExpBufferStr(&archiving_status, "enabled");
 		}
 		else
 		{
-			appendPQExpBuffer(&archiving_status, "disabled");
+			appendPQExpBufferStr(&archiving_status, "disabled");
 		}

 		if (enabled == false && recovery_type == RECTYPE_STANDBY)
 		{
-			appendPQExpBuffer(&archiving_status, " (on standbys \"archive_mode\" must be set to \"always\" to be effective)");
+			appendPQExpBufferStr(&archiving_status, " (on standbys \"archive_mode\" must be set to \"always\" to be effective)");
 		}

 		key_value_list_set(&node_status,
@@ -506,7 +506,7 @@ do_node_status(void)

 		/* output missing slot information */

-		appendPQExpBuffer(&output, "\n");
+		appendPQExpBufferChar(&output, '\n');
 		appendPQExpBuffer(&output,
 						  "\"missing_replication_slots\",%i",
 						  missing_slots.node_count);
@@ -590,13 +590,13 @@ _do_node_status_is_shutdown_cleanly(void)

 	initPQExpBuffer(&output);

-	appendPQExpBuffer(&output,
+	appendPQExpBufferStr(&output,
 					  "--state=");

 	/* sanity-check we're dealing with a PostgreSQL directory */
 	if (is_pg_dir(config_file_options.data_directory) == false)
 	{
-		appendPQExpBuffer(&output, "UNKNOWN");
+		appendPQExpBufferStr(&output, "UNKNOWN");
 		printf("%s\n", output.data);
 		termPQExpBuffer(&output);
 		return;
@@ -659,10 +659,10 @@ _do_node_status_is_shutdown_cleanly(void)
 	switch (node_status)
 	{
 		case NODE_STATUS_UP:
-			appendPQExpBuffer(&output, "RUNNING");
+			appendPQExpBufferStr(&output, "RUNNING");
 			break;
 		case NODE_STATUS_SHUTTING_DOWN:
-			appendPQExpBuffer(&output, "SHUTTING_DOWN");
+			appendPQExpBufferStr(&output, "SHUTTING_DOWN");
 			break;
 		case NODE_STATUS_DOWN:
 			appendPQExpBuffer(&output,
@@ -670,10 +670,10 @@ _do_node_status_is_shutdown_cleanly(void)
 							  format_lsn(checkPoint));
 			break;
 		case NODE_STATUS_UNCLEAN_SHUTDOWN:
-			appendPQExpBuffer(&output, "UNCLEAN_SHUTDOWN");
+			appendPQExpBufferStr(&output, "UNCLEAN_SHUTDOWN");
 			break;
 		case NODE_STATUS_UNKNOWN:
-			appendPQExpBuffer(&output, "UNKNOWN");
+			appendPQExpBufferStr(&output, "UNKNOWN");
 			break;
 	}

@@ -847,7 +847,7 @@ do_node_check(void)
 								  ",\"%s\"",
 								  cell->details);
 			}
-			appendPQExpBuffer(&output, "\n");
+			appendPQExpBufferChar(&output, '\n');
 		}
 	}
 	else
@@ -869,7 +869,7 @@ do_node_check(void)
 								  " (%s)",
 								  cell->details);
 			}
-			appendPQExpBuffer(&output, "\n");
+			appendPQExpBufferChar(&output, '\n');
 		}
 	}

@@ -899,12 +899,12 @@ do_node_check_replication_connection(void)


 	initPQExpBuffer(&output);
-	appendPQExpBuffer(&output,
-					  "--connection=");
+	appendPQExpBufferStr(&output,
+						 "--connection=");

 	if (runtime_options.remote_node_id == UNKNOWN_NODE_ID)
 	{
-		appendPQExpBuffer(&output, "UNKNOWN");
+		appendPQExpBufferStr(&output, "UNKNOWN");
 		printf("%s\n", output.data);
 		termPQExpBuffer(&output);
 		return;
@@ -918,7 +918,7 @@ do_node_check_replication_connection(void)

 	if (record_status != RECORD_FOUND)
 	{
-		appendPQExpBuffer(&output, "UNKNOWN");
+		appendPQExpBufferStr(&output, "UNKNOWN");
 		printf("%s\n", output.data);
 		termPQExpBuffer(&output);
 		return;
@@ -938,7 +938,7 @@ do_node_check_replication_connection(void)

 	if (PQstatus(repl_conn) != CONNECTION_OK)
 	{
-		appendPQExpBuffer(&output, "BAD");
+		appendPQExpBufferStr(&output, "BAD");
 		printf("%s\n", output.data);
 		termPQExpBuffer(&output);
 		return;
@@ -946,7 +946,7 @@ do_node_check_replication_connection(void)

 	PQfinish(repl_conn);

-	appendPQExpBuffer(&output, "OK");
+	appendPQExpBufferStr(&output, "OK");
 	printf("%s\n", output.data);
 	termPQExpBuffer(&output);

@@ -1042,9 +1042,8 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
 				break;
 			case OM_NAGIOS:
 			case OM_TEXT:
-				appendPQExpBuffer(
-								  &details,
-								  "unable to check archive_status directory");
+				appendPQExpBufferStr(&details,
+									 "unable to check archive_status directory");
 				break;

 			default:
@@ -1172,8 +1171,8 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
 	if (missing_nodes_count == 0)
 	{
 		if (expected_nodes_count == 0)
-			appendPQExpBuffer(&details,
-							  "this node has no downstream nodes");
+			appendPQExpBufferStr(&details,
+								 "this node has no downstream nodes");
 		else
 			appendPQExpBuffer(&details,
 							  "%i of %i downstream nodes attached",
@@ -1194,20 +1193,18 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou

 		if (mode != OM_NAGIOS)
 		{
-			appendPQExpBuffer(&details, "; missing: ");
+			appendPQExpBufferStr(&details, "; missing: ");

 			for (missing_cell = missing_nodes.head; missing_cell; missing_cell = missing_cell->next)
 			{
 				if (first == false)
-					appendPQExpBuffer(&details,
-									  ", ");
+					appendPQExpBufferStr(&details,
+										 ", ");
 				else
 					first = false;

 				if (first == false)
-					appendPQExpBuffer(
-									  &details,
-									  "%s", missing_cell->string);
+					appendPQExpBufferStr(&details, missing_cell->string);
 			}
 		}
 	}
@@ -1307,8 +1304,8 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
 		switch (mode)
 		{
 			case OM_OPTFORMAT:
-				appendPQExpBuffer(&details,
-								  "--lag=0");
+				appendPQExpBufferStr(&details,
+									 "--lag=0");
 				break;
 			case OM_NAGIOS:
 				appendPQExpBuffer(&details,
@@ -1319,13 +1316,13 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
 			case OM_TEXT:
 				if (node_info->type == WITNESS)
 				{
-					appendPQExpBuffer(&details,
-									  "N/A - node is witness");
+					appendPQExpBufferStr(&details,
+										 "N/A - node is witness");
 				}
 				else
 				{
-					appendPQExpBuffer(&details,
-									  "N/A - node is primary");
+					appendPQExpBufferStr(&details,
+										 "N/A - node is primary");
 				}
 				break;
 			default:
@@ -1406,9 +1403,8 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
 					break;
 				case OM_NAGIOS:
 				case OM_TEXT:
-					appendPQExpBuffer(
-									  &details,
-									  "unable to query replication lag");
+					appendPQExpBufferStr(&details,
+										 "unable to query replication lag");
 					break;

 				default:
@@ -1508,39 +1504,39 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
 			if (recovery_type == RECTYPE_STANDBY)
 			{
 				status = CHECK_STATUS_CRITICAL;
-				appendPQExpBuffer(&details,
-								  _("node is registered as primary but running as standby"));
+				appendPQExpBufferStr(&details,
+									 _("node is registered as primary but running as standby"));
 			}
 			else
 			{
-				appendPQExpBuffer(&details,
-								  _("node is primary"));
+				appendPQExpBufferStr(&details,
+									 _("node is primary"));
 			}
 			break;
 		case STANDBY:
 			if (recovery_type == RECTYPE_PRIMARY)
 			{
 				status = CHECK_STATUS_CRITICAL;
-				appendPQExpBuffer(&details,
-								  _("node is registered as standby but running as primary"));
+				appendPQExpBufferStr(&details,
+									 _("node is registered as standby but running as primary"));
 			}
 			else
 			{
-				appendPQExpBuffer(&details,
-								  _("node is standby"));
+				appendPQExpBufferStr(&details,
+									 _("node is standby"));
 			}
 			break;
 		case WITNESS:
 			if (recovery_type == RECTYPE_STANDBY)
 			{
 				status = CHECK_STATUS_CRITICAL;
-				appendPQExpBuffer(&details,
-								  _("node is registered as witness but running as standby"));
+				appendPQExpBufferStr(&details,
+									 _("node is registered as witness but running as standby"));
 			}
 			else
 			{
-				appendPQExpBuffer(&details,
-								  _("node is witness"));
+				appendPQExpBufferStr(&details,
+									 _("node is witness"));
 			}
 			break;
 		case BDR:
@@ -1551,8 +1547,8 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
 				if (is_bdr_db(conn, &output) == false)
 				{
 					status = CHECK_STATUS_CRITICAL;
-					appendPQExpBuffer(&details,
-									  "%s", output.data);
+					appendPQExpBufferStr(&details,
+										 output.data);
 				}
 				termPQExpBuffer(&output);

@@ -1561,13 +1557,13 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
 					if (is_active_bdr_node(conn, node_info->node_name) == false)
 					{
 						status = CHECK_STATUS_CRITICAL;
-						appendPQExpBuffer(&details,
-										  _("node is not an active BDR node"));
+						appendPQExpBufferStr(&details,
+											 _("node is not an active BDR node"));
 					}
 					else
 					{
-						appendPQExpBuffer(&details,
-										  _("node is an active BDR node"));
+						appendPQExpBufferStr(&details,
+											 _("node is an active BDR node"));
 					}
 				}
 			}
@@ -1624,13 +1620,13 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check

 	if (server_version_num < 90400)
 	{
-		appendPQExpBuffer(&details,
-						  _("replication slots not available for this PostgreSQL version"));
+		appendPQExpBufferStr(&details,
+							 _("replication slots not available for this PostgreSQL version"));
 	}
 	else if (node_info->total_replication_slots == 0)
 	{
-		appendPQExpBuffer(&details,
-						  _("node has no replication slots"));
+		appendPQExpBufferStr(&details,
+							 _("node has no replication slots"));
 	}
 	else if (node_info->inactive_replication_slots == 0)
 	{
@@ -1700,8 +1696,8 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf

 	if (server_version_num < 90400)
 	{
-		appendPQExpBuffer(&details,
-						  _("replication slots not available for this PostgreSQL version"));
+		appendPQExpBufferStr(&details,
+							 _("replication slots not available for this PostgreSQL version"));
 	}
 	else
 	{
@@ -1711,9 +1707,8 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf

 		if (missing_slots.node_count == 0)
 		{
-			appendPQExpBuffer(&details,
-						  _("node has no missing replication slots"));
-
+			appendPQExpBufferStr(&details,
+								 _("node has no missing replication slots"));
 		}
 		else
 		{
@@ -1728,7 +1723,7 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf

 			if (missing_slots.node_count)
 			{
-				appendPQExpBuffer(&details, ": ");
+				appendPQExpBufferStr(&details, ": ");

 				for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next)
 				{
@@ -1738,10 +1733,10 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
 					}
 					else
 					{
-						appendPQExpBuffer(&details, ", ");
+						appendPQExpBufferStr(&details, ", ");
 					}

-					appendPQExpBuffer(&details, "%s", missing_slot_cell->node_info->slot_name);
+					appendPQExpBufferStr(&details, missing_slot_cell->node_info->slot_name);
 				}
 			}
 		}
@@ -1784,7 +1779,7 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
 			if (list_output != NULL)
 			{
 				check_status_list_set(list_output,
-									  "Replication slots",
+									  "Missing replication slots",
 									  status,
 									  details.data);
 			}
@@ -2137,8 +2132,8 @@ do_node_rejoin(void)
 			exit(ERR_BAD_CONFIG);
 		}

-		appendPQExpBuffer(&msg,
-						  _("prerequisites for using pg_rewind are met"));
+		appendPQExpBufferStr(&msg,
+							 _("prerequisites for using pg_rewind are met"));

 		if (runtime_options.dry_run == true)
 		{
@@ -2417,6 +2412,54 @@ do_node_rejoin(void)
 		success = is_downstream_node_attached(upstream_conn, config_file_options.node_name);
 	}

+	/*
+	 * Handle replication slots:
+	 *  - if a slot for the new upstream exists, delete that
+	 *  - warn about any other inactive replication slots
+	 */
+	if (runtime_options.force_rewind_used == false && config_file_options.use_replication_slots)
+	{
+		PGconn	   *local_conn = NULL;
+		local_conn = establish_db_connection(config_file_options.conninfo, false);
+
+		if (PQstatus(local_conn) != CONNECTION_OK)
+		{
+			log_warning(_("unable to connect to local node to check replication slot status"));
+			log_hint(_("execute \"repmgr node check\" to check inactive slots and drop manually if necessary"));
+		}
+		else
+		{
+			KeyValueList inactive_replication_slots = {NULL, NULL};
+			KeyValueListCell *cell = NULL;
+			int inactive_count = 0;
+			PQExpBufferData slotinfo;
+
+			drop_replication_slot_if_exists(local_conn,
+											config_file_options.node_id,
+											primary_node_record.slot_name);
+
+			(void) get_inactive_replication_slots(local_conn, &inactive_replication_slots);
+
+			initPQExpBuffer(&slotinfo);
+			for (cell = inactive_replication_slots.head; cell; cell = cell->next)
+			{
+				appendPQExpBuffer(&slotinfo,
+								  "  - %s (%s)", cell->key, cell->value);
+				inactive_count++;
+			}
+
+			if (inactive_count > 0)
+			{
+				log_warning(_("%i inactive replication slots detected"), inactive_count);
+				log_detail(_("inactive replication slots:\n%s"), slotinfo.data);
+				log_hint(_("these replication slots may need to be removed manually"));
+			}
+
+			termPQExpBuffer(&slotinfo);
+
+			PQfinish(local_conn);
+		}
+	}

 	if (success == true)
 	{
@@ -2426,7 +2469,8 @@ do_node_rejoin(void)
 	else
 	{
 		/*
-		 * if we reach here, no record found in upstream node's pg_stat_replication */
+		 * if we reach here, no record found in upstream node's pg_stat_replication
+		 */
 		log_notice(_("NODE REJOIN has completed but node is not yet reattached to upstream"));
 		log_hint(_("you will need to manually check the node's replication status"));
 	}
@@ -2919,6 +2963,7 @@ do_node_help(void)
 	printf(_("    --dry-run             show what action would be performed, but don't execute it\n"));
 	printf(_("    --action              action to perform (one of \"start\", \"stop\", \"restart\" or \"reload\")\n"));
 	printf(_("    --list-actions        show what command would be performed for each action\n"));
+	printf(_("    --checkpoint          issue a CHECKPOINT before stopping or restarting the node\n"));
 	puts("");


--- a/repmgr-action-primary.c
+++ b/repmgr-action-primary.c
@@ -64,12 +64,10 @@ do_primary_register(void)
 			PQfinish(conn);
 			exit(ERR_BAD_CONFIG);
 		}
-		else
-		{
-			log_error(_("connection to node lost"));
-			PQfinish(conn);
-			exit(ERR_DB_CONN);
-		}
+
+		log_error(_("unable to determine server's recovery type"));
+		PQfinish(conn);
+		exit(ERR_DB_CONN);
 	}

 	log_verbose(LOG_INFO, _("server is not in recovery"));
@@ -172,8 +170,8 @@ do_primary_register(void)
 											&node_info);
 		if (record_created == true)
 		{
-			appendPQExpBuffer(&event_description,
-							  "existing primary record updated");
+			appendPQExpBufferStr(&event_description,
+								 "existing primary record updated");
 		}
 		else
 		{
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
--- a/repmgr-action-witness.c
+++ b/repmgr-action-witness.c
@@ -37,6 +37,7 @@ do_witness_register(void)
 	PGconn	   *witness_conn = NULL;
 	PGconn	   *primary_conn = NULL;
 	RecoveryType recovery_type = RECTYPE_UNKNOWN;
+	ExtensionStatus extension_status = REPMGR_UNKNOWN;
 	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
 	t_node_info node_record = T_NODE_INFO_INITIALIZER;
 	RecordStatus record_status = RECORD_NOT_FOUND;
@@ -214,27 +215,45 @@ do_witness_register(void)
 		}
 	}

+	extension_status = get_repmgr_extension_status(witness_conn, NULL);
+
 	/*
-	 * if repmgr.nodes contains entries, delete if -F/--force provided,
-	 * otherwise exit with error
+	 * Check if the witness database already contains node records;
+	 * only do this if the extension is actually installed.
 	 */
-	get_all_node_records(witness_conn, &nodes);
-
-	log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
-
-	if (nodes.node_count > 0)
+	if (extension_status == REPMGR_INSTALLED
+	 || extension_status == REPMGR_OLD_VERSION_INSTALLED)
 	{
-		if (!runtime_options.force)
+		/*
+		 * if repmgr.nodes contains entries, exit with error unless
+		 * -F/--force provided (which will cause the existing records
+		 * to be overwritten)
+		 */
+
+		if (get_all_node_records(witness_conn, &nodes) == false)
 		{
-			log_error(_("witness node is already initialised and contains node records"));
-			log_hint(_("use option -F/--force to reinitialise the node"));
-			PQfinish(primary_conn);
+			/* get_all_node_records() will display the error */
 			PQfinish(witness_conn);
+			PQfinish(primary_conn);
 			exit(ERR_BAD_CONFIG);
 		}
-	}

-	clear_node_info_list(&nodes);
+		log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
+
+		if (nodes.node_count > 0)
+		{
+			if (!runtime_options.force)
+			{
+				log_error(_("witness node is already initialised and contains node records"));
+				log_hint(_("use option -F/--force to reinitialise the node"));
+				PQfinish(primary_conn);
+				PQfinish(witness_conn);
+				exit(ERR_BAD_CONFIG);
+			}
+		}
+
+		clear_node_info_list(&nodes);
+	}

 	if (runtime_options.dry_run == true)
 	{
@@ -465,13 +484,15 @@ void do_witness_help(void)
 	printf(_("Usage:\n"));
 	printf(_("    %s [OPTIONS] witness register\n"), progname());
 	printf(_("    %s [OPTIONS] witness unregister\n"), progname());
-
+	puts("");
 	printf(_("WITNESS REGISTER\n"));
 	puts("");
 	printf(_("  \"witness register\" registers a witness node.\n"));
 	puts("");
-	printf(_("  Requires provision of connection information for the primary\n"));
+	printf(_("  Requires provision of connection information for the primary node,\n"));
+	printf(_("  typically usually just the host name.\n"));
 	puts("");
+	printf(_("  -h/--host                host name of the primary node\n"));
 	printf(_("  --dry-run                check prerequisites but don't make any changes\n"));
 	printf(_("  -F, --force              overwrite an existing node record\n"));
 	puts("");
--- a/repmgr-client-global.h
+++ b/repmgr-client-global.h
@@ -97,6 +97,7 @@ typedef struct
 	bool		force_rewind_used;
 	char		force_rewind_path[MAXPGPATH];
 	bool		siblings_follow;
+	bool		repmgrd_no_pause;

 	/* "node status" options */
 	bool		is_shutdown_cleanly;
@@ -156,7 +157,7 @@ typedef struct
 		/* "standby register" options */ \
 		false, -1, DEFAULT_WAIT_START,   \
 		/* "standby switchover" options */ \
-		false, false, "", false,		   \
+		false, false, "", false, false,	\
 		/* "node status" options */ \
 		false, \
 		/* "node check" options */ \
@@ -193,6 +194,14 @@ typedef enum
 } t_server_action;


+typedef struct ColHeader
+{
+	char		title[MAXLEN];
+	int			max_length;
+	int			cur_length;
+} ColHeader;
+
+

 /* global configuration structures */
 extern t_runtime_options runtime_options;
@@ -228,7 +237,10 @@ extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGc
 extern bool remote_command(const char *host, const char *user, const char *command, PQExpBufferData *outputbuf);

 extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record);
+
+/* display functions */
 extern void print_help_header(void);
+extern void print_status_header(int cols, ColHeader *headers);

 /* server control functions */
 extern void get_server_action(t_server_action action, char *script, char *data_dir);
@@ -237,5 +249,6 @@ extern void get_node_config_directory(char *config_dir_buf);
 extern void get_node_data_directory(char *data_dir_buf);
 extern void init_node_record(t_node_info *node_record);
 extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
+extern void drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);

 #endif							/* _REPMGR_CLIENT_GLOBAL_H_ */
--- a/repmgr-client.c
+++ b/repmgr-client.c
@@ -29,11 +29,14 @@
 *
 * NODE STATUS
 * NODE CHECK
- *
- * For internal use:
 * NODE REJOIN
 * NODE SERVICE
 *
+ * DAEMON STATUS
+ * DAEMON PAUSE
+ * DAEMON UNPAUSE
+ *
+ *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
@@ -62,6 +65,7 @@
 #include "repmgr-action-bdr.h"
 #include "repmgr-action-node.h"
 #include "repmgr-action-cluster.h"
+#include "repmgr-action-daemon.h"

 #include <storage/fd.h>			/* for PG_TEMP_FILE_PREFIX */

@@ -438,6 +442,10 @@ main(int argc, char **argv)
 				runtime_options.siblings_follow = true;
 				break;

+			case OPT_REPMGRD_NO_PAUSE:
+				runtime_options.repmgrd_no_pause = true;
+				break;
+
 				/*----------------------
 				 * "node status" options
 				 *----------------------
@@ -766,6 +774,7 @@ main(int argc, char **argv)
 	 *   BDR { REGISTER | UNREGISTER } |
 	 *   NODE { STATUS | CHECK | REJOIN | SERVICE } |
 	 *   CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | CLEANUP }
+	 *   DAEMON { STATUS | PAUSE | UNPAUSE }
 	 *
 	 * [node] is an optional hostname, provided instead of the -h/--host
 	 * option
@@ -799,6 +808,7 @@ main(int argc, char **argv)
 				action = PRIMARY_REGISTER;
 			else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
 				action = PRIMARY_UNREGISTER;
+			/* allow "primary check"/"primary status" as aliases for "node check"/"node status" */
 			else if (strcasecmp(repmgr_action, "CHECK") == 0)
 				action = NODE_CHECK;
 			else if (strcasecmp(repmgr_action, "STATUS") == 0)
@@ -825,6 +835,7 @@ main(int argc, char **argv)
 				action = STANDBY_FOLLOW;
 			else if (strcasecmp(repmgr_action, "SWITCHOVER") == 0)
 				action = STANDBY_SWITCHOVER;
+			/* allow "standby check"/"standby status" as aliases for "node check"/"node status" */
 			else if (strcasecmp(repmgr_action, "CHECK") == 0)
 				action = NODE_CHECK;
 			else if (strcasecmp(repmgr_action, "STATUS") == 0)
@@ -900,6 +911,21 @@ main(int argc, char **argv)
 			else if (strcasecmp(repmgr_action, "CLEANUP") == 0)
 				action = CLUSTER_CLEANUP;
 		}
+		else if (strcasecmp(repmgr_command, "DAEMON") == 0)
+		{
+			if (help_option == true)
+			{
+				do_daemon_help();
+				exit(SUCCESS);
+			}
+
+			if (strcasecmp(repmgr_action, "STATUS") == 0)
+				action = DAEMON_STATUS;
+			else if (strcasecmp(repmgr_action, "PAUSE") == 0)
+				action = DAEMON_PAUSE;
+			else if (strcasecmp(repmgr_action, "UNPAUSE") == 0)
+				action = DAEMON_UNPAUSE;
+		}
 		else
 		{
 			valid_repmgr_command_found = false;
@@ -1298,6 +1324,17 @@ main(int argc, char **argv)
 			do_cluster_cleanup();
 			break;

+			/* DAEMON */
+		case DAEMON_STATUS:
+			do_daemon_status();
+			break;
+		case DAEMON_PAUSE:
+			do_daemon_pause();
+			break;
+		case DAEMON_UNPAUSE:
+			do_daemon_unpause();
+			break;
+
 		default:
 			/* An action will have been determined by this point  */
 			break;
@@ -1362,7 +1399,7 @@ check_cli_parameters(const int action)
 					if (!runtime_options.host_param_provided)
 					{
 						item_list_append_format(&cli_errors,
-												_("host name for the source node must be provided when executing %s"),
+												_("host name for the source node must be provided with -h/--host when executing %s"),
 												action_name(action));
 					}

@@ -1419,7 +1456,7 @@ check_cli_parameters(const int action)
 				if (!runtime_options.host_param_provided)
 				{
 					item_list_append_format(&cli_errors,
-											_("host name for the source node must be provided when executing %s"),
+											_("host name for the source node must be provided with -h/--host when executing %s"),
 											action_name(action));
 				}
 			}
@@ -1520,6 +1557,7 @@ check_cli_parameters(const int action)
 			case PRIMARY_UNREGISTER:
 			case STANDBY_UNREGISTER:
 			case WITNESS_UNREGISTER:
+			case CLUSTER_CLEANUP:
 			case CLUSTER_EVENT:
 			case CLUSTER_MATRIX:
 			case CLUSTER_CROSSCHECK:
@@ -1743,6 +1781,18 @@ check_cli_parameters(const int action)
 		}
 	}

+	if (runtime_options.repmgrd_no_pause == true)
+	{
+		switch (action)
+		{
+			case STANDBY_SWITCHOVER:
+				break;
+			default:
+				item_list_append_format(&cli_warnings,
+										_("--repmgrd-no-pause will be ignored when executing %s"),
+										action_name(action));
+		}
+	}

 	if (runtime_options.config_files[0] != '\0')
 	{
@@ -1771,6 +1821,8 @@ check_cli_parameters(const int action)
 			case WITNESS_UNREGISTER:
 			case NODE_REJOIN:
 			case NODE_SERVICE:
+			case DAEMON_PAUSE:
+			case DAEMON_UNPAUSE:
 				break;
 			default:
 				item_list_append_format(&cli_warnings,
@@ -1850,6 +1902,14 @@ action_name(const int action)
 			return "CLUSTER MATRIX";
 		case CLUSTER_CROSSCHECK:
 			return "CLUSTER CROSSCHECK";
+
+		case DAEMON_STATUS:
+			return "DAEMON STATUS";
+		case DAEMON_PAUSE:
+			return "DAEMON PAUSE";
+		case DAEMON_UNPAUSE:
+			return "DAEMON UNPAUSE";
+
 	}

 	return "UNKNOWN ACTION";
@@ -1877,6 +1937,42 @@ print_error_list(ItemList *error_list, int log_level)
 }


+void
+print_status_header(int cols, ColHeader *headers)
+{
+	int i;
+
+	for (i = 0; i < cols; i++)
+	{
+		if (i == 0)
+			printf(" ");
+		else
+			printf(" | ");
+
+		printf("%-*s",
+			   headers[i].max_length,
+			   headers[i].title);
+	}
+	printf("\n");
+	printf("-");
+
+	for (i = 0; i < cols; i++)
+	{
+		int			j;
+
+		for (j = 0; j < headers[i].max_length; j++)
+			printf("-");
+
+		if (i < (cols - 1))
+			printf("-+-");
+		else
+			printf("-");
+	}
+
+	printf("\n");
+}
+
+
 void
 print_help_header(void)
 {
@@ -1905,10 +2001,11 @@ do_help(void)
 	printf(_("    %s [OPTIONS] node    {status|check|rejoin|service}\n"), progname());
 	printf(_("    %s [OPTIONS] cluster {show|event|matrix|crosscheck|cleanup}\n"), progname());
 	printf(_("    %s [OPTIONS] witness {register|unregister}\n"), progname());
+	printf(_("    %s [OPTIONS] daemon  {status|pause|unpause}\n"), progname());

 	puts("");

-	printf(_("  Execute \"%s {primary|standby|bdr|node|cluster} --help\" to see command-specific options\n"), progname());
+	printf(_("  Execute \"%s {primary|standby|bdr|node|cluster|witness|daemon} --help\" to see command-specific options\n"), progname());

 	puts("");

@@ -1956,8 +2053,6 @@ do_help(void)
 	printf(_("  -t, --terse                         don't display detail, hints and other non-critical output\n"));
 	printf(_("  -v, --verbose                       display additional log output (useful for debugging)\n"));

-
-
 	puts("");
 }

@@ -1984,8 +2079,9 @@ create_repmgr_extension(PGconn *conn)
 	bool		is_superuser = false;
 	PGconn	   *superuser_conn = NULL;
 	PGconn	   *schema_create_conn = NULL;
+	t_extension_versions extversions = T_EXTENSION_VERSIONS_INITIALIZER;

-	extension_status = get_repmgr_extension_status(conn);
+	extension_status = get_repmgr_extension_status(conn, &extversions);

 	switch (extension_status)
 	{
@@ -1997,8 +2093,15 @@ create_repmgr_extension(PGconn *conn)
 			log_error(_("\"repmgr\" extension is not available"));
 			return false;

+		case REPMGR_OLD_VERSION_INSTALLED:
+			log_error(_("an older version of the \"repmgr\" extension is installed"));
+			log_detail(_("version %s is installed but newer version %s is available"),
+					   extversions.installed_version,
+					   extversions.default_version);
+			log_hint(_("execute \"ALTER EXTENSION repmgr UPGRADE\""));
+			return false;
+
 		case REPMGR_INSTALLED:
-			/* TODO: check version */
 			log_info(_("\"repmgr\" extension is already installed"));
 			return true;

@@ -2576,11 +2679,29 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf
 void
 make_remote_repmgr_path(PQExpBufferData *output_buf, t_node_info *remote_node_record)
 {
+	if (config_file_options.repmgr_bindir[0] != '\0')
+	{
+		int			len = strlen(config_file_options.repmgr_bindir);
+
+		appendPQExpBufferStr(output_buf,
+							 config_file_options.repmgr_bindir);
+
+		/* Add trailing slash */
+		if (config_file_options.repmgr_bindir[len - 1] != '/')
+		{
+			appendPQExpBufferChar(output_buf, '/');
+		}
+	}
+	else if (pg_bindir[0] != '\0')
+	{
+		appendPQExpBufferStr(output_buf,
+							 pg_bindir);
+	}
+
 	appendPQExpBuffer(output_buf,
 					  "%s -f %s ",
-					  make_pg_path(progname()),
+					  progname(),
 					  remote_node_record->config_file);
-
 }


@@ -2978,3 +3099,45 @@ can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *rea

 	return can_use;
 }
+
+
+void
+drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
+{
+	t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
+	RecordStatus record_status = get_slot_record(conn, slot_name, &slot_info);
+
+	log_verbose(LOG_DEBUG, "attempting to delete slot \"%s\" on node %i",
+				slot_name, node_id);
+
+	if (record_status != RECORD_FOUND)
+	{
+		/* this is a good thing */
+		log_verbose(LOG_INFO,
+					_("slot \"%s\" does not exist on node %i, nothing to remove"),
+					slot_name, node_id);
+	}
+	else
+	{
+		if (slot_info.active == false)
+		{
+			if (drop_replication_slot(conn, slot_name) == true)
+			{
+				log_notice(_("replication slot \"%s\" deleted on node %i"), slot_name, node_id);
+			}
+			else
+			{
+				log_error(_("unable to delete replication slot \"%s\" on node %i"), slot_name, node_id);
+			}
+		}
+
+		/*
+		 * if active replication slot exists, call Houston as we have a
+		 * problem
+		 */
+		else
+		{
+			log_warning(_("replication slot \"%s\" is still active on node %i"), slot_name, node_id);
+		}
+	}
+}
--- a/repmgr-client.h
+++ b/repmgr-client.h
@@ -45,6 +45,9 @@
 #define CLUSTER_MATRIX		   19
 #define CLUSTER_CROSSCHECK	   20
 #define CLUSTER_EVENT		   21
+#define DAEMON_STATUS		   22
+#define DAEMON_PAUSE		   23
+#define DAEMON_UNPAUSE		   24

 /* command line options without short versions */
 #define OPT_HELP						   1001
@@ -88,6 +91,7 @@
 #define OPT_RECOVERY_CONF_ONLY             1039
 #define OPT_NO_WAIT                        1040
 #define OPT_MISSING_SLOTS                  1041
+#define OPT_REPMGRD_NO_PAUSE               1042

 /* deprecated since 3.3 */
 #define OPT_DATA_DIR						999
@@ -156,6 +160,7 @@ static struct option long_options[] =
 */
 	{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE},
 	{"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW},
+	{"repmgrd-no-pause", no_argument, NULL, OPT_REPMGRD_NO_PAUSE},

 /* "node status" options */
 	{"is-shutdown-cleanly", no_argument, NULL, OPT_IS_SHUTDOWN_CLEANLY},
--- a/repmgr.c
+++ b/repmgr.c
@@ -26,6 +26,7 @@
 #include "access/xlog.h"
 #include "miscadmin.h"
 #include "replication/walreceiver.h"
+#include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
 #include "storage/procarray.h"
@@ -43,14 +44,21 @@
 #include "lib/stringinfo.h"
 #include "access/xact.h"
 #include "utils/snapmgr.h"
-#include "pgstat.h"

+#if (PG_VERSION_NUM >= 90400)
+#include "pgstat.h"
+#else
+#define PGSTAT_STAT_PERMANENT_DIRECTORY             "pg_stat"
+#endif

 #include "voting.h"

 #define UNKNOWN_NODE_ID		-1
+#define UNKNOWN_PID			-1

 #define TRANCHE_NAME "repmgrd"
+#define REPMGRD_STATE_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/repmgrd_state.txt"
+#define REPMGRD_STATE_FILE_BUF_SIZE 128

 PG_MODULE_MAGIC;

@@ -66,6 +74,9 @@ typedef struct repmgrdSharedState
 	LWLockId	lock;			/* protects search/modification */
 	TimestampTz last_updated;
 	int			local_node_id;
+	int			repmgrd_pid;
+	char		repmgrd_pidfile[MAXPGPATH];
+	bool		repmgrd_paused;
 	/* streaming failover */
 	NodeVotingStatus voting_status;
 	int			current_electoral_term;
@@ -112,6 +123,25 @@ PG_FUNCTION_INFO_V1(am_bdr_failover_handler);
 Datum		unset_bdr_failover_handler(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(unset_bdr_failover_handler);

+Datum		set_repmgrd_pid(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(set_repmgrd_pid);
+
+Datum		get_repmgrd_pid(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(get_repmgrd_pid);
+
+Datum		get_repmgrd_pidfile(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(get_repmgrd_pidfile);
+
+Datum		repmgrd_is_running(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(repmgrd_is_running);
+
+Datum		repmgrd_pause(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(repmgrd_pause);
+
+Datum		repmgrd_is_paused(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(repmgrd_is_paused);
+
+

 /*
 * Module load callback
@@ -185,6 +215,9 @@ repmgr_shmem_startup(void)
 #endif

 		shared_state->local_node_id = UNKNOWN_NODE_ID;
+		shared_state->repmgrd_pid = UNKNOWN_PID;
+		memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH);
+		shared_state->repmgrd_paused = false;
 		shared_state->current_electoral_term = 0;
 		shared_state->voting_status = VS_NO_VOTE;
 		shared_state->candidate_node_id = UNKNOWN_NODE_ID;
@@ -204,6 +237,8 @@ Datum
 set_local_node_id(PG_FUNCTION_ARGS)
 {
 	int			local_node_id = UNKNOWN_NODE_ID;
+	int			stored_node_id = UNKNOWN_NODE_ID;
+	int			paused = -1;

 	if (!shared_state)
 		PG_RETURN_NULL();
@@ -213,6 +248,34 @@ set_local_node_id(PG_FUNCTION_ARGS)

 	local_node_id = PG_GETARG_INT32(0);

+	/* read state file and if exists/valid, update "repmgrd_paused" */
+	{
+		FILE	   *file = NULL;
+
+		file = AllocateFile(REPMGRD_STATE_FILE, PG_BINARY_R);
+
+		if (file != NULL)
+		{
+			int			buffer_size = REPMGRD_STATE_FILE_BUF_SIZE;
+			char		buffer[REPMGRD_STATE_FILE_BUF_SIZE];
+
+			if (fgets(buffer, buffer_size, file) != NULL)
+			{
+				if (sscanf(buffer, "%i:%i", &stored_node_id, &paused) != 2)
+				{
+					elog(WARNING, "unable to parse repmgrd state file");
+				}
+				else
+				{
+					elog(DEBUG1, "node_id: %i; paused: %i", stored_node_id, paused);
+				}
+			}
+
+			FreeFile(file);
+		}
+
+	}
+
 	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);

 	/* only set local_node_id once, as it should never change */
@@ -221,6 +284,19 @@ set_local_node_id(PG_FUNCTION_ARGS)
 		shared_state->local_node_id = local_node_id;
 	}

+	/* only update if state file valid */
+	if (stored_node_id == shared_state->local_node_id)
+	{
+		if (paused == 0)
+		{
+			shared_state->repmgrd_paused = false;
+		}
+		else if (paused == 1)
+		{
+			shared_state->repmgrd_paused = true;
+		}
+	}
+
 	LWLockRelease(shared_state->lock);

 	PG_RETURN_VOID();
@@ -416,9 +492,191 @@ unset_bdr_failover_handler(PG_FUNCTION_ARGS)
 		LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);

 		shared_state->bdr_failover_handler = UNKNOWN_NODE_ID;
-
-		LWLockRelease(shared_state->lock);
 	}

+	LWLockRelease(shared_state->lock);
+
 	PG_RETURN_VOID();
 }
+
+
+/*
+ * Returns the stored repmgrd PID, -1 if none is set, or NULL if shared memory
+ * is not initialised (TODO: report when the recorded process is not running)
+ */
+Datum
+get_repmgrd_pid(PG_FUNCTION_ARGS)
+{
+	int repmgrd_pid = UNKNOWN_PID;
+
+	if (!shared_state)
+		PG_RETURN_NULL();
+
+	LWLockAcquire(shared_state->lock, LW_SHARED);
+	repmgrd_pid = shared_state->repmgrd_pid;
+	LWLockRelease(shared_state->lock);
+
+	PG_RETURN_INT32(repmgrd_pid);
+}
+
+
+/*
+ * Returns the repmgrd pidfile
+ */
+Datum
+get_repmgrd_pidfile(PG_FUNCTION_ARGS)
+{
+	char repmgrd_pidfile[MAXPGPATH];
+
+	if (!shared_state)
+		PG_RETURN_NULL();
+
+	memset(repmgrd_pidfile, 0, MAXPGPATH);
+
+	LWLockAcquire(shared_state->lock, LW_SHARED);
+	strncpy(repmgrd_pidfile, shared_state->repmgrd_pidfile, MAXPGPATH);
+	LWLockRelease(shared_state->lock);
+
+	if (repmgrd_pidfile[0] == '\0')
+		PG_RETURN_NULL();
+
+	PG_RETURN_TEXT_P(cstring_to_text(repmgrd_pidfile));
+}
+
+Datum
+set_repmgrd_pid(PG_FUNCTION_ARGS)
+{
+	int repmgrd_pid = UNKNOWN_PID;
+	char *repmgrd_pidfile = NULL;
+
+	if (!shared_state)
+		PG_RETURN_VOID();
+
+	if (PG_ARGISNULL(0))
+	{
+		repmgrd_pid = UNKNOWN_PID;
+	}
+	else
+	{
+		repmgrd_pid = PG_GETARG_INT32(0);
+	}
+
+	elog(DEBUG3, "set_repmgrd_pid(): provided pid is %i", repmgrd_pid);
+
+	if (repmgrd_pid != UNKNOWN_PID && !PG_ARGISNULL(1))
+	{
+		repmgrd_pidfile = text_to_cstring(PG_GETARG_TEXT_PP(1));
+		elog(INFO, "set_repmgrd_pid(): provided pidfile is %s", repmgrd_pidfile);
+	}
+
+	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
+
+	shared_state->repmgrd_pid = repmgrd_pid;
+	memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH);
+
+	if(repmgrd_pidfile != NULL)
+	{
+		strncpy(shared_state->repmgrd_pidfile, repmgrd_pidfile, MAXPGPATH);
+	}
+
+	LWLockRelease(shared_state->lock);
+	PG_RETURN_VOID();
+}
+
+
+Datum
+repmgrd_is_running(PG_FUNCTION_ARGS)
+{
+	int repmgrd_pid = UNKNOWN_PID;
+	int kill_ret;
+
+	if (!shared_state)
+		PG_RETURN_NULL();
+
+	LWLockAcquire(shared_state->lock, LW_SHARED);
+	repmgrd_pid = shared_state->repmgrd_pid;
+	LWLockRelease(shared_state->lock);
+
+	/* No PID registered - assume not running */
+	if (repmgrd_pid == UNKNOWN_PID)
+	{
+		PG_RETURN_BOOL(false);
+	}
+
+	kill_ret = kill(repmgrd_pid, 0);
+
+	if (kill_ret == 0)
+	{
+		PG_RETURN_BOOL(true);
+	}
+
+	PG_RETURN_BOOL(false);
+}
+
+
+Datum
+repmgrd_pause(PG_FUNCTION_ARGS)
+{
+	bool		pause;
+	FILE	   *file = NULL;
+	StringInfoData buf;
+
+	if (!shared_state)
+		PG_RETURN_NULL();
+
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+
+	pause = PG_GETARG_BOOL(0);
+
+	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
+	shared_state->repmgrd_paused = pause;
+	LWLockRelease(shared_state->lock);
+
+	/* write state to file */
+	file = AllocateFile(REPMGRD_STATE_FILE, PG_BINARY_W);
+
+	if (file == NULL)
+	{
+		elog(DEBUG1, "unable to allocate %s", REPMGRD_STATE_FILE);
+
+		// XXX anything else we can do? log?
+		PG_RETURN_VOID();
+	}
+
+	elog(DEBUG1, "allocated");
+
+	initStringInfo(&buf);
+
+	LWLockAcquire(shared_state->lock, LW_SHARED);
+
+	appendStringInfo(&buf, "%i:%i",
+					 shared_state->local_node_id,
+					 pause ? 1 : 0);
+	LWLockRelease(shared_state->lock);
+
+	// XXX check success
+	fwrite(buf.data, strlen(buf.data) + 1, 1, file);
+
+
+	resetStringInfo(&buf);
+	FreeFile(file);
+
+	PG_RETURN_VOID();
+}
+
+
+Datum
+repmgrd_is_paused(PG_FUNCTION_ARGS)
+{
+	bool is_paused;
+
+	if (!shared_state)
+		PG_RETURN_NULL();
+
+	LWLockAcquire(shared_state->lock, LW_SHARED);
+	is_paused = shared_state->repmgrd_paused;
+	LWLockRelease(shared_state->lock);
+
+	PG_RETURN_BOOL(is_paused);
+}
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -5,7 +5,13 @@
 # Some configuration items will be set with a default value; this
 # is noted for each item. Where no default value is shown, the
 # parameter will be treated as empty or false.
-
+#
+# IMPORTANT: string values can be provided as-is, or enclosed in single quotes
+# (but not double quotes, which will be interpreted as part of the string), e.g.:
+#
+#  node_name=foo
+#  node_name = 'foo'
+#
 # =============================================================================
 # Required configuration items
 # =============================================================================
@@ -143,6 +149,15 @@
 					# Debian/Ubuntu users: you will probably need to
 					# set this to the directory where `pg_ctl` is located,
 					# e.g. /usr/lib/postgresql/9.6/bin/
+					#
+					# *NOTE* "pg_bindir" is only used when repmgr directly
+					# executes PostgreSQL binaries; any user-defined scripts
+					# *must* be specified with the full path
+
+#repmgr_bindir=''			# Path to repmgr binary directory (location of the repmgr
+					# binary). Only needed if the repmgr executable is not in
+					# the system $PATH or the path defined in "pg_bindir".
+
 #use_primary_conninfo_password=false	# explicitly set "password" in recovery.conf's
 					# "primary_conninfo" parameter using the value contained
 					# in the environment variable PGPASSWORD
@@ -156,7 +171,7 @@
 # Examples:
 #
 #   pg_ctl_options='-s'
-#   pg_basebackup_options='--label=repmgr_backup
+#   pg_basebackup_options='--label=repmgr_backup'
 #   rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
 #   ssh_options=-o "StrictHostKeyChecking no"

@@ -183,11 +198,11 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 					# parameter can be provided multiple times.

 #restore_command=''			# This will be placed in the recovery.conf file generated
-                                        # by repmgr.
+					# by repmgr.

 #archive_cleanup_command=''		# This will be placed in the recovery.conf file generated
-                                        # by repmgr. Note we recommend using Barman for managing
-                                        # WAL archives (see: https://www.pgbarman.org )
+					# by repmgr. Note we recommend using Barman for managing
+					# WAL archives (see: https://www.pgbarman.org )

 #recovery_min_apply_delay=		# If provided, "recovery_min_apply_delay" in recovery.conf
 					# will be set to this value (PostgreSQL 9.4 and later).
@@ -226,6 +241,8 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 # These settings apply when switching roles between a primary and a standby
 # ("repmgr standby switchover").

+#shutdown_check_timeout=60		# The max length of time (in seconds) to wait for the demotion
+					# candidate (current primary) to shut down
 #standby_reconnect_timeout=60		# The max length of time (in seconds) to wait
 					# for the demoted standby to reconnect to the promoted
 					# primary (note: this value should be equal to or greater
@@ -259,10 +276,10 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 # are defaults.

 #repmgrd_pid_file=			# Path of PID file to use for repmgrd; if not set, a PID file will
-                                        # be generated in a temporary directory specified by the environment
-                                        # variable $TMPDIR, or if not set, in "/tmp". This value can be overridden
-                                        # by the command line option "-p/--pid-file"; the command line option
-                                        # "--no-pid-file" will force PID file creation to be skipped.
+					# be generated in a temporary directory specified by the environment
+					# variable $TMPDIR, or if not set, in "/tmp". This value can be overridden
+					# by the command line option "-p/--pid-file"; the command line option
+					# "--no-pid-file" will force PID file creation to be skipped.
 #failover=manual			# one of 'automatic', 'manual'.
 					# determines what action to take in the event of upstream failure
 					#
@@ -276,9 +293,9 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 					# a value of zero prevents the node being promoted to primary
 					# (default: 100)

-#reconnect_attempts=6			# Number attempts which will be made to reconnect to an unreachable
+#reconnect_attempts=6			# Number of attempts which will be made to reconnect to an unreachable
 					# primary (or other upstream node)
-#reconnect_interval=10			# Interval between attempts  to reconnect to an unreachable
+#reconnect_interval=10			# Interval between attempts to reconnect to an unreachable
 					# primary (or other upstream node)
 #promote_command=			# command repmgrd executes when promoting a new primary; use something like:
 					#
@@ -332,7 +349,7 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 #
 # Debian/Ubuntu users: use "sudo pg_ctlcluster" to execute service control commands.
 #
-# For more details, see: https://repmgr.org/docs/4.0/configuration-service-commands.html
+# For more details, see: https://repmgr.org/docs/4.1/configuration-service-commands.html

 #service_start_command = ''
 #service_stop_command = ''
@@ -376,7 +393,7 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 #------------------------------------------------------------------------------

 #bdr_local_monitoring_only=false         # Only monitor the local node; no checks will be
-                                         # performed on the other node
+					 # performed on the other node
 #bdr_recovery_timeout                    # If a BDR node was offline and has become available
-                                         # maximum length of time in seconds to wait for the
-                                         # node to reconnect to the cluster
+					 # maximum length of time in seconds to wait for the
+					 # node to reconnect to the cluster
--- a/repmgr.control
+++ b/repmgr.control
@@ -1,6 +1,6 @@
 # repmgr extension
 comment = 'Replication manager for PostgreSQL'
-default_version = '4.1'
+default_version = '4.2'
 module_pathname = '$libdir/repmgr'
 relocatable = false
 schema = repmgr
--- a/repmgr.h
+++ b/repmgr.h
@@ -53,6 +53,7 @@

 #define UNKNOWN_TIMELINE_ID -1
 #define UNKNOWN_SYSTEM_IDENTIFIER 0
+#define UNKNOWN_PID			-1

 #define NODE_NOT_FOUND		-1
 #define NO_UPSTREAM_NODE	-1
@@ -84,6 +85,7 @@
 #define DEFAULT_WAIT_START                   30  /* seconds */
 #define DEFAULT_PROMOTE_CHECK_TIMEOUT        60  /* seconds */
 #define DEFAULT_PROMOTE_CHECK_INTERVAL       1   /* seconds */
+#define DEFAULT_SHUTDOWN_CHECK_TIMEOUT       60  /* seconds */
 #define DEFAULT_STANDBY_RECONNECT_TIMEOUT    60  /* seconds */
 #define DEFAULT_NODE_REJOIN_TIMEOUT          60  /* seconds */

--- a/repmgr_version.h.in
+++ b/repmgr_version.h.in
@@ -1,2 +1,2 @@
 #define REPMGR_VERSION_DATE ""
-#define REPMGR_VERSION "4.1dev"
+#define REPMGR_VERSION "4.2"
--- a/repmgrd-bdr.c
+++ b/repmgrd-bdr.c
@@ -150,7 +150,13 @@ monitor_bdr(void)
 	 * retrieve list of all nodes - we'll need these if the DB connection goes
 	 * away
 	 */
-	get_all_node_records(local_conn, &nodes);
+	if (get_all_node_records(local_conn, &nodes) == false)
+	{
+		/* get_all_node_records() will display the error */
+		PQfinish(local_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+

 	/* we're expecting all (both) nodes to be up */
 	for (cell = nodes.head; cell; cell = cell->next)
@@ -214,7 +220,8 @@ monitor_bdr(void)

 								log_warning(_("unable to connect to node %s (ID %i)"),
 											cell->node_info->node_name, cell->node_info->node_id);
-								cell->node_info->conn = try_reconnect(cell->node_info);
+								//cell->node_info->conn = try_reconnect(cell->node_info);
+								try_reconnect(&cell->node_info->conn, cell->node_info);

 								/* node has recovered - log and continue */
 								if (cell->node_info->node_status == NODE_STATUS_UP)
@@ -293,7 +300,7 @@ loop:
 			/*
 			 * if we can reload, then could need to change local_conn
 			 */
-			if (reload_config(&config_file_options))
+			if (reload_config(&config_file_options, BDR))
 			{
 				PQfinish(local_conn);
 				local_conn = establish_db_connection(config_file_options.conninfo, true);
@@ -303,11 +310,12 @@ loop:
 			got_SIGHUP = false;
 		}

+		/* XXX this looks like it will never be called */
 		if (got_SIGHUP)
 		{
 			log_debug("SIGHUP received");

-			if (reload_config(&config_file_options))
+			if (reload_config(&config_file_options, BDR))
 			{
 				PQfinish(local_conn);
 				local_conn = establish_db_connection(config_file_options.conninfo, true);
--- a/repmgrd-physical.c
+++ b/repmgrd-physical.c
--- a/repmgrd.c
+++ b/repmgrd.c
@@ -35,7 +35,7 @@

 static char *config_file = NULL;
 static bool verbose = false;
-static char pid_file[MAXPGPATH];
+char pid_file[MAXPGPATH];
 static bool daemonize = true;
 static bool show_pid_file = false;
 static bool no_pid_file = false;
@@ -88,6 +88,7 @@ main(int argc, char **argv)

 	RecordStatus record_status;
 	ExtensionStatus extension_status = REPMGR_UNKNOWN;
+	t_extension_versions extversions = T_EXTENSION_VERSIONS_INITIALIZER;

 	FILE	   *fd;

@@ -101,7 +102,8 @@ main(int argc, char **argv)
 		{"config-file", required_argument, NULL, 'f'},

 /* daemon options */
-		{"daemonize", optional_argument, NULL, 'd'},
+		{"daemonize-short", optional_argument, NULL, 'd'},
+		{"daemonize", optional_argument, NULL, OPT_DAEMONIZE},
 		{"pid-file", required_argument, NULL, 'p'},
 		{"show-pid-file", no_argument, NULL, 's'},
 		{"no-pid-file", no_argument, NULL, OPT_NO_PID_FILE},
@@ -175,10 +177,11 @@ main(int argc, char **argv)
 				/* daemon options */

 			case 'd':
-				if (optarg != NULL)
-				{
-					daemonize = parse_bool(optarg, "-d/--daemonize", &cli_errors);
-				}
+				daemonize = true;
+				break;
+
+			case OPT_DAEMONIZE:
+				daemonize = parse_bool(optarg, "-d/--daemonize", &cli_errors);
 				break;

 			case 'p':
@@ -320,8 +323,6 @@ main(int argc, char **argv)
 		strncpy(config_file_options.log_level, cli_log_level, MAXLEN);
 	}

-	log_notice(_("repmgrd (repmgr %s) starting up"), REPMGR_VERSION);
-
 	/*
 	 * -m/--monitoring-history, if provided, will override repmgr.conf's
 	 * monitoring_history; this is for backwards compatibility as it's
@@ -349,6 +350,8 @@ main(int argc, char **argv)

 	logger_init(&config_file_options, progname());

+	log_notice(_("repmgrd (%s %s) starting up"), progname(), REPMGR_VERSION);
+
 	if (verbose)
 		logger_set_verbose();

@@ -387,7 +390,7 @@ main(int argc, char **argv)
 	 */

 	/* Check "repmgr" the extension is installed */
-	extension_status = get_repmgr_extension_status(local_conn);
+	extension_status = get_repmgr_extension_status(local_conn, &extversions);

 	if (extension_status != REPMGR_INSTALLED)
 	{
@@ -400,19 +403,32 @@ main(int argc, char **argv)
 			exit(ERR_DB_QUERY);
 		}

-		log_error(_("repmgr extension not found on this node"));
+		if (extension_status == REPMGR_OLD_VERSION_INSTALLED)
+		{
+			log_error(_("an older version of the \"repmgr\" extension is installed"));
+			log_detail(_("version %s is installed but newer version %s is available"),
+					   extversions.installed_version,
+					   extversions.default_version);
+			log_hint(_("verify the repmgr installation is updated properly before continuing"));

-		if (extension_status == REPMGR_AVAILABLE)
-		{
-			log_detail(_("repmgr extension is available but not installed in database \"%s\""),
-					   PQdb(local_conn));
 		}
-		else if (extension_status == REPMGR_UNAVAILABLE)
+		else
 		{
-			log_detail(_("repmgr extension is not available on this node"));
+			log_error(_("repmgr extension not found on this node"));
+
+			if (extension_status == REPMGR_AVAILABLE)
+			{
+				log_detail(_("repmgr extension is available but not installed in database \"%s\""),
+						   PQdb(local_conn));
+			}
+			else if (extension_status == REPMGR_UNAVAILABLE)
+			{
+				log_detail(_("repmgr extension is not available on this node"));
+			}
+
+			log_hint(_("check that this node is part of a repmgr cluster"));
 		}

-		log_hint(_("check that this node is part of a repmgr cluster"));
 		close_connection(&local_conn);
 		exit(ERR_BAD_CONFIG);
 	}
@@ -488,6 +504,9 @@ main(int argc, char **argv)
 		check_and_create_pid_file(pid_file);
 	}

+	repmgrd_set_pid(local_conn, getpid(), pid_file);
+
+
 #ifndef WIN32
 	setup_event_handlers();
 #endif
@@ -760,7 +779,8 @@ show_help(void)
 	puts("");

 	printf(_("Daemon configuration options:\n"));
-	printf(_("  -d, --daemonize[=true/false]\n"));
+	printf(_("  -d\n"));
+	printf(_("  --daemonize[=true/false]\n"));
 	printf(_("                            detach process from foreground (default: true)\n"));
 	printf(_("  -p, --pid-file=PATH       use the specified PID file\n"));
 	printf(_("  -s, --show-pid-file       show PID file which would be used by the current configuration\n"));
@@ -770,10 +790,10 @@ show_help(void)
 }


-PGconn *
-try_reconnect(t_node_info *node_info)
+void
+try_reconnect(PGconn **conn, t_node_info *node_info)
 {
-	PGconn	   *conn;
+	PGconn	   *our_conn;
 	t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;

 	int			i;
@@ -782,7 +802,6 @@ try_reconnect(t_node_info *node_info)

 	initialize_conninfo_params(&conninfo_params, false);

-
 	/* we assume by now the conninfo string is parseable */
 	(void) parse_conninfo_string(node_info->conninfo, &conninfo_params, NULL, false);

@@ -805,18 +824,47 @@ try_reconnect(t_node_info *node_info)
 			 * degraded monitoring? - make that configurable
 			 */

-			conn = establish_db_connection_by_params(&conninfo_params, false);
+			our_conn = establish_db_connection_by_params(&conninfo_params, false);

-			if (PQstatus(conn) == CONNECTION_OK)
+			if (PQstatus(our_conn) == CONNECTION_OK)
 			{
 				free_conninfo_params(&conninfo_params);

+				log_info(_("connection to node %i succeeded"), node_info->node_id);
+
+				if (PQstatus(*conn) == CONNECTION_BAD)
+				{
+					log_verbose(LOG_INFO, "original connection handle returned CONNECTION_BAD, using new connection");
+					close_connection(conn);
+					*conn = our_conn;
+				}
+				else
+				{
+					ExecStatusType ping_result;
+
+					ping_result = connection_ping(*conn);
+
+					if (ping_result != PGRES_TUPLES_OK)	/* caller's handle failed the ping: swap in the new connection */
+					{
+						log_info(_("original connection no longer available, using new connection"));
+						close_connection(conn);
+						*conn = our_conn;
+					}
+					else
+					{
+						log_info(_("original connection is still available"));
+
+						PQfinish(our_conn);
+					}
+				}
+
 				node_info->node_status = NODE_STATUS_UP;
-				return conn;
+
+				return;
 			}

-			close_connection(&conn);
-			log_notice(_("unable to reconnect to node"));
+			close_connection(&our_conn);
+			log_notice(_("unable to reconnect to node %i"), node_info->node_id);
 		}

 		if (i + 1 < max_attempts)
@@ -835,7 +883,7 @@ try_reconnect(t_node_info *node_info)

 	free_conninfo_params(&conninfo_params);

-	return NULL;
+	return;
 }


@@ -873,6 +921,9 @@ print_monitoring_state(MonitoringState monitoring_state)
 void
 terminate(int retval)
 {
+	if (PQstatus(local_conn)  == CONNECTION_OK)
+		repmgrd_set_pid(local_conn, UNKNOWN_PID, NULL);
+
 	logger_shutdown();

 	if (pid_file[0] != '\0')
--- a/repmgrd.h
+++ b/repmgrd.h
@@ -11,6 +11,7 @@
 #include "portability/instr_time.h"

 #define OPT_NO_PID_FILE                  1000
+#define OPT_DAEMONIZE                    1001

 extern volatile sig_atomic_t got_SIGHUP;
 extern MonitoringState monitoring_state;
@@ -20,8 +21,9 @@ extern t_configuration_options config_file_options;
 extern t_node_info local_node_info;
 extern PGconn *local_conn;
 extern bool startup_event_logged;
+extern char pid_file[MAXPGPATH];

-PGconn	   *try_reconnect(t_node_info *node_info);
+void		try_reconnect(PGconn **conn, t_node_info *node_info);

 int			calculate_elapsed(instr_time start_time);
 const char *print_monitoring_state(MonitoringState monitoring_state);
--- a/strutil.c
+++ b/strutil.c
@@ -87,17 +87,17 @@ append_where_clause(PQExpBufferData *where_clause, const char *format,...)

 	if (where_clause->data[0] == '\0')
 	{
-		appendPQExpBuffer(where_clause,
-						  " WHERE ");
+		appendPQExpBufferStr(where_clause,
+							 " WHERE ");
 	}
 	else
 	{
-		appendPQExpBuffer(where_clause,
-						  " AND ");
+		appendPQExpBufferStr(where_clause,
+							 " AND ");
 	}

-	appendPQExpBuffer(where_clause,
-					  "%s", stringbuf);
+	appendPQExpBufferStr(where_clause,
+						 stringbuf);

 }