doc: update "repmgr primary unregister" description

As noted by GitHub user yonj1e in GitHub #396.
doc: update FAQ
2026-03-23 15:16:29 +00:00 · 2018-03-08 15:01:25 +09:00 · 2018-03-08 10:04:30 +09:00 · 2018-03-08 10:04:30 +09:00 · 2018-03-07 19:14:18 +09:00 · 2018-03-07 14:17:04 +09:00
111 changed files with 9703 additions and 5458 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -39,6 +39,8 @@ lib*.pc

 # test output
 /results/
+/regression.diffs
+/regression.out

 /doc/Makefile

--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,7 +2,7 @@ License and Contributions
 =========================

 `repmgr` is licensed under the GPL v3.  All of its code and documentation is
-Copyright 2010-2017, 2ndQuadrant Limited.  See the files COPYRIGHT and LICENSE for
+Copyright 2010-2018, 2ndQuadrant Limited.  See the files COPYRIGHT and LICENSE for
 details.

 The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
--- a/4
+++ b/4
@@ -1,4 +1,4 @@
-Copyright (c) 2010-2017, 2ndQuadrant Limited
+Copyright (c) 2010-2018, 2ndQuadrant Limited
 All rights reserved.

 This program is free software: you can redistribute it and/or modify
@@ -12,5 +12,5 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
-along with this program.  If not, see http://www.gnu.org/licenses/
+along with this program.  If not, see https://www.gnu.org/licenses/
 to obtain one.
--- a/FAQ.md
+++ b/FAQ.md
@@ -0,0 +1,10 @@
+FAQ - Frequently Asked Questions about repmgr
+=============================================
+
+The repmgr 4 FAQ is located here:
+
+    https://repmgr.org/docs/appendix-faq.html
+
+The repmgr 3.x FAQ can be found here:
+
+    https://github.com/2ndQuadrant/repmgr/blob/REL3_3_STABLE/FAQ.md
--- a/71
+++ b/71
@@ -1,6 +1,71 @@
-4.0     2017-10-04
-        Complete rewrite with many changes; see file "doc/upgrading-from-repmgr3.md"
-        for details.
+4.0.4   2018-03-08
+        repmgr: add "standby clone --recovery-conf-only" option; GitHub #382 (Ian)
+        repmgr: make "standby promote" timeout values configurable; GitHub #387 (Ian)
+        repmgr: improve replication slot warnings generated by "node status";
+          GitHub #385 (Ian)
+        repmgr: remove restriction on replication slots when cloning from
+          a Barman server; GitHub #379 (Ian)
+        repmgr: ensure "node rejoin" honours "--dry-run" option; GitHub #383 (Ian)
+        repmgr: fix --superuser handling when cloning a standby; GitHub #380 (Ian)
+        repmgr: update various help options; GitHub #391, #392 (hasegeli)
+        repmgrd: add event "repmgrd_shutdown"; GitHub #393 (Ian)
+        repmgrd: improve detection of status change from primary to standby (Ian)
+        repmgrd: improve log output in various situations (Ian)
+        repmgrd: improve reconnection to the local node after a failover (Ian)
+        repmgrd: ensure witness server connects to new primary after a failover (Ian)
+
+4.0.3   2018-02-15
+        repmgr: improve switchover handling when "pg_ctl" used to control the
+          server and logging output is not explicitly redirected (Ian)
+        repmgr: improve switchover log messages and exit code when old primary could
+          not be shut down cleanly (Ian)
+        repmgr: check demotion candidate can make a replication connection to the
+          promotion candidate before executing a switchover; GitHub #370 (Ian)
+        repmgr: add check for sufficient walsenders/replication slots before executing
+          a switchover; GitHub #371 (Ian)
+        repmgr: add --dry-run mode to "repmgr standby follow"; GitHub #368 (Ian)
+        repmgr: provide information about the primary node for "standby_register" and
+          "standby_follow" event notifications; GitHub #375 (Ian)
+        repmgr: add "standby_register_sync" event notification; GitHub #374 (Ian)
+        repmgr: output any connection error messages in "cluster show"'s list of
+          warnings; GitHub #369 (Ian)
+        repmgr: ensure an inactive data directory can be deleted; GitHub #366 (Ian)
+        repmgr: fix upstream node display in "repmgr node status"; GitHub #363 (fanf2)
+        repmgr: improve/clarify documentation and update --help output for
+          "primary unregister"; GitHub #373 (Ian)
+        repmgr: allow replication slots when Barman is configured; GitHub #379 (Ian)
+        repmgr: fix parsing of "pg_basebackup_options"; GitHub #376 (Ian)
+        repmgr: ensure "pg_subtrans" directory is created when cloning a standby in
+          Barman mode (Ian)
+        repmgr: fix primary node check in "witness register"; GitHub #377 (Ian)
+
+4.0.2   2018-01-18
+        repmgr: add missing -W option to getopt_long() invocation; GitHub #350 (Ian)
+        repmgr: automatically create slot name if missing; GitHub #343 (Ian)
+        repmgr: fixes to parsing output of remote repmgr invocations; GitHub #349 (Ian)
+        repmgr: BDR support - create missing connection replication set
+          if required; GitHub #347 (Ian)
+        repmgr: handle missing node record in "repmgr node rejoin"; GitHub #358 (Ian)
+        repmgr: enable documentation to be built as a single HTML file; GitHub #353 (fanf2)
+        repmgr: recognize "--terse" option for "repmgr cluster event"; GitHub #360 (Ian)
+        repmgr: add "--wait-start" option for "repmgr standby register"; GitHub #356 (Ian)
+        repmgr: add "%p" event notification parameter for "repmgr standby switchover"
+          containing the node ID of the demoted primary (Ian)
+        docs: various fixes and updates (Ian, Daymel, Martín, ams)
+
+4.0.1   2017-12-13
+        repmgr: ensure "repmgr node check --action=" returns appropriate return
+          code; GitHub #340 (Ian)
+        repmgr: add missing schema qualification in get_all_node_records_with_upstream()
+          query; GitHub #341 (Martín)
+        repmgr: initialise "voting_term" table in application, not extension SQL;
+          GitHub #344 (Ian)
+        repmgr: delete any replication slots copied by pg_rewind; GitHub #334 (Ian)
+        repmgr: fix configuration file sanity check; GitHub #342 (Ian)
+
+4.0.0   2017-11-21
+        Complete rewrite with many changes; for details see the repmgr 4.0.0 release
+        notes at: https://repmgr.org/docs/4.0/release-4.0.0.html

 3.3.2   2017-06-01
        Add support for PostgreSQL 10 (Ian)
--- a/Makefile.in
+++ b/Makefile.in
@@ -37,9 +37,10 @@ include Makefile.global
 $(info Building against PostgreSQL $(MAJORVERSION))

 REPMGR_CLIENT_OBJS = repmgr-client.o \
-	repmgr-action-primary.o repmgr-action-standby.o repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
+	repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
+	repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
 	configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o
-REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o
+REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o
 DATE=$(shell date "+%Y-%m-%d")

 repmgr_version.h: repmgr_version.h.in
@@ -77,6 +78,7 @@ additional-clean:
 	rm -f repmgr-client.o
 	rm -f repmgr-action-primary.o
 	rm -f repmgr-action-standby.o
+	rm -f repmgr-action-witness.o
 	rm -f repmgr-action-bdr.o
 	rm -f repmgr-action-node.o
 	rm -f repmgr-action-cluster.o
--- a/README.md
+++ b/README.md
--- a/compat.c
+++ b/compat.c
@@ -6,7 +6,7 @@
 *    supported PostgreSQL versions. They're unlikely to change but
 *    it would be worth keeping an eye on them for any fixes/improvements.
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
--- a/compat.h
+++ b/compat.h
@@ -1,6 +1,6 @@
 /*
 * compat.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
--- a/config.h.in
+++ b/config.h.in
@@ -1,4 +1,2 @@
 /* config.h.in.  Generated from configure.in by autoheader.  */

-/* Only build repmgr for BDR */
-#undef BDR_ONLY
--- a/configfile.c
+++ b/configfile.c
@@ -1,7 +1,7 @@
 /*
 * config.c - parse repmgr.conf and other configuration-related functionality
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -73,6 +73,59 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
 		strncpy(config_file_path, config_file, MAXPGPATH);
 		canonicalize_path(config_file_path);

+		/* relative path supplied - convert to absolute path */
+		if (config_file_path[0] != '/')
+		{
+			PQExpBufferData fullpath;
+			char *pwd = NULL;
+
+			initPQExpBuffer(&fullpath);
+
+			/*
+			 * we'll attempt to use $PWD to derive the effective path; getcwd()
+			 * will likely resolve symlinks, which may result in a path which
+			 * isn't permanent (e.g. if filesystem mountpoints change).
+			 */
+			pwd = getenv("PWD");
+
+			if (pwd != NULL)
+			{
+				appendPQExpBuffer(&fullpath,
+								  "%s", pwd);
+			}
+			else
+			{
+				/* $PWD not available - fall back to getcwd() */
+				char cwd[MAXPGPATH] = "";
+
+				if (getcwd(cwd, MAXPGPATH) == NULL)
+				{
+					log_error(_("unable to execute getcwd()"));
+					log_detail("%s", strerror(errno));
+
+					termPQExpBuffer(&fullpath);
+					exit(ERR_BAD_CONFIG);
+				}
+
+				appendPQExpBuffer(&fullpath,
+								  "%s",
+								  cwd);
+			}
+
+			appendPQExpBuffer(&fullpath,
+							  "/%s", config_file_path);
+
+			log_debug("relative configuration file converted to:\n  \"%s\"",
+					  fullpath.data);
+
+			strncpy(config_file_path, fullpath.data, MAXPGPATH);
+
+			termPQExpBuffer(&fullpath);
+
+			canonicalize_path(config_file_path);
+		}
+
+
 		if (stat(config_file_path, &stat_config) != 0)
 		{
 			log_error(_("provided configuration file \"%s\" not found: %s"),
@@ -81,6 +134,7 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
 			exit(ERR_BAD_CONFIG);
 		}

+
 		if (verbose == true)
 		{
 			log_notice(_("using provided configuration file \"%s\""), config_file);
@@ -249,7 +303,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	options->log_status_interval = DEFAULT_LOG_STATUS_INTERVAL;

 	/*-----------------------
-	 * standby action settings
+	 * standby clone settings
 	 *------------------------
 	 */
 	options->use_replication_slots = false;
@@ -261,6 +315,14 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	memset(options->recovery_min_apply_delay, 0, sizeof(options->recovery_min_apply_delay));
 	options->recovery_min_apply_delay_provided = false;
 	options->use_primary_conninfo_password = false;
+	memset(options->passfile, 0, sizeof(options->passfile));
+
+	/*-----------------------
+	 * standby promote settings
+	 *------------------------
+	 */
+	options->promote_check_timeout = DEFAULT_PROMOTE_CHECK_TIMEOUT;
+	options->promote_check_interval = DEFAULT_PROMOTE_CHECK_INTERVAL;

 	/*-----------------
 	 * repmgrd settings
@@ -282,6 +344,13 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	options->async_query_timeout = DEFAULT_ASYNC_QUERY_TIMEOUT;
 	options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
 	options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
+	options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
+
+	/*-------------
+	 * witness settings
+	 *-------------
+	 */
+	options->witness_sync_interval = DEFAULT_WITNESS_SYNC_INTERVAL;

 	/*-------------
 	 * BDR settings
@@ -441,6 +510,15 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 		}
 		else if (strcmp(name, "use_primary_conninfo_password") == 0)
 			options->use_primary_conninfo_password = parse_bool(value, name, error_list);
+		else if (strcmp(name, "passfile") == 0)
+			strncpy(options->passfile, value, sizeof(options->passfile));
+
+		/* standby promote settings */
+		else if (strcmp(name, "promote_check_timeout") == 0)
+			options->promote_check_timeout = repmgr_atoi(value, name, error_list, 1);
+
+		else if (strcmp(name, "promote_check_interval") == 0)
+			options->promote_check_interval = repmgr_atoi(value, name, error_list, 1);

 		/* node check settings */
 		else if (strcmp(name, "archive_ready_warning") == 0)
@@ -493,6 +571,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 			options->primary_notification_timeout = repmgr_atoi(value, name, error_list, 0);
 		else if (strcmp(name, "primary_follow_timeout") == 0)
 			options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
+		else if (strcmp(name, "standby_reconnect_timeout") == 0)
+			options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
+
+		/* witness settings */
+		else if (strcmp(name, "witness_sync_interval") == 0)
+			options->witness_sync_interval = repmgr_atoi(value, name, error_list, 1);

 		/* BDR settings */
 		else if (strcmp(name, "bdr_local_monitoring_only") == 0)
@@ -604,7 +688,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 		 * Raise an error if a known parameter is provided with an empty
 		 * value. Currently there's no reason why empty parameters are needed;
 		 * if we want to accept those, we'd need to add stricter default
-		 * checking, as currently e.g. an empty `node` value will be converted
+		 * checking, as currently e.g. an empty `node_id` value will be converted
 		 * to '0'.
 		 */
 		if (known_parameter == true && !strlen(value))
@@ -677,7 +761,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 		item_list_append(error_list,
 						 _("use \"barman_host\" for the hostname of the Barman server"));
 		item_list_append(error_list,
-						 _("use \"barman_server\" for the name of the [server] section in the Barman configururation file"));
+						 _("use \"barman_server\" for the name of the [server] section in the Barman configuration file"));

 	}

@@ -961,7 +1045,7 @@ reload_config(t_configuration_options *orig_options)
 		return false;
 	}

-	if (strcmp(new_options.node_name, orig_options->node_name) != 0)
+	if (strncmp(new_options.node_name, orig_options->node_name, MAXLEN) != 0)
 	{
 		log_warning(_("\"node_name\" cannot be changed, keeping current configuration"));
 		return false;
@@ -1005,7 +1089,7 @@ reload_config(t_configuration_options *orig_options)
 	}

 	/* conninfo */
-	if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
+	if (strncmp(orig_options->conninfo, new_options.conninfo, MAXLEN) != 0)
 	{
 		/* Test conninfo string works */
 		conn = establish_db_connection(new_options.conninfo, false);
@@ -1032,7 +1116,7 @@ reload_config(t_configuration_options *orig_options)
 	}

 	/* event_notification_command */
-	if (strcmp(orig_options->event_notification_command, new_options.event_notification_command) != 0)
+	if (strncmp(orig_options->event_notification_command, new_options.event_notification_command, MAXLEN) != 0)
 	{
 		strncpy(orig_options->event_notification_command, new_options.event_notification_command, MAXLEN);
 		log_info(_("\"event_notification_command\" is now \"%s\""), new_options.event_notification_command);
@@ -1041,7 +1125,7 @@ reload_config(t_configuration_options *orig_options)
 	}

 	/* event_notifications */
-	if (strcmp(orig_options->event_notifications_orig, new_options.event_notifications_orig) != 0)
+	if (strncmp(orig_options->event_notifications_orig, new_options.event_notifications_orig, MAXLEN) != 0)
 	{
 		strncpy(orig_options->event_notifications_orig, new_options.event_notifications_orig, MAXLEN);
 		log_info(_("\"event_notifications\" is now \"%s\""), new_options.event_notifications_orig);
@@ -1061,7 +1145,7 @@ reload_config(t_configuration_options *orig_options)
 	}

 	/* follow_command */
-	if (strcmp(orig_options->follow_command, new_options.follow_command) != 0)
+	if (strncmp(orig_options->follow_command, new_options.follow_command, MAXLEN) != 0)
 	{
 		strncpy(orig_options->follow_command, new_options.follow_command, MAXLEN);
 		log_info(_("\"follow_command\" is now \"%s\""), new_options.follow_command);
@@ -1098,7 +1182,7 @@ reload_config(t_configuration_options *orig_options)


 	/* promote_command */
-	if (strcmp(orig_options->promote_command, new_options.promote_command) != 0)
+	if (strncmp(orig_options->promote_command, new_options.promote_command, MAXLEN) != 0)
 	{
 		strncpy(orig_options->promote_command, new_options.promote_command, MAXLEN);
 		log_info(_("\"promote_command\" is now \"%s\""), new_options.promote_command);
@@ -1138,18 +1222,18 @@ reload_config(t_configuration_options *orig_options)
 	 */

 	/* log_facility */
-	if (strcmp(orig_options->log_facility, new_options.log_facility) != 0)
+	if (strncmp(orig_options->log_facility, new_options.log_facility, MAXLEN) != 0)
 	{
-		strcpy(orig_options->log_facility, new_options.log_facility);
+		strncpy(orig_options->log_facility, new_options.log_facility, MAXLEN);
 		log_info(_("\"log_facility\" is now \"%s\""), new_options.log_facility);

 		log_config_changed = true;
 	}

 	/* log_file */
-	if (strcmp(orig_options->log_file, new_options.log_file) != 0)
+	if (strncmp(orig_options->log_file, new_options.log_file, MAXLEN) != 0)
 	{
-		strcpy(orig_options->log_file, new_options.log_file);
+		strncpy(orig_options->log_file, new_options.log_file, MAXLEN);
 		log_info(_("\"log_file\" is now \"%s\""), new_options.log_file);

 		log_config_changed = true;
@@ -1157,9 +1241,9 @@ reload_config(t_configuration_options *orig_options)


 	/* log_level */
-	if (strcmp(orig_options->log_level, new_options.log_level) != 0)
+	if (strncmp(orig_options->log_level, new_options.log_level, MAXLEN) != 0)
 	{
-		strcpy(orig_options->log_level, new_options.log_level);
+		strncpy(orig_options->log_level, new_options.log_level, MAXLEN);
 		log_info(_("\"log_level\" is now \"%s\""), new_options.log_level);

 		log_config_changed = true;
@@ -1533,31 +1617,109 @@ clear_event_notification_list(t_configuration_options *options)
 }


-bool
-parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_options *backup_options, int server_version_num, ItemList *error_list)
+int
+parse_output_to_argv(const char *string, char ***argv_array)
 {
 	int			options_len = 0;
 	char	   *options_string = NULL;
 	char	   *options_string_ptr = NULL;
+	int			c = 1,
+	   			argc_item = 1;
+	char	   *argv_item = NULL;
+	char	  **local_argv_array = NULL;
+	ItemListCell *cell;

 	/*
 	 * Add parsed options to this list, then copy to an array to pass to
 	 * getopt
 	 */
-	static ItemList option_argv = {NULL, NULL};
+	ItemList option_argv = {NULL, NULL};

-	char	   *argv_item = NULL;
-	int			c,
-				argc_item = 1;
+	options_len = strlen(string) + 1;
+	options_string = pg_malloc0(options_len);
+	options_string_ptr = options_string;
+
+	/* Copy the string before operating on it with strtok() */
+	strncpy(options_string, string, options_len);
+
+	/* Extract arguments into a list and keep a count of the total */
+	while ((argv_item = strtok(options_string_ptr, " ")) != NULL)
+	{
+		item_list_append(&option_argv, trim(argv_item));
+
+		argc_item++;
+
+		if (options_string_ptr != NULL)
+			options_string_ptr = NULL;
+	}
+
+	pfree(options_string);
+
+	/*
+	 * Array of argument values to pass to getopt_long - this will need to
+	 * include an empty string as the first value (normally this would be the
+	 * program name)
+	 */
+	local_argv_array = pg_malloc0(sizeof(char *) * (argc_item + 2));
+
+	/* Insert a blank dummy program name at the start of the array */
+	local_argv_array[0] = pg_malloc0(1);
+
+	/*
+	 * Copy the previously extracted arguments from our list to the array
+	 */
+	for (cell = option_argv.head; cell; cell = cell->next)
+	{
+		int			argv_len = strlen(cell->string) + 1;
+
+		local_argv_array[c] = (char *)pg_malloc0(argv_len);
+
+		strncpy(local_argv_array[c], cell->string, argv_len);
+
+		c++;
+	}
+
+	local_argv_array[c] = NULL;
+
+	item_list_free(&option_argv);
+
+	*argv_array = local_argv_array;
+
+	return argc_item;
+}
+
+
+void
+free_parsed_argv(char ***argv_array)
+{
+	char	  **local_argv_array = *argv_array;
+	int			i = 0;
+
+	while (local_argv_array[i] != NULL)
+	{
+		pfree((char *)local_argv_array[i]);
+		i++;
+	}
+
+	pfree((char **)local_argv_array);
+	*argv_array = NULL;
+}
+
+
+bool
+parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_options *backup_options, int server_version_num, ItemList *error_list)
+{
+	bool		backup_options_ok = true;
+
+	int			c = 0,
+				argc_item = 0;

 	char	  **argv_array = NULL;
-	ItemListCell *cell = NULL;

 	int			optindex = 0;

 	struct option *long_options = NULL;

-	bool		backup_options_ok = true;

 	/* We're only interested in these options */
 	static struct option long_options_9[] =
@@ -1583,56 +1745,12 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti
 	if (!strlen(pg_basebackup_options))
 		return backup_options_ok;

-	options_len = strlen(pg_basebackup_options) + 1;
-	options_string = pg_malloc(options_len);
-	options_string_ptr = options_string;
-
 	if (server_version_num >= 100000)
 		long_options = long_options_10;
 	else
 		long_options = long_options_9;

-	/* Copy the string before operating on it with strtok() */
-	strncpy(options_string, pg_basebackup_options, options_len);
-
-	/* Extract arguments into a list and keep a count of the total */
-	while ((argv_item = strtok(options_string_ptr, " ")) != NULL)
-	{
-		item_list_append(&option_argv, argv_item);
-
-		argc_item++;
-
-		if (options_string_ptr != NULL)
-			options_string_ptr = NULL;
-	}
-
-	/*
-	 * Array of argument values to pass to getopt_long - this will need to
-	 * include an empty string as the first value (normally this would be the
-	 * program name)
-	 */
-	argv_array = pg_malloc0(sizeof(char *) * (argc_item + 2));
-
-	/* Insert a blank dummy program name at the start of the array */
-	argv_array[0] = pg_malloc0(1);
-
-	c = 1;
-
-	/*
-	 * Copy the previously extracted arguments from our list to the array
-	 */
-	for (cell = option_argv.head; cell; cell = cell->next)
-	{
-		int			argv_len = strlen(cell->string) + 1;
-
-		argv_array[c] = pg_malloc0(argv_len);
-
-		strncpy(argv_array[c], cell->string, argv_len);
-
-		c++;
-	}
-
-	argv_array[c] = NULL;
+	argc_item = parse_output_to_argv(pg_basebackup_options, &argv_array);

 	/* Reset getopt's optind variable */
 	optind = 0;
@@ -1676,15 +1794,7 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti
 		backup_options_ok = false;
 	}

-	pfree(options_string);
-
-	{
-		int			i;
-
-		for (i = 0; i < argc_item + 2; i++)
-			pfree(argv_array[i]);
-	}
-	pfree(argv_array);
+	free_parsed_argv(&argv_array);

 	return backup_options_ok;
 }
--- a/configfile.h
+++ b/configfile.h
@@ -1,7 +1,7 @@
 /*
 * configfile.h
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 *
 * This program is free software: you can redistribute it and/or modify
@@ -82,7 +82,7 @@ typedef struct
 	char		log_file[MAXLEN];
 	int			log_status_interval;

-	/* standby action settings */
+	/* standby clone settings */
 	bool		use_replication_slots;
 	char		pg_basebackup_options[MAXLEN];
 	char		restore_command[MAXLEN];
@@ -90,6 +90,11 @@ typedef struct
 	char		recovery_min_apply_delay[MAXLEN];
 	bool		recovery_min_apply_delay_provided;
 	bool		use_primary_conninfo_password;
+	char		passfile[MAXPGPATH];
+
+	/* standby promote settings */
+	int			promote_check_timeout;
+	int			promote_check_interval;

 	/* node check settings */
 	int			archive_ready_warning;
@@ -97,6 +102,9 @@ typedef struct
 	int			replication_lag_warning;
 	int			replication_lag_critical;

+	/* witness settings */
+	int			witness_sync_interval;
+
 	/* repmgrd settings */
 	failover_mode_opt failover;
 	char		location[MAXLEN];
@@ -111,6 +119,7 @@ typedef struct
 	int			async_query_timeout;
 	int			primary_notification_timeout;
 	int			primary_follow_timeout;
+	int			standby_reconnect_timeout;

 	/* BDR settings */
 	bool		bdr_local_monitoring_only;
@@ -153,10 +162,14 @@ typedef struct
 		/* log settings */ \
 		"", "", "", DEFAULT_LOG_STATUS_INTERVAL,	\
 		/* standby action settings */ \
-		false, "", "", { NULL, NULL }, "", false, false, \
+		false, "", "", { NULL, NULL }, "", false, false, "",	\
+		/* standby promote settings */ \
+		DEFAULT_PROMOTE_CHECK_TIMEOUT, DEFAULT_PROMOTE_CHECK_INTERVAL, \
 		/* node check settings */ \
 		DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
 		DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
+		/* witness settings */ \
+		DEFAULT_WITNESS_SYNC_INTERVAL, \
 		/* repmgrd settings */ \
 		FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \
 		DEFAULT_MONITORING_INTERVAL, \
@@ -166,6 +179,7 @@ typedef struct
 		DEFAULT_ASYNC_QUERY_TIMEOUT, \
 		DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT,	\
 		DEFAULT_PRIMARY_FOLLOW_TIMEOUT,	\
+		DEFAULT_STANDBY_RECONNECT_TIMEOUT,	\
 		/* BDR settings */ \
 		false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
 		/* service settings */ \
@@ -242,7 +256,6 @@ typedef struct
 }


-
 void		set_progname(const char *argv0);
 const char *progname(void);

@@ -257,12 +270,15 @@ int repmgr_atoi(const char *s,
 			ItemList *error_list,
 			int minval);

-
 bool parse_pg_basebackup_options(const char *pg_basebackup_options,
 							t_basebackup_options *backup_options,
 							int server_version_num,
 							ItemList *error_list);

+int parse_output_to_argv(const char *string, char ***argv_array);
+void free_parsed_argv(char ***argv_array);
+
+
 /* called by repmgr-client and repmgrd */
 void		exit_with_cli_errors(ItemList *error_list);
 void		print_item_list(ItemList *item_list);
--- a/42
+++ b/42
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for repmgr 4.0.
+# Generated by GNU Autoconf 2.69 for repmgr 4.0.4.
 #
 # Report bugs to <pgsql-bugs@postgresql.org>.
 #
@@ -11,7 +11,7 @@
 # This configure script is free software; the Free Software Foundation
 # gives unlimited permission to copy, distribute and modify it.
 #
-# Copyright (c) 2010-2017, 2ndQuadrant Ltd.
+# Copyright (c) 2010-2018, 2ndQuadrant Ltd.
 ## -------------------- ##
 ## M4sh Initialization. ##
 ## -------------------- ##
@@ -582,8 +582,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='repmgr'
 PACKAGE_TARNAME='repmgr'
-PACKAGE_VERSION='4.0'
-PACKAGE_STRING='repmgr 4.0'
+PACKAGE_VERSION='4.0.4'
+PACKAGE_STRING='repmgr 4.0.4'
 PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
 PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'

@@ -633,7 +633,6 @@ SHELL'
 ac_subst_files=''
 ac_user_opts='
 enable_option_checking
-with_bdr_only
 '
      ac_precious_vars='build_alias
 host_alias
@@ -1179,7 +1178,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures repmgr 4.0 to adapt to many kinds of systems.
+\`configure' configures repmgr 4.0.4 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1240,15 +1239,10 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of repmgr 4.0:";;
+     short | recursive ) echo "Configuration of repmgr 4.0.4:";;
   esac
  cat <<\_ACEOF

-Optional Packages:
-  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
-  --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
-  --with-bdr-only         BDR-only build
-
 Some influential environment variables:
  PG_CONFIG   Location to find pg_config for target PostgreSQL (default PATH)

@@ -1319,14 +1313,14 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-repmgr configure 4.0
+repmgr configure 4.0.4
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.
 This configure script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it.

-Copyright (c) 2010-2017, 2ndQuadrant Ltd.
+Copyright (c) 2010-2018, 2ndQuadrant Ltd.
 _ACEOF
  exit
 fi
@@ -1338,7 +1332,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by repmgr $as_me 4.0, which was
+It was created by repmgr $as_me 4.0.4, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  $ $0 $@
@@ -1694,20 +1688,6 @@ ac_config_headers="$ac_config_headers config.h"



-
-# Check whether --with-bdr_only was given.
-if test "${with_bdr_only+set}" = set; then :
-  withval=$with_bdr_only;
-fi
-
-if test "x$with_bdr_only" != "x"; then :
-
-$as_echo "#define BDR_ONLY \"1\"" >>confdefs.h
-
-
-fi
-
-
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
 $as_echo_n "checking for a sed that does not truncate output... " >&6; }
 if ${ac_cv_path_SED+:} false; then :
@@ -2379,7 +2359,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by repmgr $as_me 4.0, which was
+This file was extended by repmgr $as_me 4.0.4, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -2442,7 +2422,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-repmgr config.status 4.0
+repmgr config.status 4.0.4
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"

--- a/configure.in
+++ b/configure.in
@@ -1,17 +1,11 @@
-AC_INIT([repmgr], [4.0], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
+AC_INIT([repmgr], [4.0.4], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])

-AC_COPYRIGHT([Copyright (c) 2010-2017, 2ndQuadrant Ltd.])
+AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])

 AC_CONFIG_HEADER(config.h)

 AC_ARG_VAR([PG_CONFIG], [Location to find pg_config for target PostgreSQL (default PATH)])

-AC_ARG_WITH([bdr_only], [AS_HELP_STRING([--with-bdr-only], [BDR-only build])])
-AS_IF([test "x$with_bdr_only" != "x"],
-    [AC_DEFINE([BDR_ONLY], ["1"], [Only build repmgr for BDR])]
-)
-
-
 AC_PROG_SED

 if test -z "$PG_CONFIG"; then
--- a/controldata.c
+++ b/controldata.c
@@ -1,6 +1,6 @@
 /*
 * controldata.c
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
--- a/controldata.h
+++ b/controldata.h
@@ -1,6 +1,6 @@
 /*
 * controldata.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
--- a/dbutils.c
+++ b/dbutils.c
--- a/dbutils.h
+++ b/dbutils.h
@@ -1,7 +1,7 @@
 /*
 * dbutils.h
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -28,7 +28,7 @@
 #include "strutil.h"
 #include "voting.h"

-#define REPMGR_NODES_COLUMNS "node_id, type, upstream_node_id, node_name, conninfo, repluser, slot_name, location, priority, active, config_file, '' AS upstream_node_name "
+#define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name "
 #define BDR_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_status, node_name, node_local_dsn, node_init_from_dsn, node_read_only, node_seq_id"

 #define ERRBUFF_SIZE 512
@@ -38,6 +38,7 @@ typedef enum
 	UNKNOWN = 0,
 	PRIMARY,
 	STANDBY,
+	WITNESS,
 	BDR
 } t_server_type;

@@ -73,17 +74,18 @@ typedef enum
 {
 	NODE_STATUS_UNKNOWN = -1,
 	NODE_STATUS_UP,
+	NODE_STATUS_SHUTTING_DOWN,
 	NODE_STATUS_DOWN,
 	NODE_STATUS_UNCLEAN_SHUTDOWN
 } NodeStatus;

 typedef enum
 {
-	VR_VOTE_REFUSED = -1,
-	VR_POSITIVE_VOTE,
-	VR_NEGATIVE_VOTE
-} VoteRequestResult;
-
+	CONN_UNKNOWN = -1,
+	CONN_OK,
+	CONN_BAD,
+	CONN_ERROR
+} ConnectionStatus;

 typedef enum
 {
@@ -181,11 +183,13 @@ typedef struct s_event_info
 {
 	char	   *node_name;
 	char	   *conninfo_str;
+	int			node_id;
 } t_event_info;

 #define T_EVENT_INFO_INITIALIZER { \
 	NULL, \
- 	NULL \
+	NULL, \
+	UNKNOWN_NODE_ID \
 }


@@ -339,9 +343,6 @@ bool		atobool(const char *value);
 PGconn *establish_db_connection(const char *conninfo,
 						const bool exit_on_error);
 PGconn	   *establish_db_connection_quiet(const char *conninfo);
-PGconn *establish_db_connection_as_user(const char *conninfo,
-								const char *user,
-								const bool exit_on_error);

 PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
 								  const bool exit_on_error);
@@ -365,6 +366,7 @@ void		param_set_ine(t_conninfo_param_list *param_list, const char *param, const
 char	   *param_get(t_conninfo_param_list *param_list, const char *param);
 bool		parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *param_list, char *errmsg, bool ignore_local_params);
 char	   *param_list_to_string(t_conninfo_param_list *param_list);
+bool		has_passfile(void);

 /* transaction functions */
 bool		begin_transaction(PGconn *conn);
@@ -375,10 +377,8 @@ bool		check_cluster_schema(PGconn *conn);
 /* GUC manipulation functions */
 bool		set_config(PGconn *conn, const char *config_param, const char *config_value);
 bool		set_config_bool(PGconn *conn, const char *config_param, bool state);
-int guc_set(PGconn *conn, const char *parameter, const char *op,
-		const char *value);
-int guc_set_typed(PGconn *conn, const char *parameter, const char *op,
-			  const char *value, const char *datatype);
+int		    guc_set(PGconn *conn, const char *parameter, const char *op, const char *value);
+int			guc_set_typed(PGconn *conn, const char *parameter, const char *op, const char *value, const char *datatype);
 bool		get_pg_setting(PGconn *conn, const char *setting, char *output);

 /* server information functions */
@@ -390,6 +390,7 @@ bool		can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferDat
 int			get_ready_archive_files(PGconn *conn, const char *data_directory);
 bool		identify_system(PGconn *repl_conn, t_system_identification *identification);
 bool		repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
+int			repmgrd_get_local_node_id(PGconn *conn);

 /* extension functions */
 ExtensionStatus get_repmgr_extension_status(PGconn *conn);
@@ -404,6 +405,8 @@ t_server_type parse_node_type(const char *type);
 const char *get_node_type_string(t_server_type type);

 RecordStatus get_node_record(PGconn *conn, int node_id, t_node_info *node_info);
+RecordStatus get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info);
+
 RecordStatus get_node_record_by_name(PGconn *conn, const char *node_name, t_node_info *node_info);
 t_node_info *get_node_record_pointer(PGconn *conn, int node_id);

@@ -414,17 +417,23 @@ void		get_all_node_records(PGconn *conn, NodeInfoList *node_list);
 void		get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *nodes);
 void		get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
 void		get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
-void		get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
+bool		get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
+bool		get_downstream_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *noede_list);

 bool		create_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
 bool		update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
 bool		delete_node_record(PGconn *conn, int node);
+bool		truncate_node_records(PGconn *conn);

 bool		update_node_record_set_active(PGconn *conn, int this_node_id, bool active);
 bool		update_node_record_set_primary(PGconn *conn, int this_node_id);
+bool		update_node_record_set_active_standby(PGconn *conn, int this_node_id);
 bool		update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
 bool		update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active);
 bool		update_node_record_conn_priority(PGconn *conn, t_configuration_options *options);
+bool		update_node_record_slot_name(PGconn *primary_conn, int node_id, char *slot_name);
+
+bool		witness_copy_node_records(PGconn *primary_conn, PGconn *witness_conn);

 void		clear_node_info_list(NodeInfoList *nodes);

@@ -438,11 +447,14 @@ void		config_file_list_add(t_configfile_list *list, const char *file, const char
 bool		create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
 bool		create_event_notification(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
 bool		create_event_notification_extended(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details, t_event_info *event_info);
+PGresult   *get_event_records(PGconn *conn, int node_id, const char *node_name, const char *event, bool all, int limit);

 /* replication slot functions */
+void		create_slot_name(char *slot_name, int node_id);
 bool		create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, PQExpBufferData *error_msg);
 bool		drop_replication_slot(PGconn *conn, char *slot_name);
 RecordStatus get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
+int			get_free_replication_slots(PGconn *conn);

 /* tablespace functions */
 bool		get_tablespace_name_by_location(PGconn *conn, const char *location, char *name);
@@ -474,9 +486,9 @@ bool		delete_monitoring_records(PGconn *primary_conn, int keep_history);


 /* node voting functions */
-NodeVotingStatus get_voting_status(PGconn *conn);
-VoteRequestResult request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term);
-int			set_voting_status_initiated(PGconn *conn);
+void		initialize_voting_term(PGconn *conn);
+int			get_current_term(PGconn *conn);
+void		increment_current_term(PGconn *conn);
 bool		announce_candidature(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term);
 void		notify_follow_primary(PGconn *conn, int primary_node_id);
 bool		get_new_primary(PGconn *conn, int *primary_node_id);
@@ -487,24 +499,30 @@ XLogRecPtr	get_current_wal_lsn(PGconn *conn);
 XLogRecPtr	get_last_wal_receive_location(PGconn *conn);
 bool		get_replication_info(PGconn *conn, ReplInfo *replication_info);
 int			get_replication_lag_seconds(PGconn *conn);
-void		get_node_replication_stats(PGconn *conn, t_node_info *node_info);
+void		get_node_replication_stats(PGconn *conn, int server_version_num, t_node_info *node_info);
 bool		is_downstream_node_attached(PGconn *conn, char *node_name);

 /* BDR functions */
 void		get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list);
 RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info);
 bool		is_bdr_db(PGconn *conn, PQExpBufferData *output);
+bool		is_bdr_db_quiet(PGconn *conn);
 bool		is_active_bdr_node(PGconn *conn, const char *node_name);
 bool		is_bdr_repmgr(PGconn *conn);
 bool		is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);
 bool		add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);
 void		add_extension_tables_to_bdr_replication_set(PGconn *conn);
-
-bool		bdr_node_exists(PGconn *conn, const char *node_name);
+bool		bdr_node_name_matches(PGconn *conn, const char *node_name, PQExpBufferData *bdr_local_node_name);
 ReplSlotStatus get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name);
 void		get_bdr_other_node_name(PGconn *conn, int node_id, char *name_buf);

 bool		am_bdr_failover_handler(PGconn *conn, int node_id);
 void		unset_bdr_failover_handler(PGconn *conn);
+bool		bdr_node_has_repmgr_set(PGconn *conn, const char *node_name);
+bool		bdr_node_set_repmgr_set(PGconn *conn, const char *node_name);
+
+/* miscellaneous debugging functions */
+const char *print_node_status(NodeStatus node_status);
+const char *print_pqping_status(PGPing ping_status);

 #endif							/* _REPMGR_DBUTILS_H_ */
--- a/dirutil.c
+++ b/dirutil.c
@@ -3,7 +3,7 @@
 * dirmod.c
 *	  directory handling functions
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -21,6 +21,7 @@

 #include <unistd.h>
 #include <dirent.h>
+#include <signal.h>
 #include <sys/stat.h>
 #include <errno.h>
 #include <stdio.h>
@@ -34,34 +35,33 @@
 #include "dirutil.h"
 #include "strutil.h"
 #include "log.h"
+#include "controldata.h"

 static int	unlink_dir_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf);

+/* PID can be negative if backend is standalone */
+typedef long pgpid_t;


 /*
- * make sure the directory either doesn't exist or is empty
- * we use this function to check the new data directory and
- * the directories for tablespaces
+ * Check if a directory exists, and if so whether it is empty.
 *
- * This is the same check initdb does on the new PGDATA dir
- *
- * Returns 0 if nonexistent, 1 if exists and empty, 2 if not empty,
- * or -1 if trouble accessing directory
+ * This function is used for checking both the data directory
+ * and tablespace directories.
 */
-int
+DataDirState
 check_dir(char *path)
 {
-	DIR		   *chkdir;
-	struct dirent *file;
-	int			result = 1;
+	DIR		   *chkdir = NULL;
+	struct dirent *file = NULL;
+	int			result = DIR_EMPTY;

 	errno = 0;

 	chkdir = opendir(path);

 	if (!chkdir)
-		return (errno == ENOENT) ? 0 : -1;
+		return (errno == ENOENT) ? DIR_NOENT : DIR_ERROR;

 	while ((file = readdir(chkdir)) != NULL)
 	{
@@ -73,25 +73,15 @@ check_dir(char *path)
 		}
 		else
 		{
-			result = 2;			/* not empty */
+			result = DIR_NOT_EMPTY;
 			break;
 		}
 	}

-#ifdef WIN32
-
-	/*
-	 * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
-	 * released version
-	 */
-	if (GetLastError() == ERROR_NO_MORE_FILES)
-		errno = 0;
-#endif
-
 	closedir(chkdir);

 	if (errno != 0)
-		return -1;				/* some kind of I/O error? */
+		return DIR_ERROR;				/* some kind of I/O error? */

 	return result;
 }
@@ -106,12 +96,13 @@ create_dir(char *path)
 	if (mkdir_p(path, 0700) == 0)
 		return true;

-	log_error(_("unable to create directory \"%s\": %s"),
-			  path, strerror(errno));
+	log_error(_("unable to create directory \"%s\""), path);
+	log_detail("%s", strerror(errno));

 	return false;
 }

+
 bool
 set_dir_permissions(char *path)
 {
@@ -146,26 +137,6 @@ mkdir_p(char *path, mode_t omode)
 	oumask = 0;
 	retval = 0;

-#ifdef WIN32
-	/* skip network and drive specifiers for win32 */
-	if (strlen(p) >= 2)
-	{
-		if (p[0] == '/' && p[1] == '/')
-		{
-			/* network drive */
-			p = strstr(p + 2, "/");
-			if (p == NULL)
-				return 1;
-		}
-		else if (p[1] == ':' &&
-				 ((p[0] >= 'a' && p[0] <= 'z') ||
-				  (p[0] >= 'A' && p[0] <= 'Z')))
-		{
-			/* local drive */
-			p += 2;
-		}
-	}
-#endif

 	if (p[0] == '/')			/* Skip leading '/'. */
 		++p;
@@ -242,17 +213,91 @@ is_pg_dir(char *path)
 	return false;
 }

+/*
+ * Determine whether the PostgreSQL data directory "path" is in use by
+ * reading "postmaster.pid", the same mechanism used by "pg_ctl".
+ *
+ * Returns PG_DIR_RUNNING if kill(pid, 0) succeeds for the PID in the file
+ * and that PID is neither this process nor its parent; otherwise returns
+ * PG_DIR_NOT_RUNNING (also when the file is missing, empty or invalid).
+ * Exits with ERR_BAD_CONFIG if the file cannot be opened for another reason.
+ */
+PgDirState
+is_pg_running(char *path)
+{
+	long		pid;
+	FILE	   *pidf;
+	char		pid_file[MAXPGPATH];
+
+	/* it's reasonable to assume the pidfile name will not change */
+	snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", path);
+
+	pidf = fopen(pid_file, "r");
+	if (pidf == NULL)
+	{
+		/*
+		 * No PID file - PostgreSQL shouldn't be running. From 9.3 (the
+		 * earliest version we care about) removal of the PID file will
+		 * cause the postmaster to shut down, so it's highly unlikely
+		 * that PostgreSQL will still be running.
+		 */
+		if (errno == ENOENT)
+		{
+			return PG_DIR_NOT_RUNNING;
+		}
+		else
+		{
+			log_error(_("unable to open PostgreSQL PID file \"%s\""), pid_file);
+			log_detail("%s", strerror(errno));
+			exit(ERR_BAD_CONFIG);
+		}
+	}
+
+	/*
+	 * In the unlikely event we're unable to extract a PID from the PID file,
+	 * log a warning but assume we're not dealing with a running instance
+	 * as PostgreSQL should have shut itself down in these cases anyway.
+	 */
+	if (fscanf(pidf, "%ld", &pid) != 1)
+	{
+		/* Is the file empty? */
+		if (ftell(pidf) == 0 && feof(pidf))
+		{
+			log_warning(_("PostgreSQL PID file \"%s\" is empty"), pid_file);
+		}
+		else
+		{
+			log_warning(_("invalid data in PostgreSQL PID file \"%s\""), pid_file);
+		}
+
+		/* close the PID file on this early return too, so it isn't leaked */
+		fclose(pidf);
+		return PG_DIR_NOT_RUNNING;
+	}
+
+	fclose(pidf);
+
+	if (pid == getpid())
+		return PG_DIR_NOT_RUNNING;
+
+	if (pid == getppid())
+		return PG_DIR_NOT_RUNNING;
+
+	if (kill(pid, 0) == 0)
+		return PG_DIR_RUNNING;
+
+	return PG_DIR_NOT_RUNNING;
+}
+

 bool
 create_pg_dir(char *path, bool force)
 {
-	bool		pg_dir = false;
-
-	/* Check this directory could be used as a PGDATA dir */
+	/* Check this directory can be used as a PGDATA dir */
 	switch (check_dir(path))
 	{
-		case 0:
-			/* dir not there, must create it */
+		case DIR_NOENT:
+			/* directory does not exist, attempt to create it */
 			log_info(_("creating directory \"%s\"..."), path);

 			if (!create_dir(path))
@@ -262,55 +307,62 @@ create_pg_dir(char *path, bool force)
 				return false;
 			}
 			break;
-		case 1:
-			/* Present but empty, fix permissions and use it */
-			log_info(_("checking and correcting permissions on existing directory %s"),
+		case DIR_EMPTY:
+			/* exists but empty, fix permissions and use it */
+			log_info(_("checking and correcting permissions on existing directory \"%s\""),
 					 path);

 			if (!set_dir_permissions(path))
 			{
-				log_error(_("unable to change permissions of directory \"%s\":\n  %s"),
-						  path, strerror(errno));
+				log_error(_("unable to change permissions of directory \"%s\""), path);
+				log_detail("%s", strerror(errno));
 				return false;
 			}
 			break;
-		case 2:
-			/* Present and not empty */
+		case DIR_NOT_EMPTY:
+			/* exists but is not empty */
 			log_warning(_("directory \"%s\" exists but is not empty"),
 						path);

-			pg_dir = is_pg_dir(path);
-
-			if (pg_dir && force)
+			if (is_pg_dir(path))
 			{
-				/* TODO: check DB state, if not running overwrite */
-
-				if (false)
+				if (force == true)
 				{
-					log_notice(_("deleting existing data directory \"%s\""), path);
+					log_notice(_("-F/--force provided - deleting existing data directory \"%s\""), path);
 					nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
+					return true;
 				}
-				/* Let it continue */
-				break;
-			}
-			else if (pg_dir && !force)
-			{
-				log_hint(_("This looks like a PostgreSQL directory.\n"
-						   "If you are sure you want to clone here, "
-						   "please check there is no PostgreSQL server "
-						   "running and use the -F/--force option"));
+
 				return false;
 			}
-
-			return false;
-		default:
+			else
+			{
+				if (force == true)
+				{
+					log_notice(_("deleting existing directory \"%s\""), path);
+					nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
+					return true;
+				}
+				return false;
+			}
+			break;
+		case DIR_ERROR:
 			log_error(_("could not access directory \"%s\": %s"),
 					  path, strerror(errno));
 			return false;
 	}
+
 	return true;
 }

+
+/* Walk "path" with nftw() (children first, symlinks not followed), calling unlink_dir_callback() on each entry; returns nftw()'s result. */
+int
+rmdir_recursive(char *path)
+{
+	return nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
+}
+
 static int
 unlink_dir_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
 {
--- a/dirutil.h
+++ b/dirutil.h
@@ -1,6 +1,6 @@
 /*
 * dirutil.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -19,12 +19,29 @@
 #ifndef _DIRUTIL_H_
 #define _DIRUTIL_H_

+typedef enum				/* result of check_dir() */
+{
+	DIR_ERROR = -1,			/* directory could not be opened or read */
+	DIR_NOENT,				/* directory does not exist (ENOENT) */
+	DIR_EMPTY,				/* directory exists but is empty */
+	DIR_NOT_EMPTY			/* directory exists and is not empty */
+} DataDirState;
+
+typedef enum				/* result of is_pg_running() */
+{
+	PG_DIR_ERROR = -1,		/* NOTE(review): not returned by is_pg_running() as shown here - confirm usage */
+	PG_DIR_NOT_RUNNING,		/* no usable PID in postmaster.pid, or that PID could not be signalled */
+	PG_DIR_RUNNING			/* kill(pid, 0) succeeded for the PID in postmaster.pid */
+} PgDirState;
+
 extern int	mkdir_p(char *path, mode_t omode);
 extern bool set_dir_permissions(char *path);

-extern int	check_dir(char *path);
+extern DataDirState	check_dir(char *path);
 extern bool create_dir(char *path);
 extern bool is_pg_dir(char *path);
+extern PgDirState is_pg_running(char *path);
 extern bool create_pg_dir(char *path, bool force);
+extern int rmdir_recursive(char *path);

 #endif
--- a/doc/.gitignore
+++ b/doc/.gitignore
@@ -2,4 +2,6 @@ HTML.index
 bookindex.sgml
 html-stamp
 html/
+nochunks.dsl
+repmgr.html
 version.sgml
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -10,7 +10,7 @@ SGMLINCLUDE = -D . -D ${srcdir}

 SPFLAGS += -wall -wno-unused-param -wno-empty -wfully-tagged

-JADE.html.call = $(JADE) $(JADEFLAGS) $(SPFLAGS) $(SGMLINCLUDE) $(CATALOG) -d stylesheet.dsl -t sgml -i output-html
+JADE.html.call = $(JADE) $(JADEFLAGS) $(SPFLAGS) $(SGMLINCLUDE) $(CATALOG) -t sgml -i output-html

 ALLSGML := $(wildcard $(srcdir)/*.sgml)
 # to build bookindex
@@ -26,10 +26,15 @@ html: html-stamp

 html-stamp: repmgr.sgml $(ALLSGML) $(GENERATED_SGML) stylesheet.dsl website-docs.css
 	$(MKDIR_P) html
-	$(JADE.html.call) -i include-index $<
+	$(JADE.html.call) -d stylesheet.dsl -i include-index $<
 	cp $(srcdir)/stylesheet.css $(srcdir)/website-docs.css html/
 	touch $@

+repmgr.html: repmgr.sgml $(ALLSGML) $(GENERATED_SGML) stylesheet.dsl website-docs.css
+	sed '/html-index-filename/a\
+(define nochunks  #t)' <stylesheet.dsl >nochunks.dsl
+	$(JADE.html.call) -d nochunks.dsl -i include-index $< >repmgr.html
+
 version.sgml: ${repmgr_top_builddir}/repmgr_version.h
 	{ \
 	  echo "<!ENTITY repmgrversion \"$(REPMGR_VERSION)\">"; \
@@ -37,7 +42,7 @@ version.sgml: ${repmgr_top_builddir}/repmgr_version.h

 HTML.index: repmgr.sgml $(ALMOSTALLSGML) stylesheet.dsl
 	@$(MKDIR_P) html
-	$(JADE.html.call) -V html-index $<
+	$(JADE.html.call) -d stylesheet.dsl -V html-index $<

 website-docs.css:
 	@$(MKDIR_P) html
--- a/doc/appendix-faq.sgml
+++ b/doc/appendix-faq.sgml
@@ -0,0 +1,279 @@
+<appendix id="appendix-faq" xreflabel="FAQ">
+ <indexterm>
+  <primary>FAQ (Frequently Asked Questions)</primary>
+ </indexterm>
+
+ <title>FAQ (Frequently Asked Questions)</title>
+
+ <sect1 id="faq-general" xreflabel="General">
+  <title>General</title>
+
+  <sect2 id="faq-xrepmgr-version-diff" xreflabel="Version differences">
+    <title>What's the difference between the repmgr versions?</title>
+    <para>
+      &repmgr; 4 is a complete rewrite of the existing &repmgr; code base
+      and implements &repmgr; as a PostgreSQL extension. It
+      supports all PostgreSQL versions from 9.3 (although some &repmgr;
+      features are not available for PostgreSQL 9.3 and 9.4).
+     </para>
+     <para>
+      &repmgr; 3.x builds on the improved replication facilities added
+      in PostgreSQL 9.3, as well as improved automated failover support
+      via <application>repmgrd</application>, and is not compatible with PostgreSQL 9.2
+      and earlier. We recommend upgrading to &repmgr; 4, as the &repmgr; 3.x
+      series will no longer be actively maintained.
+     </para>
+     <para>
+      &repmgr; 2.x supports PostgreSQL 9.0 to 9.3. While it is compatible
+      with PostgreSQL 9.3, we recommend using &repmgr; 4.x.
+     </para>
+  </sect2>
+
+  <sect2 id="faq-replication-slots-advantage" xreflabel="Advantages of replication slots">
+   <title>What's the advantage of using replication slots?</title>
+   <para>
+    Replication slots, introduced in PostgreSQL 9.4, ensure that the
+    primary server will retain WAL files until they have been consumed
+    by all standby servers. This makes WAL file management much easier,
+    and if used, &repmgr; will no longer insist on a fixed minimum number
+    (default: 5000) of WAL files being retained.
+   </para>
+   <para>
+    However this does mean that if a standby is no longer connected to the
+    primary, the presence of the replication slot will cause WAL files
+    to be retained indefinitely.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-replication-slots-number" xreflabel="Number of replication slots">
+   <title>How many replication slots should I define in <varname>max_replication_slots</varname>?</title>
+   <para>
+    Normally at least the same number as the number of standbys which will connect
+    to the node. Note that changes to <varname>max_replication_slots</varname> require a server
+    restart to take effect, and as there is no particular penalty for unused
+    replication slots, setting a higher figure will make adding new nodes
+    easier.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-hash-index" xreflabel="Hash indexes">
+   <title>Does &repmgr; support hash indexes?</title>
+   <para>
+    Before PostgreSQL 10, hash indexes were not WAL logged and are therefore not suitable
+    for use in streaming replication in PostgreSQL 9.6 and earlier. See the
+    <ulink url="https://www.postgresql.org/docs/9.6/static/sql-createindex.html#AEN80279">PostgreSQL documentation</ulink>
+    for details.
+   </para>
+   <para>
+    From PostgreSQL 10, this restriction has been lifted and hash indexes can be used
+    in a streaming replication cluster.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-upgrades" xreflabel="Upgrading PostgreSQL with repmgr">
+   <title>Can &repmgr; assist with upgrading a PostgreSQL cluster?</title>
+   <para>
+     For <emphasis>minor</emphasis> version upgrades, e.g. from 9.6.7 to 9.6.8, a common
+     approach is to upgrade a standby to the latest version, perform a
+     <link linkend="performing-switchover">switchover</link> promoting it to a primary,
+     then upgrade the former primary.
+   </para>
+   <para>
+     For <emphasis>major</emphasis> version upgrades (e.g. from PostgreSQL 9.6 to PostgreSQL 10),
+     the traditional approach is to "reseed" a cluster by upgrading a single
+     node with <ulink url="https://www.postgresql.org/docs/current/static/pgupgrade.html">pg_upgrade</ulink>
+     and recloning standbys from this.
+   </para>
+   <para>
+     To minimize downtime during major upgrades, for more recent PostgreSQL
+     versions <ulink url="https://www.2ndquadrant.com/en/resources/pglogical/">pglogical</ulink>
+     can be used to set up a parallel cluster using the newer PostgreSQL version,
+     which can be kept in sync with the existing production cluster until the
+     new cluster is ready to be put into production.
+   </para>
+  </sect2>
+ </sect1>
+
+ <sect1 id="faq-repmgr" xreflabel="repmgr">
+  <title><command>repmgr</command></title>
+
+  <sect2 id="faq-register-existing-node" xreflabel="registering an existing node">
+   <title>Can I register an existing PostgreSQL server with repmgr?</title>
+   <para>
+    Yes, any existing PostgreSQL server which is part of the same replication
+    cluster can be registered with &repmgr;. There's no requirement for a
+    standby to have been cloned using &repmgr;.
+   </para>
+  </sect2>
+  <sect2 id="faq-repmgr-clone-other-source" >
+   <title>Can I use a standby not cloned by &repmgr; as a &repmgr; node?</title>
+
+   <para>
+     For a standby which has been manually cloned or recovered from an external
+     backup manager such as Barman, the command
+     <command><link linkend="repmgr-standby-clone">repmgr standby clone --recovery-conf-only</link></command>
+     can be used to create the correct <filename>recovery.conf</filename> file for
+     use with &repmgr; (and will create a replication slot if required). Once this has been done,
+     <link linkend="repmgr-standby-register">register the node</link> as usual.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgr-failed-primary-standby" xreflabel="Reintegrate a failed primary as a standby">
+   <title>How can a failed primary be re-added as a standby?</title>
+   <para>
+    This is a two-stage process. First, the failed primary's data directory
+    must be re-synced with the current primary; secondly the failed primary
+    needs to be re-registered as a standby.
+   </para>
+   <para>
+    In PostgreSQL 9.5 and later, it's possible to use <command>pg_rewind</command>
+    to re-synchronise the existing data directory, which will usually be much
+    faster than re-cloning the server. However <command>pg_rewind</command> can only
+    be used if PostgreSQL either has <varname>wal_log_hints</varname> enabled, or
+    data checksums were enabled when the cluster was initialized.
+   </para>
+   <para>
+    &repmgr; provides the command <command>repmgr node rejoin</command> which can
+    optionally execute <command>pg_rewind</command>; see the <xref linkend="repmgr-node-rejoin">
+    documentation for details.
+   </para>
+   <para>
+    If <command>pg_rewind</command> cannot be used, then the data directory will have
+    to be re-cloned from scratch.
+   </para>
+
+  </sect2>
+
+  <sect2 id="faq-repmgr-check-configuration" xreflabel="Check PostgreSQL configuration">
+   <title>Is there an easy way to check my primary server is correctly configured for use with &repmgr;?</title>
+   <para>
+    Execute <command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>
+    with the <literal>--dry-run</literal> option; this will report any configuration problems
+    which need to be rectified.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgr-clone-skip-config-files" xreflabel="">
+   <title>When cloning a standby, how can I get &repmgr; to copy
+     <filename>postgresql.conf</filename> and <filename>pg_hba.conf</filename> from the PostgreSQL configuration
+     directory in <filename>/etc</filename>?</title>
+   <para>
+    Use the command line option <literal>--copy-external-config-files</literal>. For more details
+    see <xref linkend="repmgr-standby-clone-config-file-copying">.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgr-shared-preload-libaries-no-repmgrd" xreflabel="shared_preload_libraries without repmgrd">
+    <title>Do I need to include <literal>shared_preload_libraries = 'repmgr'</literal>
+      in <filename>postgresql.conf</filename> if I'm not using <application>repmgrd</application>?</title>
+   <para>
+    No, the <literal>repmgr</literal> shared library is only needed when running <application>repmgrd</application>.
+    If you later decide to run <application>repmgrd</application>, you just need to add
+    <literal>shared_preload_libraries = 'repmgr'</literal> and restart PostgreSQL.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgr-permissions" xreflabel="Replication permission problems">
+   <title>I've provided replication permission for the <literal>repmgr</literal> user in <filename>pg_hba.conf</filename>
+     but <command>repmgr</command>/<application>repmgrd</application> complains it can't connect to the server... Why?</title>
+   <para>
+    <command>repmgr</command> and <application>repmgrd</application> need to be able to connect to the repmgr database
+    with a normal connection to query metadata. The <literal>replication</literal> connection
+    permission is for PostgreSQL's streaming replication (and the replication user doesn't necessarily need to be the <literal>repmgr</literal> user).
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgr-clone-provide-primary-conninfo" xreflabel="Providing primary connection parameters">
+   <title>When cloning a standby, why do I need to provide the connection parameters
+     for the primary server on the command line, not in the configuration file?</title>
+   <para>
+    Cloning a standby is a one-time action; the role of the server being cloned
+    from could change, so fixing it in the configuration file would create
+    confusion. If &repmgr; needs to establish a connection to the primary
+    server, it can retrieve this from the <literal>repmgr.nodes</literal> table on the local
+    node, and if necessary scan the replication cluster until it locates the active primary.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgr-clone-waldir-xlogdir" xreflabel="Providing a custom WAL directory">
+   <title>When cloning a standby, how do I ensure the WAL files are placed in a custom directory?</title>
+   <para>
+     Provide the option <literal>--waldir</literal> (<literal>--xlogdir</literal> in PostgreSQL 9.6
+     and earlier) with the absolute path to the WAL directory in <varname>pg_basebackup_options</varname>.
+     For more details see <xref linkend="cloning-advanced-pg-basebackup-options">.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgr-events-no-fkey" xreflabel="No foreign key on node_id in repmgr.events">
+   <title>Why is there no foreign key on the <literal>node_id</literal> column in the <literal>repmgr.events</literal>
+     table?</title>
+   <para>
+     Under some circumstances event notifications can be generated for servers
+     which have not yet been registered; it's also useful to retain a record
+     of events which includes servers removed from the replication cluster
+     which no longer have an entry in the <literal>repmgr.nodes</literal> table.
+   </para>
+  </sect2>
+
+
+
+
+ </sect1>
+
+ <sect1 id="faq-repmgrd" xreflabel="repmgrd">
+  <title><application>repmgrd</application></title>
+
+
+  <sect2 id="faq-repmgrd-prevent-promotion" xreflabel="Prevent standby from being promoted to primary">
+   <title>How can I prevent a node from ever being promoted to primary?</title>
+   <para>
+    In <filename>repmgr.conf</filename>, set <varname>priority</varname> to a value of 0 or less; apply the changed setting with
+    <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>.
+   </para>
+   <para>
+    Additionally, if <varname>failover</varname> is set to <literal>manual</literal>, the node will never
+    be considered as a promotion candidate.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgrd-delayed-standby" xreflabel="Delayed standby support">
+   <title>Does <application>repmgrd</application> support delayed standbys?</title>
+   <para>
+    <application>repmgrd</application> can monitor delayed standbys - those set up with
+    <varname>recovery_min_apply_delay</varname> set to a non-zero value
+    in <filename>recovery.conf</filename> - but as it's not currently possible
+    to directly examine the value applied to the standby, <application>repmgrd</application>
+    may not be able to properly evaluate the node as a promotion candidate.
+   </para>
+   <para>
+    We recommend that delayed standbys are explicitly excluded from promotion
+    by setting <varname>priority</varname> to <literal>0</literal> in
+    <filename>repmgr.conf</filename>.
+   </para>
+   <para>
+    Note that after registering a delayed standby, <application>repmgrd</application> will only start
+    once the metadata added in the primary node has been replicated.
+   </para>
+  </sect2>
+
+  <sect2 id="faq-repmgrd-logfile-rotate" xreflabel="repmgrd logfile rotation">
+   <title>How can I get <application>repmgrd</application> to rotate its logfile?</title>
+   <para>
+     Configure your system's <literal>logrotate</literal> service to do this; see <xref linkend="repmgrd-log-rotation">.
+   </para>
+
+  </sect2>
+
+  <sect2 id="faq-repmgrd-recloned-no-start" xreflabel="repmgrd not restarting after node cloned">
+   <title>I've recloned a failed primary as a standby, but <application>repmgrd</application> refuses to start?</title>
+   <para>
+    Check you registered the standby after recloning. If unregistered, the standby
+    cannot be considered as a promotion candidate even if <varname>failover</varname> is set to
+    <literal>automatic</literal>, which is probably not what you want. <application>repmgrd</application> will start if
+    <varname>failover</varname> is set to <literal>manual</literal> so the node's replication status can still
+  be monitored, if desired.
+   </para>
+  </sect2>
+
+ </sect1>
+</appendix>
--- a/doc/appendix-packages.sgml
+++ b/doc/appendix-packages.sgml
@@ -0,0 +1,158 @@
+<appendix id="appendix-packages" xreflabel="Package details">
+ <indexterm>
+  <primary>packages</primary>
+ </indexterm>
+
+ <title>&repmgr; package details</title>
+ <para>
+   This section provides technical details about various &repmgr; binary
+   packages, such as location of the installed binaries and
+   configuration files.
+ </para>
+
+ <sect1 id="packages-centos" xreflabel="CentOS packages">
+  <title>CentOS, RHEL, Scientific Linux etc.</title>
+  <para>
+    Currently packages are provided for versions 6.x and 7.x of CentOS et al.
+  </para>
+
+  <note>
+    <para>
+      For PostgreSQL 9.6 and lower, the CentOS packages use a mixture of <literal>9.6</literal>
+      and <literal>96</literal> in various places to designate the major version;
+      from PostgreSQL 10, the first part of the version number (e.g. <literal>10</literal>) is
+      the major version, so there is more consistency in file/path/package naming.
+    </para>
+  </note>
+
+  <table id="centos-7-packages">
+   <title>CentOS 7 packages</title>
+
+   <tgroup cols="2">
+    <tbody>
+     <row>
+      <entry>Repository URL:</entry>
+      <entry><ulink url="https://yum.postgresql.org/repopackages.php">https://yum.postgresql.org/repopackages.php</ulink></entry>
+     </row>
+
+     <row>
+      <entry>Repository documentation:</entry>
+      <entry><ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink></entry>
+     </row>
+
+     <row>
+      <entry>Package name example:</entry>
+      <entry><filename>repmgr10-4.0.0-1.rhel7.x86_64</filename></entry>
+     </row>
+
+     <row>
+      <entry>Metapackage:</entry>
+      <entry>(none)</entry>
+     </row>
+
+     <row>
+      <entry>Installation command:</entry>
+      <entry><literal>yum install -y repmgr10</literal></entry>
+     </row>
+
+     <row>
+      <entry>Binary location:</entry>
+      <entry><filename>/usr/pgsql-10/bin</filename></entry>
+     </row>
+
+     <row>
+      <entry>In default path:</entry>
+      <entry>NO</entry>
+     </row>
+
+     <row>
+      <entry>Configuration file location:</entry>
+      <entry><filename>/etc/repmgr/10/repmgr.conf</filename></entry>
+     </row>
+
+     <row>
+      <entry>repmgrd service command:</entry>
+      <entry><literal>service repmgr10</literal></entry>
+     </row>
+
+     <row>
+      <entry>repmgrd service file location:</entry>
+      <entry><filename>/usr/lib/systemd/system/repmgr10.service</filename></entry>
+     </row>
+
+     <row>
+      <entry>repmgrd log file location:</entry>
+      <entry>(not specified)</entry>
+     </row>
+
+    </tbody>
+   </tgroup>
+  </table>
+
+  <table id="centos-6-packages">
+   <title>CentOS 6 packages</title>
+
+   <tgroup cols="2">
+    <tbody>
+     <row>
+      <entry>Repository URL:</entry>
+      <entry><ulink url="https://yum.postgresql.org/repopackages.php">https://yum.postgresql.org/repopackages.php</ulink></entry>
+     </row>
+
+     <row>
+      <entry>Repository documentation:</entry>
+      <entry><ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink></entry>
+     </row>
+
+     <row>
+      <entry>Package name example:</entry>
+      <entry><filename>repmgr96-4.0.0-1.rhel6.x86_64</filename></entry>
+     </row>
+
+     <row>
+      <entry>Metapackage:</entry>
+      <entry>(none)</entry>
+     </row>
+
+     <row>
+      <entry>Installation command:</entry>
+      <entry><literal>yum install -y repmgr96</literal></entry>
+     </row>
+
+     <row>
+      <entry>Binary location:</entry>
+      <entry><filename>/usr/pgsql-9.6/bin</filename></entry>
+     </row>
+
+     <row>
+      <entry>In default path:</entry>
+      <entry>NO</entry>
+     </row>
+
+     <row>
+      <entry>Configuration file location:</entry>
+      <entry><filename>/etc/repmgr/9.6/repmgr.conf</filename></entry>
+     </row>
+
+     <row>
+      <entry>repmgrd service command:</entry>
+      <entry><literal>service repmgr-9.6</literal></entry>
+     </row>
+
+     <row>
+      <entry>repmgrd service file location:</entry>
+      <entry><filename>/etc/init.d/repmgr-9.6</filename></entry>
+     </row>
+
+     <row>
+      <entry>repmgrd log file location:</entry>
+      <entry><filename>/var/log/repmgr/repmgrd-9.6.log</filename></entry>
+     </row>
+
+    </tbody>
+   </tgroup>
+  </table>
+
+
+ </sect1>
+</appendix>
--- a/doc/appendix-release-notes.sgml
+++ b/doc/appendix-release-notes.sgml
@@ -0,0 +1,787 @@
+<appendix id="appendix-release-notes">
+  <title>Release notes</title>
+  <indexterm>
+    <primary>Release notes</primary>
+  </indexterm>
+
+  <para>
+    Changes to each &repmgr; release are documented in the release notes.
+    Please read the release notes for all versions between
+    your current version and the version you plan to upgrade to
+    before performing an upgrade, as there may be version-specific upgrade steps.
+  </para>
+
+  <para>
+    See also: <xref linkend="upgrading-repmgr">
+  </para>
+
+  <sect1 id="release-4.0.4">
+    <title>Release 4.0.4</title>
+    <para><emphasis>Thu Mar 8, 2018</emphasis></para>
+
+    <para>
+      &repmgr; 4.0.4 contains some bug fixes and a number of
+      usability enhancements related to logging/diagnostics,
+      event notifications and pre-action checks.
+    </para>
+    <para>
+      This release can be installed as a simple package upgrade from repmgr 4.0 ~ 4.0.3;
+      <application>repmgrd</application> (if running) should be restarted. See <xref linkend="upgrading-repmgr">
+      for more details.
+    </para>
+
+    <sect2>
+      <title>Usability enhancements</title>
+
+      <para>
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              add <command><link linkend="repmgr-standby-clone">repmgr standby clone --recovery-conf-only</link></command>
+              option to enable integration of a standby cloned from another source into a &repmgr; cluster (GitHub #382)
+            </para>
+          </listitem>
+
+         <listitem>
+            <para>
+              remove restriction on using replication slots when cloning from a Barman server (GitHub #379)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              make <command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command>
+              timeout values configurable (GitHub #387)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              add missing options to main <literal>--help</literal> output (GitHub #391, #392)
+            </para>
+          </listitem>
+
+        </itemizedlist>
+      </para>
+
+    </sect2>
+
+    <sect2>
+      <title>Bug fixes</title>
+      <para>
+
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              ensure <command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>
+              honours the <option>--dry-run</option> option (GitHub #383)
+           </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              improve replication slot warnings generated by
+              <command><link linkend="repmgr-node-status">repmgr node status</link></command>
+              (GitHub #385)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              fix --superuser handling when cloning a standby (GitHub #380)
+           </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <application>repmgrd</application>: improve detection of status change from primary to
+              standby
+           </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <application>repmgrd</application>:  improve reconnection to the local node after a
+              failover (previously a connection error due to the node starting up was being
+              interpreted as the node being unavailable)
+           </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <application>repmgrd</application>: when running on a witness server, correctly connect
+              to new primary after a failover
+           </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <application>repmgrd</application>: add <link linkend="event-notifications">event notification</link>
+              <literal>repmgrd_shutdown</literal> (GitHub #393)
+           </para>
+          </listitem>
+
+        </itemizedlist>
+
+      </para>
+    </sect2>
+
+  </sect1>
+
+  <sect1 id="release-4.0.3">
+    <title>Release 4.0.3</title>
+    <para><emphasis>Thu Feb 15, 2018</emphasis></para>
+
+    <para>
+      &repmgr; 4.0.3 contains some bug fixes and a number of
+      usability enhancements related to logging/diagnostics,
+      event notifications and pre-action checks.
+    </para>
+
+    <para>
+      This release can be installed as a simple package upgrade from repmgr 4.0 ~ 4.0.2;
+      repmgrd (if running) should be restarted.
+    </para>
+    <sect2>
+      <title>Usability enhancements</title>
+
+      <para>
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              improve <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>
+              behaviour when <command>pg_ctl</command> is used to control the server and logging output is
+              not explicitly redirected
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              improve <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>
+              log messages and provide new exit code <literal>ERR_SWITCHOVER_INCOMPLETE</literal> when old primary could
+              not be shut down cleanly
+            </para>
+          </listitem>
+
+         <listitem>
+            <para>
+              add check to verify the demotion candidate can make a replication connection to the
+              promotion candidate before executing a switchover (GitHub #370)
+            </para>
+         </listitem>
+
+         <listitem>
+            <para>
+              add check for sufficient walsenders and replication slots on the promotion candidate  before executing
+              <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>
+              (GitHub #371)
+            </para>
+         </listitem>
+
+          <listitem>
+            <para>
+              add --dry-run mode to <command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>
+              (GitHub #368)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              provide information about the primary node for
+              <command><link linkend="repmgr-standby-register">repmgr standby register</link></command> and
+              <command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command> event notifications (GitHub #375)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              add <literal>standby_register_sync</literal> <link linkend="event-notifications">event notification</link>, which is fired when
+              <command><link linkend="repmgr-standby-register">repmgr standby register</link></command>
+              is run with the <option>--wait-sync</option> option and the new or updated standby node
+              record has synchronised to the standby (GitHub #374)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              when running <command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command>,
+              if any node is unreachable, output the error message encountered in the list of warnings
+              (GitHub #369)
+            </para>
+          </listitem>
+
+        </itemizedlist>
+      </para>
+    </sect2>
+
+    <sect2>
+      <title>Bug fixes</title>
+
+      <para>
+        <itemizedlist>
+          <listitem>
+            <para>
+              ensure an inactive data directory can be overwritten when
+              cloning a standby (GitHub #366)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <command><link linkend="repmgr-node-status">repmgr node status</link></command>
+              upstream node display fixed (GitHub #363)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <command><link linkend="repmgr-primary-unregister">repmgr primary unregister</link></command>:
+              clarify usage and fix <literal>--help</literal> output (GitHub #373)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              parsing of <varname>pg_basebackup_options</varname> fixed (GitHub #376)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              ensure the <filename>pg_subtrans</filename> directory is created when cloning a
+              standby in Barman mode
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <command><link linkend="repmgr-witness-register">repmgr witness register</link></command>:
+              fix primary node check (GitHub #377).
+            </para>
+          </listitem>
+        </itemizedlist>
+
+      </para>
+    </sect2>
+
+  </sect1>
+
+
+  <sect1 id="release-4.0.2">
+    <title>Release 4.0.2</title>
+    <para><emphasis>Thu Jan 18, 2018</emphasis></para>
+
+    <para>
+      &repmgr; 4.0.2 contains some bug fixes and small usability enhancements.
+    </para>
+    <para>
+      This release can be installed as a simple package upgrade from &repmgr; 4.0.1 or 4.0;
+      <application>repmgrd</application> (if running) should be restarted.
+    </para>
+
+    <sect2>
+      <title>Usability enhancements</title>
+
+      <para>
+        <itemizedlist>
+          <listitem>
+            <para>
+              Recognize the <option>-t</option>/<option>--terse</option> option for
+              <command><link linkend="repmgr-cluster-event">repmgr cluster event</link></command> to hide
+              the <literal>Details</literal> column (GitHub #360)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Add "--wait-start" option for
+              <command><link linkend="repmgr-standby-register">repmgr standby register</link></command>
+              (GitHub #356)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Add <literal>%p</literal> <link linkend="event-notifications">event notification parameter</link>
+              for <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>
+            </para>
+          </listitem>
+        </itemizedlist>
+      </para>
+
+    </sect2>
+
+    <sect2>
+      <title>Bug fixes</title>
+
+      <para>
+        <itemizedlist>
+          <listitem>
+            <para>
+              Add missing -W option to <literal>getopt_long()</literal> invocation (GitHub #350)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Automatically create slot name if missing (GitHub #343)
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Fixes to parsing output of remote repmgr invocations (GitHub #349)
+            </para>
+          </listitem>
+
+
+          <listitem>
+            <para>
+              When registering BDR nodes, automatically create missing connection replication set (GitHub #347)
+            </para>
+          </listitem>
+
+
+          <listitem>
+            <para>
+              Handle missing node record in <command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>
+              (GitHub #358)
+            </para>
+          </listitem>
+
+
+        </itemizedlist>
+      </para>
+
+    </sect2>
+
+    <sect2>
+      <title>Documentation</title>
+
+      <para>
+        <itemizedlist>
+          <listitem>
+            <para>
+              The documentation can now be built as a single HTML file (GitHub pull request #353)
+            </para>
+          </listitem>
+        </itemizedlist>
+      </para>
+    </sect2>
+
+  </sect1>
+
+ <sect1 id="release-4.0.1">
+  <title>Release 4.0.1</title>
+
+  <para><emphasis>Wed Dec 13, 2017</emphasis></para>
+
+  <para>
+    &repmgr; 4.0.1 is a bugfix release.
+  </para>
+  <sect2>
+    <title>Bug fixes</title>
+    <para>
+      <itemizedlist>
+        <listitem>
+          <para>
+            ensure correct return codes are returned for
+            <command><link linkend="repmgr-node-check">repmgr node check --action=</link></command> operations
+            (GitHub #340)
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Fix <xref linkend="repmgr-cluster-show"> when <literal>repmgr</literal> schema not set in search path
+            (GitHub #341)
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            When using <literal>--force-rewind</literal> with <xref linkend="repmgr-node-rejoin">
+            delete any replication slots copied by <application>pg_rewind</application>
+            (GitHub #334)
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Only perform sanity check on accessibility of configuration files outside
+            the data directory when <literal>--copy-external-config-files</literal>
+            provided (GitHub #342)
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Initialise "voting_term" table in application, not extension SQL
+            (GitHub #344)
+          </para>
+        </listitem>
+
+      </itemizedlist>
+    </para>
+  </sect2>
+ </sect1>
+
+
+
+ <sect1 id="release-4.0.0">
+  <title>Release 4.0.0</title>
+
+  <para><emphasis>Tue Nov 21, 2017</emphasis></para>
+
+  <para>
+    repmgr 4.0 is an entirely new version of &repmgr;, implementing &repmgr;
+    as a native PostgreSQL extension, adding new and improving existing features,
+    and making &repmgr; more user-friendly and intuitive to use. The new code base
+    will make it easier to add additional functionality for future releases.
+  </para>
+  <note>
+    <simpara>
+      With the new version, the opportunity has been taken to
+      make some changes in the way &repmgr; is set up and
+      configured. In particular changes have been made to some
+      configuration file settings for consistency and clarity.
+      Changes are covered in detail below.
+    </simpara>
+    <simpara>
+      To standardise terminology, from this release <literal>primary</literal> is used to
+      denote the read/write node in a streaming replication cluster. <literal>master</literal>
+      is still accepted as an alias for &repmgr; commands
+      (e.g. <link linkend="repmgr-primary-register"><command>repmgr master register</command></link>).
+    </simpara>
+  </note>
+
+  <para>
+    For detailed instructions on upgrading from repmgr 3.x, see <xref linkend="upgrading-from-repmgr-3">.
+  </para>
+
+  <sect2>
+    <title>Features and improvements</title>
+    <para>
+
+      <itemizedlist>
+        <listitem>
+          <para>
+            <emphasis>improved switchover</emphasis>:
+            the <command>switchover</command> process has been improved and streamlined,
+            speeding up the switchover process; it can also instruct other standbys
+            to follow the new primary once the switchover has completed. See
+             <xref linkend="performing-switchover"> for more details.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+           <emphasis>"--dry-run" option</emphasis>: many &repmgr; commands now provide
+           a <literal>--dry-run</literal> option which will execute the command as far
+           as possible without making any changes, which will enable possible issues
+           to be identified before the intended operation is actually carried out.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <emphasis>easier upgrades</emphasis>: &repmgr; is now implemented as a native
+            PostgreSQL extension, which means future upgrades can be carried out by
+            installing the upgraded package and issuing
+            <ulink url="https://www.postgresql.org/docs/current/static/sql-alterextension.html">ALTER EXTENSION repmgr UPDATE</ulink>.
+          </para>
+        </listitem>
+
+
+        <listitem>
+          <para>
+            <emphasis>improved logging output</emphasis>:
+            &repmgr; (and <application>repmgrd</application>) now provide more explicit
+            logging output giving a better picture of what is going on. Where appropriate,
+            <literal>DETAIL</literal> and <literal>HINT</literal> log lines provide additional
+            detail and suggestions for resolving problems. Additionally, <application>repmgrd</application>
+            now emits informational log lines at regular, configurable intervals
+            to confirm that it's running correctly and which node(s) it's monitoring.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <emphasis>automatic configuration file location in packages</emphasis>:
+            Many operating system packages place the &repmgr; configuration files
+            in a version-specific subdirectory, e.g. <filename>/etc/repmgr/9.6/repmgr.conf</filename>;
+            &repmgr; now makes it easy for package maintainers to provide a patch
+            with the actual file location, meaning <filename>repmgr.conf</filename>
+            does not need to be provided explicitly. This is currently the case
+            for 2ndQuadrant-provided <literal>.deb</literal> and <literal>.rpm</literal> packages.
+          </para>
+        </listitem>
+
+
+        <listitem>
+          <para>
+            <emphasis>monitoring and status checks</emphasis>:
+            New commands <xref linkend="repmgr-node-check"> and
+            <xref linkend="repmgr-node-status"> providing information
+            about a node's status and replication-related monitoring
+            output.
+          </para>
+        </listitem>
+
+
+        <listitem>
+          <para>
+            <emphasis>node rejoin</emphasis>:
+            New command <xref linkend="repmgr-node-rejoin"> enables a failed
+            primary to be rejoined to a replication cluster, optionally using
+            <application>pg_rewind</application> to synchronise its data,
+            (note that <application>pg_rewind</application> may not be useable
+            in some circumstances).
+          </para>
+        </listitem>
+
+
+        <listitem>
+          <para>
+            <emphasis>automatic failover</emphasis>:
+            improved detection of node status; promotion decision based on a consensual
+            model, with the promoted primary explicitly informing other standbys to
+            follow it. The <application>repmgrd</application> daemon will continue
+            functioning even if the monitored PostgreSQL instance is down, and resume
+            monitoring if it reappears. Additionally, if the instance's role has changed
+            (typically from a primary to a standby, e.g. following reintegration of a
+            failed primary using <xref linkend="repmgr-node-rejoin">) <application>repmgrd</application>
+            will automatically resume monitoring it as a standby.
+          </para>
+        </listitem>
+
+
+
+        <listitem>
+          <para>
+            <emphasis>new documentation</emphasis>:
+            the existing documentation spread over multiple text files
+            has been consolidated into DocBook format (as used by the
+            main PostgreSQL project) and is now available online in
+            HTML format.
+          </para>
+          <para>
+            The DocBook files can easily be used to create versions
+            of the documentation in other formats such as PDF.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+    </para>
+  </sect2>
+  <sect2>
+    <title>New command line options</title>
+    <para>
+      <itemizedlist>
+
+        <listitem><para>
+          <literal>--dry-run</literal>: &repmgr; will attempt to perform
+          the action as far as possible without making any changes to the
+          database
+        </para></listitem>
+
+        <listitem>
+          <para>
+            <literal>--upstream-node-id</literal>: use to specify the upstream node
+            the standby will connect to and later stream from, when <link linkend="repmgr-standby-clone">cloning</link>
+            and <link linkend="repmgr-standby-register">registering</link> a standby.
+          </para>
+          <para>
+            This replaces the configuration file parameter <varname>upstream_node</varname>,
+            as the upstream node is set when the standby is initially cloned, but can change
+            over the lifetime of an installation (due to failovers, switchovers etc.) so it's
+            pointless/confusing keeping the original value around in <filename>repmgr.conf</filename>.
+        </para></listitem>
+
+      </itemizedlist>
+    </para>
+  </sect2>
+
+  <sect2>
+    <title>Changed command line options</title>
+    <para>
+      <application>repmgr</application>
+      <itemizedlist>
+
+        <listitem><para>
+            <literal>--replication-user</literal> has been deprecated; it has been replaced
+            by the configuration file option <varname>replication_user</varname>.
+            The value (which defaults to the user provided in the <varname>conninfo</varname>
+            string) will be stored in the &repmgr; metadata for use by
+            <xref linkend="repmgr-standby-clone"> and <xref linkend="repmgr-standby-follow">.
+        </para></listitem>
+
+        <listitem><para>
+            <literal>--recovery-min-apply-delay</literal> is now a configuration file parameter
+            <varname>recovery_min_apply_delay</varname>, to ensure the setting does not get lost
+            when a standby follows a new upstream.
+        </para></listitem>
+
+        <listitem><para>
+            <literal>--no-conninfo-password</literal> is deprecated; a password included in
+            the environment variable <varname>PGPASSWORD</varname> will no longer be added
+            to <varname>primary_conninfo</varname> by default; to force the inclusion
+            of a password (not recommended), use the new configuration file parameter
+            <varname>use_primary_conninfo_password</varname>. For details, see section
+            <xref linkend="cloning-advanced-managing-passwords">.
+        </para></listitem>
+
+      </itemizedlist>
+    </para>
+
+    <para>
+      <application>repmgrd</application>
+      <itemizedlist>
+
+        <listitem><para>
+            <literal>--monitoring-history</literal> is deprecated and is replaced by the
+            configuration file option <varname>monitoring_history</varname>.
+            This enables the setting to be changed without having to modify system service
+            files.
+        </para></listitem>
+
+      </itemizedlist>
+    </para>
+
+  </sect2>
+
+  <sect2>
+    <title>Configuration file changes</title>
+
+    <para><emphasis>Required settings</emphasis></para>
+    <para>The following 4 parameters are mandatory in <filename>repmgr.conf</filename>:
+      <itemizedlist spacing="compact" mark="bullet">
+
+        <listitem>
+          <simpara>node_id</simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>node_name</simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>conninfo</simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>data_directory</simpara>
+        </listitem>
+      </itemizedlist>
+    </para>
+
+   <para><emphasis>Renamed settings</emphasis></para>
+   <para>
+     Some settings have been renamed for clarity and consistency:
+     <itemizedlist spacing="compact" mark="bullet">
+
+       <listitem>
+         <simpara><varname>node</varname> is now <varname>node_id</varname></simpara>
+       </listitem>
+
+       <listitem>
+         <simpara><varname>name</varname> is now <varname>node_name</varname></simpara>
+       </listitem>
+
+       <listitem>
+         <simpara><varname>barman_server</varname> is now <varname>barman_host</varname></simpara>
+       </listitem>
+
+       <listitem>
+         <simpara><varname>master_response_timeout</varname> is now
+           <varname>async_query_timeout</varname> (to better indicate its purpose)
+         </simpara>
+       </listitem>
+
+     </itemizedlist>
+   </para>
+
+   <para>
+     The following configuration file parameters have been renamed for consistency
+     with other parameters (and conform to the pattern used by PostgreSQL itself,
+     which uses the prefix <varname>log_</varname> for logging parameters):
+
+    <itemizedlist spacing="compact" mark="bullet">
+
+      <listitem>
+        <simpara><varname>loglevel</varname> is now <varname>log_level</varname></simpara>
+      </listitem>
+
+      <listitem>
+        <simpara><varname>logfile</varname> is now <varname>log_file</varname></simpara>
+      </listitem>
+
+      <listitem>
+        <simpara><varname>logfacility</varname> is now <varname>log_facility</varname></simpara>
+      </listitem>
+
+    </itemizedlist>
+   </para>
+
+   <para><emphasis>Removed settings</emphasis></para>
+   <para>
+     <itemizedlist spacing="compact" mark="bullet">
+
+      <listitem>
+        <simpara><varname>cluster</varname> has been removed</simpara>
+      </listitem>
+      <listitem>
+        <simpara><varname>upstream_node</varname> - see note about
+          <literal>--upstream-node-id</literal> above</simpara>
+      </listitem>
+
+      <listitem>
+        <simpara><varname>retry_promote_interval_secs</varname> - this is now redundant due
+          to changes in the failover/promotion mechanism; the new equivalent is
+          <varname>primary_notification_timeout</varname> </simpara>
+      </listitem>
+     </itemizedlist>
+   </para>
+
+   <para><emphasis>Logging changes</emphasis></para>
+   <para>
+     <itemizedlist spacing="compact" mark="bullet">
+
+      <listitem>
+        <simpara>
+          default value for <varname>log_level</varname> is <literal>INFO</literal>
+          rather than <literal>NOTICE</literal>.
+        </simpara>
+      </listitem>
+
+      <listitem>
+        <simpara>
+          new parameter <varname>log_status_interval</varname>, which causes
+          <application>repmgrd</application> to emit a status log
+          line at the specified interval
+        </simpara>
+      </listitem>
+
+     </itemizedlist>
+
+   </para>
+
+  </sect2>
+  <sect2>
+    <title>repmgrd</title>
+    <para>
+      The shared library has been renamed from <literal>repmgr_funcs</literal> to
+      <literal>repmgr</literal>,  meaning <varname>shared_preload_libraries</varname>
+      in <filename>postgresql.conf</filename> needs to be updated to the new name:
+      <programlisting>
+        shared_preload_libraries = 'repmgr'</programlisting>
+    </para>
+  </sect2>
+
+ </sect1>
+
+</appendix>
--- a/doc/appendix-signatures.sgml
+++ b/doc/appendix-signatures.sgml
@@ -1,5 +1,66 @@
 <appendix id="appendix-signatures" xreflabel="Verifying digital signatures">
 <title>Verifying digital signatures</title>

- <para>WIP</para>
+ <sect1 id="repmgr-source-key" xreflabel="repmgr source key">
+   <title>repmgr source code signing key</title>
+   <para>
+     The signing key ID used for <application>repmgr</application> source code bundles is:
+     <ulink url="http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr">
+       <literal>0x297F1DCC</literal></ulink>.
+   </para>
+
+   <para>
+     To download the <application>repmgr</application> source key to your computer:
+     <programlisting>
+       curl -s http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr | gpg --import
+       gpg --fingerprint 0x297F1DCC
+     </programlisting>
+     then verify that the fingerprint is the expected value:
+     <programlisting>
+       085A BE38 6FD9 72CE 6365  340D 8365 683D 297F 1DCC</programlisting>
+   </para>
+
+   <para>
+     For checking tarballs, first download and import the <application>repmgr</application>
+     source signing key as shown above. Then download both the source tarball and the detached
+     signature (e.g. <filename>repmgr-4.0beta1.tar.gz</filename> and
+     <filename>repmgr-4.0beta1.tar.gz.asc</filename>) from
+     <ulink url="https://repmgr.org/download/">https://repmgr.org/download/</ulink>
+     and use <application>gpg</application> to verify the signature, e.g.:
+     <programlisting>
+       gpg --verify repmgr-4.0beta1.tar.gz.asc</programlisting>
+   </para>
+
+ </sect1>
+
+ <sect1 id="repmgr-rpm-key" xreflabel="repmgr rpm key">
+   <title>repmgr RPM signing key</title>
+   <para>
+     The signing key ID used for <application>repmgr</application> RPM packages is:
+     <ulink url="http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr">
+       <literal>0x702D883A</literal></ulink>.
+   </para>
+
+   <para>
+     To download the <application>repmgr</application> RPM key to your computer:
+     <programlisting>
+       curl -s http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr | gpg --import
+       gpg --fingerprint 0x702D883A
+     </programlisting>
+     then verify that the fingerprint is the expected value:
+     <programlisting>
+       AE4E 390E A58E 0037 6148  3F29 888D 018B 702D 883A</programlisting>
+   </para>
+
+   <para>
+     To check a repository RPM, use <application>rpmkeys</application> to load the
+      packaging signing key into the RPM database then use <literal>rpm -K</literal>, e.g.:
+     <programlisting>
+       sudo rpmkeys --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr
+       rpm -K postgresql-bdr94-2ndquadrant-redhat-1.0-2.noarch.rpm
+     </programlisting>
+   </para>
+
+ </sect1>
+
 </appendix>
--- a/doc/bdr-failover.md
+++ b/doc/bdr-failover.md
@@ -1,288 +1,8 @@
 BDR failover with repmgrd
 =========================

-`repmgr 4` provides support for monitoring BDR nodes and taking action in case
-one of the nodes fails.
+This document has been integrated into the main `repmgr` documentation
+and is now located here:

-    *NOTE* Due to the nature of BDR, it's only safe to use this solution for
-    a two-node scenario. Introducing additional nodes will create an inherent
-    risk of node desynchronisation if a node goes down without being cleanly
-    removed from the cluster.
+> [BDR failover with repmgrd](https://repmgr.org/docs/4.0/repmgrd-bdr.html)

-In contrast to streaming replication, there's no concept of "promoting" a new
-primary node with BDR. Instead, "failover" involves monitoring both nodes
-with `repmgrd` and redirecting queries from the failed node to the remaining
-active node. This can be done by using the event notification script generated by
-`repmgrd` to dynamically reconfigure a proxy server/connection pooler such
-as PgBouncer.
-
-
-Prerequisites
-------------
-
-`repmgr 4` requires PostgreSQL 9.6 with the BDR 2 extension enabled and
-configured for a two-node BDR network. `repmgr 4` packages
-must be installed on each node before attempting to configure repmgr.
-
-    *NOTE* `repmgr 4` will refuse to install if it detects more than two
-    BDR nodes.
-
-Application database connections *must* be passed through a proxy server/
-connection pooler such as PgBouncer, and it must be possible to dynamically
-reconfigure that from `repmgrd`. The example demonstrated in this document
-will use PgBouncer.
-
-The proxy server / connection poolers must not be installed on the database
-servers.
-
-For this example, it's assumed password-less SSH connections are available
-from the PostgreSQL servers to the servers where PgBouncer runs, and
-that the user on those servers has permission to alter the PgBouncer
-configuration files.
-
-PostgreSQL connections must be possible between each node, and each node
-must be able to connect to each PgBouncer instance.
-
-
-Configuration
-------------
-
-Sample configuration for `repmgr.conf`:
-
-    node_id=1
-    node_name='node1'
-    conninfo='host=node1 dbname=bdrtest user=repmgr connect_timeout=2'
-    replication_type='bdr'
-
-    event_notifications=bdr_failover
-    event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1'
-
-    # repmgrd options
-    monitor_interval_secs=5
-    reconnect_attempts=6
-    reconnect_interval=5
-
-Adjust settings as appropriate; copy and adjust for the second node (particularly
-the values `node_id`, `node_name` and `conninfo`).
-
-Note that the values provided for the `conninfo` string must be valid for
-connections from *both* nodes in the cluster. The database must be the BDR
-database.
-
-If defined, `event_notifications` will restrict execution of `event_notification_command`
-to the specified events.
-
-`event_notification_command` is the script which does the actual "heavy lifting"
-of reconfiguring the proxy server/ connection pooler. It is fully user-definable;
-a sample implementation is documented below.
-
-
-repmgr user permissions
-----------------------
-
-`repmgr` will create an extension in the BDR database containing objects
-for administering `repmgr` metadata. The user defined in the `conninfo`
-setting must be able to access all objects. Additionally, superuser permissions
-are required to install the `repmgr` extension. The easiest way to do this
-is create the `repmgr` user as a superuser, however if this is not
-desirable, the `repmgr` user can be created as a normal user and a
-superuser specified with `--superuser` when registering a BDR node.
-
-repmgr setup
------------
-
-Register both nodes:
-
-    $ repmgr -f /etc/repmgr.conf bdr register
-    NOTICE: attempting to install extension "repmgr"
-    NOTICE: "repmgr" extension successfully installed
-    NOTICE: node record created for node 'node1' (ID: 1)
-    NOTICE: BDR node 1 registered (conninfo: host=localhost dbname=bdrtest user=repmgr port=5501)
-
-    $ repmgr -f /etc/repmgr.conf bdr register
-    NOTICE: node record created for node 'node2' (ID: 2)
-    NOTICE: BDR node 2 registered (conninfo: host=localhost dbname=bdrtest user=repmgr port=5502)
-
-The `repmgr` extension will be automatically created when the first
-node is registered, and will be propagated to the second node.
-
-    *IMPORTANT* ensure the repmgr package is available on both nodes before
-    attempting to register the first node
-
-
-At this point the meta data for both nodes has been created; executing
-`repmgr cluster show` (on either node) should produce output like this:
-
-    $ repmgr -f /etc/repmgr.conf cluster show
-     ID | Name  | Role | Status    | Upstream | Connection string
-    ----+-------+------+-----------+----------+--------------------------------------------------------
-     1  | node1 | bdr  | * running |          | host=node1 dbname=bdrtest user=repmgr connect_timeout=2
-     2  | node2 | bdr  | * running |          | host=node2 dbname=bdrtest user=repmgr connect_timeout=2
-
-Additionally it's possible to see a log of significant events; so far
-this will only record the two node registrations (in reverse chronological order):
-
-     Node ID | Event        | OK | Timestamp           | Details
-    ---------+--------------+----+---------------------+----------------------------------------------
-     2       | bdr_register | t  | 2017-07-27 17:51:48 | node record created for node 'node2' (ID: 2)
-     1       | bdr_register | t  | 2017-07-27 17:51:00 | node record created for node 'node1' (ID: 1)
-
-
-Defining the "event_notification_command"
-----------------------------------------
-
-Key to "failover" execution is the `event_notification_command`, which is a
-user-definable script which should reconfigure the  proxy server/
-connection pooler.
-
-Each time `repmgr` (or `repmgrd`) records an event, it can optionally
-execute the script defined in `event_notification_command` to
-take further action; details of the event will be passed as parameters.
-Following placeholders are available to the script:
-
-    %n - node ID
-    %e - event type
-    %s - success (1 or 0)
-    %t - timestamp
-    %d - details
-    %c - conninfo string of the next available node
-    %a - name of the next available node
-
-Note that `%c` and `%a` will only be provided during `bdr_failover`
-events, which is what is of interest here.
-
-The provided sample script (`scripts/bdr-pgbouncer.sh`) is configured like
-this:
-
-    event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a"'
-
-and parses the configures parameters like this:
-
-    NODE_ID=$1
-    EVENT_TYPE=$2
-    SUCCESS=$3
-    NEXT_CONNINFO=$4
-    NEXT_NODE_NAME=$5
-
-It also contains some hard-coded values about the PgBouncer configuration for
-both nodes; these will need to be adjusted for your local environment of course
-(ideally the scripts would be maintained as templates and generated by some
-kind of provisioning system).
-
-The script performs following steps:
-
- - pauses PgBouncer on all nodes
- - recreates the PgBouncer configuration file on each node using the information
-   provided by `repmgrd` (mainly the `conninfo` string) to configure PgBouncer
-   to point to the remaining node
- - reloads the PgBouncer configuration
- - resumes PgBouncer
-
-From that point, any connections to PgBouncer on the failed BDR node will be redirected
-to the active node.
-
-
-repmgrd
-------
-
-
-
-Node monitoring and failover
----------------------------
-
-At the intervals specified by `monitor_interval_secs` in `repmgr.conf`, `repmgrd`
-will ping each node to check if it's available. If a node isn't available,
-`repmgrd` will enter failover mode and  check `reconnect_attempts` times
-at intervals of `reconnect_interval` to confirm the node is definitely unreachable.
-This buffer period is necessary to avoid false positives caused by transient
-network outages.
-
-If the node is still unavailable, `repmgrd` will enter failover mode and execute
-the script defined in `event_notification_command`; an entry will be logged
-in the `repmgr.events` table and `repmgrd` will (unless otherwise configured)
-resume monitoring of the node in "degraded" mode until it reappears.
-
-`repmgrd` logfile output during a failover event will look something like this
-one one node (usually the node which has failed, here "node2"):
-
-    ...
-    [2017-07-27 21:08:39] [INFO] starting continuous BDR node monitoring
-    [2017-07-27 21:08:39] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
-    [2017-07-27 21:08:55] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
-    [2017-07-27 21:09:11] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
-    [2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
-    [2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
-    [2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
-    [2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
-    [2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
-    [2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
-    [2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
-    [2017-07-27 21:09:28] [NOTICE] setting node record for node 2 to inactive
-    [2017-07-27 21:09:28] [INFO] executing notification command for event "bdr_failover"
-    [2017-07-27 21:09:28] [DETAIL] command is:
-      /path/to/bdr-pgbouncer.sh 2 bdr_failover 1 "host=host=node1 dbname=bdrtest user=repmgr connect_timeout=2" "node1"
-    [2017-07-27 21:09:28] [INFO] node 'node2' (ID: 2) detected as failed; next available node is 'node1' (ID: 1)
-    [2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
-    [2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
-    ...
-
-Output on the other node ("node1") during the same event will look like this:
-
-    [2017-07-27 21:08:35] [INFO] starting continuous BDR node monitoring
-    [2017-07-27 21:08:35] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
-    [2017-07-27 21:08:51] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
-    [2017-07-27 21:09:07] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
-    [2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
-    [2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
-    [2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
-    [2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
-    [2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
-    [2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
-    [2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
-    [2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
-    [2017-07-27 21:09:28] [NOTICE] other node's repmgrd is handling failover
-    [2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
-    [2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
-
-This assumes only the PostgreSQL instance on "node2" has failed. In this case the
-`repmgrd` instance running on "node2" has performed the failover. However if
-the entire server becomes unavailable, `repmgrd` on "node1" will perform
-the failover.
-
-
-Node recovery
-------------
-
-Following failure of a BDR node, if the node subsequently becomes available again,
-a `bdr_recovery` event will be generated. This could potentially be used to
-reconfigure PgBouncer automatically to bring the node back into the available pool,
-however it would be prudent to manually verify the node's status before
-exposing it to the application.
-
-If the failed node comes back up and connects correctly, output similar to this
-will be visible in the `repmgrd` log:
-
-    [2017-07-27 21:25:30] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
-    [2017-07-27 21:25:46] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
-    [2017-07-27 21:25:46] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
-    [2017-07-27 21:25:55] [INFO] active replication slot for node "node1" found after 1 seconds
-    [2017-07-27 21:25:55] [NOTICE] node "node2" (ID: 2) has recovered after 986 seconds
-
-
-Shutdown of both nodes
----------------------
-
-If both PostgreSQL instances are shut down, `repmgrd` will try and handle the
-situation as gracefully as possible, though with no failover candidates available
-there's not much it can do. Should this case ever occur, we recommend shutting
-down `repmgrd` on both nodes and restarting it once the PostgreSQL instances
-are running properly.
--- a/doc/changes-in-repmgr4.md
+++ b/doc/changes-in-repmgr4.md
@@ -1,106 +1,7 @@
+Changes in repmgr 4
+===================

-Standardisation on `primary`
----------------------------
+This document has been integrated into the main `repmgr` documentation
+and is now located here:

-To standardise terminology, `primary` is used to denote the read/write
-node in a streaming replication cluster. `master` is still accepted
-as a synonym (e.g. `repmgr master register`).
-
-
-New command line options
------------------------
-
- `--dry-run`: repmgr will attempt to perform the action as far as possible
-   without making any changes to the database
-
- `--upstream-node-id`: use to specify the upstream node the standby will
-  connect later stream from, when cloning a standby. This replaces the configuration
-  file parameter `upstream_node`, as the upstream node is set when the standby
-  is initially cloned, but can change over the lifetime of an installation (due
-  to failovers, switchovers etc.) so it's pointless/confusing keeping the original
-  value around in the config file.
-
-Changed command line options
----------------------------
-
-### repmgr
-
- `--replication-user` has been deprecated; it has been replaced by the
-  configuration file option `replication_user`.  The value (which defaults
-  to the user in the `conninfo` string) will be stored in the repmgr metadata
-  for use by  standby clone/follow..
-
- `--recovery-min-apply-delay` is now a configuration file parameter
-  `recovery_min_apply_delay, to ensure the setting does not get lost when
-  a standby follows a new upstream.
-
-### repmgrd
-
- `--monitoring-history` is deprecated and has been replaced by the
-  configuration file option `monitoring_history`. This enables the
-  setting to be changed without having to modify system service files.
-
-Changes to repmgr commands
--------------------------
-
-
-### `repmgr cluster show`
-
-This now displays the role of each node (e.g. `primary`, `standby`)
-and its status in separate columns.
-
-The `--csv` option now emits a third column indicating the recovery
-status of the node.
-
-
-Configuration file changes
--------------------------
-
-### Required settings
-
-The following 4 parameters are mandatory in `repmgr.conf`:
-
- `node_id`
- `node_name`
- `conninfo`
- `data_directory`
-
-
-### Renamed settings
-
-Some settings have been renamed for clarity and consistency:
-
- `node`: now `node_id`
- `name`: now `node_name`
- `master_reponse_timeout`: now `async_query_timeout` to better indicate its
-   purpose
-
- The following configuration file parameters have been renamed for consistency
-  with other parameters (and conform to the pattern used by PostgreSQL itself,
-  which uses the prefix `log_` for logging parameters):
-  - `loglevel` has been renamed to `log_level`
-  - `logfile` has been renamed to `log_file`
-  - `logfacility` has been renamed to `log_facility`
-
-### Removed settings
-
- `cluster`: has been removed
- `upstream_node`: see note about `--upstream-node-id` above.
- `retry_promote_interval_secs`: this is now redundant due to changes in the
-   failover/promotion mechanism; the new equivalent is `primary_notification_timeout`
-
-
-### Logging changes
-
- default value for `log_level` is `INFO` rather than `NOTICE`.
- new parameter `log_status_interval`, which causes `repmgrd` to emit a status log
-  line at the specified interval
-
-
-repmgrd
-------
-
-The `repmgr` shared library has been renamed from `repmgr_funcs` to `repmgr`,
-meaning `shared_preload_libraries` needs to be updated to the new name:
-
-    shared_preload_libraries = 'repmgr'
+> [Release notes](https://repmgr.org/docs/4.0/release-4.0.html)
--- a/doc/cloning-standbys.sgml
+++ b/doc/cloning-standbys.sgml
@@ -1,13 +1,16 @@
 <chapter id="cloning-standbys" xreflabel="cloning standbys">
 <title>Cloning standbys</title>
- <para>
- </para>

 <sect1 id="cloning-from-barman" xreflabel="Cloning from Barman">
   <indexterm>
-     <primary>cloning</primary>
-     <secondary>from Barman</secondary>
+    <primary>cloning</primary>
+    <secondary>from Barman</secondary>
   </indexterm>
+   <indexterm>
+    <primary>Barman</primary>
+    <secondary>cloning a standby</secondary>
+   </indexterm>
+
   <title>Cloning a standby from Barman</title>
   <para>
    <xref linkend="repmgr-standby-clone"> can use
@@ -70,7 +73,8 @@
      <para>
        the <varname>restore_command</varname> setting in <filename>repmgr.conf</filename> is configured to
        use a copy of the <command>barman-wal-restore</command> script shipped with the
-        <literal>barman-cli</literal> package (see below);
+        <literal>barman-cli</literal> package (see section <xref linkend="cloning-from-barman-restore-command">
+        below).
      </para>
     </listitem>
     <listitem>
@@ -122,6 +126,11 @@
   </para>
  </sect2>
  <sect2 id="cloning-from-barman-restore-command" xreflabel="Using Barman as a WAL file source">
+  <indexterm>
+    <primary>Barman</primary>
+    <secondary>fetching archived WAL</secondary>
+   </indexterm>
+
   <title>Using Barman as a WAL file source</title>
   <para>
    As a fallback in case streaming replication is interrupted, PostgreSQL can optionally
@@ -188,11 +197,10 @@
     To enable &repmgr; to use replication slots, set the boolean parameter
     <varname>use_replication_slots</varname> in <filename>repmgr.conf</filename>:
     <programlisting>
-       use_replication_slots=true
-     </programlisting>
+       use_replication_slots=true</programlisting>
   </para>
   <para>
-    Replication slots must be enabled in <filename>postgresql.conf</filename>` by
+    Replication slots must be enabled in <filename>postgresql.conf</filename> by
    setting the parameter <varname>max_replication_slots</varname> to at least the
    number of expected standbys (changes to this parameter require a server restart).
   </para>
@@ -339,16 +347,36 @@

   <sect2 id="cloning-advanced-pg-basebackup-options" xreflabel="pg_basebackup options when cloning a standby">
    <title>pg_basebackup options when cloning a standby</title>
+    <para>
+      As &repmgr; uses <command>pg_basebackup</command> to clone a standby, it's possible to
+      provide additional parameters for <command>pg_basebackup</command> to customise the
+      cloning process.
+    </para>
    <para>
     By default, <command>pg_basebackup</command> performs a checkpoint before beginning the backup
     process. However, a normal checkpoint may take some time to complete;
     a fast checkpoint can be forced with the <literal>-c/--fast-checkpoint</literal> option.
-     However this may impact performance of the server being cloned from
+     However this may impact performance of the server being cloned from (typically the primary)
     so should be used with care.
    </para>
+    <tip>
+      <simpara>
+        If <application>Barman</application> is set up for the cluster, it's possible to
+        clone the standby directly from Barman, without any impact on the server the standby
+        is being cloned from. For more details see <xref linkend="cloning-from-barman">.
+      </simpara>
+    </tip>
+    <para>
+      Other options can be passed to <command>pg_basebackup</command> by including them
+      in the <filename>repmgr.conf</filename> setting <varname>pg_basebackup_options</varname>.
+    </para>
+    <para>
+      If using a separate directory to store WAL files, provide the option <literal>--waldir</literal>
+      (<literal>--xlogdir</literal> in PostgreSQL 9.6 and earlier) with the absolute path to the
+      WAL directory. Any WALs generated during the cloning process will be copied here, and
+      a symlink will automatically be created from the main data directory.
+    </para>
    <para>
-     Further options can be passed to the <command>pg_basebackup</command> utility via
-     the setting <varname>pg_basebackup_options</varname> in <filename>repmgr.conf</filename>.
     See the <ulink url="https://www.postgresql.org/docs/current/static/app-pgbasebackup.html">PostgreSQL pg_basebackup documentation</ulink>
     for more details of available options.
    </para>
@@ -372,7 +400,7 @@
     If, for whatever reason, you wish to include the password in <filename>recovery.conf</filename>,
     set <varname>use_primary_conninfo_password</varname> to <literal>true</literal> in
     <filename>repmgr.conf</filename>. This will read a password set in <varname>PGPASSWORD</varname>
-     (but not <filename>~/.pgpass</filename>) and place it into the <literal>primary_conninfo</literal>
+     (but not <filename>~/.pgpass</filename>) and place it into the <varname>primary_conninfo</varname>
     string in <filename>recovery.conf</filename>. Note that <varname>PGPASSWORD</varname>
     will need to be set during any action which causes <filename>recovery.conf</filename> to be
     rewritten, e.g. <xref linkend="repmgr-standby-follow">.
@@ -382,6 +410,15 @@
     string for each node, but this is obviously a security risk and should be
     avoided.
    </para>
+    <para>
+      From PostgreSQL 9.6, <application>libpq</application> supports the <varname>passfile</varname>
+      parameter in connection strings, which can be used to specify a password file other than
+      the default <filename>~/.pgpass</filename>.
+    </para>
+    <para>
+      To have &repmgr; write a custom password file in <varname>primary_conninfo</varname>,
+      specify its location in <varname>passfile</varname> in <filename>repmgr.conf</filename>.
+    </para>
   </sect2>

   <sect2 id="cloning-advanced-replication-user" xreflabel="Separate replication user">
@@ -392,7 +429,7 @@
     the replication user should be set in <filename>repmgr.conf</filename> via the parameter
     <varname>replication_user</varname>; &repmgr; will use this value when making
     replication connections and generating <filename>recovery.conf</filename>. This
-     value will also be stored in the <literal>repmgr.nodes</literal>
+     value will also be stored in the <literal>repmgr.nodes</literal>
     table for each node; it no longer needs to be explicitly specified when
     cloning a node or executing <xref linkend="repmgr-standby-follow">.
    </para>
--- a/doc/configuration-file-settings.sgml
+++ b/doc/configuration-file-settings.sgml
@@ -1,4 +1,9 @@
 <sect1 id="configuration-file-settings" xreflabel="configuration file settings">
+  <indexterm>
+    <primary>repmgr.conf</primary>
+    <secondary>settings</secondary>
+  </indexterm>
+
 <title>Configuration file settings</title>
 <para>
   Each <filename>repmgr.conf</filename> file must contain the following parameters:
--- a/doc/configuration-file.sgml
+++ b/doc/configuration-file.sgml
@@ -1,4 +1,14 @@
 <sect1 id="configuration-file" xreflabel="configuration file location">
+  <indexterm>
+    <primary>repmgr.conf</primary>
+    <secondary>location</secondary>
+  </indexterm>
+
+  <indexterm>
+    <primary>configuration</primary>
+    <secondary>repmgr.conf location</secondary>
+  </indexterm>
+
  <title>Configuration file location</title>
  <para>
    <application>repmgr</application> and <application>repmgrd</application>
@@ -7,7 +17,7 @@
    <filename>repmgr.conf</filename> must contain a number of required parameters, including
    the database connection string for the local node and the location
    of its data directory; other values will be inferred from defaults if
-    not explicitly supplied. See section `configuration file parameters`
+    not explicitly supplied. See section <xref linkend="configuration-file-settings">
    for more details.
  </para>

@@ -15,7 +25,7 @@
   The configuration file will be searched for in the following locations:
   <itemizedlist spacing="compact" mark="bullet">
    <listitem>
-     <para>a configuration file specified by the `-f/--config-file` command line option</para>
+     <para>a configuration file specified by the <literal>-f/--config-file</literal> command line option</para>
    </listitem>
    <listitem>
     <para>
@@ -37,10 +47,23 @@
  </para>

  <para>
-   Note that if a file is explicitly specified with <application>-f/--config-file</application>,
-   an error will be raised if it is not found or not readable and no attempt will be made to
+   Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
+   an error will be raised if it is not found or not readable, and no attempt will be made to
   check default locations; this is to prevent <application>repmgr</application> unexpectedly
-   reading the wrong file.
+   reading the wrong configuration file.
  </para>

+  <note>
+    <para>
+      If providing the configuration file location with <literal>-f/--config-file</literal>,
+      avoid using a relative path, particularly when executing <xref linkend="repmgr-primary-register">
+      and <xref linkend="repmgr-standby-register">, as &repmgr; stores the configuration file location
+      in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
+      <xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert the
+      relative path into an absolute one, but this may not be the same as the path you
+      would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
+      to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
+      <filename>/path/to/repmgr.conf</filename>).
+    </para>
+  </note>
 </sect1>
--- a/doc/configuration.sgml
+++ b/doc/configuration.sgml
@@ -1,7 +1,24 @@
 <chapter id="configuration" xreflabel="Configuration">
- <title>repmgr configuration</title>
+  <title>repmgr configuration</title>

  &configuration-file;
  &configuration-file-settings;

+  <sect1 id="configuration-permissions" xreflabel="User permissions">
+    <indexterm>
+      <primary>configuration</primary>
+      <secondary>user permissions</secondary>
+    </indexterm>
+
+    <title>repmgr user permissions</title>
+    <para>
+      &repmgr; will create an extension in the database containing objects
+      for administering &repmgr; metadata. The user defined in the <varname>conninfo</varname>
+      setting must be able to access all objects. Additionally, superuser permissions
+      are required to install the &repmgr; extension. The easiest way to do this
+      is to create the &repmgr; user as a superuser, however if this is not
+      desirable, the &repmgr; user can be created as a normal user and a
+      superuser specified with <literal>--superuser</literal> when registering a &repmgr; node.
+    </para>
+  </sect1>
 </chapter>
--- a/doc/configuring-witness-server.sgml
+++ b/doc/configuring-witness-server.sgml
@@ -0,0 +1,86 @@
+<chapter id="using-witness-server">
+ <indexterm>
+  <primary>witness server</primary>
+  <seealso>Using a witness server with repmgrd</seealso>
+ </indexterm>
+
+
+ <title>Using a witness server</title>
+ <para>
+   A <xref linkend="witness-server"> is a normal PostgreSQL instance which
+   is not part of the streaming replication cluster; its purpose is, if a
+   failover situation occurs, to provide proof that the primary server
+   itself is unavailable.
+ </para>
+
+ <para>
+   A typical use case for a witness server is a two-node streaming replication
+   setup, where the primary and standby are in different locations (data centres).
+   By creating a witness server in the same location as the primary, if the primary
+   becomes unavailable, it's possible for the standby to decide whether it can
+   promote itself without risking a "split brain" scenario: if it can't see either the
+   witness or the primary server, it's likely there's a network-level interruption
+   and it should not promote itself. If it can see the witness but not the primary,
+   this proves there is no network interruption and the primary itself is unavailable,
+   and it can therefore promote itself (and ideally take action to fence the
+   former primary).
+ </para>
+ <para>
+   For more complex replication scenarios, e.g. with multiple datacentres, it may
+   be preferable to use location-based failover, which ensures that only nodes
+   in the same location as the primary will ever be promotion candidates;
+   see <xref linkend="repmgrd-network-split"> for more details.
+ </para>
+
+ <note>
+   <simpara>
+     A witness server will only be useful if <application>repmgrd</application>
+     is in use.
+   </simpara>
+ </note>
+
+ <sect1 id="creating-witness-server">
+   <title>Creating a witness server</title>
+ <para>
+   To create a witness server, set up a normal PostgreSQL instance on a server
+   in the same physical location as the cluster's primary server.
+ </para>
+ <para>
+   This instance should <emphasis>not</emphasis> be on the same physical host as the primary server,
+   as otherwise if the primary server fails due to hardware issues, the witness
+   server will be lost too.
+ </para>
+ <note>
+   <simpara>
+     &repmgr; 3.3 and earlier provided a <command>repmgr create witness</command>
+     command, which would automatically create a PostgreSQL instance. However
+     this often resulted in an unsatisfactory, hard-to-customise instance.
+   </simpara>
+ </note>
+ <para>
+   The witness server should be configured in the same way as a normal
+   &repmgr; node; see section <xref linkend="configuration">.
+ </para>
+ <para>
+   Register the witness server with <xref linkend="repmgr-witness-register">.
+   This will create the &repmgr; extension on the witness server, and make
+   a copy of the &repmgr; metadata.
+ </para>
+ <note>
+   <simpara>
+    As the witness server is not part of the replication cluster, further
+    changes to the &repmgr; metadata will be synchronised by
+    <application>repmgrd</application>.
+   </simpara>
+ </note>
+ <para>
+   Once the witness server has been configured, <application>repmgrd</application>
+   should be started; for more details see <xref linkend="repmgrd-witness-server">.
+ </para>
+
+ <para>
+  To unregister a witness server, use <xref linkend="repmgr-witness-unregister">.
+ </para>
+
+ </sect1>
+</chapter>
--- a/doc/event-notifications.sgml
+++ b/doc/event-notifications.sgml
@@ -1,12 +1,17 @@
 <chapter id="event-notifications" xreflabel="event notifications">
+
+ <indexterm>
+   <primary>event notifications</primary>
+ </indexterm>
+
 <title>Event Notifications</title>
 <para>
-  Each time `repmgr` or `repmgrd` perform a significant event, a record
-  of that event is written into the `repmgr.events` table together with
+  Each time &repmgr; or <application>repmgrd</application> perform a significant event, a record
+  of that event is written into the <literal>repmgr.events</literal> table together with
  a timestamp, an indication of failure or success, and further details
  if appropriate. This is useful for gaining an overview of events
  affecting the replication cluster. However note that this table has
-  advisory character and should be used in combination with the `repmgr`
+  advisory character and should be used in combination with the &repmgr;
  and PostgreSQL logs to obtain details of any events.
 </para>
 <para>
@@ -28,11 +33,11 @@
 <para>
  Additionally, event notifications can be passed to a user-defined program
  or script which can take further action, e.g. send email notifications.
-  This is done by setting the `event_notification_command` parameter in
-  `repmgr.conf`.
+  This is done by setting the <literal>event_notification_command</literal> parameter in
+  <filename>repmgr.conf</filename>.
 </para>
 <para>
-  This parameter accepts the following format placeholders:
+  The following format placeholders are provided for all event notifications:
 </para>

 <variablelist>
@@ -55,10 +60,10 @@
  </varlistentry>

  <varlistentry>
-   <term><option>%t</option></term>
+   <term><option>%s</option></term>
   <listitem>
    <para>
-     success (1 or 0)
+     success (1) or failure (0)
    </para>
   </listitem>
  </varlistentry>
@@ -80,6 +85,7 @@
   </listitem>
  </varlistentry>
 </variablelist>
+
 <para>
  The values provided for <literal>%t</literal> and <literal>%d</literal>
  will probably contain spaces, so should be quoted in the provided command
@@ -88,42 +94,75 @@
    event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
  </programlisting>
 </para>
+
 <para>
-  Additionally the following format placeholders are available for the event
-  type <varname>bdr_failover</varname> and optionally <varname>bdr_recovery</varname>:
+   The following format placeholders are provided for a subset of event notifications:
 </para>
+
 <variablelist>
+  <varlistentry>
+   <term><option>%p</option></term>
+   <listitem>
+    <para>
+     node ID of the current primary (<xref linkend="repmgr-standby-register"> and <xref linkend="repmgr-standby-follow">)
+    </para>
+    <para>
+     node ID of the demoted primary (<xref linkend="repmgr-standby-switchover"> only)
+    </para>
+   </listitem>
+  </varlistentry>
  <varlistentry>
   <term><option>%c</option></term>
   <listitem>
    <para>
-     conninfo string of the next available node
+     <literal>conninfo</literal> string of the primary node
+     (<xref linkend="repmgr-standby-register"> and <xref linkend="repmgr-standby-follow">)
+    </para>
+    <para>
+      <literal>conninfo</literal> string of the next available node
+      (<varname>bdr_failover</varname> and  <varname>bdr_recovery</varname>)
    </para>
   </listitem>
  </varlistentry>
+
  <varlistentry>
   <term><option>%a</option></term>
   <listitem>
    <para>
-     name of the next available node
+     name of the current primary node (<xref linkend="repmgr-standby-register"> and <xref linkend="repmgr-standby-follow">)
+    </para>
+    <para>
+     name of the next available node (<varname>bdr_failover</varname> and  <varname>bdr_recovery</varname>)
    </para>
   </listitem>
  </varlistentry>
+
 </variablelist>
+
 <para>
-  These should always be quoted.
+  The values provided for <literal>%c</literal> and <literal>%a</literal>
+  will probably contain spaces, so should always be quoted.
 </para>
+
 <para>
  By default, all notification types will be passed to the designated script;
-  the notification types can be filtered to explicitly named ones:
+  the notification types can be filtered to explicitly named ones using the
+  <varname>event_notifications</varname> parameter:
+
  <itemizedlist spacing="compact" mark="bullet">

   <listitem>
    <simpara><literal>primary_register</literal></simpara>
   </listitem>
+   <listitem>
+    <simpara><literal>primary_unregister</literal></simpara>
+   </listitem>
   <listitem>
    <simpara><literal>standby_register</literal></simpara>
   </listitem>
+   <listitem>
+    <simpara><literal>standby_register_sync</literal></simpara>
+   </listitem>
   <listitem>
    <simpara><literal>standby_unregister</literal></simpara>
   </listitem>
@@ -139,6 +178,21 @@
   <listitem>
    <simpara><literal>standby_disconnect_manual</literal></simpara>
   </listitem>
+   <listitem>
+    <simpara><literal>standby_failure</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>standby_recovery</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>witness_register</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>witness_unregister</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>node_rejoin</literal></simpara>
+   </listitem>
   <listitem>
    <simpara><literal>repmgrd_start</literal></simpara>
   </listitem>
@@ -151,6 +205,18 @@
   <listitem>
    <simpara><literal>repmgrd_failover_follow</literal></simpara>
   </listitem>
+   <listitem>
+    <simpara><literal>repmgrd_upstream_disconnect</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>repmgrd_upstream_reconnect</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>repmgrd_promote_error</literal></simpara>
+   </listitem>
+   <listitem>
+    <simpara><literal>repmgrd_failover_promote</literal></simpara>
+   </listitem>
   <listitem>
    <simpara><literal>bdr_failover</literal></simpara>
   </listitem>
@@ -169,6 +235,7 @@

  </itemizedlist>
 </para>
+
 <para>
  Note that under some circumstances (e.g. when no replication cluster primary
  could be located), it will not be possible to write an entry into the
--- a/doc/filelist.sgml
+++ b/doc/filelist.sgml
@@ -43,6 +43,8 @@
 <!ENTITY promoting-standby  SYSTEM "promoting-standby.sgml">
 <!ENTITY follow-new-primary  SYSTEM "follow-new-primary.sgml">
 <!ENTITY switchover  SYSTEM "switchover.sgml">
+<!ENTITY configuring-witness-server SYSTEM "configuring-witness-server.sgml">
+
 <!ENTITY event-notifications  SYSTEM "event-notifications.sgml">
 <!ENTITY upgrading-repmgr  SYSTEM "upgrading-repmgr.sgml">

@@ -53,6 +55,8 @@
 <!ENTITY repmgrd-degraded-monitoring SYSTEM "repmgrd-degraded-monitoring.sgml">
 <!ENTITY repmgrd-cascading-replication SYSTEM "repmgrd-cascading-replication.sgml">
 <!ENTITY repmgrd-network-split SYSTEM "repmgrd-network-split.sgml">
+<!ENTITY repmgrd-witness-server SYSTEM "repmgrd-witness-server.sgml">
+<!ENTITY repmgrd-bdr SYSTEM "repmgrd-bdr.sgml">

 <!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.sgml">
 <!ENTITY repmgr-primary-unregister SYSTEM "repmgr-primary-unregister.sgml">
@@ -62,6 +66,8 @@
 <!ENTITY repmgr-standby-promote SYSTEM "repmgr-standby-promote.sgml">
 <!ENTITY repmgr-standby-follow SYSTEM "repmgr-standby-follow.sgml">
 <!ENTITY repmgr-standby-switchover SYSTEM "repmgr-standby-switchover.sgml">
+<!ENTITY repmgr-witness-register SYSTEM "repmgr-witness-register.sgml">
+<!ENTITY repmgr-witness-unregister SYSTEM "repmgr-witness-unregister.sgml">
 <!ENTITY repmgr-node-status SYSTEM "repmgr-node-status.sgml">
 <!ENTITY repmgr-node-check SYSTEM "repmgr-node-check.sgml">
 <!ENTITY repmgr-node-rejoin SYSTEM "repmgr-node-rejoin.sgml">
@@ -71,7 +77,10 @@
 <!ENTITY repmgr-cluster-event SYSTEM "repmgr-cluster-event.sgml">
 <!ENTITY repmgr-cluster-cleanup SYSTEM "repmgr-cluster-cleanup.sgml">

+<!ENTITY appendix-release-notes  SYSTEM "appendix-release-notes.sgml">
+<!ENTITY appendix-faq      SYSTEM "appendix-faq.sgml">
 <!ENTITY appendix-signatures      SYSTEM "appendix-signatures.sgml">
+<!ENTITY appendix-packages      SYSTEM "appendix-packages.sgml">

 <!ENTITY bookindex  SYSTEM "bookindex.sgml">

--- a/doc/follow-new-primary.sgml
+++ b/doc/follow-new-primary.sgml
@@ -1,4 +1,9 @@
-<chapter id="follow-new-primary" xreflabel="Following a new primary">
+<chapter id="follow-new-primary">
+ <indexterm>
+  <primary>Following a new primary</primary>
+  <seealso>repmgr standby follow</seealso>
+ </indexterm>
+
 <title>Following a new primary</title>
 <para>
   Following the failure or removal of the replication cluster's existing primary
@@ -22,7 +27,8 @@
  </programlisting>
 </para>
 <para>
-   The standby is now replicating from the new primary and `repmgr cluster show`
+   The standby is now replicating from the new primary and
+   <command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command>
   output reflects this:
   <programlisting>
    $ repmgr -f /etc/repmgr.conf cluster show
--- a/doc/install-packages.sgml
+++ b/doc/install-packages.sgml
@@ -1,32 +1,153 @@
 <sect1 id="installation-packages" xreflabel="Installing from packages">
 <title>Installing &repmgr; from packages</title>
 <para>
-We recommend installing `repmgr` using the available packages for your
-system.
+  We recommend installing &repmgr; using the available packages for your
+  system.
 </para>

 <sect2 id="installation-packages-redhat" xreflabel="Installing from packages on RHEL, Fedora and CentOS">
-   <title>RedHat/Fedora/CentOS</title>
-   <para>
-     RPM packages for `repmgr` are available via Yum through
-     the PostgreSQL Global Development Group RPM repository
-     ( <ulink url="https://yum.postgresql.org/">http://yum.postgresql.org/</>).
-     Follow the instructions for your distribution (RedHat, CentOS,
-     Fedora, etc.) and architecture as detailed at yum.postgresql.org.
-   </para>
-   <para>
-     2ndQuadrant also provides its own RPM packages which are made available
-     at the same time as each `repmgr` release, as it can take some days for
-     them to become available via the main PGDG repository. See here for details:
-     <ulink url="http://repmgr.org/yum-repository.html">http://repmgr.org/yum-repository.html</>
-   </para>
+
+  <indexterm>
+   <primary>installation</primary>
+   <secondary>on RedHat/CentOS/Fedora etc.</secondary>
+  </indexterm>
+
+  <title>RedHat/Fedora/CentOS</title>
+  <para>
+   RPM packages for &repmgr; are available via Yum through
+   the PostgreSQL Global Development Group RPM repository
+   (<ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink>).
+   Follow the instructions for your distribution (RedHat, CentOS,
+   Fedora, etc.) and architecture as detailed there.
+  </para>
+  <para>
+   <ulink url="https://2ndquadrant.com">2ndQuadrant</ulink> also provides its
+   own RPM packages which are made available
+   at the same time as each &repmgr; release, as it can take some days for
+   them to become available via the main PGDG repository. See the following section for details.
+  </para>
+
+
+  <sect3 id="installation-packages-redhat-2ndq">
+    <title>2ndQuadrant repmgr yum repository</title>
+    <para>
+      Beginning with <ulink url="http://repmgr.org/release-notes-3.1.3.html">repmgr 3.1.3</ulink>,
+      <ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
+      repository for &repmgr; releases. This repository complements the main
+      <ulink url="https://yum.postgresql.org/repopackages.php">PGDG community repository</ulink>,
+      but enables repmgr users to access the latest &repmgr; packages before they are
+      available via the PGDG repository, which can take several days to be updated following
+      a fresh  &repmgr; release.
+    </para>
+    <para>
+      <emphasis>Installation</emphasis>
+
+      <itemizedlist>
+        <listitem>
+          <para>
+            Import the repository public key (optional but recommended):
+            <programlisting>
+              rpm --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr</programlisting>
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Install the repository RPM for your distribution (this enables the 2ndQuadrant
+            repository as a source of repmgr packages):
+            <itemizedlist>
+              <listitem>
+                <simpara>
+                  <emphasis>Fedora:</emphasis>
+                  <ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
+                </simpara>
+              </listitem>
+              <listitem>
+                <simpara>
+                  <emphasis>RHEL, CentOS etc:</emphasis>
+                  <ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
+                </simpara>
+              </listitem>
+            </itemizedlist>
+          </para>
+          <para>
+            e.g.:
+            <programlisting>
+              $ yum install http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</programlisting>
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Install the repmgr version appropriate for your PostgreSQL version (e.g. <literal>repmgr96</literal>):
+            <programlisting>
+              $ yum install repmgr96</programlisting>
+          </para>
+        </listitem>
+      </itemizedlist>
+    </para>
+
+    <para>
+      <emphasis>Compatibility with PGDG Repositories</emphasis>
+    </para>
+    <para>
+        The 2ndQuadrant &repmgr; yum repository uses exactly the same package definitions as the
+        main PGDG repository and is effectively a selective mirror for &repmgr; packages only.
+    </para>
+    <para>
+        Normally yum should prioritise the repository with the most recent &repmgr; version.
+        Once the PGDG repository has been updated, it doesn't matter which repository
+        the packages are installed from.
+    </para>
+    <para>
+      To ensure the 2ndQuadrant repository is always prioritised, install <literal>yum-plugin-priorities</literal>
+      and set the repository priorities accordingly.
+    </para>
+
+    <para>
+      <emphasis>Installing a specific package version</emphasis>
+    </para>
+    <para>
+      To install a specific package version, execute <command>yum --showduplicates list</command>
+      for the package in question:
+      <programlisting>
+        [root@localhost ~]# yum --showduplicates list repmgr96
+        Loaded plugins: fastestmirror
+        Loading mirror speeds from cached hostfile
+         * base: ftp.iij.ad.jp
+         * extras: ftp.iij.ad.jp
+         * updates: ftp.iij.ad.jp
+        Available Packages
+        repmgr96.x86_64               3.2-1.el6                    2ndquadrant-repmgr
+        repmgr96.x86_64               3.2.1-1.el6                  2ndquadrant-repmgr
+        repmgr96.x86_64               3.3-1.el6                    2ndquadrant-repmgr
+        repmgr96.x86_64               3.3.1-1.el6                  2ndquadrant-repmgr
+        repmgr96.x86_64               3.3.2-1.el6                  2ndquadrant-repmgr
+        repmgr96.x86_64               3.3.2-1.rhel6                pgdg96
+        repmgr96.x86_64               4.0.0-1.el6                  2ndquadrant-repmgr
+        repmgr96.x86_64               4.0.0-1.rhel6                pgdg96</programlisting>
+      then append the appropriate version number to the package name with a hyphen, e.g.:
+      <programlisting>
+        [root@localhost ~]# yum install repmgr96-3.3.2-1.el6</programlisting>
+    </para>
+  </sect3>
 </sect2>
+
+
+
 <sect2 id="installation-packages-debian" xreflabel="Installing from packages on Debian or Ubuntu">
+
+  <indexterm>
+   <primary>installation</primary>
+   <secondary>on Debian/Ubuntu etc.</secondary>
+  </indexterm>
+
  <title>Debian/Ubuntu</title>
-  <para>.deb packages for `repmgr` are available from the
-  PostgreSQL Community APT repository (<ulink url="http://apt.postgresql.org/">http://apt.postgresql.org/</> ).
+  <para>.deb packages for &repmgr; are available from the
+  PostgreSQL Community APT repository (<ulink url="http://apt.postgresql.org/">http://apt.postgresql.org/</ulink>).
  Instructions can be found in the APT section of the PostgreSQL Wiki
-  (<ulink url="https://wiki.postgresql.org/wiki/Apt">https://wiki.postgresql.org/wiki/Apt</> ).
+  (<ulink url="https://wiki.postgresql.org/wiki/Apt">https://wiki.postgresql.org/wiki/Apt</ulink>).
  </para>
 </sect2>
+
 </sect1>
--- a/doc/install-requirements.sgml
+++ b/doc/install-requirements.sgml
@@ -1,4 +1,10 @@
 <sect1 id="install-requirements" xreflabel="installation requirements">
+
+  <indexterm>
+   <primary>installation</primary>
+   <secondary>requirements</secondary>
+  </indexterm>
+
  <title>Requirements for installing repmgr</title>
  <para>
    repmgr is developed and tested on Linux and OS X, but should work on any
@@ -7,17 +13,14 @@
  </para>

  <para>
-   From version 4.0, repmgr is compatible with all PostgreSQL versions from 9.4, including PostgreSQL 10.
-  </para>
-  <para>
-   PostgreSQL 9.3 is supported by repmgr 3.3.
+   From version 4.0, repmgr is compatible with all PostgreSQL versions from 9.3, including PostgreSQL 10.
+   Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
  </para>

  <note>
-    <simpara>
-If upgrading from `repmgr 3`, please see the separate upgrade guide
-`doc/upgrading-from-repmgr3.md`.
-    </simpara>
+   <simpara>
+    If upgrading from &repmgr; 3.x, please see the section <xref linkend="upgrading-from-repmgr-3">.
+   </simpara>
  </note>

  <para>
@@ -26,42 +29,44 @@ If upgrading from `repmgr 3`, please see the separate upgrade guide
  </para>

  <para>
-   `repmgr` must be installed on each server in the replication cluster.
+   &repmgr; must be installed on each server in the replication cluster.
   If installing repmgr from packages, the package version must match the PostgreSQL
   version. If installing from source, repmgr must be compiled against the same
   major version.
  </para>

  <para>
-   A dedicated system user for `repmgr` is *not* required; as many `repmgr` and
-  `repmgrd` actions require direct access to the PostgreSQL data directory,
-  these commands should be executed by the `postgres` user.
+   A dedicated system user for &repmgr; is <emphasis>not</emphasis> required; as many &repmgr; and
+   <application>repmgrd</application> actions require direct access to the PostgreSQL data directory,
+   these commands should be executed by the <literal>postgres</literal> user.
  </para>

-
  <para>
-   Passwordless `ssh` connectivity between all servers in the replication cluster
+   Passwordless <command>ssh</command> connectivity between all servers in the replication cluster
   is not required, but is necessary in the following cases:
   <itemizedlist>
     <listitem>
-       <simpara>if you need `repmgr` to copy configuration files from outside the PostgreSQL
-  data directory (in which case `rsync` is also required)</simpara>
+       <simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
+       data directory (in which case <command>rsync</command> is also required)</simpara>
     </listitem>
     <listitem>
-       <simpara>to perform switchover operations</simpara>
+       <simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
     </listitem>
     <listitem>
-       <simpara>when executing `repmgr cluster matrix` and `repmgr cluster crosscheck`</simpara>
+       <simpara>
+        when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
+        and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
+       </simpara>
     </listitem>
   </itemizedlist>
  </para>

  <tip>
   <simpara>
-    We recommend using a session multiplexer utility such as `screen` or
-    `tmux` when performing long-running actions (such as cloning a database)
-    on a remote server - this will ensure the `repmgr` action won't be prematurely
-    terminated if your `ssh` session to the server is interrupted or closed.
+    We recommend using a session multiplexer utility such as <command>screen</command> or
+    <command>tmux</command> when performing long-running actions (such as cloning a database)
+    on a remote server - this will ensure the &repmgr; action won't be prematurely
+    terminated if your <command>ssh</command> session to the server is interrupted or closed.
    </simpara>
  </tip>
 </sect1>
--- a/doc/install-source.sgml
+++ b/doc/install-source.sgml
@@ -1,4 +1,9 @@
 <sect1 id="installation-source" xreflabel="Installing from source code">
+  <indexterm>
+   <primary>installation</primary>
+   <secondary>from source</secondary>
+  </indexterm>
+
 <title>Installing &repmgr; from source</title>

 <sect2 id="installation-source-prereqs">
@@ -25,8 +30,7 @@
      building PostgreSQL with:
      <programlisting>
       sudo apt-get update
-       sudo apt-get build-dep postgresql-9.6
-      </programlisting>
+       sudo apt-get build-dep postgresql-9.6</programlisting>
      </para>
    </listitem>
    <listitem>
@@ -39,8 +43,7 @@
       sudo yum check-update
       sudo yum groupinstall "Development Tools"
       sudo yum install yum-utils openjade docbook-dtds docbook-style-dsssl docbook-style-xsl
-       sudo yum-builddep postgresql96
-      </programlisting>
+       sudo yum-builddep postgresql96</programlisting>
     </para>
    </listitem>
   </itemizedlist>
@@ -83,8 +86,7 @@
   <para>
    Clone the source code using <application>git</application>:
    <programlisting>
-     git clone https://github.com/2ndQuadrant/repmgr
-    </programlisting>
+     git clone https://github.com/2ndQuadrant/repmgr</programlisting>
   </para>

   <para>
@@ -129,11 +131,45 @@
   To installing &repmgr; from source, simply execute:

   <programlisting>
-    ./configure && make install
-   </programlisting>
+    ./configure && make install</programlisting>

-   Ensure `pg_config` for the target PostgreSQL version is in `$PATH`.
+   Ensure <command>pg_config</command> for the target PostgreSQL version is in
+   <varname>$PATH</varname>.
  </para>
 </sect2>

+
+
+ <sect2 id="installation-build-repmgr-docs">
+   <title>Building &repmgr; documentation</title>
+   <para>
+    The &repmgr; documentation is (like the main PostgreSQL project)
+    written in DocBook format. To build it locally as HTML, you'll need to
+    install the required packages as described in the
+    <ulink url="https://www.postgresql.org/docs/9.6/static/docguide-toolsets.html">
+      PostgreSQL documentation</ulink> then execute:
+   <programlisting>
+    ./configure && make install-doc</programlisting>
+   </para>
+   <para>
+     The generated HTML files will be placed in the <filename>doc/html</filename>
+     subdirectory of your source tree.
+   </para>
+
+   <para>
+     To build the documentation as a single HTML file, execute:
+   <programlisting>
+    cd doc/ && make repmgr.html</programlisting>
+   </para>
+
+   <note>
+     <simpara>
+       Due to changes in PostgreSQL's documentation build system from PostgreSQL 10,
+       the documentation can currently only be built against PostgreSQL 9.6 or earlier.
+       This limitation will be fixed when time and resources permit.
+     </simpara>
+   </note>
+ </sect2>
+
+
 </sect1>
--- a/doc/install.sgml
+++ b/doc/install.sgml
@@ -1,4 +1,8 @@
 <chapter id="installation" xreflabel="Installation">
+ <indexterm>
+  <primary>installation</primary>
+ </indexterm>
+
 <title>Installation</title>

 <para>
--- a/doc/legal.sgml
+++ b/doc/legal.sgml
@@ -1,9 +1,9 @@
-<!-- doc/src/sgml/legal.sgml -->
+<!-- doc/legal.sgml -->

 <date>2017</date>

 <copyright>
- <year>2010-2017</year>
+ <year>2010-2018</year>
 <holder>2ndQuadrant, Ltd.</holder>
 </copyright>

@@ -11,10 +11,27 @@
 <title>Legal Notice</title>

 <para>
-  <productname>repmgr</productname> is Copyright &copy; 2010-2017
-  by 2ndQuadrant, Ltd.
+  <productname>repmgr</productname> is Copyright &copy; 2010-2018
+  by 2ndQuadrant, Ltd. All rights reserved.
 </para>

- <para>add license</para>
+ <para>
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+ </para>
+ <para>
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ </para>
+ <para>
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see
+   <ulink url="https://www.gnu.org/licenses/">https://www.gnu.org/licenses/</ulink>
+   to obtain one.
+ </para>

 </legalnotice>
--- a/doc/overview.sgml
+++ b/doc/overview.sgml
@@ -5,6 +5,11 @@
  This chapter provides a high-level overview of repmgr's components and functionality.
 </para>
 <sect1 id="repmgr-concepts" xreflabel="Concepts">
+
+  <indexterm>
+    <primary>concepts</primary>
+  </indexterm>
+
  <title>Concepts</title>

  <para>
@@ -15,13 +20,13 @@
   streaming replication</>.
  </para>
  <para>
-   The following terms are used throughout the `repmgr` documentation.
+   The following terms are used throughout the &repmgr; documentation.
   <variablelist>
    <varlistentry>
     <term>replication cluster</term>
     <listitem>
      <simpara>
-       In the `repmgr` documentation, "replication cluster" refers to the network
+       In the &repmgr; documentation, "replication cluster" refers to the network
       of PostgreSQL servers connected by streaming replication.
      </simpara>
     </listitem>
@@ -31,7 +36,7 @@
     <term>node</term>
     <listitem>
      <simpara>
-       A node is a server within a replication cluster.
+       A node is a single PostgreSQL server within a replication cluster.
      </simpara>
     </listitem>
    </varlistentry>
@@ -41,7 +46,7 @@
     <listitem>
      <simpara>
       The node a standby server connects to, in order to receive streaming replication.
-       This is either the primary server or in the case of cascading replication, another
+       This is either the primary server, or in the case of cascading replication, another
       standby.
      </simpara>
     </listitem>
@@ -52,7 +57,7 @@
     <listitem>
      <simpara>
       This is the action which occurs if a primary server fails and a suitable standby
-       is  promoted as the new primary. The `repmgrd` daemon supports automatic failover
+       is  promoted as the new primary. The <application>repmgrd</application> daemon supports automatic failover
       to minimise downtime.
      </simpara>
     </listitem>
@@ -66,7 +71,7 @@
       it's necessary to take a primary server offline; in this case a controlled
       switchover is necessary, whereby a suitable standby is promoted and the
       existing primary removed from the replication cluster in a controlled manner.
-       The `repmgr` command line client provides this functionality.
+       The &repmgr; command line client provides this functionality.
      </simpara>
     </listitem>
    </varlistentry>
@@ -82,13 +87,37 @@
      </simpara>
     </listitem>
    </varlistentry>
+   <varlistentry id="witness-server">
+     <term>witness server</term>
+     <listitem>
+      <para>
+        &repmgr; provides functionality to set up a so-called "witness server" to
+        assist in determining a new primary server in a failover situation with more
+        than one standby. The witness server itself is not part of the replication
+        cluster, although it does contain a copy of the &repmgr; metadata schema.
+      </para>
+      <para>
+        The purpose of a witness server is to provide a "casting vote" where servers
+        in the replication cluster are split over more than one location. In the event
+        of a loss of connectivity between locations, the presence or absence of
+        the witness server will decide whether a server at that location is promoted
+        to primary; this is to prevent a "split-brain" situation where an isolated
+        location interprets a network outage as a failure of the (remote) primary and
+        promotes a (local) standby.
+      </para>
+      <para>
+        A witness server only needs to be created if <application>repmgrd</application>
+        is in use.
+      </para>
+     </listitem>
+    </varlistentry>
   </variablelist>
  </para>
 </sect1>
 <sect1 id="repmgr-components" xreflabel="Components">
  <title>Components</title>
  <para>
-  `repmgr` is a suite of open-source tools to manage replication and failover
+  &repmgr; is a suite of open-source tools to manage replication and failover
  within a cluster of PostgreSQL servers. It supports and enhances PostgreSQL's
  built-in streaming replication, which provides a single read/write primary server
  and one or more read-only standbys containing near-real time copies of the primary
@@ -147,11 +176,12 @@
 <sect1 id="repmgr-user-metadata" xreflabel="Repmgr user and metadata">
  <title>Repmgr user and metadata</title>
  <para>
-   In order to effectively manage a replication cluster, `repmgr` needs to store
+   In order to effectively manage a replication cluster, &repmgr; needs to store
   information about the servers in the cluster in a dedicated database schema.
-   This schema is automatically by the `repmgr` extension, which is installed
-   during the first step in initialising a `repmgr`-administered cluster
-   (`repmgr primary register`) and contains the following objects:
+   This schema is automatically created by the &repmgr; extension, which is installed
+   during the first step in initializing a &repmgr;-administered cluster
+   (<command><link linkend="repmgr-primary-register">repmgr primary register</link></command>)
+   and contains the following objects:
   <variablelist>
    <varlistentry>
     <term>Tables</term>
@@ -159,14 +189,15 @@
      <para>
       <itemizedlist>
        <listitem>
-          <simpara>repmgr.events: records events of interest</simpara>
+          <simpara><literal>repmgr.events</literal>: records events of interest</simpara>
        </listitem>
        <listitem>
-          <simpara>repmgr.nodes: connection and status information for each server in the
+          <simpara><literal>repmgr.nodes</literal>: connection and status information for each server in the
    replication cluster</simpara>
        </listitem>
        <listitem>
-          <simpara>repmgr.monitoring_history: historical standby monitoring information written by `repmgrd`</simpara>
+          <simpara><literal>repmgr.monitoring_history</literal>: historical standby monitoring information
+            written by <application>repmgrd</application></simpara>
        </listitem>
       </itemizedlist>
      </para>
@@ -178,12 +209,12 @@
      <para>
       <itemizedlist>
        <listitem>
-          <simpara>repmgr.show_nodes: based on the table `repl_nodes`, additionally showing the
-     name of the server's upstream node</simpara>
+          <simpara><literal>repmgr.show_nodes</literal>: based on the table <literal>repmgr.nodes</literal>, additionally showing the
+           name of the server's upstream node</simpara>
        </listitem>
        <listitem>
-          <simpara>repmgr.replication_status: when `repmgrd`'s monitoring is enabled, shows current monitoring
-    status for each standby.</simpara>
+          <simpara><literal>repmgr.replication_status</literal>: when <application>repmgrd</application>'s monitoring is enabled, shows
+            current monitoring status for each standby.</simpara>
        </listitem>
       </itemizedlist>
      </para>
@@ -193,16 +224,16 @@
  </para>

  <para>
-   The `repmgr` metadata schema can be stored in an existing database or in its own
-   dedicated database. Note that the `repmgr` metadata schema cannot reside on a database
-   server which is not part of the replication cluster managed by `repmgr`.
+   The &repmgr; metadata schema can be stored in an existing database or in its own
+   dedicated database. Note that the &repmgr; metadata schema cannot reside on a database
+   server which is not part of the replication cluster managed by &repmgr;.
  </para>
  <para>
-   A database user must be available for `repmgr` to access this database and perform
+   A database user must be available for &repmgr; to access this database and perform
   necessary changes. This user does not need to be a superuser, however some operations
-   such as initial installation of the `repmgr` extension will require a superuser
+   such as initial installation of the &repmgr; extension will require a superuser
   connection (this can be specified where required with the command line option
-   `--superuser`).
+   <literal>--superuser</literal>).
  </para>
 </sect1>

--- a/doc/promoting-standby.sgml
+++ b/doc/promoting-standby.sgml
@@ -1,4 +1,8 @@
 <chapter id="promoting-standby" xreflabel="Promoting a standby">
+ <indexterm>
+   <primary>promoting a standby</primary>
+   <seealso>repmgr standby promote</seealso>
+ </indexterm>
 <title>Promoting a standby server with repmgr</title>
 <para>
   If a primary server fails or needs to be removed from the replication cluster,
--- a/doc/quickstart.sgml
+++ b/doc/quickstart.sgml
@@ -5,12 +5,17 @@
  This section gives a quick introduction to &repmgr;, including setting up a
  sample &repmgr; installation and a basic replication cluster.
 </para>
-
 <para>
-  These instructions are not suitable for a production install, as they may not
-  take into account security considerations, proper system administration
-  procedures etc..
+  These instructions are for demonstration purposes and are not suitable for a production
+  install, as issues such as security considerations and system administration
+  best practices are omitted.
 </para>
+ <note>
+   <simpara>
+     To upgrade an existing &repmgr; 3.x installation, see section
+     <xref linkend="upgrading-from-repmgr-3">.
+   </simpara>
+ </note>

 <sect1 id="quickstart-prerequisites">
   <title>Prerequisites for setting up a basic replication cluster with &repmgr;</title>
@@ -98,11 +103,11 @@
    # ignores archiving. Use something more sensible.
    archive_command = '/bin/true'

-    # If you have configured `pg_basebackup_options`
-    # in `repmgr.conf` to include the setting `--xlog-method=fetch` (from
-    # PostgreSQL 10 `--wal-method=fetch`), *and* you have not set
-    # `restore_command` in `repmgr.conf`to fetch WAL files from another
-    # source such as Barman, you'll need to set `wal_keep_segments` to a
+    # If you have configured "pg_basebackup_options"
+    # in "repmgr.conf" to include the setting "--xlog-method=fetch" (from
+    # PostgreSQL 10 "--wal-method=fetch"), *and* you have not set
+    # "restore_command" in "repmgr.conf" to fetch WAL files from another
+    # source such as Barman, you'll need to set "wal_keep_segments" to a
    # high enough value to ensure that all WAL files generated while
    # the standby is being cloned are retained until the standby starts up.
    #
@@ -116,6 +121,11 @@
     <command>include 'postgresql.replication.conf</command>.
    </simpara>
   </tip>
+   <para>
+     Additionally, if you are intending to use <application>pg_rewind</application>,
+     and the cluster was not initialised using data checksums, you may want to consider enabling
+     <varname>wal_log_hints</varname>; for more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
+   </para>
 </sect1>

 <sect1 id="quickstart-repmgr-user-database">
@@ -286,7 +296,7 @@
    slot_name        |
    config_file      | /etc/repmgr.conf</programlisting>
  <para>
-    Each server in the replication cluster will have its own record. If <command>repmgrd</command>
+    Each server in the replication cluster will have its own record. If <application>repmgrd</application>
    is in use, the fields <literal>upstream_node_id</literal>, <literal>active</literal> and
    <literal>type</literal> will be updated when the node's status or role changes.
  </para>
@@ -301,11 +311,10 @@
   (and possibly <literal>data_directory</literal>) adjusted accordingly, e.g.:
  </para>
  <programlisting>
-    node=2
+    node_id=2
    node_name=node2
    conninfo='host=node2 user=repmgr dbname=repmgr connect_timeout=2'
-    data_directory='/var/lib/postgresql/data'
-  </programlisting>
+    data_directory='/var/lib/postgresql/data'</programlisting>
  <para>
   Use the <command>--dry-run</command> option to check the standby can be cloned:
  </para>
--- a/doc/repmgr-cluster-cleanup.sgml
+++ b/doc/repmgr-cluster-cleanup.sgml
@@ -1,23 +1,41 @@
-<chapter id="repmgr-cluster-cleanup" xreflabel="repmgr cluster cleanup">
+<refentry id="repmgr-cluster-cleanup">
  <indexterm>
    <primary>repmgr cluster cleanup</primary>
  </indexterm>
-  <title>repmgr cluster cleanup</title>
-  <para>
-   Purges monitoring history from the <literal>repmgr.monitoring_history</literal> table to
-   prevent excessive table growth. Use the <literal>-k/--keep-history</literal> to specify the
-   number of days of monitoring history to retain. This command can be used
-   manually or as a cronjob.
-  </para>
-  <para>
-   This command requires a valid <filename>repmgr.conf</filename> file for the node on which it is
-   executed; no additional arguments are required.
-  </para>
-  <note>
-   <simpara>
-    Monitoring history will only be written if <command>repmgrd</command> is active, and
-    <varname>monitoring_history</varname> is set to <literal>true</literal> in
-    <filename>repmgr.conf</filename>.
-   </simpara>
-  </note>
-</chapter>
+ <refmeta>
+    <refentrytitle>repmgr cluster cleanup</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr cluster cleanup</refname>
+    <refpurpose>purge monitoring history</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      Purges monitoring history from the <literal>repmgr.monitoring_history</literal> table to
+      prevent excessive table growth. Use the <literal>-k/--keep-history</literal> option to specify the
+      number of days of monitoring history to retain. This command can be used
+      manually or as a cronjob.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Usage</title>
+    <para>
+      This command requires a valid <filename>repmgr.conf</filename> file for the node on which it is
+      executed; no additional arguments are required.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Notes</title>
+
+    <para>
+      Monitoring history will only be written if <application>repmgrd</application> is active, and
+      <varname>monitoring_history</varname> is set to <literal>true</literal> in
+      <filename>repmgr.conf</filename>.
+    </para>
+  </refsect1>
+</refentry>
--- a/doc/repmgr-cluster-crosscheck.sgml
+++ b/doc/repmgr-cluster-crosscheck.sgml
@@ -1,15 +1,27 @@
-<chapter id="repmgr-cluster-crosscheck" xreflabel="repmgr cluster crosscheck">
+<refentry id="repmgr-cluster-crosscheck">
  <indexterm>
    <primary>repmgr cluster crosscheck</primary>
  </indexterm>
-  <title>repmgr cluster crosscheck</title>
-  <para>
-    <command>repmgr cluster crosscheck</command> is similar to <xref linkend="repmgr-cluster-matrix">,
-    but cross-checks connections between each combination of nodes. In "Example 3" in
-    <xref linkend="repmgr-cluster-matrix"> we have no information about the state of <literal>node3</literal>.
-    However by running <command>repmgr cluster crosscheck</command> it's possible to get a better
-    overview of the cluster situation:
-    <programlisting>
+
+
+  <refmeta>
+    <refentrytitle>repmgr cluster crosscheck</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr cluster crosscheck</refname>
+    <refpurpose>cross-checks connections between each combination of nodes</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>repmgr cluster crosscheck</command> is similar to <xref linkend="repmgr-cluster-matrix">,
+        but cross-checks connections between each combination of nodes. In "Example 3" in
+        <xref linkend="repmgr-cluster-matrix"> we have no information about the state of <literal>node3</literal>.
+        However, by running <command>repmgr cluster crosscheck</command> it's possible to get a better
+        overview of the cluster situation:
+          <programlisting>
    $ repmgr -f /etc/repmgr.conf cluster crosscheck

    Name   | Id |  1 |  2 |  3
@@ -17,12 +29,14 @@
     node1 |  1 |  * |  * |  x
     node2 |  2 |  * |  * |  *
     node3 |  3 |  * |  * |  *</programlisting>
-  </para>
-  <para>
-   What happened is that <command>repmgr cluster crosscheck</command> merged its own
-   <command>repmgr cluster matrix</command> with the <command>repmgr cluster matrix</command>
-   output from <literal>node2</literal>; the latter is able to connect to <literal>node3</literal>
-   and therefore determine the state of outbound connections from that node.
-  </para>
-</chapter>
+    </para>
+    <para>
+      What happened is that <command>repmgr cluster crosscheck</command> merged its own
+      <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command> with the
+      <command>repmgr cluster matrix</command> output from <literal>node2</literal>; the latter is
+      able to connect to <literal>node3</literal>
+      and therefore determine the state of outbound connections from that node.
+    </para>
+  </refsect1>
+</refentry>

--- a/doc/repmgr-cluster-event.sgml
+++ b/doc/repmgr-cluster-event.sgml
@@ -1,37 +1,63 @@
-<chapter id="repmgr-cluster-event" xreflabel="repmgr cluster event">
- <indexterm>
-  <primary>repmgr cluster event</primary>
- </indexterm>
- <title>repmgr cluster event</title>
- <para>
-  This outputs a formatted list of cluster events, as stored in the
-  <literal>repmgr.events</literal> table. Output is in reverse chronological order, and
-  can be filtered with the following options:
- <itemizedlist spacing="compact" mark="bullet">
-  <listitem>
-    <simpara><literal>--all</literal>: outputs all entries</simpara>
-  </listitem>
-  <listitem>
-    <simpara><literal>--limit</literal>: set the maximum number of entries to output (default: 20)</simpara>
-  </listitem>
-  <listitem>
-    <simpara><literal>--node-id</literal>: restrict entries to node with this ID</simpara>
-  </listitem>
-  <listitem>
-    <simpara><literal>--node-name</literal>: restrict entries to node with this name</simpara>
-  </listitem>
-  <listitem>
-    <simpara><literal>--event</literal>: filter specific event</simpara>
-  </listitem>
- </itemizedlist>
- </para>
- <para>
-  Example:
-  <programlisting>
+<refentry id="repmgr-cluster-event">
+  <indexterm>
+    <primary>repmgr cluster event</primary>
+  </indexterm>
+
+  <refmeta>
+    <refentrytitle>repmgr cluster event</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr cluster event</refname>
+    <refpurpose>output a formatted list of cluster events</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      Outputs a formatted list of cluster events, as stored in the <literal>repmgr.events</literal> table.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Usage</title>
+
+    <para>
+      Output is in reverse chronological order, and
+      can be filtered with the following options:
+      <itemizedlist spacing="compact" mark="bullet">
+        <listitem>
+          <simpara><literal>--all</literal>: outputs all entries</simpara>
+        </listitem>
+        <listitem>
+          <simpara><literal>--limit</literal>: set the maximum number of entries to output (default: 20)</simpara>
+        </listitem>
+        <listitem>
+          <simpara><literal>--node-id</literal>: restrict entries to node with this ID</simpara>
+        </listitem>
+        <listitem>
+          <simpara><literal>--node-name</literal>: restrict entries to node with this name</simpara>
+        </listitem>
+        <listitem>
+          <simpara><literal>--event</literal>: filter specific event (see <xref linkend="event-notifications"> for a full list)</simpara>
+        </listitem>
+      </itemizedlist>
+    </para>
+    <para>
+      The "Details" column can be omitted by providing <literal>--terse</literal>.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Example</title>
+    <para>
+      <programlisting>
    $ repmgr -f /etc/repmgr.conf cluster event --event=standby_register
     Node ID | Name  | Event            | OK | Timestamp           | Details
    ---------+-------+------------------+----+---------------------+--------------------------------
     3       | node3 | standby_register | t  | 2017-08-17 10:28:55 | standby registration succeeded
     2       | node2 | standby_register | t  | 2017-08-17 10:28:53 | standby registration succeeded</programlisting>
- </para>
-</chapter>
+    </para>
+  </refsect1>
+</refentry>
--- a/doc/repmgr-cluster-matrix.sgml
+++ b/doc/repmgr-cluster-matrix.sgml
@@ -1,27 +1,44 @@
-<chapter id="repmgr-cluster-matrix" xreflabel="repmgr cluster matrix">
+<refentry id="repmgr-cluster-matrix">
  <indexterm>
    <primary>repmgr cluster matrix</primary>
  </indexterm>
-  <title>repmgr cluster matrix</title>
-  <para>
-    <command>repmgr cluster matrix</command> runs  <command>repmgr cluster show</command> on each
-    node and arranges the results in a matrix, recording success or failure.
-  </para>
-  <para>
-    <command>repmgr cluster matrix</command> requires a valid <filename>repmgr.conf</filename>
-    file on each node.  Additionally passwordless `ssh` connections are required between
-    all nodes.
-  </para>
-  <para>
+
+  <refmeta>
+    <refentrytitle>repmgr cluster matrix</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr cluster matrix</refname>
+    <refpurpose>
+      runs repmgr cluster show on each node and summarizes output
+    </refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>repmgr cluster matrix</command> runs <command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command> on each
+      node and arranges the results in a matrix, recording success or failure.
+    </para>
+    <para>
+      <command>repmgr cluster matrix</command> requires a valid <filename>repmgr.conf</filename>
+      file on each node. Additionally, passwordless <command>ssh</command> connections are required between
+      all nodes.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Example</title>
+    <para>
    Example 1 (all nodes up):
    <programlisting>
    $ repmgr -f /etc/repmgr.conf cluster matrix

    Name   | Id |  1 |  2 |  3
    -------+----+----+----+----
-    node1 |  1 |  * |  * |  *
-    node2 |  2 |  * |  * |  *
-    node3 |  3 |  * |  * |  *</programlisting>
+     node1 |  1 |  * |  * |  *
+     node2 |  2 |  * |  * |  *
+     node3 |  3 |  * |  * |  *</programlisting>
  </para>
  <para>
    Example 2 (<literal>node1</literal> and <literal>node2</literal> up, <literal>node3</literal> down):
@@ -46,7 +63,7 @@
  <para>
    The other two nodes are up; the corresponding rows have <literal>x</literal> in the
    column corresponding to <literal>node3</literal>, meaning that inbound connections to
-    that node have failed, and `*` in the columns corresponding to
+    that node have failed, and <literal>*</literal> in the columns corresponding to
    <literal>node1</literal> and <literal>node2</literal>, meaning that inbound connections
    to these nodes have succeeded.
  </para>
@@ -79,5 +96,6 @@
    In this case, the <xref linkend="repmgr-cluster-crosscheck"> command will produce a more
    useful result.
  </para>
-</chapter>
+  </refsect1>
+</refentry>

--- a/doc/repmgr-cluster-show.sgml
+++ b/doc/repmgr-cluster-show.sgml
@@ -1,22 +1,46 @@
-<chapter id="repmgr-cluster-show" xreflabel="repmgr cluster show">
+<refentry id="repmgr-cluster-show">
  <indexterm>
    <primary>repmgr cluster show</primary>
  </indexterm>
-  <title>repmgr cluster show</title>
-  <para>
-    Displays information about each active node in the replication cluster. This
-    command polls each registered server and shows its role (<literal>primary</literal> /
-    <literal>standby</literal> / <literal>bdr</literal>) and status. It polls each server
-    directly and can be run on any node in the cluster; this is also useful when analyzing
-    connectivity from a particular node.
-  </para>
-  <para>
-    This command requires either a valid <filename>repmgr.conf</filename> file or a database
-    connection string to one of the registered nodes; no  additional arguments are needed.
-  </para>

-  <para>
-    Example:
+  <refmeta>
+    <refentrytitle>repmgr cluster show</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr cluster show</refname>
+    <refpurpose>display information about each registered node in the replication cluster</refpurpose>
+  </refnamediv>
+
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      Displays information about each registered node in the replication cluster. This
+      command polls each registered server and shows its role (<literal>primary</literal> /
+      <literal>standby</literal> / <literal>bdr</literal>) and status. It polls each server
+      directly and can be run on any node in the cluster; this is also useful when analyzing
+      connectivity from a particular node.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Execution</title>
+    <para>
+      This command requires either a valid <filename>repmgr.conf</filename> file or a database
+      connection string to one of the registered nodes; no additional arguments are needed.
+    </para>
+
+    <para>
+      To show database connection errors when polling nodes, run the command in
+      <literal>--verbose</literal> mode.
+    </para>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Example</title>
+    <para>
    <programlisting>
    $ repmgr -f /etc/repmgr.conf cluster show

@@ -26,42 +50,67 @@
     2  | node2 | standby |   running | node1    | default  | host=db_node2 dbname=repmgr user=repmgr
     3  | node3 | standby |   running | node1    | default  | host=db_node3 dbname=repmgr user=repmgr</programlisting>
  </para>
+  </refsect1>
+  <refsect1>
+    <title>Notes</title>
+    <para>
+      The column <literal>Role</literal> shows the expected server role according to the
+      &repmgr; metadata. <literal>Status</literal> shows whether the server is running or unreachable.
+      If the node has an unexpected role not reflected in the &repmgr; metadata, e.g. a node was manually
+      promoted to primary, this will be highlighted with an exclamation mark, e.g.:
+      <programlisting>
+    $ repmgr -f /etc/repmgr.conf cluster show

-  <para>
-    To show database connection errors when polling nodes, run the command in
-    <literal>--verbose</literal> mode.
-  </para>
-  <para>
-    The `cluster show` command accepts an optional parameter <literal>--csv</literal>, which
-    outputs the replication cluster's status in a simple CSV format, suitable for
-    parsing by scripts:
-    <programlisting>
+     ID | Name  | Role    | Status               | Upstream | Location | Connection string
+    ----+-------+---------+----------------------+----------+----------+-----------------------------------------
+     1  | node1 | primary | ? unreachable        |          | default  | host=db_node1 dbname=repmgr user=repmgr
+     2  | node2 | standby | ! running as primary | node1    | default  | host=db_node2 dbname=repmgr user=repmgr
+     3  | node3 | standby |   running            | node1    | default  | host=db_node3 dbname=repmgr user=repmgr
+
+    WARNING: following issues were detected
+      node "node1" (ID: 1) is registered as an active primary but is unreachable
+      node "node2" (ID: 2) is registered as standby but running as primary</programlisting>
+    </para>
+    <para>
+      Node availability is tested by connecting from the node where
+      <command>repmgr cluster show</command> is executed, and does not necessarily imply the node
+      is down. See <xref linkend="repmgr-cluster-matrix"> and <xref linkend="repmgr-cluster-crosscheck"> to get
+          a better overview of connections between nodes.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <para>
+      <command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
+      outputs the replication cluster's status in a simple CSV format, suitable for
+      parsing by scripts:
+      <programlisting>
    $ repmgr -f /etc/repmgr.conf cluster show --csv
    1,-1,-1
    2,0,0
    3,0,1</programlisting>
-  </para>
-  <para>
-    The columns have following meanings:
-    <itemizedlist spacing="compact" mark="bullet">
-     <listitem>
-      <simpara>
-        node ID
-      </simpara>
-      <simpara>
-        availability (0 = available, -1 = unavailable)
-      </simpara>
-      <simpara>
-        recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
-      </simpara>
-     </listitem>
-    </itemizedlist>
-  </para>
+    </para>
+    <para>
+      The columns have the following meanings:
+      <itemizedlist spacing="compact" mark="bullet">
+        <listitem>
+          <simpara>
+            node ID
+          </simpara>
+        </listitem>
+        <listitem>
+          <simpara>
+            availability (0 = available, -1 = unavailable)
+          </simpara>
+        </listitem>
+        <listitem>
+          <simpara>
+            recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
+          </simpara>
+        </listitem>
+      </itemizedlist>
+    </para>
+  </refsect1>

-  <para>
-   Note that the availability is tested by connecting from the node where
-   <command>repmgr cluster show</command> is executed, and does not necessarily imply the node
-   is down. See <xref linkend="repmgr-cluster-matrix"> and <xref linkend="repmgr-cluster-crosscheck"> to get
-    a better overviews of connections between nodes.
-  </para>
-</chapter>
+</refentry>
--- a/doc/repmgr-node-check.sgml
+++ b/doc/repmgr-node-check.sgml
@@ -1,32 +1,48 @@
-<chapter id="repmgr-node-check" xreflabel="repmgr node check">
+<refentry id="repmgr-node-check">
  <indexterm>
    <primary>repmgr node check</primary>
  </indexterm>
-  <title>repmgr node check</title>
-  <para>
-    Performs some health checks on a node from a replication perspective.
-    This command must be run on the local node.
-  </para>
-  <para>
-   Sample output (execute <command>repmgr node check</command>):
-   <programlisting>
+
+  <refmeta>
+    <refentrytitle>repmgr node check</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr node check</refname>
+    <refpurpose>performs some health checks on a node from a replication perspective</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      Performs some health checks on a node from a replication perspective.
+      This command must be run on the local node.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Example</title>
+    <para>
+      <programlisting>
+       $ repmgr -f /etc/repmgr.conf node check
       Node "node1":
            Server role: OK (node is primary)
            Replication lag: OK (N/A - node is primary)
            WAL archiving: OK (0 pending files)
            Downstream servers: OK (2 of 2 downstream nodes attached)
-            Replication slots: OK (node has no replication slots)
-   </programlisting>
-  </para>
-  <para>
-   Additionally each check can be performed individually by supplying
-   an additional command line parameter, e.g.:
-   <programlisting>
-     $ repmgr node check --role
-     OK (node is primary)
-   </programlisting>
-  </para>
-  <para>
+            Replication slots: OK (node has no replication slots)</programlisting>
+    </para>
+  </refsect1>
+  <refsect1>
+    <title>Individual checks</title>
+    <para>
+      Each check can be performed individually by supplying
+      an additional command line parameter, e.g.:
+      <programlisting>
+        $ repmgr node check --role
+        OK (node is primary)</programlisting>
+    </para>
+    <para>
   Parameters for individual checks are as follows:
    <itemizedlist spacing="compact" mark="bullet">

@@ -67,4 +83,5 @@
   Individual checks can also be output in a Nagios-compatible format by additionally
   providing the option <literal>--nagios</literal>.
  </para>
-</chapter>
+  </refsect1>
+</refentry>
--- a/doc/repmgr-node-rejoin.sgml
+++ b/doc/repmgr-node-rejoin.sgml
@@ -1,13 +1,155 @@
-<chapter id="repmgr-node-rejoin" xreflabel="repmgr node rejoin">
+<refentry id="repmgr-node-rejoin">
+
  <indexterm>
    <primary>repmgr node rejoin</primary>
  </indexterm>
-  <title>repmgr node rejoin</title>
-  <para>
-   Enables a dormant (stopped) node to be rejoined to the replication cluster.
-  </para>
-  <para>
-    This can optionally use <command>pg_rewind</command> to re-integrate a node which has diverged
-    from the rest of the cluster, typically a failed primary.
-  </para>
-</chapter>
+
+  <refmeta>
+    <refentrytitle>repmgr node rejoin</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr node rejoin</refname>
+    <refpurpose>rejoin a dormant (stopped) node to the replication cluster</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      Enables a dormant (stopped) node to be rejoined to the replication cluster.
+    </para>
+    <para>
+      This can optionally use <application>pg_rewind</application> to re-integrate
+      a node which has diverged from the rest of the cluster, typically a failed primary.
+    </para>
+
+    <tip>
+      <para>
+        If the node is running and needs to be attached to the current primary, use
+        <xref linkend="repmgr-standby-follow">.
+      </para>
+    </tip>
+  </refsect1>
+
+
+  <refsect1>
+    <title>Usage</title>
+
+    <para>
+      <programlisting>
+      repmgr node rejoin -d '$conninfo'</programlisting>
+
+      where <literal>$conninfo</literal> is the conninfo string of any reachable node in the cluster.
+      <filename>repmgr.conf</filename> for the stopped node <emphasis>must</emphasis> be supplied explicitly if not
+      otherwise available.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>node_rejoin</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Notes</title>
+    <para>
+      Currently <command>repmgr node rejoin</command> can only be used to attach
+      a standby to the current primary, not another standby.
+    </para>
+    <para>
+      The node must have been shut down cleanly; if this was not the case, it will
+      need to be manually started (remove any existing <filename>recovery.conf</filename> file first)
+      until it has reached a consistent recovery point, then shut down cleanly.
+    </para>
+    <tip>
+      <para>
+        If <application>PostgreSQL</application> is started in single-user mode and
+        input is directed from <filename>/dev/null</filename>, it will perform recovery
+        then immediately quit, and will then be in a state suitable for use by
+        <application>pg_rewind</application>.
+        <programlisting>
+          rm -f /var/lib/pgsql/data/recovery.conf
+          postgres --single -D /var/lib/pgsql/data/ &lt; /dev/null</programlisting>
+      </para>
+    </tip>
+  </refsect1>
+
+  <refsect1 id="repmgr-node-rejoin-pg-rewind" xreflabel="Using pg_rewind">
+    <title>Using <command>pg_rewind</command></title>
+    <para>
+      <command>repmgr node rejoin</command> can optionally use <command>pg_rewind</command> to re-integrate a
+      node which has diverged from the rest of the cluster, typically a failed primary.
+      <command>pg_rewind</command> is available in PostgreSQL 9.5 and later.
+    </para>
+    <note>
+      <para>
+        <command>pg_rewind</command> <emphasis>requires</emphasis> that either
+        <varname>wal_log_hints</varname> is enabled, or that
+        data checksums were enabled when the cluster was initialized. See the
+        <ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
+      </para>
+    </note>
+
+    <para>
+      To have <command>repmgr node rejoin</command> use <command>pg_rewind</command> if required,
+      pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
+      to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
+    </para>
+
+    <para>
+      Be aware that if <command>pg_rewind</command> is executed and actually performs a
+      rewind operation, any configuration files in the PostgreSQL data directory will be
+      overwritten with those from the source server.
+    </para>
+    <para>
+      To prevent this happening, provide a comma-separated list of files to retain
+      using the <literal>--config-files</literal> command line option; the specified files
+      will be archived in a temporary directory (whose parent directory can be specified with
+      <literal>--config-archive-dir</literal>) and restored once the rewind operation is
+      complete.
+    </para>
+
+    <para>
+      Example, first using <literal>--dry-run</literal>, then actually executing the
+      <literal>node rejoin</literal> command.
+    <programlisting>
+    $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
+         --force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run
+    NOTICE: using provided configuration file "/etc/repmgr.conf"
+    INFO: prerequisites for using pg_rewind are met
+    INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.local.conf"
+    INFO: file "postgresql.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.conf"
+    INFO: 2 files would have been copied to "/tmp/repmgr-config-archive-node1"
+    INFO: directory "/tmp/repmgr-config-archive-node1" deleted
+    INFO: pg_rewind would now be executed
+    DETAIL: pg_rewind command is:
+      pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
+    <programlisting>
+    $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
+         --force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
+    NOTICE: using provided configuration file "/etc/repmgr.conf"
+    INFO: prerequisites for using pg_rewind are met
+    INFO: 2 files copied to "/tmp/repmgr-config-archive-node1"
+    NOTICE: executing pg_rewind
+    NOTICE: 2 files copied to /var/lib/pgsql/data
+    INFO: directory "/tmp/repmgr-config-archive-node1" deleted
+    INFO: deleting "recovery.done"
+    INFO: setting node 1's primary to node 2
+    NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
+    waiting for server to start.... done
+    server started
+    NOTICE: NODE REJOIN successful
+    DETAIL: node 1 is now attached to node 2</programlisting>
+    </para>
+
+  </refsect1>
+
+  <refsect1>
+    <title>See also</title>
+    <para>
+     <xref linkend="repmgr-standby-follow">
+    </para>
+  </refsect1>
+</refentry>
--- a/doc/repmgr-node-status.sgml
+++ b/doc/repmgr-node-status.sgml
@@ -1,16 +1,30 @@
-
-<chapter id="repmgr-node-status" xreflabel="repmgr node status">
+<refentry id="repmgr-node-status">
  <indexterm>
    <primary>repmgr node status</primary>
  </indexterm>
-  <title>repmgr node status</title>
-  <para>
-   Displays an overview of a node's basic information and replication
-   status. This command must be run on the local node.
-  </para>
-  <para>
-   Sample output (execute <command>repmgr node status</command>):
-   <programlisting>
+
+  <refmeta>
+    <refentrytitle>repmgr node status</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr node status</refname>
+    <refpurpose>show overview of a node's basic information and replication status</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      Displays an overview of a node's basic information and replication
+      status. This command must be run on the local node.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Example</title>
+    <para>
+    <programlisting>
+        $ repmgr -f /etc/repmgr.conf node status
        Node "node1":
            PostgreSQL version: 10beta1
            Total data size: 30 MB
@@ -20,10 +34,14 @@
            Archive command: (none)
            Replication connections: 2 (of maximal 10)
            Replication slots: 0 (of maximal 10)
-            Replication lag: n/a
-   </programlisting>
-  </para>
-  <para>
-    See <xref linkend="repmgr-node-check"> to diagnose issues.
-  </para>
-</chapter>
+            Replication lag: n/a</programlisting>
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>See also</title>
+    <para>
+      See <xref linkend="repmgr-node-check"> to diagnose issues.
+    </para>
+  </refsect1>
+</refentry>
--- a/doc/repmgr-primary-register.sgml
+++ b/doc/repmgr-primary-register.sgml
@@ -1,18 +1,85 @@
-<chapter id="repmgr-primary-register" xreflabel="repmgr primary register">
-  <indexterm><primary>repmgr primary register</primary></indexterm>
-  <title>repmgr primary register</title>
-  <para>
-   <command>repmgr primary register</command> registers a primary node in a
-   streaming replication cluster, and configures it for use with repmgr, including
-   installing the &repmgr; extension. This command needs to be executed before any
-   standby nodes are registered.
-  </para>
-  <para>
-   Execute with the <literal>--dry-run</literal> option to check what would happen without
-   actually registering the primary.
-  </para>
-  <para>
-   <command>repmgr master register</command> can be used as an alias for
-   <command>repmgr primary register</command>.
-  </para>
-</chapter>
+<refentry id="repmgr-primary-register">
+  <indexterm>
+    <primary>repmgr primary register</primary>
+  </indexterm>
+
+  <refmeta>
+    <refentrytitle>repmgr primary register</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr primary register</refname>
+    <refpurpose>initialise a repmgr installation and register the primary node</refpurpose>
+  </refnamediv>
+
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>repmgr primary register</command> registers a primary node in a
+      streaming replication cluster, and configures it for use with repmgr, including
+      installing the &repmgr; extension. This command needs to be executed before any
+      standby nodes are registered.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Execution</title>
+    <para>
+      Execute with the <option>--dry-run</option> option to check what would happen without
+      actually registering the primary.
+    </para>
+    <para>
+      <command>repmgr master register</command> can be used as an alias for
+      <command>repmgr primary register</command>.
+    </para>
+
+    <note>
+    <para>
+      If providing the configuration file location with <option>-f/--config-file</option>,
+      avoid using a relative path, as &repmgr; stores the configuration file location
+      in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
+      <xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert
+        a relative path into an absolute one, but this may not be the same as the path you
+        would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
+        to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
+        <filename>/path/to/repmgr.conf</filename>).
+    </para>
+    </note>
+  </refsect1>
+
+  <refsect1>
+
+    <title>Options</title>
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+            Check prerequisites but don't actually register the primary.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term><option>-F</option>, <option>--force</option></term>
+        <listitem>
+          <para>
+            Overwrite an existing node record.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>primary_register</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+  </refsect1>
+
+</refentry>
--- a/doc/repmgr-primary-unregister.sgml
+++ b/doc/repmgr-primary-unregister.sgml
@@ -1,18 +1,74 @@
-<chapter id="repmgr-primary-unregister" xreflabel="repmgr primary unregister">
-  <indexterm><primary>repmgr primary unregister</primary></indexterm>
-  <title>repmgr primary unregister</title>
-  <para>
-   <command>repmgr primary register</command> unregisters an inactive primary node
-   from the &repmgr; metadata. This is typically when the primary has failed and is
-   being removed from the cluster after a new primary has been promoted.
-  </para>
-  <para>
-   Execute with the <literal>--dry-run</literal> option to check what would happen without
-   actually unregistering the node.
-  </para>
+<refentry id="repmgr-primary-unregister">
+  <indexterm>
+    <primary>repmgr primary unregister</primary>
+  </indexterm>
+  <refmeta>
+    <refentrytitle>repmgr primary unregister</refentrytitle>
+  </refmeta>
+  <refnamediv>
+    <refname>repmgr primary unregister</refname>
+    <refpurpose>unregister an inactive primary node</refpurpose>
+  </refnamediv>

-  <para>
-   <command>repmgr master unregister</command> can be used as an alias for
-   <command>repmgr primary unregister</command>/
-  </para>
-</chapter>
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>repmgr primary unregister</command> unregisters an inactive primary node
+      from the &repmgr; metadata. This is typically done when the primary has failed and is
+      being removed from the cluster after a new primary has been promoted.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Execution</title>
+    <para>
+      <command>repmgr primary unregister</command> can be run on any active &repmgr; node,
+      with the ID of the node to unregister passed as <option>--node-id</option>.
+    </para>
+    <para>
+      Execute with the <literal>--dry-run</literal> option to check what would happen without
+      actually unregistering the node.
+    </para>
+
+    <para>
+      <command>repmgr master unregister</command> can be used as an alias for
+      <command>repmgr primary unregister</command>.
+    </para>
+  </refsect1>
+
+  <refsect1>
+
+    <title>Options</title>
+
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+            Check prerequisites but don't actually unregister the primary.
+          </para>
+        </listitem>
+      </varlistentry>
+
+     <varlistentry>
+        <term><option>--node-id</option></term>
+        <listitem>
+          <para>
+            ID of the inactive primary to be unregistered.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>primary_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+  </refsect1>
+
+</refentry>
--- a/doc/repmgr-standby-clone.sgml
+++ b/doc/repmgr-standby-clone.sgml
@@ -1,25 +1,37 @@
-<chapter id="repmgr-standby-clone" xreflabel="repmgr standby clone">
+<refentry id="repmgr-standby-clone">
  <indexterm>
    <primary>repmgr standby clone</primary>
    <seealso>cloning</seealso>
  </indexterm>
-  <title>repmgr standby clone</title>
-  <para>
-   <command>repmgr standby clone</command> clones a PostgreSQL node from another
-   PostgreSQL node, typically the primary, but optionally from any other node in
-   the cluster or from Barman. It creates the <filename>recovery.conf</filename> file required
-   to attach the cloned node to the primary node (or another standby, if cascading replication
-   is in use).
-  </para>
-  <note>
-   <simpara>
-    <command>repmgr standby clone</command> does not start the standby, and after cloning
-    <command>repmgr standby register</command> must be executed to notify &repmgr; of its presence.
-   </simpara>
-  </note>
+
+  <refmeta>
+    <refentrytitle>repmgr standby clone</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr standby clone</refname>
+    <refpurpose>clone a PostgreSQL standby node from another PostgreSQL node</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>repmgr standby clone</command> clones a PostgreSQL node from another
+      PostgreSQL node, typically the primary, but optionally from any other node in
+      the cluster or from Barman. It creates the <filename>recovery.conf</filename> file required
+      to attach the cloned node to the primary node (or another standby, if cascading replication
+      is in use).
+    </para>
+    <note>
+      <simpara>
+        <command>repmgr standby clone</command> does not start the standby, and after cloning
+        <command>repmgr standby register</command> must be executed to notify &repmgr; of its presence.
+      </simpara>
+    </note>
+  </refsect1>


-  <sect1 id="repmgr-standby-clone-config-file-copying" xreflabel="Copying configuration files">
+  <refsect1 id="repmgr-standby-clone-config-file-copying" xreflabel="Copying configuration files">
   <title>Handling configuration files</title>

   <para>
@@ -51,14 +63,14 @@
     configuration management tool such as Ansible, Chef, Puppet or Salt.
    </simpara>
   </tip>
-  </sect1>
+  </refsect1>

-  <sect1 id="repmgr-standby-clone-wal-management" xreflabel="Managing WAL during the cloning process">
+  <refsect1 id="repmgr-standby-clone-wal-management" xreflabel="Managing WAL during the cloning process">
   <title>Managing WAL during the cloning process</title>
   <para>
    When initially cloning a standby, you will need to ensure
    that all required WAL files remain available while the cloning is taking
-    place. To ensure this happens when using the default `pg_basebackup` method,
+    place. To ensure this happens when using the default <command>pg_basebackup</command> method,
    &repmgr; will set <command>pg_basebackup</command>'s <literal>--xlog-method</literal>
    parameter to <literal>stream</literal>,
    which will ensure all WAL files generated during the cloning process are
@@ -86,6 +98,158 @@
      <literal>--wal-method</literal>.
    </simpara>
   </note>
-  </sect1>
-</chapter>
+  </refsect1>
+
+
+  <refsect1 id="repmgr-standby-create-recovery-conf">
+   <title>Using a standby cloned by another method</title>
+   <para>
+     &repmgr; supports standbys cloned by another method (e.g. using <application>barman</application>'s
+     <command>barman recover</command> command).
+   </para>
+   <para>
+     To integrate the standby as a &repmgr; node, ensure the <filename>repmgr.conf</filename>
+     file is created for the node, then execute the command
+     <command>repmgr standby clone --recovery-conf-only</command>.
+     This will create the <filename>recovery.conf</filename> file needed to attach
+     the node to its upstream, and will also create a replication slot on the
+     upstream node if required.
+   </para>
+   <para>
+     Note that the upstream node must be running. An existing
+     <filename>recovery.conf</filename> will not be overwritten unless the
+     <option>-F/--force</option> option is provided.
+   </para>
+   <para>
+     Execute <command>repmgr standby clone --recovery-conf-only --dry-run</command>
+     to check the prerequisites for creating the <filename>recovery.conf</filename> file,
+     and display the contents of the file without actually creating it.
+   </para>
+  </refsect1>
+
+  <refsect1>
+
+    <title>Options</title>
+
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+            Check prerequisites but don't actually clone the standby.
+          </para>
+          <para>
+            If <option>--recovery-conf-only</option> is specified, the contents of
+            the generated <filename>recovery.conf</filename> file will be displayed
+            but the file itself will not be written.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-c, --fast-checkpoint</option></term>
+        <listitem>
+          <para>
+            force fast checkpoint (not effective when cloning from Barman)
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--copy-external-config-files[={samepath|pgdata}]</option></term>
+        <listitem>
+          <para>
+            copy configuration files located outside the data directory on the source
+            node to the same path on the standby (default) or to the
+            PostgreSQL data directory.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--no-upstream-connection</option></term>
+        <listitem>
+          <para>
+            when using Barman, do not connect to upstream node
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-R, --remote-user=USERNAME</option></term>
+        <listitem>
+          <para>
+            remote system username for SSH operations (default: current local system username)
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--recovery-conf-only</option></term>
+        <listitem>
+          <para>
+            create <filename>recovery.conf</filename> file for a previously cloned instance
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--replication-user</option></term>
+        <listitem>
+          <para>
+            user to make replication connections with (optional, not usually required)
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--superuser</option></term>
+        <listitem>
+          <para>
+            if the &repmgr; user is not a superuser, the name of a valid superuser must
+            be provided with this option
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+      <varlistentry>
+        <term><option>--upstream-conninfo</option></term>
+        <listitem>
+          <para>
+            <literal>primary_conninfo</literal> value to write in recovery.conf
+            when the intended upstream server does not yet exist
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--upstream-node-id</option></term>
+        <listitem>
+          <para>
+            ID of the upstream node to replicate from (optional, defaults to primary node)
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><option>--without-barman</option></term>
+        <listitem>
+          <para>
+            do not use Barman even if configured
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>standby_clone</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+  </refsect1>
+
+</refentry>

--- a/doc/repmgr-standby-follow.sgml
+++ b/doc/repmgr-standby-follow.sgml
@@ -1,21 +1,108 @@
-<chapter id="repmgr-standby-follow" xreflabel="repmgr standby follow">
+<refentry id="repmgr-standby-follow">
  <indexterm>
    <primary>repmgr standby follow</primary>
  </indexterm>
-  <title>repmgr standby follow</title>
-  <para>
-   Attaches the standby to a new primary. This command requires a valid
-   <filename>repmgr.conf</filename> file for the standby, either specified
-   explicitly with <literal>-f/--config-file</literal> or located in a
-   default location; no additional arguments are required.
-  </para>
-  <para>
-   This command will force a restart of the standby server, which must be
-   running. It can only be used to attach a standby to a new primary node.
-  </para>
-  <para>
-   To re-add an inactive node to the replication cluster, see
-   <xref linkend="repmgr-node-rejoin">
-  </para>
-</chapter>
+
+  <refmeta>
+    <refentrytitle>repmgr standby follow</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr standby follow</refname>
+    <refpurpose>attach a standby to a new primary</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      Attaches the standby to a new primary. This command requires a valid
+      <filename>repmgr.conf</filename> file for the standby, either specified
+      explicitly with <literal>-f/--config-file</literal> or located in a
+      default location; no additional arguments are required.
+    </para>
+    <para>
+      This command will force a restart of the standby server, which must be
+      running. It can only be used to attach an active standby to the current primary node
+   (and not to another standby).
+    </para>
+    <para>
+      To re-add an inactive node to the replication cluster, see
+      <xref linkend="repmgr-node-rejoin">.
+    </para>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Example</title>
+    <para>
+      <programlisting>
+      $ repmgr -f /etc/repmgr.conf standby follow
+      INFO: setting node 3's primary to node 2
+      NOTICE: restarting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/postgres/data' restart"
+      waiting for server to shut down........ done
+      server stopped
+      waiting for server to start.... done
+      server started
+      NOTICE: STANDBY FOLLOW successful
+      DETAIL: node 3 is now attached to node 2</programlisting>
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+            Check prerequisites but don't actually follow a new primary.
+          </para>
+          <important>
+            <para>
+              This does not guarantee the standby can follow the primary; in
+              particular, whether the primary and standby timelines have diverged
+              can currently only be determined by actually attempting to
+              attach the standby to the primary.
+            </para>
+          </important>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-W</option></term>
+        <term><option>--wait</option></term>
+        <listitem>
+          <para>
+            Wait for a primary to appear. &repmgr; will wait for up to
+            <varname>primary_follow_timeout</varname> seconds
+            (default: 60 seconds) to verify that the standby is following the new primary.
+            This value can be defined in <filename>repmgr.conf</filename>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>standby_follow</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+    <para>
+      If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the primary
+      being followed, <literal>%c</literal> with its <literal>conninfo</literal> string, and
+      <literal>%a</literal> with its node name.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>See also</title>
+    <para>
+     <xref linkend="repmgr-node-rejoin">
+    </para>
+  </refsect1>
+</refentry>

--- a/doc/repmgr-standby-promote.sgml
+++ b/doc/repmgr-standby-promote.sgml
@@ -1,18 +1,59 @@
-<chapter id="repmgr-standby-promote" xreflabel="repmgr standby promote">
+<refentry id="repmgr-standby-promote">
  <indexterm>
    <primary>repmgr standby promote</primary>
  </indexterm>
-  <title>repmgr standby promote</title>
-  <para>
-   Promotes a standby to a primary if the current primary has failed. This
-   command requires a valid <filename>repmgr.conf</filename> file for the standby, either
-   specified explicitly  with <literal>-f/--config-file</literal> or located in a
-    default location; no additional arguments are required.
-  </para>
-  <para>
-   If the standby promotion succeeds, the server will not need to be
-   restarted. However any other standbys will need to follow the new server,
-   by using <xref linkend="repmgr-standby-follow">; if <command>repmgrd</command>
-   is active, it will handle this automatically.
-  </para>
-</chapter>
+
+  <refmeta>
+    <refentrytitle>repmgr standby promote</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr standby promote</refname>
+    <refpurpose>promote a standby to a primary</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      Promotes a standby to a primary if the current primary has failed. This
+      command requires a valid <filename>repmgr.conf</filename> file for the standby, either
+      specified explicitly with <literal>-f/--config-file</literal> or located in a
+      default location; no additional arguments are required.
+    </para>
+    <para>
+      If the standby promotion succeeds, the server will not need to be
+      restarted. However any other standbys will need to follow the new server,
+      by using <xref linkend="repmgr-standby-follow">; if <application>repmgrd</application>
+        is active, it will handle this automatically.
+    </para>
+    <para>
+      Note that &repmgr; will wait for up to <varname>promote_check_timeout</varname> seconds
+      (default: 60 seconds) to verify that the standby has been promoted, and will
+      check the promotion every <varname>promote_check_interval</varname> seconds (default: 1 second).
+      Both values can be defined in <filename>repmgr.conf</filename>.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Example</title>
+    <para>
+      <programlisting>
+      $ repmgr -f /etc/repmgr.conf standby promote
+      NOTICE: promoting standby to primary
+      DETAIL: promoting server "node2" (ID: 2) using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/postgres/data' promote"
+      server promoting
+      DEBUG: setting node 2 as primary and marking existing primary as failed
+      NOTICE: STANDBY PROMOTE successful
+      DETAIL: server "node2" (ID: 2) was successfully promoted to primary</programlisting>
+    </para>
+  </refsect1>
+
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>standby_promote</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+  </refsect1>
+
+</refentry>
--- a/doc/repmgr-standby-register.sgml
+++ b/doc/repmgr-standby-register.sgml
@@ -1,50 +1,183 @@
-<chapter id="repmgr-standby-register" xreflabel="repmgr standby register">
-  <indexterm><primary>repmgr standby register</primary></indexterm>
-  <title>repmgr standby register</title>
-  <para>
-   <command>repmgr standby register</command> adds a standby's information to
-   the &repmgr; metadata. This command needs to be executed to enable
-   promote/follow operations and to allow <command>repmgrd</command> to work with the node.
-   An existing standby can be registered using this command. Execute with the
-   <literal>--dry-run</literal> option to check what would happen without actually registering the
-   standby.
-  </para>
+<refentry id="repmgr-standby-register" xreflabel="repmgr standby register">
+  <indexterm>
+    <primary>repmgr standby register</primary>
+  </indexterm>

-  <sect1 id="repmgr-standby-register-wait" xreflabel="repmgr standby register --wait">
+  <refmeta>
+    <refentrytitle>repmgr standby register</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr standby register</refname>
+    <refpurpose>add a standby's information to the &repmgr; metadata</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>repmgr standby register</command> adds a standby's information to
+      the &repmgr; metadata. This command needs to be executed to enable
+      promote/follow operations and to allow <application>repmgrd</application> to work with the node.
+      An existing standby can be registered using this command. Execute with the
+      <literal>--dry-run</literal> option to check what would happen without actually registering the
+      standby.
+    </para>
+
+    <note>
+      <para>
+        If providing the configuration file location with <literal>-f/--config-file</literal>,
+        avoid using a relative path, as &repmgr; stores the configuration file location
+        in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
+        <xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert
+          a relative path into an absolute one, but this may not be the same as the path you
+          would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
+          to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
+          <filename>/path/to/repmgr.conf</filename>).
+      </para>
+    </note>
+  </refsect1>
+
+  <refsect1 id="repmgr-standby-register-wait-start" xreflabel="repmgr standby register --wait-start">
+   <title>Waiting for the standby to start</title>
+   <para>
+     By default, &repmgr; will wait 30 seconds for the standby to become available before
+     aborting with a connection error. This is useful when setting up a standby from a script,
+     as the standby may not have fully started up by the time <command>repmgr standby register</command>
+     is executed.
+   </para>
+   <para>
+     To change the timeout, pass the desired value with the <literal>--wait-start</literal> option.
+     A value of <literal>0</literal> will disable the timeout.
+   </para>
+   <para>
+     The timeout will be ignored if <literal>-F/--force</literal> was provided.
+   </para>
+  </refsect1>
+
+  <refsect1 id="repmgr-standby-register-wait-sync" xreflabel="repmgr standby register --wait-sync">
   <title>Waiting for the registration to propagate to the standby</title>
   <para>
-     Depending on your environment and workload, it may take some time for
-     the standby's node record to propagate from the primary to the standby. Some
-     actions (such as starting <command>repmgrd</command>) require that the standby's node record
+     Depending on your environment and workload, it may take some time for the standby's node record
+     to propagate from the primary to the standby. Some actions (such as starting
+     <application>repmgrd</application>) require that the standby's node record
     is present and up-to-date to function correctly.
   </para>
   <para>
-    By providing the option <literal>--wait-sync</literal> to the
+    By providing the option <option>--wait-sync</option> to the
    <command>repmgr standby register</command> command, &repmgr; will wait
    until the record is synchronised before exiting. An optional timeout (in
-    seconds) can be added to this option (e.g. <literal>--wait-sync=60</literal>).
+    seconds) can be added to this option (e.g. <option>--wait-sync=60</option>).
   </para>
-  </sect1>
+  </refsect1>

-  <sect1 id="rempgr-standby-register-inactive-node" xreflabel="Registering an inactive node">
+  <refsect1 id="repmgr-standby-register-inactive-node" xreflabel="Registering an inactive node">
   <title>Registering an inactive node</title>
   <para>
    Under some circumstances you may wish to register a standby which is not
    yet running; this can be the case when using provisioning tools to create
-    a complex replication cluster. In this case, by using the <literal>-F/--force</literal>
+    a complex replication cluster. In this case, by using the <option>-F/--force</option>
    option and providing the connection parameters to the primary server,
    the standby can be registered.
   </para>
   <para>
    Similarly, with cascading replication it may be necessary to register
    a standby whose upstream node has not yet been registered - in this case,
-    using <literal>-F/--force</literal> will result in the creation of an inactive placeholder
+    using <option>-F/--force</option> will result in the creation of an inactive placeholder
    record for the upstream node, which will however later need to be registered
-    with the <literal>-F/--force</literal> option too.
+    with the <option>-F/--force</option> option too.
   </para>
   <para>
    When used with <command>repmgr standby register</command>, care should be taken that use of the
-    <literal>-F/--force</literal> option does not result in an incorrectly configured cluster.
+    <option>-F/--force</option> option does not result in an incorrectly configured cluster.
   </para>
-  </sect1>
-</chapter>
+  </refsect1>
+
+  <refsect1 id="repmgr-standby-register-node-cloned-other-source">
+    <title>Registering a node not cloned by repmgr</title>
+    <para>
+      If you've cloned a standby using another method (e.g. <application>barman</application>'s
+     <command>barman recover</command> command), first execute
+     <link linkend="repmgr-standby-create-recovery-conf">repmgr standby clone --recovery-conf-only</link>
+     to add the <filename>recovery.conf</filename> file, then register the standby as usual.
+    </para>
+  </refsect1>
+
+  <refsect1>
+
+    <title>Options</title>
+
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+            Check prerequisites but don't actually register the standby.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+      <varlistentry>
+       <term><option>-F</option>, <option>--force</option></term>
+        <listitem>
+          <para>
+            Overwrite an existing node record
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+      <varlistentry>
+        <term><option>--upstream-node-id</option></term>
+        <listitem>
+          <para>
+            ID of the upstream node to replicate from (optional)
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--wait-start</option></term>
+        <listitem>
+          <para>
+            wait for the standby to start (timeout in seconds, default 30 seconds)
+          </para>
+        </listitem>
+      </varlistentry>
+
+     <varlistentry>
+        <term><option>--wait-sync</option></term>
+        <listitem>
+          <para>
+            wait for the node record to synchronise to the standby (optional timeout in seconds)
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>standby_register</literal> <link linkend="event-notifications">event notification</link>
+      will be generated immediately after the node record is updated on the primary.
+    </para>
+
+    <para>
+      If the <option>--wait-sync</option> option is provided, a <literal>standby_register_sync</literal>
+      event notification will be generated immediately after the node record has synchronised to the
+      standby.
+    </para>
+
+    <para>
+      If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the
+      primary node, <literal>%c</literal> with its <literal>conninfo</literal> string, and
+      <literal>%a</literal> with its node name.
+    </para>
+
+  </refsect1>
+
+</refentry>
--- a/doc/repmgr-standby-switchover.sgml
+++ b/doc/repmgr-standby-switchover.sgml
@@ -1,27 +1,196 @@
-<chapter id="repmgr-standby-switchover" xreflabel="repmgr standby switchover">
+<refentry id="repmgr-standby-switchover">
  <indexterm>
    <primary>repmgr standby switchover</primary>
  </indexterm>
-  <title>repmgr standby switchover</title>
-  <para>
-    Promotes a standby to primary and demotes the existing primary to a standby.
-    This command must be run on the standby to be promoted, and requires a
-    passwordless SSH connection to the current primary.
-  </para>
-  <para>
-    If other standbys are connected to the demotion candidate, &repmgr; can instruct
-    these to follow the new primary if the option <literal>--siblings-follow</literal>
-    is specified.
-  </para>
-  <para>
-    Execute with the <literal>--dry-run</literal> option to test the switchover as far as
-    possible without actually changing the status of either node.
-  </para>
-  <para>
-    <command>repmgrd</command> should not be active on any nodes while a switchover is being
-    executed. This restriction may be lifted in a later version.
-  </para>
-  <para>
-    For more details see the section <xref linkend="performing-switchover">.
-  </para>
-</chapter>
+
+  <refmeta>
+    <refentrytitle>repmgr standby switchover</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr standby switchover</refname>
+    <refpurpose>promote a standby to primary and demote the existing primary to a standby</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      Promotes a standby to primary and demotes the existing primary to a standby.
+      This command must be run on the standby to be promoted, and requires a
+      passwordless SSH connection to the current primary.
+    </para>
+    <para>
+      If other standbys are connected to the demotion candidate, &repmgr; can instruct
+      these to follow the new primary if the option <literal>--siblings-follow</literal>
+      is specified. This requires a passwordless SSH connection between the promotion
+      candidate (new primary) and the standbys attached to the demotion candidate
+      (existing primary).
+    </para>
+    <note>
+      <para>
+        Performing a switchover is a non-trivial operation. In particular it
+        relies on the current primary being able to shut down cleanly and quickly.
+        &repmgr; will attempt to check for potential issues but cannot guarantee
+        a successful switchover.
+      </para>
+    </note>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--always-promote</option></term>
+        <listitem>
+          <para>
+            Promote standby to primary, even if it is behind original primary
+            (original primary will be shut down in any case).
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--dry-run</option></term>
+        <listitem>
+          <para>
+            Check prerequisites but don't actually execute a switchover.
+          </para>
+          <important>
+            <para>
+              Success of <option>--dry-run</option> does not imply the switchover will
+              complete successfully, only that
+              the prerequisites for performing the operation are met.
+            </para>
+          </important>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-F</option></term>
+        <term><option>--force</option></term>
+        <listitem>
+          <para>
+            Ignore warnings and continue anyway.
+          </para>
+          <para>
+            Specifically, if a problem is encountered when shutting down the current primary,
+            using <option>-F/--force</option> will cause &repmgr; to continue by promoting
+            the standby to be the new primary, and if <option>--siblings-follow</option> is
+            specified, attach any other standbys to the new primary.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--force-rewind</option></term>
+        <listitem>
+          <para>
+            Use <application>pg_rewind</application> to reintegrate the old primary if necessary
+            (PostgreSQL 9.5 and later).
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-R</option></term>
+        <term><option>--remote-user</option></term>
+        <listitem>
+          <para>
+            System username for remote SSH operations (defaults to local system user).
+          </para>
+        </listitem>
+      </varlistentry>
+
+     <varlistentry>
+        <term><option>--siblings-follow</option></term>
+        <listitem>
+          <para>
+            Have standbys attached to the old primary follow the new primary.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+  </refsect1>
+
+  <refsect1>
+    <title>Execution</title>
+
+    <para>
+      Execute with the <literal>--dry-run</literal> option to test the switchover as far as
+      possible without actually changing the status of either node.
+    </para>
+    <para>
+      <application>repmgrd</application> should not be active on any nodes while a switchover is being
+      executed. This restriction may be lifted in a later version.
+    </para>
+    <para>
+      External database connections, e.g. from an application, should not be permitted while
+      the switchover is taking place. In particular, active transactions on the primary
+      can potentially disrupt the shutdown process.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      <literal>standby_switchover</literal> and <literal>standby_promote</literal>
+      <link linkend="event-notifications">event notifications</link> will be generated for the new primary,
+      and a <literal>node_rejoin</literal> event notification for the former primary (new standby).
+    </para>
+    <para>
+      If using an event notification script, <literal>standby_switchover</literal>
+      will populate the placeholder parameter <literal>%p</literal> with the node ID of
+      the former primary.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Exit codes</title>
+    <para>
+      The following exit codes can be emitted by <literal>repmgr standby switchover</literal>:
+    </para>
+    <variablelist>
+
+      <varlistentry>
+        <term><option>SUCCESS (0)</option></term>
+        <listitem>
+          <para>
+            The switchover completed successfully.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>ERR_SWITCHOVER_FAIL (18)</option></term>
+        <listitem>
+          <para>
+            The switchover could not be executed.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>ERR_SWITCHOVER_INCOMPLETE (22)</option></term>
+        <listitem>
+          <para>
+            The switchover was executed but a problem was encountered.
+            Typically this means the former primary could not be reattached
+            as a standby.
+          </para>
+        </listitem>
+      </varlistentry>
+
+   </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>See also</title>
+    <para>
+      For more details see the section <xref linkend="performing-switchover">.
+    </para>
+  </refsect1>
+
+</refentry>
--- a/doc/repmgr-standby-unregister.sgml
+++ b/doc/repmgr-standby-unregister.sgml
@@ -1,29 +1,70 @@
-<chapter id="repmgr-standby-unregister" xreflabel="repmgr standby unregister">
-  <indexterm><primary>repmgr standby unregister</primary></indexterm>
-  <title>repmgr standby unregister</title>
-  <para>
-    Unregisters a standby with `repmgr`. This command does not affect the actual
-    replication, just removes the standby's entry from the &repmgr; metadata.
-  </para>
-  <para>
-    To unregister a running standby, execute:
-    <programlisting>
-      repmgr standby unregister -f /etc/repmgr.conf</programlisting>
-  </para>
-  <para>
-    This will remove the standby record from &repmgr;'s internal metadata
-    table (<literal>repmgr.nodes</literal>). A <literal>standby_unregister</literal>
-    event notification will be recorded in the <literal>repmgr.events</literal> table.
-  </para>
-  <para>
-   If the standby is not running, the command can be executed on another
-   node by providing the id of the node to be unregistered using
-   the command line parameter <literal>--node-id</literal>, e.g. executing the following
-   command on the master server will unregister the standby with
-   id <literal>3</literal>:
-   <programlisting>
-    repmgr standby unregister -f /etc/repmgr.conf --node-id=3
-   </programlisting>
-  </para>
-</chapter>
+<refentry id="repmgr-standby-unregister">
+  <indexterm>
+    <primary>repmgr standby unregister</primary>
+  </indexterm>
+
+  <refmeta>
+    <refentrytitle>repmgr standby unregister</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr standby unregister</refname>
+    <refpurpose>remove a standby's information from the &repmgr; metadata</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      Unregisters a standby with &repmgr;. This command does not affect the actual
+      replication, just removes the standby's entry from the &repmgr; metadata.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Execution</title>
+    <para>
+      To unregister a running standby, execute:
+      <programlisting>
+        repmgr standby unregister -f /etc/repmgr.conf</programlisting>
+    </para>
+    <para>
+      This will remove the standby record from &repmgr;'s internal metadata
+      table (<literal>repmgr.nodes</literal>). A <literal>standby_unregister</literal>
+      event notification will be recorded in the <literal>repmgr.events</literal> table.
+    </para>
+    <para>
+      If the standby is not running, the command can be executed on another
+      node by providing the id of the node to be unregistered using
+      the command line parameter <literal>--node-id</literal>, e.g. executing the following
+      command on the primary server will unregister the standby with
+      id <literal>3</literal>:
+      <programlisting>
+        repmgr standby unregister -f /etc/repmgr.conf --node-id=3</programlisting>
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--node-id</option></term>
+        <listitem>
+          <para>
+            <varname>node_id</varname> of the node to unregister (optional)
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>standby_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+  </refsect1>
+
+</refentry>

--- a/doc/repmgr-witness-register.sgml
+++ b/doc/repmgr-witness-register.sgml
@@ -0,0 +1,60 @@
+<refentry id="repmgr-witness-register">
+  <indexterm>
+    <primary>repmgr witness register</primary>
+    <seealso>witness server</seealso>
+  </indexterm>
+
+  <refmeta>
+    <refentrytitle>repmgr witness register</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr witness register</refname>
+    <refpurpose>add a witness node's information to the &repmgr; metadata</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>repmgr witness register</command> adds a witness server's node
+      record to the &repmgr; metadata, and if necessary initialises the witness
+      node by installing the &repmgr; extension and copying the &repmgr; metadata
+      to the witness server. This command needs to be executed to enable
+      use of the witness server with <application>repmgrd</application>.
+    </para>
+    <para>
+      When executing <command>repmgr witness register</command>, connection information
+      for the cluster primary server must also be provided. &repmgr; will automatically
+      use the <varname>user</varname> and <varname>dbname</varname> values defined
+      in the <varname>conninfo</varname> string defined in the witness node's
+      <filename>repmgr.conf</filename>, if these are not explicitly provided.
+    </para>
+    <para>
+      Execute with the <literal>--dry-run</literal> option to check what would happen
+      without actually registering the witness server.
+    </para>
+  </refsect1>
+  <refsect1>
+    <title>Example</title>
+    <para>
+      <programlisting>
+    $ repmgr -f /etc/repmgr.conf witness register -h node1
+    INFO: connecting to witness node "node3" (ID: 3)
+    INFO: connecting to primary node
+    NOTICE: attempting to install extension "repmgr"
+    NOTICE: "repmgr" extension successfully installed
+    INFO: witness registration complete
+    NOTICE: witness node "node3" (ID: 3) successfully registered
+      </programlisting>
+    </para>
+  </refsect1>
+
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>witness_register</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+  </refsect1>
+
+</refentry>
--- a/doc/repmgr-witness-unregister.sgml
+++ b/doc/repmgr-witness-unregister.sgml
@@ -0,0 +1,73 @@
+<refentry id="repmgr-witness-unregister" xreflabel="repmgr witness unregister">
+  <indexterm>
+    <primary>repmgr witness unregister</primary>
+  </indexterm>
+
+  <refmeta>
+    <refentrytitle>repmgr witness unregister</refentrytitle>
+  </refmeta>
+
+  <refnamediv>
+    <refname>repmgr witness unregister</refname>
+    <refpurpose>remove a witness node's information from the &repmgr; metadata</refpurpose>
+  </refnamediv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>repmgr witness unregister</command> removes a witness server's node
+      record from the &repmgr; metadata.
+    </para>
+    <para>
+      The node does not have to be running to be unregistered; however, if it is not
+      running, connection information for the primary server must be provided.
+    </para>
+    <para>
+      Execute with the <literal>--dry-run</literal> option to check what would happen
+      without actually unregistering the witness server.
+    </para>
+  </refsect1>
+  <refsect1>
+    <title>Examples</title>
+    <para>
+      Unregistering a running witness node:
+      <programlisting>
+    $ repmgr -f /etc/repmgr.conf witness unregister
+    INFO: connecting to witness node "node3" (ID: 3)
+    INFO: unregistering witness node 3
+    INFO: witness unregistration complete
+    DETAIL: witness node with id 3 (conninfo: host=node3 dbname=repmgr user=repmgr port=5499) successfully unregistered</programlisting>
+    </para>
+    <para>
+      Unregistering a non-running witness node:
+      <programlisting>
+        $ repmgr -f /etc/repmgr.conf witness unregister -h node1 -p 5501  -F
+        INFO: connecting to witness node "node3" (ID: 3)
+        NOTICE: unable to connect to witness node "node3" (ID: 3), removing node record on cluster primary only
+        INFO: unregistering witness node 3
+        INFO: witness unregistration complete
+        DETAIL: witness node with id 3 (conninfo: host=node3 dbname=repmgr user=repmgr port=5499) successfully unregistered</programlisting>
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Notes</title>
+    <para>
+      This command will not make any changes to the witness node itself and will neither
+      remove any data from the witness database nor stop the PostgreSQL instance.
+    </para>
+    <para>
+      A witness node which has been unregistered can be re-registered with
+      <link linkend="repmgr-witness-register">repmgr witness register --force</link>.
+    </para>
+  </refsect1>
+
+
+  <refsect1>
+    <title>Event notifications</title>
+    <para>
+      A <literal>witness_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
+    </para>
+  </refsect1>
+
+</refentry>
--- a/doc/repmgr.sgml
+++ b/doc/repmgr.sgml
@@ -23,13 +23,13 @@

  <abstract>
   <para>
-   Thisis the official documentation of &repmgr; &repmgrversion; for
+   This is the official documentation of &repmgr; &repmgrversion; for
   use with PostgreSQL 9.3 - PostgreSQL 10.
   It describes the functionality supported by the current version of &repmgr;.
   </para>

   <para>
-    repmgr was developed by
+    &repmgr; was developed by
    <ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
    along with contributions from other individuals and companies.
    Contributions from the community are appreciated and welcome - get
@@ -73,6 +73,7 @@
  &promoting-standby;
  &follow-new-primary;
  &switchover;
+  &configuring-witness-server;
  &event-notifications;
  &upgrading-repmgr;
 </part>
@@ -84,8 +85,10 @@
  &repmgrd-demonstration;
  &repmgrd-cascading-replication;
  &repmgrd-network-split;
+  &repmgrd-witness-server;
  &repmgrd-degraded-monitoring;
  &repmgrd-monitoring;
+  &repmgrd-bdr;
 </part>

 <part id="repmgr-command-reference">
@@ -99,6 +102,8 @@
  &repmgr-standby-promote;
  &repmgr-standby-follow;
  &repmgr-standby-switchover;
+  &repmgr-witness-register;
+  &repmgr-witness-unregister;
  &repmgr-node-status;
  &repmgr-node-check;
  &repmgr-node-rejoin;
@@ -109,7 +114,10 @@
  &repmgr-cluster-cleanup;
 </part>

+ &appendix-release-notes;
 &appendix-signatures;
+ &appendix-faq;
+ &appendix-packages;

 <![%include-index;[&bookindex;]]>
 <![%include-xslt-index;[<index id="bookindex"></index>]]>
--- a/doc/repmgrd-automatic-failover.sgml
+++ b/doc/repmgrd-automatic-failover.sgml
@@ -1,8 +1,13 @@
 <chapter id="repmgrd-automatic-failover" xreflabel="Automatic failover with repmgrd">
+ <indexterm>
+   <primary>repmgrd</primary>
+   <secondary>automatic failover</secondary>
+ </indexterm>
+
 <title>Automatic failover with repmgrd</title>

 <para>
-  <command>repmgrd</command> is a management and monitoring daemon which runs
+  <application>repmgrd</application> is a management and monitoring daemon which runs
  on each node in a replication cluster. It can automate actions such as
  failover and updating standbys to follow the new primary, as well as
  providing monitoring information about the state of each standby.
--- a/doc/repmgrd-bdr.sgml
+++ b/doc/repmgrd-bdr.sgml
@@ -0,0 +1,414 @@
+<chapter id="repmgrd-bdr">
+  <indexterm>
+    <primary>repmgrd</primary>
+    <secondary>BDR</secondary>
+  </indexterm>
+
+  <indexterm>
+    <primary>BDR</primary>
+  </indexterm>
+
+  <title>BDR failover with repmgrd</title>
+  <para>
+    &repmgr; 4.x provides support for monitoring BDR nodes and taking action in
+    case one of the nodes fails.
+  </para>
+  <note>
+    <simpara>
+      Due to the nature of BDR, it's only safe to use this solution for
+      a two-node scenario. Introducing additional nodes will create an inherent
+      risk of node desynchronisation if a node goes down without being cleanly
+      removed from the cluster.
+    </simpara>
+  </note>
+  <para>
+    In contrast to streaming replication, there's no concept of "promoting" a new
+    primary node with BDR. Instead, "failover" involves monitoring both nodes
+    with <application>repmgrd</application> and redirecting queries from the failed node to the remaining
+    active node. This can be done by using an
+    <link linkend="event-notifications">event notification</link> script
+    which is called by <application>repmgrd</application> to dynamically
+    reconfigure a proxy server/connection pooler such as <application>PgBouncer</application>.
+  </para>
+
+  <sect1 id="bdr-prerequisites" xreflabel="BDR prerequisites">
+    <title>Prerequisites</title>
+    <para>
+      &repmgr; 4 requires PostgreSQL 9.4 or 9.6 with the BDR 2 extension
+      enabled and configured for a two-node BDR network. &repmgr; 4 packages
+      must be installed on each node before attempting to configure
+      <application>repmgr</application>.
+    </para>
+    <note>
+      <simpara>
+        &repmgr; 4 will refuse to install if it detects more than two BDR nodes.
+      </simpara>
+    </note>
+    <para>
+      Application database connections <emphasis>must</emphasis> be passed through a proxy server/
+      connection pooler such as <application>PgBouncer</application>, and it must be possible to dynamically
+      reconfigure that from <application>repmgrd</application>. The example demonstrated in this document
+      will use <application>PgBouncer</application>.
+    </para>
+    <para>
+      The proxy server / connection poolers must <emphasis>not</emphasis>
+      be installed on the database servers.
+    </para>
+    <para>
+      For this example, it's assumed password-less SSH connections are available
+      from the PostgreSQL servers to the servers where <application>PgBouncer</application>
+      runs, and that the user on those servers has permission to alter the
+      <application>PgBouncer</application> configuration files.
+    </para>
+    <para>
+      PostgreSQL connections must be possible between each node, and each node
+      must be able to connect to each PgBouncer instance.
+    </para>
+  </sect1>
+
+  <sect1 id="bdr-configuration" xreflabel="BDR configuration">
+    <title>Configuration</title>
+    <para>
+      A sample configuration for <filename>repmgr.conf</filename> on each
+      BDR node would look like this:
+      <programlisting>
+        # Node information
+        node_id=1
+        node_name='node1'
+        conninfo='host=node1 dbname=bdrtest user=repmgr connect_timeout=2'
+        data_directory='/var/lib/postgresql/data'
+        replication_type='bdr'
+
+        # Event notification configuration
+        event_notifications=bdr_failover
+        event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1'
+
+        # repmgrd options
+        monitor_interval_secs=5
+        reconnect_attempts=6
+        reconnect_interval=5</programlisting>
+    </para>
+    <para>
+      Adjust settings as appropriate; copy and adjust for the second node (particularly
+      the values <varname>node_id</varname>, <varname>node_name</varname>
+      and <varname>conninfo</varname>).
+    </para>
+    <para>
+      Note that the values provided for the <varname>conninfo</varname> string
+      must be valid for connections from <emphasis>both</emphasis> nodes in the
+      replication cluster. The database must be the BDR-enabled database.
+    </para>
+    <para>
+      If defined, the <varname>event_notifications</varname> parameter
+      will restrict execution of <varname>event_notification_command</varname>
+      to the specified event(s).
+    </para>
+    <note>
+      <simpara>
+        <varname>event_notification_command</varname> is the script which does the actual "heavy lifting"
+        of reconfiguring the proxy server/ connection pooler. It is fully
+        user-definable; a reference implementation is documented below.
+      </simpara>
+    </note>
+
+  </sect1>
+
+  <sect1 id="bdr-repmgr-setup" xreflabel="repmgr setup with BDR">
+    <title>repmgr setup</title>
+    <para>
+      Register both nodes; example on <literal>node1</literal>:
+      <programlisting>
+        $ repmgr -f /etc/repmgr.conf bdr register
+        NOTICE: attempting to install extension "repmgr"
+        NOTICE: "repmgr" extension successfully installed
+        NOTICE: node record created for node 'node1' (ID: 1)
+        NOTICE: BDR node 1 registered (conninfo: host=node1 dbname=bdrtest user=repmgr)</programlisting>
+    </para>
+    <para>
+      and on <literal>node2</literal>:
+      <programlisting>
+        $ repmgr -f /etc/repmgr.conf bdr register
+        NOTICE: node record created for node 'node2' (ID: 2)
+        NOTICE: BDR node 2 registered (conninfo: host=node2 dbname=bdrtest user=repmgr)</programlisting>
+    </para>
+    <para>
+      The <literal>repmgr</literal> extension will be automatically created
+      when the first node is registered, and will be propagated to the second
+      node.
+    </para>
+    <important>
+      <simpara>
+        Ensure the &repmgr; package is available on both nodes before
+        attempting to register the first node.
+      </simpara>
+    </important>
+    <para>
+      At this point the meta data for both nodes has been created; executing
+      <xref linkend="repmgr-cluster-show"> (on either node) should produce output like this:
+      <programlisting>
+        $ repmgr -f /etc/repmgr.conf cluster show
+        ID | Name  | Role | Status    | Upstream | Location | Connection string
+       ----+-------+------+-----------+----------+----------+--------------------------------------------------------
+        1  | node1 | bdr  | * running |          | default  | host=node1 dbname=bdrtest user=repmgr connect_timeout=2
+        2  | node2 | bdr  | * running |          | default  | host=node2 dbname=bdrtest user=repmgr connect_timeout=2</programlisting>
+    </para>
+    <para>
+      Additionally it's possible to display a log of significant events; executing
+      <xref linkend="repmgr-cluster-event"> (on either node) should produce output like this:
+      <programlisting>
+        $ repmgr -f /etc/repmgr.conf cluster event
+        Node ID | Event        | OK | Timestamp           | Details
+       ---------+--------------+----+---------------------+----------------------------------------------
+        2       | bdr_register | t  | 2017-07-27 17:51:48 | node record created for node 'node2' (ID: 2)
+        1       | bdr_register | t  | 2017-07-27 17:51:00 | node record created for node 'node1' (ID: 1)
+      </programlisting>
+    </para>
+    <para>
+      At this point there will only be records for the two node registrations (displayed here
+      in reverse chronological order).
+    </para>
+  </sect1>
+
+  <sect1 id="bdr-event-notification-command" xreflabel="BDR failover event notification command">
+    <title>Defining the "event_notification_command"</title>
+    <para>
+      Key to "failover" execution is the <literal>event_notification_command</literal>,
+      which is a user-definable script specified in <filename>repmgr.conf</filename>
+      and which can use a &repmgr; <link linkend="event-notifications">event notification</link>
+      to reconfigure the proxy server / connection pooler so it points to the other, still-active node.
+      Details of the event will be passed as parameters to the script.
+    </para>
+    <para>
+      The following parameter placeholders are available for the script definition in <filename>repmgr.conf</filename>;
+      these will be replaced with the appropriate value when the script is executed:
+    </para>
+
+    <variablelist>
+      <varlistentry>
+        <term><option>%n</option></term>
+        <listitem>
+          <para>
+            node ID
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>%e</option></term>
+        <listitem>
+          <para>
+            event type
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>%s</option></term>
+        <listitem>
+          <para>
+            success (1 or 0)
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><option>%t</option></term>
+        <listitem>
+          <para>
+            timestamp
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>%d</option></term>
+        <listitem>
+          <para>
+            details
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><option>%c</option></term>
+        <listitem>
+          <para>
+            conninfo string of the next available node (<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><option>%a</option></term>
+        <listitem>
+          <para>
+            name of the next available node (<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+    <para>
+      Note that <literal>%c</literal> and <literal>%a</literal> are only provided with
+      particular failover events, in this case <varname>bdr_failover</varname>.
+    </para>
+    <para>
+      The provided sample script
+     (<literal><ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/scripts/bdr-pgbouncer.sh">scripts/bdr-pgbouncer.sh</ulink></literal>)
+      is configured as follows:
+      <programlisting>
+        event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a"'</programlisting>
+    </para>
+    <para>
+      and parses the placeholder parameters like this:
+      <programlisting>
+        NODE_ID=$1
+        EVENT_TYPE=$2
+        SUCCESS=$3
+        NEXT_CONNINFO=$4
+        NEXT_NODE_NAME=$5</programlisting>
+    </para>
+    <note>
+      <para>
+        The sample script also contains some hard-coded values for the <application>PgBouncer</application>
+        configuration for both nodes; these will need to be adjusted for your local environment
+        (ideally the scripts would be maintained as templates and generated by some
+        kind of provisioning system).
+      </para>
+    </note>
+
+    <para>
+      The script performs the following steps:
+      <itemizedlist spacing="compact" mark="bullet">
+        <listitem>
+          <simpara>pauses <application>PgBouncer</application> on all nodes</simpara>
+        </listitem>
+        <listitem>
+          <simpara>recreates the <application>PgBouncer</application> configuration file on each
+            node using the information provided by <application>repmgrd</application>
+            (primarily the <varname>conninfo</varname> string) to configure
+            <application>PgBouncer</application></simpara>
+        </listitem>
+        <listitem>
+          <simpara>reloads the <application>PgBouncer</application> configuration</simpara>
+        </listitem>
+        <listitem>
+          <simpara>executes the <command>RESUME</command> command (in <application>PgBouncer</application>)</simpara>
+        </listitem>
+      </itemizedlist>
+    </para>
+    <para>
+      Following successful script execution, any connections to PgBouncer on the failed BDR node
+      will be redirected to the active node.
+    </para>
+  </sect1>
+
+  <sect1 id="bdr-monitoring-failover" xreflabel="Node monitoring and failover">
+    <title>Node monitoring and failover</title>
+    <para>
+      At the intervals specified by <varname>monitor_interval_secs</varname>
+      in <filename>repmgr.conf</filename>, <application>repmgrd</application>
+      will ping each node to check if it's available. If a node isn't available,
+      <application>repmgrd</application> will enter failover mode and check <varname>reconnect_attempts</varname>
+      times at intervals of <varname>reconnect_interval</varname> to confirm the node is definitely unreachable.
+      This buffer period is necessary to avoid false positives caused by transient
+      network outages.
+    </para>
+    <para>
+      If the node is still unavailable, <application>repmgrd</application> will enter failover mode and execute
+      the script defined in <varname>event_notification_command</varname>; an entry will be logged
+      in the <literal>repmgr.events</literal> table and <application>repmgrd</application> will
+      (unless otherwise configured) resume monitoring of the node in "degraded" mode until it reappears.
+    </para>
+    <para>
+      <application>repmgrd</application> logfile output during a failover event will look something like this
+      on one node (usually the node which has failed, here <literal>node2</literal>):
+      <programlisting>
+            ...
+    [2017-07-27 21:08:39] [INFO] starting continuous BDR node monitoring
+    [2017-07-27 21:08:39] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
+    [2017-07-27 21:08:55] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
+    [2017-07-27 21:09:11] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
+    [2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
+    [2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
+    [2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
+    [2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
+    [2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
+    [2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
+    [2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
+    [2017-07-27 21:09:28] [NOTICE] setting node record for node 2 to inactive
+    [2017-07-27 21:09:28] [INFO] executing notification command for event "bdr_failover"
+    [2017-07-27 21:09:28] [DETAIL] command is:
+      /path/to/bdr-pgbouncer.sh 2 bdr_failover 1 "host=node1 dbname=bdrtest user=repmgr connect_timeout=2" "node1"
+    [2017-07-27 21:09:28] [INFO] node 'node2' (ID: 2) detected as failed; next available node is 'node1' (ID: 1)
+    [2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
+    [2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
+    ...</programlisting>
+    </para>
+    <para>
+      Output on the other node (<literal>node1</literal>) during the same event will look like this:
+      <programlisting>
+    ...
+    [2017-07-27 21:08:35] [INFO] starting continuous BDR node monitoring
+    [2017-07-27 21:08:35] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
+    [2017-07-27 21:08:51] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
+    [2017-07-27 21:09:07] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
+    [2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
+    [2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
+    [2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
+    [2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
+    [2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
+    [2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
+    [2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
+    [2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
+    [2017-07-27 21:09:28] [NOTICE] other node's repmgrd is handling failover
+    [2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
+    [2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
+    ...</programlisting>
+    </para>
+    <para>
+      This assumes only the PostgreSQL instance on <literal>node2</literal> has failed. In this case the
+      <application>repmgrd</application> instance running on <literal>node2</literal> has performed the failover. However if
+      the entire server becomes unavailable, <application>repmgrd</application> on <literal>node1</literal> will perform
+      the failover.
+    </para>
+  </sect1>
+  <sect1 id="bdr-node-recovery" xreflabel="Node recovery">
+    <title>Node recovery</title>
+    <para>
+      Following failure of a BDR node, if the node subsequently becomes available again,
+      a <varname>bdr_recovery</varname> event will be generated. This could potentially be used to
+      reconfigure PgBouncer automatically to bring the node back into the available pool,
+      however it would be prudent to manually verify the node's status before
+      exposing it to the application.
+    </para>
+    <para>
+      If the failed node comes back up and connects correctly, output similar to this
+      will be visible in the <application>repmgrd</application> log:
+      <programlisting>
+        [2017-07-27 21:25:30] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
+        [2017-07-27 21:25:46] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
+        [2017-07-27 21:25:46] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
+        [2017-07-27 21:25:55] [INFO] active replication slot for node "node1" found after 1 seconds
+        [2017-07-27 21:25:55] [NOTICE] node "node2" (ID: 2) has recovered after 986 seconds</programlisting>
+    </para>
+  </sect1>
+
+  <sect1 id="bdr-complete-shutdown" xreflabel="Shutdown of both nodes">
+    <title>Shutdown of both nodes</title>
+    <para>
+      If both PostgreSQL instances are shut down, <application>repmgrd</application> will try and handle the
+      situation as gracefully as possible, though with no failover candidates available
+      there's not much it can do. Should this case ever occur, we recommend shutting
+      down <application>repmgrd</application> on both nodes and restarting it once the PostgreSQL instances
+      are running properly.
+    </para>
+  </sect1>
+</chapter>
+
--- a/doc/repmgrd-cascading-replication.sgml
+++ b/doc/repmgrd-cascading-replication.sgml
@@ -1,9 +1,14 @@
 <chapter id="repmgrd-cascading-replication">
+ <indexterm>
+   <primary>repmgrd</primary>
+   <secondary>cascading replication</secondary>
+ </indexterm>
+
 <title>repmgrd and cascading replication</title>
 <para>
  Cascading replication - where a standby can connect to an upstream node and not
  the primary server itself - was introduced in PostgreSQL 9.2. &repmgr; and
-  <command>repmgrd</command> support cascading replication by keeping track of the relationship
+  <application>repmgrd</application> support cascading replication by keeping track of the relationship
  between standby servers - each node record is stored with the node id of its
  upstream ("parent") server (except of course the primary server).
 </para>
--- a/doc/repmgrd-configuration.sgml
+++ b/doc/repmgrd-configuration.sgml
@@ -1,7 +1,13 @@
 <chapter id="repmgrd-configuration">
+
+ <indexterm>
+   <primary>repmgrd</primary>
+   <secondary>configuration</secondary>
+ </indexterm>
+
 <title>repmgrd configuration</title>
 <para>
-  To use <command>repmgrd</command>, its associated function library must be
+  To use <application>repmgrd</application>, its associated function library must be
  included in <filename>postgresql.conf</filename> with:

  <programlisting>
@@ -12,7 +18,7 @@
  the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
 </para>
 <para>
-  Additionally the following <command>repmgrd</command> options *must* be set in
+  Additionally the following <application>repmgrd</application> options *must* be set in
  <filename>repmgr.conf</filename> (adjust configuration file locations as appropriate):
  <programlisting>
    failover=automatic
@@ -21,13 +27,13 @@
 </para>
 <para>
  Note that the <literal>--log-to-file</literal> option will cause
-  output generated by the &repmgr; command, when executed by <command>repmgrd</command>,
-  to be logged to the same destination configured to receive log output for <command>repmgrd</command>.
-  See <filename>repmgr.conf.sample</filename> for further <command>repmgrd</command>-specific settings.
+  output generated by the &repmgr; command, when executed by <application>repmgrd</application>,
+  to be logged to the same destination configured to receive log output for <application>repmgrd</application>.
+  See <filename>repmgr.conf.sample</filename> for further <application>repmgrd</application>-specific settings.
 </para>
 <para>
  When <varname>failover</varname> is set to <literal>automatic</literal>, upon detecting failure
-  of the current  primary, <command>repmgrd</command> will execute one of
+  of the current  primary, <application>repmgrd</application> will execute one of
  <varname>promote_command</varname> or <varname>follow_command</varname>,
  depending on whether the current server is to become the new primary, or
  needs to follow another server which has become the new primary. Note that
@@ -41,7 +47,7 @@
 <para>
  The <varname>follow_command</varname> should provide the <literal>--upstream-node-id=%n</literal>
  option to <command>repmgr standby follow</command>; the <literal>%n</literal> will be replaced by
-  <command>repmgrd</command> with the ID of the new primary node. If this is not provided, &repmgr;
+  <application>repmgrd</application> with the ID of the new primary node. If this is not provided, &repmgr;
  will attempt to determine the new primary by itself, but  if the
  original primary comes back online after the new primary is promoted, there is a risk that
  <command>repmgr standby follow</command> will result in the node continuing to follow
@@ -73,7 +79,7 @@
 <sect1 id="repmgrd-log-rotation">
  <title>repmgrd log rotation</title>
  <para>
-   To ensure the current <command>repmgrd</command> logfile does not grow
+   To ensure the current <application>repmgrd</application> logfile does not grow
   indefinitely, configure your system's <command>logrotate</command> to
   regularly rotate it.
  </para>
--- a/doc/repmgrd-degraded-monitoring.sgml
+++ b/doc/repmgrd-degraded-monitoring.sgml
@@ -1,9 +1,14 @@
 <chapter id="repmgrd-degraded-monitoring">
+ <indexterm>
+   <primary>repmgrd</primary>
+   <secondary>degraded monitoring</secondary>
+ </indexterm>
+
 <title>"degraded monitoring" mode</title>
 <para>
-  In certain circumstances, <command>repmgrd</command> is not able to fulfill its primary mission
+  In certain circumstances, <application>repmgrd</application> is not able to fulfill its primary mission
  of monitoring the nodes' upstream server. In these cases it enters "degraded
-  monitoring" mode, where <command>repmgrd</command> remains active but is waiting for the situation
+  monitoring" mode, where <application>repmgrd</application> remains active but is waiting for the situation
  to be resolved.
 </para>
 <para>
@@ -35,7 +40,7 @@
   </listitem>

   <listitem>
-    <simpara>repmgrd is monitoring the primary node, but it is not available</simpara>
+    <simpara>repmgrd is monitoring the primary node, but it is not available (and no other node has been promoted as primary)</simpara>
   </listitem>
  </itemizedlist>
 </para>
@@ -63,8 +68,16 @@
 <para>
  By default, <literal>repmgrd</literal> will continue in degraded monitoring mode indefinitely.
  However a timeout (in seconds) can be set with <varname>degraded_monitoring_timeout</varname>,
-  after which <command>repmgrd</command> will terminate.
-
+  after which <application>repmgrd</application> will terminate.
 </para>

+ <note>
+   <para>
+     If <application>repmgrd</application> is monitoring a primary node which has been stopped
+     and manually restarted as a standby attached to a new primary, it will automatically detect
+     the status change and update the node record to reflect the node's new status
+     as an active standby. It will then resume monitoring the node as a standby.
+   </para>
+ </note>
+
 </chapter>
--- a/doc/repmgrd-demonstration.sgml
+++ b/doc/repmgrd-demonstration.sgml
@@ -10,19 +10,19 @@
    ----+-------+---------+-----------+----------+----------+--------------------------------------
     1  | node1 | primary | * running |          | default  | host=node1 dbname=repmgr user=repmgr
     2  | node2 | standby |   running | node1    | default  | host=node2 dbname=repmgr user=repmgr
-     3  | node3 | standby |   running | node1    | default  | host=node3 dbname=repmgr user=repmgr  </programlisting>
+     3  | node3 | standby |   running | node1    | default  | host=node3 dbname=repmgr user=repmgr</programlisting>
 </para>
 <para>
-  Start <command>repmgrd</command> on each standby and verify that it's running by examining the
+  Start <application>repmgrd</application> on each standby and verify that it's running by examining the
  log output, which at log level <literal>INFO</literal> will look like this:
  <programlisting>
    [2017-08-24 17:31:00] [NOTICE] using configuration file "/etc/repmgr.conf"
    [2017-08-24 17:31:00] [INFO] connecting to database "host=node2 dbname=repmgr user=repmgr"
    [2017-08-24 17:31:00] [NOTICE] starting monitoring of node <literal>node2</literal> (ID: 2)
-    [2017-08-24 17:31:00] [INFO] monitoring connection to upstream node "node1" (node ID: 1)  </programlisting>
+    [2017-08-24 17:31:00] [INFO] monitoring connection to upstream node "node1" (node ID: 1)</programlisting>
 </para>
 <para>
-  Each <command>repmgrd</command> should also have recorded its successful startup as an event:
+  Each <application>repmgrd</application> should also have recorded its successful startup as an event:
  <programlisting>
    $ repmgr -f /etc/repmgr.conf cluster event --event=repmgrd_start
     Node ID | Name  | Event         | OK | Timestamp           | Details
@@ -38,8 +38,8 @@
 </para>
 <para>
  This will force the primary to shut down straight away, aborting all processes
-  and transactions.  This will cause a flurry of activity in the <command>repmgrd</command> log
-  files as each <command>repmgrd</command> detects the failure of the primary and a failover
+  and transactions.  This will cause a flurry of activity in the <application>repmgrd</application> log
+  files as each <application>repmgrd</application> detects the failure of the primary and a failover
  decision is made. This is an extract from the log of a standby server (<literal>node2</literal>)
  which has promoted to new primary after failure of the original primary (<literal>node1</literal>).
  <programlisting>
@@ -61,7 +61,7 @@
    [2017-08-24 23:32:12] [NOTICE] this node is the winner, will now promote self and inform other nodes
    INFO: connecting to standby database
    NOTICE: promoting standby
-    DETAIL: promoting server using '/home/barwick/devel/builds/HEAD/bin/pg_ctl -l /tmp/postgres.5602.log -w -D '/tmp/repmgr-test/node_2/data' promote'
+    DETAIL: promoting server using 'pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' promote'
    INFO: reconnecting to promoted server
    NOTICE: STANDBY PROMOTE successful
    DETAIL: node 2 was successfully promoted to primary
--- a/doc/repmgrd-monitoring.sgml
+++ b/doc/repmgrd-monitoring.sgml
@@ -1,7 +1,12 @@
 <chapter id="repmgrd-monitoring">
+ <indexterm>
+   <primary>repmgrd</primary>
+   <secondary>monitoring</secondary>
+ </indexterm>
+
 <title>Monitoring with repmgrd</title>
 <para>
-  When `repmgrd` is running with the option <literal>monitoring_history=true</literal>,
+   When <application>repmgrd</application> is running with the option <literal>monitoring_history=true</literal>,
  it will constantly write standby node status information to the
  <varname>monitoring_history</varname> table, providing a near-real time
  overview of replication status on all nodes
@@ -39,7 +44,7 @@
  specify how many day's worth of data should be retained.
 </para>
 <para>
-  It's possible to use <command>repmgrd</command> to run in monitoring
+  It's possible to use <application>repmgrd</application> to run in monitoring
  mode only (without automatic failover capability) for some or all
  nodes by setting <literal>failover=manual</literal> in the node's
  <filename>repmgr.conf</filename> file. In the event of the node's upstream failing,
--- a/doc/repmgrd-network-split.sgml
+++ b/doc/repmgrd-network-split.sgml
@@ -1,4 +1,9 @@
-<chapter id="repmgrd-network-split">
+<chapter id="repmgrd-network-split" xreflabel="Handling network splits with repmgrd">
+ <indexterm>
+   <primary>repmgrd</primary>
+   <secondary>network splits</secondary>
+ </indexterm>
+
 <title>Handling network splits with repmgrd</title>
 <para>
  A common pattern for replication cluster setups is to spread servers over
@@ -10,7 +15,7 @@
  in the main data centre and promoted a standby among themselves.
 </para>
 <para>
-  Previous &repmgr; versions used the concept of a "witness server" to
+  &repmgr; enables provision of &quot;<xref linkend="witness-server">&quot; to
  artificially create a quorum of servers in a particular location, ensuring
  that nodes in another location will not elect a new primary if they
  are unable to see the majority of nodes. However this approach does not
@@ -32,8 +37,8 @@
    location='dc1'</programlisting>
 </para>
 <para>
-  In a failover situation, <command>repmgrd</command> will check if any servers in the
-  same location as the current primary node are visible.  If not, <command>repmgrd</command>
+  In a failover situation, <application>repmgrd</application> will check if any servers in the
+  same location as the current primary node are visible.  If not, <application>repmgrd</application>
  will assume a network interruption and not promote any node in any
  other location (it will however enter <xref linkend="repmgrd-degraded-monitoring"> mode until
  a primary becomes visible).
--- a/doc/repmgrd-witness-server.sgml
+++ b/doc/repmgrd-witness-server.sgml
@@ -0,0 +1,31 @@
+<chapter id="repmgrd-witness-server" xreflabel="Using a witness server with repmgrd">
+ <indexterm>
+   <primary>repmgrd</primary>
+   <secondary>witness server</secondary>
+ </indexterm>
+
+ <title>Using a witness server with repmgrd</title>
+ <para>
+   In a situation caused e.g. by a network interruption between two
+   data centres, it's important to avoid a "split-brain" situation where
+   both sides of the network assume they are the active segment and the
+   side without an active primary unilaterally promotes one of its standbys.
+ </para>
+ <para>
+   To prevent this situation happening, it's essential to ensure that one
+   network segment has a "voting majority", so other segments will know
+   they're in the minority and not attempt to promote a new primary. Where
+   an odd number of servers exists, this is not an issue. However, if each
+   network has an even number of nodes, it's necessary to provide some way
+   of ensuring a majority, which is where the witness server becomes useful.
+ </para>
+ <para>
+   This is not a fully-fledged standby node and is not integrated into
+   replication, but it effectively represents the "casting vote" when
+   deciding which network segment has a majority. A witness server can
+   be set up using <xref linkend="repmgr-witness-register">. Note that it only
+   makes sense to create a witness server in conjunction with running
+   <application>repmgrd</application>; the witness server will require its own
+   <application>repmgrd</application> instance.
+ </para>
+</chapter>
--- a/doc/switchover.sgml
+++ b/doc/switchover.sgml
@@ -1,4 +1,9 @@
 <chapter id="performing-switchover" xreflabel="Performing a switchover with repmgr">
+
+ <indexterm>
+  <primary>switchover</primary>
+ </indexterm>
+
 <title>Performing a switchover with repmgr</title>
 <para>
  A typical use-case for replication is a combination of primary and standby
@@ -14,7 +19,7 @@
 </para>
 <para>
  <command>repmgr standby switchover</command> differs from other &repmgr;
-  actions in that it lso performs actions on another server (the demotion
+  actions in that it also performs actions on another server (the demotion
  candidate), which means passwordless SSH access is required to that server
  from the one where <command>repmgr standby switchover</command> is executed.
 </para>
@@ -39,7 +44,7 @@
  </simpara>
  <simpara>
   Please also read carefully the sections <xref linkend="preparing-for-switchover"> and
-  `Caveats` below.
+   <xref linkend="switchover-caveats"> below.
  </simpara>
 </note>

@@ -49,45 +54,84 @@
     <secondary>preparation</secondary>
   </indexterm>
   <title>Preparing for switchover</title>
+
   <para>
-    As mentioned above, success of the switchover operation depends on &repmgr;
-    being able to shut down the current primary server quickly and cleanly.
+    As mentioned in the previous section, success of the switchover operation depends on
+    &repmgr;  being able to shut down the current primary server quickly and cleanly.
   </para>
+
+   <para>
+     Ensure that a passwordless SSH connection is possible from the promotion candidate
+     (standby) to the demotion candidate (current primary). If <literal>--siblings-follow</literal>
+     will be used, ensure that passwordless SSH connections are possible from the
+     promotion candidate to all standbys attached to the demotion candidate.
+   </para>
+
   <para>
    Double-check which commands will be used to stop/start/restart the current
    primary; on the primary execute:
    <programlisting>
-     repmgr -f /etc./repmgr.conf node service --list --action=stop
-     repmgr -f /etc./repmgr.conf node service --list --action=start
-     repmgr -f /etc./repmgr.conf node service --list --action=restart
-    </programlisting>
+     repmgr -f /etc/repmgr.conf node service --list --action=stop
+     repmgr -f /etc/repmgr.conf node service --list --action=start
+     repmgr -f /etc/repmgr.conf node service --list --action=restart</programlisting>
   </para>
+
+   <para>
+     These commands can be defined in <filename>repmgr.conf</filename> with
+     <option>service_start_command</option>, <option>service_stop_command</option>
+     and <option>service_restart_command</option>.
+   </para>
+
+   <important>
+     <para>
+       If &repmgr; is installed from a package, you should set these commands
+       to use the appropriate service commands defined by the package/operating
+       system as these will ensure PostgreSQL is stopped/started properly
+       taking into account configuration and log file locations etc.
+     </para>
+     <para>
+       If the <option>service_*_command</option> options aren't defined, &repmgr; will
+       fall back to using <application>pg_ctl</application> to stop/start/restart
+       PostgreSQL, which may not work properly.
+     </para>
+   </important>
+
   <note>
    <simpara>
     On <literal>systemd</literal> systems we strongly recommend using the appropriate
     <command>systemctl</command> commands (typically run via <command>sudo</command>) to ensure
-     <literal>systemd</literal> informed about the status of the PostgreSQL service.
+     <literal>systemd</literal> is informed about the status of the PostgreSQL service.
+    </simpara>
+    <simpara>
+     If using <command>sudo</command> for the <command>systemctl</command> calls, make sure the
+     <command>sudo</command> specification doesn't require a real tty for the user. If not set
+     this way, <command>repmgr</command> will fail to stop the primary.
    </simpara>
   </note>
+
   <para>
    Check that access from applications is minimalized or preferably blocked
    completely, so applications are not unexpectedly interrupted.
   </para>
+
   <para>
    Check there is no significant replication lag on standbys attached to the
    current primary.
   </para>
+
   <para>
    If WAL file archiving is set up, check that there is no backlog of files waiting
-    to be archived, as PostgreSQL will not finally shut down until all these have been
+    to be archived, as PostgreSQL will not finally shut down until all of these have been
    archived. If there is a backlog exceeding <varname>archive_ready_warning</varname> WAL files,
-    `repmgr` will emit a warning before attempting to perform a switchover; you can also check
+    &repmgr; will emit a warning before attempting to perform a switchover; you can also check
    manually with <command>repmgr node check --archive-ready</command>.
   </para>
+
   <para>
-    Ensure that <command>repmgrd</command> is *not* running anywhere to prevent it unintentionally
+    Ensure that <application>repmgrd</application> is <emphasis>not</emphasis> running anywhere to prevent it unintentionally
    promoting a node.
   </para>
+
   <para>
    Finally, consider executing <command>repmgr standby switchover</command> with the
    <literal>--dry-run</literal> option; this will perform any necessary checks and inform you about
@@ -96,7 +140,7 @@
    <programlisting>
      $ repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run
      NOTICE: checking switchover on node "node2" (ID: 2) in --dry-run mode
-      INFO: SSH connection to host "localhost" succeeded
+      INFO: SSH connection to host "node1" succeeded
      INFO: archive mode is "off"
      INFO: replication lag on this standby is 0 seconds
      INFO: all sibling nodes are reachable via SSH
@@ -105,6 +149,48 @@
        "pg_ctl -l /var/log/postgresql/startup.log -D '/var/lib/postgresql/data' -m fast -W stop"
    </programlisting>
   </para>
+
+   <important>
+     <para>
+       Be aware that <option>--dry-run</option> checks the prerequisites
+       for performing the switchover and some basic sanity checks on the
+       state of the database which might affect the switchover operation
+       (e.g. replication lag); it cannot however guarantee the switchover
+       operation will succeed. In particular, if the current primary
+       does not shut down cleanly, &repmgr; will not be able to reliably
+       execute the switchover (as there would be a danger of divergence
+       between the former and new primary nodes).
+     </para>
+   </important>
+
+   <para>
+     Note that the following parameters in <filename>repmgr.conf</filename> are relevant to the
+     switchover operation:
+     <itemizedlist spacing="compact" mark="bullet">
+       <listitem>
+         <simpara>
+           <literal>reconnect_attempts</literal>: number of times to check the original primary
+           for a clean shutdown after executing the shutdown command, before aborting
+         </simpara>
+       </listitem>
+       <listitem>
+         <simpara>
+           <literal>reconnect_interval</literal>: interval (in seconds) to check the original
+           primary for a clean shutdown after executing the shutdown command (up to a maximum
+           of <literal>reconnect_attempts</literal> tries)
+         </simpara>
+       </listitem>
+       <listitem>
+         <simpara>
+           <literal>replication_lag_critical</literal>:
+           if replication lag (in seconds) on the standby exceeds this value, the
+           switchover will be aborted (unless the <literal>-F/--force</literal> option
+           is provided)
+         </simpara>
+       </listitem>
+
+     </itemizedlist>
+   </para>
 </sect1>

 <sect1 id="switchover-execution" xreflabel="Executing the switchover command">
@@ -128,7 +214,7 @@
    INFO: searching for primary node
    INFO: checking if node 1 is primary
    INFO: current primary node is 1
-    INFO: SSH connection to host "localhost" succeeded
+    INFO: SSH connection to host "node1" succeeded
    INFO: archive mode is "off"
    INFO: replication lag on this standby is 0 seconds
    NOTICE: local node "node2" (ID: 2) will be promoted to primary; current primary "node1" (ID: 1) will be demoted to standby
@@ -190,15 +276,15 @@
    </listitem>
    <listitem>
     <simpara>
-      <command>repmgrd</command> should not be running with setting <varname>failover=automatic</varname>
+      <application>repmgrd</application> should not be running with setting <varname>failover=automatic</varname>
      in <filename>repmgr.conf</filename> when a switchover is carried out, otherwise the
-      <command>repmgrd</command> daemon may try and promote a standby by itself.
+      <application>repmgrd</application> daemon may try and promote a standby by itself.
     </simpara>
    </listitem>
   </itemizedlist>
  </para>
  <para>
-   We hope to remove some of these restrictions in future versions of `repmgr`.
+   We hope to remove some of these restrictions in future versions of &repmgr;.
  </para>
 </sect1>
 </chapter>
--- a/doc/upgrading-from-repmgr3.md
+++ b/doc/upgrading-from-repmgr3.md
@@ -1,140 +1,9 @@
 Upgrading from repmgr 3
 =======================

-The upgrade process consists of two steps:
+This document has been integrated into the main `repmgr` documentation
+and is now located here:

-    1) converting the repmgr.conf configuration files
-    2) upgrading the repmgr schema
-
-A script is provided to assist with converting `repmgr.conf`.
-
-The schema upgrade (which converts the `repmgr` metadata into
-a packaged PostgreSQL extension) is normally carried out
-automatically when the `repmgr` extension is created.
+> [Upgrading from repmgr 3.x](https://repmgr.org/docs/4.0/upgrading-from-repmgr-3.html)


-Converting repmgr.conf configuration files
------------------------------------------
-
-With a completely new repmgr version, we've taken the opportunity
-to rename some configuration items have had their names changed for
-clarity and consistency, both between the configuration file and
-the column names in `repmgr.nodes` (e.g. `node` → `node_id`), and
-also for consistency with PostgreSQL naming conventions
-(e.g. `loglevel` → `log_level`).
-
-Other configuration items have been changed to command line options,
-and vice-versa, e.g. to avoid hard-coding items such as a a node's
-upstream ID, which might change over time.
-
-`repmgr` will issue a warning about deprecated/altered options.
-
-
-### Changed parameters
-
-Following parameters have been added:
-
-    - `data_directory`: this is mandatory and must contain the path
-        to the node's data directory
-    - `monitoring_history`: this replaces the `repmgrd` command line
-        option `--monitoring-history`
-
-Following parameters have been renamed:
-
-    - `node` → `node_id`
-    - `loglevel` → `log_level`
-    - `logfacility` → `log_facility`
-    - `logfile` → `log_file`
-    - `master_reponse_timeout` → `async_query_timeout`
-
-Following parameters have been removed:
-
-    - `cluster` is no longer required and will be ignored.
-    - `upstream_node_id` is replaced by the command-line parameter
-         `--upstream-node-id`
-
-### Conversion script
-
-To assist with conversion of `repmgr.conf` files, a Perl script
-is provided in `contrib/convert-config.pl`. Use like this:
-
-    $ ./convert-config.pl /etc/repmgr.conf
-    node_id=2
-    node_name=node2
-    conninfo=host=node2 dbname=repmgr user=repmgr connect_timeout=2
-    pg_ctl_options='-l /var/log/postgres/startup.log'
-    rsync_options=--exclude=postgresql.local.conf --archive
-    log_level=INFO
-    pg_basebackup_options=--no-slot
-    data_directory=
-
-The converted file is printed to `STDOUT` and the original file is not
-changed.
-
-Please note that the parameter `data_directory` *must* be provided;
-if not already present, the conversion script will add an empty
-placeholder parameter.
-
-
-Upgrading the repmgr schema
---------------------------
-
-Ensure `repmgrd` is not running, or any cron jobs which execute the
-`repmgr` binary.
-
-Install `repmgr4`; any `repmgr3` packages should be uninstalled
-(if not automatically uninstalled already).
-
-### Upgrading from repmgr 3.1.1 or earlier
-
-If your repmgr version is 3.1.1 or earlier, you will need to update
-the schema to the latest version in the 3.x series (3.3.2) before
-converting the installation to repmgr 4.
-
-To do this, apply the following upgrade scripts as appropriate for
-your current version:
-
-    - repmgr3.0_repmgr3.1.sql
-    - repmgr3.1.1_repmgr3.1.2.sql
-
-For more details see:
-
-    https://repmgr.org/release-notes-3.3.2.html#upgrading
-
-### Manually create the repmgr extension
-
-In the database used by the existing `repmgr` configuration, execute:
-
-    CREATE EXTENSION repmgr FROM unpackaged;
-
-This will move and convert all objects from the existing schema
-into the new, standard `repmgr` schema.
-
-> *NOTE* there must be only one schema matching 'repmgr_%' in the
-> database, otherwise this step may not work.
-
-### Re-register each node
-
-This is necessary to update the `repmgr` metadata with some additional items.
-
-On the primary node, execute e.g.
-
-    repmgr primary register -f /etc/repmgr.conf --force
-
-On each standby node, execute e.g.
-
-    repmgr standby register -f /etc/repmgr.conf --force
-
-Check the data is updated as expected by examining the `repmgr.nodes` table;
-restart `repmgrd` if required.
-
-The original `repmgr_$cluster` schema can be dropped at any time.
-
-* * *
-
-> *TIP* If you don't care about any data from the existing `repmgr` installation,
-> (e.g. the contents of the `events` and `monitoring` tables), the manual
-> "CREATE EXTENSION" step can be skipped; just re-register each node, starting
-> with the primary node, and the `repmgr` extension will be automatically created.
-
-* * *
--- a/doc/upgrading-repmgr.sgml
+++ b/doc/upgrading-repmgr.sgml
@@ -1,24 +1,104 @@
 <chapter id="upgrading-repmgr" xreflabel="Upgrading repmgr">
+
+ <indexterm>
+  <primary>upgrading</primary>
+ </indexterm>
+
 <title>Upgrading repmgr</title>
+
 <para>
  &repmgr; is updated regularly with point releases (e.g. 4.0.1 to 4.0.2)
  containing bugfixes and other minor improvements. Any substantial new
  functionality will be included in a feature release (e.g. 4.0.x to 4.1.x).
 </para>
- <para>
-  &repmgr; is implemented as a PostgreSQL extension; to upgrade it, first
-  install the updated package (or compile the updated source), then in the
-  database where the &repmgr; extension is installed, execute
-  <command>ALTER EXTENSION repmgr UPDATE</command>.
- </para>
- <para>
-  If <command>repmgrd</command> is running, it may be necessary to restart
-  the PostgreSQL server if the upgrade contains changes to the shared object
-  file used by <command>repmgrd</command>; check the release notes for details.
- </para>

- <sect1 id="upgrading-from-repmgr-3">
-  <title>Upgrading from repmgr 3</title>
+ <sect1 id="upgrading-repmgr-extension" xreflabel="Upgrading repmgr 4.x and later">
+  <indexterm>
+   <primary>upgrading</primary>
+   <secondary>repmgr 4.x and later</secondary>
+  </indexterm>
+  <title>Upgrading repmgr 4.x and later</title>
+  <para>
+    &repmgr; 4.x is implemented as a PostgreSQL extension; normally the upgrade consists
+    of the two following steps:
+    <orderedlist>
+      <listitem>
+        <simpara>
+          Install the updated package (or compile the updated source)
+        </simpara>
+      </listitem>
+      <listitem>
+        <simpara>
+          In the database where the &repmgr; extension is installed, execute
+          <command>ALTER EXTENSION repmgr UPDATE</command>.
+        </simpara>
+      </listitem>
+    </orderedlist>
+  </para>
+
+  <para>
+    Always check the <link linkend="appendix-release-notes">release notes</link> for every
+    release as they may contain upgrade instructions particular to individual versions.
+  </para>
+
+  <para>
+    If the <application>repmgrd</application> daemon is in use, we recommend stopping it
+    before upgrading &repmgr;.
+  </para>
+  <para>
+    Note that it may be necessary to restart the PostgreSQL server if the upgrade contains
+    changes to the shared object file used by <application>repmgrd</application>; check the
+    release notes for details.
+  </para>
+ </sect1>
+
+ <sect1 id="upgrading-and-pg-upgrade" xreflabel="pg_upgrade and repmgr">
+  <indexterm>
+   <primary>upgrading</primary>
+   <secondary>pg_upgrade</secondary>
+  </indexterm>
+  <indexterm>
+    <primary>pg_upgrade</primary>
+  </indexterm>
+  <title>pg_upgrade and repmgr</title>
+
+  <para>
+    <application>pg_upgrade</application> requires that if any functions are
+    dependent on a shared library, this library must be present in both
+    the old and new installations before <application>pg_upgrade</application>
+    can be executed.
+  </para>
+  <para>
+    To minimize the risk of any upgrade issues (particularly if an upgrade to
+    a new major &repmgr; version is involved), we recommend upgrading
+    &repmgr; on the old server <emphasis>before</emphasis> running
+    <application>pg_upgrade</application> to ensure that old and new
+    versions are the same.
+  </para>
+  <note>
+    <simpara>
+      This issue applies to any PostgreSQL extension which has
+      dependencies on a shared library.
+    </simpara>
+  </note>
+  <para>
+    For further details please see the <ulink url="https://www.postgresql.org/docs/current/static/pgupgrade.html">pg_upgrade documentation</ulink>.
+  </para>
+  <para>
+    If replication slots are in use, bear in mind these will <emphasis>not</emphasis>
+    be recreated by <application>pg_upgrade</application>. These will need to
+    be recreated manually.
+  </para>
+ </sect1>
+
+
+ <sect1 id="upgrading-from-repmgr-3" xreflabel="Upgrading from repmgr 3.x">
+  <indexterm>
+   <primary>upgrading</primary>
+   <secondary>from repmgr 3.x</secondary>
+  </indexterm>
+
+  <title>Upgrading from repmgr 3.x</title>
  <para>
   The upgrade process consists of two steps:
   <orderedlist>
@@ -29,7 +109,7 @@
    </listitem>
    <listitem>
     <simpara>
-       upgrading the repmgr schema
+       upgrading the repmgr schema using <command>CREATE EXTENSION</command>
     </simpara>
    </listitem>
   </orderedlist>
@@ -42,11 +122,19 @@
   a packaged PostgreSQL extension) is normally carried out
   automatically when the &repmgr; extension is created.
  </para>
+  <para>
+   The shared library has been renamed from <literal>repmgr_funcs</literal> to
+   <literal>repmgr</literal> - if it's set in <varname>shared_preload_libraries</varname>
+   in <filename>postgresql.conf</filename> it will need to be updated to the new name:
+   <programlisting>
+    shared_preload_libraries = 'repmgr'</programlisting>
+  </para>
+
  <sect2 id="converting-repmgr-conf">
   <title>Converting repmgr.conf configuration files</title>
   <para>
    With a completely new repmgr version, we've taken the opportunity
-    to rename some configuration items have had their names changed for
+    to rename some configuration items for
    clarity and consistency, both between the configuration file and
    the column names in <structname>repmgr.nodes</structname>
    (e.g. <varname>node</varname> to <varname>node_id</varname>), and
@@ -72,7 +160,7 @@
      </listitem>
      <listitem>
        <simpara><varname>monitoring_history</varname>: this replaces the
-          <command>repmgrd</command> command line option
+          <application>repmgrd</application> command line option
          <literal>--monitoring-history</literal></simpara>
      </listitem>
     </itemizedlist>
@@ -106,6 +194,10 @@
        <entry><varname>logfile</varname></entry>
        <entry><varname>log_file</varname></entry>
       </row>
+       <row>
+        <entry><varname>barman_server</varname></entry>
+        <entry><varname>barman_host</varname></entry>
+       </row>
       <row>
        <entry><varname>master_reponse_timeout</varname></entry>
        <entry><varname>async_query_timeout</varname></entry>
@@ -113,6 +205,16 @@
      </tbody>
     </tgroup>
    </table>
+    <note>
+      <para>
+        From &repmgr; 4, <literal>barman_server</literal> refers
+        to the server configured in Barman (in &repmgr; 3, the deprecated
+        <literal>cluster</literal> parameter was used for this);
+        the physical Barman hostname is configured with
+        <literal>barman_host</literal> (see <xref linkend="cloning-from-barman-prerequisites">
+          for details).
+      </para>
+    </note>
    <para>
     Following parameters have been removed:
     <itemizedlist spacing="compact" mark="bullet">
@@ -121,7 +223,7 @@
        be ignored.</simpara>
      </listitem>
      <listitem>
-        <simpara><varname>upstream_node_id</varname>:  is replaced by the
+        <simpara><varname>upstream_node</varname>:  is replaced by the
        command-line parameter <literal>--upstream-node-id</literal></simpara>
      </listitem>
     </itemizedlist>
@@ -137,7 +239,7 @@
    $ ./convert-config.pl /etc/repmgr.conf
    node_id=2
    node_name=node2
-    conninfo=host=localhost dbname=repmgr user=repmgr connect_timeout=2
+    conninfo=host=node2 dbname=repmgr user=repmgr connect_timeout=2
    pg_ctl_options='-l /var/log/postgres/startup.log'
    rsync_options=--exclude=postgresql.local.conf --archive
    log_level=INFO
@@ -149,21 +251,22 @@
      changed.
    </para>
    <para>
-      Please note that the parameter <varname>data_directory</varname> <emphasis>must</emphasis>
-      be provided; if not already present, the conversion script will add an empty
-      placeholder parameter.
+      Please note that the conversion script will add an empty
+      placeholder parameter for <varname>data_directory</varname>, which
+      is a required parameter in repmgr4 and which <emphasis>must</emphasis>
+      be provided.
    </para>
   </sect3>
  </sect2>
  <sect2>
   <title>Upgrading the repmgr schema</title>
   <para>
-    Ensure <command>repmgrd</command> is not running, or any cron jobs which execute the
+    Ensure <application>repmgrd</application> is not running, or any cron jobs which execute the
    <command>repmgr</command> binary.
   </para>
   <para>
-    Install <literal>repmgr4</literal>; any <literal>repmgr3</literal> packages should be uninstalled
-    (if not automatically uninstalled already).
+    Install <literal>repmgr 4</literal> packages; any <literal>repmgr 3.x</literal> packages
+    should be uninstalled (if not automatically uninstalled already by your packaging system).
   </para>
   <sect3>
    <title>Upgrading from repmgr 3.1.1 or earlier</title>
@@ -203,7 +306,8 @@
    </para>
    <note>
      <simpara>there must be only one schema matching <literal>repmgr_%</literal> in the
-        database, otherwise this step may not work.</simpara>
+        database, otherwise this step may not work.
+      </simpara>
    </note>
   </sect3>
   <sect3>
@@ -223,7 +327,7 @@
    </para>
    <para>
     Check the data is updated as expected by examining the <structname>repmgr.nodes</structname>
-     table; restart <command>repmgrd</command> if required.
+     table; restart <application>repmgrd</application> if required.
    </para>
    <para>
     The original <literal>repmgr_$cluster</literal> schema can be dropped at any time.
--- a/doc/version.sgml
+++ b/doc/version.sgml
@@ -1 +1 @@
-<!ENTITY repmgrversion "4.0dev">
+<!ENTITY repmgrversion "4.0.4">
--- a/errcode.h
+++ b/errcode.h
@@ -1,6 +1,6 @@
 /*
 * errcode.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -43,5 +43,6 @@
 #define ERR_BARMAN 19
 #define ERR_REGISTRATION_SYNC 20
 #define ERR_OUT_OF_MEMORY 21
+#define ERR_SWITCHOVER_INCOMPLETE 22

 #endif							/* _ERRCODE_H_ */
--- a/expected/repmgr_extension.out
+++ b/expected/repmgr_extension.out
@@ -38,15 +38,15 @@ SELECT repmgr.am_bdr_failover_handler(-1);
 
 (1 row)

-SELECT repmgr.get_new_primary();
- get_new_primary 
-----------------
+SELECT repmgr.am_bdr_failover_handler(NULL);
+ am_bdr_failover_handler 
+-------------------------
 
 (1 row)

-SELECT repmgr.get_voting_status();
- get_voting_status 
-------------------
+SELECT repmgr.get_new_primary();
+ get_new_primary 
+-----------------
                
 (1 row)

@@ -56,15 +56,9 @@ SELECT repmgr.notify_follow_primary(-1);
 
 (1 row)

-SELECT repmgr.other_node_is_candidate(-1,-1);
- other_node_is_candidate 
-------------------------
- 
-(1 row)
-
-SELECT repmgr.request_vote(-1,-1);
- request_vote 
--------------
+SELECT repmgr.notify_follow_primary(NULL);
+ notify_follow_primary 
+-----------------------
 
 (1 row)

@@ -80,9 +74,9 @@ SELECT repmgr.set_local_node_id(-1);
 
 (1 row)

-SELECT repmgr.set_voting_status_initiated();
- set_voting_status_initiated 
-----------------------------
+SELECT repmgr.set_local_node_id(NULL);
+ set_local_node_id 
+-------------------
 
 (1 row)

--- a/log.c
+++ b/log.c
@@ -1,6 +1,6 @@
 /*
 * log.c - Logging methods
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
--- a/log.h
+++ b/log.h
@@ -1,6 +1,6 @@
 /*
 * log.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
--- a/repmgr--4.0.sql
+++ b/repmgr--4.0.sql
@@ -6,7 +6,7 @@ CREATE TABLE repmgr.nodes (
  upstream_node_id INTEGER     NULL REFERENCES nodes (node_id) DEFERRABLE,
  active           BOOLEAN     NOT NULL DEFAULT TRUE,
  node_name        TEXT        NOT NULL,
-  type             TEXT        NOT NULL CHECK (type IN('primary','standby','bdr')),
+  type             TEXT        NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
  location         TEXT        NOT NULL DEFAULT 'default',
  priority         INT         NOT NULL DEFAULT 100,
  conninfo         TEXT        NOT NULL,
@@ -79,6 +79,19 @@ LEFT JOIN repmgr.nodes un
       ON un.node_id = n.upstream_node_id;


+/* XXX update upgrade scripts! */
+CREATE TABLE repmgr.voting_term (
+  term INT NOT NULL
+);
+
+CREATE UNIQUE INDEX voting_term_restrict
+ON repmgr.voting_term ((TRUE));
+
+CREATE RULE voting_term_delete AS
+   ON DELETE TO repmgr.voting_term
+   DO INSTEAD NOTHING;
+
+
 /* ================= */
 /* repmgrd functions */
 /* ================= */
@@ -90,6 +103,11 @@ CREATE FUNCTION set_local_node_id(INT)
  AS 'MODULE_PATHNAME', 'set_local_node_id'
  LANGUAGE C STRICT;

+CREATE FUNCTION get_local_node_id()
+  RETURNS INT
+  AS 'MODULE_PATHNAME', 'get_local_node_id'
+  LANGUAGE C STRICT;
+
 CREATE FUNCTION standby_set_last_updated()
  RETURNS TIMESTAMP WITH TIME ZONE
  AS 'MODULE_PATHNAME', 'standby_set_last_updated'
@@ -102,49 +120,6 @@ CREATE FUNCTION standby_get_last_updated()

 /* failover functions */

-
-DO $repmgr$
-DECLARE
-  DECLARE server_version_num INT;
-BEGIN
-  SELECT setting
-    FROM pg_catalog.pg_settings
-   WHERE name = 'server_version_num'
-    INTO server_version_num;
-
-  IF server_version_num >= 90400 THEN
-    EXECUTE $repmgr_func$
-CREATE FUNCTION request_vote(INT,INT)
-  RETURNS pg_lsn
-  AS 'MODULE_PATHNAME', 'request_vote'
-  LANGUAGE C STRICT;
-    $repmgr_func$;
-  ELSE
-    EXECUTE $repmgr_func$
-CREATE FUNCTION request_vote(INT,INT)
-  RETURNS TEXT
-  AS 'MODULE_PATHNAME', 'request_vote'
-  LANGUAGE C STRICT;
-    $repmgr_func$;
-  END IF;
-END$repmgr$;
-
-
-CREATE FUNCTION get_voting_status()
-  RETURNS INT
-  AS 'MODULE_PATHNAME', 'get_voting_status'
-  LANGUAGE C STRICT;
-
-CREATE FUNCTION set_voting_status_initiated()
-  RETURNS INT
-  AS 'MODULE_PATHNAME', 'set_voting_status_initiated'
-  LANGUAGE C STRICT;
-
-CREATE FUNCTION other_node_is_candidate(INT, INT)
-  RETURNS BOOL
-  AS 'MODULE_PATHNAME', 'other_node_is_candidate'
-  LANGUAGE C STRICT;
-
 CREATE FUNCTION notify_follow_primary(INT)
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'notify_follow_primary'
@@ -160,13 +135,11 @@ CREATE FUNCTION reset_voting_status()
  AS 'MODULE_PATHNAME', 'reset_voting_status'
  LANGUAGE C STRICT;

-
 CREATE FUNCTION am_bdr_failover_handler(INT)
  RETURNS BOOL
  AS 'MODULE_PATHNAME', 'am_bdr_failover_handler'
  LANGUAGE C STRICT;

-
 CREATE FUNCTION unset_bdr_failover_handler()
  RETURNS VOID
  AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
--- a/repmgr--unpackaged--4.0.sql
+++ b/repmgr--unpackaged--4.0.sql
@@ -32,7 +32,7 @@ CREATE TABLE repmgr.nodes (
  upstream_node_id INTEGER     NULL REFERENCES repmgr.nodes (node_id) DEFERRABLE,
  active           BOOLEAN     NOT NULL DEFAULT TRUE,
  node_name        TEXT        NOT NULL,
-  type             TEXT        NOT NULL CHECK (type IN('primary','standby','bdr')),
+  type             TEXT        NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
  location         TEXT        NOT NULL DEFAULT 'default',
  priority         INT         NOT NULL DEFAULT 100,
  conninfo         TEXT        NOT NULL,
@@ -54,8 +54,34 @@ SELECT id, upstream_node_id, active, name,

 ALTER TABLE repmgr.repl_events RENAME TO events;

+-- create new table "repmgr.voting_term"
+CREATE TABLE repmgr.voting_term (
+  term INT NOT NULL
+);
+
+CREATE UNIQUE INDEX voting_term_restrict
+ON repmgr.voting_term ((TRUE));
+
+CREATE RULE voting_term_delete AS
+   ON DELETE TO repmgr.voting_term
+   DO INSTEAD NOTHING;
+
+INSERT INTO repmgr.voting_term (term) VALUES (1);
+
+
 -- convert "repmgr_$cluster.repl_monitor" to "monitoring_history"

+
+DO $repmgr$
+DECLARE
+  DECLARE server_version_num INT;
+BEGIN
+  SELECT setting
+    FROM pg_catalog.pg_settings
+   WHERE name = 'server_version_num'
+    INTO server_version_num;
+  IF server_version_num >= 90400 THEN
+    EXECUTE $repmgr_func$
 CREATE TABLE repmgr.monitoring_history (
  primary_node_id                INTEGER NOT NULL,
  standby_node_id                INTEGER NOT NULL,
@@ -65,12 +91,32 @@ CREATE TABLE repmgr.monitoring_history (
  last_wal_standby_location      PG_LSN,
  replication_lag                BIGINT NOT NULL,
  apply_lag                      BIGINT NOT NULL
-);
+)
+    $repmgr_func$;
+    INSERT INTO repmgr.monitoring_history
+      (primary_node_id, standby_node_id, last_monitor_time,  last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
+    SELECT primary_node, standby_node, last_monitor_time,  last_apply_time, last_wal_primary_location::pg_lsn, last_wal_standby_location::pg_lsn, replication_lag, apply_lag
+      FROM repmgr.repl_monitor;
+  ELSE
+    EXECUTE $repmgr_func$
+CREATE TABLE repmgr.monitoring_history (
+  primary_node_id                INTEGER NOT NULL,
+  standby_node_id                INTEGER NOT NULL,
+  last_monitor_time              TIMESTAMP WITH TIME ZONE NOT NULL,
+  last_apply_time                TIMESTAMP WITH TIME ZONE,
+  last_wal_primary_location      TEXT NOT NULL,
+  last_wal_standby_location      TEXT,
+  replication_lag                BIGINT NOT NULL,
+  apply_lag                      BIGINT NOT NULL
+)
+    $repmgr_func$;
+    INSERT INTO repmgr.monitoring_history
+      (primary_node_id, standby_node_id, last_monitor_time,  last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
+    SELECT primary_node, standby_node, last_monitor_time,  last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag
+      FROM repmgr.repl_monitor;

-INSERT INTO repmgr.monitoring_history
-  (primary_node_id, standby_node_id, last_monitor_time,  last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
-SELECT primary_node, standby_node, last_monitor_time,  last_apply_time, last_wal_primary_location::pg_lsn, last_wal_standby_location::pg_lsn, replication_lag, apply_lag
-  FROM repmgr.repl_monitor;
+  END IF;
+END$repmgr$;

 CREATE INDEX idx_monitoring_history_time
          ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
@@ -95,6 +141,16 @@ LEFT JOIN repmgr.nodes un

 /* monitoring functions */

+CREATE FUNCTION set_local_node_id(INT)
+  RETURNS VOID
+  AS 'MODULE_PATHNAME', 'set_local_node_id'
+  LANGUAGE C STRICT;
+
+CREATE FUNCTION get_local_node_id()
+  RETURNS INT
+  AS 'MODULE_PATHNAME', 'get_local_node_id'
+  LANGUAGE C STRICT;
+
 CREATE FUNCTION standby_set_last_updated()
  RETURNS TIMESTAMP WITH TIME ZONE
  AS '$libdir/repmgr', 'standby_set_last_updated'
@@ -108,26 +164,6 @@ CREATE FUNCTION standby_get_last_updated()

 /* failover functions */

-CREATE FUNCTION request_vote(INT,INT)
-  RETURNS pg_lsn
-  AS '$libdir/repmgr', 'request_vote'
-  LANGUAGE C STRICT;
-
-CREATE FUNCTION get_voting_status()
-  RETURNS INT
-  AS '$libdir/repmgr', 'get_voting_status'
-  LANGUAGE C STRICT;
-
-CREATE FUNCTION set_voting_status_initiated()
-  RETURNS INT
-  AS '$libdir/repmgr', 'set_voting_status_initiated'
-  LANGUAGE C STRICT;
-
-CREATE FUNCTION other_node_is_candidate(INT, INT)
-  RETURNS BOOL
-  AS '$libdir/repmgr', 'other_node_is_candidate'
-  LANGUAGE C STRICT;
-
 CREATE FUNCTION notify_follow_primary(INT)
  RETURNS VOID
  AS '$libdir/repmgr', 'notify_follow_primary'
--- a/repmgr-action-bdr.c
+++ b/repmgr-action-bdr.c
@@ -1,9 +1,9 @@
 /*
- * repmgr-action-standby.c
+ * repmgr-action-bdr.c
 *
 * Implements BDR-related actions for the repmgr command line utility
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -28,7 +28,7 @@
 /*
 * do_bdr_register()
 *
- * As each BDR node is its own master, registering a BDR node
+ * As each BDR node is its own primary, registering a BDR node
 * will create the repmgr metadata schema if necessary.
 */
 void
@@ -92,7 +92,39 @@ do_bdr_register(void)
 		exit(ERR_BAD_CONFIG);
 	}

-	/* check whether repmgr extension exists, and that any other nodes are BDR */
+	/* check for a matching BDR node */
+	{
+		PQExpBufferData bdr_local_node_name;
+		bool		node_match = false;
+
+		initPQExpBuffer(&bdr_local_node_name);
+		node_match = bdr_node_name_matches(conn, config_file_options.node_name, &bdr_local_node_name);
+
+		if (node_match == false)
+		{
+			if (strlen(bdr_local_node_name.data))
+			{
+				log_error(_("local node BDR node name is \"%s\", expected: \"%s\""),
+						  bdr_local_node_name.data,
+						  config_file_options.node_name);
+				log_hint(_("\"node_name\" in repmgr.conf must match \"node_name\" in bdr.bdr_nodes"));
+			}
+			else
+			{
+				log_error(_("local node does not report BDR node name"));
+				log_hint(_("ensure this is an active BDR node"));
+			}
+
+			PQfinish(conn);
+			pfree(dbname);
+			termPQExpBuffer(&bdr_local_node_name);
+			exit(ERR_BAD_CONFIG);
+		}
+
+		termPQExpBuffer(&bdr_local_node_name);
+	}
+
+	/* check whether repmgr extension exists, and there are no non-BDR nodes registered */
 	extension_status = get_repmgr_extension_status(conn);

 	if (extension_status == REPMGR_UNKNOWN)
@@ -142,17 +174,9 @@ do_bdr_register(void)

 	pfree(dbname);

-	/* check for a matching BDR node */
+	if (bdr_node_has_repmgr_set(conn, config_file_options.node_name) == false)
 	{
-		bool		node_exists = bdr_node_exists(conn, config_file_options.node_name);
-
-		if (node_exists == false)
-		{
-			log_error(_("no BDR node with node_name \"%s\" found"), config_file_options.node_name);
-			log_hint(_("\"node_name\" in repmgr.conf must match \"node_name\" in bdr.bdr_nodes"));
-			PQfinish(conn);
-			exit(ERR_BAD_CONFIG);
-		}
+		bdr_node_set_repmgr_set(conn, config_file_options.node_name);
 	}

 	/*
--- a/repmgr-action-bdr.h
+++ b/repmgr-action-bdr.h
@@ -1,6 +1,6 @@
 /*
 * repmgr-action-bdr.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
--- a/repmgr-action-cluster.c
+++ b/repmgr-action-cluster.c
@@ -3,7 +3,7 @@
 *
 * Implements cluster information actions for the repmgr command line utility
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -82,6 +82,7 @@ do_cluster_show(void)
 	NodeInfoListCell *cell = NULL;
 	int			i = 0;
 	ItemList	warnings = {NULL, NULL};
+	bool		success = false;

 	/* Connect to local database to obtain cluster connection data */
 	log_verbose(LOG_INFO, _("connecting to database"));
@@ -91,11 +92,19 @@ do_cluster_show(void)
 	else
 		conn = establish_db_connection_by_params(&source_conninfo, true);

-	get_all_node_records_with_upstream(conn, &nodes);
+	success = get_all_node_records_with_upstream(conn, &nodes);
+
+	if (success == false)
+	{
+		/* get_all_node_records_with_upstream() will print error message */
+		PQfinish(conn);
+		exit(ERR_BAD_CONFIG);
+	}

 	if (nodes.node_count == 0)
 	{
-		log_error(_("unable to retrieve any node records"));
+		log_error(_("no node records were found"));
+		log_hint(_("ensure at least one node is registered"));
 		PQfinish(conn);
 		exit(ERR_BAD_CONFIG);
 	}
@@ -131,8 +140,14 @@ do_cluster_show(void)
 		}
 		else
 		{
+			char		error[MAXLEN];	/* bounded, NUL-terminated copy of the libpq error text */
+
+			snprintf(error, sizeof(error), "%s", PQerrorMessage(cell->node_info->conn));
 			cell->node_info->node_status = NODE_STATUS_DOWN;
 			cell->node_info->recovery_type = RECTYPE_UNKNOWN;
+			item_list_append_format(&warnings,
+									"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
+									cell->node_info->node_name, cell->node_info->node_id, trim(error));
 		}

 		initPQExpBuffer(&details);
@@ -158,15 +173,13 @@ do_cluster_show(void)
 									break;
 								case RECTYPE_STANDBY:
 									appendPQExpBuffer(&details, "! running as standby");
-									item_list_append_format(
-															&warnings,
+									item_list_append_format(&warnings,
 															"node \"%s\" (ID: %i) is registered as primary but running as standby",
 															cell->node_info->node_name, cell->node_info->node_id);
 									break;
 								case RECTYPE_UNKNOWN:
 									appendPQExpBuffer(&details, "! unknown");
-									item_list_append_format(
-															&warnings,
+									item_list_append_format(&warnings,
 															"node \"%s\" (ID: %i) has unknown replication status",
 															cell->node_info->node_name, cell->node_info->node_id);
 									break;
@@ -177,16 +190,14 @@ do_cluster_show(void)
 							if (cell->node_info->recovery_type == RECTYPE_PRIMARY)
 							{
 								appendPQExpBuffer(&details, "! running");
-								item_list_append_format(
-														&warnings,
+								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
 														cell->node_info->node_name, cell->node_info->node_id);
 							}
 							else
 							{
 								appendPQExpBuffer(&details, "! running as standby");
-								item_list_append_format(
-														&warnings,
+								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is registered as an inactive primary but running as standby",
 														cell->node_info->node_name, cell->node_info->node_id);
 							}
@@ -199,8 +210,7 @@ do_cluster_show(void)
 						if (cell->node_info->active == true)
 						{
 							appendPQExpBuffer(&details, "? unreachable");
-							item_list_append_format(
-													&warnings,
+							item_list_append_format(&warnings,
 													"node \"%s\" (ID: %i) is registered as an active primary but is unreachable",
 													cell->node_info->node_name, cell->node_info->node_id);
 						}
@@ -226,8 +236,7 @@ do_cluster_show(void)
 									break;
 								case RECTYPE_PRIMARY:
 									appendPQExpBuffer(&details, "! running as primary");
-									item_list_append_format(
-															&warnings,
+									item_list_append_format(&warnings,
 															"node \"%s\" (ID: %i) is registered as standby but running as primary",
 															cell->node_info->node_name, cell->node_info->node_id);
 									break;
@@ -245,16 +254,14 @@ do_cluster_show(void)
 							if (cell->node_info->recovery_type == RECTYPE_STANDBY)
 							{
 								appendPQExpBuffer(&details, "! running");
-								item_list_append_format(
-														&warnings,
+								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
 														cell->node_info->node_name, cell->node_info->node_id);
 							}
 							else
 							{
 								appendPQExpBuffer(&details, "! running as primary");
-								item_list_append_format(
-														&warnings,
+								item_list_append_format(&warnings,
 														"node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive",
 														cell->node_info->node_name, cell->node_info->node_id);
 							}
@@ -267,8 +274,7 @@ do_cluster_show(void)
 						if (cell->node_info->active == true)
 						{
 							appendPQExpBuffer(&details, "? unreachable");
-							item_list_append_format(
-													&warnings,
+							item_list_append_format(&warnings,
 													"node \"%s\" (ID: %i) is registered as an active standby but is unreachable",
 													cell->node_info->node_name, cell->node_info->node_id);
 						}
@@ -279,6 +285,7 @@ do_cluster_show(void)
 					}
 				}
 				break;
+			case WITNESS:
 			case BDR:
 				{
 					/* node is reachable */
@@ -415,7 +422,7 @@ do_cluster_show(void)
 		printf(_("\nWARNING: following issues were detected\n"));
 		for (cell = warnings.head; cell; cell = cell->next)
 		{
-			printf(_("  %s\n"), cell->string);
+			printf(_("  - %s\n"), cell->string);
 		}
 	}
 }
@@ -435,82 +442,18 @@ void
 do_cluster_event(void)
 {
 	PGconn	   *conn = NULL;
-	PQExpBufferData query;
-	PQExpBufferData where_clause;
 	PGresult   *res;
 	int			i = 0;
+	int			column_count = EVENT_HEADER_COUNT;

 	conn = establish_db_connection(config_file_options.conninfo, true);

-	initPQExpBuffer(&query);
-	initPQExpBuffer(&where_clause);
-
-	/* LEFT JOIN used here as a node record may have been removed */
-	appendPQExpBuffer(
-					  &query,
-					  "   SELECT e.node_id, n.node_name, e.event, e.successful, \n"
-					  "          TO_CHAR(e.event_timestamp, 'YYYY-MM-DD HH24:MI:SS') AS timestamp, \n"
-					  "          e.details \n"
-					  "     FROM repmgr.events e \n"
-					  "LEFT JOIN repmgr.nodes n ON e.node_id = n.node_id ");
-
-	if (runtime_options.node_id != UNKNOWN_NODE_ID)
-	{
-
-		append_where_clause(&where_clause,
-							"n.node_id=%i", runtime_options.node_id);
-	}
-	else if (runtime_options.node_name[0] != '\0')
-	{
-		char	   *escaped = escape_string(conn, runtime_options.node_name);
-
-		if (escaped == NULL)
-		{
-			log_error(_("unable to escape value provided for node name"));
-		}
-		else
-		{
-			append_where_clause(&where_clause,
-								"n.node_name='%s'",
-								escaped);
-			pfree(escaped);
-		}
-	}
-
-	if (runtime_options.event[0] != '\0')
-	{
-		char	   *escaped = escape_string(conn, runtime_options.event);
-
-		if (escaped == NULL)
-		{
-			log_error(_("unable to escape value provided for event"));
-		}
-		else
-		{
-			append_where_clause(&where_clause,
-								"e.event='%s'",
-								escaped);
-			pfree(escaped);
-		}
-	}
-
-	appendPQExpBuffer(&query, "\n%s\n",
-					  where_clause.data);
-
-	appendPQExpBuffer(&query,
-					  " ORDER BY e.event_timestamp DESC");
-
-	if (runtime_options.all == false && runtime_options.limit > 0)
-	{
-		appendPQExpBuffer(&query, " LIMIT %i",
-						  runtime_options.limit);
-	}
-
-	log_debug("do_cluster_event():\n%s", query.data);
-	res = PQexec(conn, query.data);
-
-	termPQExpBuffer(&query);
-	termPQExpBuffer(&where_clause);
+	res = get_event_records(conn,
+							runtime_options.node_id,
+							runtime_options.node_name,
+							runtime_options.event,
+							runtime_options.all,
+							runtime_options.limit);

 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
@@ -537,7 +480,11 @@ do_cluster_event(void)
 	strncpy(headers_event[EV_TIMESTAMP].title, _("Timestamp"), MAXLEN);
 	strncpy(headers_event[EV_DETAILS].title, _("Details"), MAXLEN);

-	for (i = 0; i < EVENT_HEADER_COUNT; i++)
+	/* if --terse provided, simply omit the "Details" column */
+	if (runtime_options.terse == true)
+		column_count --;
+
+	for (i = 0; i < column_count; i++)
 	{
 		headers_event[i].max_length = strlen(headers_event[i].title);
 	}
@@ -546,7 +493,7 @@ do_cluster_event(void)
 	{
 		int			j;

-		for (j = 0; j < EVENT_HEADER_COUNT; j++)
+		for (j = 0; j < column_count; j++)
 		{
 			headers_event[j].cur_length = strlen(PQgetvalue(res, i, j));
 			if (headers_event[j].cur_length > headers_event[j].max_length)
@@ -557,7 +504,7 @@ do_cluster_event(void)

 	}

-	for (i = 0; i < EVENT_HEADER_COUNT; i++)
+	for (i = 0; i < column_count; i++)
 	{
 		if (i == 0)
 			printf(" ");
@@ -570,14 +517,14 @@ do_cluster_event(void)
 	}
 	printf("\n");
 	printf("-");
-	for (i = 0; i < EVENT_HEADER_COUNT; i++)
+	for (i = 0; i < column_count; i++)
 	{
 		int			j;

 		for (j = 0; j < headers_event[i].max_length; j++)
 			printf("-");

-		if (i < (EVENT_HEADER_COUNT - 1))
+		if (i < (column_count - 1))
 			printf("-+-");
 		else
 			printf("-");
@@ -590,13 +537,13 @@ do_cluster_event(void)
 		int			j;

 		printf(" ");
-		for (j = 0; j < EVENT_HEADER_COUNT; j++)
+		for (j = 0; j < column_count; j++)
 		{
 			printf("%-*s",
 				   headers_event[j].max_length,
 				   PQgetvalue(res, i, j));

-			if (j < (EVENT_HEADER_COUNT - 1))
+			if (j < (column_count - 1))
 				printf(" | ");
 		}

@@ -1203,7 +1150,7 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
 		}
 		else
 		{
-			t_conninfo_param_list remote_conninfo;
+			t_conninfo_param_list remote_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
 			char	   *host = NULL;
 			PQExpBufferData quoted_command;

@@ -1323,7 +1270,7 @@ do_cluster_cleanup(void)

 	conn = establish_db_connection(config_file_options.conninfo, true);

-	/* check if there is a master in this cluster */
+	/* check if there is a primary in this cluster */
 	log_info(_("connecting to primary server"));
 	primary_conn = establish_primary_db_connection(conn, true);

--- a/repmgr-action-cluster.h
+++ b/repmgr-action-cluster.h
@@ -1,6 +1,6 @@
 /*
 * repmgr-action-cluster.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
--- a/repmgr-action-node.c
+++ b/repmgr-action-node.c
--- a/repmgr-action-node.h
+++ b/repmgr-action-node.h
@@ -1,6 +1,6 @@
 /*
 * repmgr-action-node.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -22,7 +22,6 @@
 extern void do_node_status(void);
 extern void do_node_check(void);

-
 extern void do_node_rejoin(void);
 extern void do_node_service(void);

--- a/repmgr-action-primary.c
+++ b/repmgr-action-primary.c
@@ -3,7 +3,7 @@
 *
 * Implements primary actions for the repmgr command line utility
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -74,7 +74,11 @@ do_primary_register(void)

 	log_verbose(LOG_INFO, _("server is not in recovery"));

-	/* create the repmgr extension if it doesn't already exist */
+	/*
+	 * create the repmgr extension if it doesn't already exist;
+	 * note that create_repmgr_extension() will take into account
+	 * the --dry-run option
+	 */
 	if (!create_repmgr_extension(conn))
 	{
 		PQfinish(conn);
@@ -92,6 +96,7 @@ do_primary_register(void)
 		return;
 	}

+	initialize_voting_term(conn);

 	/* Ensure there isn't another registered node which is primary */
 	primary_conn = get_primary_connection(conn, &current_primary_id, NULL);
@@ -543,7 +548,8 @@ do_primary_help(void)
 	printf(_("  \"primary unregister\" unregisters an inactive primary node.\n"));
 	puts("");
 	printf(_("  --dry-run                           check what would happen, but don't actually unregister the primary\n"));
-	printf(_("  -F, --force                         force removal of the record\n"));
+	printf(_("  --node-id                           ID of the inactive primary node to unregister.\n"));
+	printf(_("  -F, --force                         force removal of an active record\n"));

 	puts("");

--- a/repmgr-action-primary.h
+++ b/repmgr-action-primary.h
@@ -1,6 +1,6 @@
 /*
 * repmgr-action-primary.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
--- a/repmgr-action-standby.h
+++ b/repmgr-action-standby.h
@@ -1,6 +1,6 @@
 /*
 * repmgr-action-standby.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -28,7 +28,7 @@ extern void do_standby_switchover(void);

 extern void do_standby_help(void);

-extern bool do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record, PQExpBufferData *output);
+extern bool do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record, PQExpBufferData *output, int *error_code);



--- a/repmgr-action-witness.c
+++ b/repmgr-action-witness.c
@@ -0,0 +1,462 @@
+/*
+ * repmgr-action-witness.c
+ *
+ * Implements witness actions for the repmgr command line utility
+ *
+ * Copyright (c) 2ndQuadrant, 2010-2018
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <sys/stat.h>
+
+#include "repmgr.h"
+#include "dirutil.h"
+#include "compat.h"
+#include "controldata.h"
+
+#include "repmgr-client-global.h"
+#include "repmgr-action-witness.h"
+
+static char		repmgr_user[MAXLEN];
+static char		repmgr_db[MAXLEN];
+
+/*
+ * do_witness_register()
+ *
+ * Check that the witness and primary servers are suitable, then create or
+ * update the witness node record on the primary and copy the node records
+ * to the witness. Any failure exits with ERR_BAD_CONFIG.
+ */
+void
+do_witness_register(void)
+{
+	PGconn	   *witness_conn = NULL;
+	PGconn	   *primary_conn = NULL;
+	RecoveryType recovery_type = RECTYPE_UNKNOWN;
+	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
+	t_node_info node_record = T_NODE_INFO_INITIALIZER;
+	RecordStatus record_status = RECORD_NOT_FOUND;
+	bool		record_created = false;
+
+	log_info(_("connecting to witness node \"%s\" (ID: %i)"),
+			 config_file_options.node_name,
+			 config_file_options.node_id);
+
+	witness_conn = establish_db_connection_quiet(config_file_options.conninfo);
+
+	if (PQstatus(witness_conn) != CONNECTION_OK)
+	{
+		log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
+				  config_file_options.node_name,
+				  config_file_options.node_id);
+		log_detail("%s", PQerrorMessage(witness_conn));
+		log_hint(_("the witness node must be running before it can be registered"));
+		PQfinish(witness_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* check witness node's recovery type */
+	recovery_type = get_recovery_type(witness_conn);
+
+	if (recovery_type == RECTYPE_STANDBY)
+	{
+		log_error(_("provided node is a standby"));
+		log_hint(_("a witness node must run on an independent primary server"));
+
+		PQfinish(witness_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* check that witness node is not a BDR node */
+	if (is_bdr_db_quiet(witness_conn) == true)
+	{
+		log_error(_("witness node is a BDR node"));
+		log_hint(_("a witness node cannot be configured for a BDR cluster"));
+
+		PQfinish(witness_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* connect to primary with provided parameters */
+	log_info(_("connecting to primary node"));
+	/*
+	 * Extract the repmgr user and database names from the conninfo string
+	 * provided in repmgr.conf
+	 */
+	get_conninfo_value(config_file_options.conninfo, "user", repmgr_user);
+	get_conninfo_value(config_file_options.conninfo, "dbname", repmgr_db);
+
+	param_set_ine(&source_conninfo, "user", repmgr_user);
+	param_set_ine(&source_conninfo, "dbname", repmgr_db);
+
+	/* We need to connect to check configuration and copy it */
+	primary_conn = establish_db_connection_by_params(&source_conninfo, false);
+
+	if (PQstatus(primary_conn) != CONNECTION_OK)
+	{
+		log_error(_("unable to connect to the primary node"));
+		log_hint(_("a primary node must be configured before registering a witness node"));
+
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* check primary node's recovery type */
+	recovery_type = get_recovery_type(primary_conn);
+
+	if (recovery_type == RECTYPE_STANDBY)
+	{
+		log_error(_("provided primary node is a standby"));
+		log_hint(_("provide the connection details of the cluster's primary server"));
+
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* check that primary node is not a BDR node */
+	if (is_bdr_db_quiet(primary_conn) == true)
+	{
+		log_error(_("primary node is a BDR node"));
+		log_hint(_("a witness node cannot be configured for a BDR cluster"));
+
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* XXX sanity check witness node is not part of main cluster */
+
+	/* create repmgr extension, if does not exist */
+	if (runtime_options.dry_run == false && !create_repmgr_extension(witness_conn))
+	{
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/*
+	 * check if node record exists on primary, overwrite if -F/--force provided,
+	 * otherwise exit with error
+	 */
+
+	record_status = get_node_record(primary_conn,
+									config_file_options.node_id,
+									&node_record);
+
+	if (record_status == RECORD_FOUND)
+	{
+		/*
+		 * If node is not a witness, cowardly refuse to do anything, let the
+		 * user work out what's the correct thing to do.
+		 */
+		if (node_record.type != WITNESS)
+		{
+			log_error(_("node \"%s\" (ID: %i) is already registered as a %s node"),
+					  config_file_options.node_name,
+					  config_file_options.node_id,
+					  get_node_type_string(node_record.type));
+			log_hint(_("use \"repmgr %s unregister\" to remove a non-witness node record"),
+					 get_node_type_string(node_record.type));
+
+			PQfinish(witness_conn);
+			PQfinish(primary_conn);
+
+			exit(ERR_BAD_CONFIG);
+		}
+
+		if (!runtime_options.force)
+		{
+			log_error(_("witness node is already registered"));
+			log_hint(_("use option -F/--force to reregister the node"));
+
+			PQfinish(witness_conn);
+			PQfinish(primary_conn);
+
+			exit(ERR_BAD_CONFIG);
+		}
+	}
+
+	/* XXX check other node with same name does not exist */
+
+	/*
+	 * if repmgr.nodes contains entries, delete if -F/--force provided,
+	 * otherwise exit with error
+	 */
+	get_all_node_records(witness_conn, &nodes);
+
+	log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
+
+	if (nodes.node_count > 0)
+	{
+		if (!runtime_options.force)
+		{
+			log_error(_("witness node is already initialised and contains node records"));
+			log_hint(_("use option -F/--force to reinitialise the node"));
+			PQfinish(primary_conn);
+			PQfinish(witness_conn);
+			exit(ERR_BAD_CONFIG);
+		}
+	}
+
+	clear_node_info_list(&nodes);
+
+	if (runtime_options.dry_run == true)
+	{
+		log_info(_("prerequisites for registering the witness node are met"));
+		PQfinish(primary_conn);
+		PQfinish(witness_conn);
+		exit(SUCCESS);
+	}
+
+	/*
+	 * create the node record on the primary, or update it if it already
+	 * exists (in which case -F/--force is known to be in use)
+	 */
+	init_node_record(&node_record);
+
+	/* these values are mandatory, setting them to anything else has no point */
+	node_record.type = WITNESS;
+	node_record.priority = 0;
+	node_record.upstream_node_id = get_primary_node_id(primary_conn);
+
+	if (record_status == RECORD_FOUND)
+	{
+		record_created = update_node_record(primary_conn,
+											"witness register",
+											&node_record);
+	}
+	else
+	{
+		record_created = create_node_record(primary_conn,
+											"witness register",
+											&node_record);
+	}
+
+	if (record_created == false)
+	{
+		log_error(_("unable to create or update node record on primary"));
+		PQfinish(primary_conn);
+		PQfinish(witness_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* sync records from primary */
+	if (witness_copy_node_records(primary_conn, witness_conn) == false)
+	{
+		log_error(_("unable to copy repmgr node records from primary"));
+		PQfinish(primary_conn);
+		PQfinish(witness_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* create event */
+	create_event_record(primary_conn,
+						&config_file_options,
+						config_file_options.node_id,
+						"witness_register",
+						true,
+						NULL);
+
+	PQfinish(primary_conn);
+	PQfinish(witness_conn);
+
+	log_info(_("witness registration complete"));
+	log_notice(_("witness node \"%s\" (ID: %i) successfully registered"),
+			   config_file_options.node_name, config_file_options.node_id);
+}
+
+
+/*
+ * do_witness_unregister()
+ *
+ * Delete the witness node record on the primary and, if the witness server
+ * is reachable, resync its node records. With -F/--force an unreachable
+ * witness is tolerated and the primary is reached via "source_conninfo".
+ */
+void
+do_witness_unregister(void)
+{
+	PGconn	   *witness_conn = NULL;
+	PGconn	   *primary_conn = NULL;
+	t_node_info node_record = T_NODE_INFO_INITIALIZER;
+	RecordStatus record_status = RECORD_NOT_FOUND;
+	bool		node_record_deleted = false;
+	bool		witness_available = true;
+
+	log_info(_("connecting to witness node \"%s\" (ID: %i)"),
+			 config_file_options.node_name,
+			 config_file_options.node_id);
+
+	witness_conn = establish_db_connection_quiet(config_file_options.conninfo);
+
+	if (PQstatus(witness_conn) != CONNECTION_OK)
+	{
+		if (!runtime_options.force)
+		{
+			log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
+					  config_file_options.node_name,
+					  config_file_options.node_id);
+			log_detail("%s", PQerrorMessage(witness_conn));
+			log_hint(_("provide -F/--force to remove the witness record if the server is not running"));
+			PQfinish(witness_conn);
+			exit(ERR_BAD_CONFIG);
+		}
+
+		log_notice(_("unable to connect to witness node \"%s\" (ID: %i), removing node record on cluster primary only"),
+				   config_file_options.node_name,
+				   config_file_options.node_id);
+		/* free the failed connection now so later PQfinish() calls are no-ops */
+		PQfinish(witness_conn);
+		witness_conn = NULL;
+		witness_available = false;
+	}
+
+	if (witness_available == true)
+	{
+		primary_conn = get_primary_connection_quiet(witness_conn, NULL, NULL);
+	}
+	else
+	{
+		/*
+		 * Extract the repmgr user and database names from the conninfo string
+		 * provided in repmgr.conf
+		 */
+		get_conninfo_value(config_file_options.conninfo, "user", repmgr_user);
+		get_conninfo_value(config_file_options.conninfo, "dbname", repmgr_db);
+
+		param_set_ine(&source_conninfo, "user", repmgr_user);
+		param_set_ine(&source_conninfo, "dbname", repmgr_db);
+
+		primary_conn = establish_db_connection_by_params(&source_conninfo, false);
+	}
+
+	if (PQstatus(primary_conn) != CONNECTION_OK)
+	{
+		log_error(_("unable to connect to primary"));
+		log_detail("%s", PQerrorMessage(primary_conn));
+
+		if (witness_available == false)
+			log_hint(_("provide connection details to primary server"));
+
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* Check node exists and is really a witness */
+	record_status = get_node_record(primary_conn, config_file_options.node_id, &node_record);
+
+	if (record_status != RECORD_FOUND)
+	{
+		log_error(_("no record found for node %i"), config_file_options.node_id);
+
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	if (node_record.type != WITNESS)
+	{
+		log_error(_("node %i is not a witness node"), config_file_options.node_id);
+		log_detail(_("node %i is a %s node"), config_file_options.node_id, get_node_type_string(node_record.type));
+
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	if (runtime_options.dry_run == true)
+	{
+		log_info(_("prerequisites for unregistering the witness node are met"));
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(SUCCESS);
+	}
+
+	log_info(_("unregistering witness node %i"), config_file_options.node_id);
+	node_record_deleted = delete_node_record(primary_conn,
+										     config_file_options.node_id);
+
+	if (node_record_deleted == false)
+	{
+		PQfinish(primary_conn);
+		PQfinish(witness_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* sync records from primary */
+	if (witness_available == true && witness_copy_node_records(primary_conn, witness_conn) == false)
+	{
+		log_error(_("unable to copy repmgr node records from primary"));
+		PQfinish(primary_conn);
+		PQfinish(witness_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* Log the event */
+	create_event_record(primary_conn,
+						&config_file_options,
+						config_file_options.node_id,
+						"witness_unregister",
+						true,
+						NULL);
+
+	PQfinish(primary_conn);
+	PQfinish(witness_conn);
+
+	log_info(_("witness unregistration complete"));
+	log_detail(_("witness node with id %i (conninfo: %s) successfully unregistered"),
+			    config_file_options.node_id, config_file_options.conninfo);
+}
+
+
+/* do_witness_help(): print usage and option help for the "witness" actions */
+void
+do_witness_help(void)
+{
+	print_help_header();
+
+	printf(_("Usage:\n"));
+	printf(_("    %s [OPTIONS] witness register\n"), progname());
+	printf(_("    %s [OPTIONS] witness unregister\n"), progname());
+
+	printf(_("WITNESS REGISTER\n"));
+	puts("");
+	printf(_("  \"witness register\" registers a witness node.\n"));
+	puts("");
+	printf(_("  Requires provision of connection information for the primary\n"));
+	puts("");
+	printf(_("  --dry-run                           check prerequisites but don't make any changes\n"));
+	printf(_("  -F, --force                         overwrite an existing node record\n"));
+	puts("");
+
+	printf(_("WITNESS UNREGISTER\n"));
+	puts("");
+	printf(_("  \"witness unregister\" unregisters a witness node.\n"));
+	puts("");
+	printf(_("  --dry-run                           check prerequisites but don't make any changes\n"));
+	printf(_("  -F, --force                         unregister when witness node not running\n"));
+	puts("");
+}
--- a/repmgr-action-witness.h
+++ b/repmgr-action-witness.h
@@ -0,0 +1,27 @@
+/*
+ * repmgr-action-witness.h
+ * Copyright (c) 2ndQuadrant, 2010-2018
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _REPMGR_ACTION_WITNESS_H_
+#define _REPMGR_ACTION_WITNESS_H_
+
+extern void do_witness_register(void);
+extern void do_witness_unregister(void);
+
+extern void do_witness_help(void);
+
+#endif							/* _REPMGR_ACTION_WITNESS_H_ */
--- a/repmgr-client-global.h
+++ b/repmgr-client-global.h
@@ -1,6 +1,6 @@
 /*
 * repmgr-client-global.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -68,6 +68,7 @@ typedef struct
 	int			node_id;
 	char		node_name[MAXLEN];
 	char		data_dir[MAXPGPATH];
+	int			remote_node_id;

 	/* "standby clone" options */
 	bool		copy_external_config_files;
@@ -79,6 +80,7 @@ typedef struct
 	char		replication_user[MAXLEN];
 	char		upstream_conninfo[MAXLEN];
 	bool		without_barman;
+	bool		recovery_conf_only;

 	/* "standby clone"/"standby follow" options */
 	int			upstream_node_id;
@@ -86,6 +88,7 @@ typedef struct
 	/* "standby register" options */
 	bool		wait_register_sync;
 	int			wait_register_sync_seconds;
+	int			wait_start;

 	/* "standby switchover" options */
 	bool		always_promote;
@@ -101,6 +104,8 @@ typedef struct
 	bool		replication_lag;
 	bool		role;
 	bool		slots;
+	bool		has_passfile;
+	bool		replication_connection;

 	/* "node join" options */
 	char		config_files[MAXLEN];
@@ -134,24 +139,24 @@ typedef struct
 		/* output options */ \
 		false, false, false,  \
 		/* database connection options */ \
-		"", "", "",	"",				  \
+		"", "", "",	"", \
 		/* other connection options */ \
-		"",	"",  \
-		/* node options */ \
-		UNKNOWN_NODE_ID, "", "", \
+		"",	"", \
+		/* general node options */ \
+		UNKNOWN_NODE_ID, "", "", UNKNOWN_NODE_ID, \
 		/* "standby clone" options */ \
 		false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", \
-		false,  \
+		false, false, \
 		/* "standby clone"/"standby follow" options */ \
 		NO_UPSTREAM_NODE, \
 		/* "standby register" options */ \
-		false, 0, \
+		false, 0, DEFAULT_WAIT_START,   \
 		/* "standby switchover" options */ \
 		false, false, false, \
 		/* "node status" options */ \
 		false, \
 		/* "node check" options */ \
-		false, false, false, false, false, \
+		false, false, false, false, false, false, false, \
 		/* "node join" options */ \
 		"", \
 		/* "node service" options */ \
@@ -160,7 +165,7 @@ typedef struct
 		false, "", CLUSTER_EVENT_LIMIT,	\
 		/* "cluster cleanup" options */ \
 		0, \
-		/* Following options for internal use */ \
+		/* following options for internal use */ \
 		"/tmp", OM_TEXT	\
 }

@@ -177,6 +182,7 @@ typedef enum
 	ACTION_NONE,
 	ACTION_START,
 	ACTION_STOP,
+	ACTION_STOP_WAIT,
 	ACTION_RESTART,
 	ACTION_RELOAD,
 	ACTION_PROMOTE
--- a/repmgr-client.c
+++ b/repmgr-client.c
@@ -1,7 +1,7 @@
 /*
 * repmgr-client.c - Command interpreter for the repmgr package
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This module is a command-line utility to easily setup a cluster of
 * hot standby servers for an HA environment
@@ -57,9 +57,9 @@
 #include "repmgr-client-global.h"
 #include "repmgr-action-primary.h"
 #include "repmgr-action-standby.h"
+#include "repmgr-action-witness.h"
 #include "repmgr-action-bdr.h"
 #include "repmgr-action-node.h"
-
 #include "repmgr-action-cluster.h"

 #include <storage/fd.h>			/* for PG_TEMP_FILE_PREFIX */
@@ -72,7 +72,7 @@ t_runtime_options runtime_options = T_RUNTIME_OPTIONS_INITIALIZER;
 t_configuration_options config_file_options = T_CONFIGURATION_OPTIONS_INITIALIZER;

 /* conninfo params for the node we're operating on */
-t_conninfo_param_list source_conninfo;
+t_conninfo_param_list source_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;

 bool		config_file_required = true;
 char		pg_bindir[MAXLEN] = "";
@@ -94,7 +94,7 @@ static ItemList cli_warnings = {NULL, NULL};
 int
 main(int argc, char **argv)
 {
-	t_conninfo_param_list default_conninfo;
+	t_conninfo_param_list default_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;

 	int			optindex;
 	int			c;
@@ -176,7 +176,7 @@ main(int argc, char **argv)
 		strncpy(runtime_options.username, pw->pw_name, MAXLEN);
 	}

-	while ((c = getopt_long(argc, argv, "?Vb:f:Fd:h:p:U:R:S:L:vtD:crC:", long_options,
+	while ((c = getopt_long(argc, argv, "?Vb:f:FWd:h:p:U:R:S:D:ck:L:tvC:", long_options,
 							&optindex)) != -1)
 	{
 		/*
@@ -328,6 +328,11 @@ main(int argc, char **argv)
 				strncpy(runtime_options.node_name, optarg, MAXLEN);
 				break;

+				/* --remote-node-id */
+			case OPT_REMOTE_NODE_ID:
+				runtime_options.remote_node_id = repmgr_atoi(optarg, "--remote-node-id", &cli_errors, false);
+				break;
+
 				/*
 				 * standby options * ---------------
 				 */
@@ -383,12 +388,21 @@ main(int argc, char **argv)
 				runtime_options.without_barman = true;
 				break;

+			case OPT_RECOVERY_CONF_ONLY:
+				runtime_options.recovery_conf_only = true;
+				break;
+
+
 				/*---------------------------
 				 * "standby register" options
 				 *---------------------------
 				 */

-			case OPT_REGISTER_WAIT:
+			case OPT_WAIT_START:
+				runtime_options.wait_start = repmgr_atoi(optarg, "--wait-start", &cli_errors, false);
+				break;
+
+			case OPT_WAIT_SYNC:
 				runtime_options.wait_register_sync = true;
 				if (optarg != NULL)
 				{
@@ -446,6 +460,14 @@ main(int argc, char **argv)
 				runtime_options.slots = true;
 				break;

+			case OPT_HAS_PASSFILE:
+				runtime_options.has_passfile = true;
+				break;
+
+			case OPT_REPL_CONN:
+				runtime_options.replication_connection = true;
+				break;
+
 				/*--------------------
 				 * "node rejoin" options
 				 *--------------------
@@ -473,10 +495,6 @@ main(int argc, char **argv)
 				runtime_options.list_actions = true;
 				break;

-			case OPT_CHECK:
-				runtime_options.check = true;
-				break;
-
 			case OPT_CHECKPOINT:
 				runtime_options.checkpoint = true;
 				break;
@@ -707,6 +725,7 @@ main(int argc, char **argv)
 	 *
 	 *   { PRIMARY | MASTER } REGISTER |
 	 *   STANDBY { REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER } |
+	 *   WITNESS { REGISTER | UNREGISTER } |
 	 *   BDR { REGISTER | UNREGISTER } |
 	 *   NODE { STATUS | CHECK | REJOIN | SERVICE } |
 	 *   CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | CLEANUP }
@@ -731,7 +750,6 @@ main(int argc, char **argv)

 	if (repmgr_command != NULL)
 	{
-#ifndef BDR_ONLY
 		if (strcasecmp(repmgr_command, "PRIMARY") == 0 || strcasecmp(repmgr_command, "MASTER") == 0)
 		{
 			if (help_option == true)
@@ -775,10 +793,19 @@ main(int argc, char **argv)
 			else if (strcasecmp(repmgr_action, "STATUS") == 0)
 				action = NODE_STATUS;
 		}
+		else if (strcasecmp(repmgr_command, "WITNESS") == 0)
+		{
+			if (help_option == true)
+			{
+				do_witness_help();
+				exit(SUCCESS);
+			}
+			else if (strcasecmp(repmgr_action, "REGISTER") == 0)
+				action = WITNESS_REGISTER;
+			else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
+				action = WITNESS_UNREGISTER;
+		}
 		else if (strcasecmp(repmgr_command, "BDR") == 0)
-#else
-		if (strcasecmp(repmgr_command, "BDR") == 0)
-#endif
 		{
 			if (help_option == true)
 			{
@@ -971,31 +998,10 @@ main(int argc, char **argv)
 	}


-
-	/* check for conflicts between runtime options and configuration file */
-	/* ================================================================== */
-
-	if (action == STANDBY_CLONE)
-	{
-		standy_clone_mode mode = get_standby_clone_mode();
-
-		if (mode == barman && runtime_options.without_barman == false
-			&& config_file_options.use_replication_slots == true)
-		{
-			log_error(_("STANDBY CLONE in Barman mode is incompatible with configuration option \"use_replication_slots\""));
-			log_hint(_("set \"use_replication_slots\" to \"no\" in repmgr.conf, or use --without-barman fo clone directly from the upstream server"));
-			exit(ERR_BAD_CONFIG);
-		}
-	}
-
-
 	/*
 	 * Check for configuration file items which can be overriden by runtime
 	 * options
-	 */
-
-	/*
-	 * ============================================================================
+	 * =====================================================================
 	 */

 	/*
@@ -1139,7 +1145,6 @@ main(int argc, char **argv)

 	switch (action)
 	{
-#ifndef BDR_ONLY
 			/* PRIMARY */
 		case PRIMARY_REGISTER:
 			do_primary_register();
@@ -1168,20 +1173,13 @@ main(int argc, char **argv)
 			do_standby_switchover();
 			break;

+			/* WITNESS */
+		case WITNESS_REGISTER:
+			do_witness_register();
 			break;
-#else
-			/* we won't ever reach here, but stop the compiler complaining */
-		case PRIMARY_REGISTER:
-		case PRIMARY_UNREGISTER:
-		case STANDBY_CLONE:
-		case STANDBY_REGISTER:
-		case STANDBY_UNREGISTER:
-		case STANDBY_PROMOTE:
-		case STANDBY_FOLLOW:
-		case STANDBY_SWITCHOVER:
+		case WITNESS_UNREGISTER:
+			do_witness_unregister();
 			break;
-
-#endif
 			/* BDR */
 		case BDR_REGISTER:
 			do_bdr_register();
@@ -1298,7 +1296,7 @@ check_cli_parameters(const int action)

 					/*
 					 * XXX if -D/--pgdata provided, and also
-					 * config_file_options.pgdaga, warn -D/--pgdata will be
+					 * config_file_options.pgdata, warn -D/--pgdata will be
 					 * ignored
 					 */

@@ -1328,6 +1326,22 @@ check_cli_parameters(const int action)
 				 */
 			}
 			break;
+		case WITNESS_REGISTER:
+			{
+				if (!runtime_options.host_param_provided)
+				{
+					item_list_append_format(&cli_errors,
+											_("host name for the source node must be provided when executing %s"),
+											action_name(action));
+				}
+			}
+			break;
+		case NODE_CHECK:
+			if (runtime_options.has_passfile == true)
+			{
+				config_file_required = false;
+			}
+			break;
 		case NODE_STATUS:
 			if (runtime_options.node_id != UNKNOWN_NODE_ID)
 			{
@@ -1376,6 +1390,8 @@ check_cli_parameters(const int action)
 			case STANDBY_CLONE:
 			case STANDBY_FOLLOW:
 			case STANDBY_REGISTER:
+			case WITNESS_REGISTER:
+			case WITNESS_UNREGISTER:
 			case CLUSTER_SHOW:
 			case CLUSTER_MATRIX:
 			case CLUSTER_CROSSCHECK:
@@ -1463,6 +1479,39 @@ check_cli_parameters(const int action)
 		}
 	}

+	if (runtime_options.replication_user[0])
+	{
+		switch (action)
+		{
+			case PRIMARY_REGISTER:
+			case STANDBY_REGISTER:
+			case STANDBY_CLONE:
+				break;			/* --replication-user accepted without a warning */
+			case STANDBY_FOLLOW:
+				item_list_append_format(&cli_warnings,
+										_("--replication-user ignored when executing %s"),
+										action_name(action));
+				break;			/* do not fall through and add a second warning */
+			default:
+				item_list_append_format(&cli_warnings,
+										_("--replication-user not required when executing %s"),
+										action_name(action));
+		}
+	}
+
+	if (runtime_options.recovery_conf_only == true)
+	{
+		switch (action)
+		{
+			case STANDBY_CLONE:
+				break;			/* no warning when executing STANDBY CLONE */
+			default:			/* warn using the option name as registered in long_options */
+				item_list_append_format(&cli_warnings,
+										_("--recovery-conf-only will be ignored when executing %s"),
+										action_name(action));
+		}
+	}
+
 	if (runtime_options.event[0])
 	{
 		switch (action)
@@ -1476,25 +1525,6 @@ check_cli_parameters(const int action)
 		}
 	}

-	if (runtime_options.replication_user[0])
-	{
-		switch (action)
-		{
-			case PRIMARY_REGISTER:
-			case STANDBY_REGISTER:
-				break;
-			case STANDBY_CLONE:
-			case STANDBY_FOLLOW:
-				item_list_append_format(&cli_warnings,
-										_("--replication-user ignored when executing %s)"),
-										action_name(action));
-			default:
-				item_list_append_format(&cli_warnings,
-										_("--replication-user not required when executing %s"),
-										action_name(action));
-		}
-	}
-
 	if (runtime_options.limit_provided)
 	{
 		switch (action)
@@ -1555,8 +1585,7 @@ check_cli_parameters(const int action)
 			case NODE_STATUS:
 				break;
 			default:
-				item_list_append_format(
-										&cli_warnings,
+				item_list_append_format(&cli_warnings,
 										_("--is-shutdown-cleanly will be ignored when executing %s"),
 										action_name(action));
 		}
@@ -1569,8 +1598,7 @@ check_cli_parameters(const int action)
 			case STANDBY_SWITCHOVER:
 				break;
 			default:
-				item_list_append_format(
-										&cli_warnings,
+				item_list_append_format(&cli_warnings,
 										_("--always-promote will be ignored when executing %s"),
 										action_name(action));
 		}
@@ -1584,8 +1612,7 @@ check_cli_parameters(const int action)
 			case NODE_REJOIN:
 				break;
 			default:
-				item_list_append_format(
-										&cli_warnings,
+				item_list_append_format(&cli_warnings,
 										_("--force-rewind will be ignored when executing %s"),
 										action_name(action));
 		}
@@ -1599,8 +1626,7 @@ check_cli_parameters(const int action)
 			case NODE_REJOIN:
 				break;
 			default:
-				item_list_append_format(
-										&cli_warnings,
+				item_list_append_format(&cli_warnings,
 										_("--config-files will be ignored when executing %s"),
 										action_name(action));
 		}
@@ -1614,12 +1640,15 @@ check_cli_parameters(const int action)
 			case PRIMARY_UNREGISTER:
 			case STANDBY_CLONE:
 			case STANDBY_REGISTER:
+			case STANDBY_FOLLOW:
 			case STANDBY_SWITCHOVER:
+			case WITNESS_REGISTER:
+			case WITNESS_UNREGISTER:
+			case NODE_REJOIN:
 			case NODE_SERVICE:
 				break;
 			default:
-				item_list_append_format(
-										&cli_warnings,
+				item_list_append_format(&cli_warnings,
 										_("--dry-run is not effective when executing %s"),
 										action_name(action));
 		}
@@ -1641,8 +1670,7 @@ check_cli_parameters(const int action)
 		if (used_options > 1)
 		{
 			/* TODO: list which options were used */
-			item_list_append(
-							 &cli_errors,
+			item_list_append(&cli_errors,
 							 "only one of --csv, --nagios and --optformat can be used");
 		}
 	}
@@ -1670,6 +1698,11 @@ action_name(const int action)
 		case STANDBY_FOLLOW:
 			return "STANDBY FOLLOW";

+		case WITNESS_REGISTER:
+			return "WITNESS REGISTER";
+		case WITNESS_UNREGISTER:
+			return "WITNESS UNREGISTER";
+
 		case BDR_REGISTER:
 			return "BDR REGISTER";
 		case BDR_UNREGISTER:
@@ -1741,13 +1774,12 @@ do_help(void)
 	print_help_header();

 	printf(_("Usage:\n"));
-#ifndef BDR_ONLY
 	printf(_("    %s [OPTIONS] primary {register|unregister}\n"), progname());
-	printf(_("    %s [OPTIONS] standby {register|unregister|clone|promote|follow}\n"), progname());
-#endif
+	printf(_("    %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"), progname());
 	printf(_("    %s [OPTIONS] bdr     {register|unregister}\n"), progname());
-	printf(_("    %s [OPTIONS] node    status\n"), progname());
+	printf(_("    %s [OPTIONS] node    {status|check|rejoin|service}\n"), progname());
 	printf(_("    %s [OPTIONS] cluster {show|event|matrix|crosscheck}\n"), progname());
+	printf(_("    %s [OPTIONS] witness {register|unregister}\n"), progname());

 	puts("");

@@ -2071,9 +2103,12 @@ test_ssh_connection(char *host, char *remote_user)
 bool
 local_command(const char *command, PQExpBufferData *outputbuf)
 {
-	FILE	   *fp;
+	FILE	   *fp = NULL;
 	char		output[MAXLEN];
 	int			retval = 0;
+	bool		success;
+
+	log_verbose(LOG_DEBUG, "executing:\n  %s", command);

 	if (outputbuf == NULL)
 	{
@@ -2089,27 +2124,45 @@ local_command(const char *command, PQExpBufferData *outputbuf)
 		return false;
 	}

-	/* TODO: better error handling */
+
 	while (fgets(output, MAXLEN, fp) != NULL)
 	{
 		appendPQExpBuffer(outputbuf, "%s", output);
+		if (!feof(fp))
+		{
+			break;
+		}
 	}

-	pclose(fp);
+	retval = pclose(fp);
+
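+	/* NOTE(review): 141 looks like 128 + SIGPIPE (13), presumably tolerated because the read loop above can stop before EOF - confirm */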
+	success = (WEXITSTATUS(retval) == 0 || WEXITSTATUS(retval) == 141) ? true : false;
+
+	log_verbose(LOG_DEBUG, "result of command was %i (%i)", WEXITSTATUS(retval), retval);

 	if (outputbuf->data != NULL)
 		log_verbose(LOG_DEBUG, "local_command(): output returned was:\n%s", outputbuf->data);
 	else
 		log_verbose(LOG_DEBUG, "local_command(): no output returned");

-	return true;
+	return success;
 }


+/*
+ * get_superuser_connection()
+ *
+ * Check if provided connection "conn" is a superuser connection, if not attempt to
+ * make a superuser connection "superuser_conn" with the provided --superuser parameter.
+ *
+ * "privileged_conn" is set to whichever connection is the superuser connection.
+ */
 void
 get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privileged_conn)
 {
 	t_connection_user userinfo = T_CONNECTION_USER_INITIALIZER;
+	t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
 	bool		is_superuser = false;

 	/* this should never happen */
@@ -2135,9 +2188,11 @@ get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privil
 		exit(ERR_BAD_CONFIG);
 	}

-	*superuser_conn = establish_db_connection_as_user(config_file_options.conninfo,
-													  runtime_options.superuser,
-													  false);
+	initialize_conninfo_params(&conninfo_params, false);
+	conn_to_param_list(*conn, &conninfo_params);
+	param_set(&conninfo_params, "user", runtime_options.superuser);
+
+	*superuser_conn = establish_db_connection_by_params(&conninfo_params, false);

 	if (PQstatus(*superuser_conn) != CONNECTION_OK)
 	{
@@ -2364,7 +2419,12 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf
 	pclose(fp);

 	if (outputbuf != NULL)
-		log_verbose(LOG_DEBUG, "remote_command(): output returned was:\n  %s", outputbuf->data);
+	{
+		if (strlen(outputbuf->data))
+			log_verbose(LOG_DEBUG, "remote_command(): output returned was:\n  %s", outputbuf->data);
+		else
+			log_verbose(LOG_DEBUG, "remote_command(): no output returned");
+	}

 	return true;
 }
@@ -2410,18 +2470,15 @@ get_server_action(t_server_action action, char *script, char *data_dir)
 				{
 					initPQExpBuffer(&command);

-					appendPQExpBuffer(
-									  &command,
+					appendPQExpBuffer(&command,
 									  "%s %s -w -D ",
 									  make_pg_path("pg_ctl"),
 									  config_file_options.pg_ctl_options);

-					appendShellString(
-									  &command,
+					appendShellString(&command,
 									  data_dir);

-					appendPQExpBuffer(
-									  &command,
+					appendPQExpBuffer(&command,
 									  " start");

 					strncpy(script, command.data, MAXLEN);
@@ -2433,6 +2490,7 @@ get_server_action(t_server_action action, char *script, char *data_dir)
 			}

 		case ACTION_STOP:
+		case ACTION_STOP_WAIT:
 			{
 				if (config_file_options.service_stop_command[0] != '\0')
 				{
@@ -2442,19 +2500,23 @@ get_server_action(t_server_action action, char *script, char *data_dir)
 				else
 				{
 					initPQExpBuffer(&command);
-					appendPQExpBuffer(
-									  &command,
+					appendPQExpBuffer(&command,
 									  "%s %s -D ",
 									  make_pg_path("pg_ctl"),
 									  config_file_options.pg_ctl_options);

-					appendShellString(
-									  &command,
+					appendShellString(&command,
 									  data_dir);

-					appendPQExpBuffer(
-									  &command,
-									  " -m fast -W stop");
+					if (action == ACTION_STOP_WAIT)
+						appendPQExpBuffer(&command,
+										  " -w");
+					else
+						appendPQExpBuffer(&command,
+										  " -W");
+
+					appendPQExpBuffer(&command,
+									  " -m fast stop");

 					strncpy(script, command.data, MAXLEN);

@@ -2473,18 +2535,15 @@ get_server_action(t_server_action action, char *script, char *data_dir)
 				else
 				{
 					initPQExpBuffer(&command);
-					appendPQExpBuffer(
-									  &command,
+					appendPQExpBuffer(&command,
 									  "%s %s -w -D ",
 									  make_pg_path("pg_ctl"),
 									  config_file_options.pg_ctl_options);

-					appendShellString(
-									  &command,
+					appendShellString(&command,
 									  data_dir);

-					appendPQExpBuffer(
-									  &command,
+					appendPQExpBuffer(&command,
 									  " restart");

 					strncpy(script, command.data, MAXLEN);
@@ -2504,18 +2563,15 @@ get_server_action(t_server_action action, char *script, char *data_dir)
 				else
 				{
 					initPQExpBuffer(&command);
-					appendPQExpBuffer(
-									  &command,
+					appendPQExpBuffer(&command,
 									  "%s %s -w -D ",
 									  make_pg_path("pg_ctl"),
 									  config_file_options.pg_ctl_options);

-					appendShellString(
-									  &command,
+					appendShellString(&command,
 									  data_dir);

-					appendPQExpBuffer(
-									  &command,
+					appendPQExpBuffer(&command,
 									  " reload");

 					strncpy(script, command.data, MAXLEN);
@@ -2536,18 +2592,15 @@ get_server_action(t_server_action action, char *script, char *data_dir)
 				else
 				{
 					initPQExpBuffer(&command);
-					appendPQExpBuffer(
-									  &command,
+					appendPQExpBuffer(&command,
 									  "%s %s -w -D ",
 									  make_pg_path("pg_ctl"),
 									  config_file_options.pg_ctl_options);

-					appendShellString(
-									  &command,
+					appendShellString(&command,
 									  data_dir);

-					appendPQExpBuffer(
-									  &command,
+					appendPQExpBuffer(&command,
 									  " promote");

 					strncpy(script, command.data, MAXLEN);
@@ -2581,6 +2634,7 @@ data_dir_required_for_action(t_server_action action)
 			return true;

 		case ACTION_STOP:
+		case ACTION_STOP_WAIT:
 			if (config_file_options.service_stop_command[0] != '\0')
 			{
 				return false;
@@ -2651,7 +2705,6 @@ init_node_record(t_node_info *node_record)
 	node_record->priority = config_file_options.priority;
 	node_record->active = true;

-
 	if (config_file_options.location[0] != '\0')
 		strncpy(node_record->location, config_file_options.location, MAXLEN);
 	else
@@ -2664,7 +2717,7 @@ init_node_record(t_node_info *node_record)

 	if (config_file_options.replication_user[0] != '\0')
 	{
-		/* replication user explicitly provided */
+		/* replication user explicitly provided in configuration file */
 		strncpy(node_record->repluser, config_file_options.replication_user, NAMEDATALEN);
 	}
 	else
@@ -2678,8 +2731,6 @@ init_node_record(t_node_info *node_record)

 	if (config_file_options.use_replication_slots == true)
 	{
-		maxlen_snprintf(node_record->slot_name, "repmgr_slot_%i", config_file_options.node_id);
+		create_slot_name(node_record->slot_name, config_file_options.node_id);
 	}
-
-
 }
--- a/repmgr-client.h
+++ b/repmgr-client.h
@@ -1,6 +1,6 @@
 /*
 * repmgr-client.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -23,7 +23,6 @@
 #include "log.h"


-
 #define NO_ACTION			   0	/* Dummy default action */
 #define PRIMARY_REGISTER	   1
 #define PRIMARY_UNREGISTER	   2
@@ -33,17 +32,19 @@
 #define STANDBY_PROMOTE		   6
 #define STANDBY_FOLLOW		   7
 #define STANDBY_SWITCHOVER	   8
-#define BDR_REGISTER		   9
-#define BDR_UNREGISTER		   10
-#define NODE_STATUS			   11
-#define NODE_CHECK			   12
-#define NODE_SERVICE		   13
-#define NODE_REJOIN            14
-#define CLUSTER_SHOW		   15
-#define CLUSTER_CLEANUP		   16
-#define CLUSTER_MATRIX		   17
-#define CLUSTER_CROSSCHECK	   18
-#define CLUSTER_EVENT		   19
+#define WITNESS_REGISTER       9
+#define WITNESS_UNREGISTER     10
+#define BDR_REGISTER		   11
+#define BDR_UNREGISTER		   12
+#define NODE_STATUS			   13
+#define NODE_CHECK			   14
+#define NODE_SERVICE		   15
+#define NODE_REJOIN            16
+#define CLUSTER_SHOW		   17
+#define CLUSTER_CLEANUP		   18
+#define CLUSTER_MATRIX		   19
+#define CLUSTER_CROSSCHECK	   20
+#define CLUSTER_EVENT		   21

 /* command line options without short versions */
 #define OPT_HELP						   1001
@@ -55,7 +56,7 @@
 #define OPT_NODE_NAME					   1007
 #define OPT_WITHOUT_BARMAN				   1008
 #define OPT_NO_UPSTREAM_CONNECTION		   1009
-#define OPT_REGISTER_WAIT				   1010
+#define OPT_WAIT_SYNC				       1010
 #define OPT_LOG_TO_FILE					   1011
 #define OPT_UPSTREAM_CONNINFO			   1012
 #define OPT_REPLICATION_USER			   1013
@@ -66,21 +67,26 @@
 #define OPT_UPSTREAM_NODE_ID			   1018
 #define OPT_ACTION						   1019
 #define OPT_LIST_ACTIONS				   1020
-#define OPT_CHECK						   1021
-#define OPT_CHECKPOINT					   1022
-#define OPT_IS_SHUTDOWN_CLEANLY			   1023
-#define OPT_ALWAYS_PROMOTE				   1024
-#define OPT_FORCE_REWIND				   1025
-#define OPT_NAGIOS						   1026
-#define OPT_ARCHIVE_READY				   1027
-#define OPT_OPTFORMAT					   1028
-#define OPT_REPLICATION_LAG				   1029
-#define OPT_CONFIG_FILES				   1030
-#define OPT_SIBLINGS_FOLLOW				   1031
-#define OPT_ROLE						   1032
-#define OPT_DOWNSTREAM					   1033
-#define OPT_SLOTS						   1034
-#define OPT_CONFIG_ARCHIVE_DIR			   1035
+#define OPT_CHECKPOINT					   1021
+#define OPT_IS_SHUTDOWN_CLEANLY			   1022
+#define OPT_ALWAYS_PROMOTE				   1023
+#define OPT_FORCE_REWIND				   1024
+#define OPT_NAGIOS						   1025
+#define OPT_ARCHIVE_READY				   1026
+#define OPT_OPTFORMAT					   1027
+#define OPT_REPLICATION_LAG				   1028
+#define OPT_CONFIG_FILES				   1029
+#define OPT_SIBLINGS_FOLLOW				   1030
+#define OPT_ROLE						   1031
+#define OPT_DOWNSTREAM					   1032
+#define OPT_SLOTS						   1033
+#define OPT_CONFIG_ARCHIVE_DIR			   1034
+#define OPT_HAS_PASSFILE                   1035
+#define OPT_WAIT_START                     1036
+#define OPT_REPL_CONN                      1037
+#define OPT_REMOTE_NODE_ID                 1038
+#define OPT_RECOVERY_CONF_ONLY             1039
+
 /* deprecated since 3.3 */
 #define OPT_DATA_DIR						999
 #define OPT_NO_CONNINFO_PASSWORD			998
@@ -112,6 +118,7 @@ static struct option long_options[] =
 	{"pgdata", required_argument, NULL, 'D'},
 	{"node-id", required_argument, NULL, OPT_NODE_ID},
 	{"node-name", required_argument, NULL, OPT_NODE_NAME},
+	{"remote-node-id", required_argument, NULL, OPT_REMOTE_NODE_ID},

 /* logging options */
 	{"log-level", required_argument, NULL, 'L'},
@@ -133,15 +140,16 @@ static struct option long_options[] =
 	{"upstream-conninfo", required_argument, NULL, OPT_UPSTREAM_CONNINFO},
 	{"upstream-node-id", required_argument, NULL, OPT_UPSTREAM_NODE_ID},
 	{"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN},
+	{"recovery-conf-only", no_argument, NULL, OPT_RECOVERY_CONF_ONLY},

 /* "standby register" options */
-	{"wait-sync", optional_argument, NULL, OPT_REGISTER_WAIT},
+	{"wait-start", required_argument, NULL, OPT_WAIT_START},
+	{"wait-sync", optional_argument, NULL, OPT_WAIT_SYNC},

 /* "standby switchover" options
 *
 * Note: --force-rewind accepted to pass to "node join"
 */
-	{"remote-config-file", required_argument, NULL, 'C'},
 	{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE},
 	{"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW},

@@ -154,6 +162,8 @@ static struct option long_options[] =
 	{"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG},
 	{"role", no_argument, NULL, OPT_ROLE},
 	{"slots", no_argument, NULL, OPT_SLOTS},
+	{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
+	{"replication-connection", no_argument, NULL, OPT_REPL_CONN},

 /* "node rejoin" options */
 	{"config-files", required_argument, NULL, OPT_CONFIG_FILES},
@@ -162,7 +172,6 @@ static struct option long_options[] =

 /* "node service" options */
 	{"action", required_argument, NULL, OPT_ACTION},
-	{"check", no_argument, NULL, OPT_CHECK},
 	{"list-actions", no_argument, NULL, OPT_LIST_ACTIONS},
 	{"checkpoint", no_argument, NULL, OPT_CHECKPOINT},

@@ -177,6 +186,8 @@ static struct option long_options[] =
 /* deprecated */
 	{"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG},
 	{"no-conninfo-password", no_argument, NULL, OPT_NO_CONNINFO_PASSWORD},
+	/* previously used by "standby switchover" */
+	{"remote-config-file", required_argument, NULL, 'C'},
 	/* legacy alias for -D/--pgdata */
 	{"data-dir", required_argument, NULL, OPT_DATA_DIR},
 	/* replaced by --node-id */
--- a/repmgr.c
+++ b/repmgr.c
@@ -1,7 +1,7 @@
 /*
 * repmgr.c - repmgr extension
 *
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This is the actual extension code; see repmgr-client.c for the code which
 * generates the repmgr binary
@@ -67,7 +67,6 @@ typedef struct repmgrdSharedState
 	TimestampTz last_updated;
 	int			local_node_id;
 	/* streaming failover */
-	NodeState	node_state;
 	NodeVotingStatus voting_status;
 	int			current_electoral_term;
 	int			candidate_node_id;
@@ -87,52 +86,30 @@ void		_PG_fini(void);
 static void repmgr_shmem_startup(void);

 Datum		set_local_node_id(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(set_local_node_id);

-Datum		standby_set_last_updated(PG_FUNCTION_ARGS);
+Datum		get_local_node_id(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(get_local_node_id);

+Datum		standby_set_last_updated(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(standby_set_last_updated);

 Datum		standby_get_last_updated(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(standby_get_last_updated);

-
-Datum		request_vote(PG_FUNCTION_ARGS);
-
-PG_FUNCTION_INFO_V1(request_vote);
-
-Datum		get_voting_status(PG_FUNCTION_ARGS);
-
-PG_FUNCTION_INFO_V1(get_voting_status);
-
-Datum		set_voting_status_initiated(PG_FUNCTION_ARGS);
-
-PG_FUNCTION_INFO_V1(set_voting_status_initiated);
-
-Datum		other_node_is_candidate(PG_FUNCTION_ARGS);
-
-PG_FUNCTION_INFO_V1(other_node_is_candidate);
-
 Datum		notify_follow_primary(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(notify_follow_primary);

 Datum		get_new_primary(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(get_new_primary);

 Datum		reset_voting_status(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(reset_voting_status);

 Datum		am_bdr_failover_handler(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(am_bdr_failover_handler);

 Datum		unset_bdr_failover_handler(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(unset_bdr_failover_handler);


@@ -144,7 +121,6 @@ _PG_init(void)
 {
 	elog(DEBUG1, "repmgr init");

-	/* error here? */
 	if (!process_shared_preload_libraries_in_progress)
 		return;

@@ -227,12 +203,17 @@ repmgr_shmem_startup(void)
 Datum
 set_local_node_id(PG_FUNCTION_ARGS)
 {
-	int			local_node_id = PG_GETARG_INT32(0);
+	int			local_node_id = UNKNOWN_NODE_ID;

 	if (!shared_state)
 		PG_RETURN_NULL();

-	LWLockAcquire(shared_state->lock, LW_SHARED);
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+
+	local_node_id = PG_GETARG_INT32(0);
+
+	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);

 	/* only set local_node_id once, as it should never change */
 	if (shared_state->local_node_id == UNKNOWN_NODE_ID)
@@ -245,6 +226,23 @@ set_local_node_id(PG_FUNCTION_ARGS)
 	PG_RETURN_VOID();
 }

+/* return the local node id held in shared state (read under a shared lock), or NULL if shared state is unavailable */
+Datum
+get_local_node_id(PG_FUNCTION_ARGS)
+{
+	int			local_node_id = UNKNOWN_NODE_ID;
+
+	if (!shared_state)
+		PG_RETURN_NULL();
+
+	LWLockAcquire(shared_state->lock, LW_SHARED);
+	local_node_id = shared_state->local_node_id;
+	LWLockRelease(shared_state->lock);
+
+	PG_RETURN_INT32(local_node_id);
+}
+
+
 /* update and return last updated with current timestamp */
 Datum
 standby_set_last_updated(PG_FUNCTION_ARGS)
@@ -254,7 +252,7 @@ standby_set_last_updated(PG_FUNCTION_ARGS)
 	if (!shared_state)
 		PG_RETURN_NULL();

-	LWLockAcquire(shared_state->lock, LW_SHARED);
+	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
 	shared_state->last_updated = last_updated;
 	LWLockRelease(shared_state->lock);

@@ -272,7 +270,7 @@ standby_get_last_updated(PG_FUNCTION_ARGS)
 	if (!shared_state)
 		PG_RETURN_NULL();

-	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
+	LWLockAcquire(shared_state->lock, LW_SHARED);
 	last_updated = shared_state->last_updated;
 	LWLockRelease(shared_state->lock);

@@ -286,210 +284,20 @@ standby_get_last_updated(PG_FUNCTION_ARGS)
 /* failover functions */
 /* ===================*/

-Datum
-request_vote(PG_FUNCTION_ARGS)
-{
-#ifndef BDR_ONLY
-	StringInfoData query;
-
-#if (PG_VERSION_NUM >= 90400)
-	XLogRecPtr	our_lsn = InvalidXLogRecPtr;
-	bool		isnull;
-#else
-	char *value = NULL;
-	char lsn_text[64] = "";
-#endif
-
-	/* node_id used for logging purposes */
-	int			requesting_node_id = PG_GETARG_INT32(0);
-	int			current_electoral_term = PG_GETARG_INT32(1);
-
-	int			ret;
-
-	if (!shared_state)
-		PG_RETURN_NULL();
-
-	LWLockAcquire(shared_state->lock, LW_SHARED);
-
-	/* only do something if local_node_id is initialised */
-	if (shared_state->local_node_id != UNKNOWN_NODE_ID)
-	{
-		/* this node has initiated voting or already responded to another node */
-		if (shared_state->voting_status != VS_NO_VOTE)
-		{
-			LWLockRelease(shared_state->lock);
-
-			PG_RETURN_NULL();
-		}
-
-		elog(INFO, "node %i has received request from node %i for electoral term %i (our term: %i)",
-			 shared_state->local_node_id,
-			 requesting_node_id, current_electoral_term,
-			 shared_state->current_electoral_term);
-
-		SPI_connect();
-
-		initStringInfo(&query);
-
-		appendStringInfo(
-			&query,
-#if (PG_VERSION_NUM >= 100000)
-			"SELECT pg_catalog.pg_last_wal_receive_lsn()");
-#else
-		    "SELECT pg_catalog.pg_last_xlog_receive_location()");
-#endif
-
-		elog(DEBUG1, "query: %s", query.data);
-		ret = SPI_execute(query.data, true, 0);
-
-		if (ret < 0)
-		{
-			SPI_finish();
-			elog(WARNING, "unable to retrieve last received LSN");
-			LWLockRelease(shared_state->lock);
-
-#if (PG_VERSION_NUM >= 90400)
-			PG_RETURN_LSN(InvalidOid);
-#else
-			PG_RETURN_TEXT_P(cstring_to_text("0/0"));
-#endif
-		}
-
-#if (PG_VERSION_NUM >= 90400)
-		our_lsn = DatumGetLSN(SPI_getbinval(SPI_tuptable->vals[0],
-											SPI_tuptable->tupdesc,
-											1, &isnull));
-
-		elog(DEBUG1, "our LSN is %X/%X",
-			 (uint32) (our_lsn >> 32),
-			 (uint32) our_lsn);
-#else
-		value = SPI_getvalue(SPI_tuptable->vals[0],
-							 SPI_tuptable->tupdesc,
-							 1);
-		strncpy(lsn_text, value, 64);
-		pfree(value);
-		elog(DEBUG1, "our LSN is %s", lsn_text);
-#endif
-
-		/* indicate this node has responded to a vote request */
-		shared_state->voting_status = VS_VOTE_REQUEST_RECEIVED;
-		shared_state->current_electoral_term = current_electoral_term;
-
-		/* should we free "query" here? */
-		SPI_finish();
-	}
-
-	LWLockRelease(shared_state->lock);
-
-#if (PG_VERSION_NUM >= 90400)
-	PG_RETURN_LSN(our_lsn);
-#else
-	PG_RETURN_TEXT_P(cstring_to_text(lsn_text));
-#endif
-#else
-	PG_RETURN(InvalidOid);
-#endif
-}
-
-
-
-Datum
-get_voting_status(PG_FUNCTION_ARGS)
-{
-#ifndef BDR_ONLY
-	NodeVotingStatus voting_status;
-
-	if (!shared_state)
-		PG_RETURN_NULL();
-
-	LWLockAcquire(shared_state->lock, LW_SHARED);
-	voting_status = shared_state->voting_status;
-	LWLockRelease(shared_state->lock);
-
-	PG_RETURN_INT32(voting_status);
-#else
-	PG_RETURN_INT32(VS_UNKNOWN);
-#endif
-}
-
-Datum
-set_voting_status_initiated(PG_FUNCTION_ARGS)
-{
-#ifndef BDR_ONLY
-	int			electoral_term = -1;
-
-	if (!shared_state)
-		PG_RETURN_NULL();
-
-	LWLockAcquire(shared_state->lock, LW_SHARED);
-
-	/* only do something if local_node_id is initialised */
-	if (shared_state->local_node_id != UNKNOWN_NODE_ID)
-	{
-		shared_state->voting_status = VS_VOTE_INITIATED;
-		shared_state->current_electoral_term += 1;
-
-		electoral_term = shared_state->current_electoral_term;
-
-		elog(INFO, "setting voting term to %i", electoral_term);
-	}
-
-	LWLockRelease(shared_state->lock);
-
-	PG_RETURN_INT32(electoral_term);
-#else
-	PG_RETURN_INT32(-1);
-#endif
-}
-
-Datum
-other_node_is_candidate(PG_FUNCTION_ARGS)
-{
-#ifndef BDR_ONLY
-	int			requesting_node_id = PG_GETARG_INT32(0);
-	int			electoral_term = PG_GETARG_INT32(1);
-
-	if (!shared_state)
-		PG_RETURN_NULL();
-
-	LWLockAcquire(shared_state->lock, LW_SHARED);
-
-	/* only do something if local_node_id is initialised */
-	if (shared_state->local_node_id != UNKNOWN_NODE_ID)
-	{
-		if (shared_state->current_electoral_term == electoral_term)
-		{
-			if (shared_state->candidate_node_id != UNKNOWN_NODE_ID)
-			{
-				elog(INFO, "node %i requesting candidature, but node %i already candidate",
-					 requesting_node_id,
-					 shared_state->candidate_node_id);
-				PG_RETURN_BOOL(false);
-			}
-		}
-
-		shared_state->candidate_node_id = requesting_node_id;
-		elog(INFO, "node %i is candidate", requesting_node_id);
-	}
-
-	LWLockRelease(shared_state->lock);
-
-	PG_RETURN_BOOL(true);
-#else
-	PG_RETURN_BOOL(false);
-#endif
-}

 Datum
 notify_follow_primary(PG_FUNCTION_ARGS)
 {
-#ifndef BDR_ONLY
-	int			primary_node_id = PG_GETARG_INT32(0);
+	int			primary_node_id = UNKNOWN_NODE_ID;

 	if (!shared_state)
 		PG_RETURN_NULL();

+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+
+	primary_node_id = PG_GETARG_INT32(0);
+
 	LWLockAcquire(shared_state->lock, LW_SHARED);

 	/* only do something if local_node_id is initialised */
@@ -499,13 +307,15 @@ notify_follow_primary(PG_FUNCTION_ARGS)
 			 shared_state->local_node_id,
 			 primary_node_id);

+		LWLockRelease(shared_state->lock);
+		LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
 		/* Explicitly set the primary node id */
 		shared_state->candidate_node_id = primary_node_id;
 		shared_state->follow_new_primary = true;
 	}

 	LWLockRelease(shared_state->lock);
-#endif
+
 	PG_RETURN_VOID();
 }

@@ -518,14 +328,16 @@ get_new_primary(PG_FUNCTION_ARGS)
 	if (!shared_state)
 		PG_RETURN_NULL();

-#ifndef BDR_ONLY
 	LWLockAcquire(shared_state->lock, LW_SHARED);

 	if (shared_state->follow_new_primary == true)
 		new_primary_node_id = shared_state->candidate_node_id;

 	LWLockRelease(shared_state->lock);
-#endif
+
+	if (new_primary_node_id == UNKNOWN_NODE_ID)
+		PG_RETURN_NULL();
+
 	PG_RETURN_INT32(new_primary_node_id);
 }

@@ -533,7 +345,6 @@ get_new_primary(PG_FUNCTION_ARGS)
 Datum
 reset_voting_status(PG_FUNCTION_ARGS)
 {
-#ifndef BDR_ONLY
 	if (!shared_state)
 		PG_RETURN_NULL();

@@ -542,13 +353,16 @@ reset_voting_status(PG_FUNCTION_ARGS)
 	/* only do something if local_node_id is initialised */
 	if (shared_state->local_node_id != UNKNOWN_NODE_ID)
 	{
+		LWLockRelease(shared_state->lock);
+		LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
+
 		shared_state->voting_status = VS_NO_VOTE;
 		shared_state->candidate_node_id = UNKNOWN_NODE_ID;
 		shared_state->follow_new_primary = false;
 	}

 	LWLockRelease(shared_state->lock);
-#endif
+
 	PG_RETURN_VOID();
 }

@@ -556,16 +370,23 @@ reset_voting_status(PG_FUNCTION_ARGS)
 Datum
 am_bdr_failover_handler(PG_FUNCTION_ARGS)
 {
-	int			node_id = PG_GETARG_INT32(0);
+	int			node_id = UNKNOWN_NODE_ID;
 	bool		am_handler = false;

 	if (!shared_state)
 		PG_RETURN_NULL();

+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+
+	node_id = PG_GETARG_INT32(0);
+
 	LWLockAcquire(shared_state->lock, LW_SHARED);

 	if (shared_state->bdr_failover_handler == UNKNOWN_NODE_ID)
 	{
+		LWLockRelease(shared_state->lock);
+		LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
 		shared_state->bdr_failover_handler = node_id;
 		am_handler = true;
 	}
@@ -586,11 +407,16 @@ unset_bdr_failover_handler(PG_FUNCTION_ARGS)
 	if (!shared_state)
 		PG_RETURN_NULL();

+	LWLockAcquire(shared_state->lock, LW_SHARED);
+
 	/* only do something if local_node_id is initialised */
 	if (shared_state->local_node_id != UNKNOWN_NODE_ID)
 	{
-		LWLockAcquire(shared_state->lock, LW_SHARED);
+		LWLockRelease(shared_state->lock);
+		LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
+
 		shared_state->bdr_failover_handler = UNKNOWN_NODE_ID;
+
 		LWLockRelease(shared_state->lock);
 	}

--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -13,35 +13,35 @@
 # repmgr and repmgrd require the following items to be explicitly configured.


-#node_id=                               # A unique integer greater than zero
-#node_name=''                           # An arbitrary (but unique) string; we recommend
-                                        # using the server's hostname or another identifier
-                                        # unambiguously associated with the server to avoid
-                                        # confusion. Avoid choosing names which reflect the
-                                        # node's current role, e.g. "primary" or "standby1",
-                                        # as roles can change and it will be confusing if
-                                        # the current primary is called "standby1".
+#node_id=    	     	     	 # A unique integer greater than zero
+#node_name=''			 # An arbitrary (but unique) string; we recommend
+				 # using the server's hostname or another identifier
+				 # unambiguously associated with the server to avoid
+				 # confusion. Avoid choosing names which reflect the
+				 # node's current role, e.g. "primary" or "standby1",
+				 # as roles can change and it will be confusing if
+				 # the current primary is called "standby1".

-#conninfo=''                            # Database connection information as a conninfo string.
-                                        # All servers in the cluster must be able to connect to
-                                        # the local node using this string.
-                                        #
-                                        # For details on conninfo strings, see:
-                                        #  https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
-                                        #
-                                        # If repmgrd is in use, consider explicitly setting
-                                        # "connect_timeout" in the conninfo string to determine
-                                        # the length of time which elapses before a network
-                                        # connection attempt is abandoned; for details see:
-                                        #  https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT
+#conninfo=''			 # Database connection information as a conninfo string.
+				 # All servers in the cluster must be able to connect to
+				 # the local node using this string.
+				 #
+				 # For details on conninfo strings, see:
+				 #  https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
+				 #
+				 # If repmgrd is in use, consider explicitly setting
+				 # "connect_timeout" in the conninfo string to determine
+				 # the length of time which elapses before a network
+				 # connection attempt is abandoned; for details see:
+				 #  https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT

-#data_directory                         # The node's data directory. This is needed by repmgr
-                                        # when performing operations when the PostgreSQL instance
-                                        # is not running and there's no other way of determining
-                                        # the data directory.
+#data_directory=''		 # The node's data directory. This is needed by repmgr
+				 # when performing operations when the PostgreSQL instance
+				 # is not running and there's no other way of determining
+				 # the data directory.

-#replication_user                       # User to make replication connections with, if not set defaults
-                                        # to the user defined in "conninfo".
+#replication_user='repmgr'	 # User to make replication connections with, if not set defaults
+				 # to the user defined in "conninfo".

 # =============================================================================

@@ -52,23 +52,28 @@
 # Replication settings
 #------------------------------------------------------------------------------

-#replication_type=physical              # Must be one of 'physical' or 'bdr'.
+#replication_type=physical	 # Must be one of 'physical' or 'bdr'.

-#location=default                       # arbitrary string defining the location of the node; this
-                                        # is used during failover to check visibilty of the
-                                        # current primary node. See the 'repmgrd' documentation
-                                        # in README.md for further details.
+#location=default		 # arbitrary string defining the location of the node; this
+				 # is used during failover to check visibility of the
+				 # current primary node. See the 'repmgrd' documentation
+				 # in README.md for further details.

-#use_replication_slots=no               # whether to use physical replication slots
-                                        # NOTE: when using replication slots,
-                                        # 'max_replication_slots' should be configured for
-                                        # at least the number of standbys which will connect
-                                        # to the primary.
+#use_replication_slots=no	 # whether to use physical replication slots
+				 # NOTE: when using replication slots,
+				 # 'max_replication_slots' should be configured for
+				 # at least the number of standbys which will connect
+				 # to the primary.

-#recovery_min_apply_delay=              # If provided, "recovery_min_apply_delay" in recovery.conf
-                                        # will be set to this value.
+#recovery_min_apply_delay=	 # If provided, "recovery_min_apply_delay" in recovery.conf
+				 # will be set to this value.

+#------------------------------------------------------------------------------
+# Witness server settings
+#------------------------------------------------------------------------------

+#witness_sync_interval=15	 # interval (in seconds) to synchronise node records
+				 # to the witness server

 #------------------------------------------------------------------------------
 # Logging settings
@@ -80,14 +85,14 @@
 # This is mainly intended for those cases when `repmgr` is executed directly
 # by `repmgrd`.

-#log_level=INFO                         # Log level: possible values are DEBUG, INFO, NOTICE,
-                                        # WARNING, ERROR, ALERT, CRIT or EMERG
+#log_level=INFO			 # Log level: possible values are DEBUG, INFO, NOTICE,
+				 # WARNING, ERROR, ALERT, CRIT or EMERG

-#log_facility=STDERR                    # Logging facility: possible values are STDERR, or for
-                                        # syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER
+#log_facility=STDERR		 # Logging facility: possible values are STDERR, or for
+				 # syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER

-#log_file=''                            # stderr can be redirected to an arbitrary file:
-#log_status_interval=300                # interval (in seconds) for repmgrd to log a status message
+#log_file=''			 # stderr can be redirected to an arbitrary file:
+#log_status_interval=300	 # interval (in seconds) for repmgrd to log a status message


 #------------------------------------------------------------------------------
@@ -111,30 +116,30 @@
 # By default, all notifications will be passed; the notification types
 # can be filtered to explicitly named ones, e.g.:
 #
-#   event_notifications=master_register,standby_register
+#   event_notifications=primary_register,standby_register

-#event_notification_command=''          # An external program or script which
-                                        # can be executed by the user under which
-                                        # repmgr/repmgrd are run.
+#event_notification_command=''		# An external program or script which
+					# can be executed by the user under which
+					# repmgr/repmgrd are run.

-#event_notifications=''                 # A commas-separated list of notification
-                                        # types
+#event_notifications=''			# A comma-separated list of notification
+					# types

 #------------------------------------------------------------------------------
 # Environment/command settings
 #------------------------------------------------------------------------------

-#pg_bindir=''                           # Path to PostgreSQL binary directory (location
-                                        # of pg_ctl, pg_basebackup etc.). Only needed
-                                        # if these files are not in the system $PATH.
-                                        #
-                                        # Debian/Ubuntu users: you will probably need to
-                                        # set this to the directory where `pg_ctl` is located,
-                                        # e.g. /usr/lib/postgresql/9.6/bin/
-#use_primary_conninfo_password=false    # explicitly set "password" in recovery.conf's
-                                        # "primary_conninfo" parameter using the value contained
-                                        # in the environment variable PGPASSWORD
-
+#pg_bindir=''				# Path to PostgreSQL binary directory (location
+					# of pg_ctl, pg_basebackup etc.). Only needed
+					# if these files are not in the system $PATH.
+					#
+					# Debian/Ubuntu users: you will probably need to
+					# set this to the directory where `pg_ctl` is located,
+					# e.g. /usr/lib/postgresql/9.6/bin/
+#use_primary_conninfo_password=false	# explicitly set "password" in recovery.conf's
+					# "primary_conninfo" parameter using the value contained
+					# in the environment variable PGPASSWORD
+#passfile=''				# path to .pgpass file to include in "primary_conninfo"
 #------------------------------------------------------------------------------
 # external command options
 #------------------------------------------------------------------------------
@@ -148,16 +153,15 @@
 #   rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
 #   ssh_options=-o "StrictHostKeyChecking no"

-#pg_ctl_options=''                      # Options to append to "pg_ctl"
-#pg_basebackup_options=''               # Options to append to "pg_basebackup"
-#rsync_options=''                       # Options to append to "rsync"
-ssh_options='-q -o ConnectTimeout=10'   # Options to append to "ssh"
-
+#pg_ctl_options='' 			# Options to append to "pg_ctl"
+#pg_basebackup_options=''		# Options to append to "pg_basebackup"
+#rsync_options=''			# Options to append to "rsync"
+ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"



 #------------------------------------------------------------------------------
-# Standby clone settings
+# "standby clone" settings
 #------------------------------------------------------------------------------
 #
 # These settings apply when cloning a standby ("repmgr standby clone").
@@ -167,12 +171,26 @@ ssh_options='-q -o ConnectTimeout=10'   # Options to append to "ssh"
 #   tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace
 #   restore_command = 'cp /path/to/archived/wals/%f %p'

-#tablespace_mapping=''                  # Tablespaces can be remapped from one
-                                        # file system location to another. This
-                                        # parameter can be provided multiple times.
+#tablespace_mapping=''	  		# Tablespaces can be remapped from one
+					# file system location to another. This
+					# parameter can be provided multiple times.
+
+#restore_command=''			# This will be placed in the recovery.conf
+					# file generated by repmgr
+
+
+#------------------------------------------------------------------------------
+# "standby promote" settings
+#------------------------------------------------------------------------------
+
+# These settings apply when instructing a standby to promote itself to the
+# new primary ("repmgr standby promote").
+
+#promote_check_timeout=60		# The length of time (in seconds) to wait
+					# for the new primary to finish promoting
+#promote_check_interval=1		# The interval (in seconds) to check whether
+					# the new primary has finished promoting

-#restore_command=''                     # This will be placed in the recovery.conf
-                                        # file generated by repmgr

 #------------------------------------------------------------------------------
 # Standby follow settings
@@ -181,19 +199,19 @@ ssh_options='-q -o ConnectTimeout=10'   # Options to append to "ssh"
 # These settings apply when instructing a standby to follow the new primary
 # ("repmgr standby follow").

-#primary_follow_timeout=60              # The length of time (in seconds) to wait
-                                        # for the new primary to become available
+#primary_follow_timeout=60		# The length of time (in seconds) to wait
+					# for the new primary to become available


 #------------------------------------------------------------------------------
 # Barman options
 #------------------------------------------------------------------------------

-#barman_server=''                       # The barman configuration section
-#barman_host=''                         # The host name of the barman server
-#barman_config=''                       # The Barman configuration file on the
-                                        # Barman server (needed if the file is
-                                        # in a non-standard location)
+#barman_server=''			# The barman configuration section
+#barman_host=''				# The host name of the barman server
+#barman_config=''			# The Barman configuration file on the
+					# Barman server (needed if the file is
+					# in a non-standard location)

 #------------------------------------------------------------------------------
 # Failover and monitoring settings (repmgrd)
@@ -202,32 +220,45 @@ ssh_options='-q -o ConnectTimeout=10'   # Options to append to "ssh"
 # These settings are only applied when repmgrd is running. Values shown
 # are defaults.

-#failover=manual                        # one of 'automatic', 'manual'.
-                                        # determines what action to take in the event of upstream failure
-                                        #
-                                        # 'automatic': repmgrd will automatically attempt to promote the
-                                        #    node or follow the new upstream node
-                                        # 'manual': repmgrd will take no action and the node will require
-                                        #    manual attention to reattach it to replication
-                                        # (does not apply to BDR mode)
-#promote_command=                       # command to execute when promoting a new primary; use something like:
-                                        #
-                                        #     repmgr standby promote -f /etc/repmgr.conf
-                                        #
-#follow_command=                        # command to execute when instructing a standby to follow a new primary;
-                                        # use something like:
-                                        #
-                                        #     repmgr standby follow -f /etc/repmgr.conf -W --upstream-node-id=%n
-                                        #
-#primary_notification_timeout=60        # Interval (in seconds) which repmgrd on a standby
-                                        # will wait for a notification from the new primary,
-                                        # before falling back to degraded monitoring
-#monitoring_history=no
-#degraded_monitoring_timeout=-1         # Interval (in seconds) after which repmgrd will terminate if the
-                                        # server being monitored is no longer available. -1 (default)
-                                        # disables the timeout completely.
-#async_query_timeout=60                 # Interval (in seconds) which repmgrd will wait before
-                                        # cancelling an asynchronous query.
+#failover=manual			# one of 'automatic', 'manual'.
+					# determines what action to take in the event of upstream failure
+					#
+					# 'automatic': repmgrd will automatically attempt to promote the
+					#    node or follow the new upstream node
+					# 'manual': repmgrd will take no action and the node will require
+					#    manual attention to reattach it to replication
+					# (does not apply to BDR mode)
+
+#priority=100				# indicate a preferred priority for promoting nodes;
+					# a value of zero prevents the node being promoted to primary
+					# (default: 100)
+
+#reconnect_attempts=6			# Number of attempts which will be made to reconnect to an unreachable
+					# primary (or other upstream node)
+#reconnect_interval=10			# Interval (in seconds) between attempts to reconnect to an unreachable
+					# primary (or other upstream node)
+#promote_command=			# command to execute when promoting a new primary; use something like:
+					#
+					#     repmgr standby promote -f /etc/repmgr.conf
+					#
+#follow_command=			# command to execute when instructing a standby to follow a new primary;
+					# use something like:
+					#
+					#     repmgr standby follow -f /etc/repmgr.conf -W --upstream-node-id=%n
+					#
+#primary_notification_timeout=60	# Interval (in seconds) which repmgrd on a standby
+					# will wait for a notification from the new primary,
+					# before falling back to degraded monitoring
+#standby_reconnect_timeout=60	# Interval (in seconds) which repmgrd on a standby will wait
+					# to reconnect to the local node after executing "follow_command"
+
+#monitoring_history=no                  # Whether to write monitoring data to the "monitoring_history" table
+#monitor_interval_secs=2                # Interval (in seconds) at which to write monitoring data
+#degraded_monitoring_timeout=-1		# Interval (in seconds) after which repmgrd will terminate if the
+					# server being monitored is no longer available. -1 (default)
+					# disables the timeout completely.
+#async_query_timeout=60			# Interval (in seconds) which repmgrd will wait before
+					# cancelling an asynchronous query.

 #------------------------------------------------------------------------------
 # service control commands
@@ -260,8 +291,10 @@ ssh_options='-q -o ConnectTimeout=10'   # Options to append to "ssh"
 #service_stop_command = ''
 #service_restart_command = ''
 #service_reload_command = ''
-#service_promote_command = ''            # Note: this overrides any value contained
-                                         # in the setting "promote_command"
+#service_promote_command = ''		# Note: this overrides any value contained in the setting
+					# "promote_command". This is intended for systems which
+					# provide a package-level promote command, such as Debian's
+					# "pg_ctlcluster"


 #------------------------------------------------------------------------------
@@ -270,25 +303,25 @@ ssh_options='-q -o ConnectTimeout=10'   # Options to append to "ssh"

 # Various warning/critical thresholds used by "repmgr node check".

-#archive_ready_warning=16                # repmgr node check --archiver
-#archive_ready_critical=128              #
-                                         # Numbers of files pending archiving via PostgreSQL's
-                                         # "archive_command" configuration parameter. If
-                                         # files can't be archived fast enough, or the archive
-                                         # command is failing, the buildup of files can
-                                         # cause various issues, such as server shutdown being
-                                         # delayed until all files are archived, or excessive
-                                         # space being occupied by unarchived files.
-                                         #
-                                         # Note that these values will be checked when executing
-                                         # "repmgr standby switchover" to warn about potential
-                                         # issues with shutting down the demotion candidate.
+#archive_ready_warning=16  	      	# repmgr node check --archive-ready
+#archive_ready_critical=128		#
+					# Numbers of files pending archiving via PostgreSQL's
+					# "archive_command" configuration parameter. If
+					# files can't be archived fast enough, or the archive
+					# command is failing, the buildup of files can
+					# cause various issues, such as server shutdown being
+					# delayed until all files are archived, or excessive
+					# space being occupied by unarchived files.
+					#
+					# Note that these values will be checked when executing
+					# "repmgr standby switchover" to warn about potential
+					# issues with shutting down the demotion candidate.

-#replication_lag_warning=300             # repmgr node check --replication-lag
-#replication_lag_critical=600            #
-                                         # Note that these values will be checked when executing
-                                         # "repmgr standby switchover" to warn about potential
-                                         # issues with shutting down the demotion candidate.
+#replication_lag_warning=300		# repmgr node check --replication-lag
+#replication_lag_critical=600		#
+					# Note that these values will be checked when executing
+					# "repmgr standby switchover" to warn about potential
+					# issues with shutting down the demotion candidate.


 #------------------------------------------------------------------------------
--- a/repmgr.h
+++ b/repmgr.h
@@ -1,6 +1,6 @@
 /*
 * repmgr.h
- * Copyright (c) 2ndQuadrant, 2010-2017
+ * Copyright (c) 2ndQuadrant, 2010-2018
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -56,25 +56,30 @@
 #define NO_UPSTREAM_NODE	-1
 #define UNKNOWN_NODE_ID		-1

+#define VOTING_TERM_NOT_SET -1

 /*
 * various default values - ensure repmgr.conf.sample is updated
 * if any of these are changed
 */
-#define DEFAULT_LOCATION                 "default"
-#define DEFAULT_PRIORITY		         100
-#define DEFAULT_RECONNECTION_ATTEMPTS    6	/* seconds */
-#define DEFAULT_RECONNECTION_INTERVAL    10 /* seconds */
-#define DEFAULT_MONITORING_INTERVAL      2	/* seconds */
-#define DEFAULT_ASYNC_QUERY_TIMEOUT      60 /* seconds */
-#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
-#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT   60 /* seconds */
-#define DEFAULT_BDR_RECOVERY_TIMEOUT     30 /* seconds */
-#define DEFAULT_ARCHIVE_READY_WARNING    16 /* WAL files */
-#define DEFAULT_ARCHIVE_READY_CRITICAL   128	/* WAL files */
-#define	DEFAULT_REPLICATION_LAG_WARNING  300	/* seconds */
-#define DEFAULT_REPLICATION_LAG_CRITICAL 600	/* seconds */
-
+#define DEFAULT_LOCATION                     "default"
+#define DEFAULT_PRIORITY		             100
+#define DEFAULT_RECONNECTION_ATTEMPTS        6	 /* attempts */
+#define DEFAULT_RECONNECTION_INTERVAL        10  /* seconds */
+#define DEFAULT_MONITORING_INTERVAL          2	 /* seconds */
+#define DEFAULT_ASYNC_QUERY_TIMEOUT          60  /* seconds */
+#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60  /* seconds */
+#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT       60  /* seconds */
+#define DEFAULT_BDR_RECOVERY_TIMEOUT         30  /* seconds */
+#define DEFAULT_ARCHIVE_READY_WARNING        16  /* WAL files */
+#define DEFAULT_ARCHIVE_READY_CRITICAL       128 /* WAL files */
+#define	DEFAULT_REPLICATION_LAG_WARNING      300 /* seconds */
+#define DEFAULT_REPLICATION_LAG_CRITICAL     600 /* seconds */
+#define DEFAULT_WITNESS_SYNC_INTERVAL        15  /* seconds */
+#define DEFAULT_WAIT_START                   30  /* seconds */
+#define DEFAULT_PROMOTE_CHECK_TIMEOUT        60  /* seconds */
+#define DEFAULT_PROMOTE_CHECK_INTERVAL       1   /* seconds */
+#define DEFAULT_STANDBY_RECONNECT_TIMEOUT    60  /* seconds */

 #ifndef RECOVERY_COMMAND_FILE
 #define RECOVERY_COMMAND_FILE "recovery.conf"
--- a/Show More
+++ b/Show More