Add missing line break in logging output

Something else we don't have to worry about in repmgr4.
Fix directories to exclude in clone from Barman
2026-03-23 07:06:30 +00:00 · 2019-02-27 09:52:45 +09:00 · 2019-02-22 16:31:58 +09:00 · 2019-02-22 15:49:02 +09:00 · 2019-02-22 14:04:37 +09:00 · 2019-02-21 16:14:08 +09:00
35 changed files with 2701 additions and 986 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,7 +2,7 @@ License and Contributions
 =========================

 `repmgr` is licensed under the GPL v3.  All of its code and documentation is
-Copyright 2010-2016, 2ndQuadrant Limited.  See the files COPYRIGHT and LICENSE for
+Copyright 2010-2017, 2ndQuadrant Limited.  See the files COPYRIGHT and LICENSE for
 details.

 The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
@@ -21,9 +21,11 @@ copy of the relevant Copyright Assignment Form.
 Code style
 ----------

-Code in repmgr is formatted to a consistent style using the following command:
+Code in repmgr should be formatted to the same standards as the main PostgreSQL
+project. For more details see:

-    astyle --style=ansi --indent=tab --suffix=none *.c *.h
+    https://www.postgresql.org/docs/current/static/source-format.html

 Contributors should reformat their code similarly before submitting code to
-the project, in order to minimize merge conflicts with other work.
+the project, in order to minimize merge conflicts with other work.
+
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-Copyright (c) 2010-2016, 2ndQuadrant Limited
+Copyright (c) 2010-2017, 2ndQuadrant Limited
 All rights reserved.

 This program is free software: you can redistribute it and/or modify
--- a/65
+++ b/65
@@ -1,4 +1,57 @@
-3.2     2016-
+3.4.0   2019-02-
+        default log level is now INFO (Ian)
+        repmgr: fix `standby register --force` when updating existing node record (Ian)
+        repmgrd: set LSN shared memory value at standby startup (Ian)
+        repmgrd: improve logging during failover (Ian)
+
+3.3.2   2017-06-01
+        Add support for PostgreSQL 10 (Ian)
+        repmgr: ensure --replication-user option is honoured when passing database
+          connection parameters as a conninfo string (Ian)
+        repmgr: improve detection of pg_rewind on remote server (Ian)
+        repmgr: add DETAIL log output for additional clarification of error messages (Ian)
+        repmgr: suppress various spurious error messages in `standby follow` and
+          `standby switchover` (Ian)
+        repmgr: add missing `-P` option (Ian)
+        repmgrd: monitoring statistic reporting fixes (Ian)
+
+3.3.1   2017-03-13
+        repmgrd: prevent invalid apply lag value being written to the
+          monitoring table (Ian)
+        repmgrd: fix error in XLogRecPtr conversion when calculating
+          monitoring statistics (Ian)
+        repmgr: if replication slots in use, where possible delete slot on old
+          upstream node after following new upstream (Ian)
+        repmgr: improve logging of rsync actions (Ian)
+        repmgr: improve `standby clone` when synchronous replication in use (Ian)
+        repmgr: stricter checking of allowed node id values
+        repmgr: enable `master register --force` when there is a foreign key
+          dependency from a standby node (Ian)
+
+3.3     2016-12-27
+        repmgr: always log to STDERR even if log facility defined (Ian)
+        repmgr: add --log-to-file to log repmgr output to the defined
+          log facility (Ian)
+        repmgr: improve handling of command line parameter errors (Ian)
+        repmgr: add option --upstream-conninfo to explicitly set
+          'primary_conninfo' in recovery.conf (Ian)
+        repmgr: enable a standby to be registered which isn't running (Ian)
+        repmgr: enable `standby register --force` to update a node record
+          with cascaded downstream node records (Ian)
+        repmgr: add option `--no-conninfo-password` (Abhijit, Ian)
+        repmgr: add initial support for PostgreSQL 10.0 (Ian)
+        repmgr: escape values in primary_conninfo if needed (Ian)
+
+3.2.1   2016-10-24
+        repmgr: require a valid repmgr cluster name unless -F/--force
+          supplied (Ian)
+        repmgr: check master server is registered with repmgr before
+          cloning (Ian)
+        repmgr: ensure data directory defaults to that of the source node (Ian)
+        repmgr: various fixes to Barman cloning mode (Gianni, Ian)
+        repmgr: fix `repmgr cluster crosscheck` output (Ian)
+
+3.2     2016-10-05
        repmgr: add support for cloning from a Barman backup (Gianni)
        repmgr: add commands `standby matrix` and `standby crosscheck` (Gianni)
        repmgr: suppress connection error display in `repmgr cluster show`
@@ -15,10 +68,16 @@
           the standby (Ian)
        repmgr: add option `--copy-external-config-files` for files outside
           of the data directory (Ian)
-        repmgr: add configuration options to override the default pg_ctl
-           commands (Jarkko Oranen)
+        repmgr: only require `wal_keep_segments` to be set in certain corner
+           cases (Ian)
+        repmgr: better support cloning from a node other than the one to
+           stream from (Ian)
+        repmgrd: add configuration options to override the default pg_ctl
+           commands (Jarkko Oranen, Ian)
+        repmgrd: don't start if node is inactive and failover=automatic (Ian)
        packaging: improve "repmgr-auto" Debian package (Gianni)

+
 3.1.5   2016-08-15
        repmgrd: in a failover situation, prevent endless looping when
          attempting to establish the status of a node with
--- a/11
+++ b/11
@@ -1,15 +1,16 @@
 #
 # Makefile
-# Copyright (c) 2ndQuadrant, 2010-2016
+# Copyright (c) 2ndQuadrant, 2010-2017

 HEADERS = $(wildcard *.h)

 repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
-repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o dirmod.o
+repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o dirmod.o compat.o

 DATA = repmgr.sql uninstall_repmgr.sql
+REGRESS = repmgr_funcs repmgr_test

-PG_CPPFLAGS = -I$(libpq_srcdir)
+PG_CPPFLAGS = -I$(includedir_internal) -I$(libpq_srcdir)
 PG_LIBS     = $(libpq_pgport)


@@ -17,11 +18,11 @@ all: repmgrd repmgr
 	$(MAKE) -C sql

 repmgrd: $(repmgrd_OBJS)
-	$(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
+	$(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX)
 	$(MAKE) -C sql

 repmgr: $(repmgr_OBJS)
-	$(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
+	$(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX)

 # Make all objects depend on all include files. This is a bit of a
 # shotgun approach, but the codebase is small enough that a complete rebuild
--- a/README.md
+++ b/README.md
@@ -7,8 +7,12 @@ replication capabilities with utilities to set up standby servers, monitor
 replication, and perform administrative tasks such as failover or switchover
 operations.

-The current `repmgr` version, 3.1.5, supports all PostgreSQL versions from
-9.3, including the upcoming 9.6.
+This `repmgr` version (3.4) supports PostgreSQL versions from
+9.3 to 9.6.
+
+*NOTE*: we strongly recommend using the repmgr 4.x series, which contains
+many new features and usability enhancements and is being actively developed
+and maintained.

 Overview
 --------
@@ -121,7 +125,8 @@ views:
    status for each node

 The `repmgr` metadata schema can be stored in an existing database or in its own
-dedicated database.
+dedicated database. Note that the `repmgr` metadata schema cannot reside on a database
+server which is not part of the replication cluster managed by `repmgr`.

 A dedicated database superuser is required to own the meta-database as well as carry
 out administrative actions.
@@ -188,6 +193,14 @@ system.
  Instructions can be found in the APT section of the PostgreSQL Wiki
  ( https://wiki.postgresql.org/wiki/Apt ).

+  *NOTE*: repmgr 3.3 packages are now only available via a 2ndQuadrant-hosted
+  repository which can be installed like this:
+
+    apt-key adv --fetch-keys http://packages.2ndquadrant.com/repmgr3/apt/0xD3FA41F6.asc
+
+    echo deb http://packages.2ndquadrant.com/repmgr3/apt/ $(lsb_release -cs)-2ndquadrant main > /etc/apt/sources.list.d/repmgr3.list
+
+
 See `PACKAGES.md` for details on building .deb and .rpm packages from the
 `repmgr` source code.

@@ -201,7 +214,7 @@ See `PACKAGES.md` for details on building .deb and .rpm packages from the
 Release tarballs are also available:

    https://github.com/2ndQuadrant/repmgr/releases
-    http://repmgr.org/downloads.php
+    https://repmgr.org/

 `repmgr` is compiled in the same way as a PostgreSQL extension using the PGXS
 infrastructure, e.g.:
@@ -229,15 +242,29 @@ The configuration file will be searched for in the following locations:

 Note that if a file is explicitly specified with `-f/--config-file`, an error will
 be raised if it is not found or not readable and no attempt will be made to check
-default locations; this is to prevent `repmgr` reading the wrong file.
+default locations; this is to prevent `repmgr` unexpectedly reading the wrong file.

 For a full list of annotated configuration items, see the file `repmgr.conf.sample`.

 The following parameters in the configuration file can be overridden with
 command line options:

- `-L/--log-level`
- `-b/--pg_bindir`
+- `log_level` with `-L/--log-level`
+- `pg_bindir` with `-b/--pg_bindir`
+
+
+### Logging
+
+By default `repmgr` and `repmgrd` will log directly to `STDERR`. For `repmgrd`
+we recommend capturing output in a logfile or using your system's log facility;
+see `repmgr.conf.sample` for details.
+
+As a command line utility, `repmgr` will log directly to the console by default
+(this is a change in behaviour from versions before 3.3, where it would always
+log to the same location as `repmgrd`). However in some circumstances, such as
+when `repmgr` is executed by `repmgrd` during a failover event, it makes sense to
+capture `repmgr`'s log output - this can be done by supplying the command-line
+option `--log-to-file` to `repmgr`.


 ### Command line options and environment variables
@@ -274,14 +301,14 @@ Setting up a simple replication cluster with repmgr
 The following section will describe how to set up a basic replication cluster
 with a master and a standby server using the `repmgr` command line tool.
 It is assumed PostgreSQL is installed on both servers in the cluster,
-`rsync` is available and password-less SSH connections are possible between
+`rsync` is available and passwordless SSH connections are possible between
 both servers.

 * * *

 > *TIP*: for testing `repmgr`, it's possible to use multiple PostgreSQL
 > instances running on different ports on the same computer, with
-> password-less SSH access to `localhost` enabled.
+> passwordless SSH access to `localhost` enabled.

 * * *

@@ -299,7 +326,13 @@ The following replication settings may need to be adjusted:
    max_wal_senders = 10

    # Ensure WAL files contain enough information to enable read-only queries
-    # on the standby
+    # on the standby.
+    #
+    #  PostgreSQL 9.5 and earlier: one of 'hot_standby' or 'logical'
+    #  PostgreSQL 9.6 and later: one of 'replica' or 'logical'
+    #    ('hot_standby' will still be accepted as an alias for 'replica')
+    #
+    # See: https://www.postgresql.org/docs/current/static/runtime-config-wal.html#GUC-WAL-LEVEL

    wal_level = 'hot_standby'

@@ -318,10 +351,11 @@ The following replication settings may need to be adjusted:
    archive_command = '/bin/true'

    # If cloning using rsync, or you have configured `pg_basebackup_options`
-    # in `repmgr.conf` to include the setting `--xlog-method=fetch`, *and*
-    # you have not set `restore_command` in `repmgr.conf`to fetch WAL files
-    # from another source such as Barman, you'll need to set `wal_keep_segments`
-    # to a high enough value to ensure that all WAL files generated while
+    # in `repmgr.conf` to include the setting `--xlog-method=fetch` (from
+    # PostgreSQL 10 `--wal-method=fetch`), *and* you have not set
+    # `restore_command` in `repmgr.conf` to fetch WAL files from another
+    # source such as Barman, you'll need to set `wal_keep_segments` to a
+    # high enough value to ensure that all WAL files generated while
    # the standby is being cloned are retained until the standby starts up.

    # wal_keep_segments = 5000
@@ -375,7 +409,8 @@ least the following parameters:

 - `cluster`: an arbitrary name for the replication cluster; this must be identical
     on all nodes
- `node`: a unique integer identifying the node
+- `node`: a unique integer identifying the node; note this must be a positive
+     32-bit signed integer between 1 and 2147483647
 - `node_name`: a unique string identifying the node; we recommend a name
     specific to the server (e.g. 'server_1'); avoid names indicating the
     current replication role like 'master' or 'standby' as the server's
@@ -383,7 +418,8 @@ least the following parameters:
 - `conninfo`: a valid connection string for the `repmgr` database on the
     *current* server. (On the standby, the database will not yet exist, but
     `repmgr` needs to know the connection details to complete the setup
-     process).
+     process). *NOTE* this must be a keyword/value string, not a connection
+     URI; this limitation will be removed in a future `repmgr` version.

 `repmgr.conf` should not be stored inside the PostgreSQL data directory,
 as it could be overwritten when setting up or reinitialising the PostgreSQL
@@ -408,11 +444,11 @@ to include this schema name, e.g.
 ### Initialise the master server

 To enable `repmgr` to support a replication cluster, the master node must
-be registered with `repmgr`, which creates the `repmgr` database and adds
+be registered with `repmgr`, which creates the `repmgr` metadatabase and adds
 a metadata record for the server:

    $ repmgr -f repmgr.conf master register
-    [2016-01-07 16:56:46] [NOTICE] master node correctly registered for cluster test with id 1 (conninfo: host=repmgr_node1 user=repmgr dbname=repmgr)
+    NOTICE: master node correctly registered for cluster test with id 1 (conninfo: host=repmgr_node1 user=repmgr dbname=repmgr)

 The metadata record looks like this:

@@ -439,13 +475,13 @@ the values `node`, `node_name` and `conninfo` adjusted accordingly, e.g.:
 Clone the standby with:

    $ repmgr -h repmgr_node1 -U repmgr -d repmgr -D /path/to/node2/data/ -f /etc/repmgr.conf standby clone
-    [2016-01-07 17:21:26] [NOTICE] destination directory '/path/to/node2/data/' provided
-    [2016-01-07 17:21:26] [NOTICE] starting backup...
-    [2016-01-07 17:21:26] [HINT] this may take some time; consider using the -c/--fast-checkpoint option
+    NOTICE: destination directory '/path/to/node2/data/' provided
+    NOTICE: starting backup...
+    HINT: this may take some time; consider using the -c/--fast-checkpoint option
    NOTICE:  pg_stop_backup complete, all required WAL segments have been archived
-    [2016-01-07 17:21:28] [NOTICE] standby clone (using pg_basebackup) complete
-    [2016-01-07 17:21:28] [NOTICE] you can now start your PostgreSQL server
-    [2016-01-07 17:21:28] [HINT] for example : pg_ctl -D /path/to/node2/data/ start
+    NOTICE: standby clone (using pg_basebackup) complete
+    NOTICE: you can now start your PostgreSQL server
+    HINT: for example : pg_ctl -D /path/to/node2/data/ start

 This will clone the PostgreSQL data directory files from the master at `repmgr_node1`
 using PostgreSQL's `pg_basebackup` utility. A `recovery.conf` file containing the
@@ -486,7 +522,8 @@ place. To ensure this happens when using the default `pg_basebackup` method,
 `repmgr` will set `pg_basebackup`'s `--xlog-method` parameter to `stream`,
 which will ensure all WAL files generated during the cloning process are
 streamed in parallel with the main backup. Note that this requires two
-replication connections to be available.
+replication connections to be available (`repmgr` will verify sufficient
+connections are available before attempting to clone).

 To override this behaviour, in `repmgr.conf` set `pg_basebackup`'s
 `--xlog-method` parameter to `fetch`:
@@ -498,6 +535,9 @@ See the `pg_basebackup` documentation for details:

    https://www.postgresql.org/docs/current/static/app-pgbasebackup.html

+> *NOTE*: From PostgreSQL 10, `pg_basebackup`'s `--xlog-method` parameter
+> has been renamed to `--wal-method`.
+
 Make any adjustments to the standby's PostgreSQL configuration files now,
 then start the server.

@@ -540,8 +580,8 @@ Connect to the master server and execute:

 Register the standby server with:

-    repmgr -f /etc/repmgr.conf standby register
-    [2016-01-08 11:13:16] [NOTICE] standby node correctly registered for cluster test with id 2 (conninfo: host=repmgr_node2 user=repmgr dbname=repmgr)
+    $ repmgr -f /etc/repmgr.conf standby register
+    NOTICE: standby node correctly registered for cluster test with id 2 (conninfo: host=repmgr_node2 user=repmgr dbname=repmgr)

 Connect to the standby server's `repmgr` database and check the `repl_nodes`
 table:
@@ -572,6 +612,21 @@ to effectively manage cascading replication (see below).

 * * *

+Under some circumstances you may wish to register a standby which is not
+yet running; this can be the case when using provisioning tools to create
+a complex replication cluster. In this case, by using the `-F/--force`
+option and providing the connection parameters to the master server,
+the standby can be registered.
+
+Similarly, with cascading replication it may be necessary to register
+a standby whose upstream node has not yet been registered - in this case,
+using `-F/--force` will result in the creation of an inactive placeholder
+record for the upstream node, which will however later need to be registered
+with the `-F/--force` option too.
+
+When used with `standby register`, care should be taken that use of the
+`-F/--force` option does not result in an incorrectly configured cluster.
+
 ### Using Barman to clone a standby

 `repmgr standby clone` also supports Barman, the Backup and
@@ -580,13 +635,13 @@ base backups and WAL files.

 Barman support provides the following advantages:

- the primary node does not need to perform a new backup every time a
+- the master node does not need to perform a new backup every time a
  new standby is cloned;
 - a standby node can be disconnected for longer periods without losing
  the ability to catch up, and without causing accumulation of WAL
-  files on the primary node;
+  files on the master node;
 - therefore, `repmgr` does not need to use replication slots, and the
-  primary node does not need to set `wal_keep_segments`.
+  master node does not need to set `wal_keep_segments`.

 > *NOTE*: In view of the above, Barman support is incompatible with
 > the `use_replication_slots` setting in `repmgr.conf`.
@@ -595,12 +650,12 @@ In order to enable Barman support for `repmgr standby clone`, you must
 ensure that:

 - the name of the server configured in Barman is equal to the
-  `cluster_name` setting in `repmgr.conf`;
+  `cluster` setting in `repmgr.conf`;
 - the `barman_server` setting in `repmgr.conf` is set to the SSH
  hostname of the Barman server;
 - the `restore_command` setting in `repmgr.conf` is configured to
-  use a copy of the `barman-wal-restore.py` script shipped with Barman
-  (see below);
+  use a copy of the `barman-wal-restore` script shipped with the
+  `barman-cli` package (see below);
 - the Barman catalogue includes at least one valid backup for this
  server.

@@ -616,39 +671,37 @@ ensure that:
 > corresponding to the value of `barman_server` in `repmgr.conf`. See
 > the "Host" section in `man 5 ssh_config` for more details.

-`barman-wal-restore.py` is a Python script provided by the Barman
-development team, which must be copied in a location accessible to
-`repmgr`, and marked as executable; `restore_command` must then be
-set in `repmgr.conf` as follows:
+`barman-wal-restore` is a Python script provided by the Barman
+development team as part of the `barman-cli` package (Barman 2.0
+and later; for Barman 1.x the script is provided separately as
+`barman-wal-restore.py`).
+
+`restore_command` must then be set in `repmgr.conf` as follows:

    <script> <Barman hostname> <cluster_name> %f %p

 For instance, suppose that we have installed Barman on the `barmansrv`
-host, and that we have placed a copy of `barman-wal-restore.py` into
-the `/usr/local/bin` directory. First, we ensure that the script is
-executable:
-
-    sudo chmod +x /usr/local/bin/barman-wal-restore.py
-
-Then we check that `repmgr.conf` includes the following lines:
+host, and that `barman-wal-restore` is located as an executable at
+`/usr/bin/barman-wal-restore`; `repmgr.conf` should include the following
+lines:

    barman_server=barmansrv
-    restore_command=/usr/local/bin/barman-wal-restore.py barmansrv test %f %p
+    restore_command=/usr/bin/barman-wal-restore barmansrv test %f %p

-To use a non-default Barman configuration file on the Barman server,
+NOTE: to use a non-default Barman configuration file on the Barman server,
 specify this in `repmgr.conf` with `barman_config`:

    barman_config=/path/to/barman.conf

 Now we can clone a standby using the Barman server:

-    $ repmgr -h node1 -D 9.5/main -f /etc/repmgr.conf standby clone
-    [2016-06-12 20:08:35] [NOTICE] destination directory '9.5/main' provided
-    [2016-06-12 20:08:35] [NOTICE] getting backup from Barman...
-    [2016-06-12 20:08:36] [NOTICE] standby clone (from Barman) complete
-    [2016-06-12 20:08:36] [NOTICE] you can now start your PostgreSQL server
-    [2016-06-12 20:08:36] [HINT] for example : pg_ctl -D 9.5/data start
-    [2016-06-12 20:08:36] [HINT] After starting the server, you need to register this standby with "repmgr standby register"
+    $ repmgr -h node1 -d repmgr -D 9.5/main -f /etc/repmgr.conf standby clone
+    NOTICE: destination directory '9.5/main' provided
+    NOTICE: getting backup from Barman...
+    NOTICE: standby clone (from Barman) complete
+    NOTICE: you can now start your PostgreSQL server
+    HINT: for example : pg_ctl -D 9.5/main start
+    HINT: After starting the server, you need to register this standby with "repmgr standby register"



@@ -688,24 +741,10 @@ and destination server as the contents of files existing on both servers need
 to be compared, meaning this method is not necessarily faster than making a
 fresh clone with `pg_basebackup`.

-> *NOTE*: `barman-wal-restore.py` supports command line switches to
+> *NOTE*: `barman-wal-restore` supports command line switches to
 > control parallelism (`--parallel=N`) and compression (`--bzip2`,
 > `--gzip`).

-### Dealing with PostgreSQL configuration files
-
-By default, `repmgr` will attempt to copy the standard configuration files
-(`postgresql.conf`, `pg_hba.conf` and `pg_ident.conf`) even if they are located
-outside of the data directory (though currently they will be copied
-into the standby's data directory). To prevent this happening, when executing
-`repmgr standby clone` provide the `--ignore-external-config-files` option.
-
-If using `rsync` to clone a standby, additional control over which files
-not to transfer is possible by configuring `rsync_options` in `repmgr.conf`,
-which enables any valid `rsync` options to be passed to that command, e.g.:
-
-    rsync_options='--exclude=postgresql.local.conf'
-
 ### Controlling `primary_conninfo` in `recovery.conf`

 The `primary_conninfo` setting in `recovery.conf` generated by `repmgr`
@@ -722,13 +761,22 @@ string passed to `repmgr` with `-d/--dbname` (see above for details), and/or set
 appropriate environment variables.

 Note that PostgreSQL will always set explicit defaults for `sslmode` and
-`sslcompression`.
+`sslcompression` (and from PostgreSQL 10.0 also `target_session_attrs`).

 If `application_name` is set in the standby's `conninfo` parameter in
 `repmgr.conf`, this value will be appended to `primary_conninfo`, otherwise
 `repmgr` will set `application_name` to the same value as the `node_name`
 parameter.

+By default `repmgr` assumes the user who owns the `repmgr` metadatabase will
+also be the replication user; a different replication user can be specified
+with `--replication-user`.
+
+If the upstream server requires a password, and this was provided via
+`PGPASSWORD`, `.pgpass` etc., by default `repmgr` will include this in
+`primary_conninfo`. Use the command line option `--no-conninfo-password` to
+suppress this.
+

 Setting up cascading replication with repmgr
 --------------------------------------------
@@ -762,15 +810,15 @@ created standby. Clone this standby (using the connection parameters
 for the existing standby) and register it:

    $ repmgr -h repmgr_node2 -U repmgr -d repmgr -D /path/to/node3/data/ -f /etc/repmgr.conf standby clone
-    [2016-01-08 13:44:52] [NOTICE] destination directory 'node_3/data/' provided
-    [2016-01-08 13:44:52] [NOTICE] starting backup (using pg_basebackup)...
-    [2016-01-08 13:44:52] [HINT] this may take some time; consider using the -c/--fast-checkpoint option
-    [2016-01-08 13:44:52] [NOTICE] standby clone (using pg_basebackup) complete
-    [2016-01-08 13:44:52] [NOTICE] you can now start your PostgreSQL server
-    [2016-01-08 13:44:52] [HINT] for example : pg_ctl -D /path/to/node_3/data start
+    NOTICE: destination directory 'node_3/data/' provided
+    NOTICE: starting backup (using pg_basebackup)...
+    HINT: this may take some time; consider using the -c/--fast-checkpoint option
+    NOTICE: standby clone (using pg_basebackup) complete
+    NOTICE: you can now start your PostgreSQL server
+    HINT: for example : pg_ctl -D /path/to/node_3/data start

    $ repmgr -f /etc/repmgr.conf standby register
-    [2016-01-08 14:04:32] [NOTICE] standby node correctly registered for cluster test with id 3 (conninfo: host=repmgr_node3 dbname=repmgr user=repmgr)
+    NOTICE: standby node correctly registered for cluster test with id 3 (conninfo: host=repmgr_node3 dbname=repmgr user=repmgr)

 After starting the standby, the `repl_nodes` table will look like this:

@@ -782,6 +830,15 @@ After starting the standby, the `repl_nodes` table will look like this:
      3 | standby |                2 | test    | node3 | host=repmgr_node3 dbname=repmgr user=repmgr |           |      100 | t
    (3 rows)

+* * *
+
+> *TIP*: under some circumstances when setting up a cascading replication
+> cluster, you may wish to clone a downstream standby whose upstream node
+> does not yet exist. In this case you can clone from the master (or
+> another upstream node) and provide the parameter `--upstream-conninfo`
+> to explicitly set the upstream's `primary_conninfo` string in `recovery.conf`.
+
+* * *

 Using replication slots with repmgr
 -----------------------------------
@@ -867,19 +924,19 @@ Promote the first standby with:

 This will produce output similar to the following:

-    [2016-01-08 16:07:31] [ERROR] connection to database failed: could not connect to server: Connection refused
+    ERROR: connection to database failed: could not connect to server: Connection refused
            Is the server running on host "repmgr_node1" (192.161.2.1) and accepting
            TCP/IP connections on port 5432?
    could not connect to server: Connection refused
            Is the server running on host "repmgr_node1" (192.161.2.1) and accepting
            TCP/IP connections on port 5432?

-    [2016-01-08 16:07:31] [NOTICE] promoting standby
-    [2016-01-08 16:07:31] [NOTICE] promoting server using '/usr/bin/postgres/pg_ctl -D /path/to/node_2/data promote'
+    NOTICE: promoting standby
+    NOTICE: promoting server using '/usr/bin/postgres/pg_ctl -D /path/to/node_2/data promote'
    server promoting
-    [2016-01-08 16:07:33] [NOTICE] STANDBY PROMOTE successful
+    NOTICE: STANDBY PROMOTE successful

-Note: the first `[ERROR]` is `repmgr` attempting to connect to the current
+Note: the first `ERROR` is `repmgr` attempting to connect to the current
 master to verify that it has failed. If a valid master is found, `repmgr`
 will refuse to promote a standby.

@@ -911,7 +968,7 @@ end of the preceding section ("Promoting a standby server with repmgr"),
 execute this:

    $ repmgr -f /etc/repmgr.conf -D /path/to/node_3/data/ -h repmgr_node2 -U repmgr -d repmgr standby follow
-    [2016-01-08 16:57:06] [NOTICE] restarting server using '/usr/bin/postgres/pg_ctl -D /path/to/node_3/data/ -w -m fast restart'
+    NOTICE: restarting server using '/usr/bin/postgres/pg_ctl -D /path/to/node_3/data/ -w -m fast restart'
    waiting for server to shut down.... done
    server stopped
    waiting for server to start.... done
@@ -958,6 +1015,13 @@ both passwordless SSH access and the path of `repmgr.conf` on that server.
 > careful preparation and with adequate attention. In particular you should
 > be confident that your network environment is stable and reliable.
 >
+> Additionally you should be sure that the current master can be shut down
+> quickly and cleanly. In particular, access from applications should be
+> minimized or preferably blocked completely. Also check that there is
+> no backlog of files waiting to be archived, as PostgreSQL will not shut
+> down until archiving completes, and that any standbys attached to the
+> current master don't have a significant amount of replication lag.
+>
 > We recommend running `repmgr standby switchover` at the most verbose
 > logging level (`--log-level DEBUG --verbose`) and capturing all output
 > to assist troubleshooting any problems.
@@ -983,26 +1047,26 @@ local server, as well as the normal default locations. `repmgr` will check
 this file can be found before performing any further actions.

    $ repmgr -f /etc/repmgr.conf -C /etc/repmgr.conf standby switchover -v
-    [2016-01-27 16:38:33] [NOTICE] using configuration file "/etc/repmgr.conf"
-    [2016-01-27 16:38:33] [NOTICE] switching current node 2 to master server and demoting current master to standby...
-    [2016-01-27 16:38:34] [NOTICE] 5 files copied to /tmp/repmgr-node1-archive
-    [2016-01-27 16:38:34] [NOTICE] connection to database failed: FATAL:  the database system is shutting down
+    NOTICE: using configuration file "/etc/repmgr.conf"
+    NOTICE: switching current node 2 to master server and demoting current master to standby...
+    NOTICE: 5 files copied to /tmp/repmgr-node1-archive
+    NOTICE: connection to database failed: FATAL:  the database system is shutting down

-    [2016-01-27 16:38:34] [NOTICE] current master has been stopped
-    [2016-01-27 16:38:34] [ERROR] connection to database failed: FATAL:  the database system is shutting down
+    NOTICE: current master has been stopped
+    ERROR: connection to database failed: FATAL:  the database system is shutting down

-    [2016-01-27 16:38:34] [NOTICE] promoting standby
-    [2016-01-27 16:38:34] [NOTICE] promoting server using '/usr/local/bin/pg_ctl -D /var/lib/postgresql/9.5/node_2/data promote'
+    NOTICE: promoting standby
+    NOTICE: promoting server using '/usr/local/bin/pg_ctl -D /var/lib/postgresql/9.5/node_2/data promote'
    server promoting
-    [2016-01-27 16:38:36] [NOTICE] STANDBY PROMOTE successful
-    [2016-01-27 16:38:36] [NOTICE] Executing pg_rewind on old master server
-    [2016-01-27 16:38:36] [NOTICE] 5 files copied to /var/lib/postgresql/9.5/data
-    [2016-01-27 16:38:36] [NOTICE] restarting server using '/usr/local/bin/pg_ctl -w -D /var/lib/postgresql/9.5/node_1/data -m fast restart'
+    NOTICE: STANDBY PROMOTE successful
+    NOTICE: Executing pg_rewind on old master server
+    NOTICE: 5 files copied to /var/lib/postgresql/9.5/data
+    NOTICE: restarting server using '/usr/local/bin/pg_ctl -w -D /var/lib/postgresql/9.5/node_1/data -m fast restart'
    pg_ctl: PID file "/var/lib/postgresql/9.5/node_1/data/postmaster.pid" does not exist
    Is server running?
    starting server anyway
-    [2016-01-27 16:38:37] [NOTICE] node 1 is replicating in state "streaming"
-    [2016-01-27 16:38:37] [NOTICE] switchover was successful
+    NOTICE: node 1 is replicating in state "streaming"
+    NOTICE: switchover was successful

 Messages containing the line `connection to database failed: FATAL: the database
 system is shutting down` are not errors - `repmgr` is polling the old master database
@@ -1024,11 +1088,11 @@ should have been updated to reflect this:

 ### Caveats

- The functionality provided `repmgr standby switchover` is primarily aimed
+- The functionality provided by `repmgr standby switchover` is primarily aimed
  at a two-server master/standby replication cluster and currently does
  not support additional standbys.
 - `repmgr standby switchover` is designed to use the `pg_rewind` utility,
-  standard in 9.5 and later and available for separately in 9.3 and 9.4
+  standard in 9.5 and later and available separately in 9.3 and 9.4
  (see note below)
 - `pg_rewind` *requires* that either `wal_log_hints` is enabled, or that
   data checksums were enabled when the cluster was initialized. See the
@@ -1038,11 +1102,6 @@ should have been updated to reflect this:
  the `repmgrd` may try and promote a standby by itself.
 - Any other standbys attached to the old master will need to be manually
  instructed to point to the new master (e.g. with `repmgr standby follow`).
- You must ensure that following a server start using `pg_ctl`, log output
-  is not send to STDERR (the default behaviour). If logging is not configured,
-  we recommend setting `logging_collector=on` in `postgresql.conf` and
-  providing an explicit `-l/--log` setting in `repmgr.conf`'s `pg_ctl_options`
-  parameter.

 We hope to remove some of these restrictions in future versions of `repmgr`.

@@ -1088,8 +1147,9 @@ This will remove the standby record from `repmgr`'s internal metadata
 table (`repl_nodes`). A `standby_unregister` event notification will be
 recorded in the `repl_events` table.

-Note that this command will not stop the server itself or remove
-it from the replication cluster.
+Note that this command will not stop the server itself or remove it from
+the replication cluster. Additionally, if the standby was using a replication
+slot, the slot will not be removed.

 If the standby is not running, the command can be executed on another
 node by providing the id of the node to be unregistered using
@@ -1107,19 +1167,23 @@ Automatic failover with `repmgrd`
 and which can automate actions such as failover and updating standbys to
 follow the new master.

-To use `repmgrd` for automatic failover, the following `repmgrd` options must
-be set in `repmgr.conf`:
-
-    failover=automatic
-    promote_command='repmgr standby promote -f /etc/repmgr.conf'
-    follow_command='repmgr standby follow -f /etc/repmgr.conf'
-
-(See `repmgr.conf.sample` for further `repmgrd`-specific settings).
-
-Additionally, `postgresql.conf` must contain the following line:
+To use `repmgrd` for automatic failover, `postgresql.conf` must contain the
+following line:

    shared_preload_libraries = 'repmgr_funcs'

+(changing this setting requires a restart of PostgreSQL).
+
+Additionally the following `repmgrd` options must be set in `repmgr.conf`:
+
+    failover=automatic
+    promote_command='repmgr standby promote -f /etc/repmgr.conf --log-to-file'
+    follow_command='repmgr standby follow -f /etc/repmgr.conf --log-to-file'
+
+Note that the `--log-to-file` option will cause `repmgr`'s output to be logged to
+the destination configured to receive log output for `repmgrd`.
+See `repmgr.conf.sample` for further `repmgrd`-specific settings.
+
 When `failover` is set to `automatic`, upon detecting failure of the current
 master, `repmgrd` will execute one of `promote_command` or `follow_command`,
 depending on whether the current server is becoming the new master or
@@ -1421,7 +1485,9 @@ functionality will be included in a feature release (e.g. 3.0.x to 3.1.x).

 In general `repmgr` can be upgraded as-is without any further action required,
 however feature releases may require the `repmgr` database to be upgraded.
-An SQL script will be provided - please check the release notes for details.
+An SQL script will be provided - please check the release notes for details:
+
+* http://repmgr.org/release-notes-3.3.html#UPGRADING


 Distribution-specific configuration
@@ -1524,7 +1590,7 @@ which contains connection details for the local database.
    bootstrapping new installations. To update an existing but 'stale'
    data directory (for example belonging to a failed master), `rsync`
    must be used by specifying `--rsync-only`. In this case,
-    password-less SSH connections between servers are required.
+    passwordless SSH connections between servers are required.

 * `standby promote`

@@ -1538,13 +1604,13 @@ which contains connection details for the local database.
    by using `standby follow` (see below); if `repmgrd` is active, it will
    handle this.

-    This command will not function if the current master is still running.
+    This command will fail with an error if the current master is still running.

 * `standby switchover`

    Promotes a standby to master and demotes the existing master to a standby.
    This command must be run on the standby to be promoted, and requires a
-    password-less SSH connection to the current master. Additionally the
+    passwordless SSH connection to the current master. Additionally the
    location of the master's `repmgr.conf` file must be provided with
    `-C/--remote-config-file`.

@@ -1565,7 +1631,7 @@ which contains connection details for the local database.

    Creates a witness server as a separate PostgreSQL instance. This instance
    can be on a separate server or a server running an existing node. The
-    witness server contain a copy of the repmgr metadata tables but will not
+    witness server contains a copy of the repmgr metadata tables but will not
    be set up as a standby; instead it will update its metadata copy each
    time a failover occurs.

@@ -1651,7 +1717,7 @@ which contains connection details for the local database.
      overview of connections between all databases in the cluster.

    These commands require a valid `repmgr.conf` file on each node.
-    Additionally password-less `ssh` connections are required between
+    Additionally passwordless `ssh` connections are required between
    all nodes.

    Example 1 (all nodes up):
@@ -1745,6 +1811,21 @@ which contains connection details for the local database.
    the current working directory; no additional arguments are required.


+### Further documentation
+
+As well as this README, the `repmgr` source contains the following additional
+documentation files:
+
+* FAQ.md - frequently asked questions
+* CONTRIBUTING.md - how to contribute to `repmgr`
+* PACKAGES.md - details on building packages
+* SSH-RSYNC.md - how to set up passwordless SSH between nodes
+* docs/repmgrd-failover-mechanism.md - how repmgrd picks which node to promote
+* docs/repmgrd-node-fencing.md - how to "fence" a failed master node
+
+
+
+
 ### Error codes

 `repmgr` or `repmgrd` will return one of the following error codes on program
@@ -1814,6 +1895,7 @@ Thanks from the repmgr core team.
 Further reading
 ---------------

+* http://blog.2ndquadrant.com/repmgr-3-2-is-here-barman-support-brand-new-high-availability-features/
 * http://blog.2ndquadrant.com/improvements-in-repmgr-3-1-4/
 * http://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
 * http://blog.2ndquadrant.com/easier_postgresql_90_clusters/
--- a/check_dir.c
+++ b/check_dir.c
@@ -1,6 +1,6 @@
 /*
 * check_dir.c - Directories management functions
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
--- a/check_dir.h
+++ b/check_dir.h
@@ -1,6 +1,6 @@
 /*
 * check_dir.h
- * Copyright (c) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
--- a/compat.c
+++ b/compat.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * compat.c
+ *	  Provides a couple of useful string utility functions adapted
+ *	  from the backend code, which are not publicly exposed. They're
+ *	  unlikely to change but it would be worth keeping an eye on them
+ *	  for any fixes/improvements
+ *
+ * Copyright (c) 2ndQuadrant, 2010-2017
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "repmgr.h"
+#include "compat.h"
+
+/*
+ * Append "str" to "buf", quoted as needed for use as the value of a
+ * keyword/value pair in a libpq connection string. An empty string is
+ * always quoted, so it is emitted as ''.
+ *
+ * This function is adapted from src/fe_utils/string_utils.c (before 9.6
+ * located in: src/bin/pg_dump/dumputils.c)
+ */
+void
+appendConnStrVal(PQExpBuffer buf, const char *str)
+{
+	const char *s;
+	bool		needquotes;
+
+	/*
+	 * If the string consists solely of ASCII letters, digits, '_' or '.',
+	 * no quoting is needed. This is quite conservative, but better safe than sorry.
+	 */
+	needquotes = true;	/* default stays in effect for the empty string */
+	for (s = str; *s; s++)
+	{
+		if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
+			  (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
+		{
+			needquotes = true;
+			break;
+		}
+		needquotes = false;
+	}
+
+	if (needquotes)
+	{
+		appendPQExpBufferChar(buf, '\'');
+		while (*str)
+		{
+			/* ' and \ must be escaped as \' and \\ */
+			if (*str == '\'' || *str == '\\')
+				appendPQExpBufferChar(buf, '\\');
+
+			appendPQExpBufferChar(buf, *str);
+			str++;
+		}
+		appendPQExpBufferChar(buf, '\'');
+	}
+	else
+		appendPQExpBufferStr(buf, str);
+}
+
+/*
+ * Append "str" to "buf" wrapped in single quotes as a shell argument; adapted from: src/fe_utils/string_utils.c
+ */
+void
+appendShellString(PQExpBuffer buf, const char *str)
+{
+	const char *p;
+
+	appendPQExpBufferChar(buf, '\'');
+	for (p = str; *p; p++)
+	{
+		if (*p == '\n' || *p == '\r')
+		{
+			fprintf(stderr,
+					_("shell command argument contains a newline or carriage return: \"%s\"\n"),
+					str);
+			exit(ERR_BAD_CONFIG);	/* newlines/CRs are rejected outright, not quoted */
+		}
+
+		if (*p == '\'')
+			appendPQExpBufferStr(buf, "'\"'\"'");	/* close quote, emit ' inside double quotes, reopen quote */
+		else
+			appendPQExpBufferChar(buf, *p);
+	}
+
+	appendPQExpBufferChar(buf, '\'');
+}
+
--- a/compat.h
+++ b/compat.h
@@ -0,0 +1,29 @@
+/*
+ * compat.h
+ * Copyright (c) 2ndQuadrant, 2010-2017
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef _COMPAT_H_
+#define _COMPAT_H_
+
+extern void
+appendConnStrVal(PQExpBuffer buf, const char *str);
+
+extern void
+appendShellString(PQExpBuffer buf, const char *str);
+
+#endif
--- a/config.c
+++ b/config.c
@@ -1,7 +1,7 @@
 /*
 * config.c - Functions to parse the config file
 *
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -10,11 +10,11 @@
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * along with this program.	 If not, see <http://www.gnu.org/licenses/>.
 *
 */

@@ -30,7 +30,7 @@ static void tablespace_list_append(t_configuration_options *options, const char
 static void exit_with_errors(ItemList *config_errors);

 const static char *_progname = NULL;
-static char config_file_path[MAXPGPATH];
+static char config_file_path[MAXPGPATH] = "";
 static bool config_file_provided = false;
 bool config_file_found = false;

@@ -55,11 +55,11 @@ progname(void)
 *
 * Returns true if a configuration file could be parsed, otherwise false.
 *
- * Any configuration options changed in this function must also be changed in
- * reload_config()
+ * Any *repmgrd-specific* configuration options added/changed in this function must also be
+ * added/changed in reload_config()
 *
 * NOTE: this function is called before the logger is set up, so we need
- * to handle the verbose option ourselves; also the default log level is NOTICE,
+ * to handle the verbose option ourselves; also the default log level is INFO,
 * so we can't use DEBUG.
 */
 bool
@@ -99,9 +99,9 @@ load_config(const char *config_file, bool verbose, t_configuration_options *opti
 	/*
 	 * If no configuration file was provided, attempt to find a default file
 	 * in this order:
-	 *  - current directory
-	 *  - /etc/repmgr.conf
-	 *  - default sysconfdir
+	 *	- current directory
+	 *	- /etc/repmgr.conf
+	 *	- default sysconfdir
 	 *
 	 * here we just check for the existence of the file; parse_config()
 	 * will handle read errors etc.
@@ -181,6 +181,23 @@ load_config(const char *config_file, bool verbose, t_configuration_options *opti
 }


+bool
+parse_config(t_configuration_options *options)
+{
+	/* Collate configuration file errors here for friendlier reporting */
+	static ItemList config_errors = { NULL, NULL };
+
+	_parse_config(options, &config_errors);	/* appends any problems found to config_errors */
+
+	if (config_errors.head != NULL)
+	{
+		exit_with_errors(&config_errors);	/* NOTE(review): presumably reports the list and does not return - confirm */
+	}
+
+	return true;
+}
+
+
 /*
 * Parse configuration file; if any errors are encountered,
 * list them and exit.
@@ -188,8 +205,8 @@ load_config(const char *config_file, bool verbose, t_configuration_options *opti
 * Ensure any default values set here are synced with repmgr.conf.sample
 * and any other documentation.
 */
-bool
-parse_config(t_configuration_options *options)
+void
+_parse_config(t_configuration_options *options, ItemList *error_list)
 {
 	FILE	   *fp;
 	char	   *s,
@@ -201,9 +218,6 @@ parse_config(t_configuration_options *options)
 	PQconninfoOption *conninfo_options;
 	char	   *conninfo_errmsg = NULL;

-	/* Collate configuration file errors here for friendlier reporting */
-	static ItemList config_errors = { NULL, NULL };
-
 	bool		node_found = false;

 	/* Initialize configuration options with sensible defaults
@@ -211,7 +225,7 @@ parse_config(t_configuration_options *options)
 	 * to be initialised here
 	 */
 	memset(options->cluster_name, 0, sizeof(options->cluster_name));
-	options->node = -1;
+	options->node = UNKNOWN_NODE_ID;
 	options->upstream_node = NO_UPSTREAM_NODE;
 	options->use_replication_slots = 0;
 	memset(options->conninfo, 0, sizeof(options->conninfo));
@@ -262,7 +276,7 @@ parse_config(t_configuration_options *options)
 	{
 		log_verbose(LOG_NOTICE, _("no configuration file provided and no default file found - "
 					 "continuing with default values\n"));
-		return true;
+		return;
 	}

 	fp = fopen(config_file_path, "r");
@@ -307,11 +321,11 @@ parse_config(t_configuration_options *options)
 			strncpy(options->cluster_name, value, MAXLEN);
 		else if (strcmp(name, "node") == 0)
 		{
-			options->node = repmgr_atoi(value, "node", &config_errors, false);
+			options->node = repmgr_atoi(value, "node", error_list, false);
 			node_found = true;
 		}
 		else if (strcmp(name, "upstream_node") == 0)
-			options->upstream_node = repmgr_atoi(value, "upstream_node", &config_errors, false);
+			options->upstream_node = repmgr_atoi(value, "upstream_node", error_list, false);
 		else if (strcmp(name, "conninfo") == 0)
 			strncpy(options->conninfo, value, MAXLEN);
 		else if (strcmp(name, "barman_server") == 0)
@@ -342,11 +356,11 @@ parse_config(t_configuration_options *options)
 			}
 			else
 			{
-				item_list_append(&config_errors,_("value for 'failover' must be 'automatic' or 'manual'\n"));
+				item_list_append(error_list, _("value for 'failover' must be 'automatic' or 'manual'\n"));
 			}
 		}
 		else if (strcmp(name, "priority") == 0)
-			options->priority = repmgr_atoi(value, "priority", &config_errors, true);
+			options->priority = repmgr_atoi(value, "priority", error_list, true);
 		else if (strcmp(name, "node_name") == 0)
 			strncpy(options->node_name, value, MAXLEN);
 		else if (strcmp(name, "promote_command") == 0)
@@ -364,17 +378,17 @@ parse_config(t_configuration_options *options)
 		else if (strcmp(name, "service_promote_command") == 0)
 			strncpy(options->service_promote_command, value, MAXLEN);
 		else if (strcmp(name, "master_response_timeout") == 0)
-			options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
+			options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", error_list, false);
 		/*
 		 * 'primary_response_timeout' as synonym for 'master_response_timeout' -
 		 * we'll switch terminology in a future release (3.1?)
 		 */
 		else if (strcmp(name, "primary_response_timeout") == 0)
-			options->master_response_timeout = repmgr_atoi(value, "primary_response_timeout", &config_errors, false);
+			options->master_response_timeout = repmgr_atoi(value, "primary_response_timeout", error_list, false);
 		else if (strcmp(name, "reconnect_attempts") == 0)
-			options->reconnect_attempts = repmgr_atoi(value, "reconnect_attempts", &config_errors, false);
+			options->reconnect_attempts = repmgr_atoi(value, "reconnect_attempts", error_list, false);
 		else if (strcmp(name, "reconnect_interval") == 0)
-			options->reconnect_interval = repmgr_atoi(value, "reconnect_interval", &config_errors, false);
+			options->reconnect_interval = repmgr_atoi(value, "reconnect_interval", error_list, false);
 		else if (strcmp(name, "pg_bindir") == 0)
 			strncpy(options->pg_bindir, value, MAXLEN);
 		else if (strcmp(name, "pg_ctl_options") == 0)
@@ -384,14 +398,14 @@ parse_config(t_configuration_options *options)
 		else if (strcmp(name, "logfile") == 0)
 			strncpy(options->logfile, value, MAXLEN);
 		else if (strcmp(name, "monitor_interval_secs") == 0)
-			options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
+			options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", error_list, false);
 		else if (strcmp(name, "retry_promote_interval_secs") == 0)
-			options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
+			options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", error_list, false);
 		else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
-			options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
+			options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", error_list, false);
 		else if (strcmp(name, "use_replication_slots") == 0)
 			/* XXX we should have a dedicated boolean argument format */
-			options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
+			options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", error_list, false);
 		else if (strcmp(name, "event_notification_command") == 0)
 			strncpy(options->event_notification_command, value, MAXLEN);
 		else if (strcmp(name, "event_notifications") == 0)
@@ -419,7 +433,7 @@ parse_config(t_configuration_options *options)
 					 _("no value provided for parameter \"%s\""),
 					 name);

-			item_list_append(&config_errors, error_message_buf);
+			item_list_append(error_list, error_message_buf);
 		}
 	}

@@ -428,11 +442,15 @@ parse_config(t_configuration_options *options)

 	if (node_found == false)
 	{
-		item_list_append(&config_errors, _("\"node\": parameter was not found"));
+		item_list_append(error_list, _("\"node\": parameter was not found"));
 	}
 	else if (options->node == 0)
 	{
-		item_list_append(&config_errors, _("\"node\": must be greater than zero"));
+		item_list_append(error_list, _("\"node\": must be greater than zero"));
+	}
+	else if (options->node < 0)
+	{
+		item_list_append(error_list, _("\"node\": must be a positive signed 32 bit integer, i.e. 2147483647 or less"));
 	}

 	if (strlen(options->conninfo))
@@ -452,18 +470,11 @@ parse_config(t_configuration_options *options)
 					 _("\"conninfo\": %s"),
 					 conninfo_errmsg);

-			item_list_append(&config_errors, error_message_buf);
+			item_list_append(error_list, error_message_buf);
 		}

 		PQconninfoFree(conninfo_options);
 	}
-
-	if (config_errors.head != NULL)
-	{
-		exit_with_errors(&config_errors);
-	}
-
-	return true;
 }


@@ -553,70 +564,85 @@ parse_line(char *buf, char *name, char *value)
 	trim(value);
 }

+
+/*
+ * reload_config()
+ *
+ * This is only called by repmgrd after receiving a SIGHUP or when a monitoring
+ * loop is started up; it therefore only needs to reload options required
+ * by repmgrd, which are as follows:
+ *
+ * changeable options:
+ * - failover
+ * - follow_command
+ * - logfacility
+ * - logfile
+ * - loglevel
+ * - master_response_timeout
+ * - monitor_interval_secs
+ * - priority
+ * - promote_command
+ * - reconnect_attempts
+ * - reconnect_interval
+ * - retry_promote_interval_secs
+ * - witness_repl_nodes_sync_interval_secs
+ *
+ * non-changeable options:
+ * - cluster_name
+ * - conninfo
+ * - node
+ * - node_name
+ *
+ * extract with something like:
+ *	 grep local_options\\. repmgrd.c | perl -n -e '/local_options\.([\w_]+)/ && print qq|$1\n|;' | sort | uniq
+
+ */
 bool
 reload_config(t_configuration_options *orig_options)
 {
 	PGconn	   *conn;
-	t_configuration_options new_options;
+	t_configuration_options new_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
 	bool	  config_changed = false;
+	bool	  log_config_changed = false;
+
+	static ItemList config_errors = { NULL, NULL };

 	/*
 	 * Re-read the configuration file: repmgr.conf
 	 */
-	log_info(_("reloading configuration file and updating repmgr tables\n"));
+	log_info(_("reloading configuration file\n"));

-	parse_config(&new_options);
-	if (new_options.node == -1)
+	_parse_config(&new_options, &config_errors);
+
+	if (config_errors.head != NULL)
 	{
+		/* XXX dump errors to log */
 		log_warning(_("unable to parse new configuration, retaining current configuration\n"));
 		return false;
 	}

+	/* The following options cannot be changed */
 	if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
 	{
-		log_warning(_("unable to change cluster name, retaining current configuration\n"));
+		log_warning(_("cluster_name cannot be changed, retaining current configuration\n"));
 		return false;
 	}

 	if (new_options.node != orig_options->node)
 	{
-		log_warning(_("unable to change node ID, retaining current configuration\n"));
+		log_warning(_("node ID cannot be changed, retaining current configuration\n"));
 		return false;
 	}

 	if (strcmp(new_options.node_name, orig_options->node_name) != 0)
 	{
-		log_warning(_("unable to change standby name, keeping current configuration\n"));
-		return false;
-	}
-
-	if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
-	{
-		log_warning(_("new value for 'failover' must be 'automatic' or 'manual'\n"));
-		return false;
-	}
-
-	if (new_options.master_response_timeout <= 0)
-	{
-		log_warning(_("new value for 'master_response_timeout' must be greater than zero\n"));
-		return false;
-	}
-
-	if (new_options.reconnect_attempts < 0)
-	{
-		log_warning(_("new value for 'reconnect_attempts' must be zero or greater\n"));
-		return false;
-	}
-
-	if (new_options.reconnect_interval < 0)
-	{
-		log_warning(_("new value for 'reconnect_interval' must be zero or greater\n"));
+		log_warning(_("node_name cannot be changed, keeping current configuration\n"));
 		return false;
 	}

 	if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
 	{
-		/* Test conninfo string */
+		/* Test that the new conninfo string works */
 		conn = establish_db_connection(new_options.conninfo, false);
 		if (!conn || (PQstatus(conn) != CONNECTION_OK))
 		{
@@ -633,34 +659,6 @@ reload_config(t_configuration_options *orig_options)
 	 * to manage them
 	 */

-	/* cluster_name */
-	if (strcmp(orig_options->cluster_name, new_options.cluster_name) != 0)
-	{
-		strcpy(orig_options->cluster_name, new_options.cluster_name);
-		config_changed = true;
-	}
-
-	/* conninfo */
-	if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
-	{
-		strcpy(orig_options->conninfo, new_options.conninfo);
-		config_changed = true;
-	}
-
-	/* barman_server */
-	if (strcmp(orig_options->barman_server, new_options.barman_server) != 0)
-	{
-		strcpy(orig_options->barman_server, new_options.barman_server);
-		config_changed = true;
-	}
-
-	/* node */
-	if (orig_options->node != new_options.node)
-	{
-		orig_options->node = new_options.node;
-		config_changed = true;
-	}
-
 	/* failover */
 	if (orig_options->failover != new_options.failover)
 	{
@@ -668,27 +666,6 @@ reload_config(t_configuration_options *orig_options)
 		config_changed = true;
 	}

-	/* priority */
-	if (orig_options->priority != new_options.priority)
-	{
-		orig_options->priority = new_options.priority;
-		config_changed = true;
-	}
-
-	/* node_name */
-	if (strcmp(orig_options->node_name, new_options.node_name) != 0)
-	{
-		strcpy(orig_options->node_name, new_options.node_name);
-		config_changed = true;
-	}
-
-	/* promote_command */
-	if (strcmp(orig_options->promote_command, new_options.promote_command) != 0)
-	{
-		strcpy(orig_options->promote_command, new_options.promote_command);
-		config_changed = true;
-	}
-
 	/* follow_command */
 	if (strcmp(orig_options->follow_command, new_options.follow_command) != 0)
 	{
@@ -696,30 +673,6 @@ reload_config(t_configuration_options *orig_options)
 		config_changed = true;
 	}

-	/*
-	 * XXX These ones can change with a simple SIGHUP?
-	 *
-	 * strcpy (orig_options->loglevel, new_options.loglevel); strcpy
-	 * (orig_options->logfacility, new_options.logfacility);
-	 *
-	 * logger_shutdown(); XXX do we have progname here ? logger_init(progname,
-	 * orig_options.loglevel, orig_options.logfacility);
-	 */
-
-	/* rsync_options */
-	if (strcmp(orig_options->rsync_options, new_options.rsync_options) != 0)
-	{
-		strcpy(orig_options->rsync_options, new_options.rsync_options);
-		config_changed = true;
-	}
-
-	/* ssh_options */
-	if (strcmp(orig_options->ssh_options, new_options.ssh_options) != 0)
-	{
-		strcpy(orig_options->ssh_options, new_options.ssh_options);
-		config_changed = true;
-	}
-
 	/* master_response_timeout */
 	if (orig_options->master_response_timeout != new_options.master_response_timeout)
 	{
@@ -727,6 +680,27 @@ reload_config(t_configuration_options *orig_options)
 		config_changed = true;
 	}

+	/* monitor_interval_secs */
+	if (orig_options->monitor_interval_secs != new_options.monitor_interval_secs)
+	{
+		orig_options->monitor_interval_secs = new_options.monitor_interval_secs;
+		config_changed = true;
+	}
+
+	/* priority */
+	if (orig_options->priority != new_options.priority)
+	{
+		orig_options->priority = new_options.priority;
+		config_changed = true;
+	}
+
+	/* promote_command */
+	if (strcmp(orig_options->promote_command, new_options.promote_command) != 0)
+	{
+		strcpy(orig_options->promote_command, new_options.promote_command);
+		config_changed = true;
+	}
+
 	/* reconnect_attempts */
 	if (orig_options->reconnect_attempts != new_options.reconnect_attempts)
 	{
@@ -741,27 +715,6 @@ reload_config(t_configuration_options *orig_options)
 		config_changed = true;
 	}

-	/* pg_ctl_options */
-	if (strcmp(orig_options->pg_ctl_options, new_options.pg_ctl_options) != 0)
-	{
-		strcpy(orig_options->pg_ctl_options, new_options.pg_ctl_options);
-		config_changed = true;
-	}
-
-	/* pg_basebackup_options */
-	if (strcmp(orig_options->pg_basebackup_options, new_options.pg_basebackup_options) != 0)
-	{
-		strcpy(orig_options->pg_basebackup_options, new_options.pg_basebackup_options);
-		config_changed = true;
-	}
-
-	/* monitor_interval_secs */
-	if (orig_options->monitor_interval_secs != new_options.monitor_interval_secs)
-	{
-		orig_options->monitor_interval_secs = new_options.monitor_interval_secs;
-		config_changed = true;
-	}
-
 	/* retry_promote_interval_secs */
 	if (orig_options->retry_promote_interval_secs != new_options.retry_promote_interval_secs)
 	{
@@ -769,20 +722,54 @@ reload_config(t_configuration_options *orig_options)
 		config_changed = true;
 	}

-	/* use_replication_slots */
-	if (orig_options->use_replication_slots != new_options.use_replication_slots)
+
+	/* witness_repl_nodes_sync_interval_secs */
+	if (orig_options->witness_repl_nodes_sync_interval_secs != new_options.witness_repl_nodes_sync_interval_secs)
 	{
-		orig_options->use_replication_slots = new_options.use_replication_slots;
+		orig_options->witness_repl_nodes_sync_interval_secs = new_options.witness_repl_nodes_sync_interval_secs;
 		config_changed = true;
 	}

+	/*
+	 * Handle changes to logging configuration
+	 */
+	if (strcmp(orig_options->logfacility, new_options.logfacility) != 0)
+	{
+		strcpy(orig_options->logfacility, new_options.logfacility);
+		log_config_changed = true;
+	}
+
+	if (strcmp(orig_options->logfile, new_options.logfile) != 0)
+	{
+		strcpy(orig_options->logfile, new_options.logfile);
+		log_config_changed = true;
+	}
+
+
+	if (strcmp(orig_options->loglevel, new_options.loglevel) != 0)
+	{
+		strcpy(orig_options->loglevel, new_options.loglevel);
+		log_config_changed = true;
+	}
+
+	if (log_config_changed == true)
+	{
+		log_notice(_("restarting logging with changed parameters\n"));
+		logger_shutdown();
+		logger_init(orig_options, progname());
+	}
+
 	if (config_changed == true)
 	{
-		log_debug(_("reload_config(): configuration has changed\n"));
+		log_notice(_("configuration file reloaded with changed parameters\n"));
 	}
-	else
+	/*
+	 * if logging configuration changed, don't say the configuration didn't
+	 * change, as it clearly has.
+	 */
+	else if (log_config_changed == false)
 	{
-		log_debug(_("reload_config(): configuration has not changed\n"));
+		log_info(_("configuration has not changed\n"));
 	}

 	return config_changed;
@@ -956,7 +943,7 @@ static void
 parse_event_notifications_list(t_configuration_options *options, const char *arg)
 {
 	const char *arg_ptr;
-	char	    event_type_buf[MAXLEN] = "";
+	char		event_type_buf[MAXLEN] = "";
 	char	   *dst_ptr = event_type_buf;


--- a/config.h
+++ b/config.h
@@ -1,7 +1,7 @@
 /*
 * config.h
 *
- * Copyright (c) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -97,7 +97,7 @@ typedef struct
 * The following will initialize the structure with a minimal set of options;
 * actual defaults are set in parse_config() before parsing the configuration file
 */
-#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", "", "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } }
+#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", UNKNOWN_NODE_ID, NO_UPSTREAM_NODE, "", "", "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } }

 typedef struct ItemListCell
 {
@@ -131,8 +131,11 @@ void set_progname(const char *argv0);
 const char * progname(void);

 bool		load_config(const char *config_file, bool verbose, t_configuration_options *options, char *argv0);
-bool		reload_config(t_configuration_options *orig_options);
+
+void		_parse_config(t_configuration_options *options, ItemList *error_list);
 bool		parse_config(t_configuration_options *options);
+bool		reload_config(t_configuration_options *orig_options);
+
 void		parse_line(char *buff, char *name, char *value);
 char	   *trim(char *s);
 void		item_list_append(ItemList *item_list, char *error_message);
--- a/dbutils.c
+++ b/dbutils.c
@@ -1,7 +1,7 @@
 /*
 * dbutils.c - Database connection/management functions
 *
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -33,6 +33,15 @@ char repmgr_schema[MAXLEN] = "";
 char repmgr_schema_quoted[MAXLEN] = "";

 static int _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info);
+static bool _set_config(PGconn *conn, const char *config_param, const char *sqlquery);
+
+/*
+ * _establish_db_connection()
+ *
+ * Connect to a database using a conninfo string.
+ *
+ * NOTE: *do not* use this for replication connections; use establish_db_connection_by_params() instead.
+ */

 PGconn *
 _establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice, const bool verbose_only)
@@ -77,6 +86,19 @@ _establish_db_connection(const char *conninfo, const bool exit_on_error, const b
 		}
 	}

+	/*
+	 * set "synchronous_commit" to "local" in case synchronous replication is in use
+	 */
+
+	else if (set_config(conn, "synchronous_commit", "local") == false)
+	{
+		if (exit_on_error)
+		{
+			PQfinish(conn);
+			exit(ERR_DB_CON);
+		}
+	}
+
 	return conn;
 }

@@ -116,8 +138,12 @@ PGconn *
 establish_db_connection_by_params(const char *keywords[], const char *values[],
 								  const bool exit_on_error)
 {
-	/* Make a connection to the database */
-	PGconn	   *conn = PQconnectdbParams(keywords, values, true);
+	PGconn	   *conn;
+	bool	    replication_connection = false;
+	int	   	    i;
+
+	/* Connect to the database using the provided parameters */
+	conn = PQconnectdbParams(keywords, values, true);

 	/* Check to see that the backend connection was successfully made */
 	if ((PQstatus(conn) != CONNECTION_OK))
@@ -130,6 +156,28 @@ establish_db_connection_by_params(const char *keywords[], const char *values[],
 			exit(ERR_DB_CON);
 		}
 	}
+	else
+	{
+		/*
+		 * set "synchronous_commit" to "local" in case synchronous replication is in
+		 * use (provided this is not a replication connection)
+		 */
+
+		for (i = 0; keywords[i]; i++)
+		{
+			if (strcmp(keywords[i], "replication") == 0)
+				replication_connection = true;
+		}
+
+		if (replication_connection == false && set_config(conn, "synchronous_commit", "local") == false)
+		{
+			if (exit_on_error)
+			{
+				PQfinish(conn);
+				exit(ERR_DB_CON);
+			}
+		}
+	}

 	return conn;
 }
@@ -274,12 +322,9 @@ is_standby(PGconn *conn)
 bool
 is_pgup(PGconn *conn, int timeout)
 {
-	char		sqlquery[QUERY_STR_LEN];
-
 	/* Check the connection status twice in case it changes after reset */
 	bool		twice = false;

-	/* Check the connection status twice in case it changes after reset */
 	for (;;)
 	{
 		if (PQstatus(conn) != CONNECTION_OK)
@@ -299,8 +344,7 @@ is_pgup(PGconn *conn, int timeout)
 			if (wait_connection_availability(conn, timeout) != 1)
 				goto failed;

-			sqlquery_snprintf(sqlquery, "SELECT 1");
-			if (PQsendQuery(conn, sqlquery) == 0)
+			if (PQsendQuery(conn, "SELECT 1") == 0)
 			{
 				log_warning(_("PQsendQuery: Query could not be sent to primary. %s\n"),
 							PQerrorMessage(conn));
@@ -381,6 +425,8 @@ int
 get_server_version(PGconn *conn, char *server_version)
 {
 	PGresult   *res;
+	int         server_version_num;
+
 	res = PQexec(conn,
 				 "SELECT current_setting('server_version_num'), "
 				 "       current_setting('server_version')");
@@ -394,9 +440,12 @@ get_server_version(PGconn *conn, char *server_version)
 	}

 	if (server_version != NULL)
-		strcpy(server_version, PQgetvalue(res, 0, 0));
+		strcpy(server_version, PQgetvalue(res, 0, 1));

-	return atoi(PQgetvalue(res, 0, 0));
+	server_version_num = atoi(PQgetvalue(res, 0, 0));
+
+	PQclear(res);
+	return server_version_num;
 }


@@ -1085,15 +1134,25 @@ drop_replication_slot(PGconn *conn, char *slot_name)


 bool
-start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint)
+start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint, int server_version_num)
 {
 	char		sqlquery[QUERY_STR_LEN];
 	PGresult   *res;

-	sqlquery_snprintf(sqlquery,
-					  "SELECT pg_catalog.pg_xlogfile_name(pg_catalog.pg_start_backup('repmgr_standby_clone_%ld', %s))",
-					  time(NULL),
-					  fast_checkpoint ? "TRUE" : "FALSE");
+	if (server_version_num >= 100000)
+	{
+		sqlquery_snprintf(sqlquery,
+						  "SELECT pg_catalog.pg_walfile_name(pg_catalog.pg_start_backup('repmgr_standby_clone_%ld', %s))",
+						  time(NULL),
+						  fast_checkpoint ? "TRUE" : "FALSE");
+	}
+	else
+	{
+		sqlquery_snprintf(sqlquery,
+						  "SELECT pg_catalog.pg_xlogfile_name(pg_catalog.pg_start_backup('repmgr_standby_clone_%ld', %s))",
+						  time(NULL),
+						  fast_checkpoint ? "TRUE" : "FALSE");
+	}

 	log_verbose(LOG_DEBUG, "start_backup():\n%s\n", sqlquery);

@@ -1121,12 +1180,19 @@ start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint)


 bool
-stop_backup(PGconn *conn, char *last_wal_segment)
+stop_backup(PGconn *conn, char *last_wal_segment, int server_version_num)
 {
 	char		sqlquery[QUERY_STR_LEN];
 	PGresult   *res;

-	sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_xlogfile_name(pg_catalog.pg_stop_backup())");
+	if (server_version_num >= 100000)
+	{
+		sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_walfile_name(pg_catalog.pg_stop_backup())");
+	}
+	else
+	{
+		sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_xlogfile_name(pg_catalog.pg_stop_backup())");
+	}

 	res = PQexec(conn, sqlquery);
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -1151,19 +1217,12 @@ stop_backup(PGconn *conn, char *last_wal_segment)
 }


+
 bool
-set_config_bool(PGconn *conn, const char *config_param, bool state)
+_set_config(PGconn *conn, const char *config_param, const char *sqlquery)
 {
-	char		sqlquery[QUERY_STR_LEN];
 	PGresult   *res;

-	sqlquery_snprintf(sqlquery,
-					  "SET %s TO %s",
-					  config_param,
-					  state ? "TRUE" : "FALSE");
-
-	log_verbose(LOG_DEBUG, "set_config_bool():\n%s\n", sqlquery);
-
 	res = PQexec(conn, sqlquery);

 	if (PQresultStatus(res) != PGRES_COMMAND_OK)
@@ -1178,6 +1237,36 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
 	return true;
 }

+/*
+ * set_config()
+ *
+ * Build a "SET <config_param> TO '<config_value>'" statement, log it at
+ * verbose debug level and pass it to _set_config() for execution.
+ *
+ * The value is placed between single quotes as-is; no escaping is applied
+ * here.
+ *
+ * Returns the result of _set_config().
+ */
+bool
+set_config(PGconn *conn, const char *config_param,  const char *config_value)
+{
+	char		sqlquery[QUERY_STR_LEN];
+	bool		success;
+
+	sqlquery_snprintf(sqlquery, "SET %s TO '%s'", config_param, config_value);
+	log_verbose(LOG_DEBUG, "set_config():\n%s\n", sqlquery);
+
+	success = _set_config(conn, config_param, sqlquery);
+
+	return success;
+}
+
+/*
+ * set_config_bool()
+ *
+ * Build a "SET <config_param> TO TRUE" or "SET <config_param> TO FALSE"
+ * statement depending on "state", log it at verbose debug level and pass
+ * it to _set_config() for execution.
+ *
+ * Returns the result of _set_config().
+ */
+bool
+set_config_bool(PGconn *conn, const char *config_param, bool state)
+{
+	char		sqlquery[QUERY_STR_LEN];
+	const char *state_literal = "FALSE";
+
+	if (state)
+	{
+		state_literal = "TRUE";
+	}
+
+	sqlquery_snprintf(sqlquery, "SET %s TO %s", config_param, state_literal);
+	log_verbose(LOG_DEBUG, "set_config_bool():\n%s\n", sqlquery);
+
+	return _set_config(conn, config_param, sqlquery);
+}
+

 /*
 * witness_copy_node_records()
@@ -1437,10 +1526,11 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
 	bool		success = true;
 	struct tm	ts;

-	/* Only attempt to write a record if a connection handle was provided.
-	   Also check that the repmgr schema has been properly intialised - if
-	   not it means no configuration file was provided, which can happen with
-	   e.g. `repmgr standby clone`, and we won't know which schema to write to.
+	/*
+	 * Only attempt to write a record if a connection handle was provided.
+	 * Also check that the repmgr schema has been properly initialised - if
+	 * not it means no configuration file was provided, which can happen with
+	 * e.g. `repmgr standby clone`, and we won't know which schema to write to.
 	 */
 	if (conn != NULL && strcmp(repmgr_schema, DEFAULT_REPMGR_SCHEMA_PREFIX) != 0)
 	{
@@ -1628,6 +1718,110 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
 	return success;
 }

+/*
+ * create_checkpoint()
+ *
+ * Issue a CHECKPOINT command on the supplied connection.
+ *
+ * On failure the error is logged, the connection is closed and the
+ * process exits with ERR_DB_QUERY, so this function only returns when
+ * the command succeeded.
+ */
+void
+create_checkpoint(PGconn *conn)
+{
+	/* the statement is constant, so no formatting buffer is needed */
+	const char *sqlquery = "CHECKPOINT";
+	PGresult   *res;
+
+	log_verbose(LOG_DEBUG, "checkpoint:\n%s\n", sqlquery);
+
+	res = PQexec(conn, sqlquery);
+	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		log_err(_("Unable to create CHECKPOINT:\n%s\n"),
+				PQerrorMessage(conn));
+		/* PQclear() accepts NULL, so this is safe if PQexec() returned nothing */
+		PQclear(res);
+		PQfinish(conn);
+		exit(ERR_DB_QUERY);
+	}
+
+	/* release the result object so it is not leaked on the success path */
+	PQclear(res);
+
+	log_notice(_("CHECKPOINT created\n"));
+}
+
+
+/*
+ * update_node_record()
+ *
+ * Overwrite the repl_nodes row whose id is "node" with the supplied type,
+ * upstream node, cluster name, node name, conninfo, slot name, priority
+ * and active flag.
+ *
+ * If "upstream_node" is NO_UPSTREAM_NODE, a node of type "standby" is given
+ * the id returned by get_master_node_id() as its upstream, while any other
+ * node type gets NULL. A NULL or empty "slot_name" is stored as NULL.
+ *
+ * "action" is used for debug logging only.
+ *
+ * Returns true if the UPDATE command succeeded, otherwise false.
+ */
+bool
+update_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
+{
+	char		sqlquery[QUERY_STR_LEN];
+	char		upstream_node_id[MAXLEN];
+	char		slot_name_buf[MAXLEN];
+	const char *active_literal;
+	bool		success;
+	PGresult   *res;
+
+	/* XXX upstream node handling copied from create_node_record() */
+	if (upstream_node != NO_UPSTREAM_NODE)
+	{
+		maxlen_snprintf(upstream_node_id, "%i", upstream_node);
+	}
+	else if (strcmp(type, "standby") == 0)
+	{
+		/*
+		 * No explicit upstream node id provided for standby - attempt to
+		 * get primary node id
+		 */
+		int primary_node_id = get_master_node_id(conn, cluster_name);
+		maxlen_snprintf(upstream_node_id, "%i", primary_node_id);
+	}
+	else
+	{
+		maxlen_snprintf(upstream_node_id, "%s", "NULL");
+	}
+
+	/* an absent or empty slot name becomes an SQL NULL, otherwise a quoted literal */
+	if (slot_name == NULL || slot_name[0] == '\0')
+	{
+		maxlen_snprintf(slot_name_buf, "%s", "NULL");
+	}
+	else
+	{
+		maxlen_snprintf(slot_name_buf, "'%s'", slot_name);
+	}
+
+	if (active == true)
+	{
+		active_literal = "TRUE";
+	}
+	else
+	{
+		active_literal = "FALSE";
+	}
+
+	/* XXX convert to placeholder query */
+	sqlquery_snprintf(sqlquery,
+					  "UPDATE %s.repl_nodes SET "
+					  "       type = '%s', "
+					  "       upstream_node_id = %s, "
+					  "       cluster = '%s', "
+					  "       name = '%s', "
+					  "       conninfo = '%s', "
+					  "       slot_name = %s, "
+					  "       priority = %i, "
+					  "       active = %s "
+					  " WHERE id = %i ",
+					  get_repmgr_schema_quoted(conn),
+					  type,
+					  upstream_node_id,
+					  cluster_name,
+					  node_name,
+					  conninfo,
+					  slot_name_buf,
+					  priority,
+					  active_literal,
+					  node);
+
+	log_verbose(LOG_DEBUG, "update_node_record(): %s\n", sqlquery);
+
+	if (action != NULL)
+	{
+		log_verbose(LOG_DEBUG, "update_node_record(): action is \"%s\"\n", action);
+	}
+
+	res = PQexec(conn, sqlquery);
+	success = (res != NULL && PQresultStatus(res) == PGRES_COMMAND_OK);
+
+	if (success == false)
+	{
+		log_err(_("Unable to update node record\n%s\n"),
+				PQerrorMessage(conn));
+	}
+
+	/* the result is released on both the success and the failure path */
+	PQclear(res);
+
+	return success;
+}

 /*
 * Update node record following change of status
@@ -1789,7 +1983,16 @@ _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_

 	node_info->node_id = atoi(PQgetvalue(res, 0, 0));
 	node_info->type = parse_node_type(PQgetvalue(res, 0, 1));
-	node_info->upstream_node_id = atoi(PQgetvalue(res, 0, 2));
+
+	if (PQgetisnull(res, 0, 2))
+	{
+		node_info->upstream_node_id = NO_UPSTREAM_NODE;
+	}
+	else
+	{
+		node_info->upstream_node_id = atoi(PQgetvalue(res, 0, 2));
+	}
+
 	strncpy(node_info->name, PQgetvalue(res, 0, 3), MAXLEN);
 	strncpy(node_info->conninfo_str, PQgetvalue(res, 0, 4), MAXLEN);
 	strncpy(node_info->slot_name, PQgetvalue(res, 0, 5), MAXLEN);
@@ -1889,3 +2092,64 @@ get_data_checksum_version(const char *data_directory)

 	return (int)control_file.data_checksum_version;
 }
+
+
+
+/* ========================== */
+/* backported from repmgr 4.x */
+/* ========================== */
+
+/*
+ * parse_lsn()
+ *
+ * Convert a textual LSN of the form "<hex>/<hex>" into an XLogRecPtr; the
+ * first number supplies the upper 32 bits and the second the lower 32 bits.
+ *
+ * Returns InvalidXLogRecPtr if two hexadecimal numbers could not be read
+ * from "str".
+ */
+XLogRecPtr
+parse_lsn(const char *str)
+{
+	uint32		hi = 0;
+	uint32		lo = 0;
+
+	if (sscanf(str, "%x/%x", &hi, &lo) != 2)
+		return InvalidXLogRecPtr;
+
+	return (((XLogRecPtr) hi) << 32) + (XLogRecPtr) lo;
+}
+
+
+/*
+ * get_last_wal_receive_location()
+ *
+ * Ask the server for its last WAL receive location and return it as an
+ * XLogRecPtr.
+ *
+ * Returns InvalidXLogRecPtr if the query does not return tuples, or if
+ * parse_lsn() cannot parse the returned value.
+ */
+XLogRecPtr
+get_last_wal_receive_location(PGconn *conn)
+{
+	const char *query;
+	PGresult   *res;
+	XLogRecPtr	lsn;
+
+	/* servers reporting version 100000 or later use the "wal"/"lsn" function name */
+	query = (PQserverVersion(conn) >= 100000)
+		? "SELECT pg_catalog.pg_last_wal_receive_lsn()"
+		: "SELECT pg_catalog.pg_last_xlog_receive_location()";
+
+	res = PQexec(conn, query);
+
+	lsn = (PQresultStatus(res) == PGRES_TUPLES_OK)
+		? parse_lsn(PQgetvalue(res, 0, 0))
+		: InvalidXLogRecPtr;
+
+	PQclear(res);
+
+	return lsn;
+}
+
+
+/*
+ * is_server_available()
+ *
+ * Ping the server described by "conninfo" with PQping().
+ *
+ * The ping status is always logged at verbose debug level, and is logged
+ * again as a warning when it is anything other than PQPING_OK.
+ *
+ * Returns true only if the ping status is PQPING_OK.
+ */
+bool
+is_server_available(const char *conninfo)
+{
+	PGPing		ping_result = PQping(conninfo);
+	bool		available = (ping_result == PQPING_OK);
+
+	log_verbose(LOG_DEBUG, "is_server_available(): ping status for \"%s\" is %i\n", conninfo, (int)ping_result);
+
+	if (available == false)
+	{
+		log_warning("is_server_available(): ping status for \"%s\" is %i\n", conninfo, (int)ping_result);
+	}
+
+	return available;
+}
--- a/dbutils.h
+++ b/dbutils.h
@@ -1,7 +1,7 @@
 /*
 * dbutils.h
 *
- * Copyright (c) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -28,6 +28,8 @@
 #include "strutil.h"


+/*
+ * Expand an LSN into two comma-separated uint32 values: its upper 32 bits
+ * followed by its lower 32 bits. The expansion is an argument pair, so it
+ * cannot be wrapped in outer parentheses; the argument itself is
+ * parenthesised so that expressions such as "a + b" expand correctly.
+ */
+#define format_lsn(x) (uint32) ((x) >> 32), (uint32) (x)
+
 typedef enum {
 	UNKNOWN = 0,
 	MASTER,
@@ -122,20 +124,28 @@ char       *get_repmgr_schema_quoted(PGconn *conn);
 bool		create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, PQExpBufferData *error_msg);
 int			get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
 bool		drop_replication_slot(PGconn *conn, char *slot_name);
-bool		start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
-bool		stop_backup(PGconn *conn, char *last_wal_segment);
+bool		start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint, int server_version_num);
+bool		stop_backup(PGconn *conn, char *last_wal_segment, int server_version_num);
+bool		set_config(PGconn *conn, const char *config_param,  const char *config_value);
 bool		set_config_bool(PGconn *conn, const char *config_param, bool state);
 bool		witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
 bool		create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
 bool		delete_node_record(PGconn *conn, int node, char *action);
 int			get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
 int			get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info);
+bool        update_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
 bool        update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
 bool        update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
 bool        create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
+void        create_checkpoint(PGconn *conn);

 int		    get_node_replication_state(PGconn *conn, char *node_name, char *output);
 t_server_type parse_node_type(const char *type);
 int			get_data_checksum_version(const char *data_directory);
-#endif

+/* backported from repmgr 4.x */
+XLogRecPtr	parse_lsn(const char *str);
+XLogRecPtr	get_last_wal_receive_location(PGconn *conn);
+bool		is_server_available(const char *conninfo);
+
+#endif
--- a/dirmod.c
+++ b/dirmod.c
@@ -3,7 +3,7 @@
 * dirmod.c
 *	  directory handling functions
 *
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
--- a/dirmod.h
+++ b/dirmod.h
@@ -1,6 +1,6 @@
 /*
 * dirmod.h
- * Copyright (c) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
--- a/docs/repmgrd-failover-mechanism.md
+++ b/docs/repmgrd-failover-mechanism.md
@@ -0,0 +1,75 @@
+repmgrd's failover algorithm
+============================
+
+When implementing automatic failover, there are two factors which are critical in
+ensuring the desired result is achieved:
+
+  - has the master node genuinely failed?
+  - which is the best node to promote to the new master?
+
+This document outlines repmgrd's decision-making process during automatic failover
+for standbys directly connected to the master node.
+
+
+Master node failure detection
+-----------------------------
+
+If a `repmgrd` instance running on a PostgreSQL standby node is unable to connect to
+the master node, this doesn't necessarily mean that the master is down and a
+failover is required. Factors such as network connectivity issues could mean that
+even though the standby node is isolated, the replication cluster as a whole
+is functioning correctly, and promoting the standby without further verification
+could result in a "split-brain" situation.
+
+In the event that `repmgrd` is unable to connect to the master node, it will attempt
+to reconnect to the master server several times (as defined by the `reconnect_attempts`
+parameter in `repmgr.conf`), with reconnection attempts occurring at the interval
+specified by `reconnect_interval`. This happens to verify that the master is definitively
+not accessible (e.g. that connection was not lost due to a brief network glitch).
+
+Appropriate values for these settings will depend very much on the replication
+cluster environment. There will necessarily be a trade-off between the time it
+takes to assume the master is not reachable, and the reliability of that conclusion.
+A standby in a different physical location to the master will probably need a longer
+check interval to rule out possible network issues, whereas one located in the same
+rack with a direct connection between servers could perform the check very quickly.
+
+Note that it's possible the master comes back online after this point is reached,
+but before a new master has been selected; in this case it will be noticed
+during the selection of a new master and no actual failover will take place.
+
+Promotion candidate selection
+-----------------------------
+
+Once `repmgrd` has decided the master is definitively unreachable, the following
+checks will be carried out:
+
+* attempts to connect to all other nodes in the cluster (including the witness
+  node, if defined) to establish the state of the cluster, including their
+  current LSN
+
+* If less than half of the nodes are visible (from the viewpoint
+  of this node), `repmgrd` will not take any further action. This is to ensure that
+  e.g. if a replication cluster is spread over multiple data centres, a split-brain
+  situation does not occur if there is a network failure between datacentres. Note
+  that if nodes are split evenly between data centres, a witness server can be
+  used to establish the "majority" data centre.
+
+* `repmgrd` polls all visible servers and waits for each node to return a valid LSN;
+  it updates the LSN previously stored for this node if it has increased since
+  the initial check
+
+* once all LSNs have been retrieved, `repmgrd` will check for the highest LSN; if
+  its own node has the highest LSN, it will attempt to promote itself (using the
+  command defined in `promote_command` in `repmgr.conf`). Note that if using
+  `repmgr standby promote` as the promotion command, and the original master becomes available
+  before the promotion takes effect, `repmgr` will return an error and no promotion
+  will take place, and `repmgrd` will resume monitoring as usual.
+
+* if the node is not the promotion candidate, `repmgrd` will execute the
+  `follow_command` defined in `repmgr.conf`. If using `repmgr standby follow` here,
+  `repmgr` will attempt to detect the new master node and attach to that.
+
+
+
+
--- a/docs/repmgrd-node-fencing.md
+++ b/docs/repmgrd-node-fencing.md
@@ -0,0 +1,160 @@
+Fencing a failed master node with repmgrd and pgbouncer
+=======================================================
+
+With automatic failover, it's essential to ensure that a failed master
+remains inaccessible to your application, even if it comes back online
+again, to avoid a split-brain situation.
+
+By using `pgbouncer` together with `repmgrd`, it's possible to combine
+automatic failover with a process to isolate the failed master from
+your application and ensure that all connections which should go to
+the master are directed there smoothly without having to reconfigure
+your application. (Note that as a connection pooler, `pgbouncer` can
+benefit your application in other ways, but those are beyond the scope
+of this document).
+
+* * *
+
+> *WARNING*: automatic failover is tricky to get right. This document
+> demonstrates one possible implementation method, however you should
+> carefully configure and test any setup to suit the needs of your own
+> replication cluster/application.
+
+* * *
+
+In a failover situation, `repmgrd` promotes a standby to master by executing
+the command defined in `promote_command`. Normally this would be something like:
+
+    repmgr standby promote -f /etc/repmgr.conf
+
+By wrapping this in a custom script which adjusts the `pgbouncer` configuration
+on all nodes, it's possible to fence the failed master and redirect write
+connections to the new master.
+
+The script consists of three sections:
+
+* commands to pause `pgbouncer` on all nodes
+* the promotion command itself
+* commands to reconfigure and restart `pgbouncer` on all nodes
+
+Note that it requires password-less SSH access between all nodes to be able to
+update the `pgbouncer` configuration files.
+
+For the purposes of this demonstration, we'll assume there are 3 nodes (master
+and two standbys), with `pgbouncer` listening on port 6432 handling connections
+to a database called `appdb`.  The `postgres` system user must have write
+access to the `pgbouncer` configuration files on all nodes. We'll assume
+there's a main `pgbouncer` configuration file, `/etc/pgbouncer.ini`, which uses
+the `%include` directive (available from PgBouncer 1.6) to include a separate
+configuration file, `/etc/pgbouncer.database.ini`, which will be modified by
+`repmgr`.
+
+* * *
+
+> *NOTE*: in this self-contained demonstration, `pgbouncer` is running on the
+> database servers, however in a production environment it will make more
+> sense to run `pgbouncer` on either separate nodes or the application server.
+
+* * *
+
+`/etc/pgbouncer.ini` should look something like this:
+
+    [pgbouncer]
+
+    logfile = /var/log/pgbouncer/pgbouncer.log
+    pidfile = /var/run/pgbouncer/pgbouncer.pid
+
+    listen_addr = *
+    listen_port = 6432
+    unix_socket_dir = /tmp
+
+    auth_type = trust
+    auth_file = /etc/pgbouncer.auth
+
+    admin_users = postgres
+    stats_users = postgres
+
+    pool_mode = transaction
+
+    max_client_conn = 100
+    default_pool_size = 20
+    min_pool_size = 5
+    reserve_pool_size = 5
+    reserve_pool_timeout = 3
+
+    log_connections = 1
+    log_disconnections = 1
+    log_pooler_errors = 1
+
+    %include /etc/pgbouncer.database.ini
+
+The actual script is as follows; adjust the configurable items as appropriate:
+
+`/var/lib/postgres/repmgr/promote.sh`
+
+
+    #!/usr/bin/env bash
+    set -u
+    set -e
+
+    # Configurable items
+    PGBOUNCER_HOSTS="node1 node2 node3"
+    PGBOUNCER_DATABASE_INI="/etc/pgbouncer.database.ini"
+    PGBOUNCER_DATABASE="appdb"
+    PGBOUNCER_PORT=6432
+
+    REPMGR_DB="repmgr"
+    REPMGR_USER="repmgr"
+    REPMGR_SCHEMA="repmgr_test"
+
+    # 1. Pause running pgbouncer instances
+    for HOST in $PGBOUNCER_HOSTS
+    do
+        psql -t -c "pause" -h $HOST -p $PGBOUNCER_PORT -U postgres pgbouncer
+    done
+
+    # 2. Promote this node from standby to master
+
+    repmgr standby promote -f /etc/repmgr.conf
+
+    # 3. Reconfigure pgbouncer instances
+
+    PGBOUNCER_DATABASE_INI_NEW="/tmp/pgbouncer.database.ini"
+
+    for HOST in $PGBOUNCER_HOSTS
+    do
+        # Recreate the pgbouncer config file
+        echo -e "[databases]\n" > $PGBOUNCER_DATABASE_INI_NEW
+
+        psql -d $REPMGR_DB -U $REPMGR_USER -t -A \
+          -c "SELECT '${PGBOUNCER_DATABASE}-rw= ' || conninfo || ' application_name=pgbouncer_${HOST}' \
+              FROM ${REPMGR_SCHEMA}.repl_nodes \
+              WHERE active = TRUE AND type='master'" >> $PGBOUNCER_DATABASE_INI_NEW
+
+        psql -d $REPMGR_DB -U $REPMGR_USER -t -A \
+          -c "SELECT '${PGBOUNCER_DATABASE}-ro= ' || conninfo || ' application_name=pgbouncer_${HOST}' \
+              FROM ${REPMGR_SCHEMA}.repl_nodes \
+              WHERE name='${HOST}'" >> $PGBOUNCER_DATABASE_INI_NEW
+
+        rsync $PGBOUNCER_DATABASE_INI_NEW $HOST:$PGBOUNCER_DATABASE_INI
+
+        psql -tc "reload" -h $HOST -p $PGBOUNCER_PORT -U postgres pgbouncer
+        psql -tc "resume" -h $HOST -p $PGBOUNCER_PORT -U postgres pgbouncer
+
+    done
+
+    # Clean up generated file
+    rm $PGBOUNCER_DATABASE_INI_NEW
+
+    echo "Reconfiguration of pgbouncer complete"
+
+Script and template file should be installed on each node where
+`repmgrd` is running.
+
+Finally, set `promote_command` in `repmgr.conf` on each node to
+point to the custom promote script:
+
+    promote_command=/var/lib/postgres/repmgr/promote.sh
+
+and reload/restart any running `repmgrd` instances for the changes to take
+effect.
--- a/errcode.h
+++ b/errcode.h
@@ -1,6 +1,6 @@
 /*
 * errcode.h
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
--- a/expected/repmgr_funcs.out
+++ b/expected/repmgr_funcs.out
@@ -0,0 +1,18 @@
+/*
+ * repmgr_function.sql
+ * Copyright (c) 2ndQuadrant, 2010-2017
+ *
+ */
+-- SET SEARCH_PATH TO 'repmgr';
+CREATE FUNCTION repmgr_update_standby_location(text) RETURNS boolean
+AS '$libdir/repmgr_funcs', 'repmgr_update_standby_location'
+LANGUAGE C STRICT;
+CREATE FUNCTION repmgr_get_last_standby_location() RETURNS text
+AS '$libdir/repmgr_funcs', 'repmgr_get_last_standby_location'
+LANGUAGE C STRICT;
+CREATE FUNCTION repmgr_update_last_updated() RETURNS TIMESTAMP WITH TIME ZONE
+AS '$libdir/repmgr_funcs', 'repmgr_update_last_updated'
+LANGUAGE C STRICT;
+CREATE FUNCTION repmgr_get_last_updated() RETURNS TIMESTAMP WITH TIME ZONE
+AS '$libdir/repmgr_funcs', 'repmgr_get_last_updated'
+LANGUAGE C STRICT;
--- a/expected/repmgr_test.out
+++ b/expected/repmgr_test.out
@@ -0,0 +1,24 @@
+select * from repmgr_update_standby_location('');
+ repmgr_update_standby_location 
+--------------------------------
+ f
+(1 row)
+
+select * from repmgr_get_last_standby_location();
+ repmgr_get_last_standby_location 
+----------------------------------
+ 
+(1 row)
+
+select * from repmgr_update_last_updated();
+ repmgr_update_last_updated 
+----------------------------
+ 
+(1 row)
+
+select * from repmgr_get_last_updated();
+ repmgr_get_last_updated 
+-------------------------
+ 
+(1 row)
+
--- a/log.c
+++ b/log.c
@@ -1,6 +1,6 @@
 /*
 * log.c - Logging methods
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This module is a set of methods for logging (currently only syslog)
 *
@@ -44,10 +44,15 @@ static void _stderr_log_with_level(const char *level_name, int level, const char
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));

 int			log_type = REPMGR_STDERR;
-int			log_level = LOG_NOTICE;
-int			last_log_level = LOG_NOTICE;
+int			log_level = LOG_INFO;
+int			last_log_level = LOG_INFO;
 int			verbose_logging = false;
 int			terse_logging = false;
+/*
+ * Global variable to be set by the main application to ensure any log output
+ * emitted before logger_init is called, is output in the correct format
+ */
+int			logger_output_mode = OM_DAEMON;

 extern void
 stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
@@ -62,22 +67,31 @@ stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
 static void
 _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap)
 {
-	time_t		t;
-	struct tm  *tm;
-	char		buff[100];
+	char		buf[100];

 	/*
 	 * Store the requested level so that if there's a subsequent
-	 * log_hint(), we can suppress that if appropriate.
+	 * log_hint() or log_detail(), we can suppress that if appropriate.
 	 */
 	last_log_level = level;

 	if (log_level >= level)
 	{
-		time(&t);
-		tm = localtime(&t);
-		strftime(buff, 100, "[%Y-%m-%d %H:%M:%S]", tm);
-		fprintf(stderr, "%s [%s] ", buff, level_name);
+
+		/* Format log line prefix with timestamp if in daemon mode */
+		if (logger_output_mode == OM_DAEMON)
+		{
+			time_t		t;
+			struct tm  *tm;
+			time(&t);
+			tm = localtime(&t);
+			strftime(buf, 100, "[%Y-%m-%d %H:%M:%S]", tm);
+			fprintf(stderr, "%s [%s] ", buf, level_name);
+		}
+		else
+		{
+			fprintf(stderr, "%s: ", level_name);
+		}

 		vfprintf(stderr, fmt, ap);

@@ -99,6 +113,20 @@ log_hint(const char *fmt, ...)
 }


+/*
+ * log_detail()
+ *
+ * Emit a "DETAIL" line via _stderr_log_with_level(), using the level
+ * stored in last_log_level. Nothing is emitted when terse logging is
+ * enabled.
+ */
+void
+log_detail(const char *fmt, ...)
+{
+	va_list		args;
+
+	/* DETAIL lines are dropped entirely when terse logging is enabled */
+	if (terse_logging != false)
+		return;
+
+	va_start(args, fmt);
+	_stderr_log_with_level("DETAIL", last_log_level, fmt, args);
+	va_end(args);
+}
+
+
 void
 log_verbose(int level, const char *fmt, ...)
 {
@@ -176,6 +204,13 @@ logger_init(t_configuration_options *opts, const char *ident)
 			stderr_log_warning(_("Invalid log level \"%s\" (available values: DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG)\n"), level);
 	}

+	/*
+	 * STDERR only logging requested - finish here without setting up any further
+	 * logging facility.
+	 */
+	if (logger_output_mode == OM_COMMAND_LINE)
+		return true;
+
 	if (facility && *facility)
 	{

@@ -236,9 +271,10 @@ logger_init(t_configuration_options *opts, const char *ident)
 		stderr_log_notice(_("Redirecting logging output to '%s'\n"), opts->logfile);
 		fd = freopen(opts->logfile, "a", stderr);

-		/* It's possible freopen() may still fail due to e.g. a race condition;
-		   as it's not feasible to restore stderr after a failed freopen(),
-		   we'll write to stdout as a last resort.
+		/*
+		 * It's possible freopen() may still fail due to e.g. a race condition;
+		 * as it's not feasible to restore stderr after a failed freopen(),
+		 * we'll write to stdout as a last resort.
 		 */
 		if (fd == NULL)
 		{
--- a/log.h
+++ b/log.h
@@ -1,6 +1,6 @@
 /*
 * log.h
- * Copyright (c) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -25,6 +25,9 @@
 #define REPMGR_SYSLOG 1
 #define REPMGR_STDERR 2

+#define OM_COMMAND_LINE 1
+#define OM_DAEMON       2
+
 extern void
 stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
@@ -123,6 +126,8 @@ bool		logger_shutdown(void);
 void		logger_set_verbose(void);
 void		logger_set_terse(void);

+void		log_detail(const char *fmt, ...)
+__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2)));
 void		log_hint(const char *fmt, ...)
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2)));
 void		log_verbose(int level, const char *fmt, ...)
@@ -132,5 +137,6 @@ extern int	log_type;
 extern int	log_level;
 extern int	verbose_logging;
 extern int	terse_logging;
+extern int	logger_output_mode;

 #endif /* _REPMGR_LOG_H_ */
--- a/repmgr.c
+++ b/repmgr.c
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -26,11 +26,14 @@
                 # the server's hostname or another identifier unambiguously
                 # associated with the server to avoid confusion

-# Database connection information as a conninfo string
-# This must be accessible to all servers in the cluster; for details see:
+# Database connection information as a conninfo string (this must be a
+# keyword/value string, not a connection URI).
 #
 #   https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
 #
+# All servers in the cluster must be able to access the database
+# using this connection string.
+#
 #conninfo='host=192.168.204.104 dbname=repmgr user=repmgr'
 #
 # If repmgrd is in use, consider explicitly setting `connect_timeout` in the
@@ -63,8 +66,14 @@
 # -------------------------------

 # Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
-# (default: NOTICE)
-#loglevel=NOTICE
+# (default: INFO)
+#loglevel=INFO
+
+# Note that logging facility settings will only apply to `repmgrd` by default;
+# `repmgr` will always write to STDERR unless the switch `--log-to-file` is
+# supplied, in which case it will log to the same destination as `repmgrd`.
+# This is mainly intended for those cases when `repmgr` is executed directly
+# by `repmgrd`.

 # Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
 # (default: STDERR)
@@ -137,8 +146,15 @@
 # external command arguments. Values shown are examples.

 #pg_ctl_options='-s'
-#pg_basebackup_options='--xlog-method=s'
+#pg_basebackup_options='--label=repmgr_backup'

+# Host name of the barman server; repmgr connects to this server over SSH
+# (passwordless ssh keys should be in place)
+#barman_server='backup_server'
+# If you are placing the barman.conf file in a non-standard path, or using
+# a name other than barman.conf, use this parameter to specify the path and
+# name of the barman configuration file.
+#barman_config='/path/to/barman.conf'

 # Standby clone settings
 # ----------------------
@@ -160,9 +176,11 @@
 # These settings are only applied when repmgrd is running. Values shown
 # are defaults.

-# Number of seconds to wait for a response from the primary server before
-# deciding it has failed.
+# monitoring interval in seconds; default is 2
+#monitor_interval_secs=2

+# Maximum number of seconds to wait for a response from the primary server
+# before deciding it has failed.
 #master_response_timeout=60

 # Number of attempts at what interval (in seconds) to try and
@@ -187,9 +205,6 @@
 #promote_command='repmgr standby promote -f /path/to/repmgr.conf'
 #follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'

-# monitoring interval in seconds; default is 2
-#monitor_interval_secs=2
-
 # change wait time for primary; before we bail out and exit when the primary
 # disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
 # seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
--- a/repmgr.h
+++ b/repmgr.h
@@ -1,6 +1,6 @@
 /*
 * repmgr.h
- * Copyright (c) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -55,18 +55,22 @@
 #define OPT_COPY_EXTERNAL_CONFIG_FILES   4
 #define OPT_CONFIG_ARCHIVE_DIR           5
 #define OPT_PG_REWIND                    6
-#define OPT_PWPROMPT                     7
 #define OPT_CSV                          8
 #define OPT_NODE                         9
 #define OPT_WITHOUT_BARMAN               10
 #define OPT_NO_UPSTREAM_CONNECTION       11
 #define OPT_REGISTER_WAIT                12
 #define OPT_CLUSTER                      13
+#define OPT_LOG_TO_FILE                  14
+#define OPT_UPSTREAM_CONNINFO            15
+#define OPT_NO_CONNINFO_PASSWORD         16
+#define OPT_REPLICATION_USER             17

 /* deprecated command line options */
-#define OPT_INITDB_NO_PWPROMPT           999
-#define OPT_IGNORE_EXTERNAL_CONFIG_FILES 998
+#define OPT_INITDB_NO_PWPROMPT           998
+#define OPT_IGNORE_EXTERNAL_CONFIG_FILES 999

+/* values for --copy-external-config-files */
 #define CONFIG_FILE_SAMEPATH 1
 #define CONFIG_FILE_PGDATA 2

@@ -74,53 +78,102 @@
 /* Run time options type */
 typedef struct
 {
+	/* general repmgr options */
+	char		config_file[MAXPGPATH];
+	bool		verbose;
+	bool		terse;
+	bool		force;
+	char		pg_bindir[MAXLEN]; /* overrides setting in repmgr.conf */
+
+	/* logging parameters */
+	char		loglevel[MAXLEN];  /* overrides setting in repmgr.conf */
+	bool		log_to_file;
+
+	/* connection parameters */
 	char		dbname[MAXLEN];
 	char		host[MAXLEN];
 	char		username[MAXLEN];
 	char		dest_dir[MAXPGPATH];
-	char		config_file[MAXPGPATH];
 	char		remote_user[MAXLEN];
 	char		superuser[MAXLEN];
+	char		masterport[MAXLEN];
+	bool		conninfo_provided;
+	bool		connection_param_provided;
+	bool		host_param_provided;
+
+	/* standby clone parameters */
+	bool		wal_keep_segments_used;
 	char		wal_keep_segments[MAXLEN];
-	bool		verbose;
-	bool		terse;
-	bool		force;
-	bool		wait_for_master;
 	bool		ignore_rsync_warn;
-	bool		witness_pwprompt;
 	bool		rsync_only;
 	bool		fast_checkpoint;
-	bool		csv_mode;
 	bool		without_barman;
 	bool		no_upstream_connection;
+	bool		no_conninfo_password;
 	bool		copy_external_config_files;
 	int			copy_external_config_files_destination;
-	bool		wait_register_sync;
-	int			wait_register_sync_seconds;
-	char		masterport[MAXLEN];
-	/*
-	 * configuration file parameters which can be overridden on the
-	 * command line
-	 */
-	char		loglevel[MAXLEN];
-
-	/* parameter used by STANDBY SWITCHOVER */
-	char		remote_config_file[MAXLEN];
-	char		pg_rewind[MAXPGPATH];
-	char		pg_ctl_mode[MAXLEN];
-	/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
-	char		config_archive_dir[MAXLEN];
-	/* parameter used by CLUSTER CLEANUP */
-	int			keep_history;
-	/* parameter used by {STANDBY|WITNESS} UNREGISTER */
-	int			node;
-
-	char		pg_bindir[MAXLEN];
+	char		upstream_conninfo[MAXLEN];
+	char		replication_user[MAXLEN];

 	char		recovery_min_apply_delay[MAXLEN];
+
+	/* standby register parameters */
+	bool		wait_register_sync;
+	int			wait_register_sync_seconds;
+
+	/* witness create parameters */
+	bool		witness_pwprompt;
+
+	/* standby follow parameters */
+	bool		wait_for_master;
+
+	/* cluster {show|matrix|crosscheck} parameters */
+	bool		csv_mode;
+
+	/* cluster cleanup parameters */
+	int			keep_history;
+
+	/* standby switchover parameters */
+	char		remote_config_file[MAXLEN];
+	bool		pg_rewind_supplied;
+	char		pg_rewind[MAXPGPATH];
+	char		pg_ctl_mode[MAXLEN];
+
+	/* standby {archive_config | restore_config} parameters  */
+	char		config_archive_dir[MAXLEN];
+
+	/* {standby|witness} unregister parameters */
+	int			node;
+
 }	t_runtime_options;

-#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, false, false, CONFIG_FILE_SAMEPATH, false, 0, "", "", "", "", "fast", "", 0, UNKNOWN_NODE_ID, "", ""}
+#define T_RUNTIME_OPTIONS_INITIALIZER { \
+		/* general repmgr options */	\
+		"", false, false, false, "",	\
+		/* logging parameters */ \
+		"", false,                      \
+		/* connection parameters */		\
+		"", "", "", "", "", "", "", 	\
+		false, false, false,		    \
+		/* standby clone parameters */  \
+		false, DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, \
+		false, CONFIG_FILE_SAMEPATH, "", "", "", \
+		/* standby register parameters */ \
+	    false, 0,							 \
+		/* witness create parameters */ \
+		false,                          \
+		/* standby follow parameters */ \
+		false,                          \
+		/* cluster {show|matrix|crosscheck} parameters */ \
+		false,                          \
+		/* cluster cleanup parameters */ \
+		0,                              \
+		/* standby switchover parameters */ \
+		"", false, "", "fast",          \
+		/* standby {archive_config | restore_config} parameters  */ \
+		"",                             \
+		/* {standby|witness} unregister parameters */ \
+		UNKNOWN_NODE_ID }

 struct BackupLabel
 {
@@ -139,9 +192,10 @@ typedef struct
 {
 	char		slot[MAXLEN];
 	char		xlog_method[MAXLEN];
+	bool		no_slot; /* from PostgreSQL 10 */
 } t_basebackup_options;

-#define T_BASEBACKUP_OPTIONS_INITIALIZER { "", "" }
+#define T_BASEBACKUP_OPTIONS_INITIALIZER { "", "", false }

 typedef struct
 {
--- a/repmgr.sql
+++ b/repmgr.sql
@@ -1,7 +1,7 @@
 /*
 * repmgr.sql
 *
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 */

--- a/repmgrd.c
+++ b/repmgrd.c
@@ -1,7 +1,7 @@
 /*
 * repmgrd.c - Replication manager daemon
 *
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This module connects to the nodes of a replication cluster and monitors
 * how far are they from master
@@ -30,18 +30,10 @@
 #include <stdlib.h>
 #include <unistd.h>

-
-
 #include "repmgr.h"
-#include "config.h"
 #include "log.h"
-#include "strutil.h"
 #include "version.h"

-/* Required PostgreSQL headers */
-#include "access/xlogdefs.h"
-#include "pqexpbuffer.h"
-
 /* Message strings passed in repmgrSharedState->location */

 #define PASSIVE_NODE "PASSIVE_NODE"
@@ -71,6 +63,7 @@ bool		failover_done = false;
 bool		manual_mode_upstream_disconnected = false;

 char	   *pid_file = NULL;
+int			server_version_num = 0;

 static void help(void);
 static void usage(void);
@@ -145,8 +138,6 @@ main(int argc, char **argv)

 	FILE	   *fd;

-	int			server_version_num = 0;
-
 	set_progname(argv[0]);

 	/* Disallow running as root to prevent directory ownership problems */
@@ -207,6 +198,13 @@ main(int argc, char **argv)
 		}
 	}

+
+	/*
+	 * Tell the logger we're a daemon - this will ensure any output logged
+	 * before the logger is initialized will be formatted correctly
+	 */
+	logger_output_mode = OM_DAEMON;
+
 	/*
 	 * Parse the configuration file, if provided. If no configuration file
 	 * was provided, or one was but was incomplete, parse_config() will
@@ -247,6 +245,7 @@ main(int argc, char **argv)
 	}

 	logger_init(&local_options, progname());
+
 	if (verbose)
 		logger_set_verbose();

@@ -515,6 +514,33 @@ main(int argc, char **argv)
 				else if (node_info.type == STANDBY)
 				{
 					log_info(_("starting continuous standby node monitoring\n"));
+
+					/*
+					 * Call update_shared_memory() so it's not stuck at 0/0; this
+					 * will otherwise cause an infinite loop on other repmgrds if
+					 * this repmgrd does not enter failover.
+					 *
+					 * NOTE: this is a temporary workaround for a structural
+					 * issue resolved through architectural redesign in repmgr 4.
+					 */
+					if (local_options.failover == MANUAL_FAILOVER)
+					{
+						update_shared_memory(PASSIVE_NODE);
+					}
+					else
+					{
+						PQExpBufferData current_lsn;
+
+						XLogRecPtr last_wal_receive_location = get_last_wal_receive_location(my_local_conn);
+
+						initPQExpBuffer(&current_lsn);
+						appendPQExpBuffer(&current_lsn, "%X/%X",
+										  format_lsn(last_wal_receive_location));
+
+						update_shared_memory(current_lsn.data);
+
+						termPQExpBuffer(&current_lsn);
+					}
 				}

 				do
@@ -647,15 +673,15 @@ witness_monitor(void)
 			}
 			else
 			{
-				log_debug(_("new master found with node ID: %i\n"), master_options.node);
+				log_info(_("new master found with node ID: %i\n"), master_options.node);
 				connection_ok = true;

 				/*
 				 * Update the repl_nodes table from the new master to reflect the changed
 				 * node configuration
 				 *
-				 * XXX it would be neat to be able to handle this with e.g. table-based
-				 * logical replication
+				 * It would be neat to be able to handle this with e.g. table-based
+				 * logical replication if available in core
 				 */
 				witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);

@@ -710,26 +736,46 @@ witness_monitor(void)
 		return;
 	}

-	strcpy(monitor_witness_timestamp, PQgetvalue(res, 0, 0));
+	strncpy(monitor_witness_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
 	PQclear(res);

 	/*
 	 * Build the SQL to execute on master
 	 */
-	sqlquery_snprintf(sqlquery,
-					  "INSERT INTO %s.repl_monitor "
-					  "           (primary_node, standby_node, "
-					  "            last_monitor_time, last_apply_time, "
-					  "            last_wal_primary_location, last_wal_standby_location, "
-					  "            replication_lag, apply_lag )"
-					  "      VALUES(%d, %d, "
-					  "             '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
-					  "             pg_catalog.pg_current_xlog_location(), NULL, "
-					  "             0, 0) ",
-					  get_repmgr_schema_quoted(my_local_conn),
-					  master_options.node,
-					  local_options.node,
-					  monitor_witness_timestamp);
+	if (server_version_num >= 100000)
+	{
+		sqlquery_snprintf(sqlquery,
+						  "INSERT INTO %s.repl_monitor "
+						  "           (primary_node, standby_node, "
+						  "            last_monitor_time, last_apply_time, "
+						  "            last_wal_primary_location, last_wal_standby_location, "
+						  "            replication_lag, apply_lag )"
+						  "      VALUES(%d, %d, "
+						  "             '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
+						  "             pg_catalog.pg_current_wal_lsn(), NULL, "
+						  "             0, 0) ",
+						  get_repmgr_schema_quoted(my_local_conn),
+						  master_options.node,
+						  local_options.node,
+						  monitor_witness_timestamp);
+	}
+	else
+	{
+		sqlquery_snprintf(sqlquery,
+						  "INSERT INTO %s.repl_monitor "
+						  "           (primary_node, standby_node, "
+						  "            last_monitor_time, last_apply_time, "
+						  "            last_wal_primary_location, last_wal_standby_location, "
+						  "            replication_lag, apply_lag )"
+						  "      VALUES(%d, %d, "
+						  "             '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
+						  "             pg_catalog.pg_current_xlog_location(), NULL, "
+						  "             0, 0) ",
+						  get_repmgr_schema_quoted(my_local_conn),
+						  master_options.node,
+						  local_options.node,
+						  monitor_witness_timestamp);
+	}

 	/*
 	 * Execute the query asynchronously, but don't check for a result. We will
@@ -774,7 +820,6 @@ standby_monitor(void)
 	PGconn	   *upstream_conn;
 	char		upstream_conninfo[MAXCONNINFO];
 	int			upstream_node_id;
-	t_node_info upstream_node;

 	int			active_master_id;
 	const char *upstream_node_type = NULL;
@@ -829,6 +874,8 @@ standby_monitor(void)
 			: "upstream";
 	}

+
+
 	/*
 	 * Check that the upstream node is still available
 	 * If not, initiate failover process
@@ -837,9 +884,7 @@ standby_monitor(void)
 	 * local_options.reconnect_interval seconds
 	 */

-	check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
-
-	if (PQstatus(upstream_conn) != CONNECTION_OK)
+	if (!check_connection(&upstream_conn, upstream_node_type, upstream_conninfo))
 	{
 		int previous_master_node_id = master_options.node;

@@ -956,6 +1001,8 @@ standby_monitor(void)
 			 * Failover handling is handled differently depending on whether
 			 * the failed node is the master or a cascading standby
 			 */
+			t_node_info upstream_node;
+
 			upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);

 			if (upstream_node.type == MASTER)
@@ -1013,8 +1060,8 @@ standby_monitor(void)
 				 *
 				 * We should log a message so the user knows of the situation at hand.
 				 *
-				 * XXX check if the original master is still active and display a
-				 * warning
+				 * XXX check if the original master is still active and display a warning
+				 * XXX add event notification
 				 */
 				log_err(_("It seems this server was promoted manually (not by repmgr) so you might by in the presence of a split-brain.\n"));
 				log_err(_("Check your cluster and manually fix any anomaly.\n"));
@@ -1059,9 +1106,6 @@ standby_monitor(void)
 	 * from the upstream node to write monitoring information
 	 */

-	/* XXX not used? */
-	upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);
-
 	sprintf(sqlquery,
 			"SELECT id "
 			"  FROM %s.repl_nodes "
@@ -1119,21 +1163,42 @@ standby_monitor(void)
 	 * If receive_location is less than replay location, we were streaming WAL but are
 	 *   somehow disconnected and evidently in archive recovery
 	 */
-	sqlquery_snprintf(sqlquery,
-					  " SELECT ts, "
-					  "        CASE WHEN (receive_location IS NULL OR receive_location < replay_location) "
-					  "          THEN replay_location "
-					  "          ELSE receive_location"
-					  "        END AS receive_location,"
-					  "        replay_location, "
-					  "        replay_timestamp, "
-					  "        COALESCE(receive_location, '0/0') >= replay_location AS receiving_streamed_wal "
-					  "   FROM (SELECT CURRENT_TIMESTAMP AS ts, "
-					  "         pg_catalog.pg_last_xlog_receive_location() AS receive_location, "
-					  "         pg_catalog.pg_last_xlog_replay_location()  AS replay_location, "
-					  "         pg_catalog.pg_last_xact_replay_timestamp() AS replay_timestamp "
-					  "        ) q ");

+	if (server_version_num >= 100000)
+	{
+		sqlquery_snprintf(sqlquery,
+						  " SELECT ts, "
+						  "        CASE WHEN (receive_location IS NULL OR receive_location < replay_location) "
+						  "          THEN replay_location "
+						  "          ELSE receive_location"
+						  "        END AS receive_location,"
+						  "        replay_location, "
+						  "        replay_timestamp, "
+						  "        COALESCE(receive_location, '0/0') >= replay_location AS receiving_streamed_wal "
+						  "   FROM (SELECT CURRENT_TIMESTAMP AS ts, "
+						  "         pg_catalog.pg_last_wal_receive_lsn()  AS receive_location, "
+						  "         pg_catalog.pg_last_wal_replay_lsn()   AS replay_location, "
+						  "         pg_catalog.pg_last_xact_replay_timestamp() AS replay_timestamp "
+						  "        ) q ");
+
+	}
+	else
+	{
+		sqlquery_snprintf(sqlquery,
+						  " SELECT ts, "
+						  "        CASE WHEN (receive_location IS NULL OR receive_location < replay_location) "
+						  "          THEN replay_location "
+						  "          ELSE receive_location"
+						  "        END AS receive_location,"
+						  "        replay_location, "
+						  "        replay_timestamp, "
+						  "        COALESCE(receive_location, '0/0') >= replay_location AS receiving_streamed_wal "
+						  "   FROM (SELECT CURRENT_TIMESTAMP AS ts, "
+						  "         pg_catalog.pg_last_xlog_receive_location() AS receive_location, "
+						  "         pg_catalog.pg_last_xlog_replay_location()  AS replay_location, "
+						  "         pg_catalog.pg_last_xact_replay_timestamp() AS replay_timestamp "
+						  "        ) q ");
+	}


 	res = PQexec(my_local_conn, sqlquery);
@@ -1145,9 +1210,9 @@ standby_monitor(void)
 		return;
 	}

-	strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
+	strncpy(monitor_standby_timestamp,  PQgetvalue(res, 0, 0), MAXLEN);
 	strncpy(last_xlog_receive_location, PQgetvalue(res, 0, 1), MAXLEN);
-	strncpy(last_xlog_replay_location, PQgetvalue(res, 0, 2), MAXLEN);
+	strncpy(last_xlog_replay_location,  PQgetvalue(res, 0, 2), MAXLEN);
 	strncpy(last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN);

 	receiving_streamed_wal = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
@@ -1167,7 +1232,11 @@ standby_monitor(void)
 	 * TODO: investigate whether pg_current_xlog_insert_location() would be a better
 	 * choice; see: https://github.com/2ndQuadrant/repmgr/issues/189
 	 */
-	sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()");
+
+	if (server_version_num >= 100000)
+		sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_wal_lsn()");
+	else
+		sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()");

 	res = PQexec(master_conn, sqlquery);
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -1181,10 +1250,22 @@ standby_monitor(void)
 	PQclear(res);

 	lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_primary_location, NULL);
-	lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
 	lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
+	lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
+
+	if (lsn_last_xlog_receive_location >= lsn_last_xlog_replay_location)
+	{
+		apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location;
+	}
+	else
+	{
+		/* This should never happen, but in case it does, set apply lag to zero */
+		log_warning("Standby receive (%s) location appears less than standby replay location (%s)\n",
+					last_xlog_receive_location,
+					last_xlog_replay_location);
+		apply_lag = 0;
+	}

-	apply_lag = (long long unsigned int)lsn_last_xlog_receive_location - lsn_last_xlog_replay_location;

 	/* Calculate replication lag */
 	if (lsn_master_current_xlog_location >= lsn_last_xlog_receive_location)
@@ -1193,7 +1274,7 @@ standby_monitor(void)
 	}
 	else
 	{
-		/* This should never happen, but in case it does set lag to zero */
+		/* This should never happen, but in case it does, set replication lag to zero */
 		log_warning("Master xlog (%s) location appears less than standby receive location (%s)\n",
 					last_wal_primary_location,
 					last_xlog_receive_location);
@@ -1238,8 +1319,23 @@ standby_monitor(void)
 	log_verbose(LOG_DEBUG, "standby_monitor:() %s\n", sqlquery);

 	if (PQsendQuery(master_conn, sqlquery) == 0)
-		log_warning(_("query could not be sent to master. %s\n"),
+	{
+		log_warning(_("query could not be sent to master: %s\n"),
 					PQerrorMessage(master_conn));
+	}
+	else
+	{
+		sqlquery_snprintf(sqlquery,
+						  "SELECT %s.repmgr_update_last_updated();",
+						  get_repmgr_schema_quoted(my_local_conn));
+		res = PQexec(my_local_conn, sqlquery);
+
+		/* not critical if the above query fails */
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+			log_warning(_("unable to set last_updated: %s\n"), PQerrorMessage(my_local_conn));
+
+		PQclear(res);
+	}
 }


@@ -1303,7 +1399,7 @@ do_master_failover(void)
 	}

 	total_active_nodes = PQntuples(res);
-	log_debug(_("%d active nodes registered\n"), total_active_nodes);
+	log_info(_("%d active nodes registered\n"), total_active_nodes);

 	/*
 	 * Build an array with the nodes and indicate which ones are visible and
@@ -1352,7 +1448,7 @@ do_master_failover(void)
 			 *
 			 * If the master did come back at this point, the voting algorithm should decide
 			 * it's the "best candidate" anyway and no standby will promote itself or
-			 * attempt to follow* another server.
+			 * attempt to follow another server.
 			 *
 			 * If we don't try and connect to the master here (and the code generally
 			 * assumes it's failed anyway) but it does come back any time from here
@@ -1386,8 +1482,8 @@ do_master_failover(void)
 	}
 	PQclear(res);

-	log_debug(_("total nodes counted: registered=%d, visible=%d\n"),
-			  total_active_nodes, visible_nodes);
+	log_info(_("total nodes counted: registered=%d, visible=%d\n"),
+			 total_active_nodes, visible_nodes);

 	/*
 	 * Am I on the group that should keep alive? If I see less than half of
@@ -1404,7 +1500,7 @@ do_master_failover(void)
 	/* Query all available nodes to determine readiness and LSN */
 	for (i = 0; i < total_active_nodes; i++)
 	{
-		log_debug("checking node %i...\n", nodes[i].node_id);
+		log_info("checking node %i...\n", nodes[i].node_id);

 		/* if the node is not visible, skip it */
 		if (!nodes[i].is_visible)
@@ -1428,27 +1524,25 @@ do_master_failover(void)
 		if (PQstatus(node_conn) != CONNECTION_OK)
 		{
 			log_err(_("It seems new problems are arising, manual intervention is needed\n"));
+			log_detail("%s\n", PQerrorMessage(node_conn));
 			terminate(ERR_FAILOVER_FAIL);
 		}

-		sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()");
-		res = PQexec(node_conn, sqlquery);
-		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		xlog_recptr = get_last_wal_receive_location(node_conn);
+
+		if (xlog_recptr == InvalidXLogRecPtr)
 		{
-			log_info(_("unable to retrieve node's last standby location: %s\n"),
+			log_info(_("unable to retrieve last standby location for node %i: %s\n"),
+					 nodes[i].node_id,
 					 PQerrorMessage(node_conn));

-			log_debug(_("connection details: %s\n"), nodes[i].conninfo_str);
-			PQclear(res);
+			log_detail(_("connection details: %s\n"), nodes[i].conninfo_str);
 			PQfinish(node_conn);
 			terminate(ERR_FAILOVER_FAIL);
 		}

-		xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok);
+		log_info(_("current LSN of node %i is: %X/%X\n"), nodes[i].node_id, format_lsn(xlog_recptr));

-		log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, PQgetvalue(res, 0, 0));
-
-		PQclear(res);
 		PQfinish(node_conn);

 		/* If position is 0/0, error */
@@ -1463,7 +1557,11 @@ do_master_failover(void)
 	}

 	/* last we get info about this node, and update shared memory */
-	sprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()");
+	if (server_version_num >= 100000)
+		sprintf(sqlquery, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
+	else
+		sprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()");
+
 	res = PQexec(my_local_conn, sqlquery);
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
@@ -1477,6 +1575,9 @@ do_master_failover(void)
 	}
 	/* write last location in shared memory */
 	update_shared_memory(PQgetvalue(res, 0, 0));
+
+	log_info("local node's LSN is %s\n", PQgetvalue(res, 0, 0));
+
 	PQclear(res);

 	/* Wait for each node to come up and report a valid LSN */
@@ -1513,7 +1614,9 @@ do_master_failover(void)
 		 */
 		if (PQstatus(node_conn) != CONNECTION_OK)
 		{
-			/* XXX */
+			log_err(_("connection to node %i has gone away:\n%s\n"),
+					nodes[i].node_id,
+					PQerrorMessage(node_conn));
 			log_info(_("At this point, it could be some race conditions "
 					   "that are acceptable, assume the node is restarting "
 					   "and starting failover procedure\n"));
@@ -1530,6 +1633,9 @@ do_master_failover(void)
 			res = PQexec(node_conn, sqlquery);
 			if (PQresultStatus(res) != PGRES_TUPLES_OK)
 			{
+				/*
+				 * Note: in repmgr4 we handle this kind of situation much more gracefully.
+				 */
 				log_err(_("PQexec failed: %s.\nReport an invalid value to not "
 						  "be considered as new master and exit.\n"),
 						PQerrorMessage(node_conn));
@@ -1562,8 +1668,8 @@ do_master_failover(void)
 					 */
 					if (strcmp(location_value, PASSIVE_NODE) == 0)
 					{
-						log_debug("node %i is passive mode\n", nodes[i].node_id);
 						log_info(_("node %i will not be considered for promotion\n"), nodes[i].node_id);
+						log_detail("node %i indicates it is a passive node\n", nodes[i].node_id);
 						nodes[i].xlog_location = InvalidXLogRecPtr;
 						continue_loop = false;
 					}
@@ -1573,7 +1679,8 @@ do_master_failover(void)
 					 */
 					else if (strcmp(location_value, LSN_QUERY_ERROR) == 0)
 					{
-						log_warning(_("node %i is unable to update its shared memory and will not be considered for promotion\n"), nodes[i].node_id);
+						log_warning(_("node %i is unable to update its shared memory and will not be considered for promotion\n"),
+									nodes[i].node_id);
 						nodes[i].xlog_location = InvalidXLogRecPtr;
 						continue_loop = false;
 					}
@@ -1581,12 +1688,8 @@ do_master_failover(void)
 					/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
 					else if (*location_value == '\0')
 					{
-						log_crit(
-							_("unable to obtain LSN from node %i"), nodes[i].node_id
-							);
-						log_hint(
-							_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n")
-							);
+						log_crit(_("unable to obtain LSN from node %i"), nodes[i].node_id);
+						log_hint(_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n"));

 						PQfinish(node_conn);
 						/* XXX shouldn't we just ignore this node? */
@@ -1598,14 +1701,14 @@ do_master_failover(void)
 					 * strategy keep checking
 					 */
 					else {
-						log_warning(_("unable to parse LSN \"%s\"\n"),
+						log_warning(_("unable to parse shared memory LSN \"%s\"\n"),
 									location_value);
 					}
 				}
 				else
 				{
 					log_debug(
-						_("invalid LSN returned from node %i: '%s'\n"),
+						_("invalid shared memory LSN returned from node %i: '%s'\n"),
 						nodes[i].node_id,
 						location_value);
 				}
@@ -1627,7 +1730,7 @@ do_master_failover(void)
 				nodes[i].xlog_location = xlog_recptr;
 			}

-			log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, location_value);
+			log_info(_("shared memory LSN of node %i is: %s\n"), nodes[i].node_id, location_value);

 			ready_nodes++;
 			nodes[i].is_ready = true;
@@ -1683,7 +1786,7 @@ do_master_failover(void)
 		terminate(ERR_FAILOVER_FAIL);
 	}

-	log_debug("best candidate node id is %i\n", best_candidate.node_id);
+	log_info("best candidate node id is %i\n", best_candidate.node_id);

 	/* if local node is the best candidate, promote it */
 	if (best_candidate.node_id == local_options.node)
@@ -1699,9 +1802,9 @@ do_master_failover(void)
 		sleep(5);

 		log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
-
-		log_debug("promote command is: \"%s\"\n",
-				  local_options.promote_command);
+		log_detail(_("LSN is %X/%X\n"), format_lsn(best_candidate.xlog_location));
+		log_info("promote command is: \"%s\"\n",
+				 local_options.promote_command);

 		if (log_type == REPMGR_STDERR && *local_options.logfile)
 		{
@@ -1757,6 +1860,8 @@ do_master_failover(void)
 						  node_info.node_id,
 						  failed_master.node_id);

+		log_notice("%s\n", event_details.data);
+
 		/* my_local_conn is now the master */
 		create_event_record(my_local_conn,
 							&local_options,
@@ -1817,7 +1922,7 @@ do_master_failover(void)
 		}


-		log_debug(_("executing follow command: \"%s\"\n"), local_options.follow_command);
+		log_notice(_("executing follow command: \"%s\"\n"), local_options.follow_command);

 		r = system(local_options.follow_command);
 		if (r != 0)
@@ -2035,8 +2140,11 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
 {
 	int			connection_retries;

+	if (conninfo != NULL && is_server_available(conninfo))
+		return true;
+
 	/*
-	 * Check if the node is still available if after
+	 * Check if the node is still available; if after
 	 * local_options.reconnect_attempts * local_options.reconnect_interval
 	 * seconds of retries we cannot reconnect return false
 	 */
@@ -2086,18 +2194,21 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
 /*
 * set_local_node_status()
 *
- * If failure of the local node is detected, attempt to connect
- * to the current master server (as stored in the global variable
- * `master_conn`) and update its record to failed.
+ * Attempt to connect to the current master server (as stored in the global
+ * variable `master_conn`) and set the local node's status to the result
+ * of `is_standby(my_local_conn)`. Normally this will be used to mark
+ * a node as failed, but in some circumstances we may be marking it
+ * as recovered.
 */

 static bool
 set_local_node_status(void)
 {
-	PGresult       *res;
+	PGresult   *res;
 	char		sqlquery[QUERY_STR_LEN];
-	int		active_master_node_id = NODE_NOT_FOUND;
+	int			active_master_node_id = NODE_NOT_FOUND;
 	char		master_conninfo[MAXLEN];
+	bool		local_node_status;

 	if (!check_connection(&master_conn, "master", NULL))
 	{
@@ -2156,24 +2267,29 @@ set_local_node_status(void)

 	/*
 	 * Attempt to set the active record to the correct value.
-	 * First
 	 */

+	local_node_status = (is_standby(my_local_conn) == 1);
+
 	if (!update_node_record_status(master_conn,
 					    local_options.cluster_name,
 					    node_info.node_id,
 					    "standby",
 					    node_info.upstream_node_id,
-					    is_standby(my_local_conn)==1))
+					    local_node_status))
 	{
-		log_err(_("unable to set local node %i as inactive on master: %s\n"),
+		log_err(_("unable to set local node %i as %s on master: %s\n"),
 				node_info.node_id,
+				local_node_status == false ? "inactive" : "active",
 				PQerrorMessage(master_conn));

 		return false;
 	}

-	log_notice(_("marking this node (%i) as inactive on master\n"), node_info.node_id);
+	log_notice(_("marking this node (%i) as %s on master\n"),
+			   node_info.node_id,
+			   local_node_status == false ? "inactive" : "active");
+
 	return true;
 }

@@ -2314,13 +2430,13 @@ lsn_to_xlogrecptr(char *lsn, bool *format_ok)
 	if (format_ok != NULL)
 		*format_ok = true;

-	return (((XLogRecPtr) xlogid * 16 * 1024 * 1024 * 255) + xrecoff);
+	return (XLogRecPtr) ((uint64) xlogid) << 32 | (uint64) xrecoff;
 }

 void
 usage(void)
 {
-	log_err(_("%s: Replicator manager daemon \n"), progname());
+	log_err(_("%s: replication management daemon for PostgreSQL\n"), progname());
 	log_err(_("Try \"%s --help\" for more information.\n"), progname());
 }

--- a/sql/Makefile
+++ b/sql/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile
 #
-# Copyright (c) 2ndQuadrant, 2010-2016
+# Copyright (c) 2ndQuadrant, 2010-2017
 #

 MODULE_big = repmgr_funcs
--- a/sql/repmgr_funcs.sql.in
+++ b/sql/repmgr_funcs.sql.in
@@ -1,6 +1,6 @@
 /*
 * repmgr_function.sql
- * Copyright (c) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 */

--- a/sql/repmgr_test.sql
+++ b/sql/repmgr_test.sql
@@ -0,0 +1,4 @@
+select * from repmgr_update_standby_location('');
+select * from repmgr_get_last_standby_location();
+select * from repmgr_update_last_updated();
+select * from repmgr_get_last_updated();
--- a/sql/uninstall_repmgr_funcs.sql
+++ b/sql/uninstall_repmgr_funcs.sql
@@ -1,6 +1,6 @@
 /*
 * uninstall_repmgr_funcs.sql
- * Copyright (c) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 */

--- a/strutil.c
+++ b/strutil.c
@@ -1,7 +1,7 @@
 /*
 * strutil.c
 *
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -90,31 +90,18 @@ maxlen_snprintf(char *str, const char *format,...)


 /*
- * Adapted from: src/fe_utils/string_utils.c
- *
- * Function not publicly available before PostgreSQL 9.6.
+ * Escape a string for use as a parameter value in recovery.conf.
+ * The caller must free the returned value.
 */
-void
-appendShellString(PQExpBuffer buf, const char *str)
+char *
+escape_recovery_conf_value(const char *src)
 {
-	const char *p;
+	char	   *result = escape_single_quotes_ascii(src);

-	appendPQExpBufferChar(buf, '\'');
-	for (p = str; *p; p++)
+	if (!result)
 	{
-		if (*p == '\n' || *p == '\r')
-		{
-			fprintf(stderr,
-					_("shell command argument contains a newline or carriage return: \"%s\"\n"),
-					str);
-			exit(ERR_BAD_CONFIG);
-		}
-
-		if (*p == '\'')
-			appendPQExpBufferStr(buf, "'\"'\"'");
-		else
-			appendPQExpBufferChar(buf, *p);
+		fprintf(stderr, _("%s: out of memory\n"), progname());
+		exit(ERR_INTERNAL);
 	}
-
-	appendPQExpBufferChar(buf, '\'');
+	return result;
 }
--- a/strutil.h
+++ b/strutil.h
@@ -1,6 +1,6 @@
 /*
 * strutil.h
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 *
 * This program is free software: you can redistribute it and/or modify
@@ -49,6 +49,6 @@ extern int
 maxlen_snprintf(char *str, const char *format,...)
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));

-extern void
-appendShellString(PQExpBuffer buf, const char *str);
+extern char *
+escape_recovery_conf_value(const char *src);
 #endif   /* _STRUTIL_H_ */
--- a/uninstall_repmgr.sql
+++ b/uninstall_repmgr.sql
@@ -1,7 +1,7 @@
 /*
 * uninstall_repmgr.sql
 *
- * Copyright (C) 2ndQuadrant, 2010-2016
+ * Copyright (c) 2ndQuadrant, 2010-2017
 *
 */

--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
 #ifndef _VERSION_H_
 #define _VERSION_H_

-#define REPMGR_VERSION "3.2dev"
+#define REPMGR_VERSION "3.4.0"

 #endif