Update code comments

Remove unused error code ERR_BAD_PASSWORD
README: update error code list
2026-03-22 22:56:29 +00:00 · 2016-07-12 10:59:48 +09:00 · 2016-07-12 10:59:42 +09:00 · 2016-07-12 10:59:37 +09:00 · 2016-07-12 10:59:30 +09:00 · 2016-07-12 10:59:26 +09:00
27 changed files with 1789 additions and 738 deletions
--- a/FAQ.md
+++ b/FAQ.md
@@ -38,7 +38,7 @@ General

  No. Hash indexes and replication do not mix well and their use is
  explicitly discouraged; see:
-    http://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175
+    https://www.postgresql.org/docs/current/interactive/sql-createindex.html#AEN74175

 `repmgr`
 --------
--- a/30
+++ b/30
@@ -1,4 +1,32 @@
-3.1.1   2016-02-
+3.1.4   2016-07-
+        repmgr: new configuration option for setting "restore_command"
+          in the recovery.conf file generated by repmgr (Martín)
+        repmgr: add --csv option to "repmgr cluster show" (Gianni)
+        repmgr: enable provision of a conninfo string as the -d/--dbname
+          parameter, similar to other PostgreSQL utilities (Ian)
+        repmgr: during switchover operations improve detection of
+          demotion candidate shutdown (Ian)
+        various bugfixes and documentation updates (Ian, Martín)
+
+3.1.3   2016-05-17
+        repmgrd: enable monitoring when a standby is catching up by
+          replaying archived WAL (Ian)
+        repmgrd: when upstream_node_id is NULL, assume upstream node
+          to be current master (Ian)
+        repmgrd: check for reappearance of the master node if standby
+          promotion fails (Ian)
+        improve handling of rsync failure conditions (Martín)
+
+3.1.2   2016-04-12
+        Fix pg_ctl path generation in do_standby_switchover() (Ian)
+        Regularly sync witness server repl_nodes table (Ian)
+        Documentation improvements (Gianni, dhyannataraj)
+        (Experimental) ensure repmgr handles failover slots when copying
+          in rsync mode (Craig, Ian)
+        rsync mode handling fixes (Martín)
+        Enable repmgr to compile against 9.6devel (Ian)
+
+3.1.1   2016-02-24
        Add '-P/--pwprompt' option for "repmgr create witness" (Ian)
        Prevent repmgr/repmgrd running as root (Ian)

--- a/29
+++ b/29
@@ -2,23 +2,32 @@
 # Makefile
 # Copyright (c) 2ndQuadrant, 2010-2016

+HEADERS = $(wildcard *.h)
+
 repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
-repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
+repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o dirmod.o

 DATA = repmgr.sql uninstall_repmgr.sql

 PG_CPPFLAGS = -I$(libpq_srcdir)
-PG_LIBS = $(libpq_pgport)
+PG_LIBS     = $(libpq_pgport)

-all:  repmgrd repmgr
+
+all: repmgrd repmgr
 	$(MAKE) -C sql

 repmgrd: $(repmgrd_OBJS)
-	$(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd
+	$(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
 	$(MAKE) -C sql

 repmgr: $(repmgr_OBJS)
-	$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
+	$(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
+
+# Make all objects depend on all include files. This is a bit of a
+# shotgun approach, but the codebase is small enough that a complete rebuild
+# is very fast anyway.
+$(repmgr_OBJS): $(HEADERS)
+$(repmgrd_OBJS): $(HEADERS)

 ifdef USE_PGXS
 PG_CONFIG = pg_config
@@ -31,8 +40,8 @@ include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
 endif

-# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
-# is overriding pgxs install.
+# XXX: This overrides the pgxs install target - we're building two binaries,
+# which is not supported by pgxs.mk's PROGRAM construct.
 install: install_prog install_ext

 install_prog:
@@ -43,6 +52,12 @@ install_prog:
 install_ext:
 	$(MAKE) -C sql install

+# Distribution-specific package building targets
+# ----------------------------------------------
+#
+# XXX we recommend using the PGDG-supplied packages where possible;
+# see README.md for details.
+
 install_rhel:
 	mkdir -p '$(DESTDIR)/etc/init.d/'
 	$(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd'
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ This guide assumes that you are familiar with PostgreSQL administration and
 streaming replication concepts. For further details on streaming
 replication, see this link:

-  http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION
+  https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION

 The following terms are used throughout the `repmgr` documentation.

@@ -215,6 +215,34 @@ command line options:
 - `-b/--pg_bindir`


+### Command line options and environment variables
+
+For some commands, e.g. `repmgr standby clone`, database connection parameters
+need to be provided. Like other PostgreSQL utilities, the following standard
+parameters can be used:
+
+- `-d/--dbname=DBNAME`
+- `-h/--host=HOSTNAME`
+- `-p/--port=PORT`
+- `-U/--username=USERNAME`
+
+If `-d/--dbname` contains an `=` sign or starts with a valid URI prefix (`postgresql://`
+or `postgres://`), it is treated as a conninfo string. See the PostgreSQL
+documentation for further details:
+
+  https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
+
+Note that if a `conninfo` string is provided, values set in this will override any
+provided as individual parameters. For example, with `-d 'host=foo' --host bar`, `foo`
+will be chosen over `bar`.
+
+Like other PostgreSQL utilities, `repmgr` will default to any values set in environment
+variables if explicit command line parameters are not provided. See the PostgreSQL
+documentation for further details:
+
+  https://www.postgresql.org/docs/current/static/libpq-envars.html
+
+
 Setting up a simple replication cluster with repmgr
 ---------------------------------------------------

@@ -237,15 +265,19 @@ both servers.
 On the master server, a PostgreSQL instance must be initialised and running.
 The following replication settings must be included in `postgresql.conf`:

+
+    # Enable replication connections; set this figure to at least one more
+    # than the number of standbys which will connect to this server
+    # (note that repmgr will execute `pg_basebackup` in WAL streaming mode,
+    # which requires two free WAL senders)
+
+    max_wal_senders = 10
+
    # Ensure WAL files contain enough information to enable read-only queries
    # on the standby

    wal_level = 'hot_standby'

-    # Enable up to 10 replication connections
-
-    max_wal_senders = 10
-
    # How much WAL to retain on the master to allow a temporarily
    # disconnected standby to catch up again. The larger this is, the
    # longer the standby can be disconnected. This is needed only in
@@ -259,6 +291,14 @@ The following replication settings must be included in `postgresql.conf`:

    hot_standby = on

+    # Enable WAL file archiving
+    archive_mode = on
+
+    # Set archive command to a script or application that will safely store
+    # your WALs in a secure place. /bin/true is an example of a command that
+    # ignores archiving. Use something more sensible.
+    archive_command = '/bin/true'
+

 * * *

@@ -284,11 +324,11 @@ similar to the following:

    local   replication   repmgr                              trust
    host    replication   repmgr      127.0.0.1/32            trust
-    host    replication   repmgr      192.168.1.0/32          trust
+    host    replication   repmgr      192.168.1.0/24          trust

    local   repmgr        repmgr                              trust
    host    repmgr        repmgr      127.0.0.1/32            trust
-    host    repmgr        repmgr      192.168.1.0/32          trust
+    host    repmgr        repmgr      192.168.1.0/24          trust

 Adjust according to your network environment and authentication requirements.

@@ -458,7 +498,11 @@ so should be used with care.
 Further options can be passed to the `pg_basebackup` utility via
 the setting `pg_basebackup_options` in `repmgr.conf`. See the PostgreSQL
 documentation for more details of available options:
+<<<<<<< HEAD
  http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
+=======
+  https://www.postgresql.org/docs/current/static/app-pgbasebackup.html
+>>>>>>> 72f9b0145afab1060dd1202c8f8937653c8b2e39

 ### Using rsync to clone a standby

@@ -476,7 +520,6 @@ and destination server as the contents of files existing on both servers need
 to be compared, meaning this method is not necessarily faster than making a
 fresh clone with `pg_basebackup`.

-
 ### Dealing with PostgreSQL configuration files

 By default, `repmgr` will attempt to copy the standard configuration files
@@ -491,6 +534,21 @@ which enables any valid `rsync` options to be passed to that command, e.g.:

    rsync_options='--exclude=postgresql.local.conf'

+### Controlling `primary_conninfo` in `recovery.conf`
+
+`repmgr` will create the `primary_conninfo` setting in `recovery.conf` based
+on the connection parameters provided to `repmgr standby clone` and PostgreSQL's
+standard connection defaults, including any environment variables set on the
+local node.
+
+To include specific connection parameters other than the standard host, port,
+username and database values (e.g. `sslmode`), include these in a `conninfo`-style
+string passed to `repmgr` with `-d/--dbname` (see above for details), and/or set
+appropriate environment variables.
+
+Note that PostgreSQL will always set explicit defaults for `sslmode` and
+`sslcompression`.
+

 Setting up cascading replication with repmgr
 --------------------------------------------
@@ -594,13 +652,13 @@ place. If using the default `pg_basebackup` method, we recommend setting
    pg_basebackup_options='--xlog-method=stream'

 See the `pg_basebackup` documentation for details:
-    http://www.postgresql.org/docs/current/static/app-pgbasebackup.html
+    https://www.postgresql.org/docs/current/static/app-pgbasebackup.html

 Otherwise it's necessary to set `wal_keep_segments` to an appropriately high
 value.

 Further information on replication slots in the PostgreSQL documentation:
-    http://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS
+    https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS


 Promoting a standby server with repmgr
@@ -699,8 +757,9 @@ updated to reflect this:


 Note that with cascading replication, `repmgr standby follow` can also be
-used to detach a standby from its current upstream server and follow another
-upstream server, including the master.
+used to detach a standby from its current upstream server and follow the
+master. However it's currently not possible to have it follow another standby;
+we hope to improve this in a future release.


 Performing a switchover with repmgr
@@ -727,7 +786,7 @@ both passwordless SSH access and the path of `repmgr.conf` on that server.
 > careful preparation and with adequate attention. In particular you should
 > be confident that your network environment is stable and reliable.
 >
-> We recommend running `repmgr standby switchover`  at the most verbose
+> We recommend running `repmgr standby switchover` at the most verbose
 > logging level (`--log-level DEBUG --verbose`) and capturing all output
 > to assist troubleshooting any problems.
 >
@@ -793,7 +852,7 @@ should have been updated to reflect this:

 ### Caveats

- the functionality provided `repmgr standby switchover` is primarily aimed
+- The functionality provided by `repmgr standby switchover` is primarily aimed
  at a two-server master/standby replication cluster and currently does
  not support additional standbys.
 - `repmgr standby switchover` is designed to use the `pg_rewind` utility,
@@ -802,11 +861,16 @@ should have been updated to reflect this:
 - `pg_rewind` *requires* that either `wal_log_hints` is enabled, or that
   data checksums were enabled when the cluster was initialized. See the
  `pg_rewind` documentation for details:
-     http://www.postgresql.org/docs/current/static/app-pgrewind.html
+     https://www.postgresql.org/docs/current/static/app-pgrewind.html
 - `repmgrd` should not be running when a switchover is carried out, otherwise
  the `repmgrd` may try and promote a standby by itself.
 - Any other standbys attached to the old master will need to be manually
  instructed to point to the new master (e.g. with `repmgr standby follow`).
+- You must ensure that following a server start using `pg_ctl`, log output
+  is not sent to STDERR (the default behaviour). If logging is not configured,
+  we recommend setting `logging_collector=on` in `postgresql.conf` and
+  providing an explicit `-l/--log` setting in `repmgr.conf`'s `pg_ctl_options`
+  parameter.

 We hope to remove some of these restrictions in future versions of `repmgr`.

@@ -860,8 +924,8 @@ Adjust schema and node ID accordingly. A future `repmgr` release
 will make it possible to unregister failed standbys.


-Automatic failover with repmgrd
-------------------------------
+Automatic failover with `repmgrd`
+---------------------------------

 `repmgrd` is a management and monitoring daemon which runs on standby nodes
 and which can automate actions such as failover and updating standbys to
@@ -876,6 +940,10 @@ be set in `repmgr.conf`:

 (See `repmgr.conf.sample` for further `repmgrd`-specific settings).

+Additionally, `postgresql.conf` must contain the following line:
+
+    shared_preload_libraries = 'repmgr_funcs'
+
 When `failover` is set to `automatic`, upon detecting failure of the current
 master, `repmgrd` will execute one of `promote_command` or `follow_command`,
 depending on whether the current server is becoming the new master or
@@ -977,8 +1045,8 @@ during the failover:
    (3 rows)


-repmgrd log rotation
--------------------
+`repmgrd` log rotation
+----------------------

 Note that currently `repmgrd` does not provide logfile rotation. To ensure
 the current logfile does not grow indefinitely, configure your system's `logrotate`
@@ -994,12 +1062,36 @@ for up to 52 weeks and rotation forced if a file grows beyond 100Mb:
        create 0600 postgres postgres
    }

-Monitoring
----------
+
+`repmgrd` and PostgreSQL connection settings
+--------------------------------------------
+
+In addition to the `repmgr` configuration settings, parameters in the
+`conninfo` string influence how `repmgr` makes a network connection to
+PostgreSQL. In particular, if another server in the replication cluster
+is unreachable at network level, system network settings will influence
+the length of time it takes to determine that the connection is not possible.
+
+In particular explicitly setting a parameter for `connect_timeout` should
+be considered; the effective minimum value of `2` (seconds) will ensure
+that a connection failure at network level is reported as soon as possible,
+otherwise, depending on the system settings (e.g. `tcp_syn_retries` in Linux),
+a delay of a minute or more is possible.
+
+For further details on `conninfo` network connection parameters, see:
+
+  https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS
+
+
+Monitoring with `repmgrd`
+-------------------------

 When `repmgrd` is running with the option `-m/--monitoring-history`, it will
-constantly write node status information to the `repl_monitor` table, which can
-be queried easily using the view `repl_status`:
+constantly write standby node status information to the `repl_monitor` table,
+providing a near-real time overview of replication status on all nodes
+in the cluster.
+
+The view `repl_status` shows the most recent state for each node, e.g.:

    repmgr=# SELECT * FROM repmgr_test.repl_status;
    -[ RECORD 1 ]-------------+-----------------------------
@@ -1024,6 +1116,10 @@ table , it's advisable to regularly purge historical data with
 `repmgr cluster cleanup`; use the `-k/--keep-history` to specify how
 many day's worth of data should be retained.

+Note that when a standby node is not streaming directly from its upstream
+node, i.e. recovering WAL from an archive, `apply_lag` will always
+appear as `0 bytes`.
+

 Using a witness server with repmgrd
 ------------------------------------
@@ -1309,20 +1405,22 @@ which contains connection details for the local database.
 `repmgr` or `repmgrd` will return one of the following error codes on program
 exit:

-* SUCCESS (0)              Program ran successfully.
-* ERR_BAD_CONFIG (1)       Configuration file could not be parsed or was invalid
-* ERR_BAD_RSYNC (2)        An rsync call made by the program returned an error
-* ERR_NO_RESTART (4)       An attempt to restart a PostgreSQL instance failed
-* ERR_DB_CON (6)           Error when trying to connect to a database
-* ERR_DB_QUERY (7)         Error while executing a database query
-* ERR_PROMOTED (8)         Exiting program because the node has been promoted to master
-* ERR_BAD_PASSWORD (9)     Password used to connect to a database was rejected
-* ERR_STR_OVERFLOW (10)    String overflow error
-* ERR_FAILOVER_FAIL (11)   Error encountered during failover (repmgrd only)
-* ERR_BAD_SSH (12)         Error when connecting to remote host via SSH
-* ERR_SYS_FAILURE (13)     Error when forking (repmgrd only)
-* ERR_BAD_BASEBACKUP (14)  Error when executing pg_basebackup
-* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
+* SUCCESS (0)               Program ran successfully.
+* ERR_BAD_CONFIG (1)        Configuration file could not be parsed or was invalid
+* ERR_BAD_RSYNC (2)         An rsync call made by the program returned an error (repmgr only)
+* ERR_NO_RESTART (4)        An attempt to restart a PostgreSQL instance failed
+* ERR_DB_CON (6)            Error when trying to connect to a database
+* ERR_DB_QUERY (7)          Error while executing a database query
+* ERR_PROMOTED (8)          Exiting program because the node has been promoted to master
+* ERR_STR_OVERFLOW (10)     String overflow error
+* ERR_FAILOVER_FAIL (11)    Error encountered during failover (repmgrd only)
+* ERR_BAD_SSH (12)          Error when connecting to remote host via SSH (repmgr only)
+* ERR_SYS_FAILURE (13)      Error when forking (repmgrd only)
+* ERR_BAD_BASEBACKUP (14)   Error when executing pg_basebackup (repmgr only)
+* ERR_MONITORING_FAIL (16)  Unrecoverable error encountered during monitoring (repmgrd only)
+* ERR_BAD_BACKUP_LABEL (17) Corrupt or unreadable backup label encountered (repmgr only)
+* ERR_SWITCHOVER_FAIL (18)  Error encountered during switchover (repmgr only)
+

 Support and Assistance
 ----------------------
--- a/RHEL/repmgr3-93.spec
+++ b/RHEL/repmgr3-93.spec
@@ -1,61 +0,0 @@
-Summary: repmgr
-Name: repmgr
-Version: 3.0
-Release: 1
-License: GPLv3
-Group: System Environment/Daemons
-URL: http://repmgr.org
-Packager: Ian Barwick <ian@2ndquadrant.com>
-Vendor: 2ndQuadrant Limited
-Distribution: centos
-Source0: %{name}-%{version}.tar.gz
-BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
-
-%description
-repmgr is a utility suite which greatly simplifies
-the process of setting up and managing replication
-using streaming replication within a cluster of
-PostgreSQL servers.
-
-%prep
-%setup
-
-%build
-export PATH=$PATH:/usr/pgsql-9.3/bin/
-%{__make} USE_PGXS=1
-
-%install
-[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
-
-export PATH=$PATH:/usr/pgsql-9.3/bin/
-%{__make} USE_PGXS=1 install DESTDIR=%{buildroot} INSTALL="install -p"
-%{__make} USE_PGXS=1 install_prog DESTDIR=%{buildroot} INSTALL="install -p"
-%{__make} USE_PGXS=1 install_rhel DESTDIR=%{buildroot} INSTALL="install -p"
-
-
-%clean
-[ "%{buildroot}" != "/" ] && %{__rm} -rf %{buildroot}
-
-
-%files
-%defattr(-,root,root)
-/usr/bin/repmgr
-/usr/bin/repmgrd
-/usr/pgsql-9.3/bin/repmgr
-/usr/pgsql-9.3/bin/repmgrd
-/usr/pgsql-9.3/lib/repmgr_funcs.so
-/usr/pgsql-9.3/share/contrib/repmgr.sql
-/usr/pgsql-9.3/share/contrib/repmgr_funcs.sql
-/usr/pgsql-9.3/share/contrib/uninstall_repmgr.sql
-/usr/pgsql-9.3/share/contrib/uninstall_repmgr_funcs.sql
-%attr(0755,root,root)/etc/init.d/repmgrd
-%attr(0644,root,root)/etc/sysconfig/repmgrd
-%attr(0644,root,root)/etc/repmgr/repmgr.conf.sample
-
-%changelog
-* Tue Mar 10 2015 Ian Barwick ian@2ndquadrant.com>
- build for repmgr 3.0
-* Thu Jun 05 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.2
- fix witness creation to create db and user if needed
-* Fri Apr 04 2014 Nathan Van Overloop <nathan.van.overloop@nexperteam.be> 2.0.1
- initial build for RHEL6
--- a/RHEL/repmgrd.init
+++ b/RHEL/repmgrd.init
@@ -1,133 +0,0 @@
-#!/bin/sh
-#
-# chkconfig: - 75 16
-# description: Enable repmgrd replication management and monitoring daemon for PostgreSQL
-# processname: repmgrd
-# pidfile="/var/run/${NAME}.pid"
-
-# Source function library.
-INITD=/etc/rc.d/init.d
-. $INITD/functions
-
-# Get function listing for cross-distribution logic.
-TYPESET=`typeset -f|grep "declare"`
-
-# Get network config.
-. /etc/sysconfig/network
-
-DESC="PostgreSQL replication management and monitoring daemon"
-NAME=repmgrd
-
-REPMGRD_ENABLED=no
-REPMGRD_OPTS=
-REPMGRD_USER=postgres
-REPMGRD_BIN=/usr/pgsql-9.3/bin/repmgrd
-REPMGRD_PIDFILE=/var/run/repmgrd.pid
-REPMGRD_LOCK=/var/lock/subsys/${NAME}
-REPMGRD_LOG=/var/lib/pgsql/9.3/data/pg_log/repmgrd.log
-
-# Read configuration variable file if it is present
-[ -r /etc/sysconfig/$NAME ] && . /etc/sysconfig/$NAME
-
-# For SELinux we need to use 'runuser' not 'su'
-if [ -x /sbin/runuser ]
-then
-    SU=runuser
-else
-    SU=su
-fi
-
-test -x $REPMGRD_BIN || exit 0
-
-case "$REPMGRD_ENABLED" in
-    [Yy]*)
-	break
-	;;
-    *)
-	exit 0
-	;;
-esac
-
-
-if [ -z "${REPMGRD_OPTS}" ]
-then
-    echo "Not starting ${NAME}, REPMGRD_OPTS not set in /etc/sysconfig/${NAME}"
-    exit 0
-fi
-
-start()
-{
-    REPMGRD_START=$"Starting ${NAME} service: "
-
-    # Make sure startup-time log file is valid
-    if [ ! -e "${REPMGRD_LOG}" -a ! -h "${REPMGRD_LOG}" ]
-    then
-        touch "${REPMGRD_LOG}" || exit 1
-        chown ${REPMGRD_USER}:postgres "${REPMGRD_LOG}"
-        chmod go-rwx "${REPMGRD_LOG}"
-        [ -x /sbin/restorecon ] && /sbin/restorecon "${REPMGRD_LOG}"
-    fi
-
-    echo -n "${REPMGRD_START}"
-    $SU -l $REPMGRD_USER -c "${REPMGRD_BIN} ${REPMGRD_OPTS} -p ${REPMGRD_PIDFILE} &" >> "${REPMGRD_LOG}" 2>&1 < /dev/null
-    sleep 2
-    pid=`head -n 1 "${REPMGRD_PIDFILE}" 2>/dev/null`
-    if [ "x${pid}" != "x" ]
-    then
-        success "${REPMGRD_START}"
-        touch "${REPMGRD_LOCK}"
-        echo $pid > "${REPMGRD_PIDFILE}"
-        echo
-    else
-        failure "${REPMGRD_START}"
-        echo
-        script_result=1
-    fi
-}
-
-stop()
-{
-    echo -n $"Stopping ${NAME} service: "
-    if [ -e "${REPMGRD_LOCK}" ]
-    then
-        killproc ${NAME}
-        ret=$? 
-        if [ $ret -eq 0 ]
-        then
-            echo_success
-            rm -f "${REPMGRD_PIDFILE}"
-            rm -f "${REPMGRD_LOCK}"
-        else
-            echo_failure
-            script_result=1
-        fi
-    else
-        # not running; per LSB standards this is "ok"   
-        echo_success
-    fi
-    echo
-}
-
-
-# See how we were called.
-case "$1" in
-  start)
-        start
-        ;;
-  stop)
-        stop
-        ;;
-  status)
-        status -p $REPMGRD_PIDFILE $NAME
-        script_result=$?
-        ;;
-  restart)
-        stop
-	start
-        ;;
-  *)
-        echo $"Usage: $0 {start|stop|status|restart}"
-        exit 2
-esac
-
-exit $script_result
--- a/RHEL/repmgrd.sysconfig
+++ b/RHEL/repmgrd.sysconfig
@@ -1,21 +0,0 @@
-# default settings for repmgrd. This file is source by /bin/sh from
-# /etc/init.d/repmgrd
-
-# disable repmgrd by default so it won't get started upon installation
-# valid values: yes/no
-REPMGRD_ENABLED=no
-
-# Options for repmgrd (required)
-#REPMGRD_OPTS="--verbose -d -f /var/lib/pgsql/repmgr/repmgr.conf"
-
-# User to run repmgrd as
-#REPMGRD_USER=postgres
-
-# repmgrd binary
-#REPMGRD_BIN=/usr/bin/repmgrd
-
-# pid file
-#REPMGRD_PIDFILE=/var/lib/pgsql/repmgr/repmgrd.pid
-
-# log file
-#REPMGRD_LOG=/var/lib/pgsql/repmgr/repmgrd.log
--- a/5
+++ b/5
@@ -53,8 +53,9 @@ Planned feature improvements
  requested, activate the replication slot using pg_receivexlog to negate the
  need to set `wal_keep_segments` just for the initial clone (9.4 and 9.5).

-* Take into account the fact that a standby can obtain WAL from an archive,
-  so even if direct streaming replication is interrupted, it may be up-to-date
+* repmgr: enable "standby follow" to point a standby at another standby, not
+  just the replication cluster master (see GitHub #130)
+

 Usability improvements
 ======================
--- a/config.c
+++ b/config.c
@@ -28,7 +28,7 @@ static void parse_event_notifications_list(t_configuration_options *options, con
 static void tablespace_list_append(t_configuration_options *options, const char *arg);
 static void exit_with_errors(ErrorList *config_errors);

-const static char *_progname = '\0';
+const static char *_progname = NULL;
 static char config_file_path[MAXPGPATH];
 static bool config_file_provided = false;
 bool config_file_found = false;
@@ -224,6 +224,7 @@ parse_config(t_configuration_options *options)
 	memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
 	memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
 	memset(options->pg_basebackup_options, 0, sizeof(options->pg_basebackup_options));
+	memset(options->restore_command, 0, sizeof(options->restore_command));

 	/* default master_response_timeout is 60 seconds */
 	options->master_response_timeout = 60;
@@ -235,7 +236,12 @@ parse_config(t_configuration_options *options)
 	options->monitor_interval_secs = 2;
 	options->retry_promote_interval_secs = 300;

+	/* default to resyncing repl_nodes table every 30 seconds on the witness server */
+	options->witness_repl_nodes_sync_interval_secs = 30;
+
 	memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
+	options->event_notifications.head = NULL;
+	options->event_notifications.tail = NULL;

 	options->tablespace_mapping.head = NULL;
 	options->tablespace_mapping.tail = NULL;
@@ -337,7 +343,8 @@ parse_config(t_configuration_options *options)
 			strncpy(options->follow_command, value, MAXLEN);
 		else if (strcmp(name, "master_response_timeout") == 0)
 			options->master_response_timeout = repmgr_atoi(value, "master_response_timeout", &config_errors, false);
-		/* 'primary_response_timeout' as synonym for 'master_response_timeout' -
+		/*
+		 * 'primary_response_timeout' as synonym for 'master_response_timeout' -
 		 * we'll switch terminology in a future release (3.1?)
 		 */
 		else if (strcmp(name, "primary_response_timeout") == 0)
@@ -358,6 +365,8 @@ parse_config(t_configuration_options *options)
 			options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
 		else if (strcmp(name, "retry_promote_interval_secs") == 0)
 			options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
+		else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
+			options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
 		else if (strcmp(name, "use_replication_slots") == 0)
 			/* XXX we should have a dedicated boolean argument format */
 			options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);
@@ -367,6 +376,8 @@ parse_config(t_configuration_options *options)
 			parse_event_notifications_list(options, value);
 		else if (strcmp(name, "tablespace_mapping") == 0)
 			tablespace_list_append(options, value);
+		else if (strcmp(name, "restore_command") == 0)
+			strncpy(options->restore_command, value, MAXLEN);
 		else
 		{
 			known_parameter = false;
--- a/config.h
+++ b/config.h
@@ -72,16 +72,22 @@ typedef struct
 	char		pg_bindir[MAXLEN];
 	char		pg_ctl_options[MAXLEN];
 	char		pg_basebackup_options[MAXLEN];
+	char		restore_command[MAXLEN];
 	char		logfile[MAXLEN];
 	int			monitor_interval_secs;
 	int			retry_promote_interval_secs;
+	int			witness_repl_nodes_sync_interval_secs;
 	int			use_replication_slots;
 	char		event_notification_command[MAXLEN];
 	EventNotificationList event_notifications;
 	TablespaceList tablespace_mapping;
 }	t_configuration_options;

-#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
+/*
+ * The following will initialize the structure with a minimal set of options;
+ * actual defaults are set in parse_config() before parsing the configuration file
+ */
+#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, { NULL, NULL } }

 typedef struct ErrorListCell
 {
--- a/dbutils.c
+++ b/dbutils.c
@@ -31,6 +31,7 @@
 char repmgr_schema[MAXLEN] = "";
 char repmgr_schema_quoted[MAXLEN] = "";

+static int _get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info);

 PGconn *
 _establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice)
@@ -420,7 +421,7 @@ guc_set_typed(PGconn *conn, const char *parameter, const char *op,
 					  " WHERE name = '%s' AND setting::%s %s '%s'::%s",
 					  parameter, datatype, op, value, datatype);

-	log_verbose(LOG_DEBUG, "guc_set_typed():n%s\n", sqlquery);
+	log_verbose(LOG_DEBUG, "guc_set_typed():\n%s\n", sqlquery);

 	res = PQexec(conn, sqlquery);
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -538,7 +539,7 @@ get_conninfo_value(const char *conninfo, const char *keyword, char *output)

 	conninfo_options = PQconninfoParse(conninfo, NULL);

-	if (conninfo_options == false)
+	if (conninfo_options == NULL)
 	{
 		log_err(_("Unable to parse provided conninfo string \"%s\""), conninfo);
 		return false;
@@ -587,7 +588,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
 		upstream_conninfo = upstream_conninfo_out;

 	sqlquery_snprintf(sqlquery,
-					  "    SELECT un.conninfo, un.name, un.id "
+					  "    SELECT un.conninfo, un.id "
 					  "      FROM %s.repl_nodes un "
 					  "INNER JOIN %s.repl_nodes n "
 					  "        ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)"
@@ -604,7 +605,7 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,

 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
-		log_err(_("unable to get conninfo for upstream server\n%s\n"),
+		log_err(_("error when attempting to find upstream server\n%s\n"),
 				PQerrorMessage(standby_conn));
 		PQclear(res);
 		return NULL;
@@ -612,9 +613,36 @@ get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,

 	if (!PQntuples(res))
 	{
-		log_notice(_("no record found for upstream server"));
 		PQclear(res);
-		return NULL;
+		log_debug("no record found for upstream server\n");
+
+		sqlquery_snprintf(sqlquery,
+						  "    SELECT un.conninfo, un.id "
+						  "      FROM %s.repl_nodes un "
+						  "     WHERE un.cluster = '%s' "
+						  "       AND un.type='master' "
+						  "       AND un.active IS TRUE",
+						  get_repmgr_schema_quoted(standby_conn),
+						  cluster);
+		res = PQexec(standby_conn, sqlquery);
+
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			log_err(_("error when attempting to find active master server\n%s\n"),
+					PQerrorMessage(standby_conn));
+			PQclear(res);
+			return NULL;
+		}
+
+		if (!PQntuples(res))
+		{
+			PQclear(res);
+			log_notice(_("no record found for active master server\n"));
+
+			return NULL;
+		}
+
+		log_debug("record found for active master server\n");
 	}

 	strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO);
@@ -889,7 +917,7 @@ get_repmgr_schema_quoted(PGconn *conn)


 bool
-create_replication_slot(PGconn *conn, char *slot_name)
+create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
 {
 	char				sqlquery[QUERY_STR_LEN];
 	int					query_res;
@@ -926,9 +954,19 @@ create_replication_slot(PGconn *conn, char *slot_name)
 		return false;
 	}

-	sqlquery_snprintf(sqlquery,
-					  "SELECT * FROM pg_create_physical_replication_slot('%s')",
-					  slot_name);
+	/* In 9.6 and later, reserve the LSN straight away */
+	if (server_version_num >= 90600)
+	{
+		sqlquery_snprintf(sqlquery,
+						  "SELECT * FROM pg_create_physical_replication_slot('%s', TRUE)",
+						  slot_name);
+	}
+	else
+	{
+		sqlquery_snprintf(sqlquery,
+						  "SELECT * FROM pg_create_physical_replication_slot('%s')",
+						  slot_name);
+	}

 	log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
 	log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
@@ -1111,7 +1149,7 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)


 /*
- * copy_configuration()
+ * witness_copy_node_records()
 *
 * Copy records in master's `repl_nodes` table to witness database
 *
@@ -1119,29 +1157,49 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
 * `repmgrd` after a failover event occurs
 */
 bool
-copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
+witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
 {
 	char		sqlquery[MAXLEN];
 	PGresult   *res;
 	int			i;

+	begin_transaction(witnessconn);
+
+	/* Defer constraints */
+	sqlquery_snprintf(sqlquery, "SET CONSTRAINTS ALL DEFERRED;");
+	log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
+
+	res = PQexec(witnessconn, sqlquery);
+	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		log_err(_("Unable to defer constraints:\n%s\n"),
+				PQerrorMessage(witnessconn));
+		rollback_transaction(witnessconn);
+
+		return false;
+	}
+
+	/* Truncate existing records */
 	sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));

-	log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
+	log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);

 	res = PQexec(witnessconn, sqlquery);
 	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
 	{
 		log_err(_("Unable to truncate witness servers's repl_nodes table:\n%s\n"),
 				PQerrorMessage(witnessconn));
+		rollback_transaction(witnessconn);
+
 		return false;
 	}

+	/* Get current records from primary */
 	sqlquery_snprintf(sqlquery,
-					  "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
+					  "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes",
 					  get_repmgr_schema_quoted(masterconn));

-	log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
+	log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);

 	res = PQexec(masterconn, sqlquery);
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -1149,20 +1207,23 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
 		log_err("Unable to retrieve node records from master:\n%s\n",
 				PQerrorMessage(masterconn));
 		PQclear(res);
+		rollback_transaction(witnessconn);
+
 		return false;
 	}

+	/* Insert primary records into witness table */
 	for (i = 0; i < PQntuples(res); i++)
 	{
 		bool node_record_created;

 		log_verbose(LOG_DEBUG,
-					"copy_configuration(): writing node record for node %s (id: %s)\n",
-					PQgetvalue(res, i, 4),
+					"witness_copy_node_records(): writing node record for node %s (id: %s)\n",
+					PQgetvalue(res, i, 3),
 					PQgetvalue(res, i, 0));

 		node_record_created = create_node_record(witnessconn,
-												 "copy_configuration",
+												 "witness_copy_node_records",
 												 atoi(PQgetvalue(res, i, 0)),
 												 PQgetvalue(res, i, 1),
 												 strlen(PQgetvalue(res, i, 2))
@@ -1174,7 +1235,10 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
 												 atoi(PQgetvalue(res, i, 5)),
 												 strlen(PQgetvalue(res, i, 6))
 													? PQgetvalue(res, i, 6)
-													: NULL
+												    : NULL,
+												 (strcmp(PQgetvalue(res, i, 7), "t") == 0)
+												 	? true
+												 	: false
 												 );

 		if (node_record_created == false)
@@ -1183,11 +1247,16 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)

 			log_err("Unable to copy node record to witness database\n%s\n",
 					PQerrorMessage(witnessconn));
+			rollback_transaction(witnessconn);
+
 			return false;
 		}
 	}
 	PQclear(res);

+	/* And finished */
+	commit_transaction(witnessconn);
+
 	return true;
 }

@@ -1200,7 +1269,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
 * XXX we should pass the record parameters as a struct.
 */
 bool
-create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name)
+create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
 {
 	char		sqlquery[QUERY_STR_LEN];
 	char		upstream_node_id[MAXLEN];
@@ -1241,8 +1310,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
 	sqlquery_snprintf(sqlquery,
 					  "INSERT INTO %s.repl_nodes "
 					  "       (id, type, upstream_node_id, cluster, "
-					  "        name, conninfo, slot_name, priority) "
-					  "VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ",
+					  "        name, conninfo, slot_name, priority, active) "
+					  "VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ",
 					  get_repmgr_schema_quoted(conn),
 					  node,
 					  type,
@@ -1251,7 +1320,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
 					  node_name,
 					  conninfo,
 					  slot_name_buf,
-					  priority);
+					  priority,
+					  active == true ? "TRUE" : "FALSE");

 	log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);

@@ -1291,7 +1361,7 @@ delete_node_record(PGconn *conn, int node, char *action)

 	if (action != NULL)
 	{
-		log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
+		log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action);
 	}

 	res = PQexec(conn, sqlquery);
@@ -1612,8 +1682,7 @@ int
 get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info)
 {
 	char		sqlquery[QUERY_STR_LEN];
-	PGresult   *res;
-	int         ntuples;
+	int		    result;

 	sqlquery_snprintf(
 		sqlquery,
@@ -1627,6 +1696,51 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info

 	log_verbose(LOG_DEBUG, "get_node_record():\n%s\n", sqlquery);

+	result = _get_node_record(conn, cluster, sqlquery, node_info);
+
+	if (result == 0)
+	{
+		log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id);
+	}
+
+	return result;
+}
+
+/* Fetch the repl_nodes record for node_name via _get_node_record(); returns 0 if none is found */
+int
+get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info)
+{
+	char		sqlquery[QUERY_STR_LEN];
+	int			result;
+
+	/* NOTE(review): node_name is interpolated unescaped - confirm callers validate it */
+	sqlquery_snprintf(
+		sqlquery,
+		"SELECT id, type, upstream_node_id, name, conninfo, slot_name, priority, active"
+		"  FROM %s.repl_nodes "
+		" WHERE cluster = '%s' "
+		"   AND name = '%s'",
+		get_repmgr_schema_quoted(conn),
+		cluster,
+		node_name);
+
+	log_verbose(LOG_DEBUG, "get_node_record_by_name():\n%s\n", sqlquery);
+	result = _get_node_record(conn, cluster, sqlquery, node_info);
+	if (result == 0)
+	{
+		log_verbose(LOG_DEBUG, "get_node_record_by_name(): no record found for node %s\n", node_name);
+	}
+
+	return result;
+}
+
+
+static int
+_get_node_record(PGconn *conn, char *cluster, char *sqlquery, t_node_info *node_info)
+{
+	int         ntuples;
+	PGresult   *res;
+
 	res = PQexec(conn, sqlquery);
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
@@ -1637,7 +1751,6 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info

 	if (ntuples == 0)
 	{
-		log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i\n", node_id);
 		return 0;
 	}

@@ -1658,6 +1771,9 @@ get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info
 }


+
+
+
 int
 get_node_replication_state(PGconn *conn, char *node_name, char *output)
 {
--- a/dbutils.h
+++ b/dbutils.h
@@ -52,18 +52,6 @@ typedef struct s_node_info
 }	t_node_info;


-/*
- * Struct to store replication slot information
- */
-
-typedef struct s_replication_slot
-{
-	char slot_name[MAXLEN];
-    char slot_type[MAXLEN];
-	bool active;
-}   t_replication_slot;
-
-
 #define T_NODE_INFO_INITIALIZER { \
  NODE_NOT_FOUND, \
  NO_UPSTREAM_NODE, \
@@ -78,6 +66,19 @@ typedef struct s_replication_slot
  InvalidXLogRecPtr \
 }

+/*
+ * Struct to store replication slot information
+ */
+
+typedef struct s_replication_slot
+{
+	char slot_name[MAXLEN];
+    char slot_type[MAXLEN];
+	bool active;
+}   t_replication_slot;
+
+
+
 PGconn *_establish_db_connection(const char *conninfo,
 								 const bool exit_on_error,
 								 const bool log_notice);
@@ -115,16 +116,17 @@ int			wait_connection_availability(PGconn *conn, long long timeout);
 bool		cancel_query(PGconn *conn, int timeout);
 char       *get_repmgr_schema(void);
 char       *get_repmgr_schema_quoted(PGconn *conn);
-bool		create_replication_slot(PGconn *conn, char *slot_name);
+bool		create_replication_slot(PGconn *conn, char *slot_name, int server_version_num);
 int			get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
 bool		drop_replication_slot(PGconn *conn, char *slot_name);
 bool		start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
 bool		stop_backup(PGconn *conn, char *last_wal_segment);
 bool		set_config_bool(PGconn *conn, const char *config_param, bool state);
-bool		copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
-bool		create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
+bool		witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
+bool		create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
 bool		delete_node_record(PGconn *conn, int node, char *action);
 int			get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
+int			get_node_record_by_name(PGconn *conn, char *cluster, const char *node_name, t_node_info *node_info);
 bool        update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
 bool        update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
 bool        create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
@@ -133,3 +135,4 @@ int		    get_node_replication_state(PGconn *conn, char *node_name, char *output)
 t_server_type parse_node_type(const char *type);
 int			get_data_checksum_version(const char *data_directory);
 #endif
+
--- a/debian/DEBIAN/control
+++ b/debian/DEBIAN/control
@@ -1,9 +1,9 @@
 Package: repmgr-auto
-Version: 3.0.1
+Version: 3.1.3
 Section: database
 Priority: optional
 Architecture: all
-Depends: rsync, postgresql-9.3 | postgresql-9.4
+Depends: rsync, postgresql-9.3 | postgresql-9.4 | postgresql-9.5
 Maintainer: Self built package <user@localhost>
 Description: PostgreSQL replication setup, magament and monitoring
 has two main executables
--- a/dirmod.c
+++ b/dirmod.c
@@ -0,0 +1,194 @@
+/*
+ *
+ * dirmod.c
+ *	  directory handling functions
+ *
+ * Copyright (C) 2ndQuadrant, 2010-2016
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "postgres_fe.h"
+
+/* Don't modify declarations in system headers */
+
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/stat.h>
+
+/*
+ * pgfnames
+ *
+ * Return a NULL-terminated list of the names of objects in the argument
+ * directory ("." and ".." are skipped), or NULL if it cannot be opened.
+ * Caller must call pgfnames_cleanup later to free the returned memory.
+ */
+char	  **
+pgfnames(const char *path)
+{
+	DIR		   *dir;
+	struct dirent *file;
+	char	  **filenames;
+	int			numnames = 0;
+	int			fnsize = 200;	/* initial capacity; doubled on demand below */
+
+	dir = opendir(path);
+	if (dir == NULL)
+	{
+		return NULL;	/* nothing allocated yet, and no message is printed */
+	}
+
+	filenames = (char **) palloc(fnsize * sizeof(char *));
+
+	while (errno = 0, (file = readdir(dir)) != NULL)
+	{
+		if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0)
+		{
+			if (numnames + 1 >= fnsize)	/* keep one slot free for the NULL terminator */
+			{
+				fnsize *= 2;
+				filenames = (char **) repalloc(filenames,
+											   fnsize * sizeof(char *));
+			}
+			filenames[numnames++] = pstrdup(file->d_name);
+		}
+	}
+
+	if (errno)	/* non-zero only if the final readdir() call failed */
+	{
+		fprintf(stderr, _("could not read directory \"%s\": %s\n"),
+				path, strerror(errno));
+	}
+
+	filenames[numnames] = NULL;	/* a partial list is still returned after a read error */
+
+	if (closedir(dir))
+	{
+		fprintf(stderr, _("could not close directory \"%s\": %s\n"),
+				path, strerror(errno));
+	}
+
+	return filenames;
+}
+
+
+/*
+ *	pgfnames_cleanup
+ *
+ *	free each name in a NULL-terminated list, then the list itself
+ *	(filenames must not be NULL: it is dereferenced unconditionally)
+ */
+void
+pgfnames_cleanup(char **filenames)
+{
+	char	  **fn;
+
+	for (fn = filenames; *fn; fn++)
+		pfree(*fn);
+
+	pfree(filenames);
+}
+
+
+/*
+ *	rmtree
+ *
+ *	Delete a directory tree recursively.
+ *	Assumes path points to a valid directory.
+ *	Deletes everything under path.
+ *	If rmtopdir is true deletes the directory too.
+ *	Returns true if successful, false if there was any problem.
+ *	(No diagnostics are printed here for lstat/unlink/rmdir failures,
+ *	so the caller should report a false result itself.)
+ */
+bool
+rmtree(const char *path, bool rmtopdir)
+{
+	bool		result = true;
+	char		pathbuf[MAXPGPATH];
+	char	  **filenames;
+	char	  **filename;
+	struct stat statbuf;
+
+	/*
+	 * we copy all the names out of the directory before we start modifying
+	 * it.
+	 */
+	filenames = pgfnames(path);
+
+	if (filenames == NULL)
+		return false;
+
+	/* now we have the names we can start removing things */
+	for (filename = filenames; *filename; filename++)
+	{
+		snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename);
+
+		/*
+		 * It's ok if the file is not there anymore; we were just about to
+		 * delete it anyway.
+		 *
+		 * This is not an academic possibility. One scenario where this
+		 * happens is when bgwriter has a pending unlink request for a file in
+		 * a database that's being dropped. In dropdb(), we call
+		 * ForgetDatabaseFsyncRequests() to flush out any such pending unlink
+		 * requests, but because that's asynchronous, it's not guaranteed that
+		 * the bgwriter receives the message in time.
+		 */
+		if (lstat(pathbuf, &statbuf) != 0)
+		{
+			if (errno != ENOENT)
+			{
+				result = false;
+			}
+			continue;
+		}
+
+		if (S_ISDIR(statbuf.st_mode))
+		{
+			/* call ourselves recursively for a directory */
+			if (!rmtree(pathbuf, true))
+			{
+				/* remember the failure but keep deleting the siblings */
+				result = false;
+			}
+		}
+		else
+		{
+			if (unlink(pathbuf) != 0)
+			{
+				if (errno != ENOENT)
+				{
+					result = false;
+				}
+			}
+		}
+	}
+
+	if (rmtopdir)
+	{
+		if (rmdir(path) != 0)
+		{
+			result = false;
+		}
+	}
+
+	pgfnames_cleanup(filenames);
+
+	return result;
+}
+
--- a/dirmod.h
+++ b/dirmod.h
@@ -0,0 +1,23 @@
+/*
+ * dirmod.h
+ * Copyright (c) 2ndQuadrant, 2010-2016
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef _DIRMOD_H_
+#define _DIRMOD_H_
+
+#endif
--- a/errcode.h
+++ b/errcode.h
@@ -29,7 +29,6 @@
 #define ERR_DB_CON 6
 #define ERR_DB_QUERY 7
 #define ERR_PROMOTED 8
-#define ERR_BAD_PASSWORD 9
 #define ERR_STR_OVERFLOW 10
 #define ERR_FAILOVER_FAIL 11
 #define ERR_BAD_SSH 12
@@ -37,5 +36,7 @@
 #define ERR_BAD_BASEBACKUP 14
 #define ERR_INTERNAL 15
 #define ERR_MONITORING_FAIL 16
+#define ERR_BAD_BACKUP_LABEL 17
+#define ERR_SWITCHOVER_FAIL 18

 #endif   /* _ERRCODE_H_ */
--- a/log.c
+++ b/log.c
@@ -40,7 +40,8 @@
 /* #define REPMGR_DEBUG */

 static int	detect_log_facility(const char *facility);
-static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap);
+static void _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_list ap)
+__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));

 int			log_type = REPMGR_STDERR;
 int			log_level = LOG_NOTICE;
@@ -48,7 +49,7 @@ int			last_log_level = LOG_NOTICE;
 int			verbose_logging = false;
 int			terse_logging = false;

-void
+extern void
 stderr_log_with_level(const char *level_name, int level, const char *fmt, ...)
 {
 	va_list		arglist;
--- a/log.h
+++ b/log.h
@@ -25,7 +25,7 @@
 #define REPMGR_SYSLOG 1
 #define REPMGR_STDERR 2

-void
+extern void
 stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));

@@ -123,8 +123,10 @@ bool		logger_shutdown(void);
 void		logger_set_verbose(void);
 void		logger_set_terse(void);

-void		log_hint(const char *fmt, ...);
-void		log_verbose(int level, const char *fmt, ...);
+void		log_hint(const char *fmt, ...)
+__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2)));
+void		log_verbose(int level, const char *fmt, ...)
+__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));

 extern int	log_type;
 extern int	log_level;
--- a/repmgr.c
+++ b/repmgr.c
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -15,21 +15,29 @@
 # schema (pattern: "repmgr_{cluster}"); while this name will be quoted
 # to preserve case, we recommend using lower case and avoiding whitespace
 # to facilitate easier querying of the repmgr views and tables.
-cluster=example_cluster
+#cluster=example_cluster

 # Node ID and name
 # (Note: we recommend to avoid naming nodes after their initial
-#  replication funcion, as this will cause confusion when e.g.
+#  replication function, as this will cause confusion when e.g.
 #  "standby2" is promoted to primary)
-node=2           # a unique integer
-node_name=node2  # an arbitrary (but unique) string; we recommend using
+#node=2           # a unique integer
+#node_name=node2  # an arbitrary (but unique) string; we recommend using
                 # the server's hostname or another identifier unambiguously
                 # associated with the server to avoid confusion

 # Database connection information as a conninfo string
 # This must be accessible to all servers in the cluster; for details see:
-#   http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
-conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
+#
+#   https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
+#
+#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
+#
+# If repmgrd is in use, consider explicitly setting `connect_timeout` in the
+# conninfo string to determine the length of time which elapses before
+# a network connection attempt is abandoned; for details see:
+#
+#   https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT

 # Optional configuration items
 # ============================
@@ -37,15 +45,16 @@ conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
 # Replication settings
 # ---------------------

-# when using cascading replication and a standby is to be connected to an
-# upstream standby, specify that node's ID with 'upstream_node'. The node
-# must exist before the new standby can be registered. If a standby is
-# to connect directly to a primary node, this parameter is not required.
-upstream_node=1
+# When using cascading replication, a standby can connect to another
+# upstream standby node which is specified by setting 'upstream_node'.
+# In that case, the upstream node must exist before the new standby
+# can be registered. If 'upstream_node' is not set, then the standby
+# will connect directly to the primary node.
+#upstream_node=1

 # use physical replication slots - PostgreSQL 9.4 and later only
 # (default: 0)
-use_replication_slots=0
+#use_replication_slots=0

 # NOTE: 'max_replication_slots' should be configured for at least the
 # number of standbys which will connect to the primary.
@@ -55,15 +64,15 @@ use_replication_slots=0

 # Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
 # (default: NOTICE)
-loglevel=NOTICE
+#loglevel=NOTICE

 # Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
 # (default: STDERR)
-logfacility=STDERR
+#logfacility=STDERR

 # stderr can be redirected to an arbitrary file:
 #
-logfile='/var/log/repmgr/repmgr.log'
+#logfile='/var/log/repmgr/repmgr.log'

 # event notifications can be passed to an arbitrary external program
 # together with the following parameters:
@@ -77,12 +86,12 @@ logfile='/var/log/repmgr/repmgr.log'
 # the values provided for "%t" and "%d" will probably contain spaces,
 # so should be quoted in the provided command configuration, e.g.:
 #
-event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
+#event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'

 # By default, all notifications will be passed; the notification types
 # can be filtered to explicitly named ones:
 #
-event_notifications=master_register,standby_register,witness_create
+#event_notifications=master_register,standby_register,witness_create


 # Environment/command settings
@@ -90,17 +99,17 @@ event_notifications=master_register,standby_register,witness_create

 # path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
 # (if not provided, defaults to system $PATH)
-pg_bindir=/usr/bin/
+#pg_bindir=/usr/bin/

 # external command options

-rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
-ssh_options=-o "StrictHostKeyChecking no"
+#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
+#ssh_options=-o "StrictHostKeyChecking no"

 # external command arguments. Values shown are examples.

-pg_ctl_options='-s'
-pg_basebackup_options='--xlog-method=s'
+#pg_ctl_options='-s'
+#pg_basebackup_options='--xlog-method=s'


 # Standby clone settings
@@ -112,6 +121,10 @@ pg_basebackup_options='--xlog-method=s'
 #
 # tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace

+# You can specify a restore_command to be used in the recovery.conf that
+# will be placed in the cloned standby
+#
+# restore_command = cp /path/to/archived/wals/%f %p

 # Failover settings (repmgrd)
 # ---------------------------
@@ -122,27 +135,30 @@ pg_basebackup_options='--xlog-method=s'
 # Number of seconds to wait for a response from the primary server before
 # deciding it has failed.

-master_response_timeout=60
+#master_response_timeout=60

 # Number of attempts at what interval (in seconds) to try and
 # connect to  a server to establish its status (e.g. master
 # during failover)
-reconnect_attempts=6
-reconnect_interval=10
+#reconnect_attempts=6
+#reconnect_interval=10

 # Autofailover options
-failover=manual     # one of 'automatic', 'manual'
+#failover=manual    # one of 'automatic', 'manual'
                    # (default: manual)
-priority=100        # a value of zero or less prevents the node being promoted to primary
+#priority=100       # a value of zero or less prevents the node being promoted to primary
                    # (default: 100)
-promote_command='repmgr standby promote -f /path/to/repmgr.conf'
-follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
+#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
+#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'

 # monitoring interval in seconds; default is 2
-monitor_interval_secs=2
+#monitor_interval_secs=2

 # change wait time for primary; before we bail out and exit when the primary
 # disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
 # seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
 # default value is 300)
-retry_promote_interval_secs=300
+#retry_promote_interval_secs=300
+
+# Number of seconds after which the witness server resyncs the repl_nodes table
+#witness_repl_nodes_sync_interval_secs=15
--- a/repmgr.h
+++ b/repmgr.h
@@ -28,12 +28,11 @@
 #include "dbutils.h"
 #include "errcode.h"
 #include "config.h"
+#include "dirmod.h"

 #define MIN_SUPPORTED_VERSION		"9.3"
 #define MIN_SUPPORTED_VERSION_NUM	90300

-#include "config.h"
-#define MAXFILENAME		1024
 #define ERRBUFF_SIZE	512

 #define DEFAULT_WAL_KEEP_SEGMENTS	"5000"
@@ -57,8 +56,8 @@ typedef struct
 	char		dbname[MAXLEN];
 	char		host[MAXLEN];
 	char		username[MAXLEN];
-	char		dest_dir[MAXFILENAME];
-	char		config_file[MAXFILENAME];
+	char		dest_dir[MAXPGPATH];
+	char		config_file[MAXPGPATH];
 	char		remote_user[MAXLEN];
 	char		superuser[MAXLEN];
 	char		wal_keep_segments[MAXLEN];
@@ -71,7 +70,7 @@ typedef struct
 	bool		rsync_only;
 	bool		fast_checkpoint;
 	bool		ignore_external_config_files;
-	char		pg_ctl_mode[MAXLEN];
+	bool		csv_mode;
 	char		masterport[MAXLEN];
 	/*
 	 * configuration file parameters which can be overridden on the
@@ -81,7 +80,8 @@ typedef struct

 	/* parameter used by STANDBY SWITCHOVER */
 	char		remote_config_file[MAXLEN];
-	char		pg_rewind[MAXFILENAME];
+	char		pg_rewind[MAXPGPATH];
+	char		pg_ctl_mode[MAXLEN];
 	/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
 	char		config_archive_dir[MAXLEN];
 	/* parameter used by CLUSTER CLEANUP */
@@ -96,7 +96,19 @@ typedef struct
 	bool		initdb_no_pwprompt;
 }	t_runtime_options;

-#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false }
+#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, "", "", "", "", "fast", "", 0, "", "", "", false }
+
+struct BackupLabel
+{
+	XLogRecPtr start_wal_location;
+	char start_wal_file[MAXLEN];
+	XLogRecPtr checkpoint_location;
+	char backup_from[MAXLEN];
+	char backup_method[MAXLEN];
+	char start_time[MAXLEN];
+	char label[MAXLEN];
+	XLogRecPtr min_failover_slot_lsn;
+};

 extern char		repmgr_schema[MAXLEN];
 extern bool		config_file_found;
--- a/repmgrd.c
+++ b/repmgrd.c
@@ -44,11 +44,11 @@


 /* Local info */
-t_configuration_options local_options;
+t_configuration_options local_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
 PGconn	   *my_local_conn = NULL;

 /* Master info */
-t_configuration_options master_options;
+t_configuration_options master_options = T_CONFIGURATION_OPTIONS_INITIALIZER;

 PGconn	   *master_conn = NULL;

@@ -61,8 +61,6 @@ bool		failover_done = false;

 char	   *pid_file = NULL;

-t_configuration_options config = T_CONFIGURATION_OPTIONS_INITIALIZER;
-
 static void help(void);
 static void usage(void);
 static void check_cluster_configuration(PGconn *conn);
@@ -274,7 +272,14 @@ main(int argc, char **argv)
 	/* Retrieve record for this node from the local database */
 	node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);

-	/* No node record found - exit gracefully */
+	/*
+	 * No node record found - exit gracefully
+	 *
+	 * Note: it's highly unlikely this situation will occur when starting
+	 * repmgrd on a witness, unless someone goes to the trouble of
+	 * deleting the node record from the previously copied table.
+	 */
+
 	if (node_info.node_id == NODE_NOT_FOUND)
 	{
 		log_err(_("No metadata record found for this node - terminating\n"));
@@ -291,9 +296,12 @@ main(int argc, char **argv)
 	 */
 	do
 	{
+		/* Timer for repl_nodes synchronisation interval */
+		int sync_repl_nodes_elapsed = 0;
+
 		/*
 		 * Set my server mode, establish a connection to master and start
-		 * monitor
+		 * monitoring
 		 */

 		switch (node_info.type)
@@ -389,12 +397,12 @@ main(int argc, char **argv)
 			case STANDBY:

 				/* We need the node id of the master server as well as a connection to it */
-				log_info(_("connecting to master node '%s'\n"),
+				log_info(_("connecting to master node of cluster '%s'\n"),
 						 local_options.cluster_name);

 				master_conn = get_master_connection(my_local_conn,
-													 local_options.cluster_name,
-													 &master_options.node, NULL);
+													local_options.cluster_name,
+													&master_options.node, NULL);

 				if (master_conn == NULL)
 				{
@@ -402,8 +410,7 @@ main(int argc, char **argv)
 					initPQExpBuffer(&errmsg);

 					appendPQExpBuffer(&errmsg,
-									  _("unable to connect to master node '%s'"),
-									  master_options.node_name);
+									  _("unable to connect to master node"));

 					log_err("%s\n", errmsg.data);

@@ -453,19 +460,37 @@ main(int argc, char **argv)

 				do
 				{
-					log_verbose(LOG_DEBUG, "standby check loop...\n");
-
-					if (node_info.type == WITNESS)
-					{
-						witness_monitor();
-					}
-					else if (node_info.type == STANDBY)
+					if (node_info.type == STANDBY)
 					{
+						log_verbose(LOG_DEBUG, "standby check loop...\n");
 						standby_monitor();
 					}
+					else if (node_info.type == WITNESS)
+					{
+						log_verbose(LOG_DEBUG, "witness check loop...\n");
+						witness_monitor();
+					}

 					sleep(local_options.monitor_interval_secs);

+					/*
+					 * On a witness node, regularly resync the repl_nodes table
+					 * to keep up with any changes on the primary
+					 *
+					 * TODO: only resync the table if changes actually detected
+					 */
+					if (node_info.type == WITNESS)
+					{
+						sync_repl_nodes_elapsed += local_options.monitor_interval_secs;
+						log_debug(_("seconds since last node record sync: %i (sync interval: %i)\n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs);
+						if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs)
+						{
+							log_debug(_("Resyncing repl_nodes table\n"));
+							witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);
+							sync_repl_nodes_elapsed = 0;
+						}
+					}
+
 					if (got_SIGHUP)
 					{
 						/*
@@ -480,6 +505,7 @@ main(int argc, char **argv)
 						}
 						got_SIGHUP = false;
 					}
+
 					if (failover_done)
 					{
 						log_debug(_("standby check loop will terminate\n"));
@@ -572,7 +598,7 @@ witness_monitor(void)
 				 * XXX it would be neat to be able to handle this with e.g. table-based
 				 * logical replication
 				 */
-				copy_configuration(master_conn, my_local_conn, local_options.cluster_name);
+				witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);

 				break;
 			}
@@ -639,7 +665,7 @@ witness_monitor(void)
 					  "            replication_lag, apply_lag )"
 					  "      VALUES(%d, %d, "
 					  "             '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
-					  "             pg_current_xlog_location(), NULL, "
+					  "             pg_catalog.pg_current_xlog_location(), NULL, "
 					  "             0, 0) ",
 					  get_repmgr_schema_quoted(my_local_conn),
 					  master_options.node,
@@ -667,16 +693,19 @@ standby_monitor(void)
 {
 	PGresult   *res;
 	char		monitor_standby_timestamp[MAXLEN];
-	char		last_wal_master_location[MAXLEN];
-	char		last_wal_standby_received[MAXLEN];
-	char		last_wal_standby_applied[MAXLEN];
-	char		last_wal_standby_applied_timestamp[MAXLEN];
-	bool		last_wal_standby_received_gte_replayed;
+	char		last_wal_primary_location[MAXLEN];
+	char		last_xlog_receive_location[MAXLEN];
+	char		last_xlog_replay_location[MAXLEN];
+	char		last_xact_replay_timestamp[MAXLEN];
+	bool		last_xlog_receive_location_gte_replayed;
 	char		sqlquery[QUERY_STR_LEN];

-	XLogRecPtr	lsn_master;
-	XLogRecPtr	lsn_standby_received;
-	XLogRecPtr	lsn_standby_applied;
+	XLogRecPtr	lsn_master_current_xlog_location;
+	XLogRecPtr	lsn_last_xlog_receive_location;
+	XLogRecPtr	lsn_last_xlog_replay_location;
+
+	long long unsigned int replication_lag;
+	long long unsigned int apply_lag;

 	int			connection_retries,
 				ret;
@@ -688,8 +717,9 @@ standby_monitor(void)
 	t_node_info upstream_node;

 	int			active_master_id;
-	const char *type = NULL;
+	const char *upstream_node_type = NULL;

+	bool		receiving_streamed_wal = true;
 	/*
 	 * Verify that the local node is still available - if not there's
 	 * no point in doing much else anyway
@@ -714,19 +744,19 @@ standby_monitor(void)
 	upstream_conn = get_upstream_connection(my_local_conn,
 											local_options.cluster_name,
 											local_options.node,
-											&upstream_node_id, upstream_conninfo);
+											&upstream_node_id,
+											upstream_conninfo);

-	type = upstream_node_id == master_options.node
+	upstream_node_type = (upstream_node_id == master_options.node)
 		? "master"
 		: "upstream";

-	// ZZZ "5 minutes"?
 	/*
-	 * Check if the upstream node is still available, if after 5 minutes of retries
-	 * we cannot reconnect, try to get a new upstream node.
+	 * Check that the upstream node is still available
+	 * If not, initiate failover process
 	 */

-	check_connection(&upstream_conn, type, upstream_conninfo);
+	check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
 	/*
 	 * This takes up to local_options.reconnect_attempts *
 	 * local_options.reconnect_interval seconds
@@ -739,7 +769,7 @@ standby_monitor(void)

 		if (local_options.failover == MANUAL_FAILOVER)
 		{
-			log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), type);
+			log_err(_("Unable to reconnect to %s. Now checking if another node has been promoted.\n"), upstream_node_type);

 			for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
 			{
@@ -790,26 +820,24 @@ standby_monitor(void)
 		else if (local_options.failover == AUTOMATIC_FAILOVER)
 		{
 			/*
-			 * When we returns from this function we will have a new master
+			 * When we return from this function we will have a new master
 			 * and a new master_conn
-			 */
-
-			/*
+			 *
 			 * Failover handling is handled differently depending on whether
 			 * the failed node is the master or a cascading standby
 			 */
-			upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
+			upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);

-            if (upstream_node.type == MASTER)
-            {
-                log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"),
-                          node_info.upstream_node_id);
-                do_master_failover();
-            }
-            else
-            {
-                log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"),
-                          node_info.upstream_node_id);
+			if (upstream_node.type == MASTER)
+			{
+				log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"),
+						  upstream_node_id);	/* same id used to fetch upstream_node above */
+				do_master_failover();
+			}
+			else
+			{
+				log_debug(_("failure detected on upstream node %i; attempting to reconnect to new upstream node\n"),
+						  upstream_node_id);

 				if (!do_upstream_standby_failover(upstream_node))
 				{
@@ -817,20 +845,20 @@ standby_monitor(void)
 					initPQExpBuffer(&errmsg);

 					appendPQExpBuffer(&errmsg,
-									  _("unable to reconnect to new upstream node, terminating..."));
+							  _("unable to reconnect to new upstream node, terminating..."));

 					log_err("%s\n", errmsg.data);

 					create_event_record(master_conn,
-										&local_options,
-										local_options.node,
-										"repmgrd_shutdown",
-										false,
-										errmsg.data);
+							    &local_options,
+							    local_options.node,
+							    "repmgrd_shutdown",
+							    false,
+							    errmsg.data);

 					terminate(ERR_DB_CON);
 				}
-            }
+			}
 			return;
 		}
 	}
@@ -901,7 +929,7 @@ standby_monitor(void)
 	 * from the upstream node to write monitoring information
 	 */

-	upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
+	upstream_node = get_node_info(my_local_conn, local_options.cluster_name, upstream_node_id);

 	sprintf(sqlquery,
 			"SELECT id "
@@ -933,7 +961,7 @@ standby_monitor(void)

 	if (active_master_id != master_options.node)
 	{
-		log_notice(_("connecting to active master (node %i)...\n"), active_master_id); \
+		log_notice(_("connecting to active master (node %i)...\n"), active_master_id);
 		if (master_conn != NULL)
 		{
 			PQfinish(master_conn);
@@ -956,9 +984,11 @@ standby_monitor(void)

 	/* Get local xlog info */
 	sqlquery_snprintf(sqlquery,
-					  "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
-					  "pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp(), "
-					  "pg_last_xlog_receive_location() >= pg_last_xlog_replay_location()");
+					  "SELECT CURRENT_TIMESTAMP, "
+					  "pg_catalog.pg_last_xlog_receive_location(), "
+					  "pg_catalog.pg_last_xlog_replay_location(), "
+					  "pg_catalog.pg_last_xact_replay_timestamp(), "
+					  "pg_catalog.pg_last_xlog_receive_location() >= pg_catalog.pg_last_xlog_replay_location()");

 	res = PQexec(my_local_conn, sqlquery);
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -970,32 +1000,50 @@ standby_monitor(void)
 	}

 	strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
-	strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
-	strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
-	strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
-	last_wal_standby_received_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
+	strncpy(last_xlog_receive_location, PQgetvalue(res, 0, 1), MAXLEN);
+	strncpy(last_xlog_replay_location, PQgetvalue(res, 0, 2), MAXLEN);
+	strncpy(last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
+
+	last_xlog_receive_location_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
 		? true
 		: false;

+	/*
+	 * If pg_last_xlog_receive_location is NULL, this means we're in archive
+	 * recovery and will need to calculate lag based on pg_last_xlog_replay_location
+	 */
+
+	/*
+	 * No receive location reported: flag that we're not receiving streamed WAL
+	 */
+	if (PQgetisnull(res, 0, 1))
+	{
+		receiving_streamed_wal = false;
+	}
+
 	PQclear(res);

 	/*
 	 * In the unusual event of a standby becoming disconnected from the primary,
 	 * while this repmgrd remains connected to the primary,  subtracting
-	 * "lsn_standby_applied" from "lsn_standby_received" and coercing to
+	 * "lsn_last_xlog_replay_location" from "lsn_last_xlog_receive_location" and coercing to
 	 * (long long unsigned int) will result in a meaningless, very large
 	 * value which will overflow a BIGINT column and spew error messages into the
-	 * PostgreSQL log. In the absence of a better strategy, skip attempting
-	 * to insert a monitoring record.
+	 * PostgreSQL log. Only a warning is emitted here; the lag calculation
+	 * below then uses the replay location in place of the receive location.
 	 */
-	if (last_wal_standby_received_gte_replayed == false)
+	if (receiving_streamed_wal == true && last_xlog_receive_location_gte_replayed == false)
 	{
 		log_verbose(LOG_WARNING,
-					"Invalid replication_lag value calculated - is this standby connected to its upstream?\n");
-		return;
+					"Replayed WAL newer than received WAL - is this standby connected to its upstream?\n");
 	}

-	/* Get master xlog info */
+	/*
+	 * Get master xlog position
+	 *
+	 * TODO: investigate whether pg_current_xlog_insert_location() would be a better
+	 * choice; see: https://github.com/2ndQuadrant/repmgr/issues/189
+	 */
 	sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_current_xlog_location()");

 	res = PQexec(master_conn, sqlquery);
@@ -1006,34 +1054,73 @@ standby_monitor(void)
 		return;
 	}

-	strncpy(last_wal_master_location, PQgetvalue(res, 0, 0), MAXLEN);
+	strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN);
 	PQclear(res);

-	/* Calculate the lag */
-	lsn_master = lsn_to_xlogrecptr(last_wal_master_location, NULL);
-	lsn_standby_received = lsn_to_xlogrecptr(last_wal_standby_received, NULL);
-	lsn_standby_applied = lsn_to_xlogrecptr(last_wal_standby_applied, NULL);
+	lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_primary_location, NULL);
+	lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
+
+	/* Calculate apply lag (received WAL position minus replayed WAL position) */
+	if (last_xlog_receive_location_gte_replayed == false)
+	{
+		/*
+		 * Receive location is NULL (no streamed WAL) or behind the replay location;
+		 * treat the replay location as the receive location
+		 */
+		apply_lag = 0;
+		strncpy(last_xlog_receive_location, last_xlog_replay_location, MAXLEN);
+		lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
+	}
+	else
+	{
+		lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);	/* must be parsed before it is used below */
+		apply_lag = (long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location);
+	}
+
+	/* Calculate replication lag */
+	if (lsn_master_current_xlog_location >= lsn_last_xlog_receive_location)
+	{
+		replication_lag = (long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location);
+	}
+	else
+	{
+		/* This should never happen, but in case it does set lag to zero */
+		log_warning("Master xlog (%s) location appears less than standby receive location (%s)\n",
+					last_wal_primary_location,
+					last_xlog_receive_location);
+		replication_lag = 0;
+	}

 	/*
 	 * Build the SQL to execute on master
 	 */
 	sqlquery_snprintf(sqlquery,
 					  "INSERT INTO %s.repl_monitor "
-					  "           (primary_node, standby_node, "
-					  "            last_monitor_time, last_apply_time, "
-					  "            last_wal_primary_location, last_wal_standby_location, "
-					  "            replication_lag, apply_lag ) "
-					  "     VALUES(%d, %d, "
-					  "            '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
-					  "            '%s', '%s', "
-					  "            %llu, %llu) ",
+					  "           (primary_node, "
+					  "            standby_node, "
+					  "            last_monitor_time, "
+					  "            last_apply_time, "
+					  "            last_wal_primary_location, "
+					  "            last_wal_standby_location, "
+					  "            replication_lag, "
+					  "            apply_lag ) "
+					  "     VALUES(%d, "
+					  "            %d, "
+					  "            '%s'::TIMESTAMP WITH TIME ZONE, "
+					  "            '%s'::TIMESTAMP WITH TIME ZONE, "
+					  "            '%s', "
+					  "            '%s', "
+					  "            %llu, "
+					  "            %llu) ",
 					  get_repmgr_schema_quoted(master_conn),
-					  master_options.node, local_options.node,
-					  monitor_standby_timestamp, last_wal_standby_applied_timestamp,
-					  last_wal_master_location, last_wal_standby_received,
-					  (long long unsigned int)(lsn_master - lsn_standby_received),
-					  (long long unsigned int)(lsn_standby_received - lsn_standby_applied));
-
+					  master_options.node,
+					  local_options.node,
+					  monitor_standby_timestamp,
+					  last_xact_replay_timestamp,
+					  last_wal_primary_location,
+					  last_xlog_receive_location,
+					  replication_lag,
+					  apply_lag);
 	/*
 	 * Execute the query asynchronously, but don't check for a result. We will
 	 * check the result next time we pause for a monitor step.
@@ -1070,7 +1157,7 @@ do_master_failover(void)
 	XLogRecPtr	xlog_recptr;
 	bool		lsn_format_ok;

-	char		last_wal_standby_applied[MAXLEN];
+	char		last_xlog_replay_location[MAXLEN];

 	PGconn	   *node_conn = NULL;

@@ -1080,8 +1167,8 @@ do_master_failover(void)
 	 */
 	t_node_info nodes[FAILOVER_NODES_MAX_CHECK];

-    /* Store details of the failed node here */
-    t_node_info failed_master = T_NODE_INFO_INITIALIZER;
+	/* Store details of the failed node here */
+	t_node_info failed_master = T_NODE_INFO_INITIALIZER;

 	/* Store details of the best candidate for promotion to master here */
 	t_node_info best_candidate = T_NODE_INFO_INITIALIZER;
@@ -1091,7 +1178,7 @@ do_master_failover(void)
 			"SELECT id, conninfo, type, upstream_node_id "
 			"  FROM %s.repl_nodes "
 			" WHERE cluster = '%s' "
-            "   AND active IS TRUE "
+		        "   AND active IS TRUE "
 			"   AND priority > 0 "
 			" ORDER BY priority DESC, id "
 			" LIMIT %i ",
@@ -1104,7 +1191,6 @@ do_master_failover(void)
 	{
 		log_err(_("unable to retrieve node records: %s\n"), PQerrorMessage(my_local_conn));
 		PQclear(res);
-		PQfinish(my_local_conn);
 		terminate(ERR_DB_QUERY);
 	}

@@ -1253,8 +1339,8 @@ do_master_failover(void)
 				  " considered as new master and exit.\n"),
 				PQerrorMessage(my_local_conn));
 		PQclear(res);
-		sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
-		update_shared_memory(last_wal_standby_applied);
+		sprintf(last_xlog_replay_location, "'%X/%X'", 0, 0);
+		update_shared_memory(last_xlog_replay_location);
 		terminate(ERR_DB_QUERY);
 	}
 	/* write last location in shared memory */
@@ -1384,9 +1470,6 @@ do_master_failover(void)
 		PQfinish(node_conn);
 	}

-	/* Close the connection to this server */
-	PQfinish(my_local_conn);
-	my_local_conn = NULL;

 	/*
 	 * determine which one is the best candidate to promote to master
@@ -1434,18 +1517,24 @@ do_master_failover(void)
 		terminate(ERR_FAILOVER_FAIL);
 	}

+	log_debug("best candidate node id is %i\n", best_candidate.node_id);
+
 	/* if local node is the best candidate, promote it */
 	if (best_candidate.node_id == local_options.node)
 	{
 		PQExpBufferData event_details;

+		/* Close the connection to this server */
+		PQfinish(my_local_conn);
+		my_local_conn = NULL;
+
 		initPQExpBuffer(&event_details);
 		/* wait */
 		sleep(5);

 		log_notice(_("this node is the best candidate to be the new master, promoting...\n"));

-		log_debug(_("promote command is: \"%s\"\n"),
+		log_debug("promote command is: \"%s\"\n",
 				  local_options.promote_command);

 		if (log_type == REPMGR_STDERR && *local_options.logfile)
@@ -1456,6 +1545,33 @@ do_master_failover(void)
 		r = system(local_options.promote_command);
 		if (r != 0)
 		{
+			/*
+			 * Check whether the primary reappeared, which will have caused the
+			 * promote command to fail
+			 */
+			my_local_conn = establish_db_connection(local_options.conninfo, false);
+
+			if (my_local_conn != NULL)
+			{
+				int master_node_id;
+
+				master_conn = get_master_connection(my_local_conn,
+													local_options.cluster_name,
+													&master_node_id, NULL);
+
+				if (master_conn != NULL && master_node_id == failed_master.node_id)
+				{
+					log_notice(_("Original master reappeared before this standby was promoted - no action taken\n"));
+
+					PQfinish(master_conn);
+					master_conn = NULL;
+
+					/* no failover occurred but we'll want to restart connections */
+					failover_done = true;
+					return;
+				}
+			}
+
 			log_err(_("promote command failed. You could check and try it manually.\n"));

 			terminate(ERR_DB_QUERY);
@@ -1487,11 +1603,39 @@ do_master_failover(void)
 	{
 		PGconn	   *new_master_conn;
 		PQExpBufferData event_details;
+		int master_node_id;

 		initPQExpBuffer(&event_details);
+
 		/* wait */
 		sleep(10);

+		/*
+		 * Check whether the primary reappeared while we were waiting, so we
+		 * don't end up following the promotion candidate
+		 */
+
+		master_conn = get_master_connection(my_local_conn,
+											local_options.cluster_name,
+											&master_node_id, NULL);
+
+		if (master_conn != NULL && master_node_id == failed_master.node_id)
+		{
+			log_notice(_("Original master reappeared - no action taken\n"));
+
+			PQfinish(master_conn);
+			master_conn = NULL;	/* don't leave a dangling handle to the closed connection */
+			/* no failover occurred but we'll want to restart connections */
+			failover_done = true;
+			return;
+		}
+
+		/* Close the connection to this server */
+		PQfinish(my_local_conn);
+		my_local_conn = NULL;
+
+		/* XXX double-check the promotion candidate did become the new primary */
+
 		log_notice(_("node %d is the best candidate for new master, attempting to follow...\n"),
 				 best_candidate.node_id);

@@ -1615,7 +1759,7 @@ do_upstream_standby_failover(t_node_info upstream_node)

 		if (PQntuples(res) == 0)
 		{
-			log_err(_("no node with id %i found"), upstream_node_id);
+			log_err(_("no node with id %i found\n"), upstream_node_id);
 			PQclear(res);
 			return false;
 		}
@@ -1780,7 +1924,7 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
 static bool
 set_local_node_status(void)
 {
-        PGresult       *res;
+	PGresult       *res;
 	char		sqlquery[QUERY_STR_LEN];
 	int		active_master_node_id = NODE_NOT_FOUND;
 	char		master_conninfo[MAXLEN];
@@ -1873,10 +2017,12 @@ check_cluster_configuration(PGconn *conn)
 	log_info(_("checking cluster configuration with schema '%s'\n"), get_repmgr_schema());

 	sqlquery_snprintf(sqlquery,
-					  "SELECT oid FROM pg_class "
+					  "SELECT oid FROM pg_catalog.pg_class "
 					  " WHERE oid = '%s.repl_nodes'::regclass ",
-			                  get_repmgr_schema_quoted(master_conn));
+					  get_repmgr_schema_quoted(master_conn));
+
 	res = PQexec(conn, sqlquery);
+
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
 		log_err(_("PQexec failed: %s\n"), PQerrorMessage(conn));
@@ -1948,6 +2094,8 @@ check_node_configuration(void)
 		/* Adding the node */
 		log_info(_("adding node %d to cluster '%s'\n"),
 				 local_options.node, local_options.cluster_name);
+
+		/* XXX use create_node_record() */
 		sqlquery_snprintf(sqlquery,
 						  "INSERT INTO %s.repl_nodes"
 						  "           (id, cluster, name, conninfo, priority, witness) "
@@ -2069,7 +2217,7 @@ terminate(int retval)


 static void
-update_shared_memory(char *last_wal_standby_applied)
+update_shared_memory(char *last_xlog_replay_location)
 {
 	PGresult   *res;
 	char		sqlquery[QUERY_STR_LEN];
@@ -2077,7 +2225,7 @@ update_shared_memory(char *last_wal_standby_applied)
 	sprintf(sqlquery,
 			"SELECT %s.repmgr_update_standby_location('%s')",
 			get_repmgr_schema_quoted(my_local_conn),
-			last_wal_standby_applied);
+			last_xlog_replay_location);

 	/* If an error happens, just inform about that and continue */
 	res = PQexec(my_local_conn, sqlquery);
@@ -2293,12 +2441,14 @@ get_node_info(PGconn *conn, char *cluster, int node_id)
 							errmsg.data);

 		PQfinish(conn);
+		conn = NULL;
+
 		terminate(ERR_DB_QUERY);
 	}

 	if (res == 0)
 	{
-		log_warning(_("No record found record for node %i\n"), node_id);
+		log_warning(_("No record found for node %i\n"), node_id);
 	}

 	return node_info;
--- a/sql/repmgr2_repmgr3.sql
+++ b/sql/repmgr2_repmgr3.sql
@@ -63,6 +63,15 @@ UPDATE repl_nodes SET type = 'master' WHERE id = $master_id;

 -- UPDATE repl_nodes SET active = FALSE WHERE id IN (...);

+/* There's also an event table which we need to create */
+CREATE TABLE repl_events (
+  node_id          INTEGER NOT NULL,
+  event            TEXT NOT NULL,
+  successful       BOOLEAN NOT NULL DEFAULT TRUE,
+  event_timestamp  TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  details          TEXT NULL
+);
+
 /* When you're sure of your changes, commit them */

 -- COMMIT;
--- a/sql/repmgr3.1.1_repmgr3.1.2.sql
+++ b/sql/repmgr3.1.1_repmgr3.1.2.sql
@@ -0,0 +1,32 @@
+/*
+ * Update a repmgr 3.1.1 installation to repmgr 3.1.2
+ * --------------------------------------------------
+ *
+ * This update is only required if repmgrd is being used in conjunction
+ * with a witness server.
+ *
+ * The new repmgr package should be installed first. Then
+ * carry out these steps:
+ *
+ *   1. (If repmgrd is used) stop any running repmgrd instances
+ *   2. On the master node, execute the SQL statement listed below
+ *   3. (If repmgrd is used) restart repmgrd
+ */
+
+/*
+ * If your repmgr installation is not included in your repmgr
+ * user's search path, please set the search path to the name
+ * of the repmgr schema to ensure objects are installed in
+ * the correct location.
+ *
+ * The repmgr schema is "repmgr_" + the cluster name defined in
+ * 'repmgr.conf'.
+ */
+
+-- SET search_path TO 'name_of_repmgr_schema';
+
+BEGIN;
+
+ALTER TABLE repl_nodes DROP CONSTRAINT repl_nodes_upstream_node_id_fkey,
+      ADD CONSTRAINT repl_nodes_upstream_node_id_fkey FOREIGN KEY (upstream_node_id) REFERENCES repl_nodes(id) DEFERRABLE;
+COMMIT;
--- a/sql/repmgr_funcs.c
+++ b/sql/repmgr_funcs.c
@@ -83,7 +83,12 @@ _PG_init(void)
 	 * resources in repmgr_shmem_startup().
 	 */
 	RequestAddinShmemSpace(repmgr_memsize());
+
+#if (PG_VERSION_NUM >= 90600)
+	RequestNamedLWLockTranche("repmgr", 1);
+#else
 	RequestAddinLWLocks(1);
+#endif

 	/*
 	 * Install hooks.
@@ -128,7 +133,11 @@ repmgr_shmem_startup(void)
 	if (!found)
 	{
 		/* First time through ... */
+#if (PG_VERSION_NUM >= 90600)
+		shared_state->lock = &(GetNamedLWLockTranche("repmgr"))->lock;
+#else
 		shared_state->lock = LWLockAssign();
+#endif
 		snprintf(shared_state->location,
 				 sizeof(shared_state->location), "%X/%X", 0, 0);
 	}
--- a/strutil.h
+++ b/strutil.h
@@ -24,12 +24,17 @@
 #include <stdlib.h>
 #include "errcode.h"

+
 #define QUERY_STR_LEN	8192
 #define MAXLEN			1024
 #define MAXLINELENGTH	4096
 #define MAXVERSIONSTR	16
 #define MAXCONNINFO		1024

+/* Indirection so STR(MAXLEN) stringifies the macro's value, not its name; see http://stackoverflow.com/a/5459929/398670 */
+#define STR(x) CppAsString(x)
+
+#define MAXLEN_STR STR(MAXLEN)

 extern int
 xsnprintf(char *str, size_t size, const char *format,...)
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
 #ifndef _VERSION_H_
 #define _VERSION_H_

-#define REPMGR_VERSION "3.1.1"
+#define REPMGR_VERSION "3.1.4"

 #endif