From 5606434a97441b460567ba895b5a0cce6a88a9c1 Mon Sep 17 00:00:00 2001
From: Ian Barwick <ian@2ndquadrant.com>
Date: Thu, 27 Jul 2017 18:11:49 +0900
Subject: [PATCH] Initial BDR failover documentation

---
 doc/bdr-failover.md      | 168 +++++++++++++++++++++++++++++++++++++++
 scripts/bdr-pgbouncer.sh | 101 +++++++++++++++++++++++
 2 files changed, 269 insertions(+)
 create mode 100644 doc/bdr-failover.md
 create mode 100644 scripts/bdr-pgbouncer.sh

diff --git a/doc/bdr-failover.md b/doc/bdr-failover.md
new file mode 100644
index 00000000..cf4e5a1a
--- /dev/null
+++ b/doc/bdr-failover.md
@@ -0,0 +1,168 @@
+BDR failover with repmgrd
+=========================
+
+`repmgr 4` provides support for monitoring BDR nodes and taking action in case
+one of the nodes fails.
+
+    *NOTE* Due to the nature of BDR, it's only safe to use this solution for
+    a two-node scenario. Introducing additional nodes will create an inherent
+    risk of node desynchronisation if a node goes down without being cleanly
+    removed from the cluster.
+
+In contrast to streaming replication, there's no concept of "promoting" a new
+primary node with BDR. Instead, "failover" involves monitoring both nodes
+with `repmgrd` and redirecting queries from the failed node to the remaining
+active node. This can be done by having `repmgrd` execute a user-defined event
+notification script which dynamically reconfigures a proxy server/connection
+pooler such as PgBouncer.
+
+
+Prerequisites
+-------------
+
+`repmgr 4` requires PostgreSQL 9.6 with the BDR 2 extension enabled and
+configured for a two-node BDR network. `repmgr 4` packages
+must be installed on each node before attempting to configure repmgr.
+
+    *NOTE* `repmgr 4` will refuse to install if it detects more than two
+    BDR nodes.
+
+Application database connections *must* be passed through a proxy server/
+connection pooler such as PgBouncer, and it must be possible to dynamically
+reconfigure that from `repmgrd`. The example demonstrated in this document
+will use PgBouncer.
+
+The proxy server/connection pooler must not be installed on the database
+servers.
+
+
+Configuration
+-------------
+
+
+Sample configuration for `repmgr.conf`:
+
+    node_id=1
+    node_name='node1'
+    conninfo='host=node1 dbname=bdrtest user=repmgr connect_timeout=2'
+    replication_type='bdr'
+
+    event_notifications=bdr_failover
+    event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1'
+
+    # repmgrd options
+    reconnect_attempts=5
+    reconnect_interval=6
+
+Adjust settings as appropriate; copy and adjust for the second node (particularly
+the values `node_id`, `node_name` and `conninfo`).
+
+Note that the values provided for the `conninfo` string must be valid for
+connections from *both* nodes in the cluster.
+
+If defined, `event_notifications` will restrict execution of `event_notification_command`
+to the specified events.
+
+`event_notification_command` is the script which does the actual "heavy lifting"
+of reconfiguring the proxy server/connection pooler. It is fully user-definable;
+a sample implementation is documented below.
+
+
+repmgr setup
+------------
+
+Register both nodes:
+
+    $ repmgr -f /etc/repmgr.conf bdr register
+    NOTICE: attempting to install extension "repmgr"
+    NOTICE: "repmgr" extension successfully installed
+    NOTICE: node record created for node 'node1' (ID: 1)
+    NOTICE: BDR node 1 registered (conninfo: host=localhost dbname=bdrtest user=repmgr port=5501)
+
+    $ repmgr -f /etc/repmgr.conf bdr register
+    NOTICE: node record created for node 'node2' (ID: 2)
+    NOTICE: BDR node 2 registered (conninfo: host=localhost dbname=bdrtest user=repmgr port=5502)
+
+The `repmgr` extension will be automatically created when the first
+node is registered, and will be propagated to the second node.
+
+    *IMPORTANT* Ensure the repmgr package is available on both nodes before
+    attempting to register the first node.
+
+
+At this point the metadata for both nodes has been created; executing
+`repmgr cluster show` (on either node) should produce output like this:
+
+    $ repmgr -f /etc/repmgr.conf cluster show
+     ID | Name  | Role | Status    | Upstream | Connection string
+    ----+-------+------+-----------+----------+-----------------------------------------------------
+     1  | node1 | bdr  | * running |          | host=node1 dbname=bdrtest user=repmgr
+     2  | node2 | bdr  | * running |          | host=node2 dbname=bdrtest user=repmgr
+
+Additionally, `repmgr cluster event` shows a log of significant events; so far
+this will only record the two node registrations (in reverse chronological order):
+
+     Node ID | Event        | OK | Timestamp           | Details
+    ---------+--------------+----+---------------------+----------------------------------------------
+     2       | bdr_register | t  | 2017-07-27 17:51:48 | node record created for node 'node2' (ID: 2)
+     1       | bdr_register | t  | 2017-07-27 17:51:00 | node record created for node 'node1' (ID: 1)
+
+
+Defining the "event_notification_command"
+-----------------------------------------
+
+Key to "failover" execution is the `event_notification_command`, which is a
+user-definable script which should reconfigure the proxy server/connection
+pooler.
+
+Each time `repmgr` (or `repmgrd`) records an event, it can optionally
+execute the script defined in `event_notification_command` to
+take further action; details of the event will be passed as parameters.
+The following placeholders are available to the script:
+
+    %n - node ID
+    %e - event type
+    %s - success (1 or 0)
+    %t - timestamp
+    %d - details
+    %c - conninfo string of the next available node
+    %a - name of the next available node
+
+Note that `%c` and `%a` will only be provided during `bdr_failover`
+events, which is what is of interest here.
+
+The provided sample script (`scripts/bdr-pgbouncer.sh`) is configured like
+this:
+
+    event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a"'
+
+and parses the configured parameters like this:
+
+    NODE_ID=$1
+    EVENT_TYPE=$2
+    SUCCESS=$3
+    NEXT_CONNINFO=$4
+    NEXT_NODE_NAME=$5
+
+It also contains some hard-coded values about the PgBouncer configuration for
+both nodes; these will, of course, need to be adjusted for your local environment
+(ideally the scripts would be maintained as templates and generated by some
+kind of provisioning system).
+
+
+
+repmgrd
+-------
+
+Node failover
+-------------
+
+
+Node recovery
+-------------
+
+Following failure of a BDR node, if the node subsequently becomes available again,
+a `bdr_recovery` event will be generated. This could potentially be used to
+reconfigure PgBouncer automatically to bring the node back into the available pool;
+however, it would be prudent to manually verify the node's status before
+exposing it to the application.
diff --git a/scripts/bdr-pgbouncer.sh b/scripts/bdr-pgbouncer.sh
new file mode 100644
index 00000000..fa244cc9
--- /dev/null
+++ b/scripts/bdr-pgbouncer.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+#
+# Sample repmgr "event_notification_command" for a two-node BDR cluster: on a
+# "bdr_failover" event, repoint every PgBouncer instance at the next available
+# node.
+#
+# This assumes the repmgr "event_notification_command" is defined like this:
+#
+#   event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1'
+#
+# Adjust as appropriate.
+
+set -eu
+
+# Fail with a usage message rather than an opaque "unbound variable" error.
+if [ "$#" -lt 5 ]; then
+    echo "usage: $0 NODE_ID EVENT_TYPE SUCCESS NEXT_CONNINFO NEXT_NODE_NAME" >&2
+    exit 1
+fi
+
+# Process parameters passed to script
+NODE_ID=$1          # %n - node ID (not used below)
+EVENT_TYPE=$2       # %e - event type
+SUCCESS=$3          # %s - success, 1 or 0 (not used below)
+NEXT_CONNINFO=$4    # %c - conninfo string of the next available node
+NEXT_NODE_NAME=$5   # %a - name of the next available node
+
+# Only "bdr_failover" events carry the next available node's details.
+if [ "$EVENT_TYPE" != "bdr_failover" ]; then
+    echo "unable to handle event type '$EVENT_TYPE'"
+    exit 0
+fi
+
+# Name of the BDR-enabled database; assumed to be the same on both hosts.
+BDR_DBNAME=bdr_db
+
+# PgBouncer instances to reconfigure. The three arrays are parallel: entry N of
+# each describes the same instance.
+PGBOUNCER_HOSTS=(host1 host2)
+PGBOUNCER_PORTS=(6432 6432)
+PGBOUNCER_DATABASE_INI=(/path/to/pgbouncer.database.ini /path/to/pgbouncer.database.ini)
+
+# Local host info: the instance matching this host and port gets its config
+# file via cp instead of rsync.
+THIS_HOST="host1"
+THIS_PGBOUNCER_PORT="6432"
+
+# Run a PgBouncer admin console command. Usage: pgbouncer_admin HOST PORT COMMAND
+pgbouncer_admin() {
+    psql -tc "$3" -h "$1" -p "$2" -U postgres pgbouncer
+}
+
+# Build the new config in a private temporary file rather than a predictable
+# name in /tmp, and remove it however the script exits. Note that mktemp
+# creates the file with mode 0600.
+PGBOUNCER_DATABASE_INI_NEW=$(mktemp)
+trap 'rm -f -- "$PGBOUNCER_DATABASE_INI_NEW"' EXIT
+
+# Pause all PgBouncer instances to minimize impact on clients. This is
+# best-effort: an instance which cannot be paused must not prevent the
+# remaining instances from being reconfigured.
+for i in "${!PGBOUNCER_HOSTS[@]}"; do
+    pgbouncer_admin "${PGBOUNCER_HOSTS[$i]}" "${PGBOUNCER_PORTS[$i]}" "pause" ||
+        echo "WARNING: unable to pause PgBouncer on ${PGBOUNCER_HOSTS[$i]}:${PGBOUNCER_PORTS[$i]}" >&2
+done
+
+# Rewrite the database ini file of every instance, then reload and resume it.
+FAILED=0
+for i in "${!PGBOUNCER_HOSTS[@]}"; do
+    HOST="${PGBOUNCER_HOSTS[$i]}"
+    PORT="${PGBOUNCER_PORTS[$i]}"
+    CONFIG="${PGBOUNCER_DATABASE_INI[$i]}"
+
+    # printf rather than "echo -e", so that backslashes in the conninfo string
+    # are written out verbatim.
+    printf '[databases]\n\n%s= %s application_name=pgbouncer_%s\n' \
+        "$BDR_DBNAME" "$NEXT_CONNINFO" "$PORT" > "$PGBOUNCER_DATABASE_INI_NEW"
+
+    if [ "$HOST$PORT" != "$THIS_HOST$THIS_PGBOUNCER_PORT" ]; then
+        COPY=(rsync "$PGBOUNCER_DATABASE_INI_NEW" "$HOST:$CONFIG")
+    else
+        COPY=(cp "$PGBOUNCER_DATABASE_INI_NEW" "$CONFIG")
+    fi
+
+    if "${COPY[@]}" &&
+       pgbouncer_admin "$HOST" "$PORT" "reload" &&
+       pgbouncer_admin "$HOST" "$PORT" "resume"; then
+        echo "PgBouncer on $HOST:$PORT now points to node '$NEXT_NODE_NAME'"
+    else
+        echo "ERROR: unable to reconfigure PgBouncer on $HOST:$PORT" >&2
+        FAILED=1
+    fi
+done
+
+# Exit non-zero if any instance could not be reconfigured.
+if [ "$FAILED" -ne 0 ]; then
+    echo "Reconfiguration of pgbouncer failed for at least one instance" >&2
+    exit 1
+fi
+
+echo "Reconfiguration of pgbouncer complete"