From 5606434a97441b460567ba895b5a0cce6a88a9c1 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 27 Jul 2017 18:11:49 +0900 Subject: [PATCH] Initial BDR failover documentation --- doc/bdr-failover.md | 168 +++++++++++++++++++++++++++++++++++++++ scripts/bdr-pgbouncer.sh | 101 +++++++++++++++++++++++ 2 files changed, 269 insertions(+) create mode 100644 doc/bdr-failover.md create mode 100644 scripts/bdr-pgbouncer.sh diff --git a/doc/bdr-failover.md b/doc/bdr-failover.md new file mode 100644 index 00000000..cf4e5a1a --- /dev/null +++ b/doc/bdr-failover.md @@ -0,0 +1,168 @@ +BDR failover with repmgrd +========================= + +`repmgr 4` provides support for monitoring BDR nodes and taking action in case +one of the nodes fails. + + *NOTE* Due to the nature of BDR, it's only safe to use this solution for + a two-node scenario. Introducing additional nodes will create an inherent + risk of node desynchronisation if a node goes down without being cleanly + removed from the cluster. + +In contrast to streaming replication, there's no concept of "promoting" a new +primary node with BDR. Instead, "failover" involves monitoring both nodes +with `repmgrd` and redirecting queries from the failed node to the remaining +active node. This can be done by using an event notification script executed by +`repmgrd` to dynamically reconfigure a proxy server/connection pooler such +as PgBouncer. + + +Prerequisites +------------- + +`repmgr 4` requires PostgreSQL 9.6 with the BDR 2 extension enabled and +configured for a two-node BDR network. `repmgr 4` packages +must be installed on each node before attempting to configure repmgr. + + *NOTE* `repmgr 4` will refuse to install if it detects more than two + BDR nodes. + +Application database connections *must* be passed through a proxy server/ +connection pooler such as PgBouncer, and it must be possible to dynamically +reconfigure that from `repmgrd`. The example demonstrated in this document +will use PgBouncer. 
+ +The proxy server / connection poolers must not be installed on the database +servers. + + +Configuration +------------- + + +Sample configuration for `repmgr.conf`: + + node_id=1 + node_name='node1' + conninfo='host=node1 dbname=bdrtest user=repmgr connect_timeout=2' + replication_type='bdr' + + event_notifications=bdr_failover + event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1' + + # repmgrd options + reconnect_attempts=5 + reconnect_interval=6 + +Adjust settings as appropriate; copy and adjust for the second node (particularly +the values `node_id`, `node_name` and `conninfo`). + +Note that the values provided for the `conninfo` string must be valid for +connections from *both* nodes in the cluster. + +If defined, `event_notifications` will restrict execution of `event_notification_command` +to the specified events. + +`event_notification_command` is the script which does the actual "heavy lifting" +of reconfiguring the proxy server/ connection pooler. It is fully user-definable; +a sample implementation is documented below. + + +repmgr setup +------------ + +Register both nodes: + + $ repmgr -f /etc/repmgr.conf bdr register + NOTICE: attempting to install extension "repmgr" + NOTICE: "repmgr" extension successfully installed + NOTICE: node record created for node 'node1' (ID: 1) + NOTICE: BDR node 1 registered (conninfo: host=localhost dbname=bdrtest user=repmgr port=5501) + + $ repmgr -f /etc/repmgr.conf bdr register + NOTICE: node record created for node 'node2' (ID: 2) + NOTICE: BDR node 2 registered (conninfo: host=localhost dbname=bdrtest user=repmgr port=5502) + +The `repmgr` extension will be automatically created when the first +node is registered, and will be propagated to the second node. 
+ + *IMPORTANT* ensure the repmgr package is available on both nodes before + attempting to register the first node. + + +At this point the metadata for both nodes has been created; executing +`repmgr cluster show` (on either node) should produce output like this: + + $ repmgr -f /etc/repmgr.conf cluster show + ID | Name | Role | Status | Upstream | Connection string + ----+-------+------+-----------+----------+----------------------------------------------------- + 1 | node1 | bdr | * running | | host=node1 dbname=bdrtest user=repmgr + 2 | node2 | bdr | * running | | host=node2 dbname=bdrtest user=repmgr + +Additionally it's possible to see a log of significant events; so far +this will only record the two node registrations (in reverse chronological order): + + Node ID | Event | OK | Timestamp | Details + ---------+--------------+----+---------------------+---------------------------------------------- + 2 | bdr_register | t | 2017-07-27 17:51:48 | node record created for node 'node2' (ID: 2) + 1 | bdr_register | t | 2017-07-27 17:51:00 | node record created for node 'node1' (ID: 1) + + +Defining the "event_notification_command" +----------------------------------------- + +Key to "failover" execution is the `event_notification_command`, which is a +user-definable script which should reconfigure the proxy server/ +connection pooler. + +Each time `repmgr` (or `repmgrd`) records an event, it can optionally +execute the script defined in `event_notification_command` to +take further action; details of the event will be passed as parameters. +The following placeholders are available to the script: + + %n - node ID + %e - event type + %s - success (1 or 0) + %t - timestamp + %d - details + %c - conninfo string of the next available node + %a - name of the next available node + +Note that `%c` and `%a` will only be provided during `bdr_failover` +events, which is what is of interest here. 
+ +The provided sample script (`scripts/bdr-pgbouncer.sh`) is configured like +this: + + event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a"' + +and parses the configured parameters like this: + + NODE_ID=$1 + EVENT_TYPE=$2 + SUCCESS=$3 + NEXT_CONNINFO=$4 + NEXT_NODE_NAME=$5 + +It also contains some hard-coded values about the PgBouncer configuration for +both nodes; these will need to be adjusted for your local environment of course +(ideally the scripts would be maintained as templates and generated by some +kind of provisioning system). + + + +repmgrd +------- + +Node failover +------------- + + +Node recovery +------------- + +Following failure of a BDR node, if the node subsequently becomes available again, +a `bdr_recovery` event will be generated. This could potentially be used to +reconfigure PgBouncer automatically to bring the node back into the available pool, +however it would be prudent to manually verify the node's status before +exposing it to the application. diff --git a/scripts/bdr-pgbouncer.sh b/scripts/bdr-pgbouncer.sh new file mode 100644 index 00000000..fa244cc9 --- /dev/null +++ b/scripts/bdr-pgbouncer.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +set -u +set -e + +# Process parameters passed to script +# ----------------------------------- +# +# This assumes the repmgr "event_notification_command" is defined like this: +# +# event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1' +# +# Adjust as appropriate. 
+
+if [ "$#" -lt 5 ]; then
+    echo "usage: $0 NODE_ID EVENT_TYPE SUCCESS NEXT_CONNINFO NEXT_NODE_NAME" >&2
+    exit 1
+fi
+
+NODE_ID=$1
+EVENT_TYPE=$2
+SUCCESS=$3
+NEXT_CONNINFO=$4
+NEXT_NODE_NAME=$5
+
+# Only "bdr_failover" events are acted upon; anything else is reported and
+# ignored without signalling an error.
+if [ "$EVENT_TYPE" != "bdr_failover" ]; then
+    echo "unable to handle event type '$EVENT_TYPE'"
+    exit 0
+fi
+
+# Without a conninfo string the generated [databases] entry would not point
+# at any node, so refuse to touch the PgBouncer configuration at all.
+if [ -z "$NEXT_CONNINFO" ]; then
+    echo "no conninfo string provided for the next available node; PgBouncer configuration left unchanged" >&2
+    exit 1
+fi
+
+# NOTE(review): SUCCESS is only logged here, not acted upon - confirm whether
+# a "bdr_failover" event with success=0 should be skipped.
+echo "handling '$EVENT_TYPE' event for node $NODE_ID (success: $SUCCESS); next available node: '$NEXT_NODE_NAME'"
+
+# Define database name here
+# -------------------------
+#
+# Note: this assumes the BDR-enabled database has the same name on
+# both hosts
+
+BDR_DBNAME=bdr_db
+
+# Define PgBouncer hosts here
+# ---------------------------
+#
+# The three arrays are parallel: entry N of each array describes the same
+# PgBouncer instance.
+
+PGBOUNCER_HOSTS=(host1 host2)
+PGBOUNCER_PORTS=(6432 6432)
+PGBOUNCER_DATABASE_INI=(/path/to/pgbouncer.database.ini /path/to/pgbouncer.database.ini)
+
+if [ "${#PGBOUNCER_HOSTS[@]}" -ne "${#PGBOUNCER_PORTS[@]}" ] ||
+   [ "${#PGBOUNCER_HOSTS[@]}" -ne "${#PGBOUNCER_DATABASE_INI[@]}" ]; then
+    echo "PGBOUNCER_HOSTS, PGBOUNCER_PORTS and PGBOUNCER_DATABASE_INI must have the same number of entries" >&2
+    exit 1
+fi
+
+# Define local host info here
+# ---------------------------
+
+THIS_HOST="host1"
+THIS_PGBOUNCER_PORT="6432"
+
+# Run a single command on a PgBouncer admin console.
+#
+# Arguments: $1 - PgBouncer host
+#            $2 - PgBouncer port
+#            $3 - admin command ("pause", "reload" or "resume")
+# Returns:   exit status of psql
+pgbouncer_admin() {
+    psql -tc "$3" -h "$1" -p "$2" -U postgres pgbouncer
+}
+
+# Individual failures are recorded here instead of aborting the script
+# (which "set -e" would otherwise do), so that one unreachable PgBouncer
+# instance cannot leave the remaining instances paused.
+FAILED=0
+
+# Pause all pgbouncer nodes to minimize impact on clients
+# -------------------------------------------------------
+
+for i in "${!PGBOUNCER_HOSTS[@]}"
+do
+    HOST="${PGBOUNCER_HOSTS[$i]}"
+    PORT="${PGBOUNCER_PORTS[$i]}"
+
+    pgbouncer_admin "$HOST" "$PORT" "pause" || {
+        echo "unable to pause PgBouncer on $HOST:$PORT" >&2
+        FAILED=1
+    }
+done
+
+# Copy pgbouncer database ini file to all nodes and restart pgbouncer
+# -------------------------------------------------------------------
+
+# Use an unpredictable temporary file name and make sure the file is removed
+# however the script exits.
+PGBOUNCER_DATABASE_INI_NEW=$(mktemp "${TMPDIR:-/tmp}/pgbouncer.database.ini.XXXXXX")
+trap 'rm -f -- "$PGBOUNCER_DATABASE_INI_NEW"' EXIT
+
+# mktemp creates the file with mode 0600; restore a conventional mode so a
+# newly created copy is still readable by the account running PgBouncer.
+chmod 0644 "$PGBOUNCER_DATABASE_INI_NEW"
+
+for i in "${!PGBOUNCER_HOSTS[@]}"
+do
+    HOST="${PGBOUNCER_HOSTS[$i]}"
+    PORT="${PGBOUNCER_PORTS[$i]}"
+    CONFIG="${PGBOUNCER_DATABASE_INI[$i]}"
+
+    # Recreate the pgbouncer config file
+    # ----------------------------------
+    # printf '%s' keeps any backslashes in the conninfo string literal
+    printf '[databases]\n\n%s= %s application_name=pgbouncer_%s\n' \
+        "$BDR_DBNAME" "$NEXT_CONNINFO" "$PORT" > "$PGBOUNCER_DATABASE_INI_NEW"
+
+    # Copy file to host
+    # -----------------
+    COPIED=1
+
+    if [ "$HOST" = "$THIS_HOST" ] && [ "$PORT" = "$THIS_PGBOUNCER_PORT" ]; then
+        cp "$PGBOUNCER_DATABASE_INI_NEW" "$CONFIG" || COPIED=0
+    else
+        rsync "$PGBOUNCER_DATABASE_INI_NEW" "$HOST:$CONFIG" || COPIED=0
+    fi
+
+    # Reload and resume PgBouncer
+    # ---------------------------
+
+    if [ "$COPIED" -eq 1 ]; then
+        pgbouncer_admin "$HOST" "$PORT" "reload" || {
+            echo "unable to reload PgBouncer on $HOST:$PORT" >&2
+            FAILED=1
+        }
+    else
+        echo "unable to copy new configuration to $HOST:$CONFIG" >&2
+        FAILED=1
+    fi
+
+    # Always attempt to resume, otherwise clients of this instance would
+    # stay blocked indefinitely.
+    pgbouncer_admin "$HOST" "$PORT" "resume" || {
+        echo "unable to resume PgBouncer on $HOST:$PORT" >&2
+        FAILED=1
+    }
+done
+
+if [ "$FAILED" -ne 0 ]; then
+    echo "Reconfiguration of pgbouncer completed with errors" >&2
+    exit 1
+fi
+
+echo "Reconfiguration of pgbouncer complete"