From e7bb3e9d50807f4ec87fc3fb4c64f115d6ada8aa Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 22 Sep 2017 16:29:14 +0900 Subject: [PATCH] Add section on promoting standby --- doc/cloning-standbys.sgml | 2 +- doc/command-reference.sgml | 206 ++++++++++++++++++++++++++++++++++++- doc/filelist.sgml | 1 + doc/promoting-standby.sgml | 74 +++++++++++++ doc/repmgr.sgml | 1 + 5 files changed, 282 insertions(+), 2 deletions(-) create mode 100644 doc/promoting-standby.sgml diff --git a/doc/cloning-standbys.sgml b/doc/cloning-standbys.sgml index 05b513a8..2c1fe095 100644 --- a/doc/cloning-standbys.sgml +++ b/doc/cloning-standbys.sgml @@ -308,7 +308,7 @@ After starting the standby, the cluster will look like this, showing that node3 - is attached to node3, not the primary (node1). + is attached to node2, not the primary (node1). $ repmgr -f /etc/repmgr.conf cluster show ID | Name | Role | Status | Upstream | Location | Connection string diff --git a/doc/command-reference.sgml b/doc/command-reference.sgml index 2627c5a5..6fb419f9 100644 --- a/doc/command-reference.sgml +++ b/doc/command-reference.sgml @@ -148,6 +148,26 @@ + + + repmgr standby promote + + repmgr standby promote + + Promotes a standby to a primary if the current primary has failed. This + command requires a valid repmgr.conf file for the standby, either + specified explicitly with -f/--config-file or located in a + default location; no additional arguments are required. + + + If the standby promotion succeeds, the server will not need to be + restarted. However, any other standbys will need to follow the new server, + by using ; if repmgrd is active, it will + handle this automatically. + + + + repmgr standby follow repmgr standby follow @@ -170,6 +190,7 @@ + repmgr node rejoin repmgr node rejoin @@ -179,8 +200,191 @@ Enables a dormant (stopped) node to be rejoined to the replication cluster.
- This can optionally use `pg_rewind` to re-integrate a node which has diverged + This can optionally use pg_rewind to re-integrate a node which has diverged from the rest of the cluster, typically a failed primary. + + + + repmgr cluster show + + repmgr cluster show + + Displays information about each active node in the replication cluster. This + command polls each registered server and shows its role (primary / + standby / bdr) and status. It polls each server + directly and can be run on any node in the cluster; this is also useful when analyzing + connectivity from a particular node. + + + This command requires either a valid repmgr.conf file or a database + connection string to one of the registered nodes; no additional arguments are needed. + + + + Example: + + $ repmgr -f /etc/repmgr.conf cluster show + + ID | Name | Role | Status | Upstream | Location | Connection string + ----+-------+---------+-----------+----------+----------+----------------------------------------- + 1 | node1 | primary | * running | | default | host=db_node1 dbname=repmgr user=repmgr + 2 | node2 | standby | running | node1 | default | host=db_node2 dbname=repmgr user=repmgr + 3 | node3 | standby | running | node1 | default | host=db_node3 dbname=repmgr user=repmgr + + + + To show database connection errors when polling nodes, run the command in + --verbose mode. + + + The cluster show command accepts an optional parameter --csv, which + outputs the replication cluster's status in a simple CSV format, suitable for + parsing by scripts: + + $ repmgr -f /etc/repmgr.conf cluster show --csv + 1,-1,-1 + 2,0,0 + 3,0,1 + + + The columns have the following meanings: + + + + node ID + + + availability (0 = available, -1 = unavailable) + + + recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown) + + + + + + + Note that availability is tested by connecting from the node where + repmgr cluster show is executed; an unavailable result does not necessarily imply the node + itself is down.
See and to get + a better overview of connections between nodes. + + + + + repmgr cluster matrix + + repmgr cluster matrix + + repmgr cluster matrix runs repmgr cluster show on each + node and arranges the results in a matrix, recording success or failure. + + repmgr cluster matrix requires a valid repmgr.conf + file on each node. Additionally, passwordless ssh connections are required between + all nodes. + + Example 1 (all nodes up): + + $ repmgr -f /etc/repmgr.conf cluster matrix + + Name | Id | 1 | 2 | 3 + -------+----+----+----+---- + node1 | 1 | * | * | * + node2 | 2 | * | * | * + node3 | 3 | * | * | * + + Example 2 (node1 and node2 up, node3 down): + + $ repmgr -f /etc/repmgr.conf cluster matrix + + Name | Id | 1 | 2 | 3 + -------+----+----+----+---- + node1 | 1 | * | * | x + node2 | 2 | * | * | x + node3 | 3 | ? | ? | ? + + + Each row corresponds to one server, and indicates the result of + testing an outbound connection from that server. + + Since node3 is down, all the entries in its row are filled with + ?, meaning that we cannot test its outbound connections. + + The other two nodes are up; the corresponding rows have x in the + column corresponding to node3, meaning that inbound connections to + that node have failed, and * in the columns corresponding to + node1 and node2, meaning that inbound connections + to these nodes have succeeded. + + Example 3 (all nodes up, firewall dropping packets originating + from node1 and directed to port 5432 on node3) - + running repmgr cluster matrix from node1 gives the following output: + + $ repmgr -f /etc/repmgr.conf cluster matrix + + Name | Id | 1 | 2 | 3 + -------+----+----+----+---- + node1 | 1 | * | * | x + node2 | 2 | * | * | * + node3 | 3 | ? | ? | ?
+ + + Note that this may take some time, depending on the connect_timeout + setting in the node conninfo strings; the default is + 1 minute, which means that without modification the above + command would take around 2 minutes to run (see the comments elsewhere about setting + connect_timeout). + + + The matrix tells us that we cannot connect from node1 to node3, + and that (therefore) we don't know the state of any outbound + connection from node3. + + + In this case, the command will produce a more + useful result. + + + + + + + repmgr cluster crosscheck + + repmgr cluster crosscheck + + repmgr cluster crosscheck is similar to , + but cross-checks connections between each combination of nodes. In "Example 3" in + we have no information about the state of node3. + However, by running repmgr cluster crosscheck it's possible to get a better + overview of the cluster situation: + + $ repmgr -f /etc/repmgr.conf cluster crosscheck + + Name | Id | 1 | 2 | 3 + -------+----+----+----+---- + node1 | 1 | * | * | x + node2 | 2 | * | * | * + node3 | 3 | * | * | * + + + What happened is that repmgr cluster crosscheck merged its own + repmgr cluster matrix with the repmgr cluster matrix + output from node2; the latter is able to connect to node3 + and therefore determine the state of outbound connections from that node. + + + + + diff --git a/doc/filelist.sgml b/doc/filelist.sgml index e4e2a6b8..5d16624a 100644 --- a/doc/filelist.sgml +++ b/doc/filelist.sgml @@ -40,6 +40,7 @@ + diff --git a/doc/promoting-standby.sgml b/doc/promoting-standby.sgml new file mode 100644 index 00000000..de515951 --- /dev/null +++ b/doc/promoting-standby.sgml @@ -0,0 +1,74 @@ + + Promoting a standby server with repmgr + + If a primary server fails or needs to be removed from the replication cluster, + a new primary server must be designated to ensure the cluster continues + to function correctly. This can be done with , + which promotes the standby on the current server to primary.
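Before promoting a standby, a script might first confirm which nodes are reachable. The --csv output of repmgr cluster show described above lends itself to this; below is a minimal sketch, assuming the three-column format documented earlier (the helper name describe_node and the hard-coded sample rows are illustrative, not part of repmgr):

```shell
# Hypothetical helper: interpret one row of "repmgr cluster show --csv".
# Columns: node ID, availability (0 = available, -1 = unavailable),
# recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown).
describe_node() {
    node_id=${1%%,*}        # first field
    rest=${1#*,}
    available=${rest%%,*}   # second field
    recovery=${rest#*,}     # third field
    if [ "$available" = "-1" ]; then
        echo "node $node_id: unreachable"
    elif [ "$recovery" = "0" ]; then
        echo "node $node_id: available (primary)"
    else
        echo "node $node_id: available (standby)"
    fi
}

# In practice the rows would come from:
#   repmgr -f /etc/repmgr.conf cluster show --csv
describe_node "1,-1,-1"   # node 1: unreachable
describe_node "2,0,0"     # node 2: available (primary)
describe_node "3,0,1"     # node 3: available (standby)
```

Note that this sketch treats an unknown recovery state (-1) on a reachable node as a standby; a production script would want to handle that case explicitly.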
+ + + + To demonstrate this, set up a replication cluster with a primary and two attached + standby servers so that the cluster looks like this: + + $ repmgr -f /etc/repmgr.conf cluster show + ID | Name | Role | Status | Upstream | Location | Connection string + ----+-------+---------+-----------+----------+----------+-------------------------------------- + 1 | node1 | primary | * running | | default | host=node1 dbname=repmgr user=repmgr + 2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr + 3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr + + + Stop the current primary, e.g. with: + + $ pg_ctl -D /var/lib/postgresql/data -m fast stop + + + At this point the replication cluster will be in a partially disabled state, with + both standbys accepting read-only connections while attempting to connect to the + stopped primary. Note that the &repmgr; metadata table will not yet have been updated; + executing will note the discrepancy: + + $ repmgr -f /etc/repmgr.conf cluster show + ID | Name | Role | Status | Upstream | Location | Connection string + ----+-------+---------+---------------+----------+----------+-------------------------------------- + 1 | node1 | primary | ?
unreachable | | default | host=node1 dbname=repmgr user=repmgr + 2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr + 3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr + + WARNING: following issues were detected + node "node1" (ID: 1) is registered as an active primary but is unreachable + + + Now promote the first standby with: + + $ repmgr -f /etc/repmgr.conf standby promote + + + This will produce output similar to the following: + + INFO: connecting to standby database + NOTICE: promoting standby + DETAIL: promoting server using "pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/postgresql/data' promote" + server promoting + INFO: reconnecting to promoted server + NOTICE: STANDBY PROMOTE successful + DETAIL: node 2 was successfully promoted to primary + + + Executing will show the current state; as there is now an + active primary, the previous warning will not be displayed: + + $ repmgr -f /etc/repmgr.conf cluster show + ID | Name | Role | Status | Upstream | Location | Connection string + ----+-------+---------+-----------+----------+----------+-------------------------------------- + 1 | node1 | primary | - failed | | default | host=node1 dbname=repmgr user=repmgr + 2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr + 3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr + + + However, the sole remaining standby (node3) is still trying to replicate from the failed + primary; must now be executed to rectify this situation. + + + diff --git a/doc/repmgr.sgml b/doc/repmgr.sgml index a8a8b055..a6b17330 100644 --- a/doc/repmgr.sgml +++ b/doc/repmgr.sgml @@ -69,6 +69,7 @@ &configuration; &cloning-standbys; + &promoting-standby; &command-reference;
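After a promotion like the one walked through above, a monitoring script might verify that exactly one reachable node reports itself as primary. Below is a hedged sketch using the cluster show --csv format documented earlier; the function name count_primaries is illustrative, and the sample rows mirror the post-promotion state (node1 unreachable, node2 the new primary, node3 a standby):

```shell
# Hypothetical check: given "repmgr cluster show --csv" output, count the
# reachable nodes that are not in recovery (i.e. acting as primary).
# Columns: node ID, availability (0/-1), recovery state (0/1/-1).
count_primaries() {
    echo "$1" | awk -F, '$2 == 0 && $3 == 0 { n++ } END { print n + 0 }'
}

# In practice: count_primaries "$(repmgr -f /etc/repmgr.conf cluster show --csv)"
count_primaries "1,-1,-1
2,0,0
3,0,1"   # prints 1
```

A result other than 1 (no reachable primary, or more than one) would warrant investigation before any further administrative action.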