From 09b8a866056578bb66c427be1de63af1c69a088e Mon Sep 17 00:00:00 2001
From: Ian Barwick <ian@2ndquadrant.com>
Date: Thu, 19 Apr 2018 16:46:41 +0900
Subject: [PATCH] doc: improve configuration documentation

With special attention to setting service commands, and extra special
mention of "pg_ctlcluster" for Debian/Ubuntu users.
---
 doc/configuration-file-settings.sgml    |   9 +-
 doc/configuration-service-commands.sgml | 111 ++++++++++
 doc/configuration.sgml                  |   1 +
 doc/filelist.sgml                       |   1 +
 doc/repmgrd-configuration.sgml          | 273 +++++++++++++-----------
 doc/repmgrd-monitoring.sgml             |   4 +
 repmgr.conf.sample                      |   3 +
 7 files changed, 270 insertions(+), 132 deletions(-)
 create mode 100644 doc/configuration-service-commands.sgml
diff --git a/doc/configuration-file-settings.sgml b/doc/configuration-file-settings.sgml
index ebf8f17d..937b54b9 100644
--- a/doc/configuration-file-settings.sgml
+++ b/doc/configuration-file-settings.sgml
@@ -1,10 +1,10 @@
 <sect1 id="configuration-file-settings" xreflabel="configuration file settings">
   <indexterm>
     <primary>repmgr.conf</primary>
-    <secondary>settings</secondary>
+    <secondary>basic settings</secondary>
   </indexterm>
 
- <title>Configuration file settings</title>
+ <title>Basic configuration file settings</title>
  <para>
    Each <filename>repmgr.conf</filename> file must contain the following parameters:
  </para>
@@ -92,7 +92,10 @@
 
   <para>
     For a full list of annotated configuration items, see the file
-    <ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</>.
+    <ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</ulink>.
+  </para>
+  <para>
+    For <application>repmgrd</application>-specific settings, see <xref linkend="repmgrd-configuration">.
   </para>
 
   <note>
diff --git a/doc/configuration-service-commands.sgml b/doc/configuration-service-commands.sgml
new file mode 100644
index 00000000..6e0dfa23
--- /dev/null
+++ b/doc/configuration-service-commands.sgml
@@ -0,0 +1,111 @@
+<sect1 id="configuration-service-commands" xreflabel="service command settings">
+  <indexterm>
+    <primary>repmgr.conf</primary>
+    <secondary>service command settings</secondary>
+  </indexterm>
+  <title>Service command settings</title>
+
+  <para>
+    In some circumstances, &repmgr; (and <application>repmgrd</application>) need to
+    be able to stop, start or restart PostgreSQL. &repmgr; commands which need to do this
+    include <link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>,
+    <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link> and
+    <link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>.
+  </para>
+  <para>
+    By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> to control the PostgreSQL
+    server. However this can lead to various problems, particularly when PostgreSQL has been
+    installed from packages, and especially so if <application>systemd</application> is in use.
+  </para>
+
+
+  <note>
+    <para>
+      If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
+      See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
+      entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
+    </para>
+  </note>
+
+
+  <para>
+    With this in mind, we recommend <emphasis>always</emphasis> configuring &repmgr; to use the
+    available system service commands.
+  </para>
+
+  <para>
+    To do this, specify the appropriate command for each action
+    in <filename>repmgr.conf</filename> using the following configuration
+    parameters:
+    <programlisting>
+    service_start_command
+    service_stop_command
+    service_restart_command
+    service_reload_command</programlisting>
+  </para>
+
+  <note>
+    <para>
+      It's also possible to specify a <varname>service_promote_command</varname>;
+      this overrides any value contained in the setting <varname>promote_command</varname>.
+      This is intended for systems which provide a package-level promote command,
+      such as Debian's <application>pg_ctlcluster</application>.
+    </para>
+  </note>
+
+  <para>
+    To confirm which command &repmgr; will execute for each action, use
+    <command>repmgr node service --list --action=...</command>, e.g.:
+    <programlisting>
+      repmgr -f /etc/repmgr.conf node service --list --action=stop
+      repmgr -f /etc/repmgr.conf node service --list --action=start
+      repmgr -f /etc/repmgr.conf node service --list --action=restart
+      repmgr -f /etc/repmgr.conf node service --list --action=reload</programlisting>
+  </para>
+
+  <para>
+     These commands will be executed by the system user which &repmgr; runs as (usually <literal>postgres</literal>)
+     and will probably require passwordless sudo access to be able to execute the command.
+  </para>
+  <para>
+    For example, using <application>systemd</application> on CentOS 7, the service commands can be
+    set as follows:
+    <programlisting>
+      service_start_command   = 'sudo systemctl start postgresql-9.6'
+      service_stop_command    = 'sudo systemctl stop postgresql-9.6'
+      service_restart_command = 'sudo systemctl restart postgresql-9.6'
+      service_reload_command  = 'sudo systemctl reload postgresql-9.6'</programlisting>
+    and <filename>/etc/sudoers</filename> should be set as follows:
+    <programlisting>
+      Defaults:postgres !requiretty
+      postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.6, \
+        /usr/bin/systemctl start postgresql-9.6, \
+        /usr/bin/systemctl restart postgresql-9.6, \
+        /usr/bin/systemctl reload postgresql-9.6</programlisting>
+  </para>
+
+  <important>
+    <indexterm>
+      <primary>pg_ctlcluster</primary>
+      <secondary>service command settings</secondary>
+    </indexterm>
+    <para>
+      Debian/Ubuntu users: instead of calling <command>sudo systemctl</command> directly, use
+      <command>sudo pg_ctlcluster</command>, e.g.:
+    <programlisting>
+      service_start_command   = 'sudo pg_ctlcluster 9.6 main start'
+      service_stop_command    = 'sudo pg_ctlcluster 9.6 main stop'
+      service_restart_command = 'sudo pg_ctlcluster 9.6 main restart'
+      service_reload_command  = 'sudo pg_ctlcluster 9.6 main reload'</programlisting>
+      and set <filename>/etc/sudoers</filename> accordingly.
+    </para>
+    <para>
+      While running <command>pg_ctlcluster</command> directly (without <command>sudo</command>)
+      should work, it's strongly recommended to use <command>sudo pg_ctlcluster</command> on
+      <application>systemd</application> systems, to ensure <application>systemd</application>
+      has a correct picture of the PostgreSQL application state.
+    </para>
+
+  </important>
+
+</sect1>
diff --git a/doc/configuration.sgml b/doc/configuration.sgml
index 95585032..85e742b7 100644
--- a/doc/configuration.sgml
+++ b/doc/configuration.sgml
@@ -3,6 +3,7 @@
 
   &configuration-file;
   &configuration-file-settings;
+  &configuration-service-commands;
 
   <sect1 id="configuration-permissions" xreflabel="User permissions">
     <indexterm>
diff --git a/doc/filelist.sgml b/doc/filelist.sgml
index 7fa683a3..bf6ec0da 100644
--- a/doc/filelist.sgml
+++ b/doc/filelist.sgml
@@ -39,6 +39,7 @@
 <!ENTITY configuration      SYSTEM "configuration.sgml">
 <!ENTITY configuration-file      SYSTEM "configuration-file.sgml">
 <!ENTITY configuration-file-settings      SYSTEM "configuration-file-settings.sgml">
+<!ENTITY configuration-service-commands   SYSTEM "configuration-service-commands.sgml">
 <!ENTITY cloning-standbys  SYSTEM "cloning-standbys.sgml">
 <!ENTITY promoting-standby  SYSTEM "promoting-standby.sgml">
 <!ENTITY follow-new-primary  SYSTEM "follow-new-primary.sgml">
diff --git a/doc/repmgrd-configuration.sgml b/doc/repmgrd-configuration.sgml
index 35e6c62a..9623cd67 100644
--- a/doc/repmgrd-configuration.sgml
+++ b/doc/repmgrd-configuration.sgml
@@ -1,63 +1,145 @@
 <chapter id="repmgrd-configuration">
 
- <indexterm>
-   <primary>repmgrd</primary>
-   <secondary>configuration</secondary>
- </indexterm>
+  <indexterm>
+    <primary>repmgrd</primary>
+    <secondary>configuration</secondary>
+  </indexterm>
 
- <title>repmgrd configuration</title>
- <para>
-  To use <application>repmgrd</application>, its associated function library must be
-  included in <filename>postgresql.conf</filename> with:
+  <title>repmgrd configuration</title>
 
-  <programlisting>
-    shared_preload_libraries = 'repmgr'</programlisting>
- </para>
- <para>
-  Changing this setting requires a restart of PostgreSQL; for more details see
-  the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
- </para>
- <para>
-  Additionally the following <application>repmgrd</application> options *must* be set in
-  <filename>repmgr.conf</filename> (adjust configuration file locations as appropriate):
-  <programlisting>
-    failover=automatic
-    promote_command='repmgr standby promote -f /etc/repmgr.conf --log-to-file'
-    follow_command='repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'</programlisting>
- </para>
- <para>
-  Note that the <literal>--log-to-file</literal> option will cause
-  output generated by the &repmgr; command, when executed by <application>repmgrd</application>,
-  to be logged to the same destination configured to receive log output for <application>repmgrd</application>.
-  See <filename>repmgr.conf.sample</filename> for further <application>repmgrd</application>-specific settings.
- </para>
- <para>
-  When <varname>failover</varname> is set to <literal>automatic</literal>, upon detecting failure
-  of the current  primary, <application>repmgrd</application> will execute either
-  <varname>promote_command</varname> (if the current server is to become the new primary) or
-  <varname>follow_command</varname> (if the current serverneeds to follow another server which has
-  become the new primary.
- </para>
- <note>
-   <para>
-     These commands can be any valid shell script which results in one of these
-     two actions happening, but if &repmgr;'s <command>standby follow</command> or
-     <command>standby promote</command>
-     commands are not executed (either directly as shown here, or from a script which
-     performs other actions), the &repmgr; metadata will not be updated and
-     &repmgr; will no longer function reliably.
-   </para>
- </note>
+  <para>
+    <application>repmgrd</application> is a daemon which runs on each PostgreSQL node,
+    monitoring the local node, and (unless it's the primary node) the upstream server
+    (the primary server or, with cascading replication, another standby) which it's
+    connected to.
+  </para>
+  <para>
+    <application>repmgrd</application> can be configured to provide failover
+    capability in case the primary upstream node becomes unreachable, and/or
+    provide monitoring data to the &repmgr; metadatabase.
+  </para>
 
- <para>
-   The <varname>follow_command</varname> should provide the <literal>--upstream-node-id=%n</literal>
-   option to <command>repmgr standby follow</command>; the <literal>%n</literal> will be replaced by
-   <application>repmgrd</application> with the ID of the new primary node. If this is not provided, &repmgr;
-   will attempt to determine the new primary by itself, but  if the
-   original primary comes back online after the new primary is promoted, there is a risk that
-   <command>repmgr standby follow</command> will result in the node continuing to follow
-   the original primary.
- </para>
+  <sect1 id="repmgrd-basic-configuration">
+    <title>repmgrd basic configuration</title>
+
+    <para>
+      To use <application>repmgrd</application>, its associated function library <emphasis>must</emphasis> be
+      included in <filename>postgresql.conf</filename> with:
+
+      <programlisting>
+        shared_preload_libraries = 'repmgr'</programlisting>
+    </para>
+    <para>
+      Changing this setting requires a restart of PostgreSQL; for more details see
+      the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
+    </para>
+
+    <sect2 id="repmgrd-automatic-failover-configuration">
+      <title>Automatic failover configuration</title>
+      <para>
+        If using automatic failover, the following <application>repmgrd</application> options <emphasis>must</emphasis> be set in
+        <filename>repmgr.conf</filename>:
+        <programlisting>
+          failover=automatic
+          promote_command='/usr/bin/repmgr standby promote -f /etc/repmgr.conf --log-to-file'
+          follow_command='/usr/bin/repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'</programlisting>
+      </para>
+      <para>
+        Adjust file paths as appropriate; we recommend specifying the full path to the &repmgr; binary.
+      </para>
+      <para>
+        Note that the <literal>--log-to-file</literal> option will cause
+        output generated by the &repmgr; command, when executed by <application>repmgrd</application>,
+        to be logged to the same destination configured to receive log output for <application>repmgrd</application>.
+        See <filename><ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</ulink></filename>
+        for further <application>repmgrd</application>-specific settings.
+      </para>
+      <para>
+        When <varname>failover</varname> is set to <literal>automatic</literal>, upon detecting failure
+        of the current primary, <application>repmgrd</application> will execute one of:
+      </para>
+      <itemizedlist spacing="compact" mark="bullet">
+        <listitem>
+          <simpara>
+            <varname>promote_command</varname> (if the current server is to become the new primary)
+          </simpara>
+        </listitem>
+        <listitem>
+          <simpara>
+            <varname>follow_command</varname> (if the current server needs to follow another server which has
+            become the new primary)
+          </simpara>
+        </listitem>
+      </itemizedlist>
+      <note>
+        <para>
+          These commands can be any valid shell script which results in one of these
+          two actions happening, but if &repmgr;'s <command>standby follow</command> or
+          <command>standby promote</command>
+          commands are not executed (either directly as shown here, or from a script which
+          performs other actions), the &repmgr; metadata will not be updated and
+          &repmgr; will no longer function reliably.
+        </para>
+      </note>
+
+      <para>
+        The <varname>follow_command</varname> should provide the <literal>--upstream-node-id=%n</literal>
+        option to <command>repmgr standby follow</command>; the <literal>%n</literal> will be replaced by
+        <application>repmgrd</application> with the ID of the new primary node. If this is not provided, &repmgr;
+        will attempt to determine the new primary by itself, but if the
+        original primary comes back online after the new primary is promoted, there is a risk that
+        <command>repmgr standby follow</command> will result in the node continuing to follow
+        the original primary.
+      </para>
+    </sect2>
+
+    <sect2 id="repmgrd-service-configuration">
+      <indexterm>
+        <primary>repmgrd</primary>
+        <secondary>PostgreSQL service configuration</secondary>
+      </indexterm>
+      <title>PostgreSQL service configuration</title>
+      <para>
+        If using automatic failover, currently <application>repmgrd</application> will need to execute
+        <link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>
+        to restart PostgreSQL on standbys to have them follow a new primary.
+      </para>
+      <para>
+        To ensure this happens smoothly, it's essential to provide the system/service restart
+        command appropriate to your operating system via <varname>service_restart_command</varname>
+        in <filename>repmgr.conf</filename>. If you don't do this, <application>repmgrd</application>
+        will default to using <command>pg_ctl</command>, which can result in unexpected problems,
+        particularly on <application>systemd</application>-based systems.
+      </para>
+      <para>
+        For more details, see <xref linkend="configuration-service-commands">.
+      </para>
+    </sect2>
+
+    <sect2 id="repmgrd-monitoring-configuration">
+      <indexterm>
+        <primary>repmgrd</primary>
+        <secondary>monitoring configuration</secondary>
+      </indexterm>
+      <title>Monitoring configuration</title>
+      <para>
+        To enable monitoring, set:
+        <programlisting>
+          monitoring_history=yes</programlisting>
+        in <filename>repmgr.conf</filename>.
+      </para>
+      <para>
+        The default monitoring interval is 2 seconds; this value can be explicitly set using:
+        <programlisting>
+          monitor_interval_secs=&lt;seconds&gt;</programlisting>
+        in <filename>repmgr.conf</filename>.
+      </para>
+      <para>
+        For more details on monitoring, see <xref linkend="repmgrd-monitoring">.
+      </para>
+    </sect2>
+
+  </sect1>
 
  <sect1 id="repmgrd-connection-settings">
  <title>repmgrd connection settings</title>
@@ -84,86 +166,19 @@
  </sect1>
 
 
- <sect1 id="repmgrd-service-command">
+
+ <sect1 id="repmgrd-log-rotation">
    <indexterm>
-     <primary>service commands</primary>
+     <primary>log rotation</primary>
      <secondary>repmgrd</secondary>
    </indexterm>
 
-   <title>repmgrd and service commands</title>
-   <para>
-     By default, &repmgr; will use <application>pg_ctl</application> to
-     stop, start, restart, reloadthe PostgreSQL cluster.
-     However, if installed from a package, particularly under
-     <application>pg_ctl</application>, it's advisable to specify
-     the appropriate service commands to perform these options.
-   </para>
-   <para>
-     To do this, specify the appropriate command for each action
-     in <filename>repmgr.conf</filename> using the following configuration
-     parameters:
-     <programlisting>
-    service_start_command
-    service_stop_command
-    service_restart_command
-    service_reload_command</programlisting>
-   </para>
-
-   <note>
-     <para>
-       It's also possible to specify a <varname>service_promote_command</varname>;
-       this overrides any value contained in the setting <varname>promote_command</varname>.
-       This is intended for systems which provide a package-level promote command,
-       such as Debian's <application>pg_ctlcluster</application>.
-     </para>
-   </note>
-
-   <para>
-     To confirm which command &repmgr; will execute for each action, use
-     <command>repmgr node service --list --action=...</command>, e.g.:
-     <programlisting>
-       repmgr -f /etc/repmgr.conf node service --list --action=stop
-       repmgr -f /etc/repmgr.conf node service --list --action=start
-       repmgr -f /etc/repmgr.conf node service --list --action=restart
-       repmgr -f /etc/repmgr.conf node service --list --action=reload</programlisting>
-   </para>
-
-
-   <para>
-     These commands will be executed by the system user which &repmgr; runs as (usually <literal>postgres</literal>)
-     and will probably require passwordless sudo access to be able to execute the command.
-   </para>
-   <para>
-     For example, using <application>systemd</application> on CentOS 7, the service commands can be
-     set as follows:
-     <programlisting>
-       service_start_command = 'sudo systemctl start postgresql-9.6'
-       service_stop_command = 'sudo systemctl stop postgresql-9.6'
-       service_restart_command = 'sudo systemctl restart postgresql-9.6'</programlisting>
-     and <filename>/etc/sudoers</filename> should be set as follows:
-     <programlisting>
-    Defaults:postgres !requiretty
-    postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.6, \
-       /usr/bin/systemctl start postgresql-9.6, \
-       /usr/bin/systemctl restart postgresql-9.6</programlisting>
-   </para>
-
-   <note>
-     <para>
-       If using <application>systemd</application>, ensure you have <varname>RemoteIPC</varname> set to <literal>off</literal>.
-       See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
-       entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
-     </para>
-   </note>
- </sect1>
-
-
- <sect1 id="repmgrd-log-rotation">
   <title>repmgrd log rotation</title>
   <para>
-   To ensure the current <application>repmgrd</application> logfile does not grow
-   indefinitely, configure your system's <command>logrotate</command> to
-   regularly rotate it.
+   To ensure the current <application>repmgrd</application> logfile
+   (specified in <filename>repmgr.conf</filename> with the parameter
+   <option>log_file</option>) does not grow indefinitely, configure your
+   system's <command>logrotate</command> to regularly rotate it.
   </para>
   <para>
    Sample configuration to rotate logfiles weekly with retention for
diff --git a/doc/repmgrd-monitoring.sgml b/doc/repmgrd-monitoring.sgml
index e20d3f07..60de86ed 100644
--- a/doc/repmgrd-monitoring.sgml
+++ b/doc/repmgrd-monitoring.sgml
@@ -3,6 +3,10 @@
    <primary>repmgrd</primary>
    <secondary>monitoring</secondary>
  </indexterm>
+ <indexterm>
+   <primary>monitoring</primary>
+   <secondary>with repmgrd</secondary>
+ </indexterm>
 
  <title>Monitoring with repmgrd</title>
  <para>
diff --git a/repmgr.conf.sample b/repmgr.conf.sample
index 5d9f8fd5..ac9b137c 100644
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -290,6 +290,9 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 #       /usr/bin/systemctl start postgresql-9.6, \
 #       /usr/bin/systemctl restart postgresql-9.6
 #
+# Debian/Ubuntu users: use "sudo pg_ctlcluster" to execute service control commands.
+#
+# For more details, see: https://repmgr.org/docs/4.0/configuration-service-commands.html
 
 #service_start_command = ''
 #service_stop_command = ''