doc: add a link to the current documentation from the contents page

doc: fix typos
doc: fix typo
2026-03-23 07:06:30 +00:00 · 2019-04-03 10:47:19 +09:00 · 2018-10-23 09:22:11 +09:00 · 2018-10-23 09:00:46 +09:00 · 2018-10-16 13:24:48 +09:00 · 2018-10-16 11:39:54 +09:00
17 changed files with 543 additions and 88 deletions
--- a/configfile.c
+++ b/configfile.c
@@ -1052,11 +1052,13 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
 * - repmgrd_standby_startup_timeout
 * - retry_promote_interval_secs
 *
- * non-changeable options
+ * non-changeable options (repmgrd references these from the "repmgr.nodes"
+ * table, not the configuration file)
 *
 * - node_id
 * - node_name
 * - data_directory
+ * - location
 * - priority
 * - replication_type
 *
@@ -1272,7 +1274,7 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
 		config_changed = true;
 	}

-	/* promote_delay */
+	/* promote_delay (for testing use only; not documented) */
 	if (orig_options->promote_delay != new_options.promote_delay)
 	{
 		orig_options->promote_delay = new_options.promote_delay;
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for repmgr 4.1.1.
+# Generated by GNU Autoconf 2.69 for repmgr 4.1.2.
 #
 # Report bugs to <pgsql-bugs@postgresql.org>.
 #
@@ -582,8 +582,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='repmgr'
 PACKAGE_TARNAME='repmgr'
-PACKAGE_VERSION='4.1.1'
-PACKAGE_STRING='repmgr 4.1.1'
+PACKAGE_VERSION='4.1.2'
+PACKAGE_STRING='repmgr 4.1.2'
 PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
 PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'

@@ -1178,7 +1178,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures repmgr 4.1.1 to adapt to many kinds of systems.
+\`configure' configures repmgr 4.1.2 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1239,7 +1239,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of repmgr 4.1.1:";;
+     short | recursive ) echo "Configuration of repmgr 4.1.2:";;
   esac
  cat <<\_ACEOF

@@ -1313,7 +1313,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-repmgr configure 4.1.1
+repmgr configure 4.1.2
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1332,7 +1332,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by repmgr $as_me 4.1.1, which was
+It was created by repmgr $as_me 4.1.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  $ $0 $@
@@ -2359,7 +2359,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by repmgr $as_me 4.1.1, which was
+This file was extended by repmgr $as_me 4.1.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -2422,7 +2422,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-repmgr config.status 4.1.1
+repmgr config.status 4.1.2
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"

--- a/configure.in
+++ b/configure.in
@@ -1,4 +1,4 @@
-AC_INIT([repmgr], [4.1.1], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
+AC_INIT([repmgr], [4.1.2], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])

 AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])

--- a/controldata.c
+++ b/controldata.c
@@ -227,7 +227,15 @@ get_controlfile(const char *DataDir)

 	control_file_info->control_file_processed = true;

-	if (version_num >= 90500)
+	if (version_num >= 110000)
+	{
+		ControlFileData11 *ptr = (struct ControlFileData11 *)ControlFileDataPtr;
+		control_file_info->system_identifier = ptr->system_identifier;
+		control_file_info->state = ptr->state;
+		control_file_info->checkPoint = ptr->checkPoint;
+		control_file_info->data_checksum_version = ptr->data_checksum_version;
+	}
+	else if (version_num >= 90500)
 	{
 		ControlFileData95 *ptr = (struct ControlFileData95 *)ControlFileDataPtr;
 		control_file_info->system_identifier = ptr->system_identifier;
--- a/controldata.h
+++ b/controldata.h
@@ -265,6 +265,71 @@ typedef struct ControlFileData95

 } ControlFileData95;

+/*
+ * Following field removed in 11:
+ *
+ *  XLogRecPtr	prevCheckPoint;
+ *
+ * In 10, following field appended *after* "data_checksum_version":
+ *
+ * 	char		mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];
+ *
+ * (but we don't care about that)
+ */
+
+typedef struct ControlFileData11
+{
+	uint64		system_identifier;
+
+	uint32		pg_control_version;		/* PG_CONTROL_VERSION */
+	uint32		catalog_version_no;		/* see catversion.h */
+
+	DBState		state;			/* see enum above */
+	pg_time_t	time;			/* time stamp of last pg_control update */
+	XLogRecPtr	checkPoint;		/* last check point record ptr */
+
+	CheckPoint95	checkPointCopy; /* copy of last check point record */
+
+	XLogRecPtr	unloggedLSN;	/* current fake LSN value, for unlogged rels */
+
+	XLogRecPtr	minRecoveryPoint;
+	TimeLineID	minRecoveryPointTLI;
+	XLogRecPtr	backupStartPoint;
+	XLogRecPtr	backupEndPoint;
+	bool		backupEndRequired;
+
+	int			wal_level;
+	bool		wal_log_hints;
+	int			MaxConnections;
+	int			max_worker_processes;
+	int			max_prepared_xacts;
+	int			max_locks_per_xact;
+	bool		track_commit_timestamp;
+
+	uint32		maxAlign;		/* alignment requirement for tuples */
+	double		floatFormat;	/* constant 1234567.0 */
+
+	uint32		blcksz;			/* data block size for this DB */
+	uint32		relseg_size;	/* blocks per segment of large relation */
+
+	uint32		xlog_blcksz;	/* block size within WAL files */
+	uint32		xlog_seg_size;	/* size of each WAL segment */
+
+	uint32		nameDataLen;	/* catalog name field width */
+	uint32		indexMaxKeys;	/* max number of columns in an index */
+
+	uint32		toast_max_chunk_size;	/* chunk size in TOAST tables */
+	uint32		loblksize;		/* chunk size in pg_largeobject */
+
+	bool		enableIntTimes; /* int64 storage enabled? */
+
+	bool		float4ByVal;	/* float4 pass-by-value? */
+	bool		float8ByVal;	/* float8, int8, etc pass-by-value? */
+
+	uint32		data_checksum_version;
+
+} ControlFileData11;
+


 extern DBState get_db_state(const char *data_directory);
--- a/doc/appendix-release-notes.sgml
+++ b/doc/appendix-release-notes.sgml
@@ -33,10 +33,9 @@
      <para>
        <itemizedlist>

-
          <listitem>
            <para>
-              <command><link linkend="repmgr-standby-clone">repmgr standby switchover --dry-run</link></command>
+              <command><link linkend="repmgr-standby-switchover">repmgr standby switchover --dry-run</link></command>
              no longer copies external configuration files to test they can be copied; this avoids making
              any changes to the target system. (GitHub #491).
            </para>
@@ -45,7 +44,7 @@
          <listitem>
            <para>
              <command><link linkend="repmgr-cluster-cleanup">repmgr cluster cleanup</link></command>:
-              add <literal>cluster_cleanup</literal> event. (GitHub #492)
+              add <literal>cluster_cleanup</literal> event. (GitHub #492).
            </para>
          </listitem>

--- a/doc/configuration-file-service-commands.sgml
+++ b/doc/configuration-file-service-commands.sgml
@@ -17,15 +17,15 @@
    <link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>.
  </para>
  <para>
-    By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> to control the PostgreSQL
+    By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> utility to control the PostgreSQL
    server. However this can lead to various problems, particularly when PostgreSQL has been
-    installed from packages, and expecially so if <application>systemd</application> is in use.
+    installed from packages, and especially so if <application>systemd</application> is in use.
  </para>


  <note>
    <para>
-      If using <application>systemd</application>, ensure you have <varname>RemoteIPC</varname> set to <literal>off</literal>.
+      If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
      See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
      entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
    </para>
@@ -99,7 +99,7 @@
      Defaults:postgres !requiretty
      postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.6, \
        /usr/bin/systemctl start postgresql-9.6, \
-        /usr/bin/systemctl restart postgresql-9.6 \
+        /usr/bin/systemctl restart postgresql-9.6, \
        /usr/bin/systemctl reload postgresql-9.6</programlisting>
  </para>

--- a/doc/install-packages.sgml
+++ b/doc/install-packages.sgml
@@ -47,7 +47,7 @@
    <title>2ndQuadrant public RPM yum repository</title>

    <para>
-      Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
+      Beginning with <ulink url="https://repmgr.org/docs/4.1/release-4.0.5.html">repmgr 4.0.5</ulink>,
      <ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
 	  <ulink url="https://dl.2ndquadrant.com/">public repository</ulink> for 2ndQuadrant software,
 	  including &repmgr;. We recommend using this for all future &repmgr; releases.
--- a/doc/repmgr-cluster-show.sgml
+++ b/doc/repmgr-cluster-show.sgml
@@ -81,36 +81,56 @@

  <refsect1>
    <title>Options</title>
-    <para>
-      <command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
-      outputs the replication cluster's status in a simple CSV format, suitable for
-      parsing by scripts:
-      <programlisting>
+
+    <variablelist>
+
+      <varlistentry>
+        <term><option>--csv</option></term>
+        <listitem>
+		  <para>
+			<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
+			outputs the replication cluster's status in a simple CSV format, suitable for
+			parsing by scripts:
+			<programlisting>
    $ repmgr -f /etc/repmgr.conf cluster show --csv
    1,-1,-1
    2,0,0
    3,0,1</programlisting>
-    </para>
-    <para>
-      The columns have following meanings:
-      <itemizedlist spacing="compact" mark="bullet">
-        <listitem>
-          <simpara>
-            node ID
-          </simpara>
-        </listitem>
-        <listitem>
-          <simpara>
+		  </para>
+		  <para>
+			The columns have the following meanings:
+			<itemizedlist spacing="compact" mark="bullet">
+			  <listitem>
+				<simpara>
+				  node ID
+				</simpara>
+			  </listitem>
+			  <listitem>
+				<simpara>
            availability (0 = available, -1 = unavailable)
-          </simpara>
-        </listitem>
+				</simpara>
+			  </listitem>
+			  <listitem>
+				<simpara>
+				  recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
+				</simpara>
+			  </listitem>
+			</itemizedlist>
+		  </para>
+		</listitem>
+	  </varlistentry>
+
+      <varlistentry>
+        <term><option>--verbose</option></term>
        <listitem>
-          <simpara>
-            recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
-          </simpara>
+          <para>
+			Display the full text of any database connection error messages
+          </para>
        </listitem>
-      </itemizedlist>
-    </para>
+      </varlistentry>
+
+	</variablelist>
+
  </refsect1>


--- a/doc/repmgr-node-rejoin.sgml
+++ b/doc/repmgr-node-rejoin.sgml
@@ -67,10 +67,10 @@
        <term><option>--force-rewind[=/path/to/pg_rewind]</option></term>
        <listitem>
          <para>
-            Execute <application>pg_rewind</application> if necessary.
+            Execute <application>pg_rewind</application>.
          </para>
          <para>
-            It is only necessary to provide the <application>pg_rewind</application>
+            It is only necessary to provide the <application>pg_rewind</application> path
            if using PostgreSQL 9.3 or 9.4, and <application>pg_rewind</application>
            is not installed in the PostgreSQL <filename>bin</filename> directory.
          </para>
@@ -193,7 +193,7 @@
    </note>

    <para>
-      To have <command>repmgr node rejoin</command> use <command>pg_rewind</command> if required,
+      To have <command>repmgr node rejoin</command> use <command>pg_rewind</command>,
      pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
      to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
    </para>
@@ -226,6 +226,15 @@
    INFO: pg_rewind would now be executed
    DETAIL: pg_rewind command is:
      pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
+
+    <note>
+      <para>
+        If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
+        this checks the prerequisites for using <application>pg_rewind</application>, but cannot
+        predict the outcome of actually executing <application>pg_rewind</application>.
+      </para>
+    </note>
+
    <programlisting>
    $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
         --force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
--- a/doc/repmgr.sgml
+++ b/doc/repmgr.sgml
@@ -25,7 +25,13 @@
   <para>
   This is the official documentation of &repmgr; &repmgrversion; for
   use with PostgreSQL 9.3 - PostgreSQL 10.
-   It describes the functionality supported by the current version of &repmgr;.
+   </para>
+   <para>
+     &repmgr; is being continually developed and we strongly recommend using the
+     latest version. Please check the
+     <ulink url="https://repmgr.org/">repmgr website</ulink> for details
+     about the current &repmgr; version as well as the
+     <ulink url="https://repmgr.org/docs/current/index.html">current documentation</ulink>.
   </para>

   <para>
--- a/doc/repmgrd-configuration.sgml
+++ b/doc/repmgrd-configuration.sgml
@@ -34,24 +34,6 @@
      the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
    </para>

-    <para>
-      To apply configuration file changes to a running <application>repmgrd</application>
-      daemon, execute the operating system's r<application>repmgrd</application> service reload command
-      (see <xref linkend="appendix-packages"> for examples),
-      or for instances  which were manually started, execute <command>kill -HUP</command>, e.g.
-      <command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
-    </para>
-    <note>
-      <para>
-        Check the <application>repmgrd</application> log to see what changes were
-        applied, or if any issues were encountered when reloading the configuration.
-      </para>
-    </note>
-    <para>
-      Note that only a subset of configuration file parameters can be changed on a
-      running <application>repmgrd</application> daemon.
-    </para>
-

    <sect2 id="repmgrd-automatic-failover-configuration">
      <title>automatic failover configuration</title>
@@ -167,6 +149,203 @@
      </para>
    </sect2>

+    <sect2 id="repmgrd-reloading-configuration" xreflabel="reloading repmgrd configuration">
+      <indexterm>
+        <primary>repmgrd</primary>
+        <secondary>applying configuration changes</secondary>
+      </indexterm>
+      <title>Applying configuration changes to repmgrd</title>
+      <para>
+        To apply configuration file changes to a running <application>repmgrd</application>
+        daemon, execute the operating system's <application>repmgrd</application> service reload command
+        (see <xref linkend="appendix-packages"> for examples),
+          or for instances which were manually started, execute <command>kill -HUP</command>, e.g.
+          <command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
+      </para>
+      <tip>
+        <para>
+          Check the <application>repmgrd</application> log to see what changes were
+          applied, or if any issues were encountered when reloading the configuration.
+        </para>
+      </tip>
+      <para>
+        Note that only the following subset of configuration file parameters can be changed on a
+        running <application>repmgrd</application> daemon:
+      </para>
+      <itemizedlist spacing="compact" mark="bullet">
+
+        <listitem>
+          <simpara>
+            <varname>async_query_timeout</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>bdr_local_monitoring_only</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>bdr_recovery_timeout</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>conninfo</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>degraded_monitoring_timeout</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>event_notification_command</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>event_notifications</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>failover</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>follow_command</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>log_facility</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>log_file</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>log_level</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>log_status_interval</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>monitor_interval_secs</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>monitoring_history</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>primary_notification_timeout</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>promote_command</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>reconnect_attempts</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>reconnect_interval</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>repmgrd_standby_startup_timeout</varname>
+          </simpara>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        The following set of configuration file parameters must be updated via
+        <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>,
+        as they require changes to the <literal>repmgr.nodes</literal> table so they are visible to
+        all nodes in the replication cluster:
+      </para>
+      <itemizedlist spacing="compact" mark="bullet">
+
+        <listitem>
+          <simpara>
+            <varname>node_id</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>node_name</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>data_directory</varname>
+          </simpara>
+        </listitem>
+
+        <listitem>
+          <simpara>
+            <varname>location</varname>
+          </simpara>
+        </listitem>
+
+
+        <listitem>
+          <simpara>
+            <varname>priority</varname>
+          </simpara>
+        </listitem>
+
+      </itemizedlist>
+
+      <note>
+        <para>
+          After executing <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>,
+          <application>repmgrd</application> <emphasis>must</emphasis> be restarted for the changes to take effect.
+        </para>
+      </note>
+
+    </sect2>
+
  </sect1>

  <sect1 id="repmgrd-daemon">
--- a/repmgr-action-cluster.c
+++ b/repmgr-action-cluster.c
@@ -84,6 +84,7 @@ do_cluster_show(void)
 	ItemList	warnings = {NULL, NULL};
 	bool		success = false;
 	bool		error_found = false;
+	bool		connection_error_found = false;

 	/* Connect to local database to obtain cluster connection data */
 	log_verbose(LOG_INFO, _("connecting to database"));
@@ -141,14 +142,26 @@ do_cluster_show(void)
 		}
 		else
 		{
-			char		error[MAXLEN];
-
-			strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
 			cell->node_info->node_status = NODE_STATUS_DOWN;
 			cell->node_info->recovery_type = RECTYPE_UNKNOWN;
-			item_list_append_format(&warnings,
-									"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
-									cell->node_info->node_name, cell->node_info->node_id, trim(error));
+
+			connection_error_found = true;
+
+			if (runtime_options.verbose)
+			{
+				char		error[MAXLEN];
+
+				strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
+				item_list_append_format(&warnings,
+										"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
+										cell->node_info->node_name, cell->node_info->node_id, trim(error));
+			}
+			else
+			{
+				item_list_append_format(&warnings,
+										"unable to  connect to node \"%s\" (ID: %i)",
+										cell->node_info->node_name, cell->node_info->node_id);
+			}
 		}

 		initPQExpBuffer(&details);
@@ -437,6 +450,11 @@ do_cluster_show(void)
 		{
 			printf(_("  - %s\n"), cell->string);
 		}
+
+		if (runtime_options.verbose == false && connection_error_found == true)
+		{
+			log_hint(_("execute with --verbose option to see connection error messages"));
+		}
 	}

 	/*
--- a/repmgr.c
+++ b/repmgr.c
@@ -416,9 +416,9 @@ unset_bdr_failover_handler(PG_FUNCTION_ARGS)
 		LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);

 		shared_state->bdr_failover_handler = UNKNOWN_NODE_ID;
-
-		LWLockRelease(shared_state->lock);
 	}

+	LWLockRelease(shared_state->lock);
+
 	PG_RETURN_VOID();
 }
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -161,7 +161,7 @@
 # Examples:
 #
 #   pg_ctl_options='-s'
-#   pg_basebackup_options='--label=repmgr_backup
+#   pg_basebackup_options='--label=repmgr_backup'
 #   rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
 #   ssh_options=-o "StrictHostKeyChecking no"

@@ -281,9 +281,9 @@ ssh_options='-q -o ConnectTimeout=10'	# Options to append to "ssh"
 					# a value of zero prevents the node being promoted to primary
 					# (default: 100)

-#reconnect_attempts=6			# Number attempts which will be made to reconnect to an unreachable
+#reconnect_attempts=6			# Number of attempts which will be made to reconnect to an unreachable
 					# primary (or other upstream node)
-#reconnect_interval=10			# Interval between attempts  to reconnect to an unreachable
+#reconnect_interval=10			# Interval between attempts to reconnect to an unreachable
 					# primary (or other upstream node)
 #promote_command=			# command repmgrd executes when promoting a new primary; use something like:
 					#
--- a/repmgr_version.h.in
+++ b/repmgr_version.h.in
@@ -1,2 +1,2 @@
 #define REPMGR_VERSION_DATE ""
-#define REPMGR_VERSION "4.1.1"
+#define REPMGR_VERSION "4.1.2"
--- a/repmgrd-physical.c
+++ b/repmgrd-physical.c
@@ -268,7 +268,12 @@ monitor_streaming_primary(void)
 		 * TODO: cache node list here, refresh at `node_list_refresh_interval`
 		 * also return reason for inavailability so we can log it
 		 */
-		if (is_server_available(local_node_info.conninfo) == false)
+
+		(void) connection_ping(local_conn);
+
+		check_connection(&local_node_info, &local_conn);
+
+		if (PQstatus(local_conn) != CONNECTION_OK)
 		{

 			/* local node is down, we were expecting it to be up */
@@ -308,6 +313,7 @@ monitor_streaming_primary(void)
 				if (local_node_info.node_status == NODE_STATUS_UP)
 				{
 					int			local_node_unreachable_elapsed = calculate_elapsed(local_node_unreachable_start);
+					int 		stored_local_node_id = UNKNOWN_NODE_ID;

 					initPQExpBuffer(&event_details);

@@ -324,6 +330,17 @@ monitor_streaming_primary(void)
 											  event_details.data);
 					termPQExpBuffer(&event_details);

+					/*
+					 * If the local node was restarted, we'll need to reinitialise values
+					 * stored in shared memory.
+					 */
+
+					stored_local_node_id = repmgrd_get_local_node_id(local_conn);
+					if (stored_local_node_id == UNKNOWN_NODE_ID)
+					{
+						repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
+					}
+
 					goto loop;
 				}

@@ -991,6 +1008,13 @@ monitor_streaming_standby(void)
 								continue;
 							}

+							/* skip witness node - we can't possibly "follow" that */
+
+							if (cell->node_info->type == WITNESS)
+							{
+								continue;
+							}
+
 							cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);

 							if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
@@ -1013,6 +1037,7 @@ monitor_streaming_standby(void)
 							follow_new_primary(follow_node_id);
 						}
 					}
+
 					clear_node_info_list(&sibling_nodes);
 				}
 			}
@@ -1136,8 +1161,11 @@ loop:
 		}
 		else
 		{
+			/* we've reconnected to the local node after an outage */
 			if (local_node_info.active == false)
 			{
+				int stored_local_node_id = UNKNOWN_NODE_ID;
+
 				if (PQstatus(primary_conn) == CONNECTION_OK)
 				{
 					if (update_node_record_set_active(primary_conn, local_node_info.node_id, true) == true)
@@ -1153,19 +1181,29 @@ loop:
 										  local_node_info.node_name,
 										  local_node_info.node_id);

-						log_warning("%s", event_details.data)
+						log_notice("%s", event_details.data);

-
-							create_event_notification(primary_conn,
-													  &config_file_options,
-													  local_node_info.node_id,
-													  "standby_recovery",
-													  true,
-													  event_details.data);
+						create_event_notification(primary_conn,
+												  &config_file_options,
+												  local_node_info.node_id,
+												  "standby_recovery",
+												  true,
+												  event_details.data);

 						termPQExpBuffer(&event_details);
 					}
 				}
+
+				/*
+				 * If the local node was restarted, we'll need to reinitialise values
+				 * stored in shared memory.
+				 */
+
+				stored_local_node_id = repmgrd_get_local_node_id(local_conn);
+				if (stored_local_node_id == UNKNOWN_NODE_ID)
+				{
+					repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
+				}
 			}
 		}

@@ -1201,7 +1239,7 @@ monitor_streaming_witness(void)
 	/*
 	 * At this point we can't trust the local copy of "repmgr.nodes", as
 	 * it may not have been updated. We'll scan the cluster for the current
-	 * primary and refresh the copy from that before proceeding further.
+	 * primary and refresh the copy from that before proceeding further.
 	 */
 	primary_conn = get_primary_connection_quiet(local_conn, &primary_node_id, NULL);

@@ -1437,6 +1475,105 @@ monitor_streaming_witness(void)
 		}
 loop:

+		/*
+		 * handle local node failure
+		 *
+		 * currently we'll just check the connection, and try to reconnect
+		 *
+		 * TODO: add timeout, after which we run in degraded state
+		 */
+
+		(void) connection_ping(local_conn);
+
+		check_connection(&local_node_info, &local_conn);
+
+		if (PQstatus(local_conn) != CONNECTION_OK)
+		{
+			if (local_node_info.active == true)
+			{
+				bool success = true;
+				PQExpBufferData event_details;
+
+				initPQExpBuffer(&event_details);
+
+				local_node_info.active = false;
+
+				appendPQExpBuffer(&event_details,
+								  _("unable to connect to local node \"%s\" (ID: %i), marking inactive"),
+								  local_node_info.node_name,
+								  local_node_info.node_id);
+				log_notice("%s", event_details.data);
+
+				if (PQstatus(primary_conn) == CONNECTION_OK)
+				{
+					if (update_node_record_set_active(primary_conn, local_node_info.node_id, false) == false)
+					{
+						success = false;
+						log_warning(_("unable to mark node \"%s\" (ID: %i) as inactive"),
+									  local_node_info.node_name,
+									  local_node_info.node_id);
+					}
+				}
+
+				create_event_notification(primary_conn,
+										  &config_file_options,
+										  local_node_info.node_id,
+										  "standby_failure",
+										  success,
+										  event_details.data);
+
+				termPQExpBuffer(&event_details);
+			}
+		}
+		else
+		{
+			/* we've reconnected to the local node after an outage */
+			if (local_node_info.active == false)
+			{
+				int stored_local_node_id = UNKNOWN_NODE_ID;
+
+				if (PQstatus(primary_conn) == CONNECTION_OK)
+				{
+					if (update_node_record_set_active(primary_conn, local_node_info.node_id, true) == true)
+					{
+						PQExpBufferData event_details;
+
+						initPQExpBuffer(&event_details);
+
+						local_node_info.active = true;
+
+						appendPQExpBuffer(&event_details,
+										  _("reconnected to local node \"%s\" (ID: %i), marking active"),
+										  local_node_info.node_name,
+										  local_node_info.node_id);
+
+						log_notice("%s", event_details.data);
+
+						create_event_notification(primary_conn,
+												  &config_file_options,
+												  local_node_info.node_id,
+												  "standby_recovery",
+												  true,
+												  event_details.data);
+
+						termPQExpBuffer(&event_details);
+					}
+				}
+
+				/*
+				 * If the local node was restarted, we'll need to reinitialise values
+				 * stored in shared memory.
+				 */
+
+				stored_local_node_id = repmgrd_get_local_node_id(local_conn);
+				if (stored_local_node_id == UNKNOWN_NODE_ID)
+				{
+					repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
+				}
+			}
+		}
+
+
 		/* refresh repmgr.nodes after "witness_sync_interval" seconds */

 		{
@@ -1480,6 +1617,7 @@ loop:
 		}


+
 		if (got_SIGHUP)
 		{
 			handle_sighup(&local_conn, WITNESS);
@@ -2256,6 +2394,8 @@ follow_new_primary(int new_primary_id)
 	RecordStatus record_status = RECORD_NOT_FOUND;
 	bool		new_primary_ok = false;

+	log_verbose(LOG_DEBUG, "follow_new_primary(): new primary id is %i", new_primary_id);
+
 	record_status = get_node_record(local_conn, new_primary_id, &new_primary);

 	if (record_status != RECORD_FOUND)
@@ -2934,9 +3074,18 @@ check_connection(t_node_info *node_info, PGconn **conn)
 		}
 		else
 		{
+			int 		stored_local_node_id = UNKNOWN_NODE_ID;
+
 			log_info(_("reconnected to node \"%s\" (ID: %i)"),
 					 node_info->node_name,
 					 node_info->node_id);
+
+			stored_local_node_id = repmgrd_get_local_node_id(*conn);
+			if (stored_local_node_id == UNKNOWN_NODE_ID)
+			{
+				repmgrd_set_local_node_id(*conn, config_file_options.node_id);
+			}
+
 		}
 	}
 }
Author	SHA1	Message	Date
Ian Barwick	222f7e6080	doc: add a link to the current documentation from the contents page	2019-04-03 10:47:19 +09:00
Ian Barwick	446695e328	doc: fix typos	2018-10-23 09:22:11 +09:00
Ian Barwick	ec3da13e22	doc: fix typo Per user report on mailing list.	2018-10-23 09:00:46 +09:00
Ian Barwick	1488c014ff	Changes for a 4.1.2 snapshot release	2018-10-16 13:24:48 +09:00
Ian Barwick	f471316504	repmgrd: improve promotion script failure handling While scanning for a new primary following a promotion script failure, repmgrd was treating a witness server as a potential new primary and would attempt to "follow" it. Fortunately "repmgr standby follow" would do the right thing and choose the actual primary, if available, otherwise do nothing, so the cluster would eventually end up in the correct state, albeit for the wrong reason. By skipping the witness server as a potential new primary, repmgrd will do the right thing if the original primary does come back online, i.e. resume monitoring as before.	2018-10-16 11:39:54 +09:00
Gilles Pietri	726299f7ef	Missing comma in sudoers example	2018-10-11 09:59:15 +09:00
Ian Barwick	7fda2a1bcf	doc: fix typo in repmgr.conf.sample	2018-10-08 09:37:41 +09:00
Ian Barwick	d26141b8ab	Fix LWLockRelease() call in unset_bdr_failover_handler()	2018-10-08 09:37:31 +09:00
Ian Barwick	4a6b5fe913	Update control file checks for PostgreSQL 11	2018-09-27 14:08:39 +09:00
Ian Barwick	a71e644255	repmgrd: document parameters which can be reloaded via SIGHUP Also add a new subsection with details on reloading repmgrd configuration.	2018-09-27 10:44:34 +09:00
Ian Barwick	8646fd6004	doc: fix link in 4.1.1 release notes	2018-09-25 14:30:57 +09:00
Ian Barwick	3e1bb1a523	doc: minor fixes to "repmgr.conf.sample"	2018-09-25 10:54:54 +09:00
Ian Barwick	f5e58fc062	doc: update "repmgr node rejoin" documentation Clarify various points related to --force-rewind and pg_rewind usage.	2018-09-14 14:09:33 +09:00
Ian Barwick	6b95a96f3a	repmgr: improve "cluster show" output Only output full contents of connection error messages in --verbose mode, otherwise it can spew a lot of text onto the screen.	2018-09-12 14:17:39 +09:00
Ian Barwick	bd146ae9ac	repmgrd: update local node id in shared memory after local node restart Also ensure local node restarts are handled more elegantly, so we're not surprised by a stale connection handle. GitHub #502.	2018-09-12 14:17:35 +09:00
Ian Barwick	c7f8e48d12	Bump version 4.1.2	2018-09-07 13:08:55 +09:00
Ian Barwick	322190516c	doc: update link	2018-09-05 15:41:32 +09:00