mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
65 Commits
dev/update
...
REL4_2_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aa442e0dd5 | ||
|
|
1a35f7a2a4 | ||
|
|
5b87156870 | ||
|
|
d29df0067f | ||
|
|
7926011fa4 | ||
|
|
488afa9f7f | ||
|
|
0f873df13a | ||
|
|
b71ce07210 | ||
|
|
4b4f98e897 | ||
|
|
b1c048a74b | ||
|
|
24d7f7227e | ||
|
|
8add8231f8 | ||
|
|
160ace6436 | ||
|
|
3ea97d407f | ||
|
|
0feaf8db1b | ||
|
|
63fabfd848 | ||
|
|
a965bc005f | ||
|
|
8e0088d51e | ||
|
|
a66ef24293 | ||
|
|
630e25b83a | ||
|
|
4a084e5e7d | ||
|
|
4a9bd87c53 | ||
|
|
0512826ed5 | ||
|
|
677487d893 | ||
|
|
e78c762d2c | ||
|
|
7195512b99 | ||
|
|
4a327c8be2 | ||
|
|
1091cd0c68 | ||
|
|
6fbc09741a | ||
|
|
1a82eecdb6 | ||
|
|
5886772cdb | ||
|
|
8bbcaffac7 | ||
|
|
71dda21b85 | ||
|
|
ae729adb32 | ||
|
|
687e707a7b | ||
|
|
ae3f302dc2 | ||
|
|
d4216d053e | ||
|
|
075d193a87 | ||
|
|
7652a19e62 | ||
|
|
7ae771e573 | ||
|
|
44de7751f9 | ||
|
|
d098c6a114 | ||
|
|
3c5ef69f38 | ||
|
|
b74129f363 | ||
|
|
fdcc3850c8 | ||
|
|
46f46612f8 | ||
|
|
a554914854 | ||
|
|
4f0b10cac6 | ||
|
|
9fe1e9cb3e | ||
|
|
0cafeb3828 | ||
|
|
79e79bd5f2 | ||
|
|
a4e21fd8fe | ||
|
|
e826f72312 | ||
|
|
1e8b3313ee | ||
|
|
b5772d88dd | ||
|
|
22614573b9 | ||
|
|
77c9092794 | ||
|
|
15bbe04a6f | ||
|
|
0842560a88 | ||
|
|
8bec4946bc | ||
|
|
3ab22f9442 | ||
|
|
3a9c36a36c | ||
|
|
2ded8987ac | ||
|
|
6311f3f30a | ||
|
|
12ec6c7abc |
4
FAQ.md
4
FAQ.md
@@ -1,8 +1,10 @@
|
||||
FAQ - Frequently Asked Questions about repmgr
|
||||
=============================================
|
||||
|
||||
The repmgr 4 FAQ is located here: [repmgr FAQ (Frequently Asked Questions)](https://repmgr.org/docs/4.0/appendix-faq.html "repmgr FAQ")
|
||||
The repmgr 4 FAQ is located here: [repmgr FAQ (Frequently Asked Questions)](https://repmgr.org/docs/current/appendix-faq.html "repmgr FAQ")
|
||||
|
||||
The repmgr 3.x FAQ can be found here:
|
||||
|
||||
https://github.com/2ndQuadrant/repmgr/blob/REL3_3_STABLE/FAQ.md
|
||||
|
||||
Note that repmgr 3.x is no longer supported.
|
||||
|
||||
5
HISTORY
5
HISTORY
@@ -1,12 +1,13 @@
|
||||
4.2.0 2018-??-??
|
||||
4.2 2018-10-24
|
||||
repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
|
||||
GitHub #504 (Ian)
|
||||
repmgr: add "--node-id" option to "repmgr cluster cleanup"; GitHub #493 (Ian)
|
||||
repmgr: report unreachable nodes when running "repmgr cluster (matrix|crosscheck)";
|
||||
GitHub #246 (Ian)
|
||||
repmgr: add configuration file parameter "repmgr_bindir"; GitHub #246 (Ian)
|
||||
repmgr: fix "Missing replication slots" label in "node check"; GitHub #507 (Ian)
|
||||
repmgr: fix "Missing replication slots" label in "node check"; GitHub #507 (Ian)
|
||||
repmgrd: fix parsing of -d/--daemonize option (Ian)
|
||||
repmgrd: support "pausing" of repmgrd (Ian)
|
||||
|
||||
4.1.1 2018-09-05
|
||||
logging: explicitly log the text of failed queries as ERRORs to
|
||||
|
||||
@@ -30,13 +30,18 @@ all: \
|
||||
PG_CPPFLAGS = -std=gnu89 -I$(includedir_internal) -I$(libpq_srcdir) -Wall -Wmissing-prototypes -Wmissing-declarations $(EXTRA_CFLAGS)
|
||||
SHLIB_LINK = $(libpq)
|
||||
|
||||
HEADERS = $(wildcard *.h)
|
||||
|
||||
|
||||
OBJS = \
|
||||
repmgr.o
|
||||
|
||||
include Makefile.global
|
||||
|
||||
ifeq ($(vpath_build),yes)
|
||||
HEADERS = $(wildcard *.h)
|
||||
else
|
||||
HEADERS_built = $(wildcard *.h)
|
||||
endif
|
||||
|
||||
$(info Building against PostgreSQL $(MAJORVERSION))
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ operations.
|
||||
`repmgr 4` is a complete rewrite of the existing `repmgr` codebase, allowing
|
||||
the use of all of the latest features in PostgreSQL replication.
|
||||
|
||||
PostgreSQL 10, 9.6 and 9.5 are fully supported.
|
||||
PostgreSQL 11, 10, 9.6 and 9.5 are fully supported.
|
||||
PostgreSQL 9.4 and 9.3 are supported, with some restrictions.
|
||||
|
||||
`repmgr` is distributed under the GNU GPL 3 and maintained by 2ndQuadrant.
|
||||
@@ -19,7 +19,7 @@ PostgreSQL 9.4 and 9.3 are supported, with some restrictions.
|
||||
|
||||
`repmgr 4` supports monitoring of a two-node BDR 2.0 cluster on PostgreSQL 9.6
|
||||
only. Note that BDR 2.0 is not publicly available; please contact 2ndQuadrant
|
||||
for details. `repmgr 4` will support future public BDR releases.
|
||||
for details.
|
||||
|
||||
|
||||
Documentation
|
||||
@@ -27,7 +27,7 @@ Documentation
|
||||
|
||||
The main `repmgr` documentation is available here:
|
||||
|
||||
> [repmgr 4 documentation](https://repmgr.org/docs/4.0/index.html)
|
||||
> [repmgr 4 documentation](https://repmgr.org/docs/4.2/index.html)
|
||||
|
||||
The `README` file for `repmgr` 3.x is available here:
|
||||
|
||||
|
||||
@@ -816,13 +816,13 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
if (options->archive_ready_warning >= options->archive_ready_critical)
|
||||
{
|
||||
item_list_append(error_list,
|
||||
_("\archive_ready_critical\" must be greater than \"archive_ready_warning\""));
|
||||
_("\"archive_ready_critical\" must be greater than \"archive_ready_warning\""));
|
||||
}
|
||||
|
||||
if (options->replication_lag_warning >= options->replication_lag_critical)
|
||||
{
|
||||
item_list_append(error_list,
|
||||
_("\replication_lag_critical\" must be greater than \"replication_lag_warning\""));
|
||||
_("\"replication_lag_critical\" must be greater than \"replication_lag_warning\""));
|
||||
}
|
||||
|
||||
if (options->standby_reconnect_timeout < options->node_rejoin_timeout)
|
||||
@@ -1531,6 +1531,9 @@ parse_bool(const char *s, const char *config_item, ItemList *error_list)
|
||||
{
|
||||
PQExpBufferData errors;
|
||||
|
||||
if (s == NULL)
|
||||
return true;
|
||||
|
||||
if (strcasecmp(s, "0") == 0)
|
||||
return false;
|
||||
|
||||
|
||||
40
dbutils.c
40
dbutils.c
@@ -30,6 +30,8 @@
|
||||
#include "controldata.h"
|
||||
#include "dirutil.h"
|
||||
|
||||
#define NODE_RECORD_PARAM_COUNT 11
|
||||
|
||||
/* mainly for use by repmgrd */
|
||||
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
|
||||
|
||||
@@ -1832,7 +1834,7 @@ atobool(const char *value)
|
||||
/* =================== */
|
||||
|
||||
ExtensionStatus
|
||||
get_repmgr_extension_status(PGconn *conn)
|
||||
get_repmgr_extension_status(PGconn *conn, t_extension_versions *extversions)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
@@ -1843,7 +1845,11 @@ get_repmgr_extension_status(PGconn *conn)
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBufferStr(&query,
|
||||
" SELECT ae.name, e.extname "
|
||||
" SELECT ae.name, e.extname, "
|
||||
" ae.default_version, "
|
||||
" (ae.default_version::numeric * 10)::INT AS available, "
|
||||
" ae.installed_version, "
|
||||
" (ae.installed_version::numeric * 10)::INT AS installed "
|
||||
" FROM pg_catalog.pg_available_extensions ae "
|
||||
"LEFT JOIN pg_catalog.pg_extension e "
|
||||
" ON e.extname=ae.name "
|
||||
@@ -1866,7 +1872,24 @@ get_repmgr_extension_status(PGconn *conn)
|
||||
/* 2. Check if extension installed */
|
||||
else if (PQgetisnull(res, 0, 1) == 0)
|
||||
{
|
||||
status = REPMGR_INSTALLED;
|
||||
int available_version = atoi(PQgetvalue(res, 0, 3));
|
||||
int installed_version = atoi(PQgetvalue(res, 0, 5));
|
||||
|
||||
/* caller wants to know which versions are installed/available */
|
||||
if (extversions != NULL)
|
||||
{
|
||||
strncpy(extversions->default_version, PQgetvalue(res, 0, 2), 7);
|
||||
strncpy(extversions->installed_version, PQgetvalue(res, 0, 4), 7);
|
||||
}
|
||||
|
||||
if (available_version > installed_version)
|
||||
{
|
||||
status = REPMGR_OLD_VERSION_INSTALLED;
|
||||
}
|
||||
else
|
||||
{
|
||||
status = REPMGR_INSTALLED;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -2478,8 +2501,8 @@ _create_update_node_record(PGconn *conn, char *action, t_node_info *node_info)
|
||||
|
||||
char *slot_name_ptr = NULL;
|
||||
|
||||
int param_count = 11;
|
||||
const char *param_values[param_count];
|
||||
int param_count = NODE_RECORD_PARAM_COUNT;
|
||||
const char *param_values[NODE_RECORD_PARAM_COUNT];
|
||||
|
||||
PGresult *res;
|
||||
bool success = true;
|
||||
@@ -4539,6 +4562,13 @@ get_new_primary(PGconn *conn, int *primary_node_id)
|
||||
|
||||
PQclear(res);
|
||||
|
||||
/*
|
||||
* repmgr.get_new_primary() will return UNKNOWN_NODE_ID if
|
||||
* "follow_new_primary" is false
|
||||
*/
|
||||
if (new_primary_node_id == UNKNOWN_NODE_ID)
|
||||
success = false;
|
||||
|
||||
*primary_node_id = new_primary_node_id;
|
||||
|
||||
return success;
|
||||
|
||||
26
dbutils.h
26
dbutils.h
@@ -47,6 +47,7 @@ typedef enum
|
||||
typedef enum
|
||||
{
|
||||
REPMGR_INSTALLED = 0,
|
||||
REPMGR_OLD_VERSION_INSTALLED,
|
||||
REPMGR_AVAILABLE,
|
||||
REPMGR_UNAVAILABLE,
|
||||
REPMGR_UNKNOWN
|
||||
@@ -104,6 +105,20 @@ typedef enum
|
||||
} BackupState;
|
||||
|
||||
|
||||
/*
|
||||
* Struct to store extension version information
|
||||
*/
|
||||
|
||||
typedef struct s_extension_versions {
|
||||
char default_version[8];
|
||||
char installed_version[8];
|
||||
} t_extension_versions;
|
||||
|
||||
#define T_EXTENSION_VERSIONS_INITIALIZER { \
|
||||
"", \
|
||||
"", \
|
||||
}
|
||||
|
||||
/*
|
||||
* Struct to store node information
|
||||
*/
|
||||
@@ -361,15 +376,13 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||
bool atobool(const char *value);
|
||||
|
||||
/* connection functions */
|
||||
PGconn *establish_db_connection(const char *conninfo,
|
||||
PGconn *establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_db_connection_quiet(const char *conninfo);
|
||||
|
||||
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
||||
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_primary_db_connection(PGconn *conn,
|
||||
PGconn *establish_primary_db_connection(PGconn *conn,
|
||||
const bool exit_on_error);
|
||||
|
||||
PGconn *get_primary_connection(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
||||
PGconn *get_primary_connection_quiet(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
||||
|
||||
@@ -395,7 +408,6 @@ bool has_passfile(void);
|
||||
bool begin_transaction(PGconn *conn);
|
||||
bool commit_transaction(PGconn *conn);
|
||||
bool rollback_transaction(PGconn *conn);
|
||||
bool check_cluster_schema(PGconn *conn);
|
||||
|
||||
/* GUC manipulation functions */
|
||||
bool set_config(PGconn *conn, const char *config_param, const char *config_value);
|
||||
@@ -421,7 +433,7 @@ bool repmgrd_is_paused(PGconn *conn);
|
||||
bool repmgrd_pause(PGconn *conn, bool pause);
|
||||
|
||||
/* extension functions */
|
||||
ExtensionStatus get_repmgr_extension_status(PGconn *conn);
|
||||
ExtensionStatus get_repmgr_extension_status(PGconn *conn, t_extension_versions *extversions);
|
||||
|
||||
/* node management functions */
|
||||
void checkpoint(PGconn *conn);
|
||||
|
||||
@@ -21,13 +21,17 @@
|
||||
in PostgreSQL 9.3, as well as improved automated failover support
|
||||
via <application>repmgrd</application>, and is not compatible with PostgreSQL 9.2
|
||||
and earlier. We recommend upgrading to &repmgr; 4, as the &repmgr; 3.x
|
||||
series will no longer be actively maintained.
|
||||
series is no longer maintained.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; 2.x supports PostgreSQL 9.0 ~ 9.3. While it is compatible
|
||||
with PostgreSQL 9.3, we recommend using repmgr 4.x. &repmgr; 2.x is
|
||||
no longer maintained.
|
||||
</para>
|
||||
<para>
|
||||
See also <link linkend="install-compatibility-matrix">&repmgr; compatibility matrix</link>
|
||||
and <link linkend="faq-upgrade-repmgr">Should I upgrade &repmgr;?</link>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-replication-slots-advantage" xreflabel="Advantages of replication slots">
|
||||
@@ -35,15 +39,25 @@
|
||||
<para>
|
||||
Replication slots, introduced in PostgreSQL 9.4, ensure that the
|
||||
primary server will retain WAL files until they have been consumed
|
||||
by all standby servers. This makes WAL file management much easier,
|
||||
and if used &repmgr; will no longer insist on a fixed minimum number
|
||||
(default: 5000) of WAL files being retained.
|
||||
by all standby servers. This means standby servers should never
|
||||
fail due to not being able to retrieve required WAL files from the
|
||||
primary.
|
||||
</para>
|
||||
<para>
|
||||
However this does mean that if a standby is no longer connected to the
|
||||
primary, the presence of the replication slot will cause WAL files
|
||||
to be retained indefinitely.
|
||||
to be retained indefinitely, and eventually lead to disk space
|
||||
exhaustion.
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
2ndQuadrant's recommended configuration is to configure
|
||||
<ulink url="https://www.pgbarman.org/">Barman</ulink> as a fallback
|
||||
source of WAL files, rather than maintain replication slots for
|
||||
each standby. See also: <link linkend="cloning-from-barman-restore-command">Using Barman as a WAL file source</link>.
|
||||
</para>
|
||||
</tip>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-replication-slots-number" xreflabel="Number of replication slots">
|
||||
@@ -116,6 +130,74 @@
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-required-for-replication">
|
||||
<title>Is &repmgr; required for streaming replication?</title>
|
||||
<para>
|
||||
No.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; (together with <application>repmgrd</application>) assists with
|
||||
<emphasis>managing</emphasis> replication. It does not actually perform replication, which
|
||||
is part of the core PostgreSQL functionality.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-what-if-repmgr-uninstalled">
|
||||
<title>Will replication stop working if &repmgr; is uninstalled?</title>
|
||||
<para>
|
||||
No. See preceding question.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-version-mix">
|
||||
<title>Does it matter if different &repmgr; versions are present in the replication cluster?</title>
|
||||
<para>
|
||||
Yes. If different "major" &repmgr; versions (e.g. 3.3.x and 4.1.x) are present,
|
||||
&repmgr; (in particular <application>repmgrd</application>)
|
||||
may not run, or may not run properly, or in the worst case (if different <application>repmgrd</application>
|
||||
versions are running and there are differences in the failover implementation) break
|
||||
your replication cluster.
|
||||
</para>
|
||||
<para>
|
||||
If different "minor" &repmgr; versions (e.g. 4.1.1 and 4.1.6) are installed,
|
||||
&repmgr; will function, but we strongly recommend always running the same version
|
||||
to ensure there are no unexpected surprises, e.g. a newer version behaving slightly
|
||||
differently to the older version.
|
||||
</para>
|
||||
<para>
|
||||
See also <link linkend="faq-upgrade-repmgr">Should I upgrade &repmgr;?</link>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-upgrade-repmgr">
|
||||
<title>Should I upgrade &repmgr;?</title>
|
||||
<para>
|
||||
Yes.
|
||||
</para>
|
||||
<para>
|
||||
We don't release new versions for fun, you know. Upgrading may require a little effort,
|
||||
but running an older &repmgr; version with bugs which have since been fixed may end up
|
||||
costing you more effort. The same applies to PostgreSQL itself.
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-conf-data-directory">
|
||||
<title>Why do I need to specify the data directory location in repmgr.conf?</title>
|
||||
<para>
|
||||
In some circumstances &repmgr; may need to access a PostgreSQL data
|
||||
directory while the PostgreSQL server is not running, e.g. to confirm
|
||||
it shut down cleanly during a <link linkend="performing-switchover">switchover</link>.
|
||||
</para>
|
||||
<para>
|
||||
Additionally, this provides support when using &repmgr; on PostgreSQL 9.6 and
|
||||
earlier, where the <literal>repmgr</literal> user is not a superuser; in that
|
||||
case the <literal>repmgr</literal> user will not be able to access the
|
||||
<literal>data_directory</literal> configuration setting, access to which is restricted
|
||||
to superusers. (In PostgreSQL 10 and later, non-superusers can be added to the
|
||||
group <option>pg_read_all_settings</option> which will enable them to read this setting).
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="faq-repmgr" xreflabel="repmgr">
|
||||
|
||||
@@ -64,7 +64,7 @@
|
||||
</row>
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
<entry><ulink url="https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ">https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ</ulink></entry>
|
||||
<entry><ulink url="https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ">https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ</ulink></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
@@ -276,7 +276,7 @@
|
||||
</row>
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
<entry><ulink url="https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN">https://repmgr.org/docs/4.1/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN</ulink></entry>
|
||||
<entry><ulink url="https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN">https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN</ulink></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
|
||||
@@ -17,8 +17,18 @@
|
||||
|
||||
<sect1 id="release-4.2">
|
||||
<title>Release 4.2</title>
|
||||
<para><emphasis>???, 2018</emphasis></para>
|
||||
<para><emphasis>Wed October 24, 2018</emphasis></para>
|
||||
|
||||
<para>
|
||||
&repmgr; 4.2 is a major release, with the main new feature being the
|
||||
ability to <link linkend="repmgrd-pausing">pause repmgrd</link>, e.g. during planned maintenance
|
||||
operations. Various other usability enhancements and a couple of bug fixes are also included;
|
||||
see notes below for details.
|
||||
</para>
|
||||
<para>
|
||||
A restart of the PostgreSQL server <emphasis>is</emphasis> required
|
||||
for this release. For detailed upgrade instructions, see
|
||||
<link linkend="upgrading-major-version">Upgrading a major version release</link>.
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
@@ -96,10 +106,44 @@
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
|
||||
<sect2>
|
||||
<title>repmgrd enhancements</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application> can now be "paused", i.e. instructed
|
||||
not to take any action such as a failover, even if the prerequisites for such an
|
||||
action are detected.
|
||||
</para>
|
||||
<para>
|
||||
This removes the need to stop <application>repmgrd</application> on all nodes when
|
||||
performing a planned operation such as a switchover.
|
||||
</para>
|
||||
<para>
|
||||
For further details, see <link linkend="repmgrd-pausing">Pausing repmgrd</link>.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Bug fixes</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr;: fix "Missing replication slots" label in
|
||||
<command><link linkend="repmgr-node-check">repmgr node check</link></command>. (GitHub #507)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application>: fix parsing of <option>-d/--daemonize</option> option.
|
||||
|
||||
@@ -4,5 +4,5 @@ BDR failover with repmgrd
|
||||
This document has been integrated into the main `repmgr` documentation
|
||||
and is now located here:
|
||||
|
||||
> [BDR failover with repmgrd](https://repmgr.org/docs/4.0/repmgrd-bdr.html)
|
||||
> [BDR failover with repmgrd](https://repmgr.org/docs/current/repmgrd-bdr.html)
|
||||
|
||||
|
||||
@@ -4,4 +4,4 @@ Changes in repmgr 4
|
||||
This document has been integrated into the main `repmgr` documentation
|
||||
and is now located here:
|
||||
|
||||
> [Release notes](https://repmgr.org/docs/4.0/release-4.0.html)
|
||||
> [Release notes](https://repmgr.org/docs/current/release-4.0.html)
|
||||
|
||||
@@ -243,8 +243,8 @@
|
||||
</simpara>
|
||||
<simpara>
|
||||
As an alternative we recommend using 2ndQuadrant's <ulink url="https://www.pgbarman.org/">Barman</ulink>,
|
||||
which offloads WAL management to a separate server, negating the need to use replication
|
||||
slots to reserve WAL. See section <xref linkend="cloning-from-barman">
|
||||
which offloads WAL management to a separate server, removing the requirement to use a replication
|
||||
slot for each individual standby to reserve WAL. See section <xref linkend="cloning-from-barman">
|
||||
for more details on using &repmgr; together with Barman.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
@@ -17,15 +17,15 @@
|
||||
<link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>.
|
||||
</para>
|
||||
<para>
|
||||
By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> to control the PostgreSQL
|
||||
By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> utility to control the PostgreSQL
|
||||
server. However this can lead to various problems, particularly when PostgreSQL has been
|
||||
installed from packages, and expecially so if <application>systemd</application> is in use.
|
||||
installed from packages, and especially so if <application>systemd</application> is in use.
|
||||
</para>
|
||||
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If using <application>systemd</application>, ensure you have <varname>RemoteIPC</varname> set to <literal>off</literal>.
|
||||
If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
|
||||
See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
|
||||
entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
|
||||
</para>
|
||||
@@ -74,12 +74,12 @@
|
||||
|
||||
<para>
|
||||
To confirm which command &repmgr; will execute for each action, use
|
||||
<command>repmgr node service --list --action=...</command>, e.g.:
|
||||
<command><link linkend="repmgr-node-service">repmgr node service --list-actions --action=...</link></command>, e.g.:
|
||||
<programlisting>
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=stop
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=start
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=restart
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=reload</programlisting>
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=stop
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=start
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=restart
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=reload</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
<sect1 id="configuration-file" xreflabel="configuration file location">
|
||||
<sect1 id="configuration-file" xreflabel="configuration file">
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>location</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>repmgr.conf location</secondary>
|
||||
<secondary>repmgr.conf</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Configuration file location</title>
|
||||
<title>Configuration file</title>
|
||||
|
||||
<para>
|
||||
<application>repmgr</application> and <application>repmgrd</application>
|
||||
use a common configuration file, by default called
|
||||
@@ -21,6 +21,55 @@
|
||||
for more details.
|
||||
</para>
|
||||
|
||||
<sect2 id="configuration-file-format" xreflabel="configuration file format">
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>format</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Configuration file format</title>
|
||||
|
||||
<para>
|
||||
<filename>repmgr.conf</filename> is a plain text file with one parameter/value
|
||||
combination per line.
|
||||
</para>
|
||||
<para>
|
||||
Whitespace is insignificant (except within a quoted parameter value) and blank lines are ignored.
|
||||
Hash marks (#) designate the remainder of the line as a comment. Parameter values that are not simple
|
||||
identifiers or numbers should be single-quoted. Note that a single quote cannot be embedded
|
||||
in a parameter value.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
&repmgr; will interpret double-quotes as being part of a string value; only use single quotes
|
||||
to quote parameter values.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
<para>
|
||||
Example of a valid <filename>repmgr.conf</filename> file:
|
||||
<programlisting>
|
||||
# repmgr.conf
|
||||
|
||||
node_id=1
|
||||
node_name= node1
|
||||
conninfo ='host=node1 dbname=repmgr user=repmgr connect_timeout=2'
|
||||
data_directory = /var/lib/pgsql/11/data</programlisting>
|
||||
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
|
||||
|
||||
<sect2 id="configuration-file-location" xreflabel="configuration file location">
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>location</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Configuration file location</title>
|
||||
|
||||
<para>
|
||||
The configuration file will be searched for in the following locations:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
@@ -50,7 +99,7 @@
|
||||
Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
|
||||
an error will be raised if it is not found or not readable, and no attempt will be made to
|
||||
check default locations; this is to prevent <application>repmgr</application> unexpectedly
|
||||
reading the wrong configuraton file.
|
||||
reading the wrong configuration file.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
@@ -65,5 +114,7 @@
|
||||
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
||||
<filename>/path/to/repmgr.conf</filename>).
|
||||
</para>
|
||||
</note>
|
||||
</sect1>
|
||||
</note>
|
||||
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
@@ -1,6 +1,292 @@
|
||||
<chapter id="configuration" xreflabel="Configuration">
|
||||
<title>repmgr configuration</title>
|
||||
|
||||
<sect1 id="configuration-prerequisites" xreflabel="Prerequisites for configuration">
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>prerequisites</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>ssh</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Prerequisites for configuration</title>
|
||||
<para>
|
||||
The following software must be installed on both servers:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><application>PostgreSQL</application></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<application>repmgr</application>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
At network level, connections to the PostgreSQL port (default: <literal>5432</literal>)
|
||||
must be possible between all nodes.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Passwordless <command>SSH</command> connectivity between all servers in the replication cluster
|
||||
is not required, but is necessary in the following cases:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
|
||||
data directory (as is the case with e.g. <link linkend="packages-debian-ubuntu">Debian packages</link>);
|
||||
in this case <command>rsync</command> must also be installed on all servers.
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
|
||||
and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<simpara>
|
||||
Consider setting <varname>ConnectTimeout</varname> to a low value in your SSH configuration.
|
||||
This will make it faster to detect any SSH connection errors.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
<sect2 id="configuration-postgresql" xreflabel="PostgreSQL configuration">
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>PostgreSQL</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>PostgreSQL configuration</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>PostgreSQL configuration for &repmgr;</title>
|
||||
<para>
|
||||
The following PostgreSQL configuration parameters may need to be changed in order
|
||||
for &repmgr; (and replication itself) to function correctly.
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>hot_standby</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>hot_standby</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<option>hot_standby</option> must always be set to <literal>on</literal>, as &repmgr; needs
|
||||
to be able to connect to each server it manages.
|
||||
</para>
|
||||
<para>
|
||||
Note that <option>hot_standby</option> defaults to <literal>on</literal> from PostgreSQL 10
|
||||
and later; in PostgreSQL 9.6 and earlier, the default was <literal>off</literal>.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-HOT-STANDBY">hot_standby</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>wal_level</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>wal_level</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<option>wal_level</option> must be one of <option>replica</option> or <option>logical</option>
|
||||
(PostgreSQL 9.5 and earlier: one of <option>hot_standby</option> or <option>logical</option>).
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL">wal_level</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>max_wal_senders</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>max_wal_senders</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<option>max_wal_senders</option> must be set to a value of <literal>2</literal> or greater.
|
||||
In general you will need one WAL sender for each standby which will attach to the PostgreSQL
|
||||
instance; additionally &repmgr; will require two free WAL senders in order to clone further
|
||||
standbys.
|
||||
</para>
|
||||
<para>
|
||||
<option>max_wal_senders</option> should be set to an appropriate value on all PostgreSQL
|
||||
instances in the replication cluster which may potentially become a primary server or
|
||||
(in cascading replication) the upstream server of a standby.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-WAL-SENDERS">max_wal_senders</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>max_replication_slots</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>max_replication_slots</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
If you are intending to use replication slots, <option>max_replication_slots</option>
|
||||
must be set to a non-zero value.
|
||||
</para>
|
||||
<para>
|
||||
<option>max_replication_slots</option> should be set to an appropriate value on all PostgreSQL
|
||||
instances in the replication cluster which may potentially become a primary server or
|
||||
(in cascading replication) the upstream server of a standby.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-REPLICATION-SLOTS">max_replication_slots</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>wal_log_hints</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>wal_log_hints</option></term>
|
||||
<listitem>
|
||||
<para>If you are intending to use <application>pg_rewind</application>,
|
||||
and the cluster was not initialised using data checksums, you may want to consider enabling
|
||||
<option>wal_log_hints</option>.
|
||||
</para>
|
||||
<para>
|
||||
For more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LOG-HINTS">wal_log_hints</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>archive_mode</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>archive_mode</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
We suggest setting <option>archive_mode</option> to <literal>on</literal> (and
|
||||
<option>archive_command</option> to <literal>/bin/true</literal>; see below)
|
||||
even if you are currently not planning to use WAL file archiving.
|
||||
</para>
|
||||
<para>
|
||||
This will make it simpler to set up WAL file archiving if it is ever required,
|
||||
as changes to <option>archive_mode</option> require a full PostgreSQL server
|
||||
restart, while <option>archive_command</option> changes can be applied via a normal
|
||||
configuration reload.
|
||||
</para>
|
||||
<para>
|
||||
However, &repmgr; itself does not require WAL file archiving.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-MODE">archive_mode</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>archive_command</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>archive_command</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
If you have set <option>archive_mode</option> to <literal>on</literal> but are not currently planning
|
||||
to use WAL file archiving, set <option>archive_command</option> to a command which does nothing but returns
|
||||
<literal>true</literal>, such as <command>/bin/true</command>. See above for details.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-COMMAND">archive_command</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>wal_keep_segments</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>wal_keep_segments</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Normally there is no need to set <option>wal_keep_segments</option> (default: <literal>0</literal>), as it
|
||||
is <emphasis>not</emphasis> a reliable way of ensuring that all required WAL segments are available to standbys.
|
||||
Replication slots and/or an archiving solution such as Barman are recommended to ensure standbys have a reliable
|
||||
source of WAL segments at all times.
|
||||
</para>
|
||||
<para>
|
||||
The only reason ever to set <option>wal_keep_segments</option> is if
|
||||
you have configured <option>pg_basebackup_options</option>
|
||||
in <filename>repmgr.conf</filename> to include the setting <literal>--wal-method=fetch</literal>
|
||||
(PostgreSQL 9.6 and earlier: <literal>--xlog-method=fetch</literal>)
|
||||
<emphasis>and</emphasis> you have <emphasis>not</emphasis> set <option>restore_command</option>
|
||||
in <filename>repmgr.conf</filename> to fetch WAL files from a reliable source such as Barman,
|
||||
in which case you'll need to set <option>wal_keep_segments</option>
|
||||
to a sufficiently high number to ensure that all WAL files required by the standby
|
||||
are retained. However we do not recommend managing replication in this way.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-WAL-KEEP-SEGMENTS">wal_keep_segments</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
<para>
|
||||
See also the <link linkend="quickstart-postgresql-configuration">PostgreSQL configuration</link> section in the
|
||||
<link linkend="quickstart">Quick-start guide</link>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
&configuration-file;
|
||||
&configuration-file-required-settings;
|
||||
&configuration-file-log-settings;
|
||||
|
||||
@@ -74,6 +74,7 @@
|
||||
<!ENTITY repmgr-node-status SYSTEM "repmgr-node-status.sgml">
|
||||
<!ENTITY repmgr-node-check SYSTEM "repmgr-node-check.sgml">
|
||||
<!ENTITY repmgr-node-rejoin SYSTEM "repmgr-node-rejoin.sgml">
|
||||
<!ENTITY repmgr-node-service SYSTEM "repmgr-node-service.sgml">
|
||||
<!ENTITY repmgr-cluster-show SYSTEM "repmgr-cluster-show.sgml">
|
||||
<!ENTITY repmgr-cluster-matrix SYSTEM "repmgr-cluster-matrix.sgml">
|
||||
<!ENTITY repmgr-cluster-crosscheck SYSTEM "repmgr-cluster-crosscheck.sgml">
|
||||
|
||||
@@ -29,9 +29,10 @@
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
&repmgr; packages are designed to be compatible with the community-provided PostgreSQL packages.
|
||||
&repmgr; RPM packages are designed to be compatible with the community-provided PostgreSQL packages
|
||||
and 2ndQuadrant's <ulink url="https://www.2ndquadrant.com/en/resources/2ndqpostgres/">2ndQPostgres</ulink>.
|
||||
They may not work with vendor-specific packages such as those provided by RedHat for RHEL
|
||||
customers, as the filesystem layout may be different to the community RPMs.
|
||||
customers, as the PostgreSQL filesystem layout may be different to the community RPMs.
|
||||
Please contact your support vendor for assistance.
|
||||
</para>
|
||||
</note>
|
||||
@@ -47,54 +48,76 @@
|
||||
<title>2ndQuadrant public RPM yum repository</title>
|
||||
|
||||
<para>
|
||||
Beginning with <ulink url="https://repmgr.org/docs/4.1/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
|
||||
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;. We recommend using this for all future &repmgr; releases.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
</para>
|
||||
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;. We recommend using this for all future &repmgr; releases.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Installation</emphasis>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
Locate the repository RPM for your PostgreSQL version from the list at:
|
||||
<ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink>
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Locate the repository RPM for your PostgreSQL version from the list at:
|
||||
<ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repository definition for your distribution and PostgreSQL version
|
||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages).
|
||||
</para>
|
||||
<para>
|
||||
For example, for PostgreSQL 10 on CentOS, execute:
|
||||
<programlisting>
|
||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages).
|
||||
</para>
|
||||
<para>
|
||||
For example, for PostgreSQL 10 on CentOS, execute:
|
||||
<programlisting>
|
||||
curl https://dl.2ndquadrant.com/default/release/get/10/rpm | sudo bash</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Verify that the repository is installed with:
|
||||
<programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For PostgreSQL 9.6 on CentOS, execute:
|
||||
<programlisting>
|
||||
curl https://dl.2ndquadrant.com/default/release/get/9.6/rpm | sudo bash</programlisting>
|
||||
</para>
|
||||
|
||||
|
||||
<para>
|
||||
Verify that the repository is installed with:
|
||||
<programlisting>
|
||||
sudo yum repolist</programlisting>
|
||||
The output should contain two entries like this:
|
||||
<programlisting>
|
||||
The output should contain two entries like this:
|
||||
<programlisting>
|
||||
2ndquadrant-dl-default-release-pg10/7/x86_64 2ndQuadrant packages (PG10) for 7 - x86_64 4
|
||||
2ndquadrant-dl-default-release-pg10-debug/7/x86_64 2ndQuadrant packages (PG10) for 7 - x86_64 - Debug 3</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
||||
<programlisting>
|
||||
$ yum install repmgr10</programlisting>
|
||||
sudo yum install repmgr10</programlisting>
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
For packages for PostgreSQL 9.6 and earlier, the package name does not contain
|
||||
a period between major and minor version numbers, e.g.
|
||||
<literal>repmgr96</literal>.
|
||||
</para>
|
||||
</note>
|
||||
<tip>
|
||||
<para>
|
||||
To determine the names of available packages, execute:
|
||||
<programlisting>
|
||||
yum search repmgr</programlisting>
|
||||
</para>
|
||||
</tip>
|
||||
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
@@ -163,16 +186,15 @@ $ yum install repmgr10</programlisting>
|
||||
<title>2ndQuadrant public apt repository for Debian/Ubuntu</title>
|
||||
|
||||
<para>
|
||||
Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a
|
||||
<ulink url="https://dl.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
</para>
|
||||
<ulink url="https://dl.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
||||
@@ -180,13 +202,13 @@ $ yum install repmgr10</programlisting>
|
||||
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repository definition for your distribution and PostgreSQL version
|
||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages) by executing:
|
||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages) by executing:
|
||||
<programlisting>
|
||||
curl https://dl.2ndquadrant.com/default/release/get/deb | sudo bash</programlisting>
|
||||
</para>
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
This will automatically install the following additional packages, if not already present:
|
||||
@@ -203,12 +225,12 @@ curl https://dl.2ndquadrant.com/default/release/get/deb | sudo bash</programlist
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<listitem>
|
||||
<para>
|
||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>postgresql-10-repmgr</literal>):
|
||||
<programlisting>
|
||||
$ apt-get install postgresql-10-repmgr</programlisting>
|
||||
</para>
|
||||
sudo apt-get install postgresql-10-repmgr</programlisting>
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
For packages for PostgreSQL 9.6 and earlier, the package name includes
|
||||
@@ -216,11 +238,11 @@ $ apt-get install postgresql-10-repmgr</programlisting>
|
||||
<literal>postgresql-9.6-repmgr</literal>.
|
||||
</para>
|
||||
</note>
|
||||
</listitem>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
</para>
|
||||
|
||||
</sect3>
|
||||
</sect2>
|
||||
|
||||
@@ -13,8 +13,9 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
From version 4.0, repmgr is compatible with all PostgreSQL versions from 9.3, including PostgreSQL 10.
|
||||
Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
|
||||
&repmgr; 4.x is compatible with all PostgreSQL versions from 9.3. See
|
||||
section <link linkend="install-compatibility-matrix">&repmgr; compatibility matrix</link>
|
||||
for an overview of version compatibility.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
@@ -31,10 +32,24 @@
|
||||
<para>
|
||||
&repmgr; must be installed on each server in the replication cluster.
|
||||
If installing repmgr from packages, the package version must match the PostgreSQL
|
||||
version. If installing from source, repmgr must be compiled against the same
|
||||
version. If installing from source, &repmgr; must be compiled against the same
|
||||
major version.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
The same "major" &repmgr; version (e.g. <literal>4.2.x</literal>) <emphasis>must</emphasis>
|
||||
be installed on all nodes in the replication cluster. We strongly recommend keeping all
|
||||
nodes on the same (preferably latest) "minor" &repmgr; version to minimize the risk
|
||||
of incompatibilities.
|
||||
</simpara>
|
||||
<simpara>
|
||||
If different "major" &repmgr; versions (e.g. 3.3.x and 4.1.x)
|
||||
are installed on different nodes, in the best case &repmgr; (in particular <application>repmgrd</application>)
|
||||
will not run. In the worst case, you will end up with a broken cluster.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
A dedicated system user for &repmgr; is <emphasis>not</emphasis> required; as many &repmgr; and
|
||||
<application>repmgrd</application> actions require direct access to the PostgreSQL data directory,
|
||||
@@ -42,32 +57,10 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Passwordless <command>ssh</command> connectivity between all servers in the replication cluster
|
||||
is not required, but is necessary in the following cases:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
|
||||
data directory (in which case <command>rsync</command> is also required)</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
|
||||
and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
See also <link linkend="configuration-prerequisites">Prerequisites for configuration</link>
|
||||
for information on networking requirements.
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<simpara>
|
||||
Consider setting <varname>ConnectTimeout</varname> to a low value in your SSH configuration.
|
||||
This will make it faster to detect any SSH connection errors.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
<tip>
|
||||
<simpara>
|
||||
We recommend using a session multiplexer utility such as <command>screen</command> or
|
||||
@@ -76,4 +69,111 @@
|
||||
terminated if your <command>ssh</command> session to the server is interrupted or closed.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
<sect2 id="install-compatibility-matrix">
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgr</primary>
|
||||
<secondary>compatibility matrix</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>compatibility matrix</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>&repmgr; compatibility matrix</title>
|
||||
<para>
|
||||
The following table provides an overview of which &repmgr; version supports
|
||||
which PostgreSQL version.
|
||||
</para>
|
||||
|
||||
|
||||
<table id="repmgr-compatibility-matrix">
|
||||
<title>&repmgr; compatibility matrix</title>
|
||||
|
||||
<tgroup cols="3">
|
||||
<thead>
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; version
|
||||
</entry>
|
||||
<entry>
|
||||
Latest release
|
||||
</entry>
|
||||
<entry>
|
||||
Supported PostgreSQL versions
|
||||
</entry>
|
||||
</row>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; 4.x
|
||||
</entry>
|
||||
<entry>
|
||||
<link linkend="release-4.2">4.2</link> (2018-10-24)
|
||||
</entry>
|
||||
<entry>
|
||||
9.3, 9.4, 9.5, 9.6, 10, 11
|
||||
</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; 3.x
|
||||
</entry>
|
||||
<entry>
|
||||
<ulink url="https://repmgr.org/release-notes-3.3.2.html">3.3.2</ulink> (2017-05-30)
|
||||
</entry>
|
||||
<entry>
|
||||
9.3, 9.4, 9.5, 9.6
|
||||
</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; 2.x
|
||||
</entry>
|
||||
<entry>
|
||||
<ulink url="https://repmgr.org/release-notes-2.0.3.html">2.0.3</ulink> (2015-04-16)
|
||||
</entry>
|
||||
<entry>
|
||||
9.0, 9.1, 9.2, 9.3, 9.4
|
||||
</entry>
|
||||
</row>
|
||||
</tbody>
|
||||
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<important>
|
||||
<para>
|
||||
The &repmgr; 2.x and 3.x series are no longer maintained or supported.
|
||||
We strongly recommend upgrading to the latest &repmgr; version.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
|
||||
<para>
|
||||
Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
|
||||
</para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
PostgreSQL 9.3 does not support replication slots, so corresponding &repmgr; functionality
|
||||
is not available.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
In PostgreSQL 9.3 and PostgreSQL 9.4, <command>pg_rewind</command> is not part of the core
|
||||
distribution. <command>pg_rewind</command> will need to be compiled separately to be able
|
||||
to use any &repmgr; functionality which takes advantage of it.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
@@ -26,12 +26,68 @@
|
||||
add the <ulink
|
||||
url="http://apt.postgresql.org/">apt.postgresql.org</ulink>
|
||||
repository to your <filename>sources.list</filename> if you
|
||||
have not already done so. Then install the pre-requisites for
|
||||
building PostgreSQL with:
|
||||
have not already done so, and ensure the source repository is enabled.
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
If not configured, the source repository can be added by including
|
||||
a <literal>deb-src</literal> line as a copy of the existing <literal>deb</literal>
|
||||
line in the repository file, which is usually
|
||||
<filename>/etc/apt/sources.list.d/pgdg.list</filename>, e.g.:
|
||||
<programlisting>
|
||||
deb http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main
|
||||
deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisting>
|
||||
</para>
|
||||
</tip>
|
||||
<para>
|
||||
Then install the prerequisites for
|
||||
building PostgreSQL with e.g.:
|
||||
<programlisting>
|
||||
sudo apt-get update
|
||||
sudo apt-get build-dep postgresql-9.6</programlisting>
|
||||
</para>
|
||||
|
||||
<important>
|
||||
<simpara>
|
||||
Select the appropriate PostgreSQL version for your target repmgr version.
|
||||
</simpara>
|
||||
</important>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If using <command>apt-get build-dep</command> is not possible, the
|
||||
following packages may need to be installed manually:
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><literal>libedit-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libkrb5-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libpam0g-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libreadline-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libselinux1-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libssl-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libxml2-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libxslt1-dev</literal></simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
@@ -45,15 +101,45 @@
|
||||
sudo yum install yum-utils openjade docbook-dtds docbook-style-dsssl docbook-style-xsl
|
||||
sudo yum-builddep postgresql96</programlisting>
|
||||
</para>
|
||||
|
||||
<important>
|
||||
<simpara>
|
||||
Select the appropriate PostgreSQL version for your target repmgr version.
|
||||
</simpara>
|
||||
</important>
|
||||
<note>
|
||||
<para>
|
||||
If using <command>yum-builddep</command> is not possible, the
|
||||
following packages may need to be installed manually:
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><literal>libselinux-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libxml2-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libxslt-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>openssl-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>pam-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>readline-devel</literal></simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</note>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
Select the appropriate PostgreSQL versions for your target repmgr version.
|
||||
</simpara>
|
||||
</note>
|
||||
</sect2>
|
||||
|
||||
|
||||
@@ -80,7 +166,7 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
There are also tags for each &repmgr; release, e.g. <filename>4.0.5</filename>.
|
||||
There are also tags for each &repmgr; release, e.g. <literal>v4.2.0</literal>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
@@ -165,7 +251,7 @@
|
||||
<note>
|
||||
<simpara>
|
||||
Due to changes in PostgreSQL's documentation build system from PostgreSQL 10,
|
||||
the documentation can currently only be built agains PostgreSQL 9.6 or earlier.
|
||||
the documentation can currently only be built against PostgreSQL 9.6 or earlier.
|
||||
This limitation will be fixed when time and resources permit.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
<chapter id="quickstart" xreflabel="Quick-start guide">
|
||||
<title>Quick-start guide</title>
|
||||
|
||||
<indexterm>
|
||||
<primary>quickstart</primary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
This section gives a quick introduction to &repmgr;, including setting up a
|
||||
sample &repmgr; installation and a basic replication cluster.
|
||||
@@ -50,7 +54,8 @@
|
||||
</para>
|
||||
<para>
|
||||
If you want <application>repmgr</application> to copy configuration files which are
|
||||
located outside the PostgreSQL data directory, and/or to test <command>switchover</command>
|
||||
located outside the PostgreSQL data directory, and/or to test
|
||||
<command><link linkend="repmgr-standby-switchover">switchover</link></command>
|
||||
functionality, you will also need passwordless SSH connections between both servers, and
|
||||
<application>rsync</application> should be installed.
|
||||
</para>
|
||||
@@ -63,7 +68,7 @@
|
||||
</tip>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-postgresql-configuration">
|
||||
<sect1 id="quickstart-postgresql-configuration" xreflabel="PostgreSQL configuration">
|
||||
<title>PostgreSQL configuration</title>
|
||||
<para>
|
||||
On the primary server, a PostgreSQL instance must be initialised and running.
|
||||
@@ -78,6 +83,13 @@
|
||||
|
||||
max_wal_senders = 10
|
||||
|
||||
# Enable replication slots; set this figure to at least one more
|
||||
# than the number of standbys which will connect to this server.
|
||||
# Note that repmgr will only make use of replication slots if
|
||||
# "use_replication_slots" is set to "true" in repmgr.conf
|
||||
|
||||
max_replication_slots = 0
|
||||
|
||||
# Ensure WAL files contain enough information to enable read-only queries
|
||||
# on the standby.
|
||||
#
|
||||
@@ -102,16 +114,6 @@
|
||||
# you WALs in a secure place. /bin/true is an example of a command that
|
||||
# ignores archiving. Use something more sensible.
|
||||
archive_command = '/bin/true'
|
||||
|
||||
# If you have configured "pg_basebackup_options"
|
||||
# in "repmgr.conf" to include the setting "--xlog-method=fetch" (from
|
||||
# PostgreSQL 10 "--wal-method=fetch"), *and* you have not set
|
||||
# "restore_command" in "repmgr.conf"to fetch WAL files from another
|
||||
# source such as Barman, you'll need to set "wal_keep_segments" to a
|
||||
# high enough value to ensure that all WAL files generated while
|
||||
# the standby is being cloned are retained until the standby starts up.
|
||||
#
|
||||
# wal_keep_segments = 5000
|
||||
</programlisting>
|
||||
<tip>
|
||||
<simpara>
|
||||
@@ -126,6 +128,9 @@
|
||||
and the cluster was not initialised using data checksums, you may want to consider enabling
|
||||
<varname>wal_log_hints</varname>; for more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
||||
</para>
|
||||
<para>
|
||||
See also the <link linkend="configuration-postgresql">PostgreSQL configuration</link> section in the <link linkend="configuration">repmgr configuration guide</link>.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-repmgr-user-database">
|
||||
@@ -196,11 +201,20 @@
|
||||
<sect1 id="quickstart-standby-preparation">
|
||||
<title>Preparing the standby</title>
|
||||
<para>
|
||||
On the standby, do not create a PostgreSQL instance, but do ensure the destination
|
||||
On the standby, do <emphasis>not</emphasis> create a PostgreSQL instance (i.e.
|
||||
do not execute <application>initdb</application> or any database creation
|
||||
scripts provided by packages), but do ensure the destination
|
||||
data directory (and any other directories which you want PostgreSQL to use)
|
||||
exist and are owned by the <literal>postgres</literal> system user. Permissions
|
||||
must be set to <literal>0700</literal> (<literal>drwx------</literal>).
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
&repmgr; will place a copy of the primary's database files in this directory.
|
||||
It will however refuse to run if a PostgreSQL instance has already been
|
||||
created there.
|
||||
</simpara>
|
||||
</tip>
|
||||
<para>
|
||||
Check the primary database is reachable from the standby using <application>psql</application>:
|
||||
</para>
|
||||
|
||||
@@ -150,11 +150,31 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
An issue was encountered while attempting to retrieve
|
||||
&repmgr; metadata.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_DB_CONN (6)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr; was unable to connect to the local PostgreSQL instance.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_NODE_STATUS (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more issues were detected.
|
||||
One or more issues were detected with the replication configuration,
|
||||
e.g. a node was not in its expected state.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
151
doc/repmgr-node-service.sgml
Normal file
151
doc/repmgr-node-service.sgml
Normal file
@@ -0,0 +1,151 @@
|
||||
<refentry id="repmgr-node-service">
|
||||
<indexterm>
|
||||
<primary>repmgr node service</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr node service</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr node service</refname>
|
||||
<refpurpose>show or execute the system service command to stop/start/restart/reload/promote a node</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Shows or executes the system service command to stop/start/restart/reload/promote a node.
|
||||
</para>
|
||||
<para>
|
||||
This command is mainly meant for internal &repmgr; usage, but is useful for
|
||||
confirming the command configuration.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Log the steps which would be taken, including displaying the command which would be executed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--action</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The action to perform. One of <literal>start</literal>, <literal>stop</literal>,
|
||||
<literal>restart</literal>, <literal>reload</literal> or <literal>promote</literal>.
|
||||
</para>
|
||||
<para>
|
||||
If the parameter <option>--list-actions</option> is provided together with
|
||||
<option>--action</option>, the command which would be executed will be printed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--list-actions</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
List all configured commands.
|
||||
</para>
|
||||
<para>
|
||||
If the parameter <option>--action</option> is provided together with
|
||||
<option>--list-actions</option>, the command which would be executed for that
|
||||
particular action will be printed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--checkpoint</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Issue a <command>CHECKPOINT</command> before stopping or restarting the node.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
The following exit codes can be emitted by <command>repmgr node service</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
No issues were detected.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_LOCAL_COMMAND (5)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Execution of the system service command failed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Examples</title>
|
||||
<para>
|
||||
See what action would be taken for a restart:
|
||||
<programlisting>
|
||||
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --action=restart --checkpoint --dry-run
|
||||
INFO: a CHECKPOINT would be issued here
|
||||
INFO: would execute server command "sudo service postgresql-11 restart"</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Restart the PostgreSQL instance:
|
||||
<programlisting>
|
||||
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --action=restart --checkpoint
|
||||
NOTICE: issuing CHECKPOINT
|
||||
DETAIL: executing server command "sudo service postgresql-11 restart"
|
||||
Redirecting to /bin/systemctl restart postgresql-11.service</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
List all commands:
|
||||
<programlisting>
|
||||
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --list-actions
|
||||
Following commands would be executed for each action:
|
||||
|
||||
start: "sudo service postgresql-11 start"
|
||||
stop: "sudo service postgresql-11 stop"
|
||||
restart: "sudo service postgresql-11 restart"
|
||||
reload: "sudo service postgresql-11 reload"
|
||||
promote: "/usr/pgsql-11/bin/pg_ctl -w -D '/var/lib/pgsql/11/data' promote"</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
List a single command:
|
||||
<programlisting>
|
||||
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --list-actions --action=promote
|
||||
/usr/pgsql-11/bin/pg_ctl -w -D '/var/lib/pgsql/11/data' promote </programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
@@ -213,12 +213,7 @@
|
||||
Execute with the <literal>--dry-run</literal> option to test the switchover as far as
|
||||
possible without actually changing the status of either node.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
<application>repmgrd</application> must be shut down on all nodes while a switchover is being
|
||||
executed. This restriction will be removed in a future &repmgr; version.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
<para>
|
||||
External database connections, e.g. from an application, should not be permitted while
|
||||
the switchover is taking place. In particular, active transactions on the primary
|
||||
|
||||
@@ -23,14 +23,19 @@
|
||||
use of the witness server with <application>repmgrd</application>.
|
||||
</para>
|
||||
<para>
|
||||
When executing <command>repmgr witness register</command>, connection information
|
||||
for the cluster primary server must also be provided. &repmgr; will automatically
|
||||
use the <varname>user</varname> and <varname>dbname</varname> values defined
|
||||
in the <varname>conninfo</varname> string defined in the witness node's
|
||||
<filename>repmgr.conf</filename>, if these are not explicitly provided.
|
||||
When executing <command>repmgr witness register</command>, database connection
|
||||
information for the cluster primary server must also be provided.
|
||||
</para>
|
||||
<para>
|
||||
Execute with the <literal>--dry-run</literal> option to check what would happen
|
||||
In most cases it's only necessary to provide the primary's hostname with
|
||||
the <option>-h</option>/<option>--host</option> option; &repmgr; will
|
||||
automatically use the <varname>user</varname> and <varname>dbname</varname>
|
||||
values defined in the <varname>conninfo</varname> string defined in the
|
||||
witness node's <filename>repmgr.conf</filename>, unless these are explicitly
|
||||
provided as command line options.
|
||||
</para>
|
||||
<para>
|
||||
Execute with the <option>--dry-run</option> option to check what would happen
|
||||
without actually registering the witness server.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
@@ -24,8 +24,14 @@
|
||||
<abstract>
|
||||
<para>
|
||||
This is the official documentation of &repmgr; &repmgrversion; for
|
||||
use with PostgreSQL 9.3 - PostgreSQL 10.
|
||||
It describes the functionality supported by the current version of &repmgr;.
|
||||
use with PostgreSQL 9.3 - PostgreSQL 11.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; is being continually developed and we strongly recommend using the
|
||||
latest version. Please check the
|
||||
<ulink url="https://repmgr.org/">repmgr website</ulink> for details
|
||||
about the current &repmgr; version as well as the
|
||||
<ulink url="https://repmgr.org/docs/current/index.html">current documentation</ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
@@ -108,6 +114,7 @@
|
||||
&repmgr-node-status;
|
||||
&repmgr-node-check;
|
||||
&repmgr-node-rejoin;
|
||||
&repmgr-node-service;
|
||||
&repmgr-cluster-show;
|
||||
&repmgr-cluster-matrix;
|
||||
&repmgr-cluster-crosscheck;
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
<title>BDR failover with repmgrd</title>
|
||||
<para>
|
||||
&repmgr; 4.x provides support for monitoring BDR nodes and taking action in
|
||||
&repmgr; 4.x provides support for monitoring a pair of BDR 2.x nodes and taking action in
|
||||
case one of the nodes fails.
|
||||
</para>
|
||||
<note>
|
||||
@@ -31,8 +31,21 @@
|
||||
reconfigure a proxy server/connection pooler such as <application>PgBouncer</application>.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6.
|
||||
It is <emphasis>not</emphasis> required for later BDR versions.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<sect1 id="bdr-prerequisites" xreflabel="BDR prerequisites">
|
||||
<title>Prerequisites</title>
|
||||
<important>
|
||||
<para>
|
||||
This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6.
|
||||
It is <emphasis>not</emphasis> required for later BDR versions.
|
||||
</para>
|
||||
</important>
|
||||
<para>
|
||||
&repmgr; 4 requires PostgreSQL 9.4 or 9.6 with the BDR 2 extension
|
||||
enabled and configured for a two-node BDR network. &repmgr; 4 packages
|
||||
|
||||
@@ -478,9 +478,6 @@ REPMGRD_OPTS="--daemonize=false"
|
||||
<para>
|
||||
From <application>repmgrd</application> 4.1, ensure <varname>REPMGRD_OPTS</varname> includes
|
||||
<option>--daemonize=false</option>, as daemonization is handled by the service command.
|
||||
We recommend setting <varname>repmgrd_pid_file</varname> in <filename>repmgr.conf</filename> to the
|
||||
same value set in <varname>REPMGRD_PIDFILE</varname> to prevent another <application>repmgrd</application>
|
||||
instance from being started manually.
|
||||
</para>
|
||||
<para>
|
||||
If using <application>systemd</application>, you may need to execute <command>systemctl daemon-reload</command>.
|
||||
|
||||
@@ -21,13 +21,14 @@
|
||||
<para>
|
||||
However, <application>repmgrd</application> is unable to distinguish between
|
||||
planned outages (such as performing a <link linkend="performing-switchover">switchover</link>
|
||||
or upgrading a server), and an actual server outage. In versions prior to &repmgr; 4.2
|
||||
it was necessary to stop <application>repmgrd</application> on all nodes (or at least
|
||||
or installing PostgreSQL maintenance releases), and an actual server outage. In versions prior to
|
||||
&repmgr; 4.2 it was necessary to stop <application>repmgrd</application> on all nodes (or at least
|
||||
on all nodes where <application>repmgrd</application> is
|
||||
<link linkend="repmgrd-automatic-failover">configured for automatic failover</link>)
|
||||
to prevent <application>repmgrd</application> from making changes to the
|
||||
to prevent <application>repmgrd</application> from making unintentional changes to the
|
||||
replication cluster.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
From <link linkend="release-4.2">&repmgr; 4.2</link>, <application>repmgrd</application>
|
||||
can now be "paused", i.e. instructed not to take any action such as performing a failover.
|
||||
@@ -35,6 +36,14 @@
|
||||
each <application>repmgrd</application> individually.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
For major PostgreSQL upgrades, e.g. from PostgreSQL 10 to PostgreSQL 11,
|
||||
<application>repmgrd</application> should be shut down completely and only started up
|
||||
once the &repmgr; packages for the new PostgreSQL major version have been installed.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<sect1 id="repmgrd-pausing-prerequisites">
|
||||
<title>Prerequisites for pausing <application>repmgrd</application></title>
|
||||
<para>
|
||||
@@ -121,7 +130,7 @@ NOTICE: node 3 (node3) paused</programlisting>
|
||||
<para>
|
||||
To unpause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>, e.g.:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf daemon pause
|
||||
$ repmgr -f /etc/repmgr.conf daemon unpause
|
||||
NOTICE: node 1 (node1) unpaused
|
||||
NOTICE: node 2 (node2) unpaused
|
||||
NOTICE: node 3 (node3) unpaused</programlisting>
|
||||
|
||||
@@ -84,11 +84,12 @@
|
||||
|
||||
<para>
|
||||
Double-check which commands will be used to stop/start/restart the current
|
||||
primary; on the current primary execute:
|
||||
primary; this can be done by e.g. executing <command><link linkend="repmgr-node-service">repmgr node service</link></command>
|
||||
on the current primary:
|
||||
<programlisting>
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=stop
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=start
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=restart</programlisting>
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=stop
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=start
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=restart</programlisting>
|
||||
|
||||
</para>
|
||||
|
||||
@@ -388,7 +389,7 @@ HINT: check the primary server status before performing any further actions</pro
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
Note that <varname>shutdown_check_timeout</varname>is set on the node where
|
||||
Note that <varname>shutdown_check_timeout</varname> is set on the node where
|
||||
<command>repmgr standby switchover</command> is executed (promotion candidate); setting it on the
|
||||
demotion candidate (former primary) will have no effect.
|
||||
</para>
|
||||
|
||||
@@ -4,6 +4,6 @@ Upgrading from repmgr 3
|
||||
This document has been integrated into the main `repmgr` documentation
|
||||
and is now located here:
|
||||
|
||||
> [Upgrading from repmgr 3.x](https://repmgr.org/docs/4.0/upgrading-from-repmgr-3.html)
|
||||
> [Upgrading from repmgr 3.x](https://repmgr.org/docs/current/upgrading-from-repmgr-3.html)
|
||||
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
<title>Upgrading repmgr</title>
|
||||
|
||||
<para>
|
||||
&repmgr; is updated regularly with point releases (e.g. 4.0.1 to 4.0.2)
|
||||
&repmgr; is updated regularly with minor releases (e.g. 4.0.1 to 4.0.2)
|
||||
containing bugfixes and other minor improvements. Any substantial new
|
||||
functionality will be included in a feature release (e.g. 4.0.x to 4.1.x).
|
||||
functionality will be included in a major release (e.g. 4.0 to 4.1).
|
||||
</para>
|
||||
|
||||
<sect1 id="upgrading-repmgr-extension" xreflabel="Upgrading repmgr 4.x and later">
|
||||
@@ -19,83 +19,110 @@
|
||||
</indexterm>
|
||||
<title>Upgrading repmgr 4.x and later</title>
|
||||
<para>
|
||||
&repmgr; 4.x is implemented as a PostgreSQL extension; normally the upgrade consists
|
||||
of the following steps:
|
||||
<orderedlist>
|
||||
From version 4, &repmgr; consists of three elements:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Stop <application>repmgrd</application> (if in use) on all nodes where it is running
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
the <application>repmgr</application> and <application>repmgrd</application> executables
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Install the updated package (or compile the updated source)
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
the objects for the &repmgr; PostgreSQL extension (SQL files for creating/updating
|
||||
repmgr metadata, and the extension control file)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
For major releases, e.g. from <literal>4.0.x</literal> to <literal>4.1</literal>,
|
||||
where the <literal>repmgr</literal> shared object library has been updated,
|
||||
restart PostgreSQL.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
For major releases, e.g. from <literal>4.0.x</literal> to <literal>4.1</literal>,
|
||||
execute <command>ALTER EXTENSION repmgr UPDATE</command>
|
||||
on the primary node in the database where the &repmgr; extension is installed.
|
||||
</simpara>
|
||||
<simpara>
|
||||
This will update the extension metadata and, if necessary, apply
|
||||
changes to the &repmgr; extension objects.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Start <application>repmgrd</application> (if in use).
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</orderedlist>
|
||||
<listitem>
|
||||
<simpara>
|
||||
the shared library module used by <application>repmgrd</application> which
|
||||
is resident in the PostgreSQL backend
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
With <emphasis>minor releases</emphasis>, usually changes are only made to the <application>repmgr</application>
|
||||
and <application>repmgrd</application> executables. In this case, the upgrade is quite straightforward,
|
||||
and is simply a case of installing the new version, and restarting <application>repmgrd</application>
|
||||
(if running).
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Always check the <link linkend="appendix-release-notes">release notes</link> for every
|
||||
release as they may contain upgrade instructions particular to individual versions.
|
||||
For <emphasis>major releases</emphasis>, the &repmgr; PostgreSQL extension will need to be updated
|
||||
to the latest version. Additionally, if the shared library module has been updated (this is sometimes,
|
||||
but not always the case), PostgreSQL itself will need to be restarted on each node.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
Always check the <link linkend="appendix-release-notes">release notes</link> for every
|
||||
release as they may contain upgrade instructions particular to individual versions.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
<para>
|
||||
Note that it may be necessary to restart the PostgreSQL server if the upgrade contains
|
||||
changes to the shared object file used by <application>repmgrd</application>; check the
|
||||
<link linkend="appendix-release-notes">release notes</link> for details.
|
||||
</para>
|
||||
|
||||
<sect2 id="upgrading-replication-cluster" xreflabel="Upgrading a replication cluster">
|
||||
<sect2 id="upgrading-minor-version" xreflabel="Upgrading a minor version release">
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
<secondary>repmgr 4.x and later</secondary>
|
||||
<secondary>minor release</secondary>
|
||||
</indexterm>
|
||||
<title>Upgrading a replication cluster</title>
|
||||
<title>Upgrading a minor version release</title>
|
||||
|
||||
<para>
|
||||
The process for installing minor version upgrades is quite straightforward:
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
install the new &repmgr; version
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
restart <application>repmgrd</application> on all nodes where it is running
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
Some packaging systems (e.g. <link linkend="packages-debian-ubuntu">Debian/Ubuntu</link>)
|
||||
may restart <application>repmgrd</application> as part of the package upgrade process.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
The same &repmgr; "major version" (e.g. <literal>4.2</literal>) must be
|
||||
installed on all nodes in the replication cluster. While it's possible to have differing
|
||||
&repmgr; "minor versions" (e.g. <literal>4.2.1</literal>) on different nodes,
|
||||
we strongly recommend updating all nodes to the latest minor version.
|
||||
Minor version upgrades can be performed in any order on the nodes in the replication
|
||||
cluster.
|
||||
</para>
|
||||
<note>
|
||||
|
||||
<para>
|
||||
A PostgreSQL restart is <emphasis>not</emphasis> required for minor version upgrades.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
Minor version upgrades can be performed in any order on the nodes in the replicaiton
|
||||
cluster. In general it makes sense to start on the primary.
|
||||
The same &repmgr; "major version" (e.g. <literal>4.2</literal>) must be
|
||||
installed on all nodes in the replication cluster. While it's possible to have differing
|
||||
&repmgr; "minor versions" (e.g. <literal>4.2.1</literal>) on different nodes,
|
||||
we strongly recommend updating all nodes to the latest minor version.
|
||||
</para>
|
||||
<para>
|
||||
A PostgreSQL restart is <emphasis>not</emphasis> required for minor version upgrades.
|
||||
</para>
|
||||
</note>
|
||||
</note>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="upgrading-major-version" xreflabel="Upgrading a major version release">
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
<secondary>major release</secondary>
|
||||
</indexterm>
|
||||
<title>Upgrading a major version release</title>
|
||||
<para>
|
||||
"Major version" upgrades need to be planned more carefully, as they may include
|
||||
changes to the &repmgr; metadata (which need to be propagated from the primary to all
|
||||
@@ -111,7 +138,14 @@
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Stop <application>repmgrd</application> (if in use) on all nodes where it is running
|
||||
Stop <application>repmgrd</application> (if in use) on all nodes where it is running.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Disable the <application>repmgrd</application> service on all nodes where it is in use;
|
||||
this is to prevent packages from prematurely restarting <application>repmgrd</application>.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
@@ -121,12 +155,21 @@
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If running a <literal>systemd</literal>-based Linux distribution, execute (as <literal>root</literal>,
|
||||
or with appropriate <literal>sudo</literal> permissions):
|
||||
<programlisting>
|
||||
systemctl daemon-reload</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
If necessary, restart PostgreSQL, then <application>repmgrd</application> (if in use)
|
||||
on each node. The order in which this is applied to individual nodes is not critical,
|
||||
and it's also fine to restart on all nodes first before starting <application>repmgrd</application>.
|
||||
If the &repmgr; shared library module has been updated (check the <link linkend="appendix-release-notes">release notes</link>!),
|
||||
restart PostgreSQL, then <application>repmgrd</application> (if in use) on each node.
|
||||
The order in which this is applied to individual nodes is not critical,
|
||||
and it's also fine to restart PostgreSQL on all nodes first before starting <application>repmgrd</application>.
|
||||
</simpara>
|
||||
<simpara>
|
||||
Note that if the upgrade requires a PostgreSQL restart, <application>repmgrd</application>
|
||||
@@ -138,11 +181,18 @@
|
||||
<para>
|
||||
On the primary node, execute
|
||||
<programlisting>
|
||||
ALTER EXTENSION repmgr UPDATE</programlisting>
|
||||
ALTER EXTENSION repmgr UPDATE</programlisting>
|
||||
in the database where &repmgr; is installed.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Reenable the <application>repmgrd</application> service on all nodes where it is in use, and
|
||||
ensure it is running.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</orderedlist>
|
||||
</para>
|
||||
<tip>
|
||||
@@ -154,6 +204,17 @@
|
||||
</tip>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="upgrading-check-repmgrd" xreflabel="Checking repmgrd status after an upgrade">
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
<secondary>checking repmgrd status</secondary>
|
||||
</indexterm>
|
||||
<title>Checking repmgrd status after an upgrade</title>
|
||||
<para>
|
||||
From <link linkend="release-4.2">repmgr 4.2</link>, once the upgrade is complete, execute the <command><link linkend="repmgr-daemon-status">repmgr daemon status</link></command>
|
||||
command (on any node) to show an overview of the status of <application>repmgrd</application> on all nodes.
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="upgrading-and-pg-upgrade" xreflabel="pg_upgrade and repmgr">
|
||||
|
||||
@@ -1 +1 @@
|
||||
<!ENTITY repmgrversion "4.2dev">
|
||||
<!ENTITY repmgrversion "4.2">
|
||||
|
||||
@@ -126,7 +126,7 @@ do_bdr_register(void)
|
||||
}
|
||||
|
||||
/* check whether repmgr extension exists, and there are no non-BDR nodes registered */
|
||||
extension_status = get_repmgr_extension_status(conn);
|
||||
extension_status = get_repmgr_extension_status(conn, NULL);
|
||||
|
||||
if (extension_status == REPMGR_UNKNOWN)
|
||||
{
|
||||
@@ -232,7 +232,7 @@ do_bdr_register(void)
|
||||
}
|
||||
|
||||
/* check repmgr schema exists, skip if not */
|
||||
other_node_extension_status = get_repmgr_extension_status(bdr_node_conn);
|
||||
other_node_extension_status = get_repmgr_extension_status(bdr_node_conn, NULL);
|
||||
|
||||
if (other_node_extension_status != REPMGR_INSTALLED)
|
||||
{
|
||||
@@ -442,7 +442,7 @@ do_bdr_unregister(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
extension_status = get_repmgr_extension_status(conn);
|
||||
extension_status = get_repmgr_extension_status(conn, NULL);
|
||||
if (extension_status != REPMGR_INSTALLED)
|
||||
{
|
||||
log_error(_("repmgr is not installed on database \"%s\""), dbname);
|
||||
|
||||
@@ -1527,7 +1527,7 @@ do_cluster_help(void)
|
||||
|
||||
printf(_("CLUSTER CLEANUP\n"));
|
||||
puts("");
|
||||
printf(_(" \"cluster cleanup\" purges records from the \"repmgr.monitor\" table.\n"));
|
||||
printf(_(" \"cluster cleanup\" purges records from the \"repmgr.monitoring_history\" table.\n"));
|
||||
puts("");
|
||||
printf(_(" -k, --keep-history=VALUE retain indicated number of days of history (default: 0)\n"));
|
||||
puts("");
|
||||
|
||||
@@ -2963,6 +2963,7 @@ do_node_help(void)
|
||||
printf(_(" --dry-run show what action would be performed, but don't execute it\n"));
|
||||
printf(_(" --action action to perform (one of \"start\", \"stop\", \"restart\" or \"reload\")\n"));
|
||||
printf(_(" --list-actions show what command would be performed for each action\n"));
|
||||
printf(_(" --checkpoint issue a CHECKPOINT before stopping or restarting the node\n"));
|
||||
puts("");
|
||||
|
||||
|
||||
|
||||
@@ -3718,9 +3718,18 @@ do_standby_switchover(void)
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/* close all connections - we'll reestablish later */
|
||||
for (cell = all_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
PQfinish(cell->node_info->conn);
|
||||
cell->node_info->conn = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Sanity checks completed - prepare for the switchover
|
||||
*/
|
||||
@@ -3801,7 +3810,7 @@ do_standby_switchover(void)
|
||||
shutdown_command);
|
||||
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
clear_node_info_list(&all_nodes);
|
||||
|
||||
key_value_list_free(&remote_config_files);
|
||||
|
||||
return;
|
||||
@@ -4087,9 +4096,27 @@ do_standby_switchover(void)
|
||||
|
||||
if (sibling_node_record.type == WITNESS)
|
||||
{
|
||||
PGconn *witness_conn = NULL;
|
||||
|
||||
/* TODO: create "repmgr witness resync" or similar */
|
||||
appendPQExpBuffer(&remote_command_str,
|
||||
"witness register -d \\'%s\\' --force 2>/dev/null && echo \"1\" || echo \"0\"",
|
||||
local_node_record.conninfo);
|
||||
|
||||
/*
|
||||
* Notify the witness repmgrd about the new primary, as at this point it will be assuming
|
||||
* a failover situation is in place. It will detect the new primary at some point, this
|
||||
* just speeds up the process.
|
||||
*
|
||||
* In the unlikely event repmgrd is not running or not in use, this will have no effect.
|
||||
*/
|
||||
witness_conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||
|
||||
if (PQstatus(witness_conn) == CONNECTION_OK)
|
||||
{
|
||||
notify_follow_primary(witness_conn, local_node_record.node_id);
|
||||
}
|
||||
PQfinish(witness_conn);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -4144,8 +4171,6 @@ do_standby_switchover(void)
|
||||
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
|
||||
|
||||
|
||||
PQfinish(local_conn);
|
||||
|
||||
/*
|
||||
@@ -4372,7 +4397,7 @@ check_source_server()
|
||||
* to be used as a standalone clone tool)
|
||||
*/
|
||||
|
||||
extension_status = get_repmgr_extension_status(primary_conn);
|
||||
extension_status = get_repmgr_extension_status(primary_conn, NULL);
|
||||
|
||||
if (extension_status != REPMGR_INSTALLED)
|
||||
{
|
||||
|
||||
@@ -37,6 +37,7 @@ do_witness_register(void)
|
||||
PGconn *witness_conn = NULL;
|
||||
PGconn *primary_conn = NULL;
|
||||
RecoveryType recovery_type = RECTYPE_UNKNOWN;
|
||||
ExtensionStatus extension_status = REPMGR_UNKNOWN;
|
||||
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
@@ -214,33 +215,45 @@ do_witness_register(void)
|
||||
}
|
||||
}
|
||||
|
||||
extension_status = get_repmgr_extension_status(witness_conn, NULL);
|
||||
|
||||
/*
|
||||
* if repmgr.nodes contains entries, delete if -F/--force provided,
|
||||
* otherwise exit with error
|
||||
* Check if the witness database already contains node records;
|
||||
* only do this if the extension is actually installed.
|
||||
*/
|
||||
if (get_all_node_records(witness_conn, &nodes) == false)
|
||||
if (extension_status == REPMGR_INSTALLED
|
||||
|| extension_status == REPMGR_OLD_VERSION_INSTALLED)
|
||||
{
|
||||
/* get_all_node_records() will display the error */
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
/*
|
||||
* if repmgr.nodes contains entries, exit with error unless
|
||||
* -F/--force provided (which will cause the existing records
|
||||
* to be overwritten)
|
||||
*/
|
||||
|
||||
log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
|
||||
|
||||
if (nodes.node_count > 0)
|
||||
{
|
||||
if (!runtime_options.force)
|
||||
if (get_all_node_records(witness_conn, &nodes) == false)
|
||||
{
|
||||
log_error(_("witness node is already initialised and contains node records"));
|
||||
log_hint(_("use option -F/--force to reinitialise the node"));
|
||||
PQfinish(primary_conn);
|
||||
/* get_all_node_records() will display the error */
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
clear_node_info_list(&nodes);
|
||||
log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
|
||||
|
||||
if (nodes.node_count > 0)
|
||||
{
|
||||
if (!runtime_options.force)
|
||||
{
|
||||
log_error(_("witness node is already initialised and contains node records"));
|
||||
log_hint(_("use option -F/--force to reinitialise the node"));
|
||||
PQfinish(primary_conn);
|
||||
PQfinish(witness_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
clear_node_info_list(&nodes);
|
||||
}
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
@@ -471,13 +484,15 @@ void do_witness_help(void)
|
||||
printf(_("Usage:\n"));
|
||||
printf(_(" %s [OPTIONS] witness register\n"), progname());
|
||||
printf(_(" %s [OPTIONS] witness unregister\n"), progname());
|
||||
|
||||
puts("");
|
||||
printf(_("WITNESS REGISTER\n"));
|
||||
puts("");
|
||||
printf(_(" \"witness register\" registers a witness node.\n"));
|
||||
puts("");
|
||||
printf(_(" Requires provision of connection information for the primary\n"));
|
||||
printf(_(" Requires provision of connection information for the primary node,\n"));
|
||||
printf(_(" typically usually just the host name.\n"));
|
||||
puts("");
|
||||
printf(_(" -h/--host host name of the primary node\n"));
|
||||
printf(_(" --dry-run check prerequisites but don't make any changes\n"));
|
||||
printf(_(" -F, --force overwrite an existing node record\n"));
|
||||
puts("");
|
||||
|
||||
@@ -29,15 +29,13 @@
|
||||
*
|
||||
* NODE STATUS
|
||||
* NODE CHECK
|
||||
* NODE REJOIN
|
||||
* NODE SERVICE
|
||||
*
|
||||
* DAEMON STATUS
|
||||
* DAEMON PAUSE
|
||||
* DAEMON UNPAUSE
|
||||
*
|
||||
* For internal use:
|
||||
* NODE REJOIN
|
||||
* NODE SERVICE
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -776,6 +774,7 @@ main(int argc, char **argv)
|
||||
* BDR { REGISTER | UNREGISTER } |
|
||||
* NODE { STATUS | CHECK | REJOIN | SERVICE } |
|
||||
* CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | CLEANUP }
|
||||
* DAEMON { STATUS | PAUSE | UNPAUSE }
|
||||
*
|
||||
* [node] is an optional hostname, provided instead of the -h/--host
|
||||
* option
|
||||
@@ -809,6 +808,7 @@ main(int argc, char **argv)
|
||||
action = PRIMARY_REGISTER;
|
||||
else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
|
||||
action = PRIMARY_UNREGISTER;
|
||||
/* allow "primary check"/"primary status" as aliases for "node check"/"node status" */
|
||||
else if (strcasecmp(repmgr_action, "CHECK") == 0)
|
||||
action = NODE_CHECK;
|
||||
else if (strcasecmp(repmgr_action, "STATUS") == 0)
|
||||
@@ -835,6 +835,7 @@ main(int argc, char **argv)
|
||||
action = STANDBY_FOLLOW;
|
||||
else if (strcasecmp(repmgr_action, "SWITCHOVER") == 0)
|
||||
action = STANDBY_SWITCHOVER;
|
||||
/* allow "standby check"/"standby status" as aliases for "node check"/"node status" */
|
||||
else if (strcasecmp(repmgr_action, "CHECK") == 0)
|
||||
action = NODE_CHECK;
|
||||
else if (strcasecmp(repmgr_action, "STATUS") == 0)
|
||||
@@ -1398,7 +1399,7 @@ check_cli_parameters(const int action)
|
||||
if (!runtime_options.host_param_provided)
|
||||
{
|
||||
item_list_append_format(&cli_errors,
|
||||
_("host name for the source node must be provided when executing %s"),
|
||||
_("host name for the source node must be provided with -h/--host when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
|
||||
@@ -1455,7 +1456,7 @@ check_cli_parameters(const int action)
|
||||
if (!runtime_options.host_param_provided)
|
||||
{
|
||||
item_list_append_format(&cli_errors,
|
||||
_("host name for the source node must be provided when executing %s"),
|
||||
_("host name for the source node must be provided with -h/--host when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
}
|
||||
@@ -2004,7 +2005,7 @@ do_help(void)
|
||||
|
||||
puts("");
|
||||
|
||||
printf(_(" Execute \"%s {primary|standby|bdr|node|cluster} --help\" to see command-specific options\n"), progname());
|
||||
printf(_(" Execute \"%s {primary|standby|bdr|node|cluster|witness|daemon} --help\" to see command-specific options\n"), progname());
|
||||
|
||||
puts("");
|
||||
|
||||
@@ -2052,8 +2053,6 @@ do_help(void)
|
||||
printf(_(" -t, --terse don't display detail, hints and other non-critical output\n"));
|
||||
printf(_(" -v, --verbose display additional log output (useful for debugging)\n"));
|
||||
|
||||
|
||||
|
||||
puts("");
|
||||
}
|
||||
|
||||
@@ -2080,8 +2079,9 @@ create_repmgr_extension(PGconn *conn)
|
||||
bool is_superuser = false;
|
||||
PGconn *superuser_conn = NULL;
|
||||
PGconn *schema_create_conn = NULL;
|
||||
t_extension_versions extversions = T_EXTENSION_VERSIONS_INITIALIZER;
|
||||
|
||||
extension_status = get_repmgr_extension_status(conn);
|
||||
extension_status = get_repmgr_extension_status(conn, &extversions);
|
||||
|
||||
switch (extension_status)
|
||||
{
|
||||
@@ -2093,8 +2093,15 @@ create_repmgr_extension(PGconn *conn)
|
||||
log_error(_("\"repmgr\" extension is not available"));
|
||||
return false;
|
||||
|
||||
case REPMGR_OLD_VERSION_INSTALLED:
|
||||
log_error(_("an older version of the \"repmgr\" extension is installed"));
|
||||
log_detail(_("version %s is installed but newer version %s is available"),
|
||||
extversions.installed_version,
|
||||
extversions.default_version);
|
||||
log_hint(_("execute \"ALTER EXTENSION repmgr UPGRADE\""));
|
||||
return false;
|
||||
|
||||
case REPMGR_INSTALLED:
|
||||
/* TODO: check version */
|
||||
log_info(_("\"repmgr\" extension is already installed"));
|
||||
return true;
|
||||
|
||||
|
||||
6
repmgr.c
6
repmgr.c
@@ -58,7 +58,7 @@
|
||||
|
||||
#define TRANCHE_NAME "repmgrd"
|
||||
#define REPMGRD_STATE_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/repmgrd_state.txt"
|
||||
|
||||
#define REPMGRD_STATE_FILE_BUF_SIZE 128
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
@@ -256,8 +256,8 @@ set_local_node_id(PG_FUNCTION_ARGS)
|
||||
|
||||
if (file != NULL)
|
||||
{
|
||||
int buffer_size = 128;
|
||||
char buffer[buffer_size];
|
||||
int buffer_size = REPMGRD_STATE_FILE_BUF_SIZE;
|
||||
char buffer[REPMGRD_STATE_FILE_BUF_SIZE];
|
||||
|
||||
if (fgets(buffer, buffer_size, file) != NULL)
|
||||
{
|
||||
|
||||
@@ -5,7 +5,13 @@
|
||||
# Some configuration items will be set with a default value; this
|
||||
# is noted for each item. Where no default value is shown, the
|
||||
# parameter will be treated as empty or false.
|
||||
|
||||
#
|
||||
# IMPORTANT: string values can be provided as-is, or enclosed in single quotes
|
||||
# (but not double-quotes, which will be interpreted as part of the string), e.g.:
|
||||
#
|
||||
# node_name=foo
|
||||
# node_name = 'foo'
|
||||
#
|
||||
# =============================================================================
|
||||
# Required configuration items
|
||||
# =============================================================================
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
#define REPMGR_VERSION_DATE ""
|
||||
#define REPMGR_VERSION "4.2dev"
|
||||
#define REPMGR_VERSION "4.2"
|
||||
|
||||
@@ -71,6 +71,8 @@ static void notify_followers(NodeInfoList *standby_nodes, int follow_node_id);
|
||||
|
||||
static void check_connection(t_node_info *node_info, PGconn **conn);
|
||||
|
||||
static bool check_primary_status(int degraded_monitoring_elapsed);
|
||||
|
||||
static bool wait_primary_notification(int *new_primary_id);
|
||||
static FailoverState follow_new_primary(int new_primary_id);
|
||||
static FailoverState witness_follow_new_primary(int new_primary_id);
|
||||
@@ -341,6 +343,13 @@ monitor_streaming_primary(void)
|
||||
repmgrd_set_pid(local_conn, getpid(), pid_file);
|
||||
}
|
||||
|
||||
/*
|
||||
* check that the local node is still primary, otherwise switch
|
||||
* to standby monitoring
|
||||
*/
|
||||
if (check_primary_status(-1) == false)
|
||||
return;
|
||||
|
||||
goto loop;
|
||||
}
|
||||
|
||||
@@ -393,143 +402,10 @@ monitor_streaming_primary(void)
|
||||
{
|
||||
local_node_info.node_status = NODE_STATUS_UP;
|
||||
|
||||
/* check to see if the node has been restored as a standby */
|
||||
if (get_recovery_type(local_conn) == RECTYPE_STANDBY)
|
||||
{
|
||||
PGconn *new_primary_conn;
|
||||
if (check_primary_status(degraded_monitoring_elapsed) == false)
|
||||
return;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("reconnected to node after %i seconds, node is now a standby, switching to standby monitoring"),
|
||||
degraded_monitoring_elapsed);
|
||||
log_notice("%s", event_details.data);
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
primary_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
new_primary_conn = get_primary_connection_quiet(local_conn, &primary_node_id, NULL);
|
||||
|
||||
if (PQstatus(new_primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
close_connection(&new_primary_conn);
|
||||
log_warning(_("unable to connect to new primary node %i"), primary_node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
RecordStatus record_status;
|
||||
|
||||
log_debug("primary node id is now %i", primary_node_id);
|
||||
|
||||
record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);
|
||||
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
bool resume_monitoring = true;
|
||||
|
||||
log_debug("node %i is registered with type = %s",
|
||||
config_file_options.node_id,
|
||||
get_node_type_string(local_node_info.type));
|
||||
|
||||
/*
|
||||
* node has recovered but metadata not updated - we can do that ourselves,
|
||||
*/
|
||||
if (local_node_info.type == PRIMARY)
|
||||
{
|
||||
log_notice(_("node \"%s\" (ID: %i) still registered as primary, setting to standby"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
|
||||
if (update_node_record_set_active_standby(new_primary_conn, config_file_options.node_id) == false)
|
||||
{
|
||||
resume_monitoring = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
resume_monitoring = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (resume_monitoring == true)
|
||||
{
|
||||
monitoring_state = MS_NORMAL;
|
||||
log_notice(_("former primary has been restored as standby after %i seconds, updating node record and resuming monitoring"),
|
||||
degraded_monitoring_elapsed);
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("node restored as standby after %i seconds, monitoring connection to upstream node %i"),
|
||||
degraded_monitoring_elapsed,
|
||||
local_node_info.upstream_node_id);
|
||||
|
||||
create_event_notification(new_primary_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_standby_reconnect",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
close_connection(&new_primary_conn);
|
||||
|
||||
/* restart monitoring as standby */
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (record_status == RECORD_NOT_FOUND)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("no metadata record found for this node on current primary %i"),
|
||||
primary_node_id);
|
||||
|
||||
log_error("%s", event_details.data);
|
||||
log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node"));
|
||||
|
||||
close_connection(&new_primary_conn);
|
||||
|
||||
create_event_notification(NULL,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_shutdown",
|
||||
false,
|
||||
event_details.data);
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
monitoring_state = MS_NORMAL;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("reconnected to primary node after %i seconds, resuming monitoring"),
|
||||
degraded_monitoring_elapsed);
|
||||
|
||||
create_event_notification(local_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_local_reconnect",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
log_notice("%s", event_details.data);
|
||||
termPQExpBuffer(&event_details);
|
||||
goto loop;
|
||||
}
|
||||
goto loop;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -541,6 +417,11 @@ monitor_streaming_primary(void)
|
||||
*/
|
||||
}
|
||||
loop:
|
||||
|
||||
/* check node is still primary, if not restart monitoring */
|
||||
if (check_primary_status(-1) == false)
|
||||
return;
|
||||
|
||||
/* emit "still alive" log message at regular intervals, if requested */
|
||||
if (config_file_options.log_status_interval > 0)
|
||||
{
|
||||
@@ -575,6 +456,181 @@ loop:
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* If monitoring a primary, it's possible that after an outage of the local node
|
||||
* (due to e.g. a switchover), the node has come back as a standby. We therefore
|
||||
* need to verify its status and if everything looks OK, restart monitoring in
|
||||
* standby mode.
|
||||
*/
|
||||
bool
|
||||
check_primary_status(int degraded_monitoring_elapsed)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
|
||||
/* check to see if the node has been restored as a standby */
|
||||
if (get_recovery_type(local_conn) == RECTYPE_STANDBY)
|
||||
{
|
||||
PGconn *new_primary_conn;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
if (degraded_monitoring_elapsed > 0)
|
||||
{
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("reconnected to node after %i seconds, node is now a standby, switching to standby monitoring"),
|
||||
degraded_monitoring_elapsed);
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBufferStr(&event_details,
|
||||
_("node is now a standby, switching to standby monitoring"));
|
||||
}
|
||||
|
||||
log_notice("%s", event_details.data);
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
primary_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
new_primary_conn = get_primary_connection_quiet(local_conn, &primary_node_id, NULL);
|
||||
|
||||
if (PQstatus(new_primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
close_connection(&new_primary_conn);
|
||||
log_warning(_("unable to connect to new primary node %i"), primary_node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
RecordStatus record_status;
|
||||
|
||||
log_debug("primary node id is now %i", primary_node_id);
|
||||
|
||||
record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);
|
||||
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
bool resume_monitoring = true;
|
||||
|
||||
log_debug("node %i is registered with type = %s",
|
||||
config_file_options.node_id,
|
||||
get_node_type_string(local_node_info.type));
|
||||
|
||||
/*
|
||||
* node has recovered but metadata not updated - we can do that ourselves,
|
||||
*/
|
||||
if (local_node_info.type == PRIMARY)
|
||||
{
|
||||
log_notice(_("node \"%s\" (ID: %i) still registered as primary, setting to standby"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
|
||||
if (update_node_record_set_active_standby(new_primary_conn, config_file_options.node_id) == false)
|
||||
{
|
||||
resume_monitoring = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
resume_monitoring = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (resume_monitoring == true)
|
||||
{
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
if (degraded_monitoring_elapsed > 0)
|
||||
{
|
||||
monitoring_state = MS_NORMAL;
|
||||
|
||||
log_notice(_("former primary has been restored as standby after %i seconds, updating node record and resuming monitoring"),
|
||||
degraded_monitoring_elapsed);
|
||||
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("node restored as standby after %i seconds, monitoring connection to upstream node %i"),
|
||||
degraded_monitoring_elapsed,
|
||||
local_node_info.upstream_node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("node has become a standby, monitoring connection to upstream node %i"),
|
||||
local_node_info.upstream_node_id);
|
||||
}
|
||||
|
||||
create_event_notification(new_primary_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_standby_reconnect",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
close_connection(&new_primary_conn);
|
||||
|
||||
/* restart monitoring as standby */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (record_status == RECORD_NOT_FOUND)
|
||||
{
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("no metadata record found for this node on current primary %i"),
|
||||
primary_node_id);
|
||||
|
||||
log_error("%s", event_details.data);
|
||||
log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node"));
|
||||
|
||||
close_connection(&new_primary_conn);
|
||||
|
||||
create_event_notification(NULL,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_shutdown",
|
||||
false,
|
||||
event_details.data);
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (degraded_monitoring_elapsed > 0)
|
||||
{
|
||||
monitoring_state = MS_NORMAL;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("reconnected to primary node after %i seconds, resuming monitoring"),
|
||||
degraded_monitoring_elapsed);
|
||||
|
||||
create_event_notification(local_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_local_reconnect",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
log_notice("%s", event_details.data);
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
monitor_streaming_standby(void)
|
||||
{
|
||||
@@ -595,7 +651,7 @@ monitor_streaming_standby(void)
|
||||
*/
|
||||
if (local_node_info.upstream_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
local_node_info.upstream_node_id = get_primary_node_id(local_conn);
|
||||
upstream_conn = get_primary_connection(local_conn, &local_node_info.upstream_node_id, NULL);
|
||||
|
||||
/*
|
||||
* Terminate if there doesn't appear to be an active cluster primary.
|
||||
@@ -608,34 +664,40 @@ monitor_streaming_standby(void)
|
||||
log_error(_("unable to determine an active primary for this cluster, terminating"));
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
(void) get_node_record(upstream_conn, local_node_info.upstream_node_id, &upstream_node_info);
|
||||
|
||||
}
|
||||
|
||||
record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info);
|
||||
|
||||
/*
|
||||
* Terminate if we can't find the record for the node we're supposed to
|
||||
* monitor. This is a "fix-the-config" situation, not a lot else we can
|
||||
* do.
|
||||
*/
|
||||
if (record_status == RECORD_NOT_FOUND)
|
||||
else
|
||||
{
|
||||
log_error(_("no record found for upstream node (ID: %i), terminating"),
|
||||
local_node_info.upstream_node_id);
|
||||
log_hint(_("ensure the upstream node is registered correctly"));
|
||||
record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &upstream_node_info);
|
||||
|
||||
terminate(ERR_DB_CONN);
|
||||
}
|
||||
else if (record_status == RECORD_ERROR)
|
||||
{
|
||||
log_error(_("unable to retrieve record for upstream node (ID: %i), terminating"),
|
||||
local_node_info.upstream_node_id);
|
||||
/*
|
||||
* Terminate if we can't find the record for the node we're supposed to
|
||||
* monitor. This is a "fix-the-config" situation, not a lot else we can
|
||||
* do.
|
||||
*/
|
||||
if (record_status == RECORD_NOT_FOUND)
|
||||
{
|
||||
log_error(_("no record found for upstream node (ID: %i), terminating"),
|
||||
local_node_info.upstream_node_id);
|
||||
log_hint(_("ensure the upstream node is registered correctly"));
|
||||
|
||||
terminate(ERR_DB_CONN);
|
||||
terminate(ERR_DB_CONN);
|
||||
}
|
||||
else if (record_status == RECORD_ERROR)
|
||||
{
|
||||
log_error(_("unable to retrieve record for upstream node (ID: %i), terminating"),
|
||||
local_node_info.upstream_node_id);
|
||||
|
||||
terminate(ERR_DB_CONN);
|
||||
}
|
||||
|
||||
log_debug("connecting to upstream node %i: \"%s\"", upstream_node_info.node_id, upstream_node_info.conninfo);
|
||||
|
||||
upstream_conn = establish_db_connection(upstream_node_info.conninfo, false);
|
||||
}
|
||||
|
||||
log_debug("connecting to upstream node %i: \"%s\"", upstream_node_info.node_id, upstream_node_info.conninfo);
|
||||
|
||||
upstream_conn = establish_db_connection(upstream_node_info.conninfo, false);
|
||||
|
||||
/*
|
||||
* Upstream node must be running at repmgrd startup.
|
||||
@@ -652,6 +714,15 @@ monitor_streaming_standby(void)
|
||||
terminate(ERR_DB_CONN);
|
||||
}
|
||||
|
||||
record_status = get_node_record(upstream_conn, local_node_info.node_id, &local_node_info);
|
||||
|
||||
if (upstream_node_info.node_id == local_node_info.node_id)
|
||||
{
|
||||
PQfinish(upstream_conn);
|
||||
upstream_conn = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* refresh upstream node record from upstream node, so it's as up-to-date
|
||||
* as possible
|
||||
@@ -682,6 +753,23 @@ monitor_streaming_standby(void)
|
||||
primary_conn = upstream_conn;
|
||||
}
|
||||
|
||||
/*
|
||||
* It's possible monitoring has been restarted after some outage which
|
||||
* resulted in the local node being marked as inactive; if so mark it
|
||||
* as active again.
|
||||
*/
|
||||
if (local_node_info.active == false)
|
||||
{
|
||||
if (update_node_record_set_active(primary_conn, local_node_info.node_id, true) == true)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
local_node_info.active = true;
|
||||
}
|
||||
}
|
||||
|
||||
primary_node_id = get_primary_node_id(primary_conn);
|
||||
|
||||
/* Log startup event */
|
||||
@@ -766,6 +854,7 @@ monitor_streaming_standby(void)
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
check_connection(&local_node_info, &local_conn);
|
||||
log_debug("YYY here");
|
||||
}
|
||||
|
||||
try_reconnect(&upstream_conn, &upstream_node_info);
|
||||
@@ -778,6 +867,43 @@ monitor_streaming_standby(void)
|
||||
if (upstream_node_info.type == PRIMARY)
|
||||
{
|
||||
primary_conn = upstream_conn;
|
||||
|
||||
if (get_recovery_type(primary_conn) == RECTYPE_STANDBY)
|
||||
{
|
||||
ExecStatusType ping_result;
|
||||
|
||||
log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"),
|
||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||
PQfinish(upstream_conn);
|
||||
upstream_conn = NULL;
|
||||
termPQExpBuffer(&event_details);
|
||||
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
/* check local connection */
|
||||
ping_result = connection_ping(local_conn);
|
||||
|
||||
if (ping_result != PGRES_TUPLES_OK)
|
||||
{
|
||||
int i;
|
||||
|
||||
PQfinish(local_conn);
|
||||
|
||||
for (i = 0; i < config_file_options.repmgrd_standby_startup_timeout; i++)
|
||||
{
|
||||
local_conn = establish_db_connection(local_node_info.conninfo, false);
|
||||
|
||||
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||
break;
|
||||
|
||||
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
||||
i + 1,
|
||||
config_file_options.repmgrd_standby_startup_timeout);
|
||||
sleep(1);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
@@ -1140,6 +1266,7 @@ loop:
|
||||
|
||||
check_connection(&local_node_info, &local_conn);
|
||||
|
||||
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
if (local_node_info.active == true)
|
||||
@@ -1180,11 +1307,37 @@ loop:
|
||||
}
|
||||
else
|
||||
{
|
||||
int stored_local_node_id = repmgrd_get_local_node_id(local_conn);
|
||||
|
||||
/*
|
||||
* If the local node was restarted, we'll need to reinitialise values
|
||||
* stored in shared memory.
|
||||
*/
|
||||
|
||||
if (stored_local_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||
repmgrd_set_pid(local_conn, getpid(), pid_file);
|
||||
}
|
||||
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||
{
|
||||
if (get_recovery_type(primary_conn) == RECTYPE_STANDBY)
|
||||
{
|
||||
log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"),
|
||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||
PQfinish(primary_conn);
|
||||
primary_conn = NULL;
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* we've reconnected to the local node after an outage */
|
||||
if (local_node_info.active == false)
|
||||
{
|
||||
int stored_local_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||
{
|
||||
if (update_node_record_set_active(primary_conn, local_node_info.node_id, true) == true)
|
||||
@@ -1194,7 +1347,6 @@ loop:
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
local_node_info.active = true;
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("reconnected to local node \"%s\" (ID: %i), marking active"),
|
||||
local_node_info.node_name,
|
||||
@@ -1212,18 +1364,6 @@ loop:
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the local node was restarted, we'll need to reinitialise values
|
||||
* stored in shared memory.
|
||||
*/
|
||||
|
||||
stored_local_node_id = repmgrd_get_local_node_id(local_conn);
|
||||
if (stored_local_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||
repmgrd_set_pid(local_conn, getpid(), pid_file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1258,53 +1398,46 @@ monitor_streaming_witness(void)
|
||||
|
||||
/*
|
||||
* At this point we can't trust the local copy of "repmgr.nodes", as
|
||||
* it may not have been updated. We'll scan the cluster for the current
|
||||
* primary and refresh the copy from that before proceeding further.
|
||||
* it may not have been updated. We'll scan the cluster to find the
|
||||
* current primary and refresh the copy from that before proceeding
|
||||
* further.
|
||||
*/
|
||||
primary_conn = get_primary_connection_quiet(local_conn, &primary_node_id, NULL);
|
||||
|
||||
/*
|
||||
* Primary node must be running at repmgrd startup.
|
||||
* Primary node should be running at repmgrd startup.
|
||||
*
|
||||
* We could possibly have repmgrd skip to degraded monitoring mode until
|
||||
* it comes up, but there doesn't seem to be much point in doing that.
|
||||
* Otherwise we'll skip to degraded monitoring.
|
||||
*/
|
||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable connect to upstream node (ID: %i), terminating"),
|
||||
upstream_node_info.node_id);
|
||||
log_hint(_("primary node must be running before repmgrd can start"));
|
||||
|
||||
terminate(ERR_DB_CONN);
|
||||
}
|
||||
|
||||
/* synchronise local copy of "repmgr.nodes", in case it was stale */
|
||||
witness_copy_node_records(primary_conn, local_conn);
|
||||
|
||||
/*
|
||||
* refresh upstream node record from primary, so it's as up-to-date
|
||||
* as possible
|
||||
*/
|
||||
record_status = get_node_record(primary_conn, primary_node_id, &upstream_node_info);
|
||||
|
||||
/*
|
||||
* This is unlikely to happen; if it does emit a warning for diagnostic
|
||||
* purposes and plough on regardless.
|
||||
*
|
||||
* A check for the existence of the record will have already been carried out
|
||||
* in main().
|
||||
*/
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
log_warning(_("unable to retrieve node record from primary"));
|
||||
}
|
||||
|
||||
|
||||
/* Log startup event */
|
||||
if (startup_event_logged == false)
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
|
||||
char *event_type = startup_event_logged == false
|
||||
? "repmgrd_start"
|
||||
: "repmgrd_upstream_reconnect";
|
||||
|
||||
/* synchronise local copy of "repmgr.nodes", in case it was stale */
|
||||
witness_copy_node_records(primary_conn, local_conn);
|
||||
|
||||
/*
|
||||
* refresh upstream node record from primary, so it's as up-to-date
|
||||
* as possible
|
||||
*/
|
||||
record_status = get_node_record(primary_conn, primary_node_id, &upstream_node_info);
|
||||
|
||||
/*
|
||||
* This is unlikely to happen; if it does emit a warning for diagnostic
|
||||
* purposes and plough on regardless.
|
||||
*
|
||||
* A check for the existence of the record will have already been carried out
|
||||
* in main().
|
||||
*/
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
log_warning(_("unable to retrieve node record from primary"));
|
||||
}
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
@@ -1312,25 +1445,58 @@ monitor_streaming_witness(void)
|
||||
upstream_node_info.node_name,
|
||||
upstream_node_info.node_id);
|
||||
|
||||
log_info("%s", event_details.data);
|
||||
|
||||
create_event_notification(primary_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_start",
|
||||
event_type,
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
startup_event_logged = true;
|
||||
|
||||
log_info("%s", event_details.data);
|
||||
if (startup_event_logged == false)
|
||||
startup_event_logged = true;
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
monitoring_state = MS_NORMAL;
|
||||
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
||||
INSTR_TIME_SET_CURRENT(witness_sync_interval_start);
|
||||
|
||||
upstream_node_info.node_status = NODE_STATUS_UP;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Here we're unable to connect to a primary despite having scanned all
|
||||
* known nodes, so we'll grab the record of the node we think is primary
|
||||
* and continue straight to degraded monitoring in the hope a primary
|
||||
* will appear.
|
||||
*/
|
||||
|
||||
monitoring_state = MS_NORMAL;
|
||||
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
||||
INSTR_TIME_SET_CURRENT(witness_sync_interval_start);
|
||||
primary_node_id = get_primary_node_id(local_conn);
|
||||
|
||||
upstream_node_info.node_status = NODE_STATUS_UP;
|
||||
log_debug("unable to find current primary; setting primary_node_id to last known ID %i", primary_node_id);
|
||||
|
||||
record_status = get_node_record(local_conn, primary_node_id, &upstream_node_info);
|
||||
|
||||
/*
|
||||
* This is unlikely to happen, but if for whatever reason there's
|
||||
* no primary record in the local table, we should just give up
|
||||
*/
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
log_error(_("unable to retrieve node record for last known primary %i"),
|
||||
primary_node_id);
|
||||
log_hint(_("execute \"repmgr witness register --force\" to sync the local node records"));
|
||||
PQfinish(local_conn);
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
monitoring_state = MS_DEGRADED;
|
||||
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
||||
upstream_node_info.node_status = NODE_STATUS_DOWN;
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
@@ -1338,7 +1504,7 @@ monitor_streaming_witness(void)
|
||||
{
|
||||
if (upstream_node_info.node_status == NODE_STATUS_UP)
|
||||
{
|
||||
instr_time upstream_node_unreachable_start;
|
||||
instr_time upstream_node_unreachable_start;
|
||||
|
||||
INSTR_TIME_SET_CURRENT(upstream_node_unreachable_start);
|
||||
|
||||
@@ -1371,6 +1537,17 @@ monitor_streaming_witness(void)
|
||||
upstream_node_unreachable_elapsed);
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
/* check upstream is still primary */
|
||||
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
|
||||
{
|
||||
log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"),
|
||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||
PQfinish(primary_conn);
|
||||
primary_conn = NULL;
|
||||
termPQExpBuffer(&event_details);
|
||||
return;
|
||||
}
|
||||
|
||||
create_event_notification(primary_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
@@ -1428,14 +1605,25 @@ monitor_streaming_witness(void)
|
||||
upstream_node_info.node_id,
|
||||
degraded_monitoring_elapsed);
|
||||
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
/* check upstream is still primary */
|
||||
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
|
||||
{
|
||||
log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"),
|
||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||
PQfinish(primary_conn);
|
||||
primary_conn = NULL;
|
||||
termPQExpBuffer(&event_details);
|
||||
return;
|
||||
}
|
||||
|
||||
create_event_notification(primary_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_upstream_reconnect",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
log_notice("%s", event_details.data);
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
goto loop;
|
||||
@@ -1467,6 +1655,12 @@ monitor_streaming_witness(void)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* skip node if configured as a witness node - we can't possibly "follow" that */
|
||||
if (cell->node_info->type == WITNESS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
||||
|
||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||
@@ -1594,14 +1788,29 @@ loop:
|
||||
}
|
||||
|
||||
|
||||
/* refresh repmgr.nodes after "witness_sync_interval" seconds */
|
||||
/*
|
||||
* Refresh repmgr.nodes after "witness_sync_interval" seconds, and check if primary
|
||||
* has changed
|
||||
*/
|
||||
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||
{
|
||||
int witness_sync_interval_elapsed = calculate_elapsed(witness_sync_interval_start);
|
||||
|
||||
if (witness_sync_interval_elapsed >= config_file_options.witness_sync_interval)
|
||||
{
|
||||
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
|
||||
{
|
||||
log_notice(_("current upstream node \"%s\" (node ID: %i) is not primary, restarting monitoring"),
|
||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||
PQfinish(primary_conn);
|
||||
primary_conn = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
log_debug("synchronising witness node records");
|
||||
witness_copy_node_records(primary_conn, local_conn);
|
||||
|
||||
INSTR_TIME_SET_CURRENT(witness_sync_interval_start);
|
||||
}
|
||||
}
|
||||
@@ -3092,6 +3301,8 @@ check_connection(t_node_info *node_info, PGconn **conn)
|
||||
if (is_server_available(node_info->conninfo) == false)
|
||||
{
|
||||
log_warning(_("connection to node %i lost"), node_info->node_id);
|
||||
PQfinish(*conn);
|
||||
*conn = NULL;
|
||||
}
|
||||
|
||||
if (PQstatus(*conn) != CONNECTION_OK)
|
||||
|
||||
34
repmgrd.c
34
repmgrd.c
@@ -88,6 +88,7 @@ main(int argc, char **argv)
|
||||
|
||||
RecordStatus record_status;
|
||||
ExtensionStatus extension_status = REPMGR_UNKNOWN;
|
||||
t_extension_versions extversions = T_EXTENSION_VERSIONS_INITIALIZER;
|
||||
|
||||
FILE *fd;
|
||||
|
||||
@@ -175,7 +176,6 @@ main(int argc, char **argv)
|
||||
|
||||
/* daemon options */
|
||||
|
||||
|
||||
case 'd':
|
||||
daemonize = true;
|
||||
break;
|
||||
@@ -184,7 +184,6 @@ main(int argc, char **argv)
|
||||
daemonize = parse_bool(optarg, "-d/--daemonize", &cli_errors);
|
||||
break;
|
||||
|
||||
|
||||
case 'p':
|
||||
strncpy(pid_file, optarg, MAXPGPATH);
|
||||
break;
|
||||
@@ -391,7 +390,7 @@ main(int argc, char **argv)
|
||||
*/
|
||||
|
||||
/* Check "repmgr" the extension is installed */
|
||||
extension_status = get_repmgr_extension_status(local_conn);
|
||||
extension_status = get_repmgr_extension_status(local_conn, &extversions);
|
||||
|
||||
if (extension_status != REPMGR_INSTALLED)
|
||||
{
|
||||
@@ -404,19 +403,32 @@ main(int argc, char **argv)
|
||||
exit(ERR_DB_QUERY);
|
||||
}
|
||||
|
||||
log_error(_("repmgr extension not found on this node"));
|
||||
if (extension_status == REPMGR_OLD_VERSION_INSTALLED)
|
||||
{
|
||||
log_error(_("an older version of the \"repmgr\" extension is installed"));
|
||||
log_detail(_("version %s is installed but newer version %s is available"),
|
||||
extversions.installed_version,
|
||||
extversions.default_version);
|
||||
log_hint(_("verify the repmgr installation is updated properly before continuing"));
|
||||
|
||||
if (extension_status == REPMGR_AVAILABLE)
|
||||
{
|
||||
log_detail(_("repmgr extension is available but not installed in database \"%s\""),
|
||||
PQdb(local_conn));
|
||||
}
|
||||
else if (extension_status == REPMGR_UNAVAILABLE)
|
||||
else
|
||||
{
|
||||
log_detail(_("repmgr extension is not available on this node"));
|
||||
log_error(_("repmgr extension not found on this node"));
|
||||
|
||||
if (extension_status == REPMGR_AVAILABLE)
|
||||
{
|
||||
log_detail(_("repmgr extension is available but not installed in database \"%s\""),
|
||||
PQdb(local_conn));
|
||||
}
|
||||
else if (extension_status == REPMGR_UNAVAILABLE)
|
||||
{
|
||||
log_detail(_("repmgr extension is not available on this node"));
|
||||
}
|
||||
|
||||
log_hint(_("check that this node is part of a repmgr cluster"));
|
||||
}
|
||||
|
||||
log_hint(_("check that this node is part of a repmgr cluster"));
|
||||
close_connection(&local_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user