mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
49 Commits
v5.3.3
...
REL5_1_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d43270008c | ||
|
|
a99768fdd8 | ||
|
|
2a4e81ef1c | ||
|
|
0ad6aceceb | ||
|
|
53c9eacbc4 | ||
|
|
e93f1c0439 | ||
|
|
7332d0251c | ||
|
|
7006a6d9c3 | ||
|
|
b8677a0fa2 | ||
|
|
992d2e0e49 | ||
|
|
5c71809261 | ||
|
|
22f2ee3050 | ||
|
|
2371f30f8a | ||
|
|
eddf06b60b | ||
|
|
b9874cd751 | ||
|
|
8313944535 | ||
|
|
3bfaa8e722 | ||
|
|
ca42dd563b | ||
|
|
a2f73a5086 | ||
|
|
af144d39cb | ||
|
|
68ad58f5fc | ||
|
|
c76fee98ef | ||
|
|
de634eb593 | ||
|
|
03c2c8cebd | ||
|
|
da7db96e76 | ||
|
|
b4c9064903 | ||
|
|
5abef8e4ed | ||
|
|
0813a31c08 | ||
|
|
4c7be798e0 | ||
|
|
0a7c7ae7ab | ||
|
|
9ce1c2e640 | ||
|
|
5a2399cb25 | ||
|
|
54d33dd50b | ||
|
|
2b0218456a | ||
|
|
2b773480e7 | ||
|
|
a934f19e80 | ||
|
|
9040d53e55 | ||
|
|
cb19311b35 | ||
|
|
9ce7cb7012 | ||
|
|
687ed68ce2 | ||
|
|
8472d99277 | ||
|
|
177b84345d | ||
|
|
555351f8c1 | ||
|
|
1e90d5e018 | ||
|
|
d45e64ca9e | ||
|
|
374f19675d | ||
|
|
ce88f3ec43 | ||
|
|
5acdd69add | ||
|
|
71e23107e7 |
4
HISTORY
4
HISTORY
@@ -1,4 +1,4 @@
|
|||||||
5.1 2020-??-??
|
5.1.0 2020-04-13
|
||||||
repmgr: remove BDR 2.x support
|
repmgr: remove BDR 2.x support
|
||||||
repmgr: don't query upstream's data directory (Ian)
|
repmgr: don't query upstream's data directory (Ian)
|
||||||
repmgr: rename --recovery-conf-only to --replication-conf-only (Ian)
|
repmgr: rename --recovery-conf-only to --replication-conf-only (Ian)
|
||||||
@@ -14,6 +14,8 @@
|
|||||||
repmgr: improve "standby switchover" completion checks (Ian)
|
repmgr: improve "standby switchover" completion checks (Ian)
|
||||||
repmgr: add replication configuration file ownership check to
|
repmgr: add replication configuration file ownership check to
|
||||||
"standby switchover" (Ian)
|
"standby switchover" (Ian)
|
||||||
|
repmgr: check the demotion candidate's registered repmgr.conf file can
|
||||||
|
be found (laixiong; GitHub 615)
|
||||||
repmgr: consolidate replication connection code (Ian)
|
repmgr: consolidate replication connection code (Ian)
|
||||||
repmgr: check permissions for "pg_promote()" and fall back to pg_ctl
|
repmgr: check permissions for "pg_promote()" and fall back to pg_ctl
|
||||||
if necessary (Ian)
|
if necessary (Ian)
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ EXTENSION = repmgr
|
|||||||
|
|
||||||
DATA = \
|
DATA = \
|
||||||
repmgr--unpackaged--4.0.sql \
|
repmgr--unpackaged--4.0.sql \
|
||||||
|
repmgr--unpackaged--5.1.sql \
|
||||||
repmgr--4.0.sql \
|
repmgr--4.0.sql \
|
||||||
repmgr--4.0--4.1.sql \
|
repmgr--4.0--4.1.sql \
|
||||||
repmgr--4.1.sql \
|
repmgr--4.1.sql \
|
||||||
|
|||||||
@@ -29,7 +29,7 @@
|
|||||||
#define TARGET_TIMELINE_LATEST 0
|
#define TARGET_TIMELINE_LATEST 0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is defined src/include/utils.h, however it's not practical
|
* This is defined in src/include/utils.h, however it's not practical
|
||||||
* to include that from a frontend application.
|
* to include that from a frontend application.
|
||||||
*/
|
*/
|
||||||
#define PG_AUTOCONF_FILENAME "postgresql.auto.conf"
|
#define PG_AUTOCONF_FILENAME "postgresql.auto.conf"
|
||||||
@@ -189,6 +189,7 @@ typedef struct
|
|||||||
|
|
||||||
/* undocumented test settings */
|
/* undocumented test settings */
|
||||||
int promote_delay;
|
int promote_delay;
|
||||||
|
int failover_delay;
|
||||||
} t_configuration_options;
|
} t_configuration_options;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -243,8 +244,9 @@ typedef struct
|
|||||||
/* barman settings */ \
|
/* barman settings */ \
|
||||||
"", "", "", \
|
"", "", "", \
|
||||||
/* rsync/ssh settings */ \
|
/* rsync/ssh settings */ \
|
||||||
"", "", \
|
"", "", \
|
||||||
/* undocumented test settings */ \
|
/* undocumented test settings */ \
|
||||||
|
0, \
|
||||||
0 \
|
0 \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
18
configure
vendored
18
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for repmgr 5.1.
|
# Generated by GNU Autoconf 2.69 for repmgr 5.1.0.
|
||||||
#
|
#
|
||||||
# Report bugs to <repmgr@googlegroups.com>.
|
# Report bugs to <repmgr@googlegroups.com>.
|
||||||
#
|
#
|
||||||
@@ -582,8 +582,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='repmgr'
|
PACKAGE_NAME='repmgr'
|
||||||
PACKAGE_TARNAME='repmgr'
|
PACKAGE_TARNAME='repmgr'
|
||||||
PACKAGE_VERSION='5.1'
|
PACKAGE_VERSION='5.1.0'
|
||||||
PACKAGE_STRING='repmgr 5.1'
|
PACKAGE_STRING='repmgr 5.1.0'
|
||||||
PACKAGE_BUGREPORT='repmgr@googlegroups.com'
|
PACKAGE_BUGREPORT='repmgr@googlegroups.com'
|
||||||
PACKAGE_URL='https://repmgr.org/'
|
PACKAGE_URL='https://repmgr.org/'
|
||||||
|
|
||||||
@@ -1181,7 +1181,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures repmgr 5.1 to adapt to many kinds of systems.
|
\`configure' configures repmgr 5.1.0 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1242,7 +1242,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of repmgr 5.1:";;
|
short | recursive ) echo "Configuration of repmgr 5.1.0:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1316,7 +1316,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
repmgr configure 5.1
|
repmgr configure 5.1.0
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
@@ -1335,7 +1335,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by repmgr $as_me 5.1, which was
|
It was created by repmgr $as_me 5.1.0, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2487,7 +2487,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by repmgr $as_me 5.1, which was
|
This file was extended by repmgr $as_me 5.1.0, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -2550,7 +2550,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
repmgr config.status 5.1
|
repmgr config.status 5.1.0
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([repmgr], [5.1], [repmgr@googlegroups.com], [repmgr], [https://repmgr.org/])
|
AC_INIT([repmgr], [5.1.0], [repmgr@googlegroups.com], [repmgr], [https://repmgr.org/])
|
||||||
|
|
||||||
AC_COPYRIGHT([Copyright (c) 2010-2020, 2ndQuadrant Ltd.])
|
AC_COPYRIGHT([Copyright (c) 2010-2020, 2ndQuadrant Ltd.])
|
||||||
|
|
||||||
|
|||||||
58
dbutils.c
58
dbutils.c
@@ -2951,7 +2951,7 @@ get_child_nodes(PGconn *conn, int node_id, NodeInfoList *node_list)
|
|||||||
" WHERE n.upstream_node_id = %i ",
|
" WHERE n.upstream_node_id = %i ",
|
||||||
node_id);
|
node_id);
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "get_active_sibling_node_records():\n%s", query.data);
|
log_verbose(LOG_DEBUG, "get_child_nodes():\n%s", query.data);
|
||||||
|
|
||||||
res = PQexec(conn, query.data);
|
res = PQexec(conn, query.data);
|
||||||
|
|
||||||
@@ -5720,16 +5720,16 @@ get_node_replication_stats(PGconn *conn, t_node_info *node_info)
|
|||||||
|
|
||||||
|
|
||||||
NodeAttached
|
NodeAttached
|
||||||
is_downstream_node_attached(PGconn *conn, char *node_name)
|
is_downstream_node_attached(PGconn *conn, char *node_name, char **node_state)
|
||||||
{
|
{
|
||||||
PQExpBufferData query;
|
PQExpBufferData query;
|
||||||
PGresult *res = NULL;
|
PGresult *res = NULL;
|
||||||
int c = 0;
|
const char *state = NULL;
|
||||||
|
|
||||||
initPQExpBuffer(&query);
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
appendPQExpBuffer(&query,
|
appendPQExpBuffer(&query,
|
||||||
" SELECT pg_catalog.count(*) "
|
" SELECT pid, state "
|
||||||
" FROM pg_catalog.pg_stat_replication "
|
" FROM pg_catalog.pg_stat_replication "
|
||||||
" WHERE application_name = '%s'",
|
" WHERE application_name = '%s'",
|
||||||
node_name);
|
node_name);
|
||||||
@@ -5748,31 +5748,53 @@ is_downstream_node_attached(PGconn *conn, char *node_name)
|
|||||||
return NODE_ATTACHED_UNKNOWN;
|
return NODE_ATTACHED_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PQntuples(res) != 1)
|
termPQExpBuffer(&query);
|
||||||
{
|
|
||||||
log_verbose(LOG_WARNING, _("unexpected number of tuples (%i) returned"), PQntuples(res));
|
/*
|
||||||
|
* If there's more than one entry in pg_stat_replication, there's no
|
||||||
|
* way we can reliably determine which one belongs to the node we're
|
||||||
|
* checking, so there's nothing more we can do.
|
||||||
|
*/
|
||||||
|
if (PQntuples(res) > 1)
|
||||||
|
{
|
||||||
|
log_error(_("multiple entries with \"application_name\" set to \"%s\" found in \"pg_stat_replication\""),
|
||||||
|
node_name);
|
||||||
|
log_hint(_("verify that a unique node name is configured for each node"));
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
return NODE_ATTACHED_UNKNOWN;
|
return NODE_ATTACHED_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
c = atoi(PQgetvalue(res, 0, 0));
|
if (PQntuples(res) == 0)
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
|
||||||
PQclear(res);
|
|
||||||
|
|
||||||
if (c == 0)
|
|
||||||
{
|
{
|
||||||
log_verbose(LOG_WARNING, _("node \"%s\" not found in \"pg_stat_replication\""), node_name);
|
log_warning(_("node \"%s\" not found in \"pg_stat_replication\""), node_name);
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
return NODE_DETACHED;
|
return NODE_DETACHED;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c > 1)
|
state = PQgetvalue(res, 0, 1);
|
||||||
log_verbose(LOG_WARNING, _("multiple entries with \"application_name\" set to \"%s\" found in \"pg_stat_replication\""),
|
|
||||||
node_name);
|
if (node_state != NULL)
|
||||||
|
{
|
||||||
|
*node_state = palloc0(strlen(state) + 1);
|
||||||
|
strncpy(*node_state, state, strlen(state));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(state, "streaming") != 0)
|
||||||
|
{
|
||||||
|
log_warning(_("node \"%s\" attached in state \"%s\""),
|
||||||
|
node_name,
|
||||||
|
state);
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return NODE_NOT_ATTACHED;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
return NODE_ATTACHED;
|
return NODE_ATTACHED;
|
||||||
}
|
}
|
||||||
|
|||||||
11
dbutils.h
11
dbutils.h
@@ -119,9 +119,14 @@ typedef enum
|
|||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
|
/* unable to query "pg_stat_replication" or other error */
|
||||||
NODE_ATTACHED_UNKNOWN = -1,
|
NODE_ATTACHED_UNKNOWN = -1,
|
||||||
NODE_DETACHED,
|
/* node has record in "pg_stat_replication" and state is "streaming" */
|
||||||
NODE_ATTACHED
|
NODE_ATTACHED,
|
||||||
|
/* node has record in "pg_stat_replication" but state is not "streaming" */
|
||||||
|
NODE_NOT_ATTACHED,
|
||||||
|
/* node has no record in "pg_stat_replication" */
|
||||||
|
NODE_DETACHED
|
||||||
} NodeAttached;
|
} NodeAttached;
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
@@ -589,7 +594,7 @@ bool get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *repl
|
|||||||
int get_replication_lag_seconds(PGconn *conn);
|
int get_replication_lag_seconds(PGconn *conn);
|
||||||
TimeLineID get_node_timeline(PGconn *conn, char *timeline_id_str);
|
TimeLineID get_node_timeline(PGconn *conn, char *timeline_id_str);
|
||||||
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
|
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
|
||||||
NodeAttached is_downstream_node_attached(PGconn *conn, char *node_name);
|
NodeAttached is_downstream_node_attached(PGconn *conn, char *node_name, char **node_state);
|
||||||
void set_upstream_last_seen(PGconn *conn, int upstream_node_id);
|
void set_upstream_last_seen(PGconn *conn, int upstream_node_id);
|
||||||
int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
|
int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
|
||||||
|
|
||||||
|
|||||||
@@ -95,6 +95,7 @@ clean:
|
|||||||
rm -f repmgr.html
|
rm -f repmgr.html
|
||||||
rm -f repmgr-A4.pdf
|
rm -f repmgr-A4.pdf
|
||||||
rm -f repmgr-US.pdf
|
rm -f repmgr-US.pdf
|
||||||
|
rm -f html/*
|
||||||
|
|
||||||
maintainer-clean:
|
maintainer-clean:
|
||||||
rm -rf html
|
rm -rf html
|
||||||
|
|||||||
@@ -471,7 +471,7 @@ repmgr96-4.1.1-0.0git320.g5113ab0.1.el7.x86_64.rpm</programlisting>
|
|||||||
<title>Debian/Ubuntu</title>
|
<title>Debian/Ubuntu</title>
|
||||||
<para>
|
<para>
|
||||||
An archive of old packages (<literal>3.3.2</literal> and later) for Debian/Ubuntu-based systems is available here:
|
An archive of old packages (<literal>3.3.2</literal> and later) for Debian/Ubuntu-based systems is available here:
|
||||||
<ulink url="http://atalia.postgresql.org/morgue/r/repmgr/">http://atalia.postgresql.org/morgue/r/repmgr/</ulink>
|
<ulink url="https://apt-archive.postgresql.org/">https://apt-archive.postgresql.org/</ulink>
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
|||||||
@@ -17,12 +17,12 @@
|
|||||||
|
|
||||||
<!-- remember to update the release date in ../repmgr_version.h.in -->
|
<!-- remember to update the release date in ../repmgr_version.h.in -->
|
||||||
|
|
||||||
<sect1 id="release-5.1">
|
<sect1 id="release-5.1.0">
|
||||||
<title>Release 5.1</title>
|
<title id="release-current">Release 5.1.0</title>
|
||||||
<para><emphasis>?? ?? ??, 2020</emphasis></para>
|
<para><emphasis>Mon 13 April, 2020</emphasis></para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
&repmgr; 5.1 is a major release.
|
&repmgr; 5.1.0 is a major release.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
For details on how to upgrade an existing &repmgr; installation, see
|
For details on how to upgrade an existing &repmgr; installation, see
|
||||||
@@ -50,6 +50,23 @@
|
|||||||
<para>
|
<para>
|
||||||
<itemizedlist>
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The requirement that the &repmgr; user is a database superuser has been
|
||||||
|
removed as far as possible.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In theory, &repmgr; can be operated with a normal database user for managing
|
||||||
|
the &repmgr; database, and a separate replication user for managing replication
|
||||||
|
connections (and replication slots, if these are in use).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Some operations will still require superuser permissions, e.g. for issuing
|
||||||
|
a <command>CHECKPOINT</command> as part of a switchover operation; in this case
|
||||||
|
a valid superuser should be provided with the <option>-S</option>/<option>--superuser</option>
|
||||||
|
option.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
@@ -73,6 +90,7 @@
|
|||||||
Improve logging and checking of potential failure situations.
|
Improve logging and checking of potential failure situations.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>:
|
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>:
|
||||||
@@ -82,12 +100,37 @@
|
|||||||
data directory.
|
data directory.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>:
|
||||||
|
Provide additional information in <option>--dry-run</option> mode output.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>:
|
||||||
|
Checks that the demotion candidate's registered repmgr.conf file can be found, to
|
||||||
|
prevent confusing references to an incorrectly configured data directory. GitHub 615.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<link linkend="repmgr-node-check"><command>repmgr node check</command></link>:
|
<link linkend="repmgr-node-check"><command>repmgr node check</command></link>:
|
||||||
accept option <option>-S</option>/<option>--superuser</option>. GitHub #621.
|
accept option <option>-S</option>/<option>--superuser</option>. GitHub #621.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-node-check"><command>repmgr node check</command></link>:
|
||||||
|
add <option>--upstream</option> option to check whether the node is attached
|
||||||
|
to the expected upstream node.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
@@ -112,6 +155,15 @@
|
|||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>:
|
||||||
|
in <option>--dry-run</option> mode, display the promote command which will be executed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>
|
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>
|
||||||
@@ -123,8 +175,17 @@
|
|||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>:
|
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>:
|
||||||
in PostgreSQL 12 and later, use <varname>service_promote_command</varname> if set.
|
check for demotion candidate reattachment as late as possible to avoid spurious failure
|
||||||
|
reports.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgrd;: check for presence of <option>promote_command</option> and
|
||||||
|
<option>follow_command</option> on receipt of <literal>SIGHUP</literal>. GitHub 614.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
@@ -150,7 +211,7 @@
|
|||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="release-5.0">
|
<sect1 id="release-5.0">
|
||||||
<title id="release-current">Release 5.0</title>
|
<title>Release 5.0</title>
|
||||||
<para><emphasis>Tue 15 October, 2019</emphasis></para>
|
<para><emphasis>Tue 15 October, 2019</emphasis></para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -182,6 +182,14 @@ conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'</programlistin
|
|||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
In examples provided in this documentation, it is assumed the configuration file is located
|
||||||
|
at <filename>/etc/repmgr.conf</filename>. If &repmgr; is installed from a package, the
|
||||||
|
configuration file will probably be located at another location specified by the packager;
|
||||||
|
see appendix <xref linkend="appendix-packages"/> for configuration file locations in
|
||||||
|
different packaging systems.
|
||||||
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
|
Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
|
||||||
an error will be raised if it is not found or not readable, and no attempt will be made to
|
an error will be raised if it is not found or not readable, and no attempt will be made to
|
||||||
@@ -202,6 +210,61 @@ conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'</programlistin
|
|||||||
<filename>/path/to/repmgr.conf</filename>).
|
<filename>/path/to/repmgr.conf</filename>).
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="configuration-file-postgresql-major-upgrades" xreflabel="configuration file and PostgreSQL major version upgrades">
|
||||||
|
<title>Configuration file and PostgreSQL major version upgrades</title>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgr.conf</primary>
|
||||||
|
<secondary>PostgreSQL major version upgrades</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
When upgrading the PostgreSQL cluster to a new major version, <filename>repmgr.conf</filename>
|
||||||
|
will probably need to be updated.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Usually <option>pg_bindir</option> and <option>data_directory</option> will need to be modified,
|
||||||
|
particularly if the default package locations are used, as these usually change.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
It's also possible the location of <filename>repmgr.conf</filename> itself will change
|
||||||
|
(e.g. from <filename>/etc/repmgr/11/repmgr.conf</filename> to <filename>/etc/repmgr/12/repmgr.conf</filename>).
|
||||||
|
This is stored as part of the &repmgr; metadata and is used by &repmgr; to execute &repmgr; remotely
|
||||||
|
(e.g. during a <link linkend="performing-switchover">switchover operation</link>).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If the content and/or location of <filename>repmgr.conf</filename> has changed, the &repmgr; metadata
|
||||||
|
needs to be updated to reflect this. The &repmgr; metadata can be updated on each node with:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<link linkend="repmgr-primary-register">
|
||||||
|
<command>repmgr primary register --force -f /path/to/repmgr.conf</command>
|
||||||
|
</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<link linkend="repmgr-standby-register">
|
||||||
|
<command>repmgr standby register --force -f /path/to/repmgr.conf</command>
|
||||||
|
</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<link linkend="repmgr-witness-register">
|
||||||
|
<command>repmgr witness register --force -f /path/to/repmgr.conf -h primary_host</command>
|
||||||
|
</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|||||||
@@ -127,8 +127,31 @@ node2:5432:repmgr:repmgr:foo
|
|||||||
node2:5432:replication:repluser:foo
|
node2:5432:replication:repluser:foo
|
||||||
node3:5432:repmgr:repmgr:foo
|
node3:5432:repmgr:repmgr:foo
|
||||||
node3:5432:replication:repluser:foo</programlisting>
|
node3:5432:replication:repluser:foo</programlisting>
|
||||||
|
If you are planning to use the <option>-S</option>/<option>--superuser</option> option,
|
||||||
|
there must also be an entry enabling the superuser to connect to the &repmgr; database.
|
||||||
|
Assuming the superuser is <literal>postgres</literal>, the file would look like this:
|
||||||
|
<programlisting>
|
||||||
|
node1:5432:repmgr:repmgr:foo
|
||||||
|
node1:5432:repmgr:postgres:foo
|
||||||
|
node1:5432:replication:repluser:foo
|
||||||
|
node2:5432:repmgr:repmgr:foo
|
||||||
|
node2:5432:repmgr:postgres:foo
|
||||||
|
node2:5432:replication:repluser:foo
|
||||||
|
node3:5432:repmgr:repmgr:foo
|
||||||
|
node3:5432:repmgr:postgres:foo
|
||||||
|
node3:5432:replication:repluser:foo</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The <filename>~/.pgpass</filename> file can be simplified with the use of wildcards if
|
||||||
|
there is no requirement to restrict provision of passwords to particular hosts, ports
|
||||||
|
or databases. The preceding file could then be formatted like this:
|
||||||
|
<programlisting>
|
||||||
|
*:*:*:repmgr:foo
|
||||||
|
*:*:*:postgres:foo
|
||||||
|
</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
It's possible to specify an alternative location for the <filename>~/.pgpass</filename> file, either via
|
It's possible to specify an alternative location for the <filename>~/.pgpass</filename> file, either via
|
||||||
@@ -140,6 +163,11 @@ node3:5432:replication:repluser:foo</programlisting>
|
|||||||
location on all nodes, as when connecting to a remote node, the file referenced is the one on the
|
location on all nodes, as when connecting to a remote node, the file referenced is the one on the
|
||||||
local node.
|
local node.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
Additionally, you <emphasis>must</emphasis> specify the passfile location in <filename>repmgr.conf</filename>
|
||||||
|
with the <option>passfile</option> option so &repmgr; can write the correct path when creating the
|
||||||
|
<option>primary_conninfo</option> parameter for replication configuration on standbys.
|
||||||
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|||||||
@@ -43,7 +43,12 @@
|
|||||||
<programlisting>
|
<programlisting>
|
||||||
repmgr node rejoin -d '$conninfo'</programlisting>
|
repmgr node rejoin -d '$conninfo'</programlisting>
|
||||||
|
|
||||||
where <literal>$conninfo</literal> is the conninfo string of any reachable node in the cluster.
|
where <literal>$conninfo</literal> is the PostgreSQL <literal>conninfo</literal> string of the
|
||||||
|
<emphasis>current</emphasis> primary node (or that of any reachable node in the cluster, but
|
||||||
|
<emphasis>not</emphasis> the local node). This is so that &repmgr; can fetch up-to-date information
|
||||||
|
about the current state of the cluster.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
<filename>repmgr.conf</filename> for the stopped node *must* be supplied explicitly if not
|
<filename>repmgr.conf</filename> for the stopped node *must* be supplied explicitly if not
|
||||||
otherwise available.
|
otherwise available.
|
||||||
</para>
|
</para>
|
||||||
@@ -283,7 +288,15 @@
|
|||||||
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<important>
|
<refsect2 id="repmgr-node-rejoin-pg-rewind-config-files" xreflabel="pg_rewind and configuration files">
|
||||||
|
|
||||||
|
<title><command>pg_rewind</command> and configuration file retention</title>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>pg_rewind</primary>
|
||||||
|
<secondary>configuration file retention</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
||||||
rewind operation, any configuration files in the PostgreSQL data directory will be
|
rewind operation, any configuration files in the PostgreSQL data directory will be
|
||||||
@@ -291,17 +304,27 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
To prevent this happening, provide a comma-separated list of files to retain
|
To prevent this happening, provide a comma-separated list of files to retain
|
||||||
using the <literal>--config-file</literal> command line option; the specified files
|
using the <option>--config-files</option> command line option; the specified files
|
||||||
will be archived in a temporary directory (whose parent directory can be specified with
|
will be archived in a temporary directory (whose parent directory can be specified with
|
||||||
<literal>--config-archive-dir</literal>) and restored once the rewind operation is
|
<option>--config-archive-dir</option>, default: <filename>/tmp</filename>)
|
||||||
complete.
|
and restored once the rewind operation is complete.
|
||||||
</para>
|
</para>
|
||||||
</important>
|
</refsect2>
|
||||||
|
|
||||||
<para>
|
<refsect2 id="repmgr-node-rejoin-pg-rewind-example" xreflabel="example using repmgr node rejoin and pg_rewind">
|
||||||
Example, first using <literal>--dry-run</literal>, then actually executing the
|
|
||||||
<literal>node rejoin command</literal>.
|
<title>Example using <command>repmgr node rejoin</command> and <command>pg_rewind</command></title>
|
||||||
<programlisting>
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>pg_rewind</primary>
|
||||||
|
<secondary>configuration file retention</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Example, first using <option>--dry-run</option>, then actually executing the
|
||||||
|
<literal>node rejoin</literal> command.
|
||||||
|
<programlisting>
|
||||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||||
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind --dry-run
|
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind --dry-run
|
||||||
INFO: replication connection to the rejoin target node was successful
|
INFO: replication connection to the rejoin target node was successful
|
||||||
@@ -317,17 +340,17 @@
|
|||||||
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr'
|
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr'
|
||||||
INFO: prerequisites for executing NODE REJOIN are met</programlisting>
|
INFO: prerequisites for executing NODE REJOIN are met</programlisting>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
|
If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
|
||||||
this checks the prerequisites for using <application>pg_rewind</application>, but is
|
this checks the prerequisites for using <application>pg_rewind</application>, but is
|
||||||
not an absolute guarantee that actually executing <application>pg_rewind</application>
|
not an absolute guarantee that actually executing <application>pg_rewind</application>
|
||||||
will succeed. See also section <xref linkend="repmgr-node-rejoin-caveats"/> below.
|
will succeed. See also section <xref linkend="repmgr-node-rejoin-caveats"/> below.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||||
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind
|
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind
|
||||||
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 3
|
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 3
|
||||||
@@ -339,8 +362,8 @@
|
|||||||
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
||||||
NOTICE: NODE REJOIN successful
|
NOTICE: NODE REJOIN successful
|
||||||
DETAIL: node 2 is now attached to node 3</programlisting>
|
DETAIL: node 2 is now attached to node 3</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
</refsect2>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1 id="repmgr-node-rejoin-caveats" xreflabel="Caveats">
|
<refsect1 id="repmgr-node-rejoin-caveats" xreflabel="Caveats">
|
||||||
@@ -378,6 +401,51 @@
|
|||||||
is running in <option>--dry-run</option> mode.
|
is running in <option>--dry-run</option> mode.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<warning>
|
||||||
|
<para>
|
||||||
|
In all current PostgreSQL versions (as of September 2020), <application>pg_rewind</application>
|
||||||
|
contains a corner-case bug which affects standbys in a very specific situation.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This situation occurs when a standby was shut down <emphasis>before</emphasis> its
|
||||||
|
primary node, and an attempt is made to attach this standby to another primary
|
||||||
|
in the same cluster (following a "split brain" situation where the standby
|
||||||
|
was connected to the wrong primary). In this case, &repmgr; will correctly determine
|
||||||
|
that <application>pg_rewind</application> should be executed, however
|
||||||
|
<application>pg_rewind</application> incorrectly decides that no action is necessary.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In this situation, &repmgr; will report something like:
|
||||||
|
<programlisting>
|
||||||
|
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 1
|
||||||
|
DETAIL: rejoin target server's timeline 3 forked off current database system timeline 2 before current recovery point 0/7019C10</programlisting>
|
||||||
|
but when executed, <application>pg_rewind</application> will report:
|
||||||
|
<programlisting>
|
||||||
|
pg_rewind: servers diverged at WAL location 0/7015540 on timeline 2
|
||||||
|
pg_rewind: no rewind required</programlisting>
|
||||||
|
and if an attempt is made to attach the standby to the new primary, PostgreSQL logs on the standby
|
||||||
|
will contain errors like:
|
||||||
|
<programlisting>
|
||||||
|
[2020-09-07 15:01:41 UTC] LOG: 00000: replication terminated by primary server
|
||||||
|
[2020-09-07 15:01:41 UTC] DETAIL: End of WAL reached on timeline 2 at 0/7015540.
|
||||||
|
[2020-09-07 15:01:41 UTC] LOG: 00000: new timeline 3 forked off current database system timeline 2 before current recovery point 0/7019C10</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Currently it is not possible to resolve this situation using <application>pg_rewind</application>.
|
||||||
|
A <ulink url="https://www.postgresql.org/message-id/flat/CABvVfJU-LDWvoz4-Yow3Ay5LZYTuPD7eSjjE4kGyNZpXC6FrVQ@mail.gmail.com">patch</ulink>
|
||||||
|
has been submitted and will hopefully be included in a forthcoming PostgreSQL minor release.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
As a workaround, start the primary server the standby was previously attached to,
|
||||||
|
and ensure the standby can be attached to it. If <application>pg_rewind</application> was actually executed,
|
||||||
|
it will have copied in the <filename>.history</filename> file from the target primary server; this must
|
||||||
|
be removed. <command>repmgr node rejoin</command> can then be used to attach the standby to the original
|
||||||
|
primary. Ensure any changes pending on the primary have propagated to the standby. Then shut down the primary
|
||||||
|
server <emphasis>first</emphasis>, before shutting down the standby. It should then be possible to
|
||||||
|
use <command>repmgr node rejoin</command> to attach the standby to the new primary.
|
||||||
|
</para>
|
||||||
|
</warning>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
|
|||||||
@@ -95,7 +95,6 @@
|
|||||||
NOTICE: promoting standby to primary
|
NOTICE: promoting standby to primary
|
||||||
DETAIL: promoting server "node2" (ID: 2) using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/postgres/data' promote"
|
DETAIL: promoting server "node2" (ID: 2) using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/postgres/data' promote"
|
||||||
server promoting
|
server promoting
|
||||||
DEBUG: setting node 2 as primary and marking existing primary as failed
|
|
||||||
NOTICE: STANDBY PROMOTE successful
|
NOTICE: STANDBY PROMOTE successful
|
||||||
DETAIL: server "node2" (ID: 2) was successfully promoted to primary</programlisting>
|
DETAIL: server "node2" (ID: 2) was successfully promoted to primary</programlisting>
|
||||||
</para>
|
</para>
|
||||||
@@ -170,6 +169,42 @@
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-F</option></term>
|
||||||
|
<term><option>--force</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Ignore warnings and continue anyway.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This option is relevant in the following situations if <option>--siblings-follow</option> was specified:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
If one or more sibling nodes were not reachable via SSH, the standby will be promoted anyway.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
If the promotion candidate has insufficient free walsenders to accommodate the standbys which will
|
||||||
|
be attached to it, the standby will be promoted anyway.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
If replication slots are in use but the promotion candidate has insufficient free replication slots
|
||||||
|
to accommodate the standbys which will be attached to it, the standby will be promoted anyway.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that if the <option>-F</option>/<option>--force</option> option is used when any of the above
|
||||||
|
situations is encountered, the onus is on the user to manually resolve any resulting issues.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|||||||
@@ -63,6 +63,34 @@
|
|||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
|
||||||
|
<title>Options</title>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Check prerequisites but don't actually register the witness
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-F</option>/<option>--force</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Overwrite an existing node record
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1 id="repmgr-witness-register-events">
|
<refsect1 id="repmgr-witness-register-events">
|
||||||
<title>Event notifications</title>
|
<title>Event notifications</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -15,9 +15,13 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
&repmgrd; can be configured to provide failover
|
&repmgrd; can be configured to provide failover
|
||||||
capability in case the primary upstream node becomes unreachable, and/or
|
capability in case the primary or upstream node becomes unreachable, and/or
|
||||||
provide monitoring data to the &repmgr; metadatabase.
|
provide monitoring data to the &repmgr; metadatabase.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
From &repmgr; 4.4, when running on the primary node, &repmgrd; can also monitor
|
||||||
|
standby disconnections/reconnections (see <xref linkend="repmgrd-primary-child-disconnection"/>).
|
||||||
|
</para>
|
||||||
|
|
||||||
<sect1 id="repmgrd-basic-configuration">
|
<sect1 id="repmgrd-basic-configuration">
|
||||||
<title>repmgrd configuration</title>
|
<title>repmgrd configuration</title>
|
||||||
@@ -583,7 +587,8 @@ repmgrd_service_stop_command='sudo systemctl repmgr12 stop'
|
|||||||
the option <option>monitor_interval_secs</option> (see above).
|
the option <option>monitor_interval_secs</option> (see above).
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
For more details on monitoring, see <xref linkend="repmgrd-monitoring"/>.
|
For more details on monitoring, see <xref linkend="repmgrd-monitoring"/>. For information on
|
||||||
|
monitoring standby disconnections, see <xref linkend="repmgrd-primary-child-disconnection"/>.
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
|||||||
@@ -201,9 +201,13 @@ ALTER EXTENSION repmgr UPDATE</programlisting>
|
|||||||
</para>
|
</para>
|
||||||
<tip>
|
<tip>
|
||||||
<para>
|
<para>
|
||||||
If the &repmgr; upgrade requires a PostgreSQL restart, combine the &repmgr; upgrade
|
If the &repmgr; upgrade requires a PostgreSQL restart, combine the &repmgr; upgrade
|
||||||
with a PostgreSQL minor version upgrade, which will require a restart in any case.
|
with a PostgreSQL minor version upgrade, which will require a restart in any case.
|
||||||
New PostgreSQL minor version are usually released every couple of months.
|
</para>
|
||||||
|
<para>
|
||||||
|
New PostgreSQL minor versions are usually released every couple of months;
|
||||||
|
see the <ulink url="https://www.postgresql.org/developer/roadmap/">Roadmap</ulink>
|
||||||
|
for the current schedule.
|
||||||
</para>
|
</para>
|
||||||
</tip>
|
</tip>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|||||||
265
repmgr--unpackaged--5.1.sql
Normal file
265
repmgr--unpackaged--5.1.sql
Normal file
@@ -0,0 +1,265 @@
|
|||||||
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||||
|
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||||
|
|
||||||
|
-- extract the current schema name
|
||||||
|
-- NOTE: this assumes there will be only one schema matching 'repmgr_%';
|
||||||
|
-- user is responsible for ensuring this is the case
|
||||||
|
|
||||||
|
CREATE TEMPORARY TABLE repmgr_old_schema (schema_name TEXT);
|
||||||
|
INSERT INTO repmgr_old_schema (schema_name)
|
||||||
|
SELECT nspname AS schema_name
|
||||||
|
FROM pg_catalog.pg_namespace
|
||||||
|
WHERE nspname LIKE 'repmgr_%'
|
||||||
|
LIMIT 1;
|
||||||
|
|
||||||
|
-- move old objects into new schema
|
||||||
|
DO $repmgr$
|
||||||
|
DECLARE
|
||||||
|
old_schema TEXT;
|
||||||
|
BEGIN
|
||||||
|
SELECT schema_name FROM repmgr_old_schema
|
||||||
|
INTO old_schema;
|
||||||
|
EXECUTE format('ALTER TABLE %I.repl_nodes SET SCHEMA repmgr', old_schema);
|
||||||
|
EXECUTE format('ALTER TABLE %I.repl_events SET SCHEMA repmgr', old_schema);
|
||||||
|
EXECUTE format('ALTER TABLE %I.repl_monitor SET SCHEMA repmgr', old_schema);
|
||||||
|
EXECUTE format('DROP VIEW IF EXISTS %I.repl_show_nodes', old_schema);
|
||||||
|
EXECUTE format('DROP VIEW IF EXISTS %I.repl_status', old_schema);
|
||||||
|
END$repmgr$;
|
||||||
|
|
||||||
|
-- convert "repmgr_$cluster.repl_nodes" to "repmgr.nodes"
|
||||||
|
CREATE TABLE repmgr.nodes (
|
||||||
|
node_id INTEGER PRIMARY KEY,
|
||||||
|
upstream_node_id INTEGER NULL REFERENCES repmgr.nodes (node_id) DEFERRABLE,
|
||||||
|
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
node_name TEXT NOT NULL,
|
||||||
|
type TEXT NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
|
||||||
|
location TEXT NOT NULL DEFAULT 'default',
|
||||||
|
priority INT NOT NULL DEFAULT 100,
|
||||||
|
conninfo TEXT NOT NULL,
|
||||||
|
repluser VARCHAR(63) NOT NULL,
|
||||||
|
slot_name TEXT NULL,
|
||||||
|
config_file TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO repmgr.nodes
|
||||||
|
(node_id, upstream_node_id, active, node_name, type, location, priority, conninfo, repluser, slot_name, config_file)
|
||||||
|
SELECT id, upstream_node_id, active, name,
|
||||||
|
CASE WHEN type = 'master' THEN 'primary' ELSE type END,
|
||||||
|
'default', priority, conninfo, 'unknown', slot_name, 'unknown'
|
||||||
|
FROM repmgr.repl_nodes
|
||||||
|
ORDER BY id;
|
||||||
|
|
||||||
|
|
||||||
|
-- convert "repmgr_$cluster.repl_events" to "events"
|
||||||
|
|
||||||
|
ALTER TABLE repmgr.repl_events RENAME TO events;
|
||||||
|
|
||||||
|
-- create new table "repmgr.voting_term"
|
||||||
|
CREATE TABLE repmgr.voting_term (
|
||||||
|
term INT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE UNIQUE INDEX voting_term_restrict
|
||||||
|
ON repmgr.voting_term ((TRUE));
|
||||||
|
|
||||||
|
CREATE RULE voting_term_delete AS
|
||||||
|
ON DELETE TO repmgr.voting_term
|
||||||
|
DO INSTEAD NOTHING;
|
||||||
|
|
||||||
|
INSERT INTO repmgr.voting_term (term) VALUES (1);
|
||||||
|
|
||||||
|
|
||||||
|
-- convert "repmgr_$cluster.repl_monitor" to "monitoring_history"
|
||||||
|
|
||||||
|
|
||||||
|
DO $repmgr$
|
||||||
|
DECLARE
|
||||||
|
DECLARE server_version_num INT;
|
||||||
|
BEGIN
|
||||||
|
SELECT setting
|
||||||
|
FROM pg_catalog.pg_settings
|
||||||
|
WHERE name = 'server_version_num'
|
||||||
|
INTO server_version_num;
|
||||||
|
IF server_version_num >= 90400 THEN
|
||||||
|
EXECUTE $repmgr_func$
|
||||||
|
CREATE TABLE repmgr.monitoring_history (
|
||||||
|
primary_node_id INTEGER NOT NULL,
|
||||||
|
standby_node_id INTEGER NOT NULL,
|
||||||
|
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||||
|
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||||
|
last_wal_primary_location PG_LSN NOT NULL,
|
||||||
|
last_wal_standby_location PG_LSN,
|
||||||
|
replication_lag BIGINT NOT NULL,
|
||||||
|
apply_lag BIGINT NOT NULL
|
||||||
|
)
|
||||||
|
$repmgr_func$;
|
||||||
|
INSERT INTO repmgr.monitoring_history
|
||||||
|
(primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
|
||||||
|
SELECT primary_node, standby_node, last_monitor_time, last_apply_time, last_wal_primary_location::pg_lsn, last_wal_standby_location::pg_lsn, replication_lag, apply_lag
|
||||||
|
FROM repmgr.repl_monitor;
|
||||||
|
ELSE
|
||||||
|
EXECUTE $repmgr_func$
|
||||||
|
CREATE TABLE repmgr.monitoring_history (
|
||||||
|
primary_node_id INTEGER NOT NULL,
|
||||||
|
standby_node_id INTEGER NOT NULL,
|
||||||
|
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||||
|
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||||
|
last_wal_primary_location TEXT NOT NULL,
|
||||||
|
last_wal_standby_location TEXT,
|
||||||
|
replication_lag BIGINT NOT NULL,
|
||||||
|
apply_lag BIGINT NOT NULL
|
||||||
|
)
|
||||||
|
$repmgr_func$;
|
||||||
|
INSERT INTO repmgr.monitoring_history
|
||||||
|
(primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
|
||||||
|
SELECT primary_node, standby_node, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag
|
||||||
|
FROM repmgr.repl_monitor;
|
||||||
|
|
||||||
|
END IF;
|
||||||
|
END$repmgr$;
|
||||||
|
|
||||||
|
CREATE INDEX idx_monitoring_history_time
|
||||||
|
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||||
|
|
||||||
|
CREATE VIEW repmgr.show_nodes AS
|
||||||
|
SELECT n.node_id,
|
||||||
|
n.node_name,
|
||||||
|
n.active,
|
||||||
|
n.upstream_node_id,
|
||||||
|
un.node_name AS upstream_node_name,
|
||||||
|
n.type,
|
||||||
|
n.priority,
|
||||||
|
n.conninfo
|
||||||
|
FROM repmgr.nodes n
|
||||||
|
LEFT JOIN repmgr.nodes un
|
||||||
|
ON un.node_id = n.upstream_node_id;
|
||||||
|
|
||||||
|
|
||||||
|
/* ================= */
|
||||||
|
/* repmgrd functions */
|
||||||
|
/* ================= */
|
||||||
|
|
||||||
|
/* monitoring functions */
|
||||||
|
|
||||||
|
CREATE FUNCTION set_local_node_id(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_local_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_local_node_id()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_local_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION standby_set_last_updated()
|
||||||
|
RETURNS TIMESTAMP WITH TIME ZONE
|
||||||
|
AS 'MODULE_PATHNAME', 'standby_set_last_updated'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION standby_get_last_updated()
|
||||||
|
RETURNS TIMESTAMP WITH TIME ZONE
|
||||||
|
AS 'MODULE_PATHNAME', 'standby_get_last_updated'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION set_upstream_last_seen(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_upstream_last_seen'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_upstream_last_seen()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_upstream_last_seen'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_upstream_node_id()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_upstream_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION set_upstream_node_id(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_upstream_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
/* failover functions */
|
||||||
|
|
||||||
|
CREATE FUNCTION notify_follow_primary(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'notify_follow_primary'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_new_primary()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_new_primary'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION reset_voting_status()
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'reset_voting_status'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_repmgrd_pid()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_repmgrd_pidfile()
|
||||||
|
RETURNS TEXT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
|
||||||
|
LANGUAGE C CALLED ON NULL INPUT;
|
||||||
|
|
||||||
|
CREATE FUNCTION repmgrd_is_running()
|
||||||
|
RETURNS BOOL
|
||||||
|
AS 'MODULE_PATHNAME', 'repmgrd_is_running'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION repmgrd_pause(BOOL)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'repmgrd_pause'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION repmgrd_is_paused()
|
||||||
|
RETURNS BOOL
|
||||||
|
AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_wal_receiver_pid()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_wal_receiver_pid'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
|
||||||
|
/* views */
|
||||||
|
|
||||||
|
CREATE VIEW repmgr.replication_status AS
|
||||||
|
SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
|
||||||
|
n.type AS node_type, n.active, last_monitor_time,
|
||||||
|
CASE WHEN n.type='standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
|
||||||
|
m.last_wal_standby_location,
|
||||||
|
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
|
||||||
|
CASE WHEN n.type='standby' THEN
|
||||||
|
CASE WHEN replication_lag > 0 THEN age(now(), m.last_apply_time) ELSE '0'::INTERVAL END
|
||||||
|
ELSE NULL
|
||||||
|
END AS replication_time_lag,
|
||||||
|
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
|
||||||
|
AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN repmgr.standby_get_last_updated() ELSE m.last_monitor_time END) AS communication_time_lag
|
||||||
|
FROM repmgr.monitoring_history m
|
||||||
|
JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
|
||||||
|
WHERE (m.standby_node_id, m.last_monitor_time) IN (
|
||||||
|
SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
|
||||||
|
FROM repmgr.monitoring_history m1 GROUP BY 1
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* drop old tables */
|
||||||
|
DROP TABLE repmgr.repl_nodes;
|
||||||
|
DROP TABLE repmgr.repl_monitor;
|
||||||
|
|
||||||
|
-- remove temporary table
|
||||||
|
DROP TABLE repmgr_old_schema;
|
||||||
@@ -55,10 +55,8 @@ typedef enum
|
|||||||
struct ColHeader headers_show[SHOW_HEADER_COUNT];
|
struct ColHeader headers_show[SHOW_HEADER_COUNT];
|
||||||
struct ColHeader headers_event[EVENT_HEADER_COUNT];
|
struct ColHeader headers_event[EVENT_HEADER_COUNT];
|
||||||
|
|
||||||
|
static int build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, ItemList *warnings, int *error_code);
|
||||||
|
static int build_cluster_crosscheck(t_node_status_cube ***cube_dest, ItemList *warnings, int *error_code);
|
||||||
static int build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, ItemList *warnings, int *error_code);
|
|
||||||
static int build_cluster_crosscheck(t_node_status_cube ***cube_dest, int *name_length, ItemList *warnings, int *error_code);
|
|
||||||
static void cube_set_node_status(t_node_status_cube **cube, int n, int node_id, int matrix_node_id, int connection_node_id, int connection_status);
|
static void cube_set_node_status(t_node_status_cube **cube, int n, int node_id, int matrix_node_id, int connection_node_id, int connection_status);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -538,9 +536,6 @@ do_cluster_crosscheck(void)
|
|||||||
{
|
{
|
||||||
int i = 0,
|
int i = 0,
|
||||||
n = 0;
|
n = 0;
|
||||||
char c;
|
|
||||||
const char *node_header = "Name";
|
|
||||||
int name_length = strlen(node_header);
|
|
||||||
|
|
||||||
t_node_status_cube **cube;
|
t_node_status_cube **cube;
|
||||||
|
|
||||||
@@ -548,7 +543,7 @@ do_cluster_crosscheck(void)
|
|||||||
int error_code = SUCCESS;
|
int error_code = SUCCESS;
|
||||||
ItemList warnings = {NULL, NULL};
|
ItemList warnings = {NULL, NULL};
|
||||||
|
|
||||||
n = build_cluster_crosscheck(&cube, &name_length, &warnings, &error_code);
|
n = build_cluster_crosscheck(&cube, &warnings, &error_code);
|
||||||
|
|
||||||
if (runtime_options.output_mode == OM_CSV)
|
if (runtime_options.output_mode == OM_CSV)
|
||||||
{
|
{
|
||||||
@@ -582,24 +577,56 @@ do_cluster_crosscheck(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
printf("%*s | Id ", name_length, node_header);
|
/* output header contains node name, node ID and one column for each node in the cluster */
|
||||||
for (i = 0; i < n; i++)
|
struct ColHeader *headers_crosscheck = NULL;
|
||||||
printf("| %2d ", cube[i]->node_id);
|
|
||||||
printf("\n");
|
int header_count = n + 2;
|
||||||
|
int header_id = 2;
|
||||||
|
|
||||||
|
headers_crosscheck = palloc0(sizeof(ColHeader) * header_count);
|
||||||
|
|
||||||
|
/* Initialize column headers */
|
||||||
|
strncpy(headers_crosscheck[0].title, _("Name"), MAXLEN);
|
||||||
|
strncpy(headers_crosscheck[1].title, _("ID"), MAXLEN);
|
||||||
|
|
||||||
for (i = 0; i < name_length; i++)
|
|
||||||
printf("-");
|
|
||||||
printf("-+----");
|
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
printf("+----");
|
{
|
||||||
printf("\n");
|
maxlen_snprintf(headers_crosscheck[header_id].title, "%i", cube[i]->node_id);
|
||||||
|
header_id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize column max values */
|
||||||
|
for (i = 0; i < header_count; i++)
|
||||||
|
{
|
||||||
|
headers_crosscheck[i].display = true;
|
||||||
|
headers_crosscheck[i].max_length = strlen(headers_crosscheck[i].title);
|
||||||
|
headers_crosscheck[i].cur_length = headers_crosscheck[i].max_length;
|
||||||
|
|
||||||
|
/* We can derive the maximum node ID length for the ID column from
|
||||||
|
* the generated matrix node ID headers
|
||||||
|
*/
|
||||||
|
if (i >= 2 && headers_crosscheck[i].max_length > headers_crosscheck[1].max_length)
|
||||||
|
headers_crosscheck[1].max_length = headers_crosscheck[i].max_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
if (strlen(cube[i]->node_name) > headers_crosscheck[0].max_length)
|
||||||
|
{
|
||||||
|
headers_crosscheck[0].max_length = strlen(cube[i]->node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print_status_header(header_count, headers_crosscheck);
|
||||||
|
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
int column_node_ix;
|
int column_node_ix;
|
||||||
|
|
||||||
printf("%*s | %2d ", name_length,
|
printf(" %-*s | %-*i ",
|
||||||
|
headers_crosscheck[0].max_length,
|
||||||
cube[i]->node_name,
|
cube[i]->node_name,
|
||||||
|
headers_crosscheck[1].max_length,
|
||||||
cube[i]->node_id);
|
cube[i]->node_id);
|
||||||
|
|
||||||
for (column_node_ix = 0; column_node_ix < n; column_node_ix++)
|
for (column_node_ix = 0; column_node_ix < n; column_node_ix++)
|
||||||
@@ -607,6 +634,8 @@ do_cluster_crosscheck(void)
|
|||||||
int max_node_status = -2;
|
int max_node_status = -2;
|
||||||
int node_ix = 0;
|
int node_ix = 0;
|
||||||
|
|
||||||
|
char c;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The value of entry (i,j) is equal to the maximum value of all
|
* The value of entry (i,j) is equal to the maximum value of all
|
||||||
* the (i,j,k). Indeed:
|
* the (i,j,k). Indeed:
|
||||||
@@ -646,7 +675,7 @@ do_cluster_crosscheck(void)
|
|||||||
exit(ERR_INTERNAL);
|
exit(ERR_INTERNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("| %c ", c);
|
printf("| %-*c ", headers_crosscheck[column_node_ix + 2].max_length, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
@@ -708,16 +737,13 @@ do_cluster_matrix()
|
|||||||
j = 0,
|
j = 0,
|
||||||
n = 0;
|
n = 0;
|
||||||
|
|
||||||
const char *node_header = "Name";
|
|
||||||
int name_length = strlen(node_header);
|
|
||||||
|
|
||||||
t_node_matrix_rec **matrix_rec_list;
|
t_node_matrix_rec **matrix_rec_list;
|
||||||
|
|
||||||
bool connection_error_found = false;
|
bool connection_error_found = false;
|
||||||
int error_code = SUCCESS;
|
int error_code = SUCCESS;
|
||||||
ItemList warnings = {NULL, NULL};
|
ItemList warnings = {NULL, NULL};
|
||||||
|
|
||||||
n = build_cluster_matrix(&matrix_rec_list, &name_length, &warnings, &error_code);
|
n = build_cluster_matrix(&matrix_rec_list, &warnings, &error_code);
|
||||||
|
|
||||||
if (runtime_options.output_mode == OM_CSV)
|
if (runtime_options.output_mode == OM_CSV)
|
||||||
{
|
{
|
||||||
@@ -740,27 +766,60 @@ do_cluster_matrix()
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
char c;
|
/* output header contains node name, node ID and one column for each node in the cluster */
|
||||||
|
struct ColHeader *headers_matrix = NULL;
|
||||||
|
|
||||||
printf("%*s | Id ", name_length, node_header);
|
int header_count = n + 2;
|
||||||
for (i = 0; i < n; i++)
|
int header_id = 2;
|
||||||
printf("| %2d ", matrix_rec_list[i]->node_id);
|
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
for (i = 0; i < name_length; i++)
|
headers_matrix = palloc0(sizeof(ColHeader) * header_count);
|
||||||
printf("-");
|
|
||||||
printf("-+----");
|
/* Initialize column headers */
|
||||||
for (i = 0; i < n; i++)
|
strncpy(headers_matrix[0].title, _("Name"), MAXLEN);
|
||||||
printf("+----");
|
strncpy(headers_matrix[1].title, _("ID"), MAXLEN);
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
printf("%*s | %2d ", name_length,
|
maxlen_snprintf(headers_matrix[header_id].title, "%i", matrix_rec_list[i]->node_id);
|
||||||
|
header_id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize column max values */
|
||||||
|
for (i = 0; i < header_count; i++)
|
||||||
|
{
|
||||||
|
headers_matrix[i].display = true;
|
||||||
|
headers_matrix[i].max_length = strlen(headers_matrix[i].title);
|
||||||
|
headers_matrix[i].cur_length = headers_matrix[i].max_length;
|
||||||
|
|
||||||
|
/* We can derive the maximum node ID length for the ID column from
|
||||||
|
* the generated matrix node ID headers
|
||||||
|
*/
|
||||||
|
if (i >= 2 && headers_matrix[i].max_length > headers_matrix[1].max_length)
|
||||||
|
headers_matrix[1].max_length = headers_matrix[i].max_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
if (strlen(matrix_rec_list[i]->node_name) > headers_matrix[0].max_length)
|
||||||
|
{
|
||||||
|
headers_matrix[0].max_length = strlen(matrix_rec_list[i]->node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print_status_header(header_count, headers_matrix);
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
printf(" %-*s | %-*i ",
|
||||||
|
headers_matrix[0].max_length,
|
||||||
matrix_rec_list[i]->node_name,
|
matrix_rec_list[i]->node_name,
|
||||||
|
headers_matrix[1].max_length,
|
||||||
matrix_rec_list[i]->node_id);
|
matrix_rec_list[i]->node_id);
|
||||||
for (j = 0; j < n; j++)
|
for (j = 0; j < n; j++)
|
||||||
{
|
{
|
||||||
|
char c;
|
||||||
|
|
||||||
switch (matrix_rec_list[i]->node_status_list[j]->node_status)
|
switch (matrix_rec_list[i]->node_status_list[j]->node_status)
|
||||||
{
|
{
|
||||||
case -2:
|
case -2:
|
||||||
@@ -778,7 +837,7 @@ do_cluster_matrix()
|
|||||||
exit(ERR_INTERNAL);
|
exit(ERR_INTERNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("| %c ", c);
|
printf("| %-*c ", headers_matrix[j + 2].max_length, c);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
@@ -838,7 +897,7 @@ matrix_set_node_status(t_node_matrix_rec **matrix_rec_list, int n, int node_id,
|
|||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, ItemList *warnings, int *error_code)
|
build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, ItemList *warnings, int *error_code)
|
||||||
{
|
{
|
||||||
PGconn *conn = NULL;
|
PGconn *conn = NULL;
|
||||||
int i = 0,
|
int i = 0,
|
||||||
@@ -896,7 +955,6 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, Ite
|
|||||||
/* Initialise matrix structure for each node */
|
/* Initialise matrix structure for each node */
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
for (cell = nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
int name_length_cur;
|
|
||||||
NodeInfoListCell *cell_j;
|
NodeInfoListCell *cell_j;
|
||||||
|
|
||||||
matrix_rec_list[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
matrix_rec_list[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
||||||
@@ -906,13 +964,6 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, Ite
|
|||||||
cell->node_info->node_name,
|
cell->node_info->node_name,
|
||||||
sizeof(matrix_rec_list[i]->node_name));
|
sizeof(matrix_rec_list[i]->node_name));
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the maximum length of a node name
|
|
||||||
*/
|
|
||||||
name_length_cur = strlen(matrix_rec_list[i]->node_name);
|
|
||||||
if (name_length_cur > *name_length)
|
|
||||||
*name_length = name_length_cur;
|
|
||||||
|
|
||||||
matrix_rec_list[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec) * nodes.node_count);
|
matrix_rec_list[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec) * nodes.node_count);
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
@@ -1077,7 +1128,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, Ite
|
|||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, ItemList *warnings, int *error_code)
|
build_cluster_crosscheck(t_node_status_cube ***dest_cube, ItemList *warnings, int *error_code)
|
||||||
{
|
{
|
||||||
PGconn *conn = NULL;
|
PGconn *conn = NULL;
|
||||||
int h,
|
int h,
|
||||||
@@ -1126,20 +1177,12 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, Item
|
|||||||
|
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
for (cell = nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
int name_length_cur = 0;
|
|
||||||
NodeInfoListCell *cell_i = NULL;
|
NodeInfoListCell *cell_i = NULL;
|
||||||
|
|
||||||
cube[h] = (t_node_status_cube *) pg_malloc(sizeof(t_node_status_cube));
|
cube[h] = (t_node_status_cube *) pg_malloc(sizeof(t_node_status_cube));
|
||||||
cube[h]->node_id = cell->node_info->node_id;
|
cube[h]->node_id = cell->node_info->node_id;
|
||||||
strncpy(cube[h]->node_name, cell->node_info->node_name, sizeof(cube[h]->node_name));
|
strncpy(cube[h]->node_name, cell->node_info->node_name, sizeof(cube[h]->node_name));
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the maximum length of a node name
|
|
||||||
*/
|
|
||||||
name_length_cur = strlen(cube[h]->node_name);
|
|
||||||
if (name_length_cur > *name_length)
|
|
||||||
*name_length = name_length_cur;
|
|
||||||
|
|
||||||
cube[h]->matrix_list_rec = (t_node_matrix_rec **) pg_malloc(sizeof(t_node_matrix_rec) * nodes.node_count);
|
cube[h]->matrix_list_rec = (t_node_matrix_rec **) pg_malloc(sizeof(t_node_matrix_rec) * nodes.node_count);
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ static void _do_node_restore_config(void);
|
|||||||
|
|
||||||
static void do_node_check_replication_connection(void);
|
static void do_node_check_replication_connection(void);
|
||||||
static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
|
static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
|
static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_upstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_upstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
@@ -204,7 +204,16 @@ do_node_status(void)
|
|||||||
|
|
||||||
if (enabled == false && recovery_type == RECTYPE_STANDBY)
|
if (enabled == false && recovery_type == RECTYPE_STANDBY)
|
||||||
{
|
{
|
||||||
appendPQExpBufferStr(&archiving_status, " (on standbys \"archive_mode\" must be set to \"always\" to be effective)");
|
if (PQserverVersion(conn) >= 90500)
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&archiving_status,
|
||||||
|
" (on standbys \"archive_mode\" must be set to \"always\" to be effective)");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&archiving_status,
|
||||||
|
" (\"archive_mode\" has no effect on standbys)");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
key_value_list_set(&node_status,
|
key_value_list_set(&node_status,
|
||||||
@@ -294,7 +303,7 @@ do_node_status(void)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_downstream_node_attached(conn, node_cell->node_info->node_name) != NODE_ATTACHED)
|
if (is_downstream_node_attached(conn, node_cell->node_info->node_name, NULL) != NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
missing_nodes_count++;
|
missing_nodes_count++;
|
||||||
item_list_append_format(&missing_nodes,
|
item_list_append_format(&missing_nodes,
|
||||||
@@ -797,6 +806,7 @@ do_node_check(void)
|
|||||||
{
|
{
|
||||||
return_code = do_node_check_downstream(conn,
|
return_code = do_node_check_downstream(conn,
|
||||||
runtime_options.output_mode,
|
runtime_options.output_mode,
|
||||||
|
&node_info,
|
||||||
NULL);
|
NULL);
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(return_code);
|
exit(return_code);
|
||||||
@@ -888,7 +898,7 @@ do_node_check(void)
|
|||||||
if (do_node_check_upstream(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
if (do_node_check_upstream(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||||
issue_detected = true;
|
issue_detected = true;
|
||||||
|
|
||||||
if (do_node_check_downstream(conn, runtime_options.output_mode, &status_list) != CHECK_STATUS_OK)
|
if (do_node_check_downstream(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||||
issue_detected = true;
|
issue_detected = true;
|
||||||
|
|
||||||
if (do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
if (do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||||
@@ -1183,7 +1193,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
|
|||||||
|
|
||||||
|
|
||||||
static CheckStatus
|
static CheckStatus
|
||||||
do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output)
|
do_node_check_downstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||||
{
|
{
|
||||||
NodeInfoList downstream_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
NodeInfoList downstream_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
NodeInfoListCell *cell = NULL;
|
NodeInfoListCell *cell = NULL;
|
||||||
@@ -1217,7 +1227,7 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_downstream_node_attached(conn, cell->node_info->node_name) != NODE_ATTACHED)
|
if (is_downstream_node_attached(conn, cell->node_info->node_name, NULL) != NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
missing_nodes_count++;
|
missing_nodes_count++;
|
||||||
item_list_append_format(&missing_nodes,
|
item_list_append_format(&missing_nodes,
|
||||||
@@ -1234,7 +1244,13 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (missing_nodes_count == 0)
|
if (node_info->type == WITNESS)
|
||||||
|
{
|
||||||
|
/* witness is not connecting to any upstream */
|
||||||
|
appendPQExpBufferStr(&details,
|
||||||
|
_("N/A - node is a witness"));
|
||||||
|
}
|
||||||
|
else if (missing_nodes_count == 0)
|
||||||
{
|
{
|
||||||
if (expected_nodes_count == 0)
|
if (expected_nodes_count == 0)
|
||||||
appendPQExpBufferStr(&details,
|
appendPQExpBufferStr(&details,
|
||||||
@@ -1367,7 +1383,13 @@ do_node_check_upstream(PGconn *conn, OutputMode mode, t_node_info *node_info, Ch
|
|||||||
|
|
||||||
initPQExpBuffer(&details);
|
initPQExpBuffer(&details);
|
||||||
|
|
||||||
if (get_node_record(conn, node_info->upstream_node_id, &upstream_node_info) != RECORD_FOUND)
|
if (node_info->type == WITNESS)
|
||||||
|
{
|
||||||
|
/* witness is not connecting to any upstream */
|
||||||
|
appendPQExpBufferStr(&details,
|
||||||
|
_("N/A - node is a witness"));
|
||||||
|
}
|
||||||
|
else if (get_node_record(conn, node_info->upstream_node_id, &upstream_node_info) != RECORD_FOUND)
|
||||||
{
|
{
|
||||||
if (get_recovery_type(conn) == RECTYPE_STANDBY)
|
if (get_recovery_type(conn) == RECTYPE_STANDBY)
|
||||||
{
|
{
|
||||||
@@ -1388,7 +1410,7 @@ do_node_check_upstream(PGconn *conn, OutputMode mode, t_node_info *node_info, Ch
|
|||||||
upstream_conn = establish_db_connection(upstream_node_info.conninfo, true);
|
upstream_conn = establish_db_connection(upstream_node_info.conninfo, true);
|
||||||
|
|
||||||
/* check our node is connected */
|
/* check our node is connected */
|
||||||
if (is_downstream_node_attached(upstream_conn, config_file_options.node_name) != NODE_ATTACHED)
|
if (is_downstream_node_attached(upstream_conn, config_file_options.node_name, NULL) != NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
appendPQExpBuffer(&details,
|
appendPQExpBuffer(&details,
|
||||||
_("node \"%s\" (ID: %i) is not attached to expected upstream node \"%s\" (ID: %i)"),
|
_("node \"%s\" (ID: %i) is not attached to expected upstream node \"%s\" (ID: %i)"),
|
||||||
@@ -2458,7 +2480,7 @@ do_node_rejoin(void)
|
|||||||
log_hint(_("check the local node is registered with the current primary \"%s\" (ID: %i)"),
|
log_hint(_("check the local node is registered with the current primary \"%s\" (ID: %i)"),
|
||||||
primary_node_record.node_name,
|
primary_node_record.node_name,
|
||||||
primary_node_record.node_id);
|
primary_node_record.node_id);
|
||||||
PQfinish(upstream_conn);
|
|
||||||
PQfinish(primary_conn);
|
PQfinish(primary_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -2777,7 +2799,7 @@ do_node_rejoin(void)
|
|||||||
config_file_options.node_rejoin_timeout);
|
config_file_options.node_rejoin_timeout);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
log_detail(_("no record for local node \"%s\" found in node \"%s\"'s \"pg_stat_replication\" table"),
|
log_detail(_("no active record for local node \"%s\" found in node \"%s\"'s \"pg_stat_replication\" table"),
|
||||||
config_file_options.node_name,
|
config_file_options.node_name,
|
||||||
primary_node_record.node_name);
|
primary_node_record.node_name);
|
||||||
}
|
}
|
||||||
@@ -2789,7 +2811,7 @@ do_node_rejoin(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* -W/--no-wait provided - check once */
|
/* -W/--no-wait provided - check once */
|
||||||
NodeAttached node_attached = is_downstream_node_attached(primary_conn, config_file_options.node_name);
|
NodeAttached node_attached = is_downstream_node_attached(primary_conn, config_file_options.node_name, NULL);
|
||||||
if (node_attached == NODE_ATTACHED)
|
if (node_attached == NODE_ATTACHED)
|
||||||
success = true;
|
success = true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -986,14 +986,15 @@ check_barman_config(void)
|
|||||||
/*
|
/*
|
||||||
* _do_create_replication_conf()
|
* _do_create_replication_conf()
|
||||||
*
|
*
|
||||||
* Create recovery.conf for a previously cloned instance.
|
* Create replication configuration for a previously cloned instance.
|
||||||
*
|
*
|
||||||
* Prerequisites:
|
* Prerequisites:
|
||||||
*
|
*
|
||||||
* - data directory must be provided
|
* - data directory must be provided, either explicitly or via
|
||||||
|
* repmgr.conf
|
||||||
* - the instance should not be running
|
* - the instance should not be running
|
||||||
* - an existing "recovery.conf" file can only be overwritten with
|
* - an existing "recovery.conf" file can only be overwritten with
|
||||||
* -F/--force
|
* -F/--force (Pg11 and earlier)
|
||||||
* - connection parameters for an existing, running node must be provided
|
* - connection parameters for an existing, running node must be provided
|
||||||
* - --upstream-node-id, if provided, will be "primary_conninfo",
|
* - --upstream-node-id, if provided, will be "primary_conninfo",
|
||||||
* otherwise primary node id; node must exist; unless -F/--force
|
* otherwise primary node id; node must exist; unless -F/--force
|
||||||
@@ -1168,7 +1169,7 @@ _do_create_replication_conf(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_hint(_("standby must be registered before a new recovery.conf file can be created"));
|
log_hint(_("standby must be registered before replication can be configured"));
|
||||||
}
|
}
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -1796,7 +1797,7 @@ do_standby_register(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* check our standby is connected */
|
/* check our standby is connected */
|
||||||
if (is_downstream_node_attached(upstream_conn, config_file_options.node_name) == NODE_ATTACHED)
|
if (is_downstream_node_attached(upstream_conn, config_file_options.node_name, NULL) == NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_INFO, _("local node is attached to specified upstream node %i"), runtime_options.upstream_node_id);
|
log_verbose(LOG_INFO, _("local node is attached to specified upstream node %i"), runtime_options.upstream_node_id);
|
||||||
}
|
}
|
||||||
@@ -1855,7 +1856,7 @@ do_standby_register(void)
|
|||||||
primary_node_id);
|
primary_node_id);
|
||||||
|
|
||||||
/* check our standby is connected */
|
/* check our standby is connected */
|
||||||
if (is_downstream_node_attached(primary_conn, config_file_options.node_name) == NODE_ATTACHED)
|
if (is_downstream_node_attached(primary_conn, config_file_options.node_name, NULL) == NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_INFO, _("local node is attached to primary"));
|
log_verbose(LOG_INFO, _("local node is attached to primary"));
|
||||||
}
|
}
|
||||||
@@ -2031,6 +2032,9 @@ do_standby_register(void)
|
|||||||
if (node_record_on_standby.priority != node_record_on_primary.priority)
|
if (node_record_on_standby.priority != node_record_on_primary.priority)
|
||||||
records_match = false;
|
records_match = false;
|
||||||
|
|
||||||
|
if (strcmp(node_record_on_standby.location, node_record_on_primary.location) != 0)
|
||||||
|
records_match = false;
|
||||||
|
|
||||||
if (node_record_on_standby.active != node_record_on_primary.active)
|
if (node_record_on_standby.active != node_record_on_primary.active)
|
||||||
records_match = false;
|
records_match = false;
|
||||||
|
|
||||||
@@ -2388,7 +2392,7 @@ do_standby_promote(void)
|
|||||||
*/
|
*/
|
||||||
if (check_free_wal_senders(available_wal_senders, &sibling_nodes_stats, &dry_run_success) == false)
|
if (check_free_wal_senders(available_wal_senders, &sibling_nodes_stats, &dry_run_success) == false)
|
||||||
{
|
{
|
||||||
if (runtime_options.dry_run == false)
|
if (runtime_options.dry_run == false || runtime_options.force == false)
|
||||||
{
|
{
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -2402,7 +2406,7 @@ do_standby_promote(void)
|
|||||||
*/
|
*/
|
||||||
if (check_free_slots(&local_node_record, &sibling_nodes_stats, &dry_run_success) == false)
|
if (check_free_slots(&local_node_record, &sibling_nodes_stats, &dry_run_success) == false)
|
||||||
{
|
{
|
||||||
if (runtime_options.dry_run == false)
|
if (runtime_options.dry_run == false || runtime_options.force == false)
|
||||||
{
|
{
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
@@ -2503,7 +2507,7 @@ _do_standby_promote_internal(PGconn *conn)
|
|||||||
/*
|
/*
|
||||||
* Promote standby to primary.
|
* Promote standby to primary.
|
||||||
*
|
*
|
||||||
* `pg_ctl promote` returns immediately and (prior to 10.0) has no -w
|
* "pg_ctl promote" returns immediately and (prior to 10.0) has no -w
|
||||||
* option so we can't be sure when or if the promotion completes. For now
|
* option so we can't be sure when or if the promotion completes. For now
|
||||||
* we'll poll the server until the default timeout (60 seconds)
|
* we'll poll the server until the default timeout (60 seconds)
|
||||||
*
|
*
|
||||||
@@ -3069,7 +3073,9 @@ do_standby_follow(void)
|
|||||||
|
|
||||||
for (timer = 0; timer < config_file_options.standby_follow_timeout; timer++)
|
for (timer = 0; timer < config_file_options.standby_follow_timeout; timer++)
|
||||||
{
|
{
|
||||||
NodeAttached node_attached = is_downstream_node_attached(follow_target_conn, config_file_options.node_name);
|
NodeAttached node_attached = is_downstream_node_attached(follow_target_conn,
|
||||||
|
config_file_options.node_name,
|
||||||
|
NULL);
|
||||||
|
|
||||||
if (node_attached == NODE_ATTACHED)
|
if (node_attached == NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
@@ -3563,7 +3569,8 @@ do_standby_switchover(void)
|
|||||||
{
|
{
|
||||||
if (runtime_options.dry_run == true)
|
if (runtime_options.dry_run == true)
|
||||||
{
|
{
|
||||||
log_info(_("validating database connection for superuser \"%s\""), runtime_options.superuser);
|
log_info(_("validating connection to local database for superuser \"%s\""),
|
||||||
|
runtime_options.superuser);
|
||||||
}
|
}
|
||||||
|
|
||||||
superuser_conn = establish_db_connection_with_replacement_param(
|
superuser_conn = establish_db_connection_with_replacement_param(
|
||||||
@@ -3573,23 +3580,27 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
if (PQstatus(superuser_conn) != CONNECTION_OK)
|
if (PQstatus(superuser_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to connect as provided superuser \"%s\""),
|
log_error(_("unable to connect to local database \"%s\" as provided superuser \"%s\""),
|
||||||
|
PQdb(superuser_conn),
|
||||||
runtime_options.superuser);
|
runtime_options.superuser);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_superuser_connection(superuser_conn, NULL) == false)
|
if (is_superuser_connection(superuser_conn, NULL) == false)
|
||||||
{
|
{
|
||||||
log_error(_("database connection established for provided superuser \"%s\" is not a superuser connection"),
|
log_error(_("connection established to local database \"%s\" for provided superuser \"%s\" is not a superuser connection"),
|
||||||
|
PQdb(superuser_conn),
|
||||||
runtime_options.superuser);
|
runtime_options.superuser);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (runtime_options.dry_run == true)
|
if (runtime_options.dry_run == true)
|
||||||
{
|
{
|
||||||
log_info(_("successfully established database connection established for provided superuser \"%s\""),
|
log_info(_("successfully established connection to local database \"%s\" for provided superuser \"%s\""),
|
||||||
|
PQdb(superuser_conn),
|
||||||
runtime_options.superuser);
|
runtime_options.superuser);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -3702,7 +3713,7 @@ do_standby_switchover(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_downstream_node_attached(remote_conn, local_node_record.node_name) != NODE_ATTACHED)
|
if (is_downstream_node_attached(remote_conn, local_node_record.node_name, NULL) != NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
log_error(_("local node \"%s\" (ID: %i) is not attached to demotion candidate \"%s\" (ID: %i)"),
|
log_error(_("local node \"%s\" (ID: %i) is not attached to demotion candidate \"%s\" (ID: %i)"),
|
||||||
local_node_record.node_name,
|
local_node_record.node_name,
|
||||||
@@ -4052,11 +4063,13 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
if (parse_data_directory_config(command_output.data) == false)
|
if (parse_data_directory_config(command_output.data) == false)
|
||||||
{
|
{
|
||||||
log_error(_("\"data_directory\" parameter in repmgr.conf on \"%s\" is incorrectly configured"),
|
log_error(_("\"data_directory\" parameter in \"repmgr.conf\" on \"%s\" (ID: %i) is incorrectly configured"),
|
||||||
remote_node_record.node_name);
|
remote_node_record.node_name,
|
||||||
|
remote_node_record.node_id);
|
||||||
|
|
||||||
log_hint(_("execute \"repmgr node check --data-directory-config\" on \"%s\" to diagnose the issue"),
|
log_hint(_("execute \"repmgr node check --data-directory-config\" on \"%s\" (ID: %i) to diagnose the issue"),
|
||||||
remote_node_record.node_name);
|
remote_node_record.node_name,
|
||||||
|
remote_node_record.node_id);
|
||||||
|
|
||||||
PQfinish(remote_conn);
|
PQfinish(remote_conn);
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
@@ -5184,7 +5197,8 @@ do_standby_switchover(void)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
node_attached = is_downstream_node_attached(local_conn,
|
node_attached = is_downstream_node_attached(local_conn,
|
||||||
remote_node_record.node_name);
|
remote_node_record.node_name,
|
||||||
|
NULL);
|
||||||
if (node_attached == NODE_ATTACHED)
|
if (node_attached == NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
switchover_success = true;
|
switchover_success = true;
|
||||||
@@ -5473,6 +5487,7 @@ check_source_server()
|
|||||||
{
|
{
|
||||||
uint64 test_system_identifier = system_identifier(cell->node_info->conn);
|
uint64 test_system_identifier = system_identifier(cell->node_info->conn);
|
||||||
PQfinish(cell->node_info->conn);
|
PQfinish(cell->node_info->conn);
|
||||||
|
cell->node_info->conn = NULL;
|
||||||
|
|
||||||
if (test_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER)
|
if (test_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER)
|
||||||
{
|
{
|
||||||
@@ -5496,6 +5511,7 @@ check_source_server()
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
PQfinish(cell->node_info->conn);
|
PQfinish(cell->node_info->conn);
|
||||||
|
cell->node_info->conn = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
clear_node_info_list(&all_nodes);
|
clear_node_info_list(&all_nodes);
|
||||||
@@ -6691,6 +6707,11 @@ run_basebackup(t_node_info *node_record)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform a filesystem backup using rsync.
|
||||||
|
*
|
||||||
|
* From repmgr 4 this is only used for Barman backups.
|
||||||
|
*/
|
||||||
static int
|
static int
|
||||||
run_file_backup(t_node_info *local_node_record)
|
run_file_backup(t_node_info *local_node_record)
|
||||||
{
|
{
|
||||||
@@ -6721,10 +6742,11 @@ run_file_backup(t_node_info *local_node_record)
|
|||||||
/*
|
/*
|
||||||
* Read the list of backup files into a local file. In the process:
|
* Read the list of backup files into a local file. In the process:
|
||||||
*
|
*
|
||||||
* - determine the backup ID; - check, and remove, the prefix; -
|
* - determine the backup ID
|
||||||
* detect tablespaces; - filter files in one list per tablespace;
|
* - check, and remove, the prefix
|
||||||
|
* - detect tablespaces
|
||||||
|
* - filter files in one list per tablespace
|
||||||
*/
|
*/
|
||||||
|
|
||||||
{
|
{
|
||||||
FILE *fi; /* input stream */
|
FILE *fi; /* input stream */
|
||||||
FILE *fd; /* output for data.txt */
|
FILE *fd; /* output for data.txt */
|
||||||
@@ -7007,11 +7029,13 @@ run_file_backup(t_node_info *local_node_record)
|
|||||||
|
|
||||||
if (mode == barman)
|
if (mode == barman)
|
||||||
{
|
{
|
||||||
create_pg_dir(cell_t->location, false);
|
create_pg_dir(tblspc_dir_dest, false);
|
||||||
|
|
||||||
if (cell_t->f != NULL) /* cell_t->f == NULL iff the tablespace is
|
if (cell_t->f != NULL) /* cell_t->f == NULL iff the tablespace is
|
||||||
* empty */
|
* empty */
|
||||||
{
|
{
|
||||||
|
fclose(cell_t->f);
|
||||||
|
|
||||||
maxlen_snprintf(command,
|
maxlen_snprintf(command,
|
||||||
"rsync --progress -a --files-from=%s/%s.txt %s:%s/%s/%s %s",
|
"rsync --progress -a --files-from=%s/%s.txt %s:%s/%s/%s %s",
|
||||||
local_repmgr_tmp_directory,
|
local_repmgr_tmp_directory,
|
||||||
@@ -7024,7 +7048,6 @@ run_file_backup(t_node_info *local_node_record)
|
|||||||
(void) local_command(
|
(void) local_command(
|
||||||
command,
|
command,
|
||||||
NULL);
|
NULL);
|
||||||
fclose(cell_t->f);
|
|
||||||
maxlen_snprintf(filename,
|
maxlen_snprintf(filename,
|
||||||
"%s/%s.txt",
|
"%s/%s.txt",
|
||||||
local_repmgr_tmp_directory,
|
local_repmgr_tmp_directory,
|
||||||
@@ -7141,7 +7164,10 @@ stop_backup:
|
|||||||
|
|
||||||
if (mode == barman)
|
if (mode == barman)
|
||||||
{
|
{
|
||||||
/* In Barman mode, remove local_repmgr_directory */
|
/*
|
||||||
|
* In Barman mode, remove local_repmgr_tmp_directory,
|
||||||
|
* which contains various temporary files containing Barman metadata.
|
||||||
|
*/
|
||||||
rmtree(local_repmgr_tmp_directory, true);
|
rmtree(local_repmgr_tmp_directory, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -8579,6 +8605,10 @@ do_standby_help(void)
|
|||||||
puts("");
|
puts("");
|
||||||
printf(_(" \"standby promote\" promotes a standby node to primary.\n"));
|
printf(_(" \"standby promote\" promotes a standby node to primary.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
printf(_(" --dry-run perform checks etc. but don't actually promote the node\n"));
|
||||||
|
printf(_(" -F, --force ignore warnings and continue anyway\n"));
|
||||||
|
printf(_(" --siblings-follow have other standbys follow new primary\n"));
|
||||||
|
puts("");
|
||||||
|
|
||||||
printf(_("STANDBY FOLLOW\n"));
|
printf(_("STANDBY FOLLOW\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|||||||
@@ -1196,7 +1196,7 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* If --dry-run specified, ensure log_level is at least LOG_INFO, regardless
|
* If --dry-run specified, ensure log_level is at least LOG_INFO, regardless
|
||||||
* of what's in the configuration file or -L/--log-level paremeter, otherwise
|
* of what's in the configuration file or -L/--log-level parameter, otherwise
|
||||||
* some output might not be displayed.
|
* some output might not be displayed.
|
||||||
*/
|
*/
|
||||||
if (runtime_options.dry_run == true)
|
if (runtime_options.dry_run == true)
|
||||||
@@ -2351,6 +2351,7 @@ format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBu
|
|||||||
* connected to the upstream
|
* connected to the upstream
|
||||||
*/
|
*/
|
||||||
NodeAttached attached_to_upstream = NODE_ATTACHED_UNKNOWN;
|
NodeAttached attached_to_upstream = NODE_ATTACHED_UNKNOWN;
|
||||||
|
char *replication_state = NULL;
|
||||||
t_node_info upstream_node_rec = T_NODE_INFO_INITIALIZER;
|
t_node_info upstream_node_rec = T_NODE_INFO_INITIALIZER;
|
||||||
RecordStatus upstream_node_rec_found = get_node_record(node_info->conn,
|
RecordStatus upstream_node_rec_found = get_node_record(node_info->conn,
|
||||||
node_info->upstream_node_id,
|
node_info->upstream_node_id,
|
||||||
@@ -2378,7 +2379,7 @@ format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBu
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
attached_to_upstream = is_downstream_node_attached(upstream_conn, node_info->node_name);
|
attached_to_upstream = is_downstream_node_attached(upstream_conn, node_info->node_name, &replication_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
@@ -2394,6 +2395,18 @@ format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBu
|
|||||||
upstream_node_rec.node_name,
|
upstream_node_rec.node_name,
|
||||||
upstream_node_rec.node_id);
|
upstream_node_rec.node_id);
|
||||||
}
|
}
|
||||||
|
if (attached_to_upstream == NODE_NOT_ATTACHED)
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(upstream, "? ");
|
||||||
|
item_list_append_format(warnings,
|
||||||
|
"node \"%s\" (ID: %i) attached to its upstream node \"%s\" (ID: %i) in state \"%s\"",
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id,
|
||||||
|
upstream_node_rec.node_name,
|
||||||
|
upstream_node_rec.node_id,
|
||||||
|
replication_state);
|
||||||
|
}
|
||||||
|
|
||||||
else if (attached_to_upstream == NODE_DETACHED)
|
else if (attached_to_upstream == NODE_DETACHED)
|
||||||
{
|
{
|
||||||
appendPQExpBufferStr(upstream, "! ");
|
appendPQExpBufferStr(upstream, "! ");
|
||||||
@@ -3986,8 +3999,10 @@ check_standby_join(PGconn *upstream_conn, t_node_info *upstream_node_record, t_n
|
|||||||
|
|
||||||
for (; i < config_file_options.node_rejoin_timeout; i++)
|
for (; i < config_file_options.node_rejoin_timeout; i++)
|
||||||
{
|
{
|
||||||
|
char *node_state = NULL;
|
||||||
NodeAttached node_attached = is_downstream_node_attached(upstream_conn,
|
NodeAttached node_attached = is_downstream_node_attached(upstream_conn,
|
||||||
standby_node_record->node_name);
|
standby_node_record->node_name,
|
||||||
|
&node_state);
|
||||||
if (node_attached == NODE_ATTACHED)
|
if (node_attached == NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_INFO, _("node \"%s\" (ID: %i) has attached to its upstream node"),
|
log_verbose(LOG_INFO, _("node \"%s\" (ID: %i) has attached to its upstream node"),
|
||||||
@@ -4004,9 +4019,19 @@ check_standby_join(PGconn *upstream_conn, t_node_info *upstream_node_record, t_n
|
|||||||
i + 1,
|
i + 1,
|
||||||
config_file_options.node_rejoin_timeout);
|
config_file_options.node_rejoin_timeout);
|
||||||
|
|
||||||
log_detail(_("checking for record in node \"%s\"'s \"pg_stat_replication\" table where \"application_name\" is \"%s\""),
|
if (node_attached == NODE_NOT_ATTACHED)
|
||||||
upstream_node_record->node_name,
|
{
|
||||||
standby_node_record->node_name);
|
log_detail(_("node \"%s\" (ID: %i) is currrently attached to its upstream node in state \"%s\""),
|
||||||
|
upstream_node_record->node_name,
|
||||||
|
standby_node_record->node_id,
|
||||||
|
node_state);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_detail(_("checking for record in node \"%s\"'s \"pg_stat_replication\" table where \"application_name\" is \"%s\""),
|
||||||
|
upstream_node_record->node_name,
|
||||||
|
standby_node_record->node_name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -297,6 +297,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
#connection_check_type=ping # How to check availability of the upstream node; valid options:
|
#connection_check_type=ping # How to check availability of the upstream node; valid options:
|
||||||
# 'ping': use PQping() to check if the node is accepting connections
|
# 'ping': use PQping() to check if the node is accepting connections
|
||||||
# 'connection': execute a throwaway query on the current connection
|
# 'connection': execute a throwaway query on the current connection
|
||||||
|
# 'query': execute an SQL statement on the node via the existing connection
|
||||||
#reconnect_attempts=6 # Number of attempts which will be made to reconnect to an unreachable
|
#reconnect_attempts=6 # Number of attempts which will be made to reconnect to an unreachable
|
||||||
# primary (or other upstream node)
|
# primary (or other upstream node)
|
||||||
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
#define REPMGR_VERSION_DATE ""
|
#define REPMGR_VERSION_DATE ""
|
||||||
#define REPMGR_VERSION "5.1dev"
|
#define REPMGR_VERSION "5.1.0"
|
||||||
#define REPMGR_VERSION_NUM 50100
|
#define REPMGR_VERSION_NUM 50100
|
||||||
#define REPMGR_RELEASE_DATE "2020-XX-XX"
|
#define REPMGR_RELEASE_DATE "2020-04-13"
|
||||||
#define PG_ACTUAL_VERSION_NUM
|
#define PG_ACTUAL_VERSION_NUM
|
||||||
|
|||||||
@@ -1328,6 +1328,7 @@ monitor_streaming_standby(void)
|
|||||||
*/
|
*/
|
||||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&upstream_conn);
|
||||||
log_error(_("unable connect to upstream node (ID: %i), terminating"),
|
log_error(_("unable connect to upstream node (ID: %i), terminating"),
|
||||||
local_node_info.upstream_node_id);
|
local_node_info.upstream_node_id);
|
||||||
log_hint(_("upstream node must be running before repmgrd can start"));
|
log_hint(_("upstream node must be running before repmgrd can start"));
|
||||||
@@ -1339,8 +1340,8 @@ monitor_streaming_standby(void)
|
|||||||
|
|
||||||
if (upstream_node_info.node_id == local_node_info.node_id)
|
if (upstream_node_info.node_id == local_node_info.node_id)
|
||||||
{
|
{
|
||||||
PQfinish(upstream_conn);
|
close_connection(&upstream_conn);
|
||||||
upstream_conn = NULL;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1364,6 +1365,8 @@ monitor_streaming_standby(void)
|
|||||||
|
|
||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
log_error(_("unable to connect to primary node"));
|
log_error(_("unable to connect to primary node"));
|
||||||
log_hint(_("ensure the primary node is reachable from this node"));
|
log_hint(_("ensure the primary node is reachable from this node"));
|
||||||
|
|
||||||
@@ -1440,6 +1443,7 @@ monitor_streaming_standby(void)
|
|||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_DEBUG, "checking %s", upstream_node_info.conninfo);
|
log_verbose(LOG_DEBUG, "checking %s", upstream_node_info.conninfo);
|
||||||
|
|
||||||
if (check_upstream_connection(&upstream_conn, upstream_node_info.conninfo) == true)
|
if (check_upstream_connection(&upstream_conn, upstream_node_info.conninfo) == true)
|
||||||
{
|
{
|
||||||
set_upstream_last_seen(local_conn, upstream_node_info.node_id);
|
set_upstream_last_seen(local_conn, upstream_node_info.node_id);
|
||||||
@@ -1557,8 +1561,9 @@ monitor_streaming_standby(void)
|
|||||||
|
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||||
PQfinish(upstream_conn);
|
|
||||||
upstream_conn = NULL;
|
close_connection(&upstream_conn);
|
||||||
|
|
||||||
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
/* check local connection */
|
/* check local connection */
|
||||||
@@ -1568,7 +1573,7 @@ monitor_streaming_standby(void)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
PQfinish(local_conn);
|
close_connection(&local_conn);
|
||||||
|
|
||||||
for (i = 0; i < config_file_options.repmgrd_standby_startup_timeout; i++)
|
for (i = 0; i < config_file_options.repmgrd_standby_startup_timeout; i++)
|
||||||
{
|
{
|
||||||
@@ -1577,6 +1582,8 @@ monitor_streaming_standby(void)
|
|||||||
if (PQstatus(local_conn) == CONNECTION_OK)
|
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
||||||
i + 1,
|
i + 1,
|
||||||
config_file_options.repmgrd_standby_startup_timeout);
|
config_file_options.repmgrd_standby_startup_timeout);
|
||||||
@@ -1706,7 +1713,12 @@ monitor_streaming_standby(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (primary_conn == NULL || PQstatus(primary_conn) != CONNECTION_OK)
|
if (primary_conn != NULL && PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
close_connection(&primary_conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (primary_conn == NULL)
|
||||||
{
|
{
|
||||||
primary_conn = establish_primary_db_connection(upstream_conn, false);
|
primary_conn = establish_primary_db_connection(upstream_conn, false);
|
||||||
}
|
}
|
||||||
@@ -1715,7 +1727,8 @@ monitor_streaming_standby(void)
|
|||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("reconnected to upstream node %i after %i seconds, resuming monitoring"),
|
_("reconnected to upstream node \"%s\" (ID: %i) after %i seconds, resuming monitoring"),
|
||||||
|
upstream_node_info.node_name,
|
||||||
upstream_node_info.node_id,
|
upstream_node_info.node_id,
|
||||||
degraded_monitoring_elapsed);
|
degraded_monitoring_elapsed);
|
||||||
|
|
||||||
@@ -1851,7 +1864,9 @@ monitor_streaming_standby(void)
|
|||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
log_debug("unable to connect to %i ... ", cell->node_info->node_id);
|
log_debug("unable to connect to %i ... ", cell->node_info->node_id);
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2036,7 +2051,8 @@ loop:
|
|||||||
|
|
||||||
if (last_known_upstream_node_id != local_node_info.upstream_node_id)
|
if (last_known_upstream_node_id != local_node_info.upstream_node_id)
|
||||||
{
|
{
|
||||||
log_notice(_("local node %i upstream appears to have changed, restarting monitoring"),
|
log_notice(_("upstream for local node \"%s\" (ID: %i) appears to have changed, restarting monitoring"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
log_detail(_("currently monitoring upstream %i; new upstream is %i"),
|
log_detail(_("currently monitoring upstream %i; new upstream is %i"),
|
||||||
last_known_upstream_node_id,
|
last_known_upstream_node_id,
|
||||||
@@ -2050,7 +2066,8 @@ loop:
|
|||||||
*/
|
*/
|
||||||
if (local_node_info.type != STANDBY)
|
if (local_node_info.type != STANDBY)
|
||||||
{
|
{
|
||||||
log_notice(_("local node %i is no longer a standby, restarting monitoring"),
|
log_notice(_("local node \"%s\" (ID: %i) is no longer a standby, restarting monitoring"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
close_connection(&upstream_conn);
|
close_connection(&upstream_conn);
|
||||||
return;
|
return;
|
||||||
@@ -2075,8 +2092,8 @@ loop:
|
|||||||
{
|
{
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||||
PQfinish(primary_conn);
|
|
||||||
primary_conn = NULL;
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
||||||
return;
|
return;
|
||||||
@@ -2144,7 +2161,8 @@ loop:
|
|||||||
|
|
||||||
if (last_known_upstream_node_id != local_node_info.upstream_node_id)
|
if (last_known_upstream_node_id != local_node_info.upstream_node_id)
|
||||||
{
|
{
|
||||||
log_notice(_("local node %i's upstream appears to have changed, restarting monitoring"),
|
log_notice(_("local node \"%s\" (ID: %i)'s upstream appears to have changed, restarting monitoring"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
log_detail(_("currently monitoring upstream %i; new upstream is %i"),
|
log_detail(_("currently monitoring upstream %i; new upstream is %i"),
|
||||||
last_known_upstream_node_id,
|
last_known_upstream_node_id,
|
||||||
@@ -2335,8 +2353,9 @@ monitor_streaming_witness(void)
|
|||||||
{
|
{
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||||
PQfinish(primary_conn);
|
|
||||||
primary_conn = NULL;
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -2397,7 +2416,8 @@ monitor_streaming_witness(void)
|
|||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("reconnected to upstream node %i after %i seconds, resuming monitoring"),
|
_("reconnected to upstream node \"%s\" (ID: %i) after %i seconds, resuming monitoring"),
|
||||||
|
upstream_node_info.node_name,
|
||||||
upstream_node_info.node_id,
|
upstream_node_info.node_id,
|
||||||
degraded_monitoring_elapsed);
|
degraded_monitoring_elapsed);
|
||||||
|
|
||||||
@@ -2407,9 +2427,11 @@ monitor_streaming_witness(void)
|
|||||||
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
|
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
|
||||||
{
|
{
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name,
|
||||||
PQfinish(primary_conn);
|
upstream_node_info.node_id);
|
||||||
primary_conn = NULL;
|
|
||||||
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -2463,7 +2485,9 @@ monitor_streaming_witness(void)
|
|||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
log_debug("unable to connect to %i ... ", cell->node_info->node_id);
|
log_debug("unable to connect to %i ... ", cell->node_info->node_id);
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2605,8 +2629,9 @@ loop:
|
|||||||
{
|
{
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||||
PQfinish(primary_conn);
|
|
||||||
primary_conn = NULL;
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2716,24 +2741,32 @@ do_primary_failover(void)
|
|||||||
{
|
{
|
||||||
for (cell = check_sibling_nodes.head; cell; cell = cell->next)
|
for (cell = check_sibling_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
pid_t sibling_wal_receiver_pid;
|
|
||||||
|
|
||||||
if (cell->node_info->conn == NULL)
|
if (cell->node_info->conn == NULL)
|
||||||
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
||||||
|
|
||||||
sibling_wal_receiver_pid = (pid_t)get_wal_receiver_pid(cell->node_info->conn);
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
|
|
||||||
if (sibling_wal_receiver_pid == UNKNOWN_PID)
|
|
||||||
{
|
{
|
||||||
log_warning(_("unable to query WAL receiver PID on node %i"),
|
log_warning(_("unable to query WAL receiver PID on node \"%s\" (ID: %i)"),
|
||||||
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id);
|
cell->node_info->node_id);
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
}
|
}
|
||||||
else if (sibling_wal_receiver_pid > 0)
|
else
|
||||||
{
|
{
|
||||||
log_info(_("WAL receiver PID on node %i is %i"),
|
pid_t sibling_wal_receiver_pid = (pid_t)get_wal_receiver_pid(cell->node_info->conn);
|
||||||
cell->node_info->node_id,
|
|
||||||
sibling_wal_receiver_pid);
|
if (sibling_wal_receiver_pid == UNKNOWN_PID)
|
||||||
sibling_node_wal_receiver_connected = true;
|
{
|
||||||
|
log_warning(_("unable to query WAL receiver PID on node %i"),
|
||||||
|
cell->node_info->node_id);
|
||||||
|
}
|
||||||
|
else if (sibling_wal_receiver_pid > 0)
|
||||||
|
{
|
||||||
|
log_info(_("WAL receiver PID on node %i is %i"),
|
||||||
|
cell->node_info->node_id,
|
||||||
|
sibling_wal_receiver_pid);
|
||||||
|
sibling_node_wal_receiver_connected = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2895,7 +2928,8 @@ do_primary_failover(void)
|
|||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("node %i is in manual failover mode and is now disconnected from streaming replication"),
|
_("node \"%s\" (ID: %i) is in manual failover mode and is now disconnected from streaming replication"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
|
|
||||||
new_primary_conn = establish_db_connection(new_primary.conninfo, false);
|
new_primary_conn = establish_db_connection(new_primary.conninfo, false);
|
||||||
@@ -3055,8 +3089,6 @@ do_primary_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
update_monitoring_history(void)
|
update_monitoring_history(void)
|
||||||
{
|
{
|
||||||
@@ -3291,6 +3323,8 @@ do_upstream_standby_failover(void)
|
|||||||
if (PQstatus(local_conn) == CONNECTION_OK)
|
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
log_debug("sleeping 1 second; %i of %i (\"repmgrd_standby_startup_timeout\") attempts to reconnect to local node",
|
log_debug("sleeping 1 second; %i of %i (\"repmgrd_standby_startup_timeout\") attempts to reconnect to local node",
|
||||||
i + 1,
|
i + 1,
|
||||||
config_file_options.repmgrd_standby_startup_timeout);
|
config_file_options.repmgrd_standby_startup_timeout);
|
||||||
@@ -3299,7 +3333,8 @@ do_upstream_standby_failover(void)
|
|||||||
|
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to reconnect to local node %i"),
|
log_error(_("unable to reconnect to local node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
@@ -3334,7 +3369,8 @@ do_upstream_standby_failover(void)
|
|||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("unable to set node %i's new upstream ID to %i"),
|
_("unable to set node \"%s\" (ID: %i)'s new upstream ID to %i"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
primary_node_info.node_id);
|
primary_node_info.node_id);
|
||||||
|
|
||||||
@@ -3372,8 +3408,10 @@ do_upstream_standby_failover(void)
|
|||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("node %i is now following primary node %i"),
|
_("node \"%s\" (ID: %i) is now following primary node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
|
primary_node_info.node_name,
|
||||||
primary_node_info.node_id);
|
primary_node_info.node_id);
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
log_notice("%s", event_details.data);
|
||||||
@@ -3438,12 +3476,16 @@ promote_self(void)
|
|||||||
|
|
||||||
r = system(promote_command);
|
r = system(promote_command);
|
||||||
|
|
||||||
|
log_debug("result of promote_command: %i", WEXITSTATUS(r));
|
||||||
|
|
||||||
/* connection should stay up, but check just in case */
|
/* connection should stay up, but check just in case */
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("local database connection not available"));
|
log_warning(_("local database connection not available"));
|
||||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
local_conn = establish_db_connection(local_node_info.conninfo, true);
|
local_conn = establish_db_connection(local_node_info.conninfo, true);
|
||||||
|
|
||||||
/* assume node failed */
|
/* assume node failed */
|
||||||
@@ -3451,24 +3493,37 @@ promote_self(void)
|
|||||||
{
|
{
|
||||||
log_error(_("unable to reconnect to local node"));
|
log_error(_("unable to reconnect to local node"));
|
||||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
/* XXX handle this */
|
/* XXX handle this */
|
||||||
return FAILOVER_STATE_LOCAL_NODE_FAILURE;
|
return FAILOVER_STATE_LOCAL_NODE_FAILURE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (r != 0)
|
if (WIFEXITED(r) && WEXITSTATUS(r))
|
||||||
{
|
{
|
||||||
int primary_node_id;
|
int primary_node_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
|
log_error(_("promote command failed"));
|
||||||
|
log_detail(_("promote command exited with error code %i"), WEXITSTATUS(r));
|
||||||
|
|
||||||
|
log_info(_("checking if original primary node has reappeared"));
|
||||||
|
|
||||||
upstream_conn = get_primary_connection(local_conn,
|
upstream_conn = get_primary_connection(local_conn,
|
||||||
&primary_node_id,
|
&primary_node_id,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (PQstatus(upstream_conn) == CONNECTION_OK && primary_node_id == failed_primary.node_id)
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
close_connection(&upstream_conn);
|
||||||
|
}
|
||||||
|
else if (primary_node_id == failed_primary.node_id)
|
||||||
{
|
{
|
||||||
PQExpBufferData event_details;
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
log_notice(_("original primary (ID: %i) reappeared before this standby was promoted - no action taken"),
|
log_notice(_("original primary \"%s\" (ID: %i) reappeared before this standby was promoted - no action taken"),
|
||||||
|
failed_primary.node_name,
|
||||||
failed_primary.node_id);
|
failed_primary.node_id);
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
@@ -3493,9 +3548,6 @@ promote_self(void)
|
|||||||
return FAILOVER_STATE_PRIMARY_REAPPEARED;
|
return FAILOVER_STATE_PRIMARY_REAPPEARED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
log_error(_("promote command failed"));
|
|
||||||
|
|
||||||
create_event_notification(NULL,
|
create_event_notification(NULL,
|
||||||
&config_file_options,
|
&config_file_options,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
@@ -3506,6 +3558,11 @@ promote_self(void)
|
|||||||
return FAILOVER_STATE_PROMOTION_FAILED;
|
return FAILOVER_STATE_PROMOTION_FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Promotion has succeeded - verify local connection is still available
|
||||||
|
*/
|
||||||
|
try_reconnect(&local_conn, &local_node_info);
|
||||||
|
|
||||||
/* bump the electoral term */
|
/* bump the electoral term */
|
||||||
increment_current_term(local_conn);
|
increment_current_term(local_conn);
|
||||||
|
|
||||||
@@ -3521,8 +3578,10 @@ promote_self(void)
|
|||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("node %i promoted to primary; old primary %i marked as failed"),
|
_("node \"%s\" (ID: %i) promoted to primary; old primary \"%s\" (ID: %i) marked as failed"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
|
failed_primary.node_name,
|
||||||
failed_primary.node_id);
|
failed_primary.node_id);
|
||||||
|
|
||||||
/* local_conn is now the primary connection */
|
/* local_conn is now the primary connection */
|
||||||
@@ -3566,6 +3625,8 @@ notify_followers(NodeInfoList *standby_nodes, int follow_node_id)
|
|||||||
cell->node_info->node_name,
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id);
|
cell->node_info->node_id);
|
||||||
|
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
|
|
||||||
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3576,6 +3637,7 @@ notify_followers(NodeInfoList *standby_nodes, int follow_node_id)
|
|||||||
cell->node_info->node_id);
|
cell->node_info->node_id);
|
||||||
log_detail("\n%s", PQerrorMessage(cell->node_info->conn));
|
log_detail("\n%s", PQerrorMessage(cell->node_info->conn));
|
||||||
|
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3800,15 +3862,18 @@ follow_new_primary(int new_primary_id)
|
|||||||
if (PQstatus(local_conn) == CONNECTION_OK)
|
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
||||||
i + 1,
|
i + 1,
|
||||||
config_file_options.repmgrd_standby_startup_timeout);
|
config_file_options.repmgrd_standby_startup_timeout);
|
||||||
sleep(1);
|
sleep(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (local_conn == NULL || PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to reconnect to local node %i"),
|
log_error(_("unable to reconnect to local node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
@@ -3822,8 +3887,10 @@ follow_new_primary(int new_primary_id)
|
|||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("node %i now following new upstream node %i"),
|
_("node \"%s\" (ID: %i) now following new upstream node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
|
upstream_node_info.node_name,
|
||||||
upstream_node_info.node_id);
|
upstream_node_info.node_id);
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
log_notice("%s", event_details.data);
|
||||||
@@ -3918,7 +3985,7 @@ witness_follow_new_primary(int new_primary_id)
|
|||||||
record_status = get_node_record(upstream_conn, local_node_info.node_id, &local_node_info);
|
record_status = get_node_record(upstream_conn, local_node_info.node_id, &local_node_info);
|
||||||
if (record_status != RECORD_FOUND)
|
if (record_status != RECORD_FOUND)
|
||||||
{
|
{
|
||||||
log_error(_("unable to retrieve metadata record found for node %i"),
|
log_error(_("unable to retrieve metadata record for node %i"),
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
@@ -3928,8 +3995,10 @@ witness_follow_new_primary(int new_primary_id)
|
|||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("witness node %i now following new primary node %i"),
|
_("witness node \"%s\" (ID: %i) now following new primary node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
|
upstream_node_info.node_name,
|
||||||
upstream_node_info.node_id);
|
upstream_node_info.node_id);
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
log_notice("%s", event_details.data);
|
||||||
@@ -4031,6 +4100,12 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
|
|
||||||
return ELECTION_NOT_CANDIDATE;
|
return ELECTION_NOT_CANDIDATE;
|
||||||
}
|
}
|
||||||
|
if (config_file_options.failover_delay > 0)
|
||||||
|
{
|
||||||
|
log_debug("sleeping %i seconds (\"failover_delay\") before initiating failover",
|
||||||
|
config_file_options.failover_delay);
|
||||||
|
sleep(config_file_options.failover_delay);
|
||||||
|
}
|
||||||
|
|
||||||
/* node priority is set to zero - don't become a candidate, and lose by default */
|
/* node priority is set to zero - don't become a candidate, and lose by default */
|
||||||
if (local_node_info.priority <= 0)
|
if (local_node_info.priority <= 0)
|
||||||
@@ -4165,6 +4240,8 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4295,7 +4372,8 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
{
|
{
|
||||||
if (sibling_replication_info.upstream_node_id != upstream_node_info.node_id)
|
if (sibling_replication_info.upstream_node_id != upstream_node_info.node_id)
|
||||||
{
|
{
|
||||||
log_warning(_("assumed sibling node %i monitoring different upstream node %i"),
|
log_warning(_("assumed sibling node \"%s\" (ID: %i) monitoring different upstream node %i"),
|
||||||
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id,
|
cell->node_info->node_id,
|
||||||
sibling_replication_info.upstream_node_id);
|
sibling_replication_info.upstream_node_id);
|
||||||
|
|
||||||
@@ -4335,7 +4413,8 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
/* don't check 0-priority nodes */
|
/* don't check 0-priority nodes */
|
||||||
if (cell->node_info->priority <= 0)
|
if (cell->node_info->priority <= 0)
|
||||||
{
|
{
|
||||||
log_info(_("node %i has priority of %i, skipping"),
|
log_info(_("node \"%s\" (ID: %i) has priority of %i, skipping"),
|
||||||
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id,
|
cell->node_info->node_id,
|
||||||
cell->node_info->priority);
|
cell->node_info->priority);
|
||||||
continue;
|
continue;
|
||||||
@@ -4596,8 +4675,8 @@ check_connection(t_node_info *node_info, PGconn **conn)
|
|||||||
node_info->node_name,
|
node_info->node_name,
|
||||||
node_info->node_id);
|
node_info->node_id);
|
||||||
log_detail("\n%s", PQerrorMessage(*conn));
|
log_detail("\n%s", PQerrorMessage(*conn));
|
||||||
PQfinish(*conn);
|
|
||||||
*conn = NULL;
|
close_connection(conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PQstatus(*conn) != CONNECTION_OK)
|
if (PQstatus(*conn) != CONNECTION_OK)
|
||||||
@@ -4606,13 +4685,14 @@ check_connection(t_node_info *node_info, PGconn **conn)
|
|||||||
node_info->node_name,
|
node_info->node_name,
|
||||||
node_info->node_id);
|
node_info->node_id);
|
||||||
|
|
||||||
PQfinish(*conn);
|
close_connection(conn);
|
||||||
|
|
||||||
*conn = establish_db_connection(node_info->conninfo, false);
|
*conn = establish_db_connection(node_info->conninfo, false);
|
||||||
|
|
||||||
if (PQstatus(*conn) != CONNECTION_OK)
|
if (PQstatus(*conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
PQfinish(*conn);
|
close_connection(conn);
|
||||||
*conn = NULL;
|
|
||||||
log_warning(_("reconnection to node \"%s\" (ID: %i) failed"),
|
log_warning(_("reconnection to node \"%s\" (ID: %i) failed"),
|
||||||
node_info->node_name,
|
node_info->node_name,
|
||||||
node_info->node_id);
|
node_info->node_id);
|
||||||
@@ -4686,7 +4766,8 @@ handle_sighup(PGconn **conn, t_server_type server_type)
|
|||||||
|
|
||||||
if (reload_config(&config_file_options, server_type))
|
if (reload_config(&config_file_options, server_type))
|
||||||
{
|
{
|
||||||
PQfinish(*conn);
|
close_connection(conn);
|
||||||
|
|
||||||
*conn = establish_db_connection(config_file_options.conninfo, true);
|
*conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4847,6 +4928,8 @@ check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *foll
|
|||||||
if (PQstatus(follow_target_repl_conn) != CONNECTION_OK)
|
if (PQstatus(follow_target_repl_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to establish a replication connection to the follow target node"));
|
log_error(_("unable to establish a replication connection to the follow target node"));
|
||||||
|
|
||||||
|
PQfinish(follow_target_repl_conn);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4978,7 +5061,6 @@ check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *foll
|
|||||||
if (follow_target_history)
|
if (follow_target_history)
|
||||||
pfree(follow_target_history);
|
pfree(follow_target_history);
|
||||||
|
|
||||||
|
|
||||||
return can_follow;
|
return can_follow;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5152,11 +5234,8 @@ parse_child_nodes_disconnect_command(char *parsed_command, char *template, int r
|
|||||||
int
|
int
|
||||||
try_primary_reconnect(PGconn **conn, PGconn *local_conn, t_node_info *node_info)
|
try_primary_reconnect(PGconn **conn, PGconn *local_conn, t_node_info *node_info)
|
||||||
{
|
{
|
||||||
PGconn *our_conn;
|
|
||||||
t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
int max_attempts = config_file_options.reconnect_attempts;
|
int max_attempts = config_file_options.reconnect_attempts;
|
||||||
|
|
||||||
initialize_conninfo_params(&conninfo_params, false);
|
initialize_conninfo_params(&conninfo_params, false);
|
||||||
@@ -5170,11 +5249,18 @@ try_primary_reconnect(PGconn **conn, PGconn *local_conn, t_node_info *node_info)
|
|||||||
|
|
||||||
for (i = 0; i < max_attempts; i++)
|
for (i = 0; i < max_attempts; i++)
|
||||||
{
|
{
|
||||||
log_info(_("checking state of node %i, %i of %i attempts"),
|
log_info(_("checking state of node \"%s\" (ID: %i), %i of %i attempts"),
|
||||||
node_info->node_id, i + 1, max_attempts);
|
node_info->node_name,
|
||||||
|
node_info->node_id,
|
||||||
|
i + 1, max_attempts);
|
||||||
|
|
||||||
if (is_server_available_params(&conninfo_params) == true)
|
if (is_server_available_params(&conninfo_params) == true)
|
||||||
{
|
{
|
||||||
log_notice(_("node %i has recovered, reconnecting"), node_info->node_id);
|
PGconn *our_conn;
|
||||||
|
|
||||||
|
log_notice(_("node \"%s\" (ID: %i) has recovered, reconnecting"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note: we could also handle the case where node is pingable but
|
* Note: we could also handle the case where node is pingable but
|
||||||
@@ -5187,7 +5273,9 @@ try_primary_reconnect(PGconn **conn, PGconn *local_conn, t_node_info *node_info)
|
|||||||
{
|
{
|
||||||
free_conninfo_params(&conninfo_params);
|
free_conninfo_params(&conninfo_params);
|
||||||
|
|
||||||
log_info(_("connection to node %i succeeded"), node_info->node_id);
|
log_info(_("connection to node \"%s\" (ID: %i) succeeded"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id);
|
||||||
|
|
||||||
if (PQstatus(*conn) == CONNECTION_BAD)
|
if (PQstatus(*conn) == CONNECTION_BAD)
|
||||||
{
|
{
|
||||||
@@ -5244,6 +5332,8 @@ try_primary_reconnect(PGconn **conn, PGconn *local_conn, t_node_info *node_info)
|
|||||||
{
|
{
|
||||||
log_notice(_("received notification that new primary is node %i"), new_primary_node_id);
|
log_notice(_("received notification that new primary is node %i"), new_primary_node_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free_conninfo_params(&conninfo_params);
|
||||||
return new_primary_node_id;
|
return new_primary_node_id;
|
||||||
}
|
}
|
||||||
sleep(1);
|
sleep(1);
|
||||||
@@ -5251,7 +5341,8 @@ try_primary_reconnect(PGconn **conn, PGconn *local_conn, t_node_info *node_info)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log_warning(_("unable to reconnect to node %i after %i attempts"),
|
log_warning(_("unable to reconnect to node \"%s\" (ID: %i) after %i attempts"),
|
||||||
|
node_info->node_name,
|
||||||
node_info->node_id,
|
node_info->node_id,
|
||||||
max_attempts);
|
max_attempts);
|
||||||
|
|
||||||
|
|||||||
60
repmgrd.c
60
repmgrd.c
@@ -817,32 +817,65 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
|
|||||||
/* Check the connection status twice in case it changes after reset */
|
/* Check the connection status twice in case it changes after reset */
|
||||||
bool twice = false;
|
bool twice = false;
|
||||||
|
|
||||||
if (config_file_options.connection_check_type == CHECK_PING)
|
|
||||||
return is_server_available(conninfo);
|
|
||||||
|
|
||||||
if (config_file_options.connection_check_type == CHECK_CONNECTION)
|
log_debug("connection check type is \"%s\"",
|
||||||
|
print_connection_check_type(config_file_options.connection_check_type));
|
||||||
|
/*
|
||||||
|
* For the check types which do not involve using the existing database
|
||||||
|
* connection, we'll perform the actual check, then as an additional
|
||||||
|
* safeguard verify that the connection is still valid (as it might have
|
||||||
|
* gone away during a brief outage between checks).
|
||||||
|
*/
|
||||||
|
if (config_file_options.connection_check_type != CHECK_QUERY)
|
||||||
{
|
{
|
||||||
bool success = true;
|
bool success = true;
|
||||||
PGconn *test_conn = PQconnectdb(conninfo);
|
|
||||||
|
|
||||||
log_debug("check_upstream_connection(): attempting to connect to \"%s\"", conninfo);
|
if (config_file_options.connection_check_type == CHECK_PING)
|
||||||
|
|
||||||
if (PQstatus(test_conn) != CONNECTION_OK)
|
|
||||||
{
|
{
|
||||||
log_warning(_("unable to connect to \"%s\""), conninfo);
|
success = is_server_available(conninfo);
|
||||||
log_detail("\n%s", PQerrorMessage(test_conn));
|
|
||||||
success = false;
|
|
||||||
}
|
}
|
||||||
PQfinish(test_conn);
|
else if (config_file_options.connection_check_type == CHECK_CONNECTION)
|
||||||
|
{
|
||||||
|
PGconn *test_conn = PQconnectdb(conninfo);
|
||||||
|
|
||||||
return success;
|
log_debug("check_upstream_connection(): attempting to connect to \"%s\"", conninfo);
|
||||||
|
|
||||||
|
if (PQstatus(test_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_warning(_("unable to connect to \"%s\""), conninfo);
|
||||||
|
log_detail("\n%s", PQerrorMessage(test_conn));
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
PQfinish(test_conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (success == false)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (PQstatus(*conn) == CONNECTION_OK)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Checks have succeeded, but the open connection to the primary has gone away,
|
||||||
|
* possibly due to a brief outage between monitoring intervals - attempt to
|
||||||
|
* reset it.
|
||||||
|
*/
|
||||||
|
log_notice(_("upstream is available but upstream connection has gone away, resetting"));
|
||||||
|
|
||||||
|
PQfinish(*conn);
|
||||||
|
*conn = PQconnectdb(conninfo);
|
||||||
|
|
||||||
|
if (PQstatus(*conn) == CONNECTION_OK)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
if (PQstatus(*conn) != CONNECTION_OK)
|
if (PQstatus(*conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_debug("check_upstream_connection(): connection not OK");
|
log_debug("check_upstream_connection(): upstream connection has gone away, resetting");
|
||||||
if (twice)
|
if (twice)
|
||||||
return false;
|
return false;
|
||||||
/* reconnect */
|
/* reconnect */
|
||||||
@@ -877,6 +910,7 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* reconnect */
|
/* reconnect */
|
||||||
|
log_debug("check_upstream_connection(): upstream connection not available, resetting");
|
||||||
PQfinish(*conn);
|
PQfinish(*conn);
|
||||||
*conn = PQconnectdb(conninfo);
|
*conn = PQconnectdb(conninfo);
|
||||||
twice = true;
|
twice = true;
|
||||||
|
|||||||
Reference in New Issue
Block a user