mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
149 Commits
REL4_0_BET
...
v4.0.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
38d293694d | ||
|
|
54a10a0c3f | ||
|
|
a8016f602f | ||
|
|
de57ecdad1 | ||
|
|
1fde81cf3f | ||
|
|
146c412061 | ||
|
|
e9cb61ae7a | ||
|
|
50e9460b3e | ||
|
|
47e7cbe147 | ||
|
|
bf0be3eb43 | ||
|
|
270da1294c | ||
|
|
d3c47f450f | ||
|
|
c20475f94a | ||
|
|
e0560c3e70 | ||
|
|
3fa2bef6f4 | ||
|
|
f8a0b051c8 | ||
|
|
3e4a5e6ff5 | ||
|
|
020b5b6982 | ||
|
|
932326e4a0 | ||
|
|
019cd081e8 | ||
|
|
3ace908126 | ||
|
|
2ad174489c | ||
|
|
9124e0f0a2 | ||
|
|
060b746743 | ||
|
|
bdb82d3aba | ||
|
|
f6a6df3600 | ||
|
|
67e27f9ecd | ||
|
|
454c0b7bd9 | ||
|
|
faf297b07f | ||
|
|
0dae8c9f0b | ||
|
|
3f872cde0c | ||
|
|
e331069f53 | ||
|
|
53ebde8f33 | ||
|
|
5e9d50f8ca | ||
|
|
347e753c27 | ||
|
|
2f978847b1 | ||
|
|
3014f72fda | ||
|
|
e02ddd0f37 | ||
|
|
29fcee2209 | ||
|
|
f61f7f82eb | ||
|
|
efe28cbbeb | ||
|
|
6131c1d8ce | ||
|
|
c907b7b33d | ||
|
|
e6644305d3 | ||
|
|
31b856dd9f | ||
|
|
dff2bcc5de | ||
|
|
688e609169 | ||
|
|
3e68c9fcc6 | ||
|
|
d459b92186 | ||
|
|
2a898721c0 | ||
|
|
35782d83c0 | ||
|
|
e16eb42693 | ||
|
|
4d6dc57589 | ||
|
|
cbc97d84ac | ||
|
|
96fe7dd2d6 | ||
|
|
13935a88c9 | ||
|
|
5275890467 | ||
|
|
7f865fdaf3 | ||
|
|
9e2fb7ea13 | ||
|
|
a3428e4d8a | ||
|
|
03b9475755 | ||
|
|
de1eb3c459 | ||
|
|
a13eccccc5 | ||
|
|
158f132bc0 | ||
|
|
cdf54d217a | ||
|
|
1a8a82f207 | ||
|
|
60e877ca39 | ||
|
|
91531bffe4 | ||
|
|
fc5f46ca5a | ||
|
|
b76952e136 | ||
|
|
c3a1969f55 | ||
|
|
11d856a1ec | ||
|
|
fbf357947d | ||
|
|
47eaa99537 | ||
|
|
aeee11d1b7 | ||
|
|
e4713c5eca | ||
|
|
e55e5a0581 | ||
|
|
fb0aae183d | ||
|
|
52655e9cd5 | ||
|
|
c5d91ca88c | ||
|
|
9f5edd07ad | ||
|
|
f58b102d51 | ||
|
|
90733aecf7 | ||
|
|
e0be228c89 | ||
|
|
a9759cf6ca | ||
|
|
6852ac82c6 | ||
|
|
c27bd2a135 | ||
|
|
5045e2eb9d | ||
|
|
23f7af17a2 | ||
|
|
93936c090d | ||
|
|
564c951f0c | ||
|
|
3f5e8f6aec | ||
|
|
a6a97cda86 | ||
|
|
18c8e4c529 | ||
|
|
6984fe7029 | ||
|
|
5ecc3a0a8f | ||
|
|
febde097be | ||
|
|
19ea248226 | ||
|
|
acdbd1110a | ||
|
|
946683182c | ||
|
|
c9fbb7febf | ||
|
|
ff966fe533 | ||
|
|
7001960cc1 | ||
|
|
1cfba44799 | ||
|
|
d1f9ca4b43 | ||
|
|
f6c253f8a6 | ||
|
|
95ec8d8b21 | ||
|
|
041f1b7667 | ||
|
|
0b2a6fe2fb | ||
|
|
e716d09053 | ||
|
|
f885e105f2 | ||
|
|
43be854ec6 | ||
|
|
f7e2c700b1 | ||
|
|
b1eef0a212 | ||
|
|
39e8a560b0 | ||
|
|
d4a847a96f | ||
|
|
034e501039 | ||
|
|
08878831fe | ||
|
|
cf1e17d758 | ||
|
|
87ea7850ca | ||
|
|
fee4569887 | ||
|
|
a4e79d33af | ||
|
|
a480b8bd52 | ||
|
|
d156de533d | ||
|
|
da47eb4bff | ||
|
|
5fd823fda9 | ||
|
|
c5c27d2250 | ||
|
|
ec843e1de4 | ||
|
|
e18d6ea81f | ||
|
|
cb61b447f3 | ||
|
|
9697e2ccfc | ||
|
|
e7bb3e9d50 | ||
|
|
f1fe6a32fc | ||
|
|
a149a99228 | ||
|
|
5fffd177a4 | ||
|
|
8013634b79 | ||
|
|
8d609249fd | ||
|
|
2f4e7c1d8b | ||
|
|
244d36a7d6 | ||
|
|
9a94878c73 | ||
|
|
4ab081ba41 | ||
|
|
aaf5af7591 | ||
|
|
2a48edb625 | ||
|
|
b049d7f0ec | ||
|
|
bb35ee1750 | ||
|
|
caa8d034de | ||
|
|
0c64b90427 | ||
|
|
d8c2f66c5b | ||
|
|
e00b4461b3 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -39,6 +39,10 @@ lib*.pc
|
||||
|
||||
# test output
|
||||
/results/
|
||||
/regression.diffs
|
||||
/regression.out
|
||||
|
||||
/doc/Makefile
|
||||
|
||||
# other
|
||||
/.lineno
|
||||
|
||||
@@ -28,4 +28,3 @@ project. For more details see:
|
||||
|
||||
Contributors should reformat their code similarly before submitting code to
|
||||
the project, in order to minimize merge conflicts with other work.
|
||||
>>>>>>> Add further documentation files
|
||||
|
||||
@@ -12,5 +12,5 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see http://www.gnu.org/licenses/
|
||||
along with this program. If not, see https://www.gnu.org/licenses/
|
||||
to obtain one.
|
||||
|
||||
10
FAQ.md
Normal file
10
FAQ.md
Normal file
@@ -0,0 +1,10 @@
|
||||
FAQ - Frequently Asked Questions about repmgr
|
||||
=============================================
|
||||
|
||||
The repmgr 4 FAQ is located here:
|
||||
|
||||
https://repmgr.org/docs/appendix-faq.html
|
||||
|
||||
The repmgr 3.x FAQ can be found here:
|
||||
|
||||
https://github.com/2ndQuadrant/repmgr/blob/REL3_3_STABLE/FAQ.md
|
||||
17
HISTORY
17
HISTORY
@@ -1,6 +1,17 @@
|
||||
4.0 2017-10-04
|
||||
Complete rewrite with many changes; see file "doc/upgrading-from-repmgr3.md"
|
||||
for details.
|
||||
4.0.1 2017-12-04
|
||||
repmgr: ensure "repmgr node check --action=" returns appropriate return
|
||||
code; GitHub #340 (Ian)
|
||||
repmgr: add missing schema qualification in get_all_node_records_with_upstream()
|
||||
query; GitHub #341 (Martín)
|
||||
repmgr: initialise "voting_term" table in application, not extension SQL;
|
||||
GitHub #344 (Ian)
|
||||
repmgr: delete any replication slots copied by pg_rewind; GitHub #334 (Ian)
|
||||
repmgr: fix configuration file sanity check; GitHub #342 (Ian)
|
||||
Improve event notification documentation (Ian)
|
||||
|
||||
4.0.0 2017-11-21
|
||||
Complete rewrite with many changes; for details see the repmgr 4.0.0 release
|
||||
notes at: https://repmgr.org/docs/4.0/release-4.0.0.html
|
||||
|
||||
3.3.2 2017-06-01
|
||||
Add support for PostgreSQL 10 (Ian)
|
||||
|
||||
12
Makefile.in
12
Makefile.in
@@ -37,9 +37,10 @@ include Makefile.global
|
||||
$(info Building against PostgreSQL $(MAJORVERSION))
|
||||
|
||||
REPMGR_CLIENT_OBJS = repmgr-client.o \
|
||||
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
|
||||
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
|
||||
repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
|
||||
configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o
|
||||
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o
|
||||
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o
|
||||
DATE=$(shell date "+%Y-%m-%d")
|
||||
|
||||
repmgr_version.h: repmgr_version.h.in
|
||||
@@ -63,6 +64,12 @@ Makefile: Makefile.in config.status configure
|
||||
Makefile.global: Makefile.global.in config.status configure
|
||||
./config.status $@
|
||||
|
||||
doc:
|
||||
$(MAKE) -C doc all
|
||||
|
||||
install-doc:
|
||||
$(MAKE) -C doc install
|
||||
|
||||
clean: additional-clean
|
||||
|
||||
maintainer-clean: additional-maintainer-clean
|
||||
@@ -71,6 +78,7 @@ additional-clean:
|
||||
rm -f repmgr-client.o
|
||||
rm -f repmgr-action-primary.o
|
||||
rm -f repmgr-action-standby.o
|
||||
rm -f repmgr-action-witness.o
|
||||
rm -f repmgr-action-bdr.o
|
||||
rm -f repmgr-action-node.o
|
||||
rm -f repmgr-action-cluster.o
|
||||
|
||||
69
configfile.c
69
configfile.c
@@ -73,6 +73,59 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
|
||||
strncpy(config_file_path, config_file, MAXPGPATH);
|
||||
canonicalize_path(config_file_path);
|
||||
|
||||
/* relative path supplied - convert to absolute path */
|
||||
if (config_file_path[0] != '/')
|
||||
{
|
||||
PQExpBufferData fullpath;
|
||||
char *pwd = NULL;
|
||||
|
||||
initPQExpBuffer(&fullpath);
|
||||
|
||||
/*
|
||||
* we'll attempt to use $PWD to derive the effective path; getcwd()
|
||||
* will likely resolve symlinks, which may result in a path which
|
||||
* isn't permanent (e.g. if filesystem mountpoints change).
|
||||
*/
|
||||
pwd = getenv("PWD");
|
||||
|
||||
if (pwd != NULL)
|
||||
{
|
||||
appendPQExpBuffer(&fullpath,
|
||||
"%s", pwd);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* $PWD not available - fall back to getcwd() */
|
||||
char cwd[MAXPGPATH] = "";
|
||||
|
||||
if (getcwd(cwd, MAXPGPATH) == NULL)
|
||||
{
|
||||
log_error(_("unable to execute getcwd()"));
|
||||
log_detail("%s", strerror(errno));
|
||||
|
||||
termPQExpBuffer(&fullpath);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
appendPQExpBuffer(&fullpath,
|
||||
"%s",
|
||||
cwd);
|
||||
}
|
||||
|
||||
appendPQExpBuffer(&fullpath,
|
||||
"/%s", config_file_path);
|
||||
|
||||
log_debug("relative configuration file converted to:\n \"%s\"",
|
||||
fullpath.data);
|
||||
|
||||
strncpy(config_file_path, fullpath.data, MAXPGPATH);
|
||||
|
||||
termPQExpBuffer(&fullpath);
|
||||
|
||||
canonicalize_path(config_file_path);
|
||||
}
|
||||
|
||||
|
||||
if (stat(config_file_path, &stat_config) != 0)
|
||||
{
|
||||
log_error(_("provided configuration file \"%s\" not found: %s"),
|
||||
@@ -81,6 +134,7 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
if (verbose == true)
|
||||
{
|
||||
log_notice(_("using provided configuration file \"%s\""), config_file);
|
||||
@@ -261,6 +315,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
memset(options->recovery_min_apply_delay, 0, sizeof(options->recovery_min_apply_delay));
|
||||
options->recovery_min_apply_delay_provided = false;
|
||||
options->use_primary_conninfo_password = false;
|
||||
memset(options->passfile, 0, sizeof(options->passfile));
|
||||
|
||||
/*-----------------
|
||||
* repmgrd settings
|
||||
@@ -283,6 +338,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
|
||||
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
|
||||
|
||||
/*-------------
|
||||
* witness settings
|
||||
*-------------
|
||||
*/
|
||||
options->witness_sync_interval = DEFAULT_WITNESS_SYNC_INTERVAL;
|
||||
|
||||
/*-------------
|
||||
* BDR settings
|
||||
*-------------
|
||||
@@ -441,6 +502,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
}
|
||||
else if (strcmp(name, "use_primary_conninfo_password") == 0)
|
||||
options->use_primary_conninfo_password = parse_bool(value, name, error_list);
|
||||
else if (strcmp(name, "passfile") == 0)
|
||||
strncpy(options->passfile, value, sizeof(options->passfile));
|
||||
|
||||
/* node check settings */
|
||||
else if (strcmp(name, "archive_ready_warning") == 0)
|
||||
@@ -494,6 +557,10 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
else if (strcmp(name, "primary_follow_timeout") == 0)
|
||||
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
/* witness settings */
|
||||
else if (strcmp(name, "witness_sync_interval") == 0)
|
||||
options->witness_sync_interval = repmgr_atoi(value, name, error_list, 1);
|
||||
|
||||
/* BDR settings */
|
||||
else if (strcmp(name, "bdr_local_monitoring_only") == 0)
|
||||
options->bdr_local_monitoring_only = parse_bool(value, name, error_list);
|
||||
@@ -677,7 +744,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
item_list_append(error_list,
|
||||
_("use \"barman_host\" for the hostname of the Barman server"));
|
||||
item_list_append(error_list,
|
||||
_("use \"barman_server\" for the name of the [server] section in the Barman configururation file"));
|
||||
_("use \"barman_server\" for the name of the [server] section in the Barman configuration file"));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -90,6 +90,7 @@ typedef struct
|
||||
char recovery_min_apply_delay[MAXLEN];
|
||||
bool recovery_min_apply_delay_provided;
|
||||
bool use_primary_conninfo_password;
|
||||
char passfile[MAXPGPATH];
|
||||
|
||||
/* node check settings */
|
||||
int archive_ready_warning;
|
||||
@@ -97,6 +98,9 @@ typedef struct
|
||||
int replication_lag_warning;
|
||||
int replication_lag_critical;
|
||||
|
||||
/* witness settings */
|
||||
int witness_sync_interval;
|
||||
|
||||
/* repmgrd settings */
|
||||
failover_mode_opt failover;
|
||||
char location[MAXLEN];
|
||||
@@ -153,10 +157,12 @@ typedef struct
|
||||
/* log settings */ \
|
||||
"", "", "", DEFAULT_LOG_STATUS_INTERVAL, \
|
||||
/* standby action settings */ \
|
||||
false, "", "", { NULL, NULL }, "", false, false, \
|
||||
false, "", "", { NULL, NULL }, "", false, false, "", \
|
||||
/* node check settings */ \
|
||||
DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
|
||||
DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
|
||||
/* witness settings */ \
|
||||
DEFAULT_WITNESS_SYNC_INTERVAL, \
|
||||
/* repmgrd settings */ \
|
||||
FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \
|
||||
DEFAULT_MONITORING_INTERVAL, \
|
||||
|
||||
21
configure
vendored
21
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for repmgr 4.0beta1.
|
||||
# Generated by GNU Autoconf 2.69 for repmgr 4.0.1.
|
||||
#
|
||||
# Report bugs to <pgsql-bugs@postgresql.org>.
|
||||
#
|
||||
@@ -582,8 +582,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='repmgr'
|
||||
PACKAGE_TARNAME='repmgr'
|
||||
PACKAGE_VERSION='4.0beta1'
|
||||
PACKAGE_STRING='repmgr 4.0beta1'
|
||||
PACKAGE_VERSION='4.0.1'
|
||||
PACKAGE_STRING='repmgr 4.0.1'
|
||||
PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
|
||||
PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'
|
||||
|
||||
@@ -1179,7 +1179,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures repmgr 4.0beta1 to adapt to many kinds of systems.
|
||||
\`configure' configures repmgr 4.0.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1240,7 +1240,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of repmgr 4.0beta1:";;
|
||||
short | recursive ) echo "Configuration of repmgr 4.0.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1319,7 +1319,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
repmgr configure 4.0beta1
|
||||
repmgr configure 4.0.1
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -1338,7 +1338,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by repmgr $as_me 4.0beta1, which was
|
||||
It was created by repmgr $as_me 4.0.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -1871,6 +1871,8 @@ ac_config_files="$ac_config_files Makefile"
|
||||
|
||||
ac_config_files="$ac_config_files Makefile.global"
|
||||
|
||||
ac_config_files="$ac_config_files doc/Makefile"
|
||||
|
||||
cat >confcache <<\_ACEOF
|
||||
# This file is a shell script that caches the results of configure
|
||||
# tests run on this system so they can be shared between configure
|
||||
@@ -2377,7 +2379,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by repmgr $as_me 4.0beta1, which was
|
||||
This file was extended by repmgr $as_me 4.0.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -2440,7 +2442,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
repmgr config.status 4.0beta1
|
||||
repmgr config.status 4.0.1
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
@@ -2564,6 +2566,7 @@ do
|
||||
"config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
|
||||
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
|
||||
"Makefile.global") CONFIG_FILES="$CONFIG_FILES Makefile.global" ;;
|
||||
"doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
|
||||
|
||||
*) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
|
||||
esac
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
AC_INIT([repmgr], [4.0beta1], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
AC_INIT([repmgr], [4.0.1], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
|
||||
AC_COPYRIGHT([Copyright (c) 2010-2017, 2ndQuadrant Ltd.])
|
||||
|
||||
@@ -65,5 +65,6 @@ AC_SUBST(vpath_build)
|
||||
|
||||
AC_CONFIG_FILES([Makefile])
|
||||
AC_CONFIG_FILES([Makefile.global])
|
||||
AC_CONFIG_FILES([doc/Makefile])
|
||||
AC_OUTPUT
|
||||
|
||||
|
||||
357
dbutils.c
357
dbutils.c
@@ -27,7 +27,6 @@
|
||||
#include "repmgr.h"
|
||||
#include "dbutils.h"
|
||||
#include "controldata.h"
|
||||
|
||||
#include "dirutil.h"
|
||||
|
||||
/* mainly for use by repmgrd */
|
||||
@@ -50,6 +49,7 @@ static void _populate_node_records(PGresult *res, NodeInfoList *node_list);
|
||||
static bool _create_update_node_record(PGconn *conn, char *action, t_node_info *node_info);
|
||||
static bool _create_event(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details, t_event_info *event_info, bool send_notification);
|
||||
|
||||
static bool _is_bdr_db(PGconn *conn, PQExpBufferData *output, bool quiet);
|
||||
static void _populate_bdr_node_record(PGresult *res, t_bdr_node_info *node_info, int row);
|
||||
static void _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list);
|
||||
|
||||
@@ -594,7 +594,7 @@ parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *param_lis
|
||||
(option->val != NULL && option->val[0] == '\0'))
|
||||
continue;
|
||||
|
||||
/* Ignore application_name */
|
||||
/* Ignore settings specific to the upstream node */
|
||||
if (ignore_local_params == true)
|
||||
{
|
||||
if (strcmp(option->keyword, "application_name") == 0)
|
||||
@@ -678,6 +678,33 @@ param_list_to_string(t_conninfo_param_list *param_list)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* check whether the libpq version in use recognizes the "passfile" parameter
|
||||
* (should be 9.6 and later)
|
||||
*/
|
||||
bool
|
||||
has_passfile(void)
|
||||
{
|
||||
PQconninfoOption *defs = PQconndefaults();
|
||||
PQconninfoOption *def = NULL;
|
||||
bool has_passfile = false;
|
||||
|
||||
for (def = defs; def->keyword; def++)
|
||||
{
|
||||
if (strcmp(def->keyword, "passfile") == 0)
|
||||
{
|
||||
has_passfile = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
PQconninfoFree(defs);
|
||||
|
||||
return has_passfile;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ===================== */
|
||||
/* transaction functions */
|
||||
/* ===================== */
|
||||
@@ -1121,6 +1148,7 @@ _get_primary_connection(PGconn *conn,
|
||||
" CASE WHEN type = 'primary' THEN 1 ELSE 2 END AS type_priority"
|
||||
" FROM repmgr.nodes "
|
||||
" WHERE active IS TRUE "
|
||||
" AND type != 'witness' "
|
||||
"ORDER BY active DESC, type_priority, priority, node_id");
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_primary_connection():\n%s", query.data);
|
||||
@@ -1554,7 +1582,7 @@ repmgrd_set_local_node_id(PGconn *conn, int local_node_id)
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT repmgr.set_local_node_id(%i)",
|
||||
"SELECT repmgr.set_local_node_id(%i)",
|
||||
local_node_id);
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
@@ -1572,6 +1600,26 @@ repmgrd_set_local_node_id(PGconn *conn, int local_node_id)
|
||||
|
||||
|
||||
|
||||
int
|
||||
repmgrd_get_local_node_id(PGconn *conn)
|
||||
{
|
||||
PGresult *res = NULL;
|
||||
int local_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
res = PQexec(conn, "SELECT repmgr.get_local_node_id()");
|
||||
|
||||
if (!PQgetisnull(res, 0, 0))
|
||||
{
|
||||
local_node_id = atoi(PQgetvalue(res, 0, 0));
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return local_node_id;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ================ */
|
||||
/* result functions */
|
||||
/* ================ */
|
||||
@@ -1769,6 +1817,10 @@ parse_node_type(const char *type)
|
||||
{
|
||||
return STANDBY;
|
||||
}
|
||||
else if (strcmp(type, "witness") == 0)
|
||||
{
|
||||
return WITNESS;
|
||||
}
|
||||
else if (strcmp(type, "bdr") == 0)
|
||||
{
|
||||
return BDR;
|
||||
@@ -1787,6 +1839,8 @@ get_node_type_string(t_server_type type)
|
||||
return "primary";
|
||||
case STANDBY:
|
||||
return "standby";
|
||||
case WITNESS:
|
||||
return "witness";
|
||||
case BDR:
|
||||
return "bdr";
|
||||
/* this should never happen */
|
||||
@@ -2076,7 +2130,7 @@ get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list)
|
||||
" SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, "
|
||||
" n.slot_name, n.location, n.priority, n.active, un.node_name AS upstream_node_name "
|
||||
" FROM repmgr.nodes n "
|
||||
" LEFT JOIN nodes un "
|
||||
" LEFT JOIN repmgr.nodes un "
|
||||
" ON un.node_id = n.upstream_node_id"
|
||||
" ORDER BY n.node_id ");
|
||||
|
||||
@@ -2437,6 +2491,57 @@ update_node_record_conn_priority(PGconn *conn, t_configuration_options *options)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Copy node records from primary to witness servers.
|
||||
*
|
||||
* This is used when initially registering a witness server, and
|
||||
* by repmgrd to update the node records when required.
|
||||
*/
|
||||
|
||||
bool
|
||||
witness_copy_node_records(PGconn *primary_conn, PGconn *witness_conn)
|
||||
{
|
||||
PGresult *res = NULL;
|
||||
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
NodeInfoListCell *cell = NULL;
|
||||
|
||||
begin_transaction(witness_conn);
|
||||
|
||||
/* Defer constraints */
|
||||
|
||||
res = PQexec(witness_conn, "SET CONSTRAINTS ALL DEFERRED");
|
||||
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_error(_("unable to defer constraints:\n %s"),
|
||||
PQerrorMessage(witness_conn));
|
||||
rollback_transaction(witness_conn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* truncate existing records */
|
||||
|
||||
if (truncate_node_records(witness_conn) == false)
|
||||
{
|
||||
rollback_transaction(witness_conn);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
get_all_node_records(primary_conn, &nodes);
|
||||
|
||||
for (cell = nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
create_node_record(witness_conn, NULL, cell->node_info);
|
||||
}
|
||||
|
||||
/* and done */
|
||||
commit_transaction(witness_conn);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
delete_node_record(PGconn *conn, int node)
|
||||
{
|
||||
@@ -2468,9 +2573,27 @@ delete_node_record(PGconn *conn, int node)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
truncate_node_records(PGconn *conn)
|
||||
{
|
||||
PGresult *res = NULL;
|
||||
|
||||
res = PQexec(conn, "TRUNCATE TABLE repmgr.nodes");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_error(_("unable to truncate node record table:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
get_node_replication_stats(PGconn *conn, t_node_info *node_info)
|
||||
get_node_replication_stats(PGconn *conn, int server_version_num, t_node_info *node_info)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
@@ -2479,13 +2602,32 @@ get_node_replication_stats(PGconn *conn, t_node_info *node_info)
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT current_setting('max_wal_senders')::INT AS max_wal_senders, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_stat_replication) AS attached_wal_receivers, "
|
||||
" current_setting('max_replication_slots')::INT AS max_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots) AS total_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = TRUE) AS active_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = FALSE) AS inactive_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_stat_replication) AS attached_wal_receivers, ");
|
||||
|
||||
/* no replication slots in PostgreSQL 9.3 */
|
||||
if (server_version_num < 90400)
|
||||
{
|
||||
appendPQExpBuffer(&query,
|
||||
" 0 AS max_replication_slots, "
|
||||
" 0 AS total_replication_slots, "
|
||||
" 0 AS active_replication_slots, "
|
||||
" 0 AS inactive_replication_slots, ");
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(&query,
|
||||
" current_setting('max_replication_slots')::INT AS max_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots) AS total_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = TRUE) AS active_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = FALSE) AS inactive_replication_slots, ");
|
||||
}
|
||||
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" pg_catalog.pg_is_in_recovery() AS in_recovery");
|
||||
|
||||
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
@@ -2962,6 +3104,7 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char
|
||||
char *end_ptr = NULL;
|
||||
int r = 0;
|
||||
|
||||
log_verbose(LOG_DEBUG, "_create_event(): command is '%s'", options->event_notification_command);
|
||||
/*
|
||||
* If configuration option 'event_notifications' was provided, check
|
||||
* if this event is one of the ones listed; if not listed, don't
|
||||
@@ -3040,8 +3183,14 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char
|
||||
src_ptr++;
|
||||
if (details != NULL)
|
||||
{
|
||||
strlcpy(dst_ptr, details, end_ptr - dst_ptr);
|
||||
PQExpBufferData details_escaped;
|
||||
initPQExpBuffer(&details_escaped);
|
||||
|
||||
escape_double_quotes(details, &details_escaped);
|
||||
|
||||
strlcpy(dst_ptr, details_escaped.data, end_ptr - dst_ptr);
|
||||
dst_ptr += strlen(dst_ptr);
|
||||
termPQExpBuffer(&details_escaped);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
@@ -3421,6 +3570,7 @@ is_server_available(const char *conninfo)
|
||||
{
|
||||
PGPing status = PQping(conninfo);
|
||||
|
||||
log_verbose(LOG_DEBUG, "ping status for %s is %i", conninfo, (int)status);
|
||||
if (status == PQPING_OK)
|
||||
return true;
|
||||
|
||||
@@ -3578,141 +3728,77 @@ delete_monitoring_records(PGconn *primary_conn, int keep_history)
|
||||
/*
|
||||
* node voting functions
|
||||
*
|
||||
* These are intended to run under repmgrd and rely on shared memory
|
||||
* These are intended to run under repmgrd and mainly rely on shared memory
|
||||
*/
|
||||
|
||||
NodeVotingStatus
|
||||
get_voting_status(PGconn *conn)
|
||||
int
|
||||
get_current_term(PGconn *conn)
|
||||
{
|
||||
PGresult *res = NULL;
|
||||
NodeVotingStatus voting_status = VS_UNKNOWN;
|
||||
int term = VOTING_TERM_NOT_SET;
|
||||
|
||||
res = PQexec(conn, "SELECT repmgr.get_voting_status()");
|
||||
res = PQexec(conn, "SELECT term FROM repmgr.voting_term");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to query repmgr.get_voting_status():\n %s"),
|
||||
log_error(_("unable to query repmgr.voting_term:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return VS_UNKNOWN;
|
||||
return -1;
|
||||
}
|
||||
|
||||
voting_status = atoi(PQgetvalue(res, 0, 0));
|
||||
if (PQntuples(res) > 0)
|
||||
{
|
||||
term = atoi(PQgetvalue(res, 0, 0));
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
return voting_status;
|
||||
return term;
|
||||
}
|
||||
|
||||
|
||||
VoteRequestResult
|
||||
request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term)
|
||||
void
|
||||
initialize_voting_term(PGconn *conn)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
int lsn_diff = 0;
|
||||
|
||||
other_node->last_wal_receive_lsn = InvalidXLogRecPtr;
|
||||
int current_term = get_current_term(conn);
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT repmgr.request_vote(%i, %i)",
|
||||
this_node->node_id,
|
||||
electoral_term);
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
/* check for NULL */
|
||||
if (PQgetisnull(res, 0, 0))
|
||||
if (current_term == VOTING_TERM_NOT_SET)
|
||||
{
|
||||
PQclear(res);
|
||||
|
||||
log_debug("NULL returned by repmgr.request_vote()");
|
||||
|
||||
/*
|
||||
* get the node's last receive location anyway TODO: have
|
||||
* repmgr.request_vote() return two values
|
||||
*/
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
if (server_version_num >= 100000)
|
||||
{
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT pg_catalog.pg_last_xlog_receive_location()");
|
||||
}
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) == PGRES_TUPLES_OK)
|
||||
{
|
||||
other_node->last_wal_receive_lsn = parse_lsn(PQgetvalue(res, 0, 0));
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return VR_VOTE_REFUSED;
|
||||
res = PQexec(conn, "INSERT INTO repmgr.voting_term (term) VALUES (1)");
|
||||
}
|
||||
else
|
||||
{
|
||||
res = PQexec(conn, "UPDATE repmgr.voting_term SET term = 1");
|
||||
}
|
||||
|
||||
other_node->last_wal_receive_lsn = parse_lsn(PQgetvalue(res, 0, 0));
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_error(_("unable to initialize repmgr.voting_term:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
lsn_diff = this_node->last_wal_receive_lsn - other_node->last_wal_receive_lsn;
|
||||
|
||||
log_debug("lsn_diff %i", lsn_diff);
|
||||
|
||||
/* we're ahead */
|
||||
if (lsn_diff > 0)
|
||||
{
|
||||
log_debug("local node is ahead");
|
||||
return VR_POSITIVE_VOTE;
|
||||
}
|
||||
|
||||
|
||||
/* other node is ahead */
|
||||
if (lsn_diff < 0)
|
||||
{
|
||||
log_debug("other node is ahead");
|
||||
return VR_NEGATIVE_VOTE;
|
||||
}
|
||||
|
||||
/* tiebreak */
|
||||
|
||||
/* other node is higher priority */
|
||||
if (this_node->priority < other_node->priority)
|
||||
{
|
||||
log_debug("other node has higher priority");
|
||||
return VR_NEGATIVE_VOTE;
|
||||
}
|
||||
|
||||
/* still tiebreak - we're the candidate, so we win */
|
||||
log_debug("win by default");
|
||||
return VR_POSITIVE_VOTE;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
set_voting_status_initiated(PGconn *conn)
|
||||
void
|
||||
increment_current_term(PGconn *conn)
|
||||
{
|
||||
PGresult *res = NULL;
|
||||
int electoral_term = 0;
|
||||
|
||||
res = PQexec(conn, "SELECT repmgr.set_voting_status_initiated()");
|
||||
res = PQexec(conn, "UPDATE repmgr.voting_term SET term = term + 1");
|
||||
|
||||
electoral_term = atoi(PQgetvalue(res, 0, 0));
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_error(_("unable to increment repmgr.voting_term:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return electoral_term;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -3748,7 +3834,6 @@ notify_follow_primary(PGconn *conn, int primary_node_id)
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
@@ -3756,10 +3841,17 @@ notify_follow_primary(PGconn *conn, int primary_node_id)
|
||||
primary_node_id);
|
||||
log_verbose(LOG_DEBUG, "notify_follow_primary():\n %s", query.data);
|
||||
|
||||
/* XXX handle failure */
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to execute repmgr.notify_follow_primary():\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to execute repmgr.notify_follow_primary():\n %s"),
|
||||
@@ -3786,16 +3878,24 @@ get_new_primary(PGconn *conn, int *primary_node_id)
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
/* XXX handle error */
|
||||
|
||||
new_primary_node_id = atoi(PQgetvalue(res, 0, 0));
|
||||
|
||||
if (new_primary_node_id == UNKNOWN_NODE_ID)
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to execute repmgr.reset_voting_status():\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (PQgetisnull(res, 0, 0))
|
||||
{
|
||||
*primary_node_id = UNKNOWN_NODE_ID;
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
new_primary_node_id = atoi(PQgetvalue(res, 0, 0));
|
||||
|
||||
PQclear(res);
|
||||
|
||||
*primary_node_id = new_primary_node_id;
|
||||
@@ -3891,8 +3991,8 @@ get_last_wal_receive_location(PGconn *conn)
|
||||
/* BDR functions */
|
||||
/* ============= */
|
||||
|
||||
bool
|
||||
is_bdr_db(PGconn *conn, PQExpBufferData *output)
|
||||
static bool
|
||||
_is_bdr_db(PGconn *conn, PQExpBufferData *output, bool quiet)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
@@ -3923,7 +4023,7 @@ is_bdr_db(PGconn *conn, PQExpBufferData *output)
|
||||
|
||||
if (output != NULL)
|
||||
appendPQExpBuffer(output, "%s", warning);
|
||||
else
|
||||
else if (quiet == false)
|
||||
log_warning("%s", warning);
|
||||
|
||||
return is_bdr_db;
|
||||
@@ -3944,7 +4044,7 @@ is_bdr_db(PGconn *conn, PQExpBufferData *output)
|
||||
|
||||
if (output != NULL)
|
||||
appendPQExpBuffer(output, "%s", warning);
|
||||
else
|
||||
else if (quiet == false)
|
||||
log_warning("%s", warning);
|
||||
}
|
||||
|
||||
@@ -3953,6 +4053,19 @@ is_bdr_db(PGconn *conn, PQExpBufferData *output)
|
||||
return is_bdr_db;
|
||||
}
|
||||
|
||||
bool
|
||||
is_bdr_db(PGconn *conn, PQExpBufferData *output)
|
||||
{
|
||||
return _is_bdr_db(conn, output, false);
|
||||
}
|
||||
|
||||
bool
|
||||
is_bdr_db_quiet(PGconn *conn)
|
||||
{
|
||||
return _is_bdr_db(conn, NULL, true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool
|
||||
is_active_bdr_node(PGconn *conn, const char *node_name)
|
||||
@@ -3965,7 +4078,7 @@ is_active_bdr_node(PGconn *conn, const char *node_name)
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT COALESCE(s.active, TRUE) AS active"
|
||||
" FROM bdr.bdr_nodes n "
|
||||
" LEFT JOIN pg_replication_slots s "
|
||||
" LEFT JOIN pg_catalog.pg_replication_slots s "
|
||||
" ON slot_name=bdr.bdr_format_slot_name(n.node_sysid, n.node_timeline, n.node_dboid, (SELECT oid FROM pg_database WHERE datname = current_database())) "
|
||||
" WHERE node_name='%s' ",
|
||||
node_name);
|
||||
|
||||
24
dbutils.h
24
dbutils.h
@@ -38,6 +38,7 @@ typedef enum
|
||||
UNKNOWN = 0,
|
||||
PRIMARY,
|
||||
STANDBY,
|
||||
WITNESS,
|
||||
BDR
|
||||
} t_server_type;
|
||||
|
||||
@@ -77,14 +78,6 @@ typedef enum
|
||||
NODE_STATUS_UNCLEAN_SHUTDOWN
|
||||
} NodeStatus;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
VR_VOTE_REFUSED = -1,
|
||||
VR_POSITIVE_VOTE,
|
||||
VR_NEGATIVE_VOTE
|
||||
} VoteRequestResult;
|
||||
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SLOT_UNKNOWN = -1,
|
||||
@@ -365,6 +358,7 @@ void param_set_ine(t_conninfo_param_list *param_list, const char *param, const
|
||||
char *param_get(t_conninfo_param_list *param_list, const char *param);
|
||||
bool parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *param_list, char *errmsg, bool ignore_local_params);
|
||||
char *param_list_to_string(t_conninfo_param_list *param_list);
|
||||
bool has_passfile(void);
|
||||
|
||||
/* transaction functions */
|
||||
bool begin_transaction(PGconn *conn);
|
||||
@@ -390,6 +384,7 @@ bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferDat
|
||||
int get_ready_archive_files(PGconn *conn, const char *data_directory);
|
||||
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
|
||||
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
||||
int repmgrd_get_local_node_id(PGconn *conn);
|
||||
|
||||
/* extension functions */
|
||||
ExtensionStatus get_repmgr_extension_status(PGconn *conn);
|
||||
@@ -419,6 +414,7 @@ void get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
|
||||
bool create_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
||||
bool update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
||||
bool delete_node_record(PGconn *conn, int node);
|
||||
bool truncate_node_records(PGconn *conn);
|
||||
|
||||
bool update_node_record_set_active(PGconn *conn, int this_node_id, bool active);
|
||||
bool update_node_record_set_primary(PGconn *conn, int this_node_id);
|
||||
@@ -426,6 +422,9 @@ bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_up
|
||||
bool update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||
bool update_node_record_conn_priority(PGconn *conn, t_configuration_options *options);
|
||||
|
||||
bool witness_copy_node_records(PGconn *primary_conn, PGconn *witness_conn);
|
||||
|
||||
|
||||
void clear_node_info_list(NodeInfoList *nodes);
|
||||
|
||||
/* PostgreSQL configuration file location functions */
|
||||
@@ -474,9 +473,9 @@ bool delete_monitoring_records(PGconn *primary_conn, int keep_history);
|
||||
|
||||
|
||||
/* node voting functions */
|
||||
NodeVotingStatus get_voting_status(PGconn *conn);
|
||||
VoteRequestResult request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term);
|
||||
int set_voting_status_initiated(PGconn *conn);
|
||||
void initialize_voting_term(PGconn *conn);
|
||||
int get_current_term(PGconn *conn);
|
||||
void increment_current_term(PGconn *conn);
|
||||
bool announce_candidature(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term);
|
||||
void notify_follow_primary(PGconn *conn, int primary_node_id);
|
||||
bool get_new_primary(PGconn *conn, int *primary_node_id);
|
||||
@@ -487,13 +486,14 @@ XLogRecPtr get_current_wal_lsn(PGconn *conn);
|
||||
XLogRecPtr get_last_wal_receive_location(PGconn *conn);
|
||||
bool get_replication_info(PGconn *conn, ReplInfo *replication_info);
|
||||
int get_replication_lag_seconds(PGconn *conn);
|
||||
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
|
||||
void get_node_replication_stats(PGconn *conn, int server_version_num, t_node_info *node_info);
|
||||
bool is_downstream_node_attached(PGconn *conn, char *node_name);
|
||||
|
||||
/* BDR functions */
|
||||
void get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list);
|
||||
RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info);
|
||||
bool is_bdr_db(PGconn *conn, PQExpBufferData *output);
|
||||
bool is_bdr_db_quiet(PGconn *conn);
|
||||
bool is_active_bdr_node(PGconn *conn, const char *node_name);
|
||||
bool is_bdr_repmgr(PGconn *conn);
|
||||
bool is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);
|
||||
|
||||
@@ -311,6 +311,14 @@ create_pg_dir(char *path, bool force)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int
|
||||
rmdir_recursive(char *path)
|
||||
{
|
||||
return nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
||||
}
|
||||
|
||||
static int
|
||||
unlink_dir_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
|
||||
{
|
||||
|
||||
@@ -26,5 +26,5 @@ extern int check_dir(char *path);
|
||||
extern bool create_dir(char *path);
|
||||
extern bool is_pg_dir(char *path);
|
||||
extern bool create_pg_dir(char *path, bool force);
|
||||
|
||||
extern int rmdir_recursive(char *path);
|
||||
#endif
|
||||
|
||||
5
doc/.gitignore
vendored
Normal file
5
doc/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
HTML.index
|
||||
bookindex.sgml
|
||||
html-stamp
|
||||
html/
|
||||
version.sgml
|
||||
71
doc/Makefile.in
Normal file
71
doc/Makefile.in
Normal file
@@ -0,0 +1,71 @@
|
||||
repmgr_subdir = doc
|
||||
repmgr_top_builddir = ..
|
||||
include $(repmgr_top_builddir)/Makefile.global
|
||||
|
||||
ifndef JADE
|
||||
JADE = $(missing) jade
|
||||
endif
|
||||
|
||||
SGMLINCLUDE = -D . -D ${srcdir}
|
||||
|
||||
SPFLAGS += -wall -wno-unused-param -wno-empty -wfully-tagged
|
||||
|
||||
JADE.html.call = $(JADE) $(JADEFLAGS) $(SPFLAGS) $(SGMLINCLUDE) $(CATALOG) -d stylesheet.dsl -t sgml -i output-html
|
||||
|
||||
ALLSGML := $(wildcard $(srcdir)/*.sgml)
|
||||
# to build bookindex
|
||||
ALMOSTALLSGML := $(filter-out %bookindex.sgml,$(ALLSGML))
|
||||
GENERATED_SGML = version.sgml bookindex.sgml
|
||||
|
||||
Makefile: Makefile.in
|
||||
cd $(repmgr_top_builddir) && ./config.status doc/Makefile
|
||||
|
||||
all: html
|
||||
|
||||
html: html-stamp
|
||||
|
||||
html-stamp: repmgr.sgml $(ALLSGML) $(GENERATED_SGML) stylesheet.dsl website-docs.css
|
||||
$(MKDIR_P) html
|
||||
$(JADE.html.call) -i include-index $<
|
||||
cp $(srcdir)/stylesheet.css $(srcdir)/website-docs.css html/
|
||||
touch $@
|
||||
|
||||
version.sgml: ${repmgr_top_builddir}/repmgr_version.h
|
||||
{ \
|
||||
echo "<!ENTITY repmgrversion \"$(REPMGR_VERSION)\">"; \
|
||||
} > $@
|
||||
|
||||
HTML.index: repmgr.sgml $(ALMOSTALLSGML) stylesheet.dsl
|
||||
@$(MKDIR_P) html
|
||||
$(JADE.html.call) -V html-index $<
|
||||
|
||||
website-docs.css:
|
||||
@$(MKDIR_P) html
|
||||
curl http://www.postgresql.org/media/css/docs.css > ${srcdir}/website-docs.css
|
||||
|
||||
bookindex.sgml: HTML.index
|
||||
ifdef COLLATEINDEX
|
||||
LC_ALL=C $(PERL) $(COLLATEINDEX) -f -g -i 'bookindex' -o $@ $<
|
||||
else
|
||||
@$(missing) collateindex.pl $< $@
|
||||
endif
|
||||
|
||||
clean:
|
||||
rm -f html-stamp
|
||||
rm -f HTML.index $(GENERATED_SGML)
|
||||
|
||||
maintainer-clean:
|
||||
rm -rf html
|
||||
rm -rf Makefile
|
||||
|
||||
zip: html
|
||||
cp -r html repmgr-docs-$(REPMGR_VERSION)
|
||||
zip -r repmgr-docs-$(REPMGR_VERSION).zip repmgr-docs-$(REPMGR_VERSION)
|
||||
rm -rf repmgr-docs-$(REPMGR_VERSION)
|
||||
|
||||
install: html
|
||||
@$(MKDIR_P) $(DESTDIR)$(docdir)/$(docmoduledir)/repmgr
|
||||
@$(INSTALL_DATA) $(wildcard html/*.html) $(wildcard html/*.css) $(DESTDIR)$(docdir)/$(docmoduledir)/repmgr
|
||||
@echo Installed docs to $(DESTDIR)$(docdir)/$(docmoduledir)/repmgr
|
||||
|
||||
.PHONY: html all
|
||||
241
doc/appendix-faq.sgml
Normal file
241
doc/appendix-faq.sgml
Normal file
@@ -0,0 +1,241 @@
|
||||
<appendix id="appendix-faq" xreflabel="FAQ">
|
||||
<indexterm>
|
||||
<primary>FAQ (Frequently Asked Questions)</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>FAQ (Frequently Asked Questions)</title>
|
||||
|
||||
<sect1 id="faq-general" xreflabel="General">
|
||||
<title>General</title>
|
||||
|
||||
<sect2 id="faq-xrepmgr-version-diff" xreflabel="Version differences">
|
||||
<title>What's the difference between the repmgr versions?</title>
|
||||
<para>
|
||||
&repmgr; 4 is a complete rewrite of the existing &repmgr; code base
|
||||
and implements &repmgr; as a PostgreSQL extension. It
|
||||
supports all PostgreSQL versions from 9.3 (although some &repmgr;
|
||||
features are not available for PostgreSQL 9.3 and 9.4).
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; 3.x builds on the improved replication facilities added
|
||||
in PostgreSQL 9.3, as well as improved automated failover support
|
||||
via <application>repmgrd</application>, and is not compatible with PostgreSQL 9.2
|
||||
and earlier. We recommend upgrading to &repmgr; 4, as the &repmgr; 3.x
|
||||
series will no longer be actively maintained.
|
||||
</para>
|
||||
<para>
|
||||
repmgr 2.x supports PostgreSQL 9.0 ~ 9.3. While it is compatible
|
||||
with PostgreSQL 9.3, we recommend using repmgr 4.x.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-replication-slots-advantage" xreflabel="Advantages of replication slots">
|
||||
<title>What's the advantage of using replication slots?</title>
|
||||
<para>
|
||||
Replication slots, introduced in PostgreSQL 9.4, ensure that the
|
||||
primary server will retain WAL files until they have been consumed
|
||||
by all standby servers. This makes WAL file management much easier,
|
||||
and if used, &repmgr; will no longer insist on a fixed minimum number
|
||||
(default: 5000) of WAL files being retained.
|
||||
</para>
|
||||
<para>
|
||||
However this does mean that if a standby is no longer connected to the
|
||||
primary, the presence of the replication slot will cause WAL files
|
||||
to be retained indefinitely.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-replication-slots-number" xreflabel="Number of replication slots">
|
||||
<title>How many replication slots should I define in <varname>max_replication_slots</varname>?</title>
|
||||
<para>
|
||||
Normally at least the same number as the number of standbys which will connect
|
||||
to the node. Note that changes to <varname>max_replication_slots</varname> require a server
|
||||
restart to take effect, and as there is no particular penalty for unused
|
||||
replication slots, setting a higher figure will make adding new nodes
|
||||
easier.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-hash-index" xreflabel="Hash indexes">
|
||||
<title>Does &repmgr; support hash indexes?</title>
|
||||
<para>
|
||||
Before PostgreSQL 10, hash indexes were not WAL logged and are therefore not suitable
|
||||
for use in streaming replication in PostgreSQL 9.6 and earlier. See the
|
||||
<ulink url="https://www.postgresql.org/docs/9.6/static/sql-createindex.html#AEN80279">PostgreSQL documentation</ulink>
|
||||
for details.
|
||||
</para>
|
||||
<para>
|
||||
From PostgreSQL 10, this restriction has been lifted and hash indexes can be used
|
||||
in a streaming replication cluster.
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="faq-repmgr" xreflabel="repmgr">
|
||||
<title><command>repmgr</command></title>
|
||||
|
||||
<sect2 id="faq-register-existing-node" xreflabel="">
|
||||
<title>Can I register an existing PostgreSQL server with repmgr?</title>
|
||||
<para>
|
||||
Yes, any existing PostgreSQL server which is part of the same replication
|
||||
cluster can be registered with &repmgr;. There's no requirement for a
|
||||
standby to have been cloned using &repmgr;.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-failed-primary-standby" xreflabel="Reintegrate a failed primary as a standby">
|
||||
<title>How can a failed primary be re-added as a standby?</title>
|
||||
<para>
|
||||
This is a two-stage process. First, the failed primary's data directory
|
||||
must be re-synced with the current primary; secondly the failed primary
|
||||
needs to be re-registered as a standby.
|
||||
</para>
|
||||
<para>
|
||||
In PostgreSQL 9.5 and later, it's possible to use <command>pg_rewind</command>
|
||||
to re-synchronise the existing data directory, which will usually be much
|
||||
faster than re-cloning the server. However <command>pg_rewind</command> can only
|
||||
be used if PostgreSQL either has <varname>wal_log_hints</varname> enabled, or
|
||||
data checksums were enabled when the cluster was initialized.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; provides the command <command>repmgr node rejoin</command> which can
|
||||
optionally execute <command>pg_rewind</command>; see the <xref linkend="repmgr-node-rejoin">
|
||||
documentation for details.
|
||||
</para>
|
||||
<para>
|
||||
If <command>pg_rewind</command> cannot be used, then the data directory will have
|
||||
to be re-cloned from scratch.
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-check-configuration" xreflabel="Check PostgreSQL configuration">
|
||||
<title>Is there an easy way to check my primary server is correctly configured for use with &repmgr;?</title>
|
||||
<para>
|
||||
Execute <command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>
|
||||
with the <literal>--dry-run</literal> option; this will report any configuration problems
|
||||
which need to be rectified.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-clone-skip-config-files" xreflabel="">
|
||||
<title>When cloning a standby, how can I get &repmgr; to copy
|
||||
<filename>postgresql.conf</filename> and <filename>pg_hba.conf</filename> from the PostgreSQL configuration
|
||||
directory in <filename>/etc</filename>?</title>
|
||||
<para>
|
||||
Use the command line option <literal>--copy-external-config-files</literal>. For more details
|
||||
see <xref linkend="repmgr-standby-clone-config-file-copying">.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-shared-preload-libaries-no-repmgrd" xreflabel="shared_preload_libraries without repmgrd">
|
||||
<title>Do I need to include <literal>shared_preload_libraries = 'repmgr'</literal>
|
||||
in <filename>postgresql.conf</filename> if I'm not using <application>repmgrd</application>?</title>
|
||||
<para>
|
||||
No, the <literal>repmgr</literal> shared library is only needed when running <application>repmgrd</application>.
|
||||
If you later decide to run <application>repmgrd</application>, you just need to add
|
||||
<literal>shared_preload_libraries = 'repmgr'</literal> and restart PostgreSQL.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-permissions" xreflabel="Replication permission problems">
|
||||
<title>I've provided replication permission for the <literal>repmgr</literal> user in <filename>pg_hba.conf</filename>
|
||||
but <command>repmgr</command>/<application>repmgrd</application> complains it can't connect to the server... Why?</title>
|
||||
<para>
|
||||
<command>repmgr</command> and <application>repmgrd</application> need to be able to connect to the repmgr database
|
||||
with a normal connection to query metadata. The <literal>replication</literal> connection
|
||||
permission is for PostgreSQL's streaming replication (and doesn't necessarily need to be the <literal>repmgr</literal> user).
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-clone-provide-primary-conninfo" xreflabel="Providing primary connection parameters">
|
||||
<title>When cloning a standby, why do I need to provide the connection parameters
|
||||
for the primary server on the command line, not in the configuration file?</title>
|
||||
<para>
|
||||
Cloning a standby is a one-time action; the role of the server being cloned
|
||||
from could change, so fixing it in the configuration file would create
|
||||
confusion. If &repmgr; needs to establish a connection to the primary
|
||||
server, it can retrieve this from the <literal>repmgr.nodes</literal> table on the local
|
||||
node, and if necessary scan the replication cluster until it locates the active primary.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-clone-waldir-xlogdir" xreflabel="Providing a custom WAL directory">
|
||||
<title>When cloning a standby, how do I ensure the WAL files are placed in a custom directory?</title>
|
||||
<para>
|
||||
Provide the option <literal>--waldir</literal> (<literal>--xlogdir</literal> in PostgreSQL 9.6
|
||||
and earlier) with the absolute path to the WAL directory in <varname>pg_basebackup_options</varname>.
|
||||
For more details see <xref linkend="cloning-advanced-pg-basebackup-options">.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-events-no-fkey" xreflabel="No foreign key on node_id in repmgr.events">
|
||||
<title>Why is there no foreign key on the <literal>node_id</literal> column in the <literal>repmgr.events</literal>
|
||||
table?</title>
|
||||
<para>
|
||||
Under some circumstances event notifications can be generated for servers
|
||||
which have not yet been registered; it's also useful to retain a record
|
||||
of events which includes servers removed from the replication cluster
|
||||
which no longer have an entry in the <literal>repmgr.nodes</literal> table.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="faq-repmgrd" xreflabel="repmgrd">
|
||||
<title><application>repmgrd</application></title>
|
||||
|
||||
|
||||
<sect2 id="faq-repmgrd-prevent-promotion" xreflabel="Prevent standby from being promoted to primary">
|
||||
<title>How can I prevent a node from ever being promoted to primary?</title>
|
||||
<para>
|
||||
In <filename>repmgr.conf</filename>, set its priority to a value of 0 or less; apply the changed setting with
|
||||
<command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>.
|
||||
</para>
|
||||
<para>
|
||||
Additionally, if <varname>failover</varname> is set to <literal>manual</literal>, the node will never
|
||||
be considered as a promotion candidate.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgrd-delayed-standby" xreflabel="Delayed standby support">
|
||||
<title>Does <application>repmgrd</application> support delayed standbys?</title>
|
||||
<para>
|
||||
<application>repmgrd</application> can monitor delayed standbys - those set up with
|
||||
<varname>recovery_min_apply_delay</varname> set to a non-zero value
|
||||
in <filename>recovery.conf</filename> - but as it's not currently possible
|
||||
to directly examine the value applied to the standby, <application>repmgrd</application>
|
||||
may not be able to properly evaluate the node as a promotion candidate.
|
||||
</para>
|
||||
<para>
|
||||
We recommend that delayed standbys are explicitly excluded from promotion
|
||||
by setting <varname>priority</varname> to <literal>0</literal> in
|
||||
<filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
<para>
|
||||
Note that after registering a delayed standby, <application>repmgrd</application> will only start
|
||||
once the metadata added in the primary node has been replicated.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgrd-logfile-rotate" xreflabel="repmgrd logfile rotation">
|
||||
<title>How can I get <application>repmgrd</application> to rotate its logfile?</title>
|
||||
<para>
|
||||
Configure your system's <literal>logrotate</literal> service to do this; see <xref linkend="repmgrd-log-rotation">.
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgrd-recloned-no-start" xreflabel="repmgrd not restarting after node cloned">
|
||||
<title>I've recloned a failed primary as a standby, but <application>repmgrd</application> refuses to start?</title>
|
||||
<para>
|
||||
Check you registered the standby after recloning. If unregistered, the standby
|
||||
cannot be considered as a promotion candidate even if <varname>failover</varname> is set to
|
||||
<literal>automatic</literal>, which is probably not what you want. <application>repmgrd</application> will start if
|
||||
<varname>failover</varname> is set to <literal>manual</literal> so the node's replication status can still
|
||||
be monitored, if desired.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
</appendix>
|
||||
429
doc/appendix-release-notes.sgml
Normal file
429
doc/appendix-release-notes.sgml
Normal file
@@ -0,0 +1,429 @@
|
||||
<appendix id="appendix-release-notes">
|
||||
<title>Release notes</title>
|
||||
<indexterm>
|
||||
<primary>Release notes</primary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
Changes to each &repmgr; release are documented in the release notes.
|
||||
Please read the release notes for all versions between
|
||||
your current version and the version you plan to upgrade to
|
||||
before performing an upgrade, as there may be version-specific upgrade steps.
|
||||
</para>
|
||||
|
||||
|
||||
<para>
|
||||
See also: <xref linkend="upgrading-repmgr">
|
||||
</para>
|
||||
|
||||
<sect1 id="release-4.0.1">
|
||||
<title>Release 4.0.1</title>
|
||||
|
||||
<para><emphasis>Mon Dec 4, 2017</emphasis></para>
|
||||
|
||||
<para>
|
||||
repmgr 4.0.1 is a bugfix release.
|
||||
</para>
|
||||
<sect2>
|
||||
<title>Bug fixes</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
Ensure correct return codes are returned for
|
||||
<command><link linkend="repmgr-node-check">repmgr node check --action=</link></command> operations
|
||||
(GitHub #340)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Fix <xref linkend="repmgr-cluster-show"> when <literal>repmgr</literal> schema not set in search path
|
||||
(GitHub #341)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
When using <literal>--force-rewind</literal> with <xref linkend="repmgr-node-rejoin">
|
||||
delete any replication slots copied by <application>pg_rewind</application>
|
||||
(GitHub #334)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Only perform sanity check on accessibility of configuration files outside
|
||||
the data directory when <literal>--copy-external-config-files</literal>
|
||||
provided (GitHub #342)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Initialise "voting_term" table in application, not extension SQL
|
||||
(GitHub #344)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
|
||||
|
||||
<sect1 id="release-4.0.0">
|
||||
<title>Release 4.0.0</title>
|
||||
|
||||
<para><emphasis>Tue Nov 21, 2017</emphasis></para>
|
||||
|
||||
<para>
|
||||
repmgr 4.0 is an entirely new version of &repmgr;, implementing &repmgr;
|
||||
as a native PostgreSQL extension, adding new and improving existing features,
|
||||
and making &repmgr; more user-friendly and intuitive to use. The new code base
|
||||
will make it easier to add additional functionality for future releases.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
With the new version, the opportunity has been taken to
|
||||
make some changes in the way &repmgr; is set up and
|
||||
configured. In particular changes have been made to some
|
||||
configuration file settings for consistency and clarity.
|
||||
Changes are covered in detail below.
|
||||
</simpara>
|
||||
<simpara>
|
||||
To standardise terminology, from this release <literal>primary</literal> is used to
|
||||
denote the read/write node in a streaming replication cluster. <literal>master</literal>
|
||||
is still accepted as an alias for &repmgr; commands
|
||||
(e.g. <link linkend="repmgr-primary-register"><command>repmgr master register</command></link>).
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
For detailed instructions on upgrading from repmgr 3.x, see <xref linkend="upgrading-from-repmgr-3">.
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
<title>Features and improvements</title>
|
||||
<para>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
<emphasis>improved switchover</emphasis>:
|
||||
the <command>switchover</command> process has been improved and streamlined,
|
||||
speeding up the switchover process; &repmgr; can also instruct other standbys
|
||||
to follow the new primary once the switchover has completed. See
|
||||
<xref linkend="performing-switchover"> for more details.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<emphasis>"--dry-run" option</emphasis>: many &repmgr; commands now provide
|
||||
a <literal>--dry-run</literal> option which will execute the command as far
|
||||
as possible without making any changes, which will enable possible issues
|
||||
to be identified before the intended operation is actually carried out.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<emphasis>easier upgrades</emphasis>: &repmgr; is now implemented as a native
|
||||
PostgreSQL extension, which means future upgrades can be carried out by
|
||||
installing the upgraded package and issuing
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/sql-alterextension.html">ALTER EXTENSION repmgr UPDATE</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<emphasis>improved logging output</emphasis>:
|
||||
&repmgr; (and <application>repmgrd</application>) now provide more explicit
|
||||
logging output giving a better picture of what is going on. Where appropriate,
|
||||
<literal>DETAIL</literal> and <literal>HINT</literal> log lines provide additional
|
||||
detail and suggestions for resolving problems. Additionally, <application>repmgrd</application>
|
||||
now emits informational log lines at regular, configurable intervals
|
||||
to confirm that it's running correctly and which node(s) it's monitoring.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<emphasis>automatic configuration file location in packages</emphasis>:
|
||||
Many operating system packages place the &repmgr; configuration files
|
||||
in a version-specific subdirectory, e.g. <filename>/etc/repmgr/9.6/repmgr.conf</filename>;
|
||||
&repmgr; now makes it easy for package maintainers to provide a patch
|
||||
with the actual file location, meaning <filename>repmgr.conf</filename>
|
||||
does not need to be provided explicitly. This is currently the case
|
||||
for 2ndQuadrant-provided <literal>.deb</literal> and <literal>.rpm</literal> packages.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<emphasis>monitoring and status checks</emphasis>:
|
||||
New commands <xref linkend="repmgr-node-check"> and
|
||||
<xref linkend="repmgr-node-status"> providing information
|
||||
about a node's status and replication-related monitoring
|
||||
output.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<emphasis>node rejoin</emphasis>:
|
||||
New command <xref linkend="repmgr-node-rejoin"> enables a failed
|
||||
primary to be rejoined to a replication cluster, optionally using
|
||||
<application>pg_rewind</application> to synchronise its data,
|
||||
(note that <application>pg_rewind</application> may not be useable
|
||||
in some circumstances).
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<emphasis>automatic failover</emphasis>:
|
||||
improved detection of node status; promotion decision based on a consensual
|
||||
model, with the promoted primary explicitly informing other standbys to
|
||||
follow it. The <application>repmgrd</application> daemon will continue
|
||||
functioning even if the monitored PostgreSQL instance is down, and resume
|
||||
monitoring if it reappears. Additionally, if the instance's role has changed
|
||||
(typically from a primary to a standby, e.g. following reintegration of a
|
||||
failed primary using <xref linkend="repmgr-node-rejoin">) <application>repmgrd</application>
|
||||
will automatically resume monitoring it as a standby.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<emphasis>new documentation</emphasis>:
|
||||
the existing documentation spread over multiple text files
|
||||
has been consolidated into DocBook format (as used by the
|
||||
main PostgreSQL project) and is now available online in
|
||||
HTML format.
|
||||
</para>
|
||||
<para>
|
||||
The DocBook files can easily be used to create versions
|
||||
of the documentation in other formats such as PDF.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
</sect2>
|
||||
<sect2>
|
||||
<title>New command line options</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
<listitem><para>
|
||||
<literal>--dry-run</literal>: &repmgr; will attempt to perform
|
||||
the action as far as possible without making any changes to the
|
||||
database
|
||||
</para></listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>--upstream-node-id</literal>: use to specify the upstream node
|
||||
the standby will connect to and later stream from, when <link linkend="repmgr-standby-clone">cloning</link>
|
||||
and <link linkend="repmgr-standby-register">registering</link> a standby.
|
||||
</para>
|
||||
<para>
|
||||
This replaces the configuration file parameter <varname>upstream_node</varname>,
|
||||
as the upstream node is set when the standby is initially cloned, but can change
|
||||
over the lifetime of an installation (due to failovers, switchovers etc.) so it's
|
||||
pointless/confusing keeping the original value around in <filename>repmgr.conf</filename>.
|
||||
</para></listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Changed command line options</title>
|
||||
<para>
|
||||
<application>repmgr</application>
|
||||
<itemizedlist>
|
||||
|
||||
<listitem><para>
|
||||
<literal>--replication-user</literal> has been deprecated; it has been replaced
|
||||
by the configuration file option <varname>replication_user</varname>.
|
||||
The value (which defaults to the user provided in the <varname>conninfo</varname>
|
||||
string) will be stored in the &repmgr; metadata for use by
|
||||
<xref linkend="repmgr-standby-clone"> and <xref linkend="repmgr-standby-follow">.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<literal>--recovery-min-apply-delay</literal> is now a configuration file parameter
|
||||
<varname>recovery_min_apply_delay</varname>, to ensure the setting does not get lost
|
||||
when a standby follows a new upstream.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<literal>--no-conninfo-password</literal> is deprecated; a password included in
|
||||
the environment variable <varname>PGPASSWORD</varname> will no longer be added
|
||||
to <varname>primary_conninfo</varname> by default; to force the inclusion
|
||||
of a password (not recommended), use the new configuration file parameter
|
||||
<varname>use_primary_conninfo_password</varname>. For details, see section
|
||||
<xref linkend="cloning-advanced-managing-passwords">.
|
||||
</para></listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<application>repmgrd</application>
|
||||
<itemizedlist>
|
||||
|
||||
<listitem><para>
|
||||
<literal>--monitoring-history</literal> is deprecated and is replaced by the
|
||||
configuration file option <varname>monitoring_history</varname>.
|
||||
This enables the setting to be changed without having to modify system service
|
||||
files.
|
||||
</para></listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Configuration file changes</title>
|
||||
|
||||
<para><emphasis>Required settings</emphasis></para>
|
||||
<para>The following 4 parameters are mandatory in <filename>repmgr.conf</filename>:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>node_id</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>node_name</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>conninfo</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>data_directory</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para><emphasis>Renamed settings</emphasis></para>
|
||||
<para>
|
||||
Some settings have been renamed for clarity and consistency:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>node</varname> is now <varname>node_id</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>name</varname> is now <varname>node_name</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>barman_server</varname> is now <varname>barman_host</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>master_response_timeout</varname> is now
|
||||
<varname>async_query_timeout</varname> (to better indicate its purpose)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The following configuration file parameters have been renamed for consistency
|
||||
with other parameters (and conform to the pattern used by PostgreSQL itself,
|
||||
which uses the prefix <varname>log_</varname> for logging parameters):
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>loglevel</varname> is now <varname>log_level</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>logfile</varname> is now <varname>log_file</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>logfacility</varname> is now <varname>log_facility</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para><emphasis>Removed settings</emphasis></para>
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>cluster</varname> has been removed</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><varname>upstream_node</varname> - see note about
|
||||
<literal>--upstream-node-id</literal> above</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>retry_promote_interval_secs</varname> - this is now redundant due
|
||||
to changes in the failover/promotion mechanism; the new equivalent is
|
||||
<varname>primary_notification_timeout</varname> </simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para><emphasis>Logging changes</emphasis></para>
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
default value for <varname>log_level</varname> is <literal>INFO</literal>
|
||||
rather than <literal>NOTICE</literal>.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
new parameter <varname>log_status_interval</varname>, which causes
|
||||
<application>repmgrd</application> to emit a status log
|
||||
line at the specified interval
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
<sect2>
|
||||
<title>repmgrd</title>
|
||||
<para>
|
||||
The <application>repmgr</application> shared library has been renamed from <literal>repmgr_funcs</literal> to
|
||||
<literal>repmgr</literal>, meaning <varname>shared_preload_libraries</varname>
|
||||
in <filename>postgresql.conf</filename> needs to be updated to the new name:
|
||||
<programlisting>
|
||||
shared_preload_libraries = 'repmgr'</programlisting>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
</appendix>
|
||||
66
doc/appendix-signatures.sgml
Normal file
66
doc/appendix-signatures.sgml
Normal file
@@ -0,0 +1,66 @@
|
||||
<appendix id="appendix-signatures" xreflabel="Verifying digital signatures">
|
||||
<title>Verifying digital signatures</title>
|
||||
|
||||
<sect1 id="repmgr-source-key" xreflabel="repmgr source key">
|
||||
<title>repmgr source code signing key</title>
|
||||
<para>
|
||||
The signing key ID used for <application>repmgr</application> source code bundles is:
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr">
|
||||
<literal>0x297F1DCC</literal></ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To download the <application>repmgr</application> source key to your computer:
|
||||
<programlisting>
|
||||
curl -s http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr | gpg --import
|
||||
gpg --fingerprint 0x297F1DCC
|
||||
</programlisting>
|
||||
then verify that the fingerprint is the expected value:
|
||||
<programlisting>
|
||||
085A BE38 6FD9 72CE 6365 340D 8365 683D 297F 1DCC</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For checking tarballs, first download and import the <application>repmgr</application>
|
||||
source signing key as shown above. Then download both the source tarball and the detached
|
||||
key (e.g. <filename>repmgr-4.0beta1.tar.gz</filename> and
|
||||
<filename>repmgr-4.0beta1.tar.gz.asc</filename>) from
|
||||
<ulink url="https://repmgr.org/download/">https://repmgr.org/download/</ulink>
|
||||
and use <application>gpg</application> to verify the key, e.g.:
|
||||
<programlisting>
|
||||
gpg --verify repmgr-4.0beta1.tar.gz.asc</programlisting>
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgr-rpm-key" xreflabel="repmgr rpm key">
|
||||
<title>repmgr RPM signing key</title>
|
||||
<para>
|
||||
The signing key ID used for <application>repmgr</application> RPM packages is:
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr">
|
||||
<literal>0x702D883A</literal></ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To download the <application>repmgr</application> RPM key to your computer:
|
||||
<programlisting>
|
||||
curl -s http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr | gpg --import
|
||||
gpg --fingerprint 0x702D883A
|
||||
</programlisting>
|
||||
then verify that the fingerprint is the expected value:
|
||||
<programlisting>
|
||||
AE4E 390E A58E 0037 6148 3F29 888D 018B 702D 883A</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To check a repository RPM, use <application>rpmkeys</application> to load the
|
||||
packaging signing key into the RPM database then use <literal>rpm -K</literal>, e.g.:
|
||||
<programlisting>
|
||||
sudo rpmkeys --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr
|
||||
rpm -K postgresql-bdr94-2ndquadrant-redhat-1.0-2.noarch.rpm
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
</appendix>
|
||||
@@ -1,288 +1,8 @@
|
||||
BDR failover with repmgrd
|
||||
=========================
|
||||
|
||||
`repmgr 4` provides support for monitoring BDR nodes and taking action in case
|
||||
one of the nodes fails.
|
||||
This document has been integrated into the main `repmgr` documentation
|
||||
and is now located here:
|
||||
|
||||
*NOTE* Due to the nature of BDR, it's only safe to use this solution for
|
||||
a two-node scenario. Introducing additional nodes will create an inherent
|
||||
risk of node desynchronisation if a node goes down without being cleanly
|
||||
removed from the cluster.
|
||||
> [BDR failover with repmgrd](https://repmgr.org/docs/4.0/repmgrd-bdr.html)
|
||||
|
||||
In contrast to streaming replication, there's no concept of "promoting" a new
|
||||
primary node with BDR. Instead, "failover" involves monitoring both nodes
|
||||
with `repmgrd` and redirecting queries from the failed node to the remaining
|
||||
active node. This can be done by using the event notification script generated by
|
||||
`repmgrd` to dynamically reconfigure a proxy server/connection pooler such
|
||||
as PgBouncer.
|
||||
|
||||
|
||||
Prerequisites
|
||||
-------------
|
||||
|
||||
`repmgr 4` requires PostgreSQL 9.6 with the BDR 2 extension enabled and
|
||||
configured for a two-node BDR network. `repmgr 4` packages
|
||||
must be installed on each node before attempting to configure repmgr.
|
||||
|
||||
*NOTE* `repmgr 4` will refuse to install if it detects more than two
|
||||
BDR nodes.
|
||||
|
||||
Application database connections *must* be passed through a proxy server/
|
||||
connection pooler such as PgBouncer, and it must be possible to dynamically
|
||||
reconfigure that from `repmgrd`. The example demonstrated in this document
|
||||
will use PgBouncer.
|
||||
|
||||
The proxy server / connection poolers must not be installed on the database
|
||||
servers.
|
||||
|
||||
For this example, it's assumed password-less SSH connections are available
|
||||
from the PostgreSQL servers to the servers where PgBouncer runs, and
|
||||
that the user on those servers has permission to alter the PgBouncer
|
||||
configuration files.
|
||||
|
||||
PostgreSQL connections must be possible between each node, and each node
|
||||
must be able to connect to each PgBouncer instance.
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
Sample configuration for `repmgr.conf`:
|
||||
|
||||
node_id=1
|
||||
node_name='node1'
|
||||
conninfo='host=node1 dbname=bdrtest user=repmgr connect_timeout=2'
|
||||
replication_type='bdr'
|
||||
|
||||
event_notifications=bdr_failover
|
||||
event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1'
|
||||
|
||||
# repmgrd options
|
||||
monitor_interval_secs=5
|
||||
reconnect_attempts=6
|
||||
reconnect_interval=5
|
||||
|
||||
Adjust settings as appropriate; copy and adjust for the second node (particularly
|
||||
the values `node_id`, `node_name` and `conninfo`).
|
||||
|
||||
Note that the values provided for the `conninfo` string must be valid for
|
||||
connections from *both* nodes in the cluster. The database must be the BDR
|
||||
database.
|
||||
|
||||
If defined, `event_notifications` will restrict execution of `event_notification_command`
|
||||
to the specified events.
|
||||
|
||||
`event_notification_command` is the script which does the actual "heavy lifting"
|
||||
of reconfiguring the proxy server/ connection pooler. It is fully user-definable;
|
||||
a sample implementation is documented below.
|
||||
|
||||
|
||||
repmgr user permissions
|
||||
-----------------------
|
||||
|
||||
`repmgr` will create an extension in the BDR database containing objects
|
||||
for administering `repmgr` metadata. The user defined in the `conninfo`
|
||||
setting must be able to access all objects. Additionally, superuser permissions
|
||||
are required to install the `repmgr` extension. The easiest way to do this
|
||||
is to create the `repmgr` user as a superuser, however if this is not
|
||||
desirable, the `repmgr` user can be created as a normal user and a
|
||||
superuser specified with `--superuser` when registering a BDR node.
|
||||
|
||||
repmgr setup
|
||||
------------
|
||||
|
||||
Register both nodes:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf bdr register
|
||||
NOTICE: attempting to install extension "repmgr"
|
||||
NOTICE: "repmgr" extension successfully installed
|
||||
NOTICE: node record created for node 'node1' (ID: 1)
|
||||
NOTICE: BDR node 1 registered (conninfo: host=localhost dbname=bdrtest user=repmgr port=5501)
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf bdr register
|
||||
NOTICE: node record created for node 'node2' (ID: 2)
|
||||
NOTICE: BDR node 2 registered (conninfo: host=localhost dbname=bdrtest user=repmgr port=5502)
|
||||
|
||||
The `repmgr` extension will be automatically created when the first
|
||||
node is registered, and will be propagated to the second node.
|
||||
|
||||
*IMPORTANT* ensure the repmgr package is available on both nodes before
|
||||
attempting to register the first node
|
||||
|
||||
|
||||
At this point the meta data for both nodes has been created; executing
|
||||
`repmgr cluster show` (on either node) should produce output like this:
|
||||
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Connection string
|
||||
----+-------+------+-----------+----------+--------------------------------------------------------
|
||||
1 | node1 | bdr | * running | | host=node1 dbname=bdrtest user=repmgr connect_timeout=2
|
||||
2 | node2 | bdr | * running | | host=node2 dbname=bdrtest user=repmgr connect_timeout=2
|
||||
|
||||
Additionally it's possible to see a log of significant events; so far
|
||||
this will only record the two node registrations (in reverse chronological order):
|
||||
|
||||
Node ID | Event | OK | Timestamp | Details
|
||||
---------+--------------+----+---------------------+----------------------------------------------
|
||||
2 | bdr_register | t | 2017-07-27 17:51:48 | node record created for node 'node2' (ID: 2)
|
||||
1 | bdr_register | t | 2017-07-27 17:51:00 | node record created for node 'node1' (ID: 1)
|
||||
|
||||
|
||||
Defining the "event_notification_command"
|
||||
-----------------------------------------
|
||||
|
||||
Key to "failover" execution is the `event_notification_command`, which is a
|
||||
user-definable script which should reconfigure the proxy server/
|
||||
connection pooler.
|
||||
|
||||
Each time `repmgr` (or `repmgrd`) records an event, it can optionally
|
||||
execute the script defined in `event_notification_command` to
|
||||
take further action; details of the event will be passed as parameters.
|
||||
The following placeholders are available to the script:
|
||||
|
||||
%n - node ID
|
||||
%e - event type
|
||||
%s - success (1 or 0)
|
||||
%t - timestamp
|
||||
%d - details
|
||||
%c - conninfo string of the next available node
|
||||
%a - name of the next available node
|
||||
|
||||
Note that `%c` and `%a` will only be provided during `bdr_failover`
|
||||
events, which is what is of interest here.
|
||||
|
||||
The provided sample script (`scripts/bdr-pgbouncer.sh`) is configured like
|
||||
this:
|
||||
|
||||
event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a"'
|
||||
|
||||
and parses the configured parameters like this:
|
||||
|
||||
NODE_ID=$1
|
||||
EVENT_TYPE=$2
|
||||
SUCCESS=$3
|
||||
NEXT_CONNINFO=$4
|
||||
NEXT_NODE_NAME=$5
|
||||
|
||||
It also contains some hard-coded values about the PgBouncer configuration for
|
||||
both nodes; these will need to be adjusted for your local environment of course
|
||||
(ideally the scripts would be maintained as templates and generated by some
|
||||
kind of provisioning system).
|
||||
|
||||
The script performs the following steps:
|
||||
|
||||
- pauses PgBouncer on all nodes
|
||||
- recreates the PgBouncer configuration file on each node using the information
|
||||
provided by `repmgrd` (mainly the `conninfo` string) to configure PgBouncer
|
||||
to point to the remaining node
|
||||
- reloads the PgBouncer configuration
|
||||
- resumes PgBouncer
|
||||
|
||||
From that point, any connections to PgBouncer on the failed BDR node will be redirected
|
||||
to the active node.
|
||||
|
||||
|
||||
repmgrd
|
||||
-------
|
||||
|
||||
|
||||
|
||||
Node monitoring and failover
|
||||
----------------------------
|
||||
|
||||
At the intervals specified by `monitor_interval_secs` in `repmgr.conf`, `repmgrd`
|
||||
will ping each node to check if it's available. If a node isn't available,
|
||||
`repmgrd` will enter failover mode and check `reconnect_attempts` times
|
||||
at intervals of `reconnect_interval` to confirm the node is definitely unreachable.
|
||||
This buffer period is necessary to avoid false positives caused by transient
|
||||
network outages.
|
||||
|
||||
If the node is still unavailable, `repmgrd` will enter failover mode and execute
|
||||
the script defined in `event_notification_command`; an entry will be logged
|
||||
in the `repmgr.events` table and `repmgrd` will (unless otherwise configured)
|
||||
resume monitoring of the node in "degraded" mode until it reappears.
|
||||
|
||||
`repmgrd` logfile output during a failover event will look something like this
|
||||
on one node (usually the node which has failed, here "node2"):
|
||||
|
||||
...
|
||||
[2017-07-27 21:08:39] [INFO] starting continuous BDR node monitoring
|
||||
[2017-07-27 21:08:39] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:08:55] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:09:11] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
|
||||
[2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
|
||||
[2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
|
||||
[2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
|
||||
[2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
|
||||
[2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
|
||||
[2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
|
||||
[2017-07-27 21:09:28] [NOTICE] setting node record for node 2 to inactive
|
||||
[2017-07-27 21:09:28] [INFO] executing notification command for event "bdr_failover"
|
||||
[2017-07-27 21:09:28] [DETAIL] command is:
|
||||
/path/to/bdr-pgbouncer.sh 2 bdr_failover 1 "host=host=node1 dbname=bdrtest user=repmgr connect_timeout=2" "node1"
|
||||
[2017-07-27 21:09:28] [INFO] node 'node2' (ID: 2) detected as failed; next available node is 'node1' (ID: 1)
|
||||
[2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
||||
...
|
||||
|
||||
Output on the other node ("node1") during the same event will look like this:
|
||||
|
||||
[2017-07-27 21:08:35] [INFO] starting continuous BDR node monitoring
|
||||
[2017-07-27 21:08:35] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
||||
[2017-07-27 21:08:51] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
||||
[2017-07-27 21:09:07] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
||||
[2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
|
||||
[2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
|
||||
[2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
|
||||
[2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
|
||||
[2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
|
||||
[2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
|
||||
[2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
|
||||
[2017-07-27 21:09:28] [NOTICE] other node's repmgrd is handling failover
|
||||
[2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
||||
[2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
||||
|
||||
This assumes only the PostgreSQL instance on "node2" has failed. In this case the
|
||||
`repmgrd` instance running on "node2" has performed the failover. However if
|
||||
the entire server becomes unavailable, `repmgrd` on "node1" will perform
|
||||
the failover.
|
||||
|
||||
|
||||
Node recovery
|
||||
-------------
|
||||
|
||||
Following failure of a BDR node, if the node subsequently becomes available again,
|
||||
a `bdr_recovery` event will be generated. This could potentially be used to
|
||||
reconfigure PgBouncer automatically to bring the node back into the available pool,
|
||||
however it would be prudent to manually verify the node's status before
|
||||
exposing it to the application.
|
||||
|
||||
If the failed node comes back up and connects correctly, output similar to this
|
||||
will be visible in the `repmgrd` log:
|
||||
|
||||
[2017-07-27 21:25:30] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
||||
[2017-07-27 21:25:46] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:25:46] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
||||
[2017-07-27 21:25:55] [INFO] active replication slot for node "node1" found after 1 seconds
|
||||
[2017-07-27 21:25:55] [NOTICE] node "node2" (ID: 2) has recovered after 986 seconds
|
||||
|
||||
|
||||
Shutdown of both nodes
|
||||
----------------------
|
||||
|
||||
If both PostgreSQL instances are shut down, `repmgrd` will try and handle the
|
||||
situation as gracefully as possible, though with no failover candidates available
|
||||
there's not much it can do. Should this case ever occur, we recommend shutting
|
||||
down `repmgrd` on both nodes and restarting it once the PostgreSQL instances
|
||||
are running properly.
|
||||
|
||||
@@ -1,106 +1,7 @@
|
||||
Changes in repmgr 4
|
||||
===================
|
||||
|
||||
Standardisation on `primary`
|
||||
----------------------------
|
||||
This document has been integrated into the main `repmgr` documentation
|
||||
and is now located here:
|
||||
|
||||
To standardise terminology, `primary` is used to denote the read/write
|
||||
node in a streaming replication cluster. `master` is still accepted
|
||||
as a synonym (e.g. `repmgr master register`).
|
||||
|
||||
|
||||
New command line options
|
||||
------------------------
|
||||
|
||||
- `--dry-run`: repmgr will attempt to perform the action as far as possible
|
||||
without making any changes to the database
|
||||
|
||||
- `--upstream-node-id`: use to specify the upstream node the standby will
|
||||
connect to and later stream from, when cloning a standby. This replaces the configuration
|
||||
file parameter `upstream_node`, as the upstream node is set when the standby
|
||||
is initially cloned, but can change over the lifetime of an installation (due
|
||||
to failovers, switchovers etc.) so it's pointless/confusing keeping the original
|
||||
value around in the config file.
|
||||
|
||||
Changed command line options
|
||||
----------------------------
|
||||
|
||||
### repmgr
|
||||
|
||||
- `--replication-user` has been deprecated; it has been replaced by the
|
||||
configuration file option `replication_user`. The value (which defaults
|
||||
to the user in the `conninfo` string) will be stored in the repmgr metadata
|
||||
for use by standby clone/follow.
|
||||
|
||||
- `--recovery-min-apply-delay` is now a configuration file parameter
|
||||
`recovery_min_apply_delay`, to ensure the setting does not get lost when
|
||||
a standby follows a new upstream.
|
||||
|
||||
### repmgrd
|
||||
|
||||
- `--monitoring-history` is deprecated and has been replaced by the
|
||||
configuration file option `monitoring_history`. This enables the
|
||||
setting to be changed without having to modify system service files.
|
||||
|
||||
Changes to repmgr commands
|
||||
--------------------------
|
||||
|
||||
|
||||
### `repmgr cluster show`
|
||||
|
||||
This now displays the role of each node (e.g. `primary`, `standby`)
|
||||
and its status in separate columns.
|
||||
|
||||
The `--csv` option now emits a third column indicating the recovery
|
||||
status of the node.
|
||||
|
||||
|
||||
Configuration file changes
|
||||
--------------------------
|
||||
|
||||
### Required settings
|
||||
|
||||
The following 4 parameters are mandatory in `repmgr.conf`:
|
||||
|
||||
- `node_id`
|
||||
- `node_name`
|
||||
- `conninfo`
|
||||
- `data_directory`
|
||||
|
||||
|
||||
### Renamed settings
|
||||
|
||||
Some settings have been renamed for clarity and consistency:
|
||||
|
||||
- `node`: now `node_id`
|
||||
- `name`: now `node_name`
|
||||
- `master_response_timeout`: now `async_query_timeout` to better indicate its
|
||||
purpose
|
||||
|
||||
- The following configuration file parameters have been renamed for consistency
|
||||
with other parameters (and conform to the pattern used by PostgreSQL itself,
|
||||
which uses the prefix `log_` for logging parameters):
|
||||
- `loglevel` has been renamed to `log_level`
|
||||
- `logfile` has been renamed to `log_file`
|
||||
- `logfacility` has been renamed to `log_facility`
|
||||
|
||||
### Removed settings
|
||||
|
||||
- `cluster`: has been removed
|
||||
- `upstream_node`: see note about `--upstream-node-id` above.
|
||||
- `retry_promote_interval_secs`: this is now redundant due to changes in the
|
||||
failover/promotion mechanism; the new equivalent is `primary_notification_timeout`
|
||||
|
||||
|
||||
### Logging changes
|
||||
|
||||
- default value for `log_level` is `INFO` rather than `NOTICE`.
|
||||
- new parameter `log_status_interval`, which causes `repmgrd` to emit a status log
|
||||
line at the specified interval
|
||||
|
||||
|
||||
repmgrd
|
||||
-------
|
||||
|
||||
The `repmgr` shared library has been renamed from `repmgr_funcs` to `repmgr`,
|
||||
meaning `shared_preload_libraries` needs to be updated to the new name:
|
||||
|
||||
shared_preload_libraries = 'repmgr'
|
||||
> [Release notes](https://repmgr.org/docs/4.0/release-4.0.html)
|
||||
|
||||
440
doc/cloning-standbys.sgml
Normal file
440
doc/cloning-standbys.sgml
Normal file
@@ -0,0 +1,440 @@
|
||||
<chapter id="cloning-standbys" xreflabel="cloning standbys">
|
||||
<title>Cloning standbys</title>
|
||||
|
||||
<sect1 id="cloning-from-barman" xreflabel="Cloning from Barman">
|
||||
<indexterm>
|
||||
<primary>cloning</primary>
|
||||
<secondary>from Barman</secondary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>Barman</primary>
|
||||
<secondary>cloning a standby</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Cloning a standby from Barman</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-standby-clone"> can use
|
||||
<ulink url="https://www.2ndquadrant.com/">2ndQuadrant</ulink>'s
|
||||
<ulink url="https://www.pgbarman.org/">Barman</ulink> application
|
||||
to clone a standby (and also as a fallback source for WAL files).
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
Barman (aka PgBarman) should be considered as an integral part of any
|
||||
PostgreSQL replication cluster. For more details see:
|
||||
<ulink url="https://www.pgbarman.org/">https://www.pgbarman.org/</ulink>.
|
||||
</simpara>
|
||||
</tip>
|
||||
<para>
|
||||
Barman support provides the following advantages:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<para>
|
||||
the primary node does not need to perform a new backup every time a
|
||||
new standby is cloned
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
a standby node can be disconnected for longer periods without losing
|
||||
the ability to catch up, and without causing accumulation of WAL
|
||||
files on the primary node
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
WAL management on the primary becomes much easier as there's no need
|
||||
to use replication slots, and <varname>wal_keep_segments</varname>
|
||||
does not need to be set.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<sect2 id="cloning-from-barman-prerequisites" xreflabel="Prerequisites for cloning from Barman">
|
||||
<title>Prerequisites for cloning from Barman</title>
|
||||
<para>
|
||||
In order to enable Barman support for <command>repmgr standby clone</command>, the following
|
||||
prerequisites must be met:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<para>
|
||||
the <varname>barman_server</varname> setting in <filename>repmgr.conf</filename> is the same as the
|
||||
server configured in Barman;
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
the <varname>barman_host</varname> setting in <filename>repmgr.conf</filename> is set to the SSH
|
||||
hostname of the Barman server;
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
the <varname>restore_command</varname> setting in <filename>repmgr.conf</filename> is configured to
|
||||
use a copy of the <command>barman-wal-restore</command> script shipped with the
|
||||
<literal>barman-cli</literal> package (see section <xref linkend="cloning-from-barman-restore-command">
|
||||
below).
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
the Barman catalogue includes at least one valid backup for this server.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
Barman support is automatically enabled if <varname>barman_server</varname>
|
||||
is set. Normally it is good practice to use Barman, for instance
|
||||
when fetching a base backup while cloning a standby; in any case,
|
||||
Barman mode can be disabled using the <literal>--without-barman</literal>
|
||||
command line option.
|
||||
</simpara>
|
||||
</note>
|
||||
<tip>
|
||||
<simpara>
|
||||
If you have a non-default SSH configuration on the Barman
|
||||
server, e.g. using a port other than 22, then you can set those
|
||||
parameters in a dedicated Host section in <filename>~/.ssh/config</filename>
|
||||
corresponding to the value of <varname>barman_host</varname> in
|
||||
<filename>repmgr.conf</filename>. See the <literal>Host</literal>
|
||||
section in <command>man 5 ssh_config</command> for more details.
|
||||
</simpara>
|
||||
</tip>
|
||||
<para>
|
||||
It's now possible to clone a standby from Barman, e.g.:
|
||||
<programlisting>
|
||||
NOTICE: using configuration file "/etc/repmgr.conf"
|
||||
NOTICE: destination directory "/var/lib/postgresql/data" provided
|
||||
INFO: connecting to Barman server to verify backup for test_cluster
|
||||
INFO: checking and correcting permissions on existing directory "/var/lib/postgresql/data"
|
||||
INFO: creating directory "/var/lib/postgresql/data/repmgr"...
|
||||
INFO: connecting to Barman server to fetch server parameters
|
||||
INFO: connecting to upstream node
|
||||
INFO: connected to source node, checking its state
|
||||
INFO: successfully connected to source node
|
||||
DETAIL: current installation size is 29 MB
|
||||
NOTICE: retrieving backup from Barman...
|
||||
receiving file list ...
|
||||
(...)
|
||||
NOTICE: standby clone (from Barman) complete
|
||||
NOTICE: you can now start your PostgreSQL server
|
||||
HINT: for example: pg_ctl -D /var/lib/postgresql/data start</programlisting>
|
||||
|
||||
</para>
|
||||
</sect2>
|
||||
<sect2 id="cloning-from-barman-restore-command" xreflabel="Using Barman as a WAL file source">
|
||||
<indexterm>
|
||||
<primary>Barman</primary>
|
||||
<secondary>fetching archived WAL</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Using Barman as a WAL file source</title>
|
||||
<para>
|
||||
As a fallback in case streaming replication is interrupted, PostgreSQL can optionally
|
||||
retrieve WAL files from an archive, such as that provided by Barman. This is done by
|
||||
setting <varname>restore_command</varname> in <filename>recovery.conf</filename> to
|
||||
a valid shell command which can retrieve a specified WAL file from the archive.
|
||||
</para>
|
||||
<para>
|
||||
<command>barman-wal-restore</command> is a Python script provided as part of the <literal>barman-cli</literal>
|
||||
package (Barman 2.0 and later; for Barman 1.x the script is provided separately as
|
||||
<command>barman-wal-restore.py</command>) which performs this function for Barman.
|
||||
</para>
|
||||
<para>
|
||||
To use <command>barman-wal-restore</command> with &repmgr;
|
||||
and assuming Barman is located on the <literal>barmansrv</literal> host
|
||||
and that <command>barman-wal-restore</command> is located as an executable at
|
||||
<filename>/usr/bin/barman-wal-restore</filename>,
|
||||
<filename>repmgr.conf</filename> should include the following lines:
|
||||
<programlisting>
|
||||
barman_host=barmansrv
|
||||
barman_server=somedb
|
||||
restore_command=/usr/bin/barman-wal-restore barmansrv somedb %f %p</programlisting>
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
<command>barman-wal-restore</command> supports command line switches to
|
||||
control parallelism (<literal>--parallel=N</literal>) and compression (
|
||||
<literal>--bzip2</literal>, <literal>--gzip</literal>).
|
||||
</simpara>
|
||||
</note>
|
||||
<note>
|
||||
<para>
|
||||
To use a non-default Barman configuration file on the Barman server,
|
||||
specify this in <filename>repmgr.conf</filename> with <filename>barman_config</filename>:
|
||||
<programlisting>
|
||||
barman_config=/path/to/barman.conf</programlisting>
|
||||
</para>
|
||||
</note>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="cloning-replication-slots" xreflabel="Cloning and replication slots">
|
||||
<indexterm>
|
||||
<primary>cloning</primary>
|
||||
<secondary>replication slots</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>replication slots</primary>
|
||||
<secondary>cloning</secondary>
|
||||
</indexterm>
|
||||
<title>Cloning and replication slots</title>
|
||||
<para>
|
||||
Replication slots were introduced with PostgreSQL 9.4 and are designed to ensure
|
||||
that any standby connected to the primary using a replication slot will always
|
||||
be able to retrieve the required WAL files. This removes the need to manually
|
||||
manage WAL file retention by estimating the number of WAL files that need to
|
||||
be maintained on the primary using <varname>wal_keep_segments</varname>.
|
||||
Do however be aware that if a standby is disconnected, WAL will continue to
|
||||
accumulate on the primary until either the standby reconnects or the replication
|
||||
slot is dropped.
|
||||
</para>
|
||||
<para>
|
||||
To enable &repmgr; to use replication slots, set the boolean parameter
|
||||
<varname>use_replication_slots</varname> in <filename>repmgr.conf</filename>:
|
||||
<programlisting>
|
||||
use_replication_slots=true</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Replication slots must be enabled in <filename>postgresql.conf</filename> by
|
||||
setting the parameter <varname>max_replication_slots</varname> to at least the
|
||||
number of expected standbys (changes to this parameter require a server restart).
|
||||
</para>
|
||||
<para>
|
||||
When cloning a standby, &repmgr; will automatically generate an appropriate
|
||||
slot name, which is stored in the <literal>repmgr.nodes</literal> table, and create the slot
|
||||
on the upstream node:
|
||||
<programlisting>
|
||||
repmgr=# SELECT node_id, upstream_node_id, active, node_name, type, priority, slot_name
|
||||
FROM repmgr.nodes ORDER BY node_id;
|
||||
node_id | upstream_node_id | active | node_name | type | priority | slot_name
|
||||
---------+------------------+--------+-----------+---------+----------+---------------
|
||||
1 | | t | node1 | primary | 100 | repmgr_slot_1
|
||||
2 | 1 | t | node2 | standby | 100 | repmgr_slot_2
|
||||
3 | 1 | t | node3 | standby | 100 | repmgr_slot_3
|
||||
(3 rows)</programlisting>
|
||||
|
||||
<programlisting>
|
||||
repmgr=# SELECT slot_name, slot_type, active, active_pid FROM pg_replication_slots ;
|
||||
slot_name | slot_type | active | active_pid
|
||||
---------------+-----------+--------+------------
|
||||
repmgr_slot_2 | physical | t | 23658
|
||||
repmgr_slot_3 | physical | t | 23687
|
||||
(2 rows)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Note that a slot name will be created by default for the primary but not
|
||||
actually used unless the primary is converted to a standby using e.g.
|
||||
<command>repmgr standby switchover</command>.
|
||||
</para>
|
||||
<para>
|
||||
Further information on replication slots in the PostgreSQL documentation:
|
||||
<ulink url="https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS">https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION-SLOTS</ulink>
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
While replication slots can be useful for streaming replication, it's
|
||||
recommended to monitor for inactive slots as these will cause WAL files to
|
||||
build up indefinitely, possibly leading to server failure.
|
||||
</simpara>
|
||||
<simpara>
|
||||
As an alternative we recommend using 2ndQuadrant's <ulink url="https://www.pgbarman.org/">Barman</ulink>,
|
||||
which offloads WAL management to a separate server, negating the need to use replication
|
||||
slots to reserve WAL. See section <xref linkend="cloning-from-barman">
|
||||
for more details on using &repmgr; together with Barman.
|
||||
</simpara>
|
||||
</tip>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="cloning-cascading" xreflabel="Cloning and cascading replication">
|
||||
<indexterm>
|
||||
<primary>cloning</primary>
|
||||
<secondary>cascading replication</secondary>
|
||||
</indexterm>
|
||||
<title>Cloning and cascading replication</title>
|
||||
<para>
|
||||
Cascading replication, introduced with PostgreSQL 9.2, enables a standby server
|
||||
to replicate from another standby server rather than directly from the primary,
|
||||
meaning replication changes "cascade" down through a hierarchy of servers. This
|
||||
can be used to reduce load on the primary and minimize bandwidth usage between
|
||||
sites. For more details, see the
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/warm-standby.html#CASCADING-REPLICATION">
|
||||
PostgreSQL cascading replication documentation</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; supports cascading replication. When cloning a standby,
|
||||
set the command-line parameter <literal>--upstream-node-id</literal> to the
|
||||
<varname>node_id</varname> of the server the standby should connect to, and
|
||||
&repmgr; will create <filename>recovery.conf</filename> to point to it. Note
|
||||
that if <literal>--upstream-node-id</literal> is not explicitly provided,
|
||||
&repmgr; will set the standby's <filename>recovery.conf</filename> to
|
||||
point to the primary node.
|
||||
</para>
|
||||
<para>
|
||||
To demonstrate cascading replication, first ensure you have a primary and standby
|
||||
set up as shown in the <xref linkend="quickstart">.
|
||||
Then create an additional standby server with <filename>repmgr.conf</filename> looking
|
||||
like this:
|
||||
<programlisting>
|
||||
node_id=3
|
||||
node_name=node3
|
||||
conninfo='host=node3 user=repmgr dbname=repmgr'
|
||||
data_directory='/var/lib/postgresql/data'</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Clone this standby (using the connection parameters for the existing standby),
|
||||
ensuring <literal>--upstream-node-id</literal> is provided with the <varname>node_id</varname>
|
||||
of the previously created standby (if following the example, this will be <literal>2</literal>):
|
||||
<programlisting>
|
||||
$ repmgr -h node2 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --upstream-node-id=2
|
||||
NOTICE: using configuration file "/etc/repmgr.conf"
|
||||
NOTICE: destination directory "/var/lib/postgresql/data" provided
|
||||
INFO: connecting to upstream node
|
||||
INFO: connected to source node, checking its state
|
||||
NOTICE: checking for available walsenders on upstream node (2 required)
|
||||
INFO: sufficient walsenders available on upstream node (2 required)
|
||||
INFO: successfully connected to source node
|
||||
DETAIL: current installation size is 29 MB
|
||||
INFO: creating directory "/var/lib/postgresql/data"...
|
||||
NOTICE: starting backup (using pg_basebackup)...
|
||||
HINT: this may take some time; consider using the -c/--fast-checkpoint option
|
||||
INFO: executing: 'pg_basebackup -l "repmgr base backup" -D /var/lib/postgresql/data -h node2 -U repmgr -X stream '
|
||||
NOTICE: standby clone (using pg_basebackup) complete
|
||||
NOTICE: you can now start your PostgreSQL server
|
||||
HINT: for example: pg_ctl -D /var/lib/postgresql/data start</programlisting>
|
||||
|
||||
then register it (note that <literal>--upstream-node-id</literal> must be provided here
|
||||
too):
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf standby register --upstream-node-id=2
|
||||
NOTICE: standby node "node3" (ID: 3) successfully registered
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
After starting the standby, the cluster will look like this, showing that <literal>node3</literal>
|
||||
is attached to <literal>node2</literal>, not the primary (<literal>node1</literal>).
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
||||
1 | node1 | primary | * running | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node2 | default | host=node3 dbname=repmgr user=repmgr
|
||||
</programlisting>
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
Under some circumstances when setting up a cascading replication
|
||||
cluster, you may wish to clone a downstream standby whose upstream node
|
||||
does not yet exist. In this case you can clone from the primary (or
|
||||
another upstream node); provide the parameter <literal>--upstream-conninfo</literal>
|
||||
to explicitly set the upstream's <varname>primary_conninfo</varname> string
|
||||
in <filename>recovery.conf</filename>.
|
||||
</simpara>
|
||||
</tip>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="cloning-advanced" xreflabel="Advanced cloning options">
|
||||
<indexterm>
|
||||
<primary>cloning</primary>
|
||||
<secondary>advanced options</secondary>
|
||||
</indexterm>
|
||||
<title>Advanced cloning options</title>
|
||||
|
||||
<sect2 id="cloning-advanced-pg-basebackup-options" xreflabel="pg_basebackup options when cloning a standby">
|
||||
<title>pg_basebackup options when cloning a standby</title>
|
||||
<para>
|
||||
As &repmgr; uses <command>pg_basebackup</command> to clone a standby, it's possible to
|
||||
provide additional parameters for <command>pg_basebackup</command> to customise the
|
||||
cloning process.
|
||||
</para>
|
||||
<para>
|
||||
By default, <command>pg_basebackup</command> performs a checkpoint before beginning the backup
|
||||
process. However, a normal checkpoint may take some time to complete;
|
||||
a fast checkpoint can be forced with the <literal>-c/--fast-checkpoint</literal> option.
|
||||
However this may impact performance of the server being cloned from (typically the primary)
|
||||
so should be used with care.
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
If <application>Barman</application> is set up for the cluster, it's possible to
|
||||
clone the standby directly from Barman, without any impact on the server the standby
|
||||
is being cloned from. For more details see <xref linkend="cloning-from-barman">.
|
||||
</simpara>
|
||||
</tip>
|
||||
<para>
|
||||
Other options can be passed to <command>pg_basebackup</command> by including them
|
||||
in the <filename>repmgr.conf</filename> setting <varname>pg_basebackup_options</varname>.
|
||||
</para>
|
||||
<para>
|
||||
If using a separate directory to store WAL files, provide the option <literal>--waldir</literal>
|
||||
(<literal>--xlogdir</literal> in PostgreSQL 9.6 and earlier) with the absolute path to the
|
||||
WAL directory. Any WALs generated during the cloning process will be copied here, and
|
||||
a symlink will automatically be created from the main data directory.
|
||||
</para>
|
||||
<para>
|
||||
See the <ulink url="https://www.postgresql.org/docs/current/static/app-pgbasebackup.html">PostgreSQL pg_basebackup documentation</ulink>
|
||||
for more details of available options.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="cloning-advanced-managing-passwords" xreflabel="Managing passwords">
|
||||
<title>Managing passwords</title>
|
||||
<para>
|
||||
If replication connections to a standby's upstream server are password-protected,
|
||||
the standby must be able to provide the password so it can begin streaming
|
||||
replication.
|
||||
</para>
|
||||
<para>
|
||||
The recommended way to do this is to store the password in the <literal>postgres</literal> system
|
||||
user's <filename>~/.pgpass</filename> file. It's also possible to store the password in the
|
||||
environment variable <varname>PGPASSWORD</varname>, however this is not recommended for
|
||||
security reasons. For more details see the
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/libpq-pgpass.html">PostgreSQL password file documentation</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
If, for whatever reason, you wish to include the password in <filename>recovery.conf</filename>,
|
||||
set <varname>use_primary_conninfo_password</varname> to <literal>true</literal> in
|
||||
<filename>repmgr.conf</filename>. This will read a password set in <varname>PGPASSWORD</varname>
|
||||
(but not <filename>~/.pgpass</filename>) and place it into the <varname>primary_conninfo</varname>
|
||||
string in <filename>recovery.conf</filename>. Note that <varname>PGPASSWORD</varname>
|
||||
will need to be set during any action which causes <filename>recovery.conf</filename> to be
|
||||
rewritten, e.g. <xref linkend="repmgr-standby-follow">.
|
||||
</para>
|
||||
<para>
|
||||
It is of course also possible to include the password value in the <varname>conninfo</varname>
|
||||
string for each node, but this is obviously a security risk and should be
|
||||
avoided.
|
||||
</para>
|
||||
<para>
|
||||
From PostgreSQL 9.6, <application>libpq</application> supports the <varname>passfile</varname>
|
||||
parameter in connection strings, which can be used to specify a password file other than
|
||||
the default <filename>~/.pgpass</filename>.
|
||||
</para>
|
||||
<para>
|
||||
To have &repmgr; write a custom password file in <varname>primary_conninfo</varname>,
|
||||
specify its location in <varname>passfile</varname> in <filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="cloning-advanced-replication-user" xreflabel="Separate replication user">
|
||||
<title>Separate replication user</title>
|
||||
<para>
|
||||
In some circumstances it might be desirable to create a dedicated replication-only
|
||||
user (in addition to the user who manages the &repmgr; metadata). In this case,
|
||||
the replication user should be set in <filename>repmgr.conf</filename> via the parameter
|
||||
<varname>replication_user</varname>; &repmgr; will use this value when making
|
||||
replication connections and generating <filename>recovery.conf</filename>. This
|
||||
value will also be stored in the <literal>repmgr.nodes</literal>
|
||||
table for each node; it no longer needs to be explicitly specified when
|
||||
cloning a node or executing <xref linkend="repmgr-standby-follow">.
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
|
||||
</chapter>
|
||||
119
doc/configuration-file-settings.sgml
Normal file
119
doc/configuration-file-settings.sgml
Normal file
@@ -0,0 +1,119 @@
|
||||
<sect1 id="configuration-file-settings" xreflabel="configuration file settings">
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>settings</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Configuration file settings</title>
|
||||
<para>
|
||||
Each <filename>repmgr.conf</filename> file must contain the following parameters:
|
||||
</para>
|
||||
<para>
|
||||
<variablelist>
|
||||
<varlistentry id="repmgr-conf-node-id" xreflabel="node_id">
|
||||
<term><varname>node_id</varname> (<type>int</type>)
|
||||
<indexterm>
|
||||
<primary><varname>node_id</varname> configuration file parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
A unique integer greater than zero which identifies the node.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="repmgr-conf-node-name" xreflabel="node_name">
|
||||
<term><varname>node_name</varname> (<type>string</type>)
|
||||
<indexterm>
|
||||
<primary><varname>node_name</varname> configuration file parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
An arbitrary (but unique) string; we recommend using the server's hostname
|
||||
or another identifier unambiguously associated with the server to avoid
|
||||
confusion. Avoid choosing names which reflect the node's current role,
|
||||
e.g. <varname>primary</varname> or <varname>standby1</varname>
|
||||
as roles can change and if you end up in a situation where the current primary is
|
||||
called <varname>standby1</varname> (for example), things will be confusing
|
||||
to say the least.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="repmgr-conf-conninfo" xreflabel="conninfo">
|
||||
<term><varname>conninfo</varname> (<type>string</type>)
|
||||
<indexterm>
|
||||
<primary><varname>conninfo</varname> configuration file parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
Database connection information as a conninfo string.
|
||||
All servers in the cluster must be able to connect to
|
||||
the local node using this string.
|
||||
</para>
|
||||
<para>
|
||||
For details on conninfo strings, see section <ulink
|
||||
url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING">Connection Strings</>
|
||||
in the PostgreSQL documentation.
|
||||
</para>
|
||||
<para>
|
||||
If repmgrd is in use, consider explicitly setting
|
||||
<varname>connect_timeout</varname> in the <varname>conninfo</varname>
|
||||
string to determine the length of time which elapses before a network
|
||||
connection attempt is abandoned; for details see <ulink
|
||||
url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT">
|
||||
the PostgreSQL documentation</>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="repmgr-conf-data-directory" xreflabel="data_directory">
|
||||
<term><varname>data_directory</varname> (<type>string</type>)
|
||||
<indexterm>
|
||||
<primary><varname>data_directory</varname> configuration file parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
The node's data directory. This is needed by repmgr
|
||||
when performing operations when the PostgreSQL instance
|
||||
is not running and there's no other way of determining
|
||||
the data directory.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
</variablelist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For a full list of annotated configuration items, see the file
|
||||
<ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</>.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
The following parameters in the configuration file can be overridden with
|
||||
command line options:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>-L/--log-level</literal> overrides <literal>log_level</literal> in
|
||||
<filename>repmgr.conf</filename>
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>-b/--pg_bindir</literal> overrides <literal>pg_bindir</literal> in
|
||||
<filename>repmgr.conf</filename>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</sect1>
|
||||
69
doc/configuration-file.sgml
Normal file
69
doc/configuration-file.sgml
Normal file
@@ -0,0 +1,69 @@
|
||||
<sect1 id="configuration-file" xreflabel="configuration file location">
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>location</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>repmgr.conf location</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Configuration file location</title>
|
||||
<para>
|
||||
<application>repmgr</application> and <application>repmgrd</application>
|
||||
use a common configuration file, by default called
|
||||
<filename>repmgr.conf</filename> (although any name can be used if explicitly specified).
|
||||
<filename>repmgr.conf</filename> must contain a number of required parameters, including
|
||||
the database connection string for the local node and the location
|
||||
of its data directory; other values will be inferred from defaults if
|
||||
not explicitly supplied. See section <xref linkend="configuration-file-settings">
|
||||
for more details.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The configuration file will be searched for in the following locations:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<para>a configuration file specified by the <literal>-f/--config-file</literal> command line option</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
a location specified by the package maintainer (if <application>repmgr</application>
|
||||
was installed from a package and the package maintainer has specified the configuration
|
||||
file location)
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para><filename>repmgr.conf</filename> in the local directory</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para><filename>/etc/repmgr.conf</filename></para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>the directory reported by <application>pg_config --sysconfdir</application></para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
|
||||
an error will be raised if it is not found or not readable, and no attempt will be made to
|
||||
check default locations; this is to prevent <application>repmgr</application> unexpectedly
|
||||
reading the wrong configuration file.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If providing the configuration file location with <literal>-f/--config-file</literal>,
|
||||
avoid using a relative path, particularly when executing <xref linkend="repmgr-primary-register">
|
||||
and <xref linkend="repmgr-standby-register">, as &repmgr; stores the configuration file location
|
||||
in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
|
||||
<xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert
|
||||
a relative path into an absolute one, but this may not be the same as the path you
|
||||
would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
|
||||
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
||||
<filename>/path/to/repmgr.conf</filename>).
|
||||
</para>
|
||||
</note>
|
||||
</sect1>
|
||||
24
doc/configuration.sgml
Normal file
24
doc/configuration.sgml
Normal file
@@ -0,0 +1,24 @@
|
||||
<chapter id="configuration" xreflabel="Configuration">
|
||||
<title>repmgr configuration</title>
|
||||
|
||||
&configuration-file;
|
||||
&configuration-file-settings;
|
||||
|
||||
<sect1 id="configuration-permissions" xreflabel="User permissions">
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>user permissions</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>repmgr user permissions</title>
|
||||
<para>
|
||||
&repmgr; will create an extension database containing objects
|
||||
for administering &repmgr; metadata. The user defined in the <varname>conninfo</varname>
|
||||
setting must be able to access all objects. Additionally, superuser permissions
|
||||
are required to install the &repmgr; extension. The easiest way to do this
|
||||
is to create the &repmgr; user as a superuser, however if this is not
|
||||
desirable, the &repmgr; user can be created as a normal user and a
|
||||
superuser specified with <literal>--superuser</literal> when registering a &repmgr; node.
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
86
doc/configuring-witness-server.sgml
Normal file
86
doc/configuring-witness-server.sgml
Normal file
@@ -0,0 +1,86 @@
|
||||
<chapter id="using-witness-server">
|
||||
<indexterm>
|
||||
<primary>witness server</primary>
|
||||
<seealso>Using a witness server with repmgrd</seealso>
|
||||
</indexterm>
|
||||
|
||||
|
||||
<title>Using a witness server</title>
|
||||
<para>
|
||||
A <xref linkend="witness-server"> is a normal PostgreSQL instance which
|
||||
is not part of the streaming replication cluster; its purpose is, if a
|
||||
failover situation occurs, to provide proof that the primary server
|
||||
itself is unavailable.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
A typical use case for a witness server is a two-node streaming replication
|
||||
setup, where the primary and standby are in different locations (data centres).
|
||||
By creating a witness server in the same location as the primary, if the primary
|
||||
becomes unavailable it's possible for the standby to decide whether it can
|
||||
promote itself without risking a "split brain" scenario: if it can't see either the
|
||||
witness or the primary server, it's likely there's a network-level interruption
|
||||
and it should not promote itself. If it can see the witness but not the primary,
|
||||
this proves there is no network interruption and the primary itself is unavailable,
|
||||
and it can therefore promote itself (and ideally take action to fence the
|
||||
former primary).
|
||||
</para>
|
||||
<para>
|
||||
For more complex replication scenarios, e.g. with multiple datacentres, it may
|
||||
be preferable to use location-based failover, which ensures that only nodes
|
||||
in the same location as the primary will ever be promotion candidates;
|
||||
see <xref linkend="repmgrd-network-split"> for more details.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
A witness server will only be useful if <application>repmgrd</application>
|
||||
is in use.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<sect1 id="creating-witness-server">
|
||||
<title>Creating a witness server</title>
|
||||
<para>
|
||||
To create a witness server, set up a normal PostgreSQL instance on a server
|
||||
in the same physical location as the cluster's primary server.
|
||||
</para>
|
||||
<para>
|
||||
This instance should *not* be on the same physical host as the primary server,
|
||||
as otherwise if the primary server fails due to hardware issues, the witness
|
||||
server will be lost too.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
&repmgr; 3.3 and earlier provided a <command>repmgr create witness</command>
|
||||
command, which would automatically create a PostgreSQL instance. However
|
||||
this often resulted in an unsatisfactory, hard-to-customise instance.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
The witness server should be configured in the same way as a normal
|
||||
&repmgr; node; see section <xref linkend="configuration">.
|
||||
</para>
|
||||
<para>
|
||||
Register the witness server with <xref linkend="repmgr-witness-register">.
|
||||
This will create the &repmgr; extension on the witness server, and make
|
||||
a copy of the &repmgr; metadata.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
As the witness server is not part of the replication cluster, further
|
||||
changes to the &repmgr; metadata will be synchronised by
|
||||
<application>repmgrd</application>.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
Once the witness server has been configured, <application>repmgrd</application>
|
||||
should be started; for more details see <xref linkend="repmgrd-witness-server">.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To unregister a witness server, use <xref linkend="repmgr-witness-unregister">.
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
</chapter>
|
||||
198
doc/event-notifications.sgml
Normal file
198
doc/event-notifications.sgml
Normal file
@@ -0,0 +1,198 @@
|
||||
<chapter id="event-notifications" xreflabel="event notifications">
|
||||
|
||||
<indexterm>
|
||||
<primary>event notifications</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>Event Notifications</title>
|
||||
<para>
|
||||
Each time &repmgr; or <application>repmgrd</application> performs a significant event, a record
|
||||
of that event is written into the <literal>repmgr.events</literal> table together with
|
||||
a timestamp, an indication of failure or success, and further details
|
||||
if appropriate. This is useful for gaining an overview of events
|
||||
affecting the replication cluster. However note that this table has
|
||||
advisory character and should be used in combination with the &repmgr;
|
||||
and PostgreSQL logs to obtain details of any events.
|
||||
</para>
|
||||
<para>
|
||||
Example output after a primary was registered and a standby cloned
|
||||
and registered:
|
||||
<programlisting>
|
||||
repmgr=# SELECT * from repmgr.events ;
|
||||
node_id | event | successful | event_timestamp | details
|
||||
---------+------------------+------------+-------------------------------+-------------------------------------------------------------------------------------
|
||||
1 | primary_register | t | 2016-01-08 15:04:39.781733+09 |
|
||||
2 | standby_clone | t | 2016-01-08 15:04:49.530001+09 | Cloned from host 'repmgr_node1', port 5432; backup method: pg_basebackup; --force: N
|
||||
2 | standby_register | t | 2016-01-08 15:04:50.621292+09 |
|
||||
(3 rows)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Alternatively, use <xref linkend="repmgr-cluster-event"> to output a
|
||||
formatted list of events.
|
||||
</para>
|
||||
<para>
|
||||
Additionally, event notifications can be passed to a user-defined program
|
||||
or script which can take further action, e.g. send email notifications.
|
||||
This is done by setting the <literal>event_notification_command</literal> parameter in
|
||||
<filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
<para>
|
||||
This parameter accepts the following format placeholders:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>%n</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
node ID
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>%e</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
event type
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>%s</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
success (1 or 0)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>%t</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
timestamp
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>%d</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
details
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
<para>
|
||||
The values provided for <literal>%t</literal> and <literal>%d</literal>
|
||||
will probably contain spaces, so should be quoted in the provided command
|
||||
configuration, e.g.:
|
||||
<programlisting>
|
||||
event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Additionally the following format placeholders are available for the event
|
||||
type <varname>bdr_failover</varname> and optionally <varname>bdr_recovery</varname>:
|
||||
</para>
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>%c</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
conninfo string of the next available node
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>%a</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
name of the next available node
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
<para>
|
||||
These should always be quoted.
|
||||
</para>
|
||||
<para>
|
||||
By default, all notification types will be passed to the designated script;
|
||||
the notification types can be filtered to explicitly named ones using the <varname>event_notifications</varname> parameter:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><literal>primary_register</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>primary_unregister</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>standby_register</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>standby_unregister</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>standby_clone</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>standby_promote</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>standby_follow</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>standby_disconnect_manual</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>witness_register</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>witness_unregister</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>node_rejoin</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_start</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_shutdown</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_promote</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_follow</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>bdr_failover</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>bdr_reconnect</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>bdr_recovery</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>bdr_register</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>bdr_unregister</literal></simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
Note that under some circumstances (e.g. when no replication cluster primary
|
||||
could be located), it will not be possible to write an entry into the
|
||||
<literal>repmgr.events</literal>
|
||||
table, in which case executing a script via <varname>event_notification_command</varname>
|
||||
can serve as a fallback by generating some form of notification.
|
||||
</para>
|
||||
|
||||
|
||||
</chapter>
|
||||
85
doc/filelist.sgml
Normal file
85
doc/filelist.sgml
Normal file
@@ -0,0 +1,85 @@
|
||||
<!-- doc/filelist.sgml -->
|
||||
|
||||
<!ENTITY legal SYSTEM "legal.sgml">
|
||||
|
||||
<!ENTITY bookindex SYSTEM "bookindex.sgml">
|
||||
|
||||
<!--
|
||||
Some parts of the documentation are also source for some plain-text
|
||||
files used during installation. To selectively ignore or include
|
||||
some parts (e.g., external xref's) when generating these files we use
|
||||
these parameter entities. See also standalone-install.sgml.
|
||||
-->
|
||||
<!ENTITY % standalone-ignore "INCLUDE">
|
||||
<!ENTITY % standalone-include "IGNORE">
|
||||
|
||||
<!-- doc/filelist.sgml -->
|
||||
|
||||
<!--
|
||||
By default, no index is included. Use -i include-index on the command line
|
||||
to include it.
|
||||
-->
|
||||
<!ENTITY % include-index "IGNORE">
|
||||
|
||||
<!--
|
||||
Create empty index element for processing by XSLT stylesheet.
|
||||
-->
|
||||
<!ENTITY % include-xslt-index "IGNORE">
|
||||
|
||||
<!--
|
||||
Include external documentation sections
|
||||
-->
|
||||
|
||||
<!ENTITY overview SYSTEM "overview.sgml">
|
||||
<!ENTITY install SYSTEM "install.sgml">
|
||||
<!ENTITY install-requirements SYSTEM "install-requirements.sgml">
|
||||
<!ENTITY install-packages SYSTEM "install-packages.sgml">
|
||||
<!ENTITY install-source SYSTEM "install-source.sgml">
|
||||
<!ENTITY quickstart SYSTEM "quickstart.sgml">
|
||||
<!ENTITY configuration SYSTEM "configuration.sgml">
|
||||
<!ENTITY configuration-file SYSTEM "configuration-file.sgml">
|
||||
<!ENTITY configuration-file-settings SYSTEM "configuration-file-settings.sgml">
|
||||
<!ENTITY cloning-standbys SYSTEM "cloning-standbys.sgml">
|
||||
<!ENTITY promoting-standby SYSTEM "promoting-standby.sgml">
|
||||
<!ENTITY follow-new-primary SYSTEM "follow-new-primary.sgml">
|
||||
<!ENTITY switchover SYSTEM "switchover.sgml">
|
||||
<!ENTITY configuring-witness-server SYSTEM "configuring-witness-server.sgml">
|
||||
|
||||
<!ENTITY event-notifications SYSTEM "event-notifications.sgml">
|
||||
<!ENTITY upgrading-repmgr SYSTEM "upgrading-repmgr.sgml">
|
||||
|
||||
<!ENTITY repmgrd-automatic-failover SYSTEM "repmgrd-automatic-failover.sgml">
|
||||
<!ENTITY repmgrd-configuration SYSTEM "repmgrd-configuration.sgml">
|
||||
<!ENTITY repmgrd-demonstration SYSTEM "repmgrd-demonstration.sgml">
|
||||
<!ENTITY repmgrd-monitoring SYSTEM "repmgrd-monitoring.sgml">
|
||||
<!ENTITY repmgrd-degraded-monitoring SYSTEM "repmgrd-degraded-monitoring.sgml">
|
||||
<!ENTITY repmgrd-cascading-replication SYSTEM "repmgrd-cascading-replication.sgml">
|
||||
<!ENTITY repmgrd-network-split SYSTEM "repmgrd-network-split.sgml">
|
||||
<!ENTITY repmgrd-witness-server SYSTEM "repmgrd-witness-server.sgml">
|
||||
<!ENTITY repmgrd-bdr SYSTEM "repmgrd-bdr.sgml">
|
||||
|
||||
<!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.sgml">
|
||||
<!ENTITY repmgr-primary-unregister SYSTEM "repmgr-primary-unregister.sgml">
|
||||
<!ENTITY repmgr-standby-clone SYSTEM "repmgr-standby-clone.sgml">
|
||||
<!ENTITY repmgr-standby-register SYSTEM "repmgr-standby-register.sgml">
|
||||
<!ENTITY repmgr-standby-unregister SYSTEM "repmgr-standby-unregister.sgml">
|
||||
<!ENTITY repmgr-standby-promote SYSTEM "repmgr-standby-promote.sgml">
|
||||
<!ENTITY repmgr-standby-follow SYSTEM "repmgr-standby-follow.sgml">
|
||||
<!ENTITY repmgr-standby-switchover SYSTEM "repmgr-standby-switchover.sgml">
|
||||
<!ENTITY repmgr-witness-register SYSTEM "repmgr-witness-register.sgml">
|
||||
<!ENTITY repmgr-witness-unregister SYSTEM "repmgr-witness-unregister.sgml">
|
||||
<!ENTITY repmgr-node-status SYSTEM "repmgr-node-status.sgml">
|
||||
<!ENTITY repmgr-node-check SYSTEM "repmgr-node-check.sgml">
|
||||
<!ENTITY repmgr-node-rejoin SYSTEM "repmgr-node-rejoin.sgml">
|
||||
<!ENTITY repmgr-cluster-show SYSTEM "repmgr-cluster-show.sgml">
|
||||
<!ENTITY repmgr-cluster-matrix SYSTEM "repmgr-cluster-matrix.sgml">
|
||||
<!ENTITY repmgr-cluster-crosscheck SYSTEM "repmgr-cluster-crosscheck.sgml">
|
||||
<!ENTITY repmgr-cluster-event SYSTEM "repmgr-cluster-event.sgml">
|
||||
<!ENTITY repmgr-cluster-cleanup SYSTEM "repmgr-cluster-cleanup.sgml">
|
||||
|
||||
<!ENTITY appendix-release-notes SYSTEM "appendix-release-notes.sgml">
|
||||
<!ENTITY appendix-faq SYSTEM "appendix-faq.sgml">
|
||||
<!ENTITY appendix-signatures SYSTEM "appendix-signatures.sgml">
|
||||
|
||||
<!ENTITY bookindex SYSTEM "bookindex.sgml">
|
||||
|
||||
48
doc/follow-new-primary.sgml
Normal file
48
doc/follow-new-primary.sgml
Normal file
@@ -0,0 +1,48 @@
|
||||
<chapter id="follow-new-primary">
|
||||
<indexterm>
|
||||
<primary>Following a new primary</primary>
|
||||
<seealso>repmgr standby follow</seealso>
|
||||
</indexterm>
|
||||
|
||||
<title>Following a new primary</title>
|
||||
<para>
|
||||
Following the failure or removal of the replication cluster's existing primary
|
||||
server, <xref linkend="repmgr-standby-follow"> can be used to make 'orphaned' standbys
|
||||
follow the new primary and catch up to its current state.
|
||||
</para>
|
||||
<para>
|
||||
To demonstrate this, assuming a replication cluster in the same state as the
|
||||
end of the preceding section (<xref linkend="promoting-standby">),
|
||||
execute this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf standby follow
|
||||
INFO: changing node 3's primary to node 2
|
||||
NOTICE: restarting server using "pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/postgresql/data' restart"
|
||||
waiting for server to shut down......... done
|
||||
server stopped
|
||||
waiting for server to start.... done
|
||||
server started
|
||||
NOTICE: STANDBY FOLLOW successful
|
||||
DETAIL: node 3 is now attached to node 2
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The standby is now replicating from the new primary and
|
||||
<command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command>
|
||||
output reflects this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
||||
1 | node1 | primary | - failed | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node2 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Note that with cascading replication, <command>repmgr standby follow</command> can also be
|
||||
used to detach a standby from its current upstream server and follow the
|
||||
primary. However it's currently not possible to have it follow another standby;
|
||||
we hope to improve this in a future release.
|
||||
</para>
|
||||
|
||||
</chapter>
|
||||
153
doc/install-packages.sgml
Normal file
153
doc/install-packages.sgml
Normal file
@@ -0,0 +1,153 @@
|
||||
<sect1 id="installation-packages" xreflabel="Installing from packages">
|
||||
<title>Installing &repmgr; from packages</title>
|
||||
<para>
|
||||
We recommend installing &repmgr; using the available packages for your
|
||||
system.
|
||||
</para>
|
||||
|
||||
<sect2 id="installation-packages-redhat" xreflabel="Installing from packages on RHEL, Fedora and CentOS">
|
||||
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
<secondary>on Redhat/CentOS/Fedora etc.</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>RedHat/Fedora/CentOS</title>
|
||||
<para>
|
||||
RPM packages for &repmgr; are available via Yum through
|
||||
the PostgreSQL Global Development Group RPM repository
|
||||
(<ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink>).
|
||||
Follow the instructions for your distribution (RedHat, CentOS,
|
||||
Fedora, etc.) and architecture as detailed there.
|
||||
</para>
|
||||
<para>
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink> also provides its
|
||||
own RPM packages which are made available
|
||||
at the same time as each &repmgr; release, as it can take some days for
|
||||
them to become available via the main PGDG repository. See the following section for details.
|
||||
</para>
|
||||
|
||||
|
||||
<sect3 id="installation-packages-redhat-2ndq">
|
||||
<title>2ndQuadrant repmgr yum repository</title>
|
||||
<para>
|
||||
Beginning with <ulink url="http://repmgr.org/release-notes-3.1.3.html">repmgr 3.1.3</ulink>,
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
|
||||
repository for &repmgr; releases. This repository complements the main
|
||||
<ulink url="https://yum.postgresql.org/repopackages.php">PGDG community repository</ulink>,
|
||||
but enables repmgr users to access the latest &repmgr; packages before they are
|
||||
available via the PGDG repository, which can take several days to be updated following
|
||||
a fresh &repmgr; release.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Installation</emphasis>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
Import the repository public key (optional but recommended):
|
||||
<programlisting>
|
||||
rpm --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repository RPM for your distribution (this enables the 2ndQuadrant
|
||||
repository as a source of repmgr packages):
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<emphasis>Fedora:</emphasis>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<emphasis>RHEL, CentOS etc:</emphasis>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
e.g.:
|
||||
<programlisting>
|
||||
$ yum install http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repmgr version appropriate for your PostgreSQL version (e.g. <literal>repmgr96</literal>):
|
||||
<programlisting>
|
||||
$ yum install repmgr96</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<emphasis>Compatibility with PGDG Repositories</emphasis>
|
||||
</para>
|
||||
<para>
|
||||
The 2ndQuadrant &repmgr; yum repository uses exactly the same package definitions as the
|
||||
main PGDG repository and is effectively a selective mirror for &repmgr; packages only.
|
||||
</para>
|
||||
<para>
|
||||
Normally yum should prioritize the repository with the most recent &repmgr; version.
|
||||
Once the PGDG repository has been updated, it doesn't matter which repository
|
||||
the packages are installed from.
|
||||
</para>
|
||||
<para>
|
||||
To ensure the 2ndQuadrant repository is always prioritised, install <literal>yum-plugin-priorities</literal>
|
||||
and set the repository priorities accordingly.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<emphasis>Installing a specific package version</emphasis>
|
||||
</para>
|
||||
<para>
|
||||
To install a specific package version, execute <command>yum --showduplicates list</command>
|
||||
for the package in question:
|
||||
<programlisting>
|
||||
[root@localhost ~]# yum --showduplicates list repmgr96
|
||||
Loaded plugins: fastestmirror
|
||||
Loading mirror speeds from cached hostfile
|
||||
* base: ftp.iij.ad.jp
|
||||
* extras: ftp.iij.ad.jp
|
||||
* updates: ftp.iij.ad.jp
|
||||
Available Packages
|
||||
repmgr96.x86_64 3.2-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.2.1-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.1-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.2-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.2-1.rhel6 pgdg96
|
||||
repmgr96.x86_64 4.0.0-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 4.0.0-1.rhel6 pgdg96</programlisting>
|
||||
then append the appropriate version number to the package name with a hyphen, e.g.:
|
||||
<programlisting>
|
||||
[root@localhost ~]# yum install repmgr96-3.3.2-1.el6</programlisting>
|
||||
</para>
|
||||
</sect3>
|
||||
</sect2>
|
||||
|
||||
|
||||
|
||||
<sect2 id="installation-packages-debian" xreflabel="Installing from packages on Debian or Ubuntu">
|
||||
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
<secondary>on Debian/Ubuntu etc.</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Debian/Ubuntu</title>
|
||||
<para>.deb packages for &repmgr; are available from the
|
||||
PostgreSQL Community APT repository (<ulink url="http://apt.postgresql.org/">http://apt.postgresql.org/</ulink>).
|
||||
Instructions can be found in the APT section of the PostgreSQL Wiki
|
||||
(<ulink url="https://wiki.postgresql.org/wiki/Apt">https://wiki.postgresql.org/wiki/Apt</ulink>).
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
72
doc/install-requirements.sgml
Normal file
72
doc/install-requirements.sgml
Normal file
@@ -0,0 +1,72 @@
|
||||
<sect1 id="install-requirements" xreflabel="installation requirements">
|
||||
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
<secondary>requirements</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Requirements for installing repmgr</title>
|
||||
<para>
|
||||
repmgr is developed and tested on Linux and OS X, but should work on any
|
||||
UNIX-like system supported by PostgreSQL itself. There is no support for
|
||||
Microsoft Windows.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
From version 4.0, repmgr is compatible with all PostgreSQL versions from 9.3, including PostgreSQL 10.
|
||||
Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
If upgrading from &repmgr; 3.x, please see the section <xref linkend="upgrading-from-repmgr-3">.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
All servers in the replication cluster must be running the same major version of
|
||||
PostgreSQL, and we recommend that they also run the same minor version.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
&repmgr; must be installed on each server in the replication cluster.
|
||||
If installing repmgr from packages, the package version must match the PostgreSQL
|
||||
version. If installing from source, repmgr must be compiled against the same
|
||||
major version.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
A dedicated system user for &repmgr; is <emphasis>not</emphasis> required; as many &repmgr; and
|
||||
<application>repmgrd</application> actions require direct access to the PostgreSQL data directory,
|
||||
these commands should be executed by the <literal>postgres</literal> user.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Passwordless <command>ssh</command> connectivity between all servers in the replication cluster
|
||||
is not required, but is necessary in the following cases:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
|
||||
data directory (in which case <command>rsync</command> is also required)</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
|
||||
and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<simpara>
|
||||
We recommend using a session multiplexer utility such as <command>screen</command> or
|
||||
<command>tmux</command> when performing long-running actions (such as cloning a database)
|
||||
on a remote server - this will ensure the &repmgr; action won't be prematurely
|
||||
terminated if your <command>ssh</command> session to the server is interrupted or closed.
|
||||
</simpara>
|
||||
</tip>
|
||||
</sect1>
|
||||
161
doc/install-source.sgml
Normal file
161
doc/install-source.sgml
Normal file
@@ -0,0 +1,161 @@
|
||||
<sect1 id="installation-source" xreflabel="Installing from source code">
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
<secondary>from source</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Installing &repmgr; from source</title>
|
||||
|
||||
<sect2 id="installation-source-prereqs">
|
||||
<title>Prerequisites for installing from source</title>
|
||||
<para>
|
||||
To install &repmgr; the prerequisites for compiling
|
||||
&postgres; must be installed. These are described in &postgres;'s
|
||||
documentation
|
||||
on <ulink url="https://www.postgresql.org/docs/current/install-requirements.html">build requirements</ulink>
|
||||
and <ulink url="https://www.postgresql.org/docs/current/docguide-toolsets.html">build requirements for documentation</ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Most mainstream Linux distributions and other UNIX variants provide simple
|
||||
ways to install the prerequisites from packages.
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>Debian</literal> and <literal>Ubuntu</literal>: First
|
||||
add the <ulink
|
||||
url="http://apt.postgresql.org/">apt.postgresql.org</ulink>
|
||||
repository to your <filename>sources.list</filename> if you
|
||||
have not already done so. Then install the prerequisites for
|
||||
building PostgreSQL with:
|
||||
<programlisting>
|
||||
sudo apt-get update
|
||||
sudo apt-get build-dep postgresql-9.6</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>RHEL or CentOS 6.x or 7.x</literal>: install the appropriate repository RPM
|
||||
for your system from <ulink url="https://yum.postgresql.org/repopackages.php">
|
||||
yum.postgresql.org</ulink>. Then install the prerequisites for building
|
||||
PostgreSQL with:
|
||||
<programlisting>
|
||||
sudo yum check-update
|
||||
sudo yum groupinstall "Development Tools"
|
||||
sudo yum install yum-utils openjade docbook-dtds docbook-style-dsssl docbook-style-xsl
|
||||
sudo yum-builddep postgresql96</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
Select the appropriate PostgreSQL versions for your target repmgr version.
|
||||
</simpara>
|
||||
</note>
|
||||
</sect2>
|
||||
|
||||
|
||||
<sect2 id="installation-get-source">
|
||||
<title>Getting &repmgr; source code</title>
|
||||
|
||||
<para>
|
||||
There are two ways to get the &repmgr; source code: with git, or by downloading tarballs of released versions.
|
||||
</para>
|
||||
|
||||
<sect3>
|
||||
<title>Using <application>git</application> to get the &repmgr; sources</title>
|
||||
|
||||
<para>
|
||||
Use <application><ulink url="https://git-scm.com">git</ulink></application> if you expect
|
||||
to update often, you want to keep track of development or if you want to contribute
|
||||
changes to &repmgr;. There is no reason <emphasis>not</emphasis> to use <application>git</application>
|
||||
if you're familiar with it.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The source for &repmgr; is maintained at
|
||||
<ulink url="https://github.com/2ndQuadrant/repmgr">https://github.com/2ndQuadrant/repmgr</ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
There are also tags for each &repmgr; release, e.g. <filename>v4.0.1</filename>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Clone the source code using <application>git</application>:
|
||||
<programlisting>
|
||||
git clone https://github.com/2ndQuadrant/repmgr</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For more information on using <application>git</application> see
|
||||
<ulink url="https://git-scm.com/">git-scm.com</ulink>.
|
||||
</para>
|
||||
|
||||
</sect3>
|
||||
|
||||
<sect3>
|
||||
<title>Downloading release source tarballs</title>
|
||||
|
||||
<para>
|
||||
Official release source code is uploaded as tarballs to the
|
||||
&repmgr; website along with a tarball checksum and a matching GnuPG
|
||||
signature. See
|
||||
<ulink url="http://repmgr.org/">http://repmgr.org/</ulink>
|
||||
for the download information. See <xref linkend="appendix-signatures">
|
||||
for information on verifying digital signatures.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
You will need to download the repmgr source, e.g. <filename>repmgr-4.0.tar.gz</filename>.
|
||||
You may optionally verify the package checksums from the
|
||||
<literal>.md5</literal> files and/or verify the GnuPG signatures
|
||||
per <xref linkend="appendix-signatures">.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
After you unpack the source code archives using <literal>tar xf</literal>
|
||||
the installation process is the same as if you were installing from a git
|
||||
clone.
|
||||
</para>
|
||||
|
||||
</sect3>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="installation-repmgr-source">
|
||||
<title>Installation of &repmgr; from source</title>
|
||||
<para>
|
||||
To install &repmgr; from source, simply execute:
|
||||
|
||||
<programlisting>
|
||||
./configure && make install</programlisting>
|
||||
|
||||
Ensure <command>pg_config</command> for the target PostgreSQL version is in
|
||||
<varname>$PATH</varname>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
|
||||
|
||||
<sect2 id="installation-build-repmgr-docs">
|
||||
<title>Building &repmgr; documentation</title>
|
||||
<para>
|
||||
The &repmgr; documentation is (like the main PostgreSQL project)
|
||||
written in DocBook format. To build it locally as HTML, you'll need to
|
||||
install the required packages as described in the
|
||||
<ulink url="https://www.postgresql.org/docs/9.6/static/docguide-toolsets.html">
|
||||
PostgreSQL documentation</ulink> then execute:
|
||||
<programlisting>
|
||||
./configure && make install-doc</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The generated HTML files will be placed in the <filename>doc/html</filename>
|
||||
subdirectory of your source tree.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
|
||||
</sect1>
|
||||
28
doc/install.sgml
Normal file
28
doc/install.sgml
Normal file
@@ -0,0 +1,28 @@
|
||||
<chapter id="installation" xreflabel="Installation">
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>Installation</title>
|
||||
|
||||
<para>
|
||||
&repmgr; can be installed from binary packages provided by your operating
|
||||
system's packaging system, or from source.
|
||||
</para>
|
||||
<para>
|
||||
In general we recommend using binary packages, unless unavailable for your operating system.
|
||||
</para>
|
||||
<para>
|
||||
Source installs are mainly useful if you want to keep track of the very
|
||||
latest repmgr development and contribute to development. They're also the
|
||||
only option if there are no packages for your operating system yet.
|
||||
</para>
|
||||
<para>
|
||||
Before installing &repmgr; make sure you satisfy the <xref linkend="install-requirements">.
|
||||
</para>
|
||||
|
||||
&install-requirements;
|
||||
&install-packages;
|
||||
&install-source;
|
||||
|
||||
</chapter>
|
||||
37
doc/legal.sgml
Normal file
37
doc/legal.sgml
Normal file
@@ -0,0 +1,37 @@
|
||||
<!-- doc/legal.sgml -->
|
||||
|
||||
<date>2017</date>
|
||||
|
||||
<copyright>
|
||||
<year>2010-2017</year>
|
||||
<holder>2ndQuadrant, Ltd.</holder>
|
||||
</copyright>
|
||||
|
||||
<legalnotice id="legalnotice">
|
||||
<title>Legal Notice</title>
|
||||
|
||||
<para>
|
||||
<productname>repmgr</productname> is Copyright © 2010-2017
|
||||
by 2ndQuadrant, Ltd. All rights reserved.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
</para>
|
||||
<para>
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
</para>
|
||||
<para>
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see
|
||||
<ulink url="https://www.gnu.org/licenses/">https://www.gnu.org/licenses/</ulink>
|
||||
to obtain one.
|
||||
</para>
|
||||
|
||||
</legalnotice>
|
||||
240
doc/overview.sgml
Normal file
240
doc/overview.sgml
Normal file
@@ -0,0 +1,240 @@
|
||||
<chapter id="overview" xreflabel="Overview">
|
||||
<title>repmgr overview</title>
|
||||
|
||||
<para>
|
||||
This chapter provides a high-level overview of repmgr's components and functionality.
|
||||
</para>
|
||||
<sect1 id="repmgr-concepts" xreflabel="Concepts">
|
||||
|
||||
<indexterm>
|
||||
<primary>concepts</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>Concepts</title>
|
||||
|
||||
<para>
|
||||
This guide assumes that you are familiar with PostgreSQL administration and
|
||||
streaming replication concepts. For further details on streaming
|
||||
replication, see the PostgreSQL documentation section on <ulink
|
||||
url="https://www.postgresql.org/docs/current/interactive/warm-standby.html#STREAMING-REPLICATION">
|
||||
streaming replication</>.
|
||||
</para>
|
||||
<para>
|
||||
The following terms are used throughout the &repmgr; documentation.
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term>replication cluster</term>
|
||||
<listitem>
|
||||
<simpara>
|
||||
In the &repmgr; documentation, "replication cluster" refers to the network
|
||||
of PostgreSQL servers connected by streaming replication.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>node</term>
|
||||
<listitem>
|
||||
<simpara>
|
||||
A node is a single PostgreSQL server within a replication cluster.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>upstream node</term>
|
||||
<listitem>
|
||||
<simpara>
|
||||
The node a standby server connects to, in order to receive streaming replication.
|
||||
This is either the primary server, or in the case of cascading replication, another
|
||||
standby.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>failover</term>
|
||||
<listitem>
|
||||
<simpara>
|
||||
This is the action which occurs if a primary server fails and a suitable standby
|
||||
is promoted as the new primary. The <application>repmgrd</application> daemon supports automatic failover
|
||||
to minimise downtime.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>switchover</term>
|
||||
<listitem>
|
||||
<simpara>
|
||||
In certain circumstances, such as hardware or operating system maintenance,
|
||||
it's necessary to take a primary server offline; in this case a controlled
|
||||
switchover is necessary, whereby a suitable standby is promoted and the
|
||||
existing primary removed from the replication cluster in a controlled manner.
|
||||
The &repmgr; command line client provides this functionality.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>fencing</term>
|
||||
<listitem>
|
||||
<simpara>
|
||||
In a failover situation, following the promotion of a standby to primary, it's
|
||||
essential that the previous primary does not unexpectedly come back on
|
||||
line, which would result in a split-brain situation. To prevent this,
|
||||
the failed primary should be isolated from applications, i.e. "fenced off".
|
||||
</simpara>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry id="witness-server">
|
||||
<term>witness server</term>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr; provides functionality to set up a so-called "witness server" to
|
||||
assist in determining a new primary server in a failover situation with more
|
||||
than one standby. The witness server itself is not part of the replication
|
||||
cluster, although it does contain a copy of the repmgr metadata schema.
|
||||
</para>
|
||||
<para>
|
||||
The purpose of a witness server is to provide a "casting vote" where servers
|
||||
in the replication cluster are split over more than one location. In the event
|
||||
of a loss of connectivity between locations, the presence or absence of
|
||||
the witness server will decide whether a server at that location is promoted
|
||||
to primary; this is to prevent a "split-brain" situation where an isolated
|
||||
location interprets a network outage as a failure of the (remote) primary and
|
||||
promotes a (local) standby.
|
||||
</para>
|
||||
<para>
|
||||
A witness server only needs to be created if <application>repmgrd</application>
|
||||
is in use.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</para>
|
||||
</sect1>
|
||||
<sect1 id="repmgr-components" xreflabel="Components">
|
||||
<title>Components</title>
|
||||
<para>
|
||||
&repmgr; is a suite of open-source tools to manage replication and failover
|
||||
within a cluster of PostgreSQL servers. It supports and enhances PostgreSQL's
|
||||
built-in streaming replication, which provides a single read/write primary server
|
||||
and one or more read-only standbys containing near-real time copies of the primary
|
||||
server's database. It provides two main tools:
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term>repmgr</term>
|
||||
<listitem>
|
||||
<para>
|
||||
A command-line tool used to perform administrative tasks such as:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>setting up standby servers</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>promoting a standby server to primary</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>switching over primary and standby servers</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>displaying the status of servers in the replication cluster</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>repmgrd</term>
|
||||
<listitem>
|
||||
<para>
|
||||
A daemon which actively monitors servers in a replication cluster
|
||||
and performs the following tasks:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>monitoring and recording replication performance</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>performing failover by detecting failure of the primary and
|
||||
promoting the most suitable standby server
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>providing notifications about events in the cluster to a user-defined
|
||||
script which can perform tasks such as sending alerts by email</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgr-user-metadata" xreflabel="Repmgr user and metadata">
|
||||
<title>Repmgr user and metadata</title>
|
||||
<para>
|
||||
In order to effectively manage a replication cluster, &repmgr; needs to store
|
||||
information about the servers in the cluster in a dedicated database schema.
|
||||
This schema is automatically created by the &repmgr; extension, which is installed
|
||||
during the first step in initialising a &repmgr;-administered cluster
|
||||
(<command><link linkend="repmgr-primary-register">repmgr primary register</link></command>)
|
||||
and contains the following objects:
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term>Tables</term>
|
||||
<listitem>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara><literal>repmgr.events</literal>: records events of interest</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgr.nodes</literal>: connection and status information for each server in the
|
||||
replication cluster</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgr.monitoring_history</literal>: historical standby monitoring information
|
||||
written by <application>repmgrd</application></simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term>Views</term>
|
||||
<listitem>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara><literal>repmgr.show_nodes</literal>: based on the table <literal>repmgr.nodes</literal>, additionally showing the
|
||||
name of the server's upstream node</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgr.replication_status</literal>: when <application>repmgrd</application>'s monitoring is enabled, shows
|
||||
current monitoring status for each standby.</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The &repmgr; metadata schema can be stored in an existing database or in its own
|
||||
dedicated database. Note that the &repmgr; metadata schema cannot reside on a database
|
||||
server which is not part of the replication cluster managed by &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
A database user must be available for &repmgr; to access this database and perform
|
||||
necessary changes. This user does not need to be a superuser, however some operations
|
||||
such as initial installation of the &repmgr; extension will require a superuser
|
||||
connection (this can be specified where required with the command line option
|
||||
<literal>--superuser</literal>).
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
</chapter>
|
||||
79
doc/promoting-standby.sgml
Normal file
79
doc/promoting-standby.sgml
Normal file
@@ -0,0 +1,79 @@
|
||||
<chapter id="promoting-standby" xreflabel="Promoting a standby">
|
||||
<indexterm>
|
||||
<primary>promoting a standby</primary>
|
||||
<seealso>repmgr standby promote</seealso>
|
||||
</indexterm>
|
||||
<title>Promoting a standby server with repmgr</title>
|
||||
<para>
|
||||
If a primary server fails or needs to be removed from the replication cluster,
|
||||
a new primary server must be designated, to ensure the cluster continues
|
||||
to function correctly. This can be done with <xref linkend="repmgr-standby-promote">,
|
||||
which promotes the standby on the current server to primary.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To demonstrate this, set up a replication cluster with a primary and two attached
|
||||
standby servers so that the cluster looks like this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
||||
1 | node1 | primary | * running | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Stop the current primary with e.g.:
|
||||
<programlisting>
|
||||
$ pg_ctl -D /var/lib/postgresql/data -m fast stop</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
At this point the replication cluster will be in a partially disabled state, with
|
||||
both standbys accepting read-only connections while attempting to connect to the
|
||||
stopped primary. Note that the &repmgr; metadata table will not yet have been updated;
|
||||
executing <xref linkend="repmgr-cluster-show"> will note the discrepancy:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+---------------+----------+----------+--------------------------------------
|
||||
1 | node1 | primary | ? unreachable | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr
|
||||
|
||||
WARNING: following issues were detected
|
||||
node "node1" (ID: 1) is registered as an active primary but is unreachable</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Now promote the first standby with:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf standby promote</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This will produce output similar to the following:
|
||||
<programlisting>
|
||||
INFO: connecting to standby database
|
||||
NOTICE: promoting standby
|
||||
DETAIL: promoting server using "pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/postgresql/data' promote"
|
||||
server promoting
|
||||
INFO: reconnecting to promoted server
|
||||
NOTICE: STANDBY PROMOTE successful
|
||||
DETAIL: node 2 was successfully promoted to primary</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Executing <xref linkend="repmgr-cluster-show"> will show the current state; as there is now an
|
||||
active primary, the previous warning will not be displayed:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
||||
1 | node1 | primary | - failed | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
However the sole remaining standby (<literal>node3</literal>) is still trying to replicate from the failed
|
||||
primary; <xref linkend="repmgr-standby-follow"> must now be executed to rectify this situation
|
||||
(see <xref linkend="follow-new-primary"> for example).
|
||||
</para>
|
||||
</chapter>
|
||||
|
||||
455
doc/quickstart.sgml
Normal file
455
doc/quickstart.sgml
Normal file
@@ -0,0 +1,455 @@
|
||||
<chapter id="quickstart" xreflabel="Quick-start guide">
|
||||
<title>Quick-start guide</title>
|
||||
|
||||
<para>
|
||||
This section gives a quick introduction to &repmgr;, including setting up a
|
||||
sample &repmgr; installation and a basic replication cluster.
|
||||
</para>
|
||||
<para>
|
||||
These instructions are for demonstration purposes and are not suitable for a production
|
||||
install, as issues such as account security considerations, and system administration
|
||||
best practices are omitted.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
To upgrade an existing &repmgr; 3.x installation, see section
|
||||
<xref linkend="upgrading-from-repmgr-3">.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<sect1 id="quickstart-prerequisites">
|
||||
<title>Prerequisites for setting up a basic replication cluster with &repmgr;</title>
|
||||
<para>
|
||||
The following section will describe how to set up a basic replication cluster
|
||||
with a primary and a standby server using the <application>repmgr</application>
|
||||
command line tool.
|
||||
</para>
|
||||
<para>
|
||||
We'll assume the primary is called <literal>node1</literal> with IP address
|
||||
<literal>192.168.1.11</literal>, and the standby is called <literal>node2</literal>
|
||||
with IP address <literal>192.168.1.12</literal>.
|
||||
</para>
|
||||
<para>
|
||||
The following software must be installed on both servers:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><application>PostgreSQL</application></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<application>repmgr</application> (matching the installed
|
||||
<application>PostgreSQL</application> major version)
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
At network level, connections to the PostgreSQL port (default: <literal>5432</literal>)
|
||||
must be possible in both directions.
|
||||
</para>
|
||||
<para>
|
||||
If you want <application>repmgr</application> to copy configuration files which are
|
||||
located outside the PostgreSQL data directory, and/or to test <command>switchover</command>
|
||||
functionality, you will also need passwordless SSH connections between both servers, and
|
||||
<application>rsync</application> should be installed.
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
For testing <application>repmgr</application>, it's possible to use multiple PostgreSQL
|
||||
instances running on different ports on the same computer, with
|
||||
passwordless SSH access to <filename>localhost</filename> enabled.
|
||||
</simpara>
|
||||
</tip>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-postgresql-configuration">
|
||||
<title>PostgreSQL configuration</title>
|
||||
<para>
|
||||
On the primary server, a PostgreSQL instance must be initialised and running.
|
||||
The following replication settings may need to be adjusted:
|
||||
</para>
|
||||
<programlisting>
|
||||
|
||||
# Enable replication connections; set this figure to at least one more
|
||||
# than the number of standbys which will connect to this server
|
||||
# (note that repmgr will execute `pg_basebackup` in WAL streaming mode,
|
||||
# which requires two free WAL senders)
|
||||
|
||||
max_wal_senders = 10
|
||||
|
||||
# Ensure WAL files contain enough information to enable read-only queries
|
||||
# on the standby.
|
||||
#
|
||||
# PostgreSQL 9.5 and earlier: one of 'hot_standby' or 'logical'
|
||||
# PostgreSQL 9.6 and later: one of 'replica' or 'logical'
|
||||
# ('hot_standby' will still be accepted as an alias for 'replica')
|
||||
#
|
||||
# See: https://www.postgresql.org/docs/current/static/runtime-config-wal.html#GUC-WAL-LEVEL
|
||||
|
||||
wal_level = 'hot_standby'
|
||||
|
||||
# Enable read-only queries on a standby
|
||||
# (Note: this will be ignored on a primary but we recommend including
|
||||
# it anyway)
|
||||
|
||||
hot_standby = on
|
||||
|
||||
# Enable WAL file archiving
|
||||
archive_mode = on
|
||||
|
||||
# Set archive command to a script or application that will safely store
|
||||
# your WALs in a secure place. /bin/true is an example of a command that
|
||||
# ignores archiving. Use something more sensible.
|
||||
archive_command = '/bin/true'
|
||||
|
||||
# If you have configured "pg_basebackup_options"
|
||||
# in "repmgr.conf" to include the setting "--xlog-method=fetch" (from
|
||||
# PostgreSQL 10 "--wal-method=fetch"), *and* you have not set
|
||||
# "restore_command" in "repmgr.conf" to fetch WAL files from another
|
||||
# source such as Barman, you'll need to set "wal_keep_segments" to a
|
||||
# high enough value to ensure that all WAL files generated while
|
||||
# the standby is being cloned are retained until the standby starts up.
|
||||
#
|
||||
# wal_keep_segments = 5000
|
||||
</programlisting>
|
||||
<tip>
|
||||
<simpara>
|
||||
Rather than editing these settings in the default <filename>postgresql.conf</filename>
|
||||
file, create a separate file such as <filename>postgresql.replication.conf</filename> and
|
||||
include it from the end of the main configuration file with:
|
||||
<command>include 'postgresql.replication.conf'</command>.
|
||||
</simpara>
|
||||
</tip>
|
||||
<para>
|
||||
Additionally, if you are intending to use <application>pg_rewind</application>,
|
||||
and the cluster was not initialised using data checksums, you may want to consider enabling
|
||||
<varname>wal_log_hints</varname>; for more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-repmgr-user-database">
|
||||
<title>Create the repmgr user and database</title>
|
||||
<para>
|
||||
Create a dedicated PostgreSQL superuser account and a database for
|
||||
the &repmgr; metadata, e.g.
|
||||
</para>
|
||||
<programlisting>
|
||||
createuser -s repmgr
|
||||
createdb repmgr -O repmgr
|
||||
</programlisting>
|
||||
|
||||
<para>
|
||||
For the examples in this document, the name <literal>repmgr</literal> will be
|
||||
used for both user and database, but any names can be used.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
For the sake of simplicity, the <literal>repmgr</literal> user is created
|
||||
as a superuser. If desired, it's possible to create the <literal>repmgr</literal>
|
||||
user as a normal user. However for certain operations superuser permissions
|
||||
are required; in this case the command line option <command>--superuser</command>
|
||||
can be provided to specify a superuser.
|
||||
</para>
|
||||
<para>
|
||||
It's also assumed that the <literal>repmgr</literal> user will be used to make the
|
||||
replication connection from the standby to the primary; again this can be
|
||||
overridden by specifying a separate replication user when registering each node.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
&repmgr; will install the <literal>repmgr</literal> extension, which creates a
|
||||
<literal>repmgr</literal> schema containing the &repmgr;'s metadata tables as
|
||||
well as other functions and views. We also recommend that you set the
|
||||
<literal>repmgr</literal> user's search path to include this schema name, e.g.
|
||||
<programlisting>
|
||||
ALTER USER repmgr SET search_path TO repmgr, "$user", public;</programlisting>
|
||||
</para>
|
||||
</tip>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-authentication">
|
||||
<title>Configuring authentication in pg_hba.conf</title>
|
||||
<para>
|
||||
Ensure the <literal>repmgr</literal> user has appropriate permissions in <filename>pg_hba.conf</filename> and
|
||||
can connect in replication mode; <filename>pg_hba.conf</filename> should contain entries
|
||||
similar to the following:
|
||||
</para>
|
||||
<programlisting>
|
||||
local replication repmgr trust
|
||||
host replication repmgr 127.0.0.1/32 trust
|
||||
host replication repmgr 192.168.1.0/24 trust
|
||||
|
||||
local repmgr repmgr trust
|
||||
host repmgr repmgr 127.0.0.1/32 trust
|
||||
host repmgr repmgr 192.168.1.0/24 trust
|
||||
</programlisting>
|
||||
<para>
|
||||
Note that these are simple settings for testing purposes.
|
||||
Adjust according to your network environment and authentication requirements.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-standby-preparation">
|
||||
<title>Preparing the standby</title>
|
||||
<para>
|
||||
On the standby, do not create a PostgreSQL instance, but do ensure the destination
|
||||
data directory (and any other directories which you want PostgreSQL to use)
|
||||
exist and are owned by the <literal>postgres</literal> system user. Permissions
|
||||
must be set to <literal>0700</literal> (<literal>drwx------</literal>).
|
||||
</para>
|
||||
<para>
|
||||
Check the primary database is reachable from the standby using <application>psql</application>:
|
||||
</para>
|
||||
<programlisting>
|
||||
psql 'host=node1 user=repmgr dbname=repmgr connect_timeout=2'</programlisting>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
&repmgr; stores connection information as <ulink
|
||||
url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING">libpq
|
||||
connection strings</ulink> throughout. This documentation refers to them as <literal>conninfo</literal>
|
||||
strings; an alternative name is <literal>DSN</literal> (<literal>data source name</literal>).
|
||||
We'll use these in place of the <command>-h hostname -d databasename -U username</command> syntax.
|
||||
</para>
|
||||
</note>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-repmgr-conf">
|
||||
<title>repmgr configuration file</title>
|
||||
<para>
|
||||
Create a <filename>repmgr.conf</filename> file on the primary server. The file must
|
||||
contain at least the following parameters:
|
||||
</para>
|
||||
<programlisting>
|
||||
node_id=1
|
||||
node_name=node1
|
||||
conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'
|
||||
data_directory='/var/lib/postgresql/data'
|
||||
</programlisting>
|
||||
|
||||
<para>
|
||||
<filename>repmgr.conf</filename> should not be stored inside the PostgreSQL data directory,
|
||||
as it could be overwritten when setting up or reinitialising the PostgreSQL
|
||||
server. See sections on <xref linkend="configuration-file"> and <xref linkend="configuration-file-settings">
|
||||
for further details about <filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
For Debian-based distributions we recommend explicitly setting
|
||||
<literal>pg_bindir</literal> to the directory where <command>pg_ctl</command> and other binaries
|
||||
not in the standard path are located. For PostgreSQL 9.6 this would be <filename>/usr/lib/postgresql/9.6/bin/</filename>.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
<para>
|
||||
See the file
|
||||
<ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</>
|
||||
for details of all available configuration parameters.
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
<sect1 id="quickstart-primary-register">
|
||||
<title>Register the primary server</title>
|
||||
<para>
|
||||
To enable &repmgr; to support a replication cluster, the primary node must
|
||||
be registered with &repmgr;. This installs the <literal>repmgr</literal>
|
||||
extension and metadata objects, and adds a metadata record for the primary server:
|
||||
</para>
|
||||
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf primary register
|
||||
INFO: connecting to primary database...
|
||||
NOTICE: attempting to install extension "repmgr"
|
||||
NOTICE: "repmgr" extension successfully installed
|
||||
NOTICE: primary node record (id: 1) registered</programlisting>
|
||||
|
||||
<para>
|
||||
Verify status of the cluster like this:
|
||||
</para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Connection string
|
||||
----+-------+---------+-----------+----------+--------------------------------------------------------
|
||||
1 | node1 | primary | * running | | host=node1 dbname=repmgr user=repmgr connect_timeout=2
|
||||
</programlisting>
|
||||
<para>
|
||||
The record in the <literal>repmgr</literal> metadata table will look like this:
|
||||
</para>
|
||||
<programlisting>
|
||||
repmgr=# SELECT * FROM repmgr.nodes;
|
||||
-[ RECORD 1 ]----+-------------------------------------------------------
|
||||
node_id | 1
|
||||
upstream_node_id |
|
||||
active | t
|
||||
node_name | node1
|
||||
type | primary
|
||||
location | default
|
||||
priority | 100
|
||||
conninfo | host=node1 dbname=repmgr user=repmgr connect_timeout=2
|
||||
repluser | repmgr
|
||||
slot_name |
|
||||
config_file | /etc/repmgr.conf</programlisting>
|
||||
<para>
|
||||
Each server in the replication cluster will have its own record. If <application>repmgrd</application>
|
||||
is in use, the fields <literal>upstream_node_id</literal>, <literal>active</literal> and
|
||||
<literal>type</literal> will be updated when the node's status or role changes.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-standby-clone">
|
||||
<title>Clone the standby server</title>
|
||||
<para>
|
||||
Create a <filename>repmgr.conf</filename> file on the standby server. It must contain at
|
||||
least the same parameters as the primary's <filename>repmgr.conf</filename>, but with
|
||||
the mandatory values <literal>node_id</literal>, <literal>node_name</literal>, <literal>conninfo</literal>
|
||||
(and possibly <literal>data_directory</literal>) adjusted accordingly, e.g.:
|
||||
</para>
|
||||
<programlisting>
|
||||
node_id=2
|
||||
node_name=node2
|
||||
conninfo='host=node2 user=repmgr dbname=repmgr connect_timeout=2'
|
||||
data_directory='/var/lib/postgresql/data'</programlisting>
|
||||
<para>
|
||||
Use the <command>--dry-run</command> option to check the standby can be cloned:
|
||||
</para>
|
||||
<programlisting>
|
||||
$ repmgr -h node1 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run
|
||||
NOTICE: using provided configuration file "/etc/repmgr.conf"
|
||||
NOTICE: destination directory "/var/lib/postgresql/data" provided
|
||||
INFO: connecting to source node
|
||||
NOTICE: checking for available walsenders on source node (2 required)
|
||||
INFO: sufficient walsenders available on source node (2 required)
|
||||
NOTICE: standby will attach to upstream node 1
|
||||
HINT: consider using the -c/--fast-checkpoint option
|
||||
INFO: all prerequisites for "standby clone" are met</programlisting>
|
||||
<para>
|
||||
If no problems are reported, the standby can then be cloned with:
|
||||
</para>
|
||||
<programlisting>
|
||||
$ repmgr -h node1 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone
|
||||
|
||||
NOTICE: using configuration file "/etc/repmgr.conf"
|
||||
NOTICE: destination directory "/var/lib/postgresql/data" provided
|
||||
INFO: connecting to source node
|
||||
NOTICE: checking for available walsenders on source node (2 required)
|
||||
INFO: sufficient walsenders available on source node (2 required)
|
||||
INFO: creating directory "/var/lib/postgresql/data"...
|
||||
NOTICE: starting backup (using pg_basebackup)...
|
||||
HINT: this may take some time; consider using the -c/--fast-checkpoint option
|
||||
INFO: executing:
|
||||
pg_basebackup -l "repmgr base backup" -D /var/lib/postgresql/data -h node1 -U repmgr -X stream
|
||||
NOTICE: standby clone (using pg_basebackup) complete
|
||||
NOTICE: you can now start your PostgreSQL server
|
||||
HINT: for example: pg_ctl -D /var/lib/postgresql/data start
|
||||
</programlisting>
|
||||
<para>
|
||||
This has cloned the PostgreSQL data directory files from the primary <literal>node1</literal>
|
||||
using PostgreSQL's <command>pg_basebackup</command> utility. A <filename>recovery.conf</filename>
|
||||
file containing the correct parameters to start streaming from this primary server will be created
|
||||
automatically.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
By default, any configuration files in the primary's data directory will be
|
||||
copied to the standby. Typically these will be <filename>postgresql.conf</filename>,
|
||||
<filename>postgresql.auto.conf</filename>, <filename>pg_hba.conf</filename> and
|
||||
<filename>pg_ident.conf</filename>. These may require modification before the standby
|
||||
is started.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
Make any adjustments to the standby's PostgreSQL configuration files now,
|
||||
then start the server.
|
||||
</para>
|
||||
<para>
|
||||
For more details on <command>repmgr standby clone</command>, see the
|
||||
<link linkend="repmgr-standby-clone">command reference</link>.
|
||||
A more detailed overview of cloning options is available in the
|
||||
<link linkend="cloning-standbys">administration manual</link>.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-verify-replication">
|
||||
<title>Verify replication is functioning</title>
|
||||
<para>
|
||||
Connect to the primary server and execute:
|
||||
<programlisting>
|
||||
repmgr=# SELECT * FROM pg_stat_replication;
|
||||
-[ RECORD 1 ]----+------------------------------
|
||||
pid | 19111
|
||||
usesysid | 16384
|
||||
usename | repmgr
|
||||
application_name | node2
|
||||
client_addr | 192.168.1.12
|
||||
client_hostname |
|
||||
client_port | 50378
|
||||
backend_start | 2017-08-28 15:14:19.851581+09
|
||||
backend_xmin |
|
||||
state | streaming
|
||||
sent_location | 0/7000318
|
||||
write_location | 0/7000318
|
||||
flush_location | 0/7000318
|
||||
replay_location | 0/7000318
|
||||
sync_priority | 0
|
||||
sync_state | async</programlisting>
|
||||
This shows that the previously cloned standby (<literal>node2</literal> shown in the field
|
||||
<literal>application_name</literal>) has connected to the primary from IP address
|
||||
<literal>192.168.1.12</literal>.
|
||||
</para>
|
||||
<para>
|
||||
From PostgreSQL 9.6 you can also use the view
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/monitoring-stats.html#PG-STAT-WAL-RECEIVER-VIEW">
|
||||
<literal>pg_stat_wal_receiver</literal></ulink> to check the replication status from the standby.
|
||||
|
||||
<programlisting>
|
||||
repmgr=# SELECT * FROM pg_stat_wal_receiver;
|
||||
Expanded display is on.
|
||||
-[ RECORD 1 ]---------+--------------------------------------------------------------------------------
|
||||
pid | 18236
|
||||
status | streaming
|
||||
receive_start_lsn | 0/3000000
|
||||
receive_start_tli | 1
|
||||
received_lsn | 0/7000538
|
||||
received_tli | 1
|
||||
last_msg_send_time | 2017-08-28 15:21:26.465728+09
|
||||
last_msg_receipt_time | 2017-08-28 15:21:26.465774+09
|
||||
latest_end_lsn | 0/7000538
|
||||
latest_end_time | 2017-08-28 15:20:56.418735+09
|
||||
slot_name |
|
||||
conninfo | user=repmgr dbname=replication host=node1 application_name=node2
|
||||
</programlisting>
|
||||
Note that the <varname>conninfo</varname> value is that generated in <filename>recovery.conf</filename>
|
||||
and will differ slightly from the primary's <varname>conninfo</varname> as set in <filename>repmgr.conf</filename> -
|
||||
among other things, it will contain the connecting node's name as <varname>application_name</varname>.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-register-standby">
|
||||
<title>Register the standby</title>
|
||||
<para>
|
||||
Register the standby server with:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf standby register
|
||||
NOTICE: standby node "node2" (ID: 2) successfully registered</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Check the node is registered by executing <command>repmgr cluster show</command> on the standby:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
||||
1 | node1 | primary | * running | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Both nodes are now registered with &repmgr; and the records have been copied to the standby server.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
</chapter>
|
||||
37
doc/repmgr-bdr.sgml
Normal file
37
doc/repmgr-bdr.sgml
Normal file
@@ -0,0 +1,37 @@
|
||||
<chapter id="repmgrd-bdr">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>BDR</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>BDR</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>BDR failover with repmgrd</title>
|
||||
<para>
|
||||
&repmgr; 4.x provides support for monitoring BDR nodes and taking action in
|
||||
case one of the nodes fails.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
Due to the nature of BDR, it's only safe to use this solution for
|
||||
a two-node scenario. Introducing additional nodes will create an inherent
|
||||
risk of node desynchronisation if a node goes down without being cleanly
|
||||
removed from the cluster.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
In contrast to streaming replication, there's no concept of "promoting" a new
|
||||
primary node with BDR. Instead, "failover" involves monitoring both nodes
|
||||
with <application>repmgrd</application> and redirecting queries from the failed node to the remaining
|
||||
active node. This can be done by using an
|
||||
<link linkend="event-notifications">event notification</link> script
|
||||
which is called by <application>repmgrd</application> to dynamically
|
||||
reconfigure a proxy server/connection pooler such as <application>PgBouncer</application>.
|
||||
</para>
|
||||
|
||||
<sect1 id="prerequisites" xreflabel="BDR prerequisites">
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
41
doc/repmgr-cluster-cleanup.sgml
Normal file
41
doc/repmgr-cluster-cleanup.sgml
Normal file
@@ -0,0 +1,41 @@
|
||||
<refentry id="repmgr-cluster-cleanup">
|
||||
<indexterm>
|
||||
<primary>repmgr cluster cleanup</primary>
|
||||
</indexterm>
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr cluster cleanup</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr cluster cleanup</refname>
|
||||
<refpurpose>purge monitoring history</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Purges monitoring history from the <literal>repmgr.monitoring_history</literal> table to
|
||||
prevent excessive table growth. Use the <literal>-k/--keep-history</literal> option to specify the
|
||||
number of days of monitoring history to retain. This command can be used
|
||||
manually or as a cronjob.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Usage</title>
|
||||
<para>
|
||||
This command requires a valid <filename>repmgr.conf</filename> file for the node on which it is
|
||||
executed; no additional arguments are required.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Notes</title>
|
||||
|
||||
<para>
|
||||
Monitoring history will only be written if <application>repmgrd</application> is active, and
|
||||
<varname>monitoring_history</varname> is set to <literal>true</literal> in
|
||||
<filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
42
doc/repmgr-cluster-crosscheck.sgml
Normal file
42
doc/repmgr-cluster-crosscheck.sgml
Normal file
@@ -0,0 +1,42 @@
|
||||
<refentry id="repmgr-cluster-crosscheck">
|
||||
<indexterm>
|
||||
<primary>repmgr cluster crosscheck</primary>
|
||||
</indexterm>
|
||||
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr cluster crosscheck</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr cluster crosscheck</refname>
|
||||
<refpurpose>cross-checks connections between each combination of nodes</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
<command>repmgr cluster crosscheck</command> is similar to <xref linkend="repmgr-cluster-matrix">,
|
||||
but cross-checks connections between each combination of nodes. In "Example 3" in
|
||||
<xref linkend="repmgr-cluster-matrix"> we have no information about the state of <literal>node3</literal>.
|
||||
However by running <command>repmgr cluster crosscheck</command> it's possible to get a better
|
||||
overview of the cluster situation:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster crosscheck
|
||||
|
||||
Name | Id | 1 | 2 | 3
|
||||
-------+----+----+----+----
|
||||
node1 | 1 | * | * | x
|
||||
node2 | 2 | * | * | *
|
||||
node3 | 3 | * | * | *</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
What happened is that <command>repmgr cluster crosscheck</command> merged its own
|
||||
<command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command> with the
|
||||
<command>repmgr cluster matrix</command> output from <literal>node2</literal>; the latter is
|
||||
able to connect to <literal>node3</literal>
|
||||
and therefore determine the state of outbound connections from that node.
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
60
doc/repmgr-cluster-event.sgml
Normal file
60
doc/repmgr-cluster-event.sgml
Normal file
@@ -0,0 +1,60 @@
|
||||
<refentry id="repmgr-cluster-event">
|
||||
<indexterm>
|
||||
<primary>repmgr cluster event</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr cluster event</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr cluster event</refname>
|
||||
<refpurpose>output a formatted list of cluster events</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
|
||||
<para>
|
||||
Outputs a formatted list of cluster events, as stored in the <literal>repmgr.events</literal> table.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Usage</title>
|
||||
|
||||
<para>
|
||||
Output is in reverse chronological order, and
|
||||
can be filtered with the following options:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><literal>--all</literal>: outputs all entries</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>--limit</literal>: set the maximum number of entries to output (default: 20)</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>--node-id</literal>: restrict entries to node with this ID</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>--node-name</literal>: restrict entries to node with this name</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>--event</literal>: filter specific event</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster event --event=standby_register
|
||||
Node ID | Name | Event | OK | Timestamp | Details
|
||||
---------+-------+------------------+----+---------------------+--------------------------------
|
||||
3 | node3 | standby_register | t | 2017-08-17 10:28:55 | standby registration succeeded
|
||||
2 | node2 | standby_register | t | 2017-08-17 10:28:53 | standby registration succeeded</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
101
doc/repmgr-cluster-matrix.sgml
Normal file
101
doc/repmgr-cluster-matrix.sgml
Normal file
@@ -0,0 +1,101 @@
|
||||
<refentry id="repmgr-cluster-matrix">
|
||||
<indexterm>
|
||||
<primary>repmgr cluster matrix</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr cluster matrix</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr cluster matrix</refname>
|
||||
<refpurpose>
|
||||
runs repmgr cluster show on each node and summarizes output
|
||||
</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
<command>repmgr cluster matrix</command> runs <command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command> on each
|
||||
node and arranges the results in a matrix, recording success or failure.
|
||||
</para>
|
||||
<para>
|
||||
<command>repmgr cluster matrix</command> requires a valid <filename>repmgr.conf</filename>
|
||||
file on each node. Additionally, passwordless <command>ssh</command> connections are required between
|
||||
all nodes.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
Example 1 (all nodes up):
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster matrix
|
||||
|
||||
Name | Id | 1 | 2 | 3
|
||||
-------+----+----+----+----
|
||||
node1 | 1 | * | * | *
|
||||
node2 | 2 | * | * | *
|
||||
node3 | 3 | * | * | *</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Example 2 (<literal>node1</literal> and <literal>node2</literal> up, <literal>node3</literal> down):
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster matrix
|
||||
|
||||
Name | Id | 1 | 2 | 3
|
||||
-------+----+----+----+----
|
||||
node1 | 1 | * | * | x
|
||||
node2 | 2 | * | * | x
|
||||
node3 | 3 | ? | ? | ?
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Each row corresponds to one server, and indicates the result of
|
||||
testing an outbound connection from that server.
|
||||
</para>
|
||||
<para>
|
||||
Since <literal>node3</literal> is down, all the entries in its row are filled with
|
||||
<literal>?</literal>, meaning that we cannot test outbound connections.
|
||||
</para>
|
||||
<para>
|
||||
The other two nodes are up; the corresponding rows have <literal>x</literal> in the
|
||||
column corresponding to <literal>node3</literal>, meaning that inbound connections to
|
||||
that node have failed, and <literal>*</literal> in the columns corresponding to
|
||||
<literal>node1</literal> and <literal>node2</literal>, meaning that inbound connections
|
||||
to these nodes have succeeded.
|
||||
</para>
|
||||
<para>
|
||||
Example 3 (all nodes up, firewall dropping packets originating
|
||||
from <literal>node1</literal> and directed to port 5432 on <literal>node3</literal>) -
|
||||
running <command>repmgr cluster matrix</command> from <literal>node1</literal> gives the following output:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster matrix
|
||||
|
||||
Name | Id | 1 | 2 | 3
|
||||
-------+----+----+----+----
|
||||
node1 | 1 | * | * | x
|
||||
node2 | 2 | * | * | *
|
||||
node3 | 3 | ? | ? | ?</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Note this may take some time depending on the <varname>connect_timeout</varname>
|
||||
setting in the node <varname>conninfo</varname> strings; default is
|
||||
<literal>1 minute</literal> which means without modification the above
|
||||
command would take around 2 minutes to run; see comment elsewhere about setting
|
||||
<varname>connect_timeout</varname>.
|
||||
</para>
|
||||
<para>
|
||||
The matrix tells us that we cannot connect from <literal>node1</literal> to <literal>node3</literal>,
|
||||
and that (therefore) we don't know the state of any outbound
|
||||
connection from <literal>node3</literal>.
|
||||
</para>
|
||||
<para>
|
||||
In this case, the <xref linkend="repmgr-cluster-crosscheck"> command will produce a more
|
||||
useful result.
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
116
doc/repmgr-cluster-show.sgml
Normal file
116
doc/repmgr-cluster-show.sgml
Normal file
@@ -0,0 +1,116 @@
|
||||
<refentry id="repmgr-cluster-show">
|
||||
<indexterm>
|
||||
<primary>repmgr cluster show</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr cluster show</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr cluster show</refname>
|
||||
<refpurpose>display information about each registered node in the replication cluster</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Displays information about each registered node in the replication cluster. This
|
||||
command polls each registered server and shows its role (<literal>primary</literal> /
|
||||
<literal>standby</literal> / <literal>bdr</literal>) and status. It polls each server
|
||||
directly and can be run on any node in the cluster; this is also useful when analyzing
|
||||
connectivity from a particular node.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
This command requires either a valid <filename>repmgr.conf</filename> file or a database
|
||||
connection string to one of the registered nodes; no additional arguments are needed.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To show database connection errors when polling nodes, run the command in
|
||||
<literal>--verbose</literal> mode.
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+-----------------------------------------
|
||||
1 | node1 | primary | * running | | default | host=db_node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | default | host=db_node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | host=db_node3 dbname=repmgr user=repmgr</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
<refsect1>
|
||||
<title>Notes</title>
|
||||
<para>
|
||||
The column <literal>Role</literal> shows the expected server role according to the
|
||||
&repmgr; metadata. <literal>Status</literal> shows whether the server is running or unreachable.
|
||||
If the node has an unexpected role not reflected in the &repmgr; metadata, e.g. a node was manually
|
||||
promoted to primary, this will be highlighted with an exclamation mark, e.g.:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+----------------------+----------+----------+-----------------------------------------
|
||||
1 | node1 | primary | ? unreachable | | default | host=db_node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | ! running as primary | node1 | default | host=db_node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | host=db_node3 dbname=repmgr user=repmgr
|
||||
|
||||
WARNING: following issues were detected
|
||||
node "node1" (ID: 1) is registered as an active primary but is unreachable
|
||||
node "node2" (ID: 2) is registered as standby but running as primary</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Node availability is tested by connecting from the node where
|
||||
<command>repmgr cluster show</command> is executed, and does not necessarily imply the node
|
||||
is down. See <xref linkend="repmgr-cluster-matrix"> and <xref linkend="repmgr-cluster-crosscheck"> to get
|
||||
a better overview of connections between nodes.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
<para>
|
||||
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
|
||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||
parsing by scripts:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show --csv
|
||||
1,-1,-1
|
||||
2,0,0
|
||||
3,0,1</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The columns have the following meanings:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
node ID
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
availability (0 = available, -1 = unavailable)
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
87
doc/repmgr-node-check.sgml
Normal file
87
doc/repmgr-node-check.sgml
Normal file
@@ -0,0 +1,87 @@
|
||||
<refentry id="repmgr-node-check">
|
||||
<indexterm>
|
||||
<primary>repmgr node check</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr node check</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr node check</refname>
|
||||
<refpurpose>performs some health checks on a node from a replication perspective</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Performs some health checks on a node from a replication perspective.
|
||||
This command must be run on the local node.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf node check
|
||||
Node "node1":
|
||||
Server role: OK (node is primary)
|
||||
Replication lag: OK (N/A - node is primary)
|
||||
WAL archiving: OK (0 pending files)
|
||||
Downstream servers: OK (2 of 2 downstream nodes attached)
|
||||
Replication slots: OK (node has no replication slots)</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
<refsect1>
|
||||
<title>Individual checks</title>
|
||||
<para>
|
||||
Each check can be performed individually by supplying
|
||||
an additional command line parameter, e.g.:
|
||||
<programlisting>
|
||||
$ repmgr node check --role
|
||||
OK (node is primary)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Parameters for individual checks are as follows:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--role</literal>: checks if the node has the expected role
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--replication-lag</literal>: checks if the node is lagging by more than
|
||||
<varname>replication_lag_warning</varname> or <varname>replication_lag_critical</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--archive-ready</literal>: checks for WAL files which have not yet been archived
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--downstream</literal>: checks that the expected downstream nodes are attached
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--slots</literal>: checks there are no inactive replication slots
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
Individual checks can also be output in a Nagios-compatible format by additionally
|
||||
providing the option <literal>--nagios</literal>.
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
155
doc/repmgr-node-rejoin.sgml
Normal file
155
doc/repmgr-node-rejoin.sgml
Normal file
@@ -0,0 +1,155 @@
|
||||
<refentry id="repmgr-node-rejoin">
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgr node rejoin</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr node rejoin</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr node rejoin</refname>
|
||||
<refpurpose>rejoin a dormant (stopped) node to the replication cluster</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Enables a dormant (stopped) node to be rejoined to the replication cluster.
|
||||
</para>
|
||||
<para>
|
||||
This can optionally use <application>pg_rewind</application> to re-integrate
|
||||
a node which has diverged from the rest of the cluster, typically a failed primary.
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
If the node is running and needs to be attached to the current primary, use
|
||||
<xref linkend="repmgr-standby-follow">.
|
||||
</para>
|
||||
</tip>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Usage</title>
|
||||
|
||||
<para>
|
||||
<programlisting>
|
||||
repmgr node rejoin -d '$conninfo'</programlisting>
|
||||
|
||||
where <literal>$conninfo</literal> is the conninfo string of any reachable node in the cluster.
|
||||
<filename>repmgr.conf</filename> for the stopped node <emphasis>must</emphasis> be supplied explicitly if not
|
||||
otherwise available.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>node_rejoin</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Notes</title>
|
||||
<para>
|
||||
Currently <command>repmgr node rejoin</command> can only be used to attach
|
||||
a standby to the current primary, not another standby.
|
||||
</para>
|
||||
<para>
|
||||
The node must have been shut down cleanly; if this was not the case, it will
|
||||
need to be manually started (remove any existing <filename>recovery.conf</filename> file first)
|
||||
until it has reached a consistent recovery point, then shut down cleanly.
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
If <application>PostgreSQL</application> is started in single-user mode and
|
||||
input is directed from <filename>/dev/null</filename>, it will perform recovery
|
||||
then immediately quit, and will then be in a state suitable for use by
|
||||
<application>pg_rewind</application>.
|
||||
<programlisting>
|
||||
rm -f /var/lib/pgsql/data/recovery.conf
|
||||
postgres --single -D /var/lib/pgsql/data/ < /dev/null</programlisting>
|
||||
</para>
|
||||
</tip>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-node-rejoin-pg-rewind" xreflabel="Using pg_rewind">
|
||||
<title>Using <command>pg_rewind</command></title>
|
||||
<para>
|
||||
<command>repmgr node rejoin</command> can optionally use <command>pg_rewind</command> to re-integrate a
|
||||
node which has diverged from the rest of the cluster, typically a failed primary.
|
||||
<command>pg_rewind</command> is available in PostgreSQL 9.5 and later.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
<command>pg_rewind</command> <emphasis>requires</emphasis> that either
|
||||
<varname>wal_log_hints</varname> is enabled, or that
|
||||
data checksums were enabled when the cluster was initialized. See the
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
To have <command>repmgr node rejoin</command> use <command>pg_rewind</command> if required,
|
||||
pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
|
||||
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
||||
rewind operation, any configuration files in the PostgreSQL data directory will be
|
||||
overwritten with those from the source server.
|
||||
</para>
|
||||
<para>
|
||||
To prevent this happening, provide a comma-separated list of files to retain
|
||||
using the <literal>--config-files</literal> command line option; the specified files
|
||||
will be archived in a temporary directory (whose parent directory can be specified with
|
||||
<literal>--config-archive-dir</literal>) and restored once the rewind operation is
|
||||
complete.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Example, first using <literal>--dry-run</literal>, then actually executing the
|
||||
<literal>node rejoin</literal> command.
|
||||
<programlisting>
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
|
||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run
|
||||
NOTICE: using provided configuration file "/etc/repmgr.conf"
|
||||
INFO: prerequisites for using pg_rewind are met
|
||||
INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.local.conf"
|
||||
INFO: file "postgresql.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.conf"
|
||||
INFO: 2 files would have been copied to "/tmp/repmgr-config-archive-node1"
|
||||
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
|
||||
INFO: pg_rewind would now be executed
|
||||
DETAIL: pg_rewind command is:
|
||||
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
|
||||
<programlisting>
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
|
||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
|
||||
NOTICE: using provided configuration file "/etc/repmgr.conf"
|
||||
INFO: prerequisites for using pg_rewind are met
|
||||
INFO: 2 files copied to "/tmp/repmgr-config-archive-node1"
|
||||
NOTICE: executing pg_rewind
|
||||
NOTICE: 2 files copied to /var/lib/pgsql/data
|
||||
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
|
||||
INFO: deleting "recovery.done"
|
||||
INFO: setting node 1's primary to node 2
|
||||
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
||||
waiting for server to start.... done
|
||||
server started
|
||||
NOTICE: NODE REJOIN successful
|
||||
DETAIL: node 1 is now attached to node 2</programlisting>
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-standby-follow">
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
47
doc/repmgr-node-status.sgml
Normal file
47
doc/repmgr-node-status.sgml
Normal file
@@ -0,0 +1,47 @@
|
||||
<refentry id="repmgr-node-status">
|
||||
<indexterm>
|
||||
<primary>repmgr node status</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr node status</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr node status</refname>
|
||||
<refpurpose>show overview of a node's basic information and replication status</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Displays an overview of a node's basic information and replication
|
||||
status. This command must be run on the local node.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf node status
|
||||
Node "node1":
|
||||
PostgreSQL version: 10beta1
|
||||
Total data size: 30 MB
|
||||
Conninfo: host=node1 dbname=repmgr user=repmgr connect_timeout=2
|
||||
Role: primary
|
||||
WAL archiving: off
|
||||
Archive command: (none)
|
||||
Replication connections: 2 (of maximal 10)
|
||||
Replication slots: 0 (of maximal 10)
|
||||
Replication lag: n/a</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
See <xref linkend="repmgr-node-check"> to diagnose issues.
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
58
doc/repmgr-primary-register.sgml
Normal file
58
doc/repmgr-primary-register.sgml
Normal file
@@ -0,0 +1,58 @@
|
||||
<refentry id="repmgr-primary-register">
|
||||
<indexterm>
|
||||
<primary>repmgr primary register</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr primary register</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr primary register</refname>
|
||||
<refpurpose>initialise a repmgr installation and register the primary node</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
<command>repmgr primary register</command> registers a primary node in a
|
||||
streaming replication cluster, and configures it for use with repmgr, including
|
||||
installing the &repmgr; extension. This command needs to be executed before any
|
||||
standby nodes are registered.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
Execute with the <literal>--dry-run</literal> option to check what would happen without
|
||||
actually registering the primary.
|
||||
</para>
|
||||
<para>
|
||||
<command>repmgr master register</command> can be used as an alias for
|
||||
<command>repmgr primary register</command>.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If providing the configuration file location with <literal>-f/--config-file</literal>,
|
||||
avoid using a relative path, as &repmgr; stores the configuration file location
|
||||
in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
|
||||
<xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert
|
||||
a relative path into an absolute one, but this may not be the same as the path you
|
||||
would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
|
||||
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
||||
<filename>/path/to/repmgr.conf</filename>).
|
||||
</para>
|
||||
</note>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>primary_register</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
42
doc/repmgr-primary-unregister.sgml
Normal file
42
doc/repmgr-primary-unregister.sgml
Normal file
@@ -0,0 +1,42 @@
|
||||
<refentry id="repmgr-primary-unregister">
|
||||
<indexterm>
|
||||
<primary>repmgr primary unregister</primary>
|
||||
</indexterm>
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr primary unregister</refentrytitle>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>repmgr primary unregister</refname>
|
||||
<refpurpose>unregister an inactive primary node</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
<command>repmgr primary unregister</command> unregisters an inactive primary node
|
||||
from the &repmgr; metadata. This is typically when the primary has failed and is
|
||||
being removed from the cluster after a new primary has been promoted.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
Execute with the <literal>--dry-run</literal> option to check what would happen without
|
||||
actually unregistering the node.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<command>repmgr master unregister</command> can be used as an alias for
|
||||
<command>repmgr primary unregister</command>.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>primary_unregister</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
111
doc/repmgr-standby-clone.sgml
Normal file
111
doc/repmgr-standby-clone.sgml
Normal file
@@ -0,0 +1,111 @@
|
||||
<refentry id="repmgr-standby-clone">
|
||||
<indexterm>
|
||||
<primary>repmgr standby clone</primary>
|
||||
<seealso>cloning</seealso>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr standby clone</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr standby clone</refname>
|
||||
<refpurpose>clone a PostgreSQL standby node from another PostgreSQL node</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
<command>repmgr standby clone</command> clones a PostgreSQL node from another
|
||||
PostgreSQL node, typically the primary, but optionally from any other node in
|
||||
the cluster or from Barman. It creates the <filename>recovery.conf</filename> file required
|
||||
to attach the cloned node to the primary node (or another standby, if cascading replication
|
||||
is in use).
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
<command>repmgr standby clone</command> does not start the standby, and after cloning
|
||||
<command>repmgr standby register</command> must be executed to notify &repmgr; of its presence.
|
||||
</simpara>
|
||||
</note>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1 id="repmgr-standby-clone-config-file-copying" xreflabel="Copying configuration files">
|
||||
<title>Handling configuration files</title>
|
||||
|
||||
<para>
|
||||
Note that by default, all configuration files in the source node's data
|
||||
directory will be copied to the cloned node. Typically these will be
|
||||
<filename>postgresql.conf</filename>, <filename>postgresql.auto.conf</filename>,
|
||||
<filename>pg_hba.conf</filename> and <filename>pg_ident.conf</filename>.
|
||||
These may require modification before the standby is started.
|
||||
</para>
|
||||
<para>
|
||||
In some cases (e.g. on Debian or Ubuntu Linux installations), PostgreSQL's
|
||||
configuration files are located outside of the data directory and will
|
||||
not be copied by default. &repmgr; can copy these files, either to the same
|
||||
location on the standby server (provided appropriate directory and file permissions
|
||||
are available), or into the standby's data directory. This requires passwordless
|
||||
SSH access to the primary server. Add the option <literal>--copy-external-config-files</literal>
|
||||
to the <command>repmgr standby clone</command> command; by default files will be copied to
|
||||
the same path as on the upstream server. Note that the user executing <command>repmgr</command>
|
||||
must have write access to those directories.
|
||||
</para>
|
||||
<para>
|
||||
To have the configuration files placed in the standby's data directory, specify
|
||||
<literal>--copy-external-config-files=pgdata</literal>, but note that
|
||||
any include directives in the copied files may need to be updated.
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
For reliable configuration file management we recommend using a
|
||||
configuration management tool such as Ansible, Chef, Puppet or Salt.
|
||||
</simpara>
|
||||
</tip>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-clone-wal-management" xreflabel="Managing WAL during the cloning process">
|
||||
<title>Managing WAL during the cloning process</title>
|
||||
<para>
|
||||
When initially cloning a standby, you will need to ensure
|
||||
that all required WAL files remain available while the cloning is taking
|
||||
place. To ensure this happens when using the default <command>pg_basebackup</command> method,
|
||||
&repmgr; will set <command>pg_basebackup</command>'s <literal>--xlog-method</literal>
|
||||
parameter to <literal>stream</literal>,
|
||||
which will ensure all WAL files generated during the cloning process are
|
||||
streamed in parallel with the main backup. Note that this requires two
|
||||
replication connections to be available (&repmgr; will verify sufficient
|
||||
connections are available before attempting to clone, and this can be checked
|
||||
before performing the clone using the <literal>--dry-run</literal> option).
|
||||
</para>
|
||||
<para>
|
||||
To override this behaviour, in <filename>repmgr.conf</filename> set
|
||||
<command>pg_basebackup</command>'s <literal>--xlog-method</literal>
|
||||
parameter to <literal>fetch</literal>:
|
||||
<programlisting>
|
||||
pg_basebackup_options='--xlog-method=fetch'</programlisting>
|
||||
|
||||
and ensure that <literal>wal_keep_segments</literal> is set to an appropriately high value.
|
||||
See the <ulink url="https://www.postgresql.org/docs/current/static/app-pgbasebackup.html">
|
||||
pg_basebackup</ulink> documentation for details.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
From PostgreSQL 10, <command>pg_basebackup</command>'s
|
||||
<literal>--xlog-method</literal> parameter has been renamed to
|
||||
<literal>--wal-method</literal>.
|
||||
</simpara>
|
||||
</note>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_clone</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
65
doc/repmgr-standby-follow.sgml
Normal file
65
doc/repmgr-standby-follow.sgml
Normal file
@@ -0,0 +1,65 @@
|
||||
<refentry id="repmgr-standby-follow">
|
||||
<indexterm>
|
||||
<primary>repmgr standby follow</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr standby follow</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr standby follow</refname>
|
||||
<refpurpose>attach a standby to a new primary</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
|
||||
<para>
|
||||
Attaches the standby to a new primary. This command requires a valid
|
||||
<filename>repmgr.conf</filename> file for the standby, either specified
|
||||
explicitly with <literal>-f/--config-file</literal> or located in a
|
||||
default location; no additional arguments are required.
|
||||
</para>
|
||||
<para>
|
||||
This command will force a restart of the standby server, which must be
|
||||
running. It can only be used to attach an active standby to the current primary node
|
||||
(and not to another standby).
|
||||
</para>
|
||||
<para>
|
||||
To re-add an inactive node to the replication cluster, see
|
||||
<xref linkend="repmgr-node-rejoin">.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf standby follow
|
||||
INFO: setting node 3's primary to node 2
|
||||
NOTICE: restarting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/postgres/data' restart"
|
||||
waiting for server to shut down........ done
|
||||
server stopped
|
||||
waiting for server to start.... done
|
||||
server started
|
||||
NOTICE: STANDBY FOLLOW successful
|
||||
DETAIL: node 3 is now attached to node 2</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_follow</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-node-rejoin">
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
52
doc/repmgr-standby-promote.sgml
Normal file
52
doc/repmgr-standby-promote.sgml
Normal file
@@ -0,0 +1,52 @@
|
||||
<refentry id="repmgr-standby-promote">
|
||||
<indexterm>
|
||||
<primary>repmgr standby promote</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr standby promote</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr standby promote</refname>
|
||||
<refpurpose>promote a standby to a primary</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Promotes a standby to a primary if the current primary has failed. This
|
||||
command requires a valid <filename>repmgr.conf</filename> file for the standby, either
|
||||
specified explicitly with <literal>-f/--config-file</literal> or located in a
|
||||
default location; no additional arguments are required.
|
||||
</para>
|
||||
<para>
|
||||
If the standby promotion succeeds, the server will not need to be
|
||||
restarted. However any other standbys will need to follow the new server,
|
||||
by using <xref linkend="repmgr-standby-follow">; if <application>repmgrd</application>
|
||||
is active, it will handle this automatically.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf standby promote
|
||||
NOTICE: promoting standby to primary
|
||||
DETAIL: promoting server "node2" (ID: 2) using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/postgres/data' promote"
|
||||
server promoting
|
||||
DEBUG: setting node 2 as primary and marking existing primary as failed
|
||||
NOTICE: STANDBY PROMOTE successful
|
||||
DETAIL: server "node2" (ID: 2) was successfully promoted to primary</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_promote</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
86
doc/repmgr-standby-register.sgml
Normal file
86
doc/repmgr-standby-register.sgml
Normal file
@@ -0,0 +1,86 @@
|
||||
<refentry id="repmgr-standby-register" xreflabel="repmgr standby register">
|
||||
<indexterm>
|
||||
<primary>repmgr standby register</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr standby register</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr standby register</refname>
|
||||
<refpurpose>add a standby's information to the &repmgr; metadata</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
<command>repmgr standby register</command> adds a standby's information to
|
||||
the &repmgr; metadata. This command needs to be executed to enable
|
||||
promote/follow operations and to allow <application>repmgrd</application> to work with the node.
|
||||
An existing standby can be registered using this command. Execute with the
|
||||
<literal>--dry-run</literal> option to check what would happen without actually registering the
|
||||
standby.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If providing the configuration file location with <literal>-f/--config-file</literal>,
|
||||
avoid using a relative path, as &repmgr; stores the configuration file location
|
||||
in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
|
||||
<xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert
|
||||
a relative path into an absolute one, but this may not be the same as the path you
|
||||
would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
|
||||
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
||||
<filename>/path/to/repmgr.conf</filename>).
|
||||
</para>
|
||||
</note>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-register-wait" xreflabel="repmgr standby register --wait-sync">
|
||||
<title>Waiting for the registration to propagate to the standby</title>
|
||||
<para>
|
||||
Depending on your environment and workload, it may take some time for
|
||||
the standby's node record to propagate from the primary to the standby. Some
|
||||
actions (such as starting <application>repmgrd</application>) require that the standby's node record
|
||||
is present and up-to-date to function correctly.
|
||||
</para>
|
||||
<para>
|
||||
By providing the option <literal>--wait-sync</literal> to the
|
||||
<command>repmgr standby register</command> command, &repmgr; will wait
|
||||
until the record is synchronised before exiting. An optional timeout (in
|
||||
seconds) can be added to this option (e.g. <literal>--wait-sync=60</literal>).
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-register-inactive-node" xreflabel="Registering an inactive node">
|
||||
<title>Registering an inactive node</title>
|
||||
<para>
|
||||
Under some circumstances you may wish to register a standby which is not
|
||||
yet running; this can be the case when using provisioning tools to create
|
||||
a complex replication cluster. In this case, by using the <literal>-F/--force</literal>
|
||||
option and providing the connection parameters to the primary server,
|
||||
the standby can be registered.
|
||||
</para>
|
||||
<para>
|
||||
Similarly, with cascading replication it may be necessary to register
|
||||
a standby whose upstream node has not yet been registered - in this case,
|
||||
using <literal>-F/--force</literal> will result in the creation of an inactive placeholder
|
||||
record for the upstream node, which will however later need to be registered
|
||||
with the <literal>-F/--force</literal> option too.
|
||||
</para>
|
||||
<para>
|
||||
When used with <command>repmgr standby register</command>, care should be taken that use of the
|
||||
<literal>-F/--force</literal> option does not result in an incorrectly configured cluster.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_register</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
59
doc/repmgr-standby-switchover.sgml
Normal file
59
doc/repmgr-standby-switchover.sgml
Normal file
@@ -0,0 +1,59 @@
|
||||
<refentry id="repmgr-standby-switchover">
|
||||
<indexterm>
|
||||
<primary>repmgr standby switchover</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr standby switchover</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr standby switchover</refname>
|
||||
<refpurpose>promote a standby to primary and demote the existing primary to a standby</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
|
||||
<para>
|
||||
Promotes a standby to primary and demotes the existing primary to a standby.
|
||||
This command must be run on the standby to be promoted, and requires a
|
||||
passwordless SSH connection to the current primary.
|
||||
</para>
|
||||
<para>
|
||||
If other standbys are connected to the demotion candidate, &repmgr; can instruct
|
||||
these to follow the new primary if the option <literal>--siblings-follow</literal>
|
||||
is specified.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
|
||||
<para>
|
||||
Execute with the <literal>--dry-run</literal> option to test the switchover as far as
|
||||
possible without actually changing the status of either node.
|
||||
</para>
|
||||
<para>
|
||||
<application>repmgrd</application> should not be active on any nodes while a switchover is being
|
||||
executed. This restriction may be lifted in a later version.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_promote</literal> event notification will be generated on the new primary,
|
||||
and a <literal>node_rejoin</literal> event notification on the former primary (new standby).
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
For more details see the section <xref linkend="performing-switchover">.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
54
doc/repmgr-standby-unregister.sgml
Normal file
54
doc/repmgr-standby-unregister.sgml
Normal file
@@ -0,0 +1,54 @@
|
||||
<refentry id="repmgr-standby-unregister">
|
||||
<indexterm>
|
||||
<primary>repmgr standby unregister</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr standby unregister</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr standby unregister</refname>
|
||||
<refpurpose>remove a standby's information from the &repmgr; metadata</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Unregisters a standby with &repmgr;. This command does not affect the actual
|
||||
replication, just removes the standby's entry from the &repmgr; metadata.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
To unregister a running standby, execute:
|
||||
<programlisting>
|
||||
repmgr standby unregister -f /etc/repmgr.conf</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This will remove the standby record from &repmgr;'s internal metadata
|
||||
table (<literal>repmgr.nodes</literal>). A <literal>standby_unregister</literal>
|
||||
event notification will be recorded in the <literal>repmgr.events</literal> table.
|
||||
</para>
|
||||
<para>
|
||||
If the standby is not running, the command can be executed on another
|
||||
node by providing the id of the node to be unregistered using
|
||||
the command line parameter <literal>--node-id</literal>, e.g. executing the following
|
||||
command on the primary server will unregister the standby with
|
||||
id <literal>3</literal>:
|
||||
<programlisting>
|
||||
repmgr standby unregister -f /etc/repmgr.conf --node-id=3</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_unregister</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
60
doc/repmgr-witness-register.sgml
Normal file
60
doc/repmgr-witness-register.sgml
Normal file
@@ -0,0 +1,60 @@
|
||||
<refentry id="repmgr-witness-register">
|
||||
<indexterm>
|
||||
<primary>repmgr witness register</primary>
|
||||
<seealso>witness server</seealso>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr witness register</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr witness register</refname>
|
||||
<refpurpose>add a witness node's information to the &repmgr; metadata</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
<command>repmgr witness register</command> adds a witness server's node
|
||||
record to the &repmgr; metadata, and if necessary initialises the witness
|
||||
node by installing the &repmgr; extension and copying the &repmgr; metadata
|
||||
to the witness server. This command needs to be executed to enable
|
||||
use of the witness server with <application>repmgrd</application>.
|
||||
</para>
|
||||
<para>
|
||||
When executing <command>repmgr witness register</command>, connection information
|
||||
for the cluster primary server must also be provided. &repmgr; will automatically
|
||||
use the <varname>user</varname> and <varname>dbname</varname> values defined
|
||||
in the <varname>conninfo</varname> string defined in the witness node's
|
||||
<filename>repmgr.conf</filename>, if these are not explicitly provided.
|
||||
</para>
|
||||
<para>
|
||||
Execute with the <literal>--dry-run</literal> option to check what would happen
|
||||
without actually registering the witness server.
|
||||
</para>
|
||||
</refsect1>
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf witness register -h node1
|
||||
INFO: connecting to witness node "node3" (ID: 3)
|
||||
INFO: connecting to primary node
|
||||
NOTICE: attempting to install extension "repmgr"
|
||||
NOTICE: "repmgr" extension successfully installed
|
||||
INFO: witness registration complete
|
||||
NOTICE: witness node "node3" (ID: 3) successfully registered
|
||||
</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>witness_register</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
73
doc/repmgr-witness-unregister.sgml
Normal file
73
doc/repmgr-witness-unregister.sgml
Normal file
@@ -0,0 +1,73 @@
|
||||
<refentry id="repmgr-witness-unregister" xreflabel="repmgr witness unregister">
|
||||
<indexterm>
|
||||
<primary>repmgr witness unregister</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr witness unregister</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr witness unregister</refname>
|
||||
<refpurpose>remove a witness node's information from the &repmgr; metadata</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
<command>repmgr witness unregister</command> removes a witness server's node
|
||||
record from the &repmgr; metadata.
|
||||
</para>
|
||||
<para>
|
||||
The node does not have to be running to be unregistered, however if this is the
|
||||
case then connection information for the primary server must be provided.
|
||||
</para>
|
||||
<para>
|
||||
Execute with the <literal>--dry-run</literal> option to check what would happen
|
||||
without actually unregistering the witness server.
|
||||
</para>
|
||||
</refsect1>
|
||||
<refsect1>
|
||||
<title>Examples</title>
|
||||
<para>
|
||||
Unregistering a running witness node:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf witness unregister
|
||||
INFO: connecting to witness node "node3" (ID: 3)
|
||||
INFO: unregistering witness node 3
|
||||
INFO: witness unregistration complete
|
||||
DETAIL: witness node with id 3 (conninfo: host=node3 dbname=repmgr user=repmgr port=5499) successfully unregistered</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Unregistering a non-running witness node:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf witness unregister -h node1 -p 5501 -F
|
||||
INFO: connecting to witness node "node3" (ID: 3)
|
||||
NOTICE: unable to connect to witness node "node3" (ID: 3), removing node record on cluster primary only
|
||||
INFO: unregistering witness node 3
|
||||
INFO: witness unregistration complete
|
||||
DETAIL: witness node with id 3 (conninfo: host=node3 dbname=repmgr user=repmgr port=5499) successfully unregistered</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Notes</title>
|
||||
<para>
|
||||
This command will not make any changes to the witness node itself and will neither
|
||||
remove any data from the witness database nor stop the PostgreSQL instance.
|
||||
</para>
|
||||
<para>
|
||||
A witness node which has been unregistered can be re-registered with
|
||||
<link linkend="repmgr-witness-register">repmgr witness register --force</link>.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>witness_unregister</literal> event notification will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
124
doc/repmgr.sgml
Normal file
124
doc/repmgr.sgml
Normal file
@@ -0,0 +1,124 @@
|
||||
<!-- doc/repmgr.sgml -->
|
||||
|
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.2//EN" [
|
||||
|
||||
<!ENTITY % version SYSTEM "version.sgml">
|
||||
%version;
|
||||
|
||||
<!ENTITY % filelist SYSTEM "filelist.sgml">
|
||||
%filelist;
|
||||
|
||||
<!ENTITY repmgr "<productname>repmgr</productname>">
|
||||
<!ENTITY postgres "<productname>PostgreSQL</productname>">
|
||||
]>
|
||||
|
||||
<book id="repmgr">
|
||||
<title>repmgr &repmgrversion; Documentation</title>
|
||||
|
||||
<bookinfo>
|
||||
<corpauthor>2ndQuadrant Ltd</corpauthor>
|
||||
<productname>repmgr</productname>
|
||||
<productnumber>&repmgrversion;</productnumber>
|
||||
&legal;
|
||||
|
||||
<abstract>
|
||||
<para>
|
||||
This is the official documentation of &repmgr; &repmgrversion; for
|
||||
use with PostgreSQL 9.3 - PostgreSQL 10.
|
||||
It describes the functionality supported by the current version of &repmgr;.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
&repmgr; was developed by
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
|
||||
along with contributions from other individuals and companies.
|
||||
Contributions from the community are appreciated and welcome - get
|
||||
in touch via <ulink url="https://github.com/2ndQuadrant/repmgr">github</>
|
||||
or <ulink url="https://groups.google.com/group/repmgr">the mailing list/forum</>.
|
||||
Multiple 2ndQuadrant customers contribute funding
|
||||
to make repmgr development possible.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
2ndQuadrant, a Platinum sponsor of the PostgreSQL project,
|
||||
continues to develop repmgr to meet internal needs and those of customers.
|
||||
Other companies as well as individual developers
|
||||
are welcome to participate in the efforts.
|
||||
</para>
|
||||
</abstract>
|
||||
|
||||
<keywordset>
|
||||
<keyword>repmgr</keyword>
|
||||
<keyword>PostgreSQL</keyword>
|
||||
<keyword>replication</keyword>
|
||||
<keyword>asynchronous</keyword>
|
||||
<keyword>HA</keyword>
|
||||
<keyword>high-availability</keyword>
|
||||
</keywordset>
|
||||
</bookinfo>
|
||||
|
||||
|
||||
<part id="getting-started">
|
||||
<title>Getting started</title>
|
||||
&overview;
|
||||
&install;
|
||||
&quickstart;
|
||||
</part>
|
||||
|
||||
<part id="repmgr-administration-manual">
|
||||
<title>repmgr administration manual</title>
|
||||
|
||||
&configuration;
|
||||
&cloning-standbys;
|
||||
&promoting-standby;
|
||||
&follow-new-primary;
|
||||
&switchover;
|
||||
&configuring-witness-server;
|
||||
&event-notifications;
|
||||
&upgrading-repmgr;
|
||||
</part>
|
||||
|
||||
<part id="using-repmgrd">
|
||||
<title>Using repmgrd</title>
|
||||
&repmgrd-automatic-failover;
|
||||
&repmgrd-configuration;
|
||||
&repmgrd-demonstration;
|
||||
&repmgrd-cascading-replication;
|
||||
&repmgrd-network-split;
|
||||
&repmgrd-witness-server;
|
||||
&repmgrd-degraded-monitoring;
|
||||
&repmgrd-monitoring;
|
||||
&repmgrd-bdr;
|
||||
</part>
|
||||
|
||||
<part id="repmgr-command-reference">
|
||||
<title>repmgr command reference</title>
|
||||
|
||||
&repmgr-primary-register;
|
||||
&repmgr-primary-unregister;
|
||||
&repmgr-standby-clone;
|
||||
&repmgr-standby-register;
|
||||
&repmgr-standby-unregister;
|
||||
&repmgr-standby-promote;
|
||||
&repmgr-standby-follow;
|
||||
&repmgr-standby-switchover;
|
||||
&repmgr-witness-register;
|
||||
&repmgr-witness-unregister;
|
||||
&repmgr-node-status;
|
||||
&repmgr-node-check;
|
||||
&repmgr-node-rejoin;
|
||||
&repmgr-cluster-show;
|
||||
&repmgr-cluster-matrix;
|
||||
&repmgr-cluster-crosscheck;
|
||||
&repmgr-cluster-event;
|
||||
&repmgr-cluster-cleanup;
|
||||
</part>
|
||||
|
||||
&appendix-release-notes;
|
||||
&appendix-signatures;
|
||||
&appendix-faq;
|
||||
|
||||
<![%include-index;[&bookindex;]]>
|
||||
<![%include-xslt-index;[<index id="bookindex"></index>]]>
|
||||
|
||||
</book>
|
||||
17
doc/repmgrd-automatic-failover.sgml
Normal file
17
doc/repmgrd-automatic-failover.sgml
Normal file
@@ -0,0 +1,17 @@
|
||||
<chapter id="repmgrd-automatic-failover" xreflabel="Automatic failover with repmgrd">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>automatic failover</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Automatic failover with repmgrd</title>
|
||||
|
||||
<para>
|
||||
<application>repmgrd</application> is a management and monitoring daemon which runs
|
||||
on each node in a replication cluster. It can automate actions such as
|
||||
failover and updating standbys to follow the new primary, as well as
|
||||
providing monitoring information about the state of each standby.
|
||||
</para>
|
||||
|
||||
|
||||
</chapter>
|
||||
399
doc/repmgrd-bdr.sgml
Normal file
399
doc/repmgrd-bdr.sgml
Normal file
@@ -0,0 +1,399 @@
|
||||
<chapter id="repmgrd-bdr">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>BDR</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>BDR</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>BDR failover with repmgrd</title>
|
||||
<para>
|
||||
&repmgr; 4.x provides support for monitoring BDR nodes and taking action in
|
||||
case one of the nodes fails.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
Due to the nature of BDR, it's only safe to use this solution for
|
||||
a two-node scenario. Introducing additional nodes will create an inherent
|
||||
risk of node desynchronisation if a node goes down without being cleanly
|
||||
removed from the cluster.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
In contrast to streaming replication, there's no concept of "promoting" a new
|
||||
primary node with BDR. Instead, "failover" involves monitoring both nodes
|
||||
with <application>repmgrd</application> and redirecting queries from the failed node to the remaining
|
||||
active node. This can be done by using an
|
||||
<link linkend="event-notifications">event notification</link> script
|
||||
which is called by <application>repmgrd</application> to dynamically
|
||||
reconfigure a proxy server/connection pooler such as <application>PgBouncer</application>.
|
||||
</para>
|
||||
|
||||
<sect1 id="bdr-prerequisites" xreflabel="BDR prerequisites">
|
||||
<title>Prerequisites</title>
|
||||
<para>
|
||||
&repmgr; 4 requires PostgreSQL 9.4 or 9.6 with the BDR 2 extension
|
||||
enabled and configured for a two-node BDR network. &repmgr; 4 packages
|
||||
must be installed on each node before attempting to configure
|
||||
<application>repmgr</application>.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
&repmgr; 4 will refuse to install if it detects more than two BDR nodes.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
Application database connections <emphasis>must</emphasis> be passed through a proxy server/
|
||||
connection pooler such as <application>PgBouncer</application>, and it must be possible to dynamically
|
||||
reconfigure that from <application>repmgrd</application>. The example demonstrated in this document
|
||||
will use <application>PgBouncer</application>.
|
||||
</para>
|
||||
<para>
|
||||
The proxy server / connection poolers must <emphasis>not</emphasis>
|
||||
be installed on the database servers.
|
||||
</para>
|
||||
<para>
|
||||
For this example, it's assumed password-less SSH connections are available
|
||||
from the PostgreSQL servers to the servers where <application>PgBouncer</application>
|
||||
runs, and that the user on those servers has permission to alter the
|
||||
<application>PgBouncer</application> configuration files.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL connections must be possible between each node, and each node
|
||||
must be able to connect to each PgBouncer instance.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="bdr-configuration" xreflabel="BDR configuration">
|
||||
<title>Configuration</title>
|
||||
<para>
|
||||
A sample configuration for <filename>repmgr.conf</filename> on each
|
||||
BDR node would look like this:
|
||||
<programlisting>
|
||||
# Node information
|
||||
node_id=1
|
||||
node_name='node1'
|
||||
conninfo='host=node1 dbname=bdrtest user=repmgr connect_timeout=2'
|
||||
data_directory='/var/lib/postgresql/data'
|
||||
replication_type='bdr'
|
||||
|
||||
# Event notification configuration
|
||||
event_notifications=bdr_failover
|
||||
event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1'
|
||||
|
||||
# repmgrd options
|
||||
monitor_interval_secs=5
|
||||
reconnect_attempts=6
|
||||
reconnect_interval=5</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Adjust settings as appropriate; copy and adjust for the second node (particularly
|
||||
the values <varname>node_id</varname>, <varname>node_name</varname>
|
||||
and <varname>conninfo</varname>).
|
||||
</para>
|
||||
<para>
|
||||
Note that the values provided for the <varname>conninfo</varname> string
|
||||
must be valid for connections from <emphasis>both</emphasis> nodes in the
|
||||
replication cluster. The database must be the BDR-enabled database.
|
||||
</para>
|
||||
<para>
|
||||
If defined, the <varname>event_notifications</varname> parameter
|
||||
will restrict execution of <varname>event_notification_command</varname>
|
||||
to the specified event(s).
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
<varname>event_notification_command</varname> is the script which does the actual "heavy lifting"
|
||||
of reconfiguring the proxy server/ connection pooler. It is fully
|
||||
user-definable; a reference implementation is documented below.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="bdr-repmgr-setup" xreflabel="repmgr setup with BDR">
|
||||
<title>repmgr setup</title>
|
||||
<para>
|
||||
Register both nodes; example on <literal>node1</literal>:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf bdr register
|
||||
NOTICE: attempting to install extension "repmgr"
|
||||
NOTICE: "repmgr" extension successfully installed
|
||||
NOTICE: node record created for node 'node1' (ID: 1)
|
||||
NOTICE: BDR node 1 registered (conninfo: host=node1 dbname=bdrtest user=repmgr)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
and on <literal>node2</literal>:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf bdr register
|
||||
NOTICE: node record created for node 'node2' (ID: 2)
|
||||
NOTICE: BDR node 2 registered (conninfo: host=node2 dbname=bdrtest user=repmgr)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The <literal>repmgr</literal> extension will be automatically created
|
||||
when the first node is registered, and will be propagated to the second
|
||||
node.
|
||||
</para>
|
||||
<important>
|
||||
<simpara>
|
||||
Ensure the &repmgr; package is available on both nodes before
|
||||
attempting to register the first node.
|
||||
</simpara>
|
||||
</important>
|
||||
<para>
|
||||
At this point the metadata for both nodes has been created; executing
|
||||
<xref linkend="repmgr-cluster-show"> (on either node) should produce output like this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+------+-----------+----------+--------------------------------------------------------
|
||||
1 | node1 | bdr | * running | | default | host=node1 dbname=bdrtest user=repmgr connect_timeout=2
|
||||
2 | node2 | bdr | * running | | default | host=node2 dbname=bdrtest user=repmgr connect_timeout=2</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Additionally it's possible to display a log of significant events; executing
|
||||
<xref linkend="repmgr-cluster-event"> (on either node) should produce output like this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster event
|
||||
Node ID | Event | OK | Timestamp | Details
|
||||
---------+--------------+----+---------------------+----------------------------------------------
|
||||
2 | bdr_register | t | 2017-07-27 17:51:48 | node record created for node 'node2' (ID: 2)
|
||||
1 | bdr_register | t | 2017-07-27 17:51:00 | node record created for node 'node1' (ID: 1)
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
At this point there will only be records for the two node registrations (displayed here
|
||||
in reverse chronological order).
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="bdr-event-notification-command" xreflabel="BDR failover event notification command">
|
||||
<title>Defining the "event_notification_command"</title>
|
||||
<para>
|
||||
Key to "failover" execution is the <literal>event_notification_command</literal>,
|
||||
which is a user-definable script specified in <filename>repmgr.conf</filename>
|
||||
and which should reconfigure the proxy server/ connection pooler to point
|
||||
to the other, still-active node.
|
||||
</para>
|
||||
<para>
|
||||
Each time &repmgr; (or <application>repmgrd</application>) records an event,
|
||||
it can optionally execute the script defined in
|
||||
<literal>event_notification_command</literal> to take further action;
|
||||
details of the event will be passed as parameters.
|
||||
</para>
|
||||
<para>
|
||||
The following placeholders are available to the script:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>%n</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
node ID
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>%e</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
event type
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>%s</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
success (1 or 0)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>%t</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
timestamp
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>%d</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
details
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
<para>
|
||||
Note that <literal>%c</literal> and <literal>%a</literal> will only be provided during
|
||||
<varname>bdr_failover</varname> events, which is what is of interest here.
|
||||
</para>
|
||||
<para>
|
||||
The provided sample script (<filename>scripts/bdr-pgbouncer.sh</filename>) is configured like
|
||||
this:
|
||||
<programlisting>
|
||||
event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a"'</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
and parses the configured parameters like this:
|
||||
<programlisting>
|
||||
NODE_ID=$1
|
||||
EVENT_TYPE=$2
|
||||
SUCCESS=$3
|
||||
NEXT_CONNINFO=$4
|
||||
NEXT_NODE_NAME=$5</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The script also contains some hard-coded values about the <application>PgBouncer</application>
|
||||
configuration for both nodes; these will need to be adjusted for your local environment
|
||||
(ideally the scripts would be maintained as templates and generated by some
|
||||
kind of provisioning system).
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The script performs the following steps:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>pauses <application>PgBouncer</application> on all nodes</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>recreates the <application>PgBouncer</application> configuration file on each
|
||||
node using the information provided by <application>repmgrd</application>
|
||||
(primarily the <varname>conninfo</varname> string) to configure
|
||||
<application>PgBouncer</application></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>reloads the <application>PgBouncer</application> configuration</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>executes the <command>RESUME</command> command (in <application>PgBouncer</application>)</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
Following successful script execution, any connections to PgBouncer on the failed BDR node
|
||||
will be redirected to the active node.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="bdr-monitoring-failover" xreflabel="Node monitoring and failover">
|
||||
<title>Node monitoring and failover</title>
|
||||
<para>
|
||||
At the intervals specified by <varname>monitor_interval_secs</varname>
|
||||
in <filename>repmgr.conf</filename>, <application>repmgrd</application>
|
||||
will ping each node to check if it's available. If a node isn't available,
|
||||
<application>repmgrd</application> will enter failover mode and check <varname>reconnect_attempts</varname>
|
||||
times at intervals of <varname>reconnect_interval</varname> to confirm the node is definitely unreachable.
|
||||
This buffer period is necessary to avoid false positives caused by transient
|
||||
network outages.
|
||||
</para>
|
||||
<para>
|
||||
If the node is still unavailable, <application>repmgrd</application> will enter failover mode and execute
|
||||
the script defined in <varname>event_notification_command</varname>; an entry will be logged
|
||||
in the <literal>repmgr.events</literal> table and <application>repmgrd</application> will
|
||||
(unless otherwise configured) resume monitoring of the node in "degraded" mode until it reappears.
|
||||
</para>
|
||||
<para>
|
||||
<application>repmgrd</application> logfile output during a failover event will look something like this
|
||||
on one node (usually the node which has failed, here <literal>node2</literal>):
|
||||
<programlisting>
|
||||
...
|
||||
[2017-07-27 21:08:39] [INFO] starting continuous BDR node monitoring
|
||||
[2017-07-27 21:08:39] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:08:55] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:09:11] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
|
||||
[2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
|
||||
[2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
|
||||
[2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
|
||||
[2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
|
||||
[2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
|
||||
[2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
|
||||
[2017-07-27 21:09:28] [NOTICE] setting node record for node 2 to inactive
|
||||
[2017-07-27 21:09:28] [INFO] executing notification command for event "bdr_failover"
|
||||
[2017-07-27 21:09:28] [DETAIL] command is:
|
||||
/path/to/bdr-pgbouncer.sh 2 bdr_failover 1 "host=node1 dbname=bdrtest user=repmgr connect_timeout=2" "node1"
|
||||
[2017-07-27 21:09:28] [INFO] node 'node2' (ID: 2) detected as failed; next available node is 'node1' (ID: 1)
|
||||
[2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
||||
...</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Output on the other node (<literal>node1</literal>) during the same event will look like this:
|
||||
<programlisting>
|
||||
...
|
||||
[2017-07-27 21:08:35] [INFO] starting continuous BDR node monitoring
|
||||
[2017-07-27 21:08:35] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
||||
[2017-07-27 21:08:51] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
||||
[2017-07-27 21:09:07] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
||||
[2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
|
||||
[2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
|
||||
[2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
|
||||
[2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
|
||||
[2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
|
||||
[2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
|
||||
[2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
|
||||
[2017-07-27 21:09:28] [NOTICE] other node's repmgrd is handling failover
|
||||
[2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
||||
[2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
||||
...</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This assumes only the PostgreSQL instance on <literal>node2</literal> has failed. In this case the
|
||||
<application>repmgrd</application> instance running on <literal>node2</literal> has performed the failover. However if
|
||||
the entire server becomes unavailable, <application>repmgrd</application> on <literal>node1</literal> will perform
|
||||
the failover.
|
||||
</para>
|
||||
</sect1>
|
||||
<sect1 id="bdr-node-recovery" xreflabel="Node recovery">
|
||||
<title>Node recovery</title>
|
||||
<para>
|
||||
Following failure of a BDR node, if the node subsequently becomes available again,
|
||||
a <varname>bdr_recovery</varname> event will be generated. This could potentially be used to
|
||||
reconfigure PgBouncer automatically to bring the node back into the available pool,
|
||||
however it would be prudent to manually verify the node's status before
|
||||
exposing it to the application.
|
||||
</para>
|
||||
<para>
|
||||
If the failed node comes back up and connects correctly, output similar to this
|
||||
will be visible in the <application>repmgrd</application> log:
|
||||
<programlisting>
|
||||
[2017-07-27 21:25:30] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
||||
[2017-07-27 21:25:46] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
||||
[2017-07-27 21:25:46] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
||||
[2017-07-27 21:25:55] [INFO] active replication slot for node "node1" found after 1 seconds
|
||||
[2017-07-27 21:25:55] [NOTICE] node "node2" (ID: 2) has recovered after 986 seconds</programlisting>
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="bdr-complete-shutdown" xreflabel="Shutdown of both nodes">
|
||||
<title>Shutdown of both nodes</title>
|
||||
<para>
|
||||
If both PostgreSQL instances are shut down, <application>repmgrd</application> will try and handle the
|
||||
situation as gracefully as possible, though with no failover candidates available
|
||||
there's not much it can do. Should this case ever occur, we recommend shutting
|
||||
down <application>repmgrd</application> on both nodes and restarting it once the PostgreSQL instances
|
||||
are running properly.
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
22
doc/repmgrd-cascading-replication.sgml
Normal file
22
doc/repmgrd-cascading-replication.sgml
Normal file
@@ -0,0 +1,22 @@
|
||||
<chapter id="repmgrd-cascading-replication">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>cascading replication</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>repmgrd and cascading replication</title>
|
||||
<para>
|
||||
Cascading replication - where a standby can connect to an upstream node and not
|
||||
the primary server itself - was introduced in PostgreSQL 9.2. &repmgr; and
|
||||
<application>repmgrd</application> support cascading replication by keeping track of the relationship
|
||||
between standby servers - each node record is stored with the node id of its
|
||||
upstream ("parent") server (except of course the primary server).
|
||||
</para>
|
||||
<para>
|
||||
In a failover situation where the primary node fails and a top-level standby
|
||||
is promoted, a standby connected to another standby will not be affected
|
||||
and continue working as normal (even if the upstream standby it's connected
|
||||
to becomes the primary node). If however the node's direct upstream fails,
|
||||
the "cascaded standby" will attempt to reconnect to that node's parent.
|
||||
</para>
|
||||
</chapter>
|
||||
100
doc/repmgrd-configuration.sgml
Normal file
100
doc/repmgrd-configuration.sgml
Normal file
@@ -0,0 +1,100 @@
|
||||
<chapter id="repmgrd-configuration">
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>repmgrd configuration</title>
|
||||
<para>
|
||||
To use <application>repmgrd</application>, its associated function library must be
|
||||
included in <filename>postgresql.conf</filename> with:
|
||||
|
||||
<programlisting>
|
||||
shared_preload_libraries = 'repmgr'</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Changing this setting requires a restart of PostgreSQL; for more details see
|
||||
the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
Additionally the following <application>repmgrd</application> options <emphasis>must</emphasis> be set in
|
||||
<filename>repmgr.conf</filename> (adjust configuration file locations as appropriate):
|
||||
<programlisting>
|
||||
failover=automatic
|
||||
promote_command='repmgr standby promote -f /etc/repmgr.conf --log-to-file'
|
||||
follow_command='repmgr standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Note that the <literal>--log-to-file</literal> option will cause
|
||||
output generated by the &repmgr; command, when executed by <application>repmgrd</application>,
|
||||
to be logged to the same destination configured to receive log output for <application>repmgrd</application>.
|
||||
See <filename>repmgr.conf.sample</filename> for further <application>repmgrd</application>-specific settings.
|
||||
</para>
|
||||
<para>
|
||||
When <varname>failover</varname> is set to <literal>automatic</literal>, upon detecting failure
|
||||
of the current primary, <application>repmgrd</application> will execute one of
|
||||
<varname>promote_command</varname> or <varname>follow_command</varname>,
|
||||
depending on whether the current server is to become the new primary, or
|
||||
needs to follow another server which has become the new primary. Note that
|
||||
these commands can be any valid shell script which results in one of these
|
||||
two actions happening, but if &repmgr;'s <command>standby follow</command> or
|
||||
<command>standby promote</command>
|
||||
commands are not executed (either directly as shown here, or from a script which
|
||||
performs other actions), the &repmgr; metadata will not be updated and
|
||||
&repmgr; will no longer function reliably.
|
||||
</para>
|
||||
<para>
|
||||
The <varname>follow_command</varname> should provide the <literal>--upstream-node-id=%n</literal>
|
||||
option to <command>repmgr standby follow</command>; the <literal>%n</literal> will be replaced by
|
||||
<application>repmgrd</application> with the ID of the new primary node. If this is not provided, &repmgr;
|
||||
will attempt to determine the new primary by itself, but if the
|
||||
original primary comes back online after the new primary is promoted, there is a risk that
|
||||
<command>repmgr standby follow</command> will result in the node continuing to follow
|
||||
the original primary.
|
||||
</para>
|
||||
<sect1 id="repmgrd-connection-settings">
|
||||
<title>repmgrd connection settings</title>
|
||||
<para>
|
||||
In addition to the &repmgr; configuration settings, parameters in the
|
||||
<varname>conninfo</varname> string influence how &repmgr; makes a network connection to
|
||||
PostgreSQL. In particular, if another server in the replication cluster
|
||||
is unreachable at network level, system network settings will influence
|
||||
the length of time it takes to determine that the connection is not possible.
|
||||
</para>
|
||||
<para>
|
||||
In particular explicitly setting a parameter for <literal>connect_timeout</literal>
|
||||
should be considered; the effective minimum value of <literal>2</literal>
|
||||
(seconds) will ensure that a connection failure at network level is reported
|
||||
as soon as possible, otherwise depending on the system settings (e.g.
|
||||
<varname>tcp_syn_retries</varname> in Linux) a delay of a minute or more
|
||||
is possible.
|
||||
</para>
|
||||
<para>
|
||||
For further details on <varname>conninfo</varname> network connection
|
||||
parameters, see the
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS">PostgreSQL documentation</ulink>.
|
||||
</para>
|
||||
</sect1>
|
||||
<sect1 id="repmgrd-log-rotation">
|
||||
<title>repmgrd log rotation</title>
|
||||
<para>
|
||||
To ensure the current <application>repmgrd</application> logfile does not grow
|
||||
indefinitely, configure your system's <command>logrotate</command> to
|
||||
regularly rotate it.
|
||||
</para>
|
||||
<para>
|
||||
Sample configuration to rotate logfiles weekly with retention for
|
||||
up to 52 weeks and rotation forced if a file grows beyond 100MB:
|
||||
<programlisting>
|
||||
/var/log/postgresql/repmgr-9.6.log {
|
||||
missingok
|
||||
compress
|
||||
rotate 52
|
||||
maxsize 100M
|
||||
weekly
|
||||
create 0600 postgres postgres
|
||||
}</programlisting>
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
75
doc/repmgrd-degraded-monitoring.sgml
Normal file
75
doc/repmgrd-degraded-monitoring.sgml
Normal file
@@ -0,0 +1,75 @@
|
||||
<chapter id="repmgrd-degraded-monitoring">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>degraded monitoring</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>"degraded monitoring" mode</title>
|
||||
<para>
|
||||
In certain circumstances, <application>repmgrd</application> is not able to fulfill its primary mission
|
||||
of monitoring the node's upstream server. In these cases it enters "degraded
|
||||
monitoring" mode, where <application>repmgrd</application> remains active but is waiting for the situation
|
||||
to be resolved.
|
||||
</para>
|
||||
<para>
|
||||
Situations where this happens are:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, no nodes in the primary node's location are visible</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but no promotion candidate is available</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but the promotion candidate could not be promoted</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but the node was unable to follow the new primary</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but no primary has become available</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but automatic failover is not enabled for the node</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>repmgrd is monitoring the primary node, but it is not available</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Example output in a situation where there is only one standby with <literal>failover=manual</literal>,
|
||||
and the primary node is unavailable (but is later restarted):
|
||||
<programlisting>
|
||||
[2017-08-29 10:59:19] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)
|
||||
[2017-08-29 10:59:33] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||
[2017-08-29 10:59:33] [INFO] checking state of node 1, 1 of 5 attempts
|
||||
[2017-08-29 10:59:33] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
(...)
|
||||
[2017-08-29 10:59:37] [INFO] checking state of node 1, 5 of 5 attempts
|
||||
[2017-08-29 10:59:37] [WARNING] unable to reconnect to node 1 after 5 attempts
|
||||
[2017-08-29 10:59:37] [NOTICE] this node is not configured for automatic failover so will not be considered as promotion candidate
|
||||
[2017-08-29 10:59:37] [NOTICE] no other nodes are available as promotion candidate
|
||||
[2017-08-29 10:59:37] [HINT] use "repmgr standby promote" to manually promote this node
|
||||
[2017-08-29 10:59:37] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
||||
[2017-08-29 10:59:53] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
||||
[2017-08-29 11:00:45] [NOTICE] reconnected to upstream node 1 after 68 seconds, resuming monitoring
|
||||
[2017-08-29 11:00:57] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)</programlisting>
|
||||
|
||||
</para>
|
||||
<para>
|
||||
By default, <application>repmgrd</application> will continue in degraded monitoring mode indefinitely.
|
||||
However a timeout (in seconds) can be set with <varname>degraded_monitoring_timeout</varname>,
|
||||
after which <application>repmgrd</application> will terminate.
|
||||
|
||||
</para>
|
||||
|
||||
</chapter>
|
||||
96
doc/repmgrd-demonstration.sgml
Normal file
96
doc/repmgrd-demonstration.sgml
Normal file
@@ -0,0 +1,96 @@
|
||||
<chapter id="repmgrd-demonstration">
|
||||
<title>repmgrd demonstration</title>
|
||||
<para>
|
||||
To demonstrate automatic failover, set up a 3-node replication cluster (one primary
|
||||
and two standbys streaming directly from the primary) so that the cluster looks
|
||||
something like this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
||||
1 | node1 | primary | * running | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Start <application>repmgrd</application> on each standby and verify that it's running by examining the
|
||||
log output, which at log level <literal>INFO</literal> will look like this:
|
||||
<programlisting>
|
||||
[2017-08-24 17:31:00] [NOTICE] using configuration file "/etc/repmgr.conf"
|
||||
[2017-08-24 17:31:00] [INFO] connecting to database "host=node2 dbname=repmgr user=repmgr"
|
||||
[2017-08-24 17:31:00] [NOTICE] starting monitoring of node "node2" (ID: 2)
|
||||
[2017-08-24 17:31:00] [INFO] monitoring connection to upstream node "node1" (node ID: 1)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Each <application>repmgrd</application> should also have recorded its successful startup as an event:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster event --event=repmgrd_start
|
||||
Node ID | Name | Event | OK | Timestamp | Details
|
||||
---------+-------+---------------+----+---------------------+-------------------------------------------------------------
|
||||
3 | node3 | repmgrd_start | t | 2017-08-24 17:35:54 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||
2 | node2 | repmgrd_start | t | 2017-08-24 17:35:50 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||
1 | node1 | repmgrd_start | t | 2017-08-24 17:35:46 | monitoring cluster primary "node1" (node ID: 1) </programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Now stop the current primary server with e.g.:
|
||||
<programlisting>
|
||||
pg_ctl -D /var/lib/postgresql/data -m immediate stop</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This will force the primary to shut down straight away, aborting all processes
|
||||
and transactions. This will cause a flurry of activity in the <application>repmgrd</application> log
|
||||
files as each <application>repmgrd</application> detects the failure of the primary and a failover
|
||||
decision is made. This is an extract from the log of a standby server (<literal>node2</literal>)
|
||||
which has been promoted to new primary after failure of the original primary (<literal>node1</literal>).
|
||||
<programlisting>
|
||||
[2017-08-24 23:32:01] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state
|
||||
[2017-08-24 23:32:08] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||
[2017-08-24 23:32:08] [INFO] checking state of node 1, 1 of 5 attempts
|
||||
[2017-08-24 23:32:08] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-08-24 23:32:09] [INFO] checking state of node 1, 2 of 5 attempts
|
||||
[2017-08-24 23:32:09] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-08-24 23:32:10] [INFO] checking state of node 1, 3 of 5 attempts
|
||||
[2017-08-24 23:32:10] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-08-24 23:32:11] [INFO] checking state of node 1, 4 of 5 attempts
|
||||
[2017-08-24 23:32:11] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-08-24 23:32:12] [INFO] checking state of node 1, 5 of 5 attempts
|
||||
[2017-08-24 23:32:12] [WARNING] unable to reconnect to node 1 after 5 attempts
|
||||
INFO: setting voting term to 1
|
||||
INFO: node 2 is candidate
|
||||
INFO: node 3 has received request from node 2 for electoral term 1 (our term: 0)
|
||||
[2017-08-24 23:32:12] [NOTICE] this node is the winner, will now promote self and inform other nodes
|
||||
INFO: connecting to standby database
|
||||
NOTICE: promoting standby
|
||||
DETAIL: promoting server using 'pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' promote'
|
||||
INFO: reconnecting to promoted server
|
||||
NOTICE: STANDBY PROMOTE successful
|
||||
DETAIL: node 2 was successfully promoted to primary
|
||||
INFO: node 3 received notification to follow node 2
|
||||
[2017-08-24 23:32:13] [INFO] switching to primary monitoring mode</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The cluster status will now look like this, with the original primary (<literal>node1</literal>)
|
||||
marked as inactive, and standby <literal>node3</literal> now following the new primary
|
||||
(<literal>node2</literal>):
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+----------------------------------------------------
|
||||
1 | node1 | primary | - failed | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node2 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
||||
|
||||
</para>
|
||||
<para>
|
||||
<command>repmgr cluster event</command> will display a summary of what happened to each server
|
||||
during the failover:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster event
|
||||
Node ID | Name | Event | OK | Timestamp | Details
|
||||
---------+-------+--------------------------+----+---------------------+-----------------------------------------------------------------------------------
|
||||
3 | node3 | repmgrd_failover_follow | t | 2017-08-24 23:32:16 | node 3 now following new upstream node 2
|
||||
3 | node3 | standby_follow | t | 2017-08-24 23:32:16 | node 3 is now attached to node 2
|
||||
2 | node2 | repmgrd_failover_promote | t | 2017-08-24 23:32:13 | node 2 promoted to primary; old primary 1 marked as failed
|
||||
2 | node2 | standby_promote | t | 2017-08-24 23:32:13 | node 2 was successfully promoted to primary</programlisting>
|
||||
</para>
|
||||
</chapter>
|
||||
76
doc/repmgrd-monitoring.sgml
Normal file
76
doc/repmgrd-monitoring.sgml
Normal file
@@ -0,0 +1,76 @@
|
||||
<chapter id="repmgrd-monitoring">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>monitoring</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Monitoring with repmgrd</title>
|
||||
<para>
|
||||
When <application>repmgrd</application> is running with the option <literal>monitoring_history=true</literal>,
|
||||
it will constantly write standby node status information to the
|
||||
<varname>monitoring_history</varname> table, providing a near-real time
|
||||
overview of replication status on all nodes
|
||||
in the cluster.
|
||||
</para>
|
||||
<para>
|
||||
The view <literal>replication_status</literal> shows the most recent state
|
||||
for each node, e.g.:
|
||||
<programlisting>
|
||||
repmgr=# select * from repmgr.replication_status;
|
||||
-[ RECORD 1 ]-------------+------------------------------
|
||||
primary_node_id | 1
|
||||
standby_node_id | 2
|
||||
standby_name | node2
|
||||
node_type | standby
|
||||
active | t
|
||||
last_monitor_time | 2017-08-24 16:28:41.260478+09
|
||||
last_wal_primary_location | 0/6D57A00
|
||||
last_wal_standby_location | 0/5000000
|
||||
replication_lag | 29 MB
|
||||
replication_time_lag | 00:00:11.736163
|
||||
apply_lag | 15 MB
|
||||
communication_time_lag | 00:00:01.365643</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The interval in which monitoring history is written is controlled by the
|
||||
configuration parameter <varname>monitor_interval_secs</varname>;
|
||||
default is 2.
|
||||
</para>
|
||||
<para>
|
||||
As this can generate a large amount of monitoring data in the table
|
||||
<literal>repmgr.monitoring_history</literal>, it's advisable to regularly
|
||||
purge historical data using the <xref linkend="repmgr-cluster-cleanup">
|
||||
command; use the <literal>-k/--keep-history</literal> option to
|
||||
specify how many days' worth of data should be retained.
|
||||
</para>
|
||||
<para>
|
||||
It's possible to run <application>repmgrd</application> in monitoring
|
||||
mode only (without automatic failover capability) for some or all
|
||||
nodes by setting <literal>failover=manual</literal> in the node's
|
||||
<filename>repmgr.conf</filename> file. In the event of the node's upstream failing,
|
||||
no failover action will be taken and the node will require manual intervention to
|
||||
be reattached to replication. If this occurs, an
|
||||
<link linkend="event-notifications">event notification</link>
|
||||
<varname>standby_disconnect_manual</varname> will be created.
|
||||
</para>
|
||||
<para>
|
||||
Note that when a standby node is not streaming directly from its upstream
|
||||
node, e.g. recovering WAL from an archive, <varname>apply_lag</varname> will always appear as
|
||||
<literal>0 bytes</literal>.
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
If monitoring history is enabled, the contents of the <literal>repmgr.monitoring_history</literal>
|
||||
table will be replicated to attached standbys. This means there will be a small but
|
||||
constant stream of replication activity which may not be desirable. To prevent
|
||||
this, convert the table to an <literal>UNLOGGED</literal> one with:
|
||||
<programlisting>
|
||||
ALTER TABLE repmgr.monitoring_history SET UNLOGGED;</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This will however mean that monitoring history will not be available on
|
||||
another node following a failover, and the view <literal>repmgr.replication_status</literal>
|
||||
will not work on standbys.
|
||||
</para>
|
||||
</tip>
|
||||
</chapter>
|
||||
48
doc/repmgrd-network-split.sgml
Normal file
48
doc/repmgrd-network-split.sgml
Normal file
@@ -0,0 +1,48 @@
|
||||
<chapter id="repmgrd-network-split" xreflabel="Handling network splits with repmgrd">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>network splits</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Handling network splits with repmgrd</title>
|
||||
<para>
|
||||
A common pattern for replication cluster setups is to spread servers over
|
||||
more than one datacentre. This can provide benefits such as geographically-
|
||||
distributed read replicas and DR (disaster recovery) capability. However,
|
||||
this also means there is a risk of disconnection at network level between
|
||||
datacentre locations, which would result in a split-brain scenario if
|
||||
servers in a secondary data centre were no longer able to see the primary
|
||||
in the main data centre and promoted a standby among themselves.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; enables provision of "<xref linkend="witness-server">" to
|
||||
artificially create a quorum of servers in a particular location, ensuring
|
||||
that nodes in another location will not elect a new primary if they
|
||||
are unable to see the majority of nodes. However this approach does not
|
||||
scale well, particularly with more complex replication setups, e.g.
|
||||
where the majority of nodes are located outside of the primary datacentre.
|
||||
It also means the <literal>witness</literal> node needs to be managed as an
|
||||
extra PostgreSQL instance outside of the main replication cluster, which
|
||||
adds administrative and programming complexity.
|
||||
</para>
|
||||
<para>
|
||||
<literal>repmgr4</literal> introduces the concept of <literal>location</literal>:
|
||||
each node is associated with an arbitrary location string (default is
|
||||
<literal>default</literal>); this is set in <filename>repmgr.conf</filename>, e.g.:
|
||||
<programlisting>
|
||||
node_id=1
|
||||
node_name=node1
|
||||
conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'
|
||||
data_directory='/var/lib/postgresql/data'
|
||||
location='dc1'</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
In a failover situation, <application>repmgrd</application> will check if any servers in the
|
||||
same location as the current primary node are visible. If not, <application>repmgrd</application>
|
||||
will assume a network interruption and not promote any node in any
|
||||
other location (it will however enter <xref linkend="repmgrd-degraded-monitoring"> mode until
|
||||
a primary becomes visible).
|
||||
</para>
|
||||
|
||||
</chapter>
|
||||
|
||||
31
doc/repmgrd-witness-server.sgml
Normal file
31
doc/repmgrd-witness-server.sgml
Normal file
@@ -0,0 +1,31 @@
|
||||
<chapter id="repmgrd-witness-server" xreflabel="Using a witness server with repmgrd">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>witness server</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Using a witness server with repmgrd</title>
|
||||
<para>
|
||||
In a situation caused e.g. by a network interruption between two
|
||||
data centres, it's important to avoid a "split-brain" situation where
|
||||
both sides of the network assume they are the active segment and the
|
||||
side without an active primary unilaterally promotes one of its standbys.
|
||||
</para>
|
||||
<para>
|
||||
To prevent this situation happening, it's essential to ensure that one
|
||||
network segment has a "voting majority", so other segments will know
|
||||
they're in the minority and not attempt to promote a new primary. Where
|
||||
an odd number of servers exists, this is not an issue. However, if each
|
||||
network has an even number of nodes, it's necessary to provide some way
|
||||
of ensuring a majority, which is where the witness server becomes useful.
|
||||
</para>
|
||||
<para>
|
||||
This is not a fully-fledged standby node and is not integrated into
|
||||
replication, but it effectively represents the "casting vote" when
|
||||
deciding which network segment has a majority. A witness server can
|
||||
be set up using <xref linkend="repmgr-witness-register">. Note that it only
|
||||
makes sense to create a witness server in conjunction with running
|
||||
<application>repmgrd</application>; the witness server will require its own
|
||||
<application>repmgrd</application> instance.
|
||||
</para>
|
||||
</chapter>
|
||||
96
doc/stylesheet.css
Normal file
96
doc/stylesheet.css
Normal file
@@ -0,0 +1,96 @@
|
||||
/* doc/stylesheet.css */
|
||||
|
||||
/* color scheme similar to www.postgresql.org */
|
||||
|
||||
BODY {
|
||||
color: #000000;
|
||||
background: #FFFFFF;
|
||||
font-family: verdana, sans-serif;
|
||||
}
|
||||
|
||||
A:link { color:#0066A2; }
|
||||
A:visited { color:#004E66; }
|
||||
A:active { color:#0066A2; }
|
||||
A:hover { color:#000000; }
|
||||
|
||||
H1 {
|
||||
font-size: 1.4em;
|
||||
font-weight: bold;
|
||||
margin-top: 0em;
|
||||
margin-bottom: 0em;
|
||||
color: #EC5800;
|
||||
}
|
||||
|
||||
H2 {
|
||||
font-size: 1.2em;
|
||||
margin: 1.2em 0em 1.2em 0em;
|
||||
font-weight: bold;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
H3 {
|
||||
font-size: 1.1em;
|
||||
margin: 1.2em 0em 1.2em 0em;
|
||||
font-weight: bold;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
H4 {
|
||||
font-size: 0.95em;
|
||||
margin: 1.2em 0em 1.2em 0em;
|
||||
font-weight: normal;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
H5 {
|
||||
font-size: 0.9em;
|
||||
margin: 1.2em 0em 1.2em 0em;
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
H6 {
|
||||
font-size: 0.85em;
|
||||
margin: 1.2em 0em 1.2em 0em;
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
/* center some titles */
|
||||
|
||||
.BOOK .TITLE, .BOOK .CORPAUTHOR, .BOOK .COPYRIGHT {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* decoration for formal examples */
|
||||
|
||||
DIV.EXAMPLE {
|
||||
padding-left: 15px;
|
||||
border-style: solid;
|
||||
border-width: 0px;
|
||||
border-left-width: 2px;
|
||||
border-color: black;
|
||||
margin: 0.5ex;
|
||||
}
|
||||
|
||||
/* less dense spacing of TOC */
|
||||
|
||||
.BOOK .TOC DL DT {
|
||||
padding-top: 1.5ex;
|
||||
padding-bottom: 1.5ex;
|
||||
}
|
||||
|
||||
.BOOK .TOC DL DL DT {
|
||||
padding-top: 0ex;
|
||||
padding-bottom: 0ex;
|
||||
}
|
||||
|
||||
/* miscellaneous */
|
||||
|
||||
PRE.LITERALLAYOUT, .SCREEN, .SYNOPSIS, .PROGRAMLISTING {
|
||||
margin-left: 4ex;
|
||||
}
|
||||
|
||||
.COMMENT { color: red; }
|
||||
|
||||
VAR { font-family: monospace; font-style: italic; }
|
||||
/* Konqueror's standard style for ACRONYM is italic. */
|
||||
ACRONYM { font-style: inherit; }
|
||||
851
doc/stylesheet.dsl
Normal file
851
doc/stylesheet.dsl
Normal file
@@ -0,0 +1,851 @@
|
||||
<!-- doc/stylesheet.dsl -->
|
||||
<!DOCTYPE style-sheet PUBLIC "-//James Clark//DTD DSSSL Style Sheet//EN" [
|
||||
|
||||
<!-- must turn on one of these with -i on the jade command line -->
|
||||
<!ENTITY % output-html "IGNORE">
|
||||
<!ENTITY % output-print "IGNORE">
|
||||
<!ENTITY % output-text "IGNORE">
|
||||
|
||||
<![ %output-html; [
|
||||
<!ENTITY dbstyle PUBLIC "-//Norman Walsh//DOCUMENT DocBook HTML Stylesheet//EN" CDATA DSSSL>
|
||||
]]>
|
||||
|
||||
<![ %output-print; [
|
||||
<!ENTITY dbstyle PUBLIC "-//Norman Walsh//DOCUMENT DocBook Print Stylesheet//EN" CDATA DSSSL>
|
||||
]]>
|
||||
|
||||
<![ %output-text; [
|
||||
<!ENTITY dbstyle PUBLIC "-//Norman Walsh//DOCUMENT DocBook HTML Stylesheet//EN" CDATA DSSSL>
|
||||
]]>
|
||||
|
||||
]>
|
||||
|
||||
<style-sheet>
|
||||
<style-specification use="docbook">
|
||||
<style-specification-body>
|
||||
|
||||
<!-- general customization ......................................... -->
|
||||
|
||||
<!-- (applicable to all output formats) -->
|
||||
|
||||
(define draft-mode #f)
|
||||
|
||||
;; Don't show manpage volume numbers
|
||||
(define %refentry-xref-manvolnum% #f)
|
||||
|
||||
;; Don't use graphics for callouts. (We could probably do that, but
|
||||
;; it needs extra work.)
|
||||
(define %callout-graphics% #f)
|
||||
|
||||
;; Show comments during the development stage.
|
||||
(define %show-comments% draft-mode)
|
||||
|
||||
;; Force a chapter TOC even if it includes only a single entry
|
||||
(define %force-chapter-toc% #t)
|
||||
|
||||
;; Don't append period if run-in title ends with any of these
|
||||
;; characters. We had to add the colon here. This is fixed in
|
||||
;; stylesheets version 1.71, so it can be removed sometime.
|
||||
(define %content-title-end-punct%
|
||||
'(#\. #\! #\? #\:))
|
||||
|
||||
;; No automatic punctuation after honorific name parts
|
||||
(define %honorific-punctuation% "")
|
||||
|
||||
;; Change display of some elements
|
||||
(element command ($mono-seq$))
|
||||
(element envar ($mono-seq$))
|
||||
(element lineannotation ($italic-seq$))
|
||||
(element literal ($mono-seq$))
|
||||
(element option ($mono-seq$))
|
||||
(element parameter ($mono-seq$))
|
||||
(element structfield ($mono-seq$))
|
||||
(element structname ($mono-seq$))
|
||||
(element symbol ($mono-seq$))
|
||||
(element token ($mono-seq$))
|
||||
(element type ($mono-seq$))
|
||||
(element varname ($mono-seq$))
|
||||
(element (programlisting emphasis) ($bold-seq$)) ;; to highlight sections of code
|
||||
|
||||
;; Special support for Tcl synopses
|
||||
(element optional
|
||||
(if (equal? (attribute-string (normalize "role")) "tcl")
|
||||
(make sequence
|
||||
(literal "?")
|
||||
($charseq$)
|
||||
(literal "?"))
|
||||
(make sequence
|
||||
(literal %arg-choice-opt-open-str%)
|
||||
($charseq$)
|
||||
(literal %arg-choice-opt-close-str%))))
|
||||
|
||||
;; Avoid excessive cross-reference labels
|
||||
(define (auto-xref-indirect? target ancestor)
|
||||
(cond
|
||||
; ;; Always add indirect references to another book
|
||||
; ((member (gi ancestor) (book-element-list))
|
||||
; #t)
|
||||
;; Add indirect references to the section or component a block
|
||||
;; is in iff chapters aren't autolabelled. (Otherwise "Figure 1-3"
|
||||
;; is sufficient)
|
||||
((and (member (gi target) (block-element-list))
|
||||
(not %chapter-autolabel%))
|
||||
#t)
|
||||
;; Add indirect references to the component a section is in if
|
||||
;; the sections are not autolabelled
|
||||
((and (member (gi target) (section-element-list))
|
||||
(member (gi ancestor) (component-element-list))
|
||||
(not %section-autolabel%))
|
||||
#t)
|
||||
(else #f)))
|
||||
|
||||
|
||||
;; Bibliography things
|
||||
|
||||
;; Use the titles of bibliography entries in cross-references
|
||||
(define biblio-xref-title #t)
|
||||
|
||||
;; Process bibliography entry components in the order shown below, not
|
||||
;; in the order they appear in the document. (I suppose this should
|
||||
;; be made to fit some publishing standard.)
|
||||
(define %biblioentry-in-entry-order% #f)
|
||||
|
||||
(define (biblioentry-inline-elements)
|
||||
(list
|
||||
(normalize "author")
|
||||
(normalize "authorgroup")
|
||||
(normalize "title")
|
||||
(normalize "subtitle")
|
||||
(normalize "volumenum")
|
||||
(normalize "edition")
|
||||
(normalize "othercredit")
|
||||
(normalize "contrib")
|
||||
(normalize "editor")
|
||||
(normalize "publishername")
|
||||
(normalize "confgroup")
|
||||
(normalize "publisher")
|
||||
(normalize "isbn")
|
||||
(normalize "issn")
|
||||
(normalize "pubsnumber")
|
||||
(normalize "date")
|
||||
(normalize "pubdate")
|
||||
(normalize "pagenums")
|
||||
(normalize "bibliomisc")))
|
||||
|
||||
(mode biblioentry-inline-mode
|
||||
|
||||
(element confgroup
|
||||
(make sequence
|
||||
(literal "Proc. ")
|
||||
(next-match)))
|
||||
|
||||
(element isbn
|
||||
(make sequence
|
||||
(literal "ISBN ")
|
||||
(process-children)))
|
||||
|
||||
(element issn
|
||||
(make sequence
|
||||
(literal "ISSN ")
|
||||
(process-children))))
|
||||
|
||||
|
||||
;; The rules in the default stylesheet for productname format it as a
|
||||
;; paragraph. This may be suitable for productname directly within
|
||||
;; *info, but it's nonsense when productname is used inline, as we do.
|
||||
(mode book-titlepage-recto-mode
|
||||
(element (para productname) ($charseq$)))
|
||||
(mode book-titlepage-verso-mode
|
||||
(element (para productname) ($charseq$)))
|
||||
;; Add more here if needed...
|
||||
|
||||
|
||||
;; Collapse every run of whitespace characters in STR into one space.
;; WHITESPACE lists the characters treated as whitespace; by default
;; these are #\space and #\U-000D.  A leading or trailing run is reduced
;; to a single space as well; it is not removed.
(define (normalize-whitespace str #!optional (whitespace '(#\space #\U-000D)))
  (let scan ((pending (string->list str))
             (kept '())
             (in-run #f))
    (cond
     ;; Input exhausted: KEPT was accumulated back to front.
     ((null? pending)
      (list->string (reverse kept)))
     ;; Ordinary character: keep it and end any whitespace run.
     ((not (member (car pending) whitespace))
      (scan (cdr pending) (cons (car pending) kept) #f))
     ;; Already inside a run: drop the extra whitespace character.
     (in-run
      (scan (cdr pending) kept #t))
     ;; First whitespace character of a run is emitted as one space.
     (else
      (scan (cdr pending) (cons #\space kept) #t)))))
|
||||
|
||||
|
||||
<!-- HTML output customization ..................................... -->
|
||||
|
||||
<![ %output-html; [
|
||||
|
||||
(define %section-autolabel% #t)
|
||||
(define %label-preface-sections% #f)
|
||||
(define %generate-legalnotice-link% #t)
|
||||
(define %html-ext% ".html")
|
||||
(define %root-filename% "index")
|
||||
;; Contact URL handed to the DocBook HTML stylesheets.  A mailto: URL
;; must not contain whitespace between the scheme and the address, so
;; the address follows the colon directly.
(define %link-mailto-url% "mailto:repmgr-list@2ndquadrant.com")
|
||||
(define %use-id-as-filename% #t)
|
||||
(define website-build #f)
|
||||
(define %stylesheet% (if website-build "/resources/docs.css" "website-docs.css"))
|
||||
(define %graphic-default-extension% "gif")
|
||||
(define %body-attr% '())
|
||||
(define ($generate-book-lot-list$) '())
|
||||
(define use-output-dir #t)
|
||||
(define %output-dir% "html")
|
||||
(define html-index-filename "../HTML.index")
|
||||
|
||||
|
||||
;; Only build HTML.index or the actual HTML output, not both. Saves a
|
||||
;; *lot* of time. (overrides docbook.dsl)
|
||||
(root
|
||||
(if (not html-index)
|
||||
(make sequence
|
||||
(process-children)
|
||||
(with-mode manifest
|
||||
(process-children)))
|
||||
(with-mode htmlindex
|
||||
(process-children))))
|
||||
|
||||
|
||||
;; Do not combine first section into chapter chunk.
|
||||
(define (chunk-skip-first-element-list) '())
|
||||
|
||||
;; Depth of the automatic TOC generated at node ND: two levels when ND
;; is a book, part or chapter, one level for anything else.
(define (toc-depth nd)
  (if (or (string=? (gi nd) (normalize "book"))
          (string=? (gi nd) (normalize "part"))
          (string=? (gi nd) (normalize "chapter")))
      2
      1))
|
||||
|
||||
;; Add character encoding and time of creation into HTML header
|
||||
(define %html-header-tags%
|
||||
(list (list "META" '("HTTP-EQUIV" "Content-Type") '("CONTENT" "text/html; charset=ISO-8859-1"))
|
||||
(list "META" '("NAME" "creation") (list "CONTENT" (time->string (time) #t)))))
|
||||
|
||||
|
||||
;; Block elements are allowed in PARA in DocBook, but not in P in
|
||||
;; HTML. With %fix-para-wrappers% turned on, the stylesheets attempt
|
||||
;; to avoid putting block elements in HTML P tags by outputting
|
||||
;; additional end/begin P pairs around them.
|
||||
(define %fix-para-wrappers% #t)
|
||||
|
||||
;; ...but we need to do some extra work to make the above apply to PRE
|
||||
;; as well. (mostly pasted from dbverb.dsl)
|
||||
(define ($verbatim-display$ indent line-numbers?)
|
||||
(let ((content (make element gi: "PRE"
|
||||
attributes: (list
|
||||
(list "CLASS" (gi)))
|
||||
(if (or indent line-numbers?)
|
||||
($verbatim-line-by-line$ indent line-numbers?)
|
||||
(process-children)))))
|
||||
(if %shade-verbatim%
|
||||
(make element gi: "TABLE"
|
||||
attributes: ($shade-verbatim-attr$)
|
||||
(make element gi: "TR"
|
||||
(make element gi: "TD"
|
||||
content)))
|
||||
(make sequence
|
||||
(para-check)
|
||||
content
|
||||
(para-check 'restart)))))
|
||||
|
||||
;; ...and for notes.
|
||||
(element note
|
||||
(make sequence
|
||||
(para-check)
|
||||
($admonition$)
|
||||
(para-check 'restart)))
|
||||
|
||||
;;; XXX The above is very ugly. It might be better to run 'tidy' on
|
||||
;;; the resulting *.html files.
|
||||
|
||||
|
||||
;; Put each term of a varlistentry on its own line instead of
;; separating multiple terms with commas.
(element (varlistentry term)
  (make sequence
    (process-children-trim)
    ;; Every term except the last one is followed by a line break.
    (if (last-sibling?)
        (empty-sosofo)
        (make empty-element gi: "BR"))))
|
||||
|
||||
|
||||
;; Customization of header
|
||||
;; - make title a link to the home page
|
||||
;; - add tool tips to Prev/Next links
|
||||
;; - add Up link
|
||||
;; (overrides dbnavig.dsl)
|
||||
(define (default-header-nav-tbl-noff elemnode prev next prevsib nextsib)
|
||||
(let* ((r1? (nav-banner? elemnode))
|
||||
(r1-sosofo (make element gi: "TR"
|
||||
(make element gi: "TH"
|
||||
attributes: (list
|
||||
(list "COLSPAN" "4")
|
||||
(list "ALIGN" "center")
|
||||
(list "VALIGN" "bottom"))
|
||||
(make element gi: "A"
|
||||
attributes: (list
|
||||
(list "HREF" (href-to (nav-home elemnode))))
|
||||
(nav-banner elemnode)))))
|
||||
(r2? (or (not (node-list-empty? prev))
|
||||
(not (node-list-empty? next))
|
||||
(nav-context? elemnode)))
|
||||
(r2-sosofo (make element gi: "TR"
|
||||
(make element gi: "TD"
|
||||
attributes: (list
|
||||
(list "WIDTH" "10%")
|
||||
(list "ALIGN" "left")
|
||||
(list "VALIGN" "top"))
|
||||
(if (node-list-empty? prev)
|
||||
(make entity-ref name: "nbsp")
|
||||
(make element gi: "A"
|
||||
attributes: (list
|
||||
(list "TITLE" (element-title-string prev))
|
||||
(list "HREF"
|
||||
(href-to
|
||||
prev))
|
||||
(list "ACCESSKEY"
|
||||
"P"))
|
||||
(gentext-nav-prev prev))))
|
||||
(make element gi: "TD"
|
||||
attributes: (list
|
||||
(list "WIDTH" "10%")
|
||||
(list "ALIGN" "left")
|
||||
(list "VALIGN" "top"))
|
||||
(if (nav-up? elemnode)
|
||||
(nav-up elemnode)
|
||||
(nav-home-link elemnode)))
|
||||
(make element gi: "TD"
|
||||
attributes: (list
|
||||
(list "WIDTH" "60%")
|
||||
(list "ALIGN" "center")
|
||||
(list "VALIGN" "bottom"))
|
||||
(nav-context elemnode))
|
||||
(make element gi: "TD"
|
||||
attributes: (list
|
||||
(list "WIDTH" "20%")
|
||||
(list "ALIGN" "right")
|
||||
(list "VALIGN" "top"))
|
||||
(if (node-list-empty? next)
|
||||
(make entity-ref name: "nbsp")
|
||||
(make element gi: "A"
|
||||
attributes: (list
|
||||
(list "TITLE" (element-title-string next))
|
||||
(list "HREF"
|
||||
(href-to
|
||||
next))
|
||||
(list "ACCESSKEY"
|
||||
"N"))
|
||||
(gentext-nav-next next)))))))
|
||||
(if (or r1? r2?)
|
||||
(make element gi: "DIV"
|
||||
attributes: '(("CLASS" "NAVHEADER"))
|
||||
(make element gi: "TABLE"
|
||||
attributes: (list
|
||||
(list "SUMMARY" "Header navigation table")
|
||||
(list "WIDTH" %gentext-nav-tblwidth%)
|
||||
(list "BORDER" "0")
|
||||
(list "CELLPADDING" "0")
|
||||
(list "CELLSPACING" "0"))
|
||||
(if r1? r1-sosofo (empty-sosofo))
|
||||
(if r2? r2-sosofo (empty-sosofo)))
|
||||
(make empty-element gi: "HR"
|
||||
attributes: (list
|
||||
(list "ALIGN" "LEFT")
|
||||
(list "WIDTH" %gentext-nav-tblwidth%))))
|
||||
(empty-sosofo))))
|
||||
|
||||
|
||||
;; Put index "quicklinks" (A | B | C | ...) at the top of the bookindex page.
|
||||
|
||||
(element index
|
||||
(let ((preamble (node-list-filter-by-not-gi
|
||||
(children (current-node))
|
||||
(list (normalize "indexentry"))))
|
||||
(indexdivs (node-list-filter-by-gi
|
||||
(children (current-node))
|
||||
(list (normalize "indexdiv"))))
|
||||
(entries (node-list-filter-by-gi
|
||||
(children (current-node))
|
||||
(list (normalize "indexentry")))))
|
||||
(html-document
|
||||
(with-mode head-title-mode
|
||||
(literal (element-title-string (current-node))))
|
||||
(make element gi: "DIV"
|
||||
attributes: (list (list "CLASS" (gi)))
|
||||
($component-separator$)
|
||||
($component-title$)
|
||||
(if (node-list-empty? indexdivs)
|
||||
(empty-sosofo)
|
||||
(make element gi: "P"
|
||||
attributes: (list (list "CLASS" "INDEXDIV-QUICKLINKS"))
|
||||
(with-mode indexdiv-quicklinks-mode
|
||||
(process-node-list indexdivs))))
|
||||
(process-node-list preamble)
|
||||
(if (node-list-empty? entries)
|
||||
(empty-sosofo)
|
||||
(make element gi: "DL"
|
||||
(process-node-list entries)))))))
|
||||
|
||||
|
||||
(mode indexdiv-quicklinks-mode
|
||||
(element indexdiv
|
||||
(make sequence
|
||||
(make element gi: "A"
|
||||
attributes: (list (list "HREF" (href-to (current-node))))
|
||||
(element-title-sosofo))
|
||||
(if (not (last-sibling?))
|
||||
(literal " | ")
|
||||
(literal "")))))
|
||||
|
||||
|
||||
;; Changed to strip and normalize index term content (overrides
|
||||
;; dbindex.dsl)
|
||||
(define (htmlindexterm)
|
||||
(let* ((attr (gi (current-node)))
|
||||
(content (data (current-node)))
|
||||
(string (strip (normalize-whitespace content))) ;; changed
|
||||
(sortas (attribute-string (normalize "sortas"))))
|
||||
(make sequence
|
||||
(make formatting-instruction data: attr)
|
||||
(if sortas
|
||||
(make sequence
|
||||
(make formatting-instruction data: "[")
|
||||
(make formatting-instruction data: sortas)
|
||||
(make formatting-instruction data: "]"))
|
||||
(empty-sosofo))
|
||||
(make formatting-instruction data: " ")
|
||||
(make formatting-instruction data: string)
|
||||
(htmlnewline))))
|
||||
|
||||
(define ($html-body-start$)
|
||||
(if website-build
|
||||
(make empty-element gi: "!--#include virtual=\"/resources/docs-header.html\"--")
|
||||
(empty-sosofo)))
|
||||
|
||||
(define ($html-body-end$)
|
||||
(if website-build
|
||||
(make empty-element gi: "!--#include virtual=\"/resources/docs-footer.html\"--")
|
||||
(empty-sosofo)))
|
||||
|
||||
]]> <!-- %output-html -->
|
||||
|
||||
|
||||
<!-- Print output customization .................................... -->
|
||||
|
||||
<![ %output-print; [
|
||||
|
||||
(define %section-autolabel% #t)
|
||||
(define %default-quadding% 'justify)
|
||||
|
||||
;; Don't know how well hyphenation works with other backends. Might
|
||||
;; turn this on if desired.
|
||||
(define %hyphenation%
|
||||
(if tex-backend #t #f))
|
||||
|
||||
;; Put footnotes at the bottom of the page (rather than end of
|
||||
;; section), and put the URLs of links into footnotes.
|
||||
;;
|
||||
;; bop-footnotes only works with TeX, otherwise it's ignored. But
|
||||
;; when both of these are #t and TeX is used, you need at least
|
||||
;; stylesheets 1.73 because otherwise you don't get any footnotes at
|
||||
;; all for the links.
|
||||
(define bop-footnotes #t)
|
||||
(define %footnote-ulinks% #t)
|
||||
|
||||
(define %refentry-new-page% #t)
|
||||
(define %refentry-keep% #f)
|
||||
|
||||
;; Disabled because of TeX problems
|
||||
;; (http://archives.postgresql.org/pgsql-docs/2007-12/msg00056.php)
|
||||
(define ($generate-book-lot-list$) '())
|
||||
|
||||
;; Indentation of verbatim environments. (This should really be done
|
||||
;; with start-indent in DSSSL.)
|
||||
;; Use of indentation in this area exposes a bug in openjade,
|
||||
;; http://archives.postgresql.org/pgsql-docs/2006-12/msg00064.php
|
||||
;; (define %indent-programlisting-lines% " ")
|
||||
;; (define %indent-screen-lines% " ")
|
||||
;; (define %indent-synopsis-lines% " ")
|
||||
|
||||
|
||||
;; Default graphic format: Jadetex wants eps, pdfjadetex wants pdf.
|
||||
;; (Note that pdfjadetex will not accept eps, that's why we need to
|
||||
;; create a different .tex file for each.) What works with RTF?
|
||||
|
||||
(define texpdf-output #f) ;; override from command line
|
||||
|
||||
(define %graphic-default-extension%
|
||||
(cond (tex-backend (if texpdf-output "pdf" "eps"))
|
||||
(rtf-backend "gif")
|
||||
(else "XXX")))
|
||||
|
||||
;; Need to add pdf here so that the above works. Default setup
|
||||
;; doesn't know about PDF.
|
||||
(define preferred-mediaobject-extensions
|
||||
(list "eps" "ps" "jpg" "jpeg" "pdf" "png"))
|
||||
|
||||
|
||||
;; Don't show links when citing a bibliography entry. This fouls up
|
||||
;; the footnumber counting. To get the link, one can still look into
|
||||
;; the bibliography itself.
|
||||
(mode xref-title-mode
|
||||
(element ulink
|
||||
(process-children)))
|
||||
|
||||
|
||||
;; Format legalnotice justified and with space between paragraphs.
|
||||
(mode book-titlepage-verso-mode
|
||||
(element (legalnotice para)
|
||||
(make paragraph
|
||||
use: book-titlepage-verso-style ;; alter this if ever it needs to appear elsewhere
|
||||
quadding: %default-quadding%
|
||||
line-spacing: (* 0.8 (inherited-line-spacing))
|
||||
font-size: (* 0.8 (inherited-font-size))
|
||||
space-before: (* 0.8 %para-sep%)
|
||||
space-after: (* 0.8 %para-sep%)
|
||||
first-line-start-indent: (if (is-first-para)
|
||||
(* 0.8 %para-indent-firstpara%)
|
||||
(* 0.8 %para-indent%))
|
||||
(process-children))))
|
||||
|
||||
|
||||
;; Fix spacing problems in variablelists
|
||||
|
||||
(element (varlistentry term)
|
||||
(make paragraph
|
||||
space-before: (if (first-sibling?)
|
||||
%para-sep%
|
||||
0pt)
|
||||
keep-with-next?: #t
|
||||
(process-children)))
|
||||
|
||||
(define %varlistentry-indent% 2em)
|
||||
|
||||
(element (varlistentry listitem)
|
||||
(make sequence
|
||||
start-indent: (+ (inherited-start-indent) %varlistentry-indent%)
|
||||
(process-children)))
|
||||
|
||||
|
||||
;; Whitespace fixes for itemizedlists and orderedlists
|
||||
|
||||
(define (process-listitem-content)
|
||||
(if (absolute-first-sibling?)
|
||||
(make sequence
|
||||
(process-children-trim))
|
||||
(next-match)))
|
||||
|
||||
|
||||
;; Default stylesheets format simplelists as tables. This spells
|
||||
;; trouble for Jade. So we just format them as plain lines.
|
||||
|
||||
(define %simplelist-indent% 1em)
|
||||
|
||||
(define (my-simplelist-vert members)
|
||||
(make display-group
|
||||
space-before: %para-sep%
|
||||
space-after: %para-sep%
|
||||
start-indent: (+ %simplelist-indent% (inherited-start-indent))
|
||||
(process-children)))
|
||||
|
||||
(element simplelist
|
||||
(let ((type (attribute-string (normalize "type")))
|
||||
(cols (if (attribute-string (normalize "columns"))
|
||||
(if (> (string->number (attribute-string (normalize "columns"))) 0)
|
||||
(string->number (attribute-string (normalize "columns")))
|
||||
1)
|
||||
1))
|
||||
(members (select-elements (children (current-node)) (normalize "member"))))
|
||||
(cond
|
||||
((equal? type (normalize "inline"))
|
||||
(if (equal? (gi (parent (current-node)))
|
||||
(normalize "para"))
|
||||
(process-children)
|
||||
(make paragraph
|
||||
space-before: %para-sep%
|
||||
space-after: %para-sep%
|
||||
start-indent: (inherited-start-indent))))
|
||||
((equal? type (normalize "vert"))
|
||||
(my-simplelist-vert members))
|
||||
((equal? type (normalize "horiz"))
|
||||
(simplelist-table 'row cols members)))))
|
||||
|
||||
(element member
|
||||
(let ((type (inherited-attribute-string (normalize "type"))))
|
||||
(cond
|
||||
((equal? type (normalize "inline"))
|
||||
(make sequence
|
||||
(process-children)
|
||||
(if (not (last-sibling?))
|
||||
(literal ", ")
|
||||
(literal ""))))
|
||||
((equal? type (normalize "vert"))
|
||||
(make paragraph
|
||||
space-before: 0pt
|
||||
space-after: 0pt))
|
||||
((equal? type (normalize "horiz"))
|
||||
(make paragraph
|
||||
quadding: 'start
|
||||
(process-children))))))
|
||||
|
||||
|
||||
;; Jadetex doesn't handle links to the content of tables, so
|
||||
;; indexterms that point to table entries will go nowhere. We fix
|
||||
;; this by pointing the index entry to the table itself instead, which
|
||||
;; should be equally useful in practice.
|
||||
|
||||
;; Return what ancestor-member finds for ND among the elements listed by
;; ($table-element-list$), or ND itself when that result is empty.
(define (find-parent-table nd)
  (let ((enclosing (ancestor-member nd ($table-element-list$))))
    (if (not (node-list-empty? enclosing))
        enclosing
        nd)))
|
||||
|
||||
;; (The function below overrides the one in print/dbindex.dsl.)
|
||||
|
||||
(define (indexentry-link nd)
|
||||
(let* ((id (attribute-string (normalize "role") nd))
|
||||
(prelim-target (find-indexterm id))
|
||||
(target (find-parent-table prelim-target))
|
||||
(preferred (not (node-list-empty?
|
||||
(select-elements (children (current-node))
|
||||
(normalize "emphasis")))))
|
||||
(sosofo (if (node-list-empty? target)
|
||||
(literal "?")
|
||||
(make link
|
||||
destination: (node-list-address target)
|
||||
(with-mode toc-page-number-mode
|
||||
(process-node-list target))))))
|
||||
(if preferred
|
||||
(make sequence
|
||||
font-weight: 'bold
|
||||
sosofo)
|
||||
sosofo)))
|
||||
|
||||
|
||||
;; By default, the part and reference title pages get wrong page
|
||||
;; numbers: The first title page gets roman numerals carried over from
|
||||
;; preface/toc -- we want Arabic numerals. We also need to make sure
|
||||
;; that page-number-restart is set to #f explicitly, because otherwise
|
||||
;; it will carry over from the previous component, which is not good.
|
||||
;;
|
||||
;; (This looks worse than it is. It's copied from print/dbttlpg.dsl
|
||||
;; and common/dbcommon.dsl and modified in minor detail.)
|
||||
|
||||
;; #t when the first "part" child of the enclosing book is the same node
;; as the component/division that ancestor-member finds for the current
;; node; #f otherwise, including when the book has no "part" child.
(define (first-part?)
  (let ((enclosing (ancestor-member (current-node)
                                    (append
                                     (component-element-list)
                                     (division-element-list)))))
    (let scan ((siblings (children (ancestor (normalize "book")))))
      (cond
       ;; Ran out of book children without meeting a part.
       ((node-list-empty? siblings)
        #f)
       ;; Only the first part encountered is ever compared.
       ((equal? (gi (node-list-first siblings)) (normalize "part"))
        (node-list=? (node-list-first siblings) enclosing))
       (else
        (scan (node-list-rest siblings)))))))
|
||||
|
||||
;; Predicate: #t when the component/division ancestor of the current node
;; is the first "reference" child of the enclosing "book".
;; Walks the book's children in order; at the first child whose gi is
;; "reference" it returns whether that child is node-list=? to ND, and
;; stops there. Returns #f when the book has no "reference" child.
(define (first-reference?)
|
||||
(let* ((book (ancestor (normalize "book")))
|
||||
(nd (ancestor-member (current-node)
|
||||
(append
|
||||
(component-element-list)
|
||||
(division-element-list))))
|
||||
(bookch (children book)))
|
||||
(let loop ((nl bookch))
|
||||
(if (node-list-empty? nl)
|
||||
#f
|
||||
(if (equal? (gi (node-list-first nl)) (normalize "reference"))
|
||||
(if (node-list=? (node-list-first nl) nd)
|
||||
#t
|
||||
#f)
|
||||
(loop (node-list-rest nl)))))))
|
||||
|
||||
|
||||
;; Build the title page for a part.
;;   elements - node list of candidate title page elements
;;   side     - 'recto (default) or anything else for the verso side
;; When (part-titlepage-content? elements side) is false the result is
;; (empty-sosofo). Otherwise a simple-page-sequence is produced that
;; contains, in order: a 1pt spacer paragraph, the title page elements
;; (dispatched on gi to the subtitle/title/default handlers), an optional
;; TOC, and an optional partintro.
(define (part-titlepage elements #!optional (side 'recto))
|
||||
(let ((nodelist (titlepage-nodelist
|
||||
(if (equal? side 'recto)
|
||||
;; NOTE(review): the element lists used here are the *reference*
|
||||
;; title page lists, not part-specific ones -- looks like it may have
|
||||
;; been carried over from reference-titlepage; confirm it is intended.
(reference-titlepage-recto-elements)
|
||||
(reference-titlepage-verso-elements))
|
||||
elements))
|
||||
;; partintro is a special case...
|
||||
(partintro (node-list-first
|
||||
(node-list-filter-by-gi elements (list (normalize "partintro"))))))
|
||||
(if (part-titlepage-content? elements side)
|
||||
(make simple-page-sequence
|
||||
page-n-columns: %titlepage-n-columns%
|
||||
;; Make sure that page number format is correct.
|
||||
page-number-format: ($page-number-format$)
|
||||
;; Make sure that the page number is set to 1 if this is the
|
||||
;; first part in the book
|
||||
page-number-restart?: (first-part?)
|
||||
input-whitespace-treatment: 'collapse
|
||||
use: default-text-style
|
||||
|
||||
;; This hack is required for the RTF backend. If an external-graphic
|
||||
;; is the first thing on the page, RTF doesn't seem to do the right
|
||||
;; thing (the graphic winds up on the baseline of the first line
|
||||
;; of the page, left justified). This "one point rule" fixes
|
||||
;; that problem.
|
||||
(make paragraph
|
||||
line-spacing: 1pt
|
||||
(literal ""))
|
||||
|
||||
;; Walk the title page nodes; LASTNODE is the previously processed
|
||||
;; node, so the "before" hook only fires when the element type changes.
(let loop ((nl nodelist) (lastnode (empty-node-list)))
|
||||
(if (node-list-empty? nl)
|
||||
(empty-sosofo)
|
||||
(make sequence
|
||||
(if (or (node-list-empty? lastnode)
|
||||
(not (equal? (gi (node-list-first nl))
|
||||
(gi lastnode))))
|
||||
(part-titlepage-before (node-list-first nl) side)
|
||||
(empty-sosofo))
|
||||
(cond
|
||||
((equal? (gi (node-list-first nl)) (normalize "subtitle"))
|
||||
(part-titlepage-subtitle (node-list-first nl) side))
|
||||
((equal? (gi (node-list-first nl)) (normalize "title"))
|
||||
(part-titlepage-title (node-list-first nl) side))
|
||||
(else
|
||||
(part-titlepage-default (node-list-first nl) side)))
|
||||
(loop (node-list-rest nl) (node-list-first nl)))))
|
||||
|
||||
;; TOC is emitted only on the recto side and only when both
|
||||
;; part-toc parameters are true.
(if (and %generate-part-toc%
|
||||
%generate-part-toc-on-titlepage%
|
||||
(equal? side 'recto))
|
||||
(make display-group
|
||||
(build-toc (current-node)
|
||||
(toc-depth (current-node))))
|
||||
(empty-sosofo))
|
||||
|
||||
;; PartIntro is a special case
|
||||
(if (and (equal? side 'recto)
|
||||
(not (node-list-empty? partintro))
|
||||
%generate-partintro-on-titlepage%)
|
||||
($process-partintro$ partintro #f)
|
||||
(empty-sosofo)))
|
||||
|
||||
(empty-sosofo))))
|
||||
|
||||
|
||||
;; Build the title page for a reference.
;;   elements - node list of candidate title page elements
;;   side     - 'recto (default) or anything else for the verso side
;; When (reference-titlepage-content? elements side) is false the result
;; is (empty-sosofo). Otherwise a simple-page-sequence is produced that
;; contains, in order: a 1pt spacer paragraph, the title page elements
;; (dispatched on gi to the author/authorgroup/corpauthor/editor/
;; subtitle/title/default handlers), an optional TOC, and an optional
;; partintro.
(define (reference-titlepage elements #!optional (side 'recto))
|
||||
(let ((nodelist (titlepage-nodelist
|
||||
(if (equal? side 'recto)
|
||||
(reference-titlepage-recto-elements)
|
||||
(reference-titlepage-verso-elements))
|
||||
elements))
|
||||
;; partintro is a special case...
|
||||
(partintro (node-list-first
|
||||
(node-list-filter-by-gi elements (list (normalize "partintro"))))))
|
||||
(if (reference-titlepage-content? elements side)
|
||||
(make simple-page-sequence
|
||||
page-n-columns: %titlepage-n-columns%
|
||||
;; Make sure that page number format is correct.
|
||||
page-number-format: ($page-number-format$)
|
||||
;; Make sure that the page number is set to 1 if this is the
|
||||
;; first reference in the book
|
||||
page-number-restart?: (first-reference?)
|
||||
input-whitespace-treatment: 'collapse
|
||||
use: default-text-style
|
||||
|
||||
;; This hack is required for the RTF backend. If an external-graphic
|
||||
;; is the first thing on the page, RTF doesn't seem to do the right
|
||||
;; thing (the graphic winds up on the baseline of the first line
|
||||
;; of the page, left justified). This "one point rule" fixes
|
||||
;; that problem.
|
||||
(make paragraph
|
||||
line-spacing: 1pt
|
||||
(literal ""))
|
||||
|
||||
;; Walk the title page nodes; LASTNODE is the previously processed
|
||||
;; node, so the "before" hook only fires when the element type changes.
(let loop ((nl nodelist) (lastnode (empty-node-list)))
|
||||
(if (node-list-empty? nl)
|
||||
(empty-sosofo)
|
||||
(make sequence
|
||||
(if (or (node-list-empty? lastnode)
|
||||
(not (equal? (gi (node-list-first nl))
|
||||
(gi lastnode))))
|
||||
(reference-titlepage-before (node-list-first nl) side)
|
||||
(empty-sosofo))
|
||||
(cond
|
||||
((equal? (gi (node-list-first nl)) (normalize "author"))
|
||||
(reference-titlepage-author (node-list-first nl) side))
|
||||
((equal? (gi (node-list-first nl)) (normalize "authorgroup"))
|
||||
(reference-titlepage-authorgroup (node-list-first nl) side))
|
||||
((equal? (gi (node-list-first nl)) (normalize "corpauthor"))
|
||||
(reference-titlepage-corpauthor (node-list-first nl) side))
|
||||
((equal? (gi (node-list-first nl)) (normalize "editor"))
|
||||
(reference-titlepage-editor (node-list-first nl) side))
|
||||
((equal? (gi (node-list-first nl)) (normalize "subtitle"))
|
||||
(reference-titlepage-subtitle (node-list-first nl) side))
|
||||
((equal? (gi (node-list-first nl)) (normalize "title"))
|
||||
(reference-titlepage-title (node-list-first nl) side))
|
||||
(else
|
||||
(reference-titlepage-default (node-list-first nl) side)))
|
||||
(loop (node-list-rest nl) (node-list-first nl)))))
|
||||
|
||||
;; TOC is emitted only on the recto side and only when both
|
||||
;; reference-toc parameters are true.
(if (and %generate-reference-toc%
|
||||
%generate-reference-toc-on-titlepage%
|
||||
(equal? side 'recto))
|
||||
(make display-group
|
||||
(build-toc (current-node)
|
||||
(toc-depth (current-node))))
|
||||
(empty-sosofo))
|
||||
|
||||
;; PartIntro is a special case
|
||||
(if (and (equal? side 'recto)
|
||||
(not (node-list-empty? partintro))
|
||||
%generate-partintro-on-titlepage%)
|
||||
($process-partintro$ partintro #f)
|
||||
(empty-sosofo)))
|
||||
|
||||
(empty-sosofo))))
|
||||
|
||||
]]> <!-- %output-print -->
|
||||
|
||||
|
||||
<!-- Plain text output customization ............................... -->
|
||||
|
||||
<!--
|
||||
This is used for making the INSTALL file and others. We customize the
|
||||
HTML stylesheets to be suitable for dumping plain text (via Netscape,
|
||||
Lynx, or similar).
|
||||
-->
|
||||
|
||||
<![ %output-text; [
|
||||
|
||||
;; Text-output parameter overrides: both autolabel parameters are set to
;; #f and $generate-chapter-toc$ becomes a thunk that always returns #f.
;; NOTE(review): presumably this turns off section/chapter numbering and
;; per-chapter TOCs in the DocBook stylesheets -- confirm against the
;; stylesheet parameter reference.
(define %section-autolabel% #f)
|
||||
(define %chapter-autolabel% #f)
|
||||
(define $generate-chapter-toc$ (lambda () #f))
|
||||
|
||||
;; For text output, produce "ASCII markup" for emphasis and such.
|
||||
|
||||
;; Return a sequence consisting of a literal "*", then SOSOFO, then a
;; literal "*". SOSOFO defaults to (process-children).
(define ($asterix-seq$ #!optional (sosofo (process-children)))
|
||||
(make sequence
|
||||
(literal "*")
|
||||
sosofo
|
||||
(literal "*")))
|
||||
|
||||
;; Return a sequence consisting of the (gentext-start-quote) string,
;; then SOSOFO, then the (gentext-end-quote) string. SOSOFO defaults to
;; (process-children).
(define ($dquote-seq$ #!optional (sosofo (process-children)))
|
||||
(make sequence
|
||||
(literal (gentext-start-quote))
|
||||
sosofo
|
||||
(literal (gentext-end-quote))))
|
||||
|
||||
;; Construction rules for inline elements that occur inside a para:
;; emphasis is rendered with $asterix-seq$; command, filename, option,
;; replaceable and userinput are rendered with $dquote-seq$.
(element (para command) ($dquote-seq$))
|
||||
(element (para emphasis) ($asterix-seq$))
|
||||
(element (para filename) ($dquote-seq$))
|
||||
(element (para option) ($dquote-seq$))
|
||||
(element (para replaceable) ($dquote-seq$))
|
||||
(element (para userinput) ($dquote-seq$))
|
||||
|
||||
]]> <!-- %output-text -->
|
||||
|
||||
</style-specification-body>
|
||||
</style-specification>
|
||||
|
||||
<external-specification id="docbook" document="dbstyle">
|
||||
</style-sheet>
|
||||
209
doc/switchover.sgml
Normal file
209
doc/switchover.sgml
Normal file
@@ -0,0 +1,209 @@
|
||||
<chapter id="performing-switchover" xreflabel="Performing a switchover with repmgr">
|
||||
|
||||
<indexterm>
|
||||
<primary>switchover</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>Performing a switchover with repmgr</title>
|
||||
<para>
|
||||
A typical use-case for replication is a combination of primary and standby
|
||||
server, with the standby serving as a backup which can easily be activated
|
||||
in case of a problem with the primary. Such an unplanned failover would
|
||||
normally be handled by promoting the standby, after which an appropriate
|
||||
action must be taken to restore the old primary.
|
||||
</para>
|
||||
<para>
|
||||
In some cases however it's desirable to promote the standby in a planned
|
||||
way, e.g. so maintenance can be performed on the primary; this kind of switchover
|
||||
is supported by the <xref linkend="repmgr-standby-switchover"> command.
|
||||
</para>
|
||||
<para>
|
||||
<command>repmgr standby switchover</command> differs from other &repmgr;
|
||||
actions in that it also performs actions on another server (the demotion
|
||||
candidate), which means passwordless SSH access is required to that server
|
||||
from the one where <command>repmgr standby switchover</command> is executed.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
<command>repmgr standby switchover</command> performs a relatively complex
|
||||
series of operations on two servers, and should therefore be performed after
|
||||
careful preparation and with adequate attention. In particular you should
|
||||
be confident that your network environment is stable and reliable.
|
||||
</simpara>
|
||||
<simpara>
|
||||
Additionally you should be sure that the current primary can be shut down
|
||||
quickly and cleanly. In particular, access from applications should be
|
||||
minimized or preferably blocked completely. Also be aware that if there
|
||||
is a backlog of files waiting to be archived, PostgreSQL will not shut
|
||||
down until archiving completes.
|
||||
</simpara>
|
||||
<simpara>
|
||||
We recommend running <command>repmgr standby switchover</command> at the
|
||||
most verbose logging level (<literal>--log-level=DEBUG --verbose</literal>)
|
||||
and capturing all output to assist troubleshooting any problems.
|
||||
</simpara>
|
||||
<simpara>
|
||||
Please also read carefully the sections <xref linkend="preparing-for-switchover"> and
|
||||
<xref linkend="switchover-caveats"> below.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<sect1 id="preparing-for-switchover" xreflabel="Preparing for switchover">
|
||||
<indexterm>
|
||||
<primary>switchover</primary>
|
||||
<secondary>preparation</secondary>
|
||||
</indexterm>
|
||||
<title>Preparing for switchover</title>
|
||||
<para>
|
||||
As mentioned above, success of the switchover operation depends on &repmgr;
|
||||
being able to shut down the current primary server quickly and cleanly.
|
||||
</para>
|
||||
<para>
|
||||
Double-check which commands will be used to stop/start/restart the current
|
||||
primary; on the primary execute:
|
||||
<programlisting>
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=stop
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=start
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=restart
|
||||
</programlisting>
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
On <literal>systemd</literal> systems we strongly recommend using the appropriate
|
||||
<command>systemctl</command> commands (typically run via <command>sudo</command>) to ensure
|
||||
<literal>systemd</literal> is informed about the status of the PostgreSQL service.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
Check that access from applications is minimized or preferably blocked
|
||||
completely, so applications are not unexpectedly interrupted.
|
||||
</para>
|
||||
<para>
|
||||
Check there is no significant replication lag on standbys attached to the
|
||||
current primary.
|
||||
</para>
|
||||
<para>
|
||||
If WAL file archiving is set up, check that there is no backlog of files waiting
|
||||
to be archived, as PostgreSQL will not finally shut down until all these have been
|
||||
archived. If there is a backlog exceeding <varname>archive_ready_warning</varname> WAL files,
|
||||
&repmgr; will emit a warning before attempting to perform a switchover; you can also check
|
||||
manually with <command>repmgr node check --archive-ready</command>.
|
||||
</para>
|
||||
<para>
|
||||
Ensure that <application>repmgrd</application> is <emphasis>not</emphasis> running anywhere to prevent it unintentionally
|
||||
promoting a node.
|
||||
</para>
|
||||
<para>
|
||||
Finally, consider executing <command>repmgr standby switchover</command> with the
|
||||
<literal>--dry-run</literal> option; this will perform any necessary checks and inform you about
|
||||
success/failure, and stop before the first actual command is run (which would be the shutdown of the
|
||||
current primary). Example output:
|
||||
<programlisting>
|
||||
$ repmgr standby switchover -f /etc/repmgr.conf --siblings-follow --dry-run
|
||||
NOTICE: checking switchover on node "node2" (ID: 2) in --dry-run mode
|
||||
INFO: SSH connection to host "node1" succeeded
|
||||
INFO: archive mode is "off"
|
||||
INFO: replication lag on this standby is 0 seconds
|
||||
INFO: all sibling nodes are reachable via SSH
|
||||
NOTICE: local node "node2" (ID: 2) will be promoted to primary; current primary "node1" (ID: 1) will be demoted to standby
|
||||
INFO: following shutdown command would be run on node "node1":
|
||||
"pg_ctl -l /var/log/postgresql/startup.log -D '/var/lib/postgresql/data' -m fast -W stop"
|
||||
</programlisting>
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="switchover-execution" xreflabel="Executing the switchover command">
|
||||
<indexterm>
|
||||
<primary>switchover</primary>
|
||||
<secondary>execution</secondary>
|
||||
</indexterm>
|
||||
<title>Executing the switchover command</title>
|
||||
<para>
|
||||
To demonstrate switchover, we will assume a replication cluster with a
|
||||
primary (<literal>node1</literal>) and one standby (<literal>node2</literal>);
|
||||
after the switchover <literal>node2</literal> should become the primary with
|
||||
<literal>node1</literal> following it.
|
||||
</para>
|
||||
<para>
|
||||
The switchover command must be run from the standby which is to be promoted,
|
||||
and in its simplest form looks like this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf standby switchover
|
||||
NOTICE: executing switchover on node "node2" (ID: 2)
|
||||
INFO: searching for primary node
|
||||
INFO: checking if node 1 is primary
|
||||
INFO: current primary node is 1
|
||||
INFO: SSH connection to host "node1" succeeded
|
||||
INFO: archive mode is "off"
|
||||
INFO: replication lag on this standby is 0 seconds
|
||||
NOTICE: local node "node2" (ID: 2) will be promoted to primary; current primary "node1" (ID: 1) will be demoted to standby
|
||||
NOTICE: stopping current primary node "node1" (ID: 1)
|
||||
NOTICE: issuing CHECKPOINT
|
||||
DETAIL: executing server command "pg_ctl -l /var/log/postgres/startup.log -D '/var/lib/pgsql/data' -m fast -W stop"
|
||||
INFO: checking primary status; 1 of 6 attempts
|
||||
NOTICE: current primary has been cleanly shut down at location 0/3001460
|
||||
NOTICE: promoting standby to primary
|
||||
DETAIL: promoting server "node2" (ID: 2) using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' promote"
|
||||
server promoting
|
||||
NOTICE: STANDBY PROMOTE successful
|
||||
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
|
||||
INFO: setting node 1's primary to node 2
|
||||
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' restart"
|
||||
NOTICE: NODE REJOIN successful
|
||||
DETAIL: node 1 is now attached to node 2
|
||||
NOTICE: switchover was successful
|
||||
DETAIL: node "node2" is now primary
|
||||
NOTICE: STANDBY SWITCHOVER is complete
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The old primary is now replicating as a standby from the new primary, and the
|
||||
cluster status will now look like this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
||||
1 | node1 | standby | running | node2 | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr
|
||||
</programlisting>
|
||||
</para>
|
||||
</sect1>
|
||||
<sect1 id="switchover-caveats" xreflabel="Caveats">
|
||||
<indexterm>
|
||||
<primary>switchover</primary>
|
||||
<secondary>caveats</secondary>
|
||||
</indexterm>
|
||||
<title>Caveats</title>
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
If using PostgreSQL 9.3 or 9.4, you should ensure that the shutdown command
|
||||
is configured to use PostgreSQL's <varname>fast</varname> shutdown mode (the default in 9.5
|
||||
and later). If relying on <command>pg_ctl</command> to perform database server operations,
|
||||
you should include <literal>-m fast</literal> in <varname>pg_ctl_options</varname>
|
||||
in <filename>repmgr.conf</filename>.
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<command>pg_rewind</command> <emphasis>requires</emphasis> that either <varname>wal_log_hints</varname> is enabled, or that
|
||||
data checksums were enabled when the cluster was initialized. See the
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html">pg_rewind documentation</ulink>
|
||||
for details.
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<application>repmgrd</application> should not be running with setting <varname>failover=automatic</varname>
|
||||
in <filename>repmgr.conf</filename> when a switchover is carried out, otherwise the
|
||||
<application>repmgrd</application> daemon may try to promote a standby by itself.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
We hope to remove some of these restrictions in future versions of &repmgr;.
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
@@ -1,121 +1,9 @@
|
||||
Upgrading from repmgr 3
|
||||
=======================
|
||||
|
||||
The upgrade process consists of two steps:
|
||||
This document has been integrated into the main `repmgr` documentation
|
||||
and is now located here:
|
||||
|
||||
1) converting the repmgr.conf configuration files
|
||||
2) upgrading the repmgr schema.
|
||||
|
||||
Scripts are provided to assist both with converting repmgr.conf
|
||||
and upgrading the schema.
|
||||
|
||||
Converting repmgr.conf configuration files
|
||||
------------------------------------------
|
||||
|
||||
With a completely new repmgr version, we've taken the opportunity
|
||||
to rename some configuration items have had their names changed for
|
||||
clarity and consistency, both between the configuration file and
|
||||
the column names in `repmgr.nodes` (e.g. `node` → `node_id`), and
|
||||
also for consistency with PostgreSQL naming conventions
|
||||
(e.g. `loglevel` → `log_level`).
|
||||
|
||||
Other configuration items have been changed to command line options,
|
||||
and vice-versa, e.g. to avoid hard-coding items such as a a node's
|
||||
upstream ID, which might change over time.
|
||||
|
||||
`repmgr` will issue a warning about deprecated/altered options.
|
||||
> [Upgrading from repmgr 3.x](https://repmgr.org/docs/4.0/upgrading-from-repmgr-3.html)
|
||||
|
||||
|
||||
### Changed parameters
|
||||
|
||||
Following parameters have been added:
|
||||
|
||||
- `data_directory`: this is mandatory and must contain the path
|
||||
to the node's data directory
|
||||
- `monitoring_history`: this replaces the `repmgrd` command line
|
||||
option `--monitoring-history`
|
||||
|
||||
Following parameters have been renamed:
|
||||
|
||||
- `node` → `node_id`
|
||||
- `loglevel` → `log_level`
|
||||
- `logfacility` → `log_facility`
|
||||
- `logfile` → `log_file`
|
||||
- `master_reponse_timeout` → `async_query_timeout`
|
||||
|
||||
Following parameters have been removed:
|
||||
|
||||
- `cluster` is no longer required and will be ignored.
|
||||
- `upstream_node_id` is replaced by the command-line parameter
|
||||
`--upstream-node-id`
|
||||
|
||||
### Conversion script
|
||||
|
||||
To assist with conversion of `repmgr.conf` files, a Perl script
|
||||
is provided in `contrib/convert-config.pl`. Use like this:
|
||||
|
||||
$ ./convert-config.pl /etc/repmgr.conf
|
||||
node_id=2
|
||||
node_name=node2
|
||||
conninfo=host=localhost dbname=repmgr user=repmgr port=5602
|
||||
pg_ctl_options='-l /tmp/postgres.5602.log'
|
||||
pg_bindir=/home/barwick/devel/builds/HEAD/bin
|
||||
rsync_options=--exclude=postgresql.local.conf --archive
|
||||
log_level=DEBUG
|
||||
pg_basebackup_options=--no-slot
|
||||
data_directory=
|
||||
|
||||
The converted file is printed to `STDOUT` and the original file is not
|
||||
changed.
|
||||
|
||||
Please note that the parameter `data_directory` *must* be provided;
|
||||
if not already present, the conversion script will add an empty
|
||||
placeholder parameter.
|
||||
|
||||
|
||||
Upgrading the repmgr schema
|
||||
---------------------------
|
||||
|
||||
Ensure `repmgrd` is not running, or any cron jobs which execute the
|
||||
`repmgr` binary.
|
||||
|
||||
Install `repmgr4`; any `repmgr3` packages should be uninstalled
|
||||
(if not automatically installed already).
|
||||
|
||||
### Manually create the repmgr extension
|
||||
|
||||
In the database used by the existing `repmgr` configuration, execute:
|
||||
|
||||
CREATE EXTENSION repmgr FROM unpackaged;
|
||||
|
||||
This will move and convert all objects from the existing schema
|
||||
into the new, standard `repmgr` schema.
|
||||
|
||||
> *NOTE* there must be only one schema matching 'repmgr_%' in the
|
||||
> database, otherwise this step may not work.
|
||||
|
||||
### Re-register each node
|
||||
|
||||
This is necessary to update the `repmgr` metadata with some additional items.
|
||||
|
||||
On the primary node, execute e.g.
|
||||
|
||||
repmgr primary register -f /etc/repmgr.conf --force
|
||||
|
||||
On each standby node, execute e.g.
|
||||
|
||||
repmgr standby register -f /etc/repmgr.conf --force
|
||||
|
||||
Check the data is updated as expected by examining the `repmgr.nodes` table;
|
||||
restart `repmgrd` if required.
|
||||
|
||||
The original `repmgr_$cluster` schema can be dropped at any time.
|
||||
|
||||
* * *
|
||||
|
||||
> *TIP* If you don't care about any data from the existing `repmgr` installation,
|
||||
> (e.g. the contents of the `events` and `monitoring` tables), the manual
|
||||
> "CREATE EXTENSION" step can be skipped; just re-register each node, starting
|
||||
> with the primary node, and the `repmgr` extension will be automatically created.
|
||||
|
||||
* * *
|
||||
|
||||
277
doc/upgrading-repmgr.sgml
Normal file
277
doc/upgrading-repmgr.sgml
Normal file
@@ -0,0 +1,277 @@
|
||||
<chapter id="upgrading-repmgr" xreflabel="Upgrading repmgr">
|
||||
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>Upgrading repmgr</title>
|
||||
|
||||
<para>
|
||||
&repmgr; is updated regularly with point releases (e.g. 4.0.1 to 4.0.2)
|
||||
containing bugfixes and other minor improvements. Any substantial new
|
||||
functionality will be included in a feature release (e.g. 4.0.x to 4.1.x).
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; is implemented as a PostgreSQL extension; to upgrade it, first
|
||||
install the updated package (or compile the updated source), then in the
|
||||
database where the &repmgr; extension is installed, execute
|
||||
<command>ALTER EXTENSION repmgr UPDATE</command>.
|
||||
</para>
|
||||
<para>
|
||||
If <application>repmgrd</application> is running, it may be necessary to restart
|
||||
the PostgreSQL server if the upgrade contains changes to the shared object
|
||||
file used by <application>repmgrd</application>; check the release notes for details.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Please check the <link linkend="appendix-release-notes">release notes</link> for every
|
||||
release as they may contain upgrade instructions particular to individual versions.
|
||||
</para>
|
||||
|
||||
<sect1 id="upgrading-from-repmgr-3" xreflabel="Upgrading from repmgr 3.x">
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
<secondary>from repmgr 3.x</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Upgrading from repmgr 3.x</title>
|
||||
<para>
|
||||
The upgrade process consists of two steps:
|
||||
<orderedlist>
|
||||
<listitem>
|
||||
<simpara>
|
||||
converting the repmgr.conf configuration files
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
upgrading the repmgr schema
|
||||
</simpara>
|
||||
</listitem>
|
||||
</orderedlist>
|
||||
</para>
|
||||
<para>
|
||||
A script is provided to assist with converting <filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
<para>
|
||||
The schema upgrade (which converts the &repmgr; metadata into
|
||||
a packaged PostgreSQL extension) is normally carried out
|
||||
automatically when the &repmgr; extension is created.
|
||||
</para>
|
||||
<sect2 id="converting-repmgr-conf">
|
||||
<title>Converting repmgr.conf configuration files</title>
|
||||
<para>
|
||||
With a completely new repmgr version, we've taken the opportunity
|
||||
to rename some configuration items for
|
||||
clarity and consistency, both between the configuration file and
|
||||
the column names in <structname>repmgr.nodes</structname>
|
||||
(e.g. <varname>node</varname> to <varname>node_id</varname>), and
|
||||
also for consistency with PostgreSQL naming conventions
|
||||
(e.g. <varname>loglevel</varname> to <varname>log_level</varname>).
|
||||
</para>
|
||||
<para>
|
||||
Other configuration items have been changed to command line options,
|
||||
and vice-versa, e.g. to avoid hard-coding items such as a node's
|
||||
upstream ID, which might change over time.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; will issue a warning about deprecated/altered options.
|
||||
</para>
|
||||
<sect3>
|
||||
<title>Changed parameters in "repmgr.conf"</title>
|
||||
<para>
|
||||
Following parameters have been added:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><varname>data_directory</varname>: this is mandatory and must
|
||||
contain the path to the node's data directory</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><varname>monitoring_history</varname>: this replaces the
|
||||
<application>repmgrd</application> command line option
|
||||
<literal>--monitoring-history</literal></simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
Following parameters have been renamed:
|
||||
</para>
|
||||
<table tocentry="1" id="repmgr3-repmgr4-renamed-parameters">
|
||||
<title>Parameters renamed in repmgr4</title>
|
||||
<tgroup cols="2">
|
||||
<thead>
|
||||
<row>
|
||||
<entry>repmgr3</entry>
|
||||
<entry>repmgr4</entry>
|
||||
</row>
|
||||
</thead>
|
||||
<tbody>
|
||||
<row>
|
||||
<entry><varname>node</varname></entry>
|
||||
<entry><varname>node_id</varname></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><varname>loglevel</varname></entry>
|
||||
<entry><varname>log_level</varname></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><varname>logfacility</varname></entry>
|
||||
<entry><varname>log_facility</varname></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><varname>logfile</varname></entry>
|
||||
<entry><varname>log_file</varname></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><varname>barman_server</varname></entry>
|
||||
<entry><varname>barman_host</varname></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><varname>master_response_timeout</varname></entry>
|
||||
<entry><varname>async_query_timeout</varname></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
<note>
|
||||
<para>
|
||||
From &repmgr; 4, <literal>barman_server</literal> refers
|
||||
to the server configured in Barman (in &repmgr; 3, the deprecated
|
||||
<literal>cluster</literal> parameter was used for this);
|
||||
the physical Barman hostname is configured with
|
||||
<literal>barman_host</literal> (see <xref linkend="cloning-from-barman-prerequisites">
|
||||
for details).
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
Following parameters have been removed:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><varname>cluster</varname>: is no longer required and will
|
||||
be ignored.</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><varname>upstream_node</varname>: is replaced by the
|
||||
command-line parameter <literal>--upstream-node-id</literal></simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect3>
|
||||
<sect3>
|
||||
<title>Conversion script</title>
|
||||
<para>
|
||||
To assist with conversion of <filename>repmgr.conf</filename> files, a Perl script
|
||||
is provided in <filename>contrib/convert-config.pl</filename>.
|
||||
Use like this:
|
||||
<programlisting>
|
||||
$ ./convert-config.pl /etc/repmgr.conf
|
||||
node_id=2
|
||||
node_name=node2
|
||||
conninfo=host=node2 dbname=repmgr user=repmgr connect_timeout=2
|
||||
pg_ctl_options='-l /var/log/postgres/startup.log'
|
||||
rsync_options=--exclude=postgresql.local.conf --archive
|
||||
log_level=INFO
|
||||
pg_basebackup_options=--no-slot
|
||||
data_directory=</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The converted file is printed to <literal>STDOUT</literal> and the original file is not
|
||||
changed.
|
||||
</para>
|
||||
<para>
|
||||
Please note that the conversion script will add an empty
|
||||
placeholder parameter for <varname>data_directory</varname>, which
|
||||
is a required parameter in repmgr4 and which <emphasis>must</emphasis>
|
||||
be provided.
|
||||
</para>
|
||||
</sect3>
|
||||
</sect2>
|
||||
<sect2>
|
||||
<title>Upgrading the repmgr schema</title>
|
||||
<para>
|
||||
Ensure <application>repmgrd</application> is not running, and disable any cron jobs which execute the
|
||||
<command>repmgr</command> binary.
|
||||
</para>
|
||||
<para>
|
||||
Install <literal>repmgr 4</literal> packages; any <literal>repmgr 3.x</literal> packages
|
||||
should be uninstalled (if not automatically uninstalled already by your packaging system).
|
||||
</para>
|
||||
<sect3>
|
||||
<title>Upgrading from repmgr 3.1.1 or earlier</title>
|
||||
<para>
|
||||
If your repmgr version is 3.1.1 or earlier, you will need to update
|
||||
the schema to the latest version in the 3.x series (3.3.2) before
|
||||
converting the installation to repmgr 4.
|
||||
</para>
|
||||
<para>
|
||||
To do this, apply the following upgrade scripts as appropriate for
|
||||
your current version:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
<ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/REL3_3_STABLE/sql/repmgr3.0_repmgr3.1.sql">repmgr3.0_repmgr3.1.sql</ulink></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/REL3_3_STABLE/sql/repmgr3.1.1_repmgr3.1.2.sql">repmgr3.1.1_repmgr3.1.2.sql</ulink></simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
For more details see the
|
||||
<ulink url="https://repmgr.org/release-notes-3.3.2.html#upgrading">repmgr 3 upgrade notes</ulink>.
|
||||
</para>
|
||||
</sect3>
|
||||
<sect3>
|
||||
<title>Manually create the repmgr extension</title>
|
||||
<para>
|
||||
In the database used by the existing &repmgr; installation, execute:
|
||||
<programlisting>
|
||||
CREATE EXTENSION repmgr FROM unpackaged;</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This will move and convert all objects from the existing schema
|
||||
into the new, standard <literal>repmgr</literal> schema.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>There must be only one schema matching <literal>repmgr_%</literal> in the
|
||||
database, otherwise this step may not work.
|
||||
</simpara>
|
||||
</note>
|
||||
</sect3>
|
||||
<sect3>
|
||||
<title>Re-register each node</title>
|
||||
<para>
|
||||
This is necessary to update the <literal>repmgr</literal> metadata with some additional items.
|
||||
</para>
|
||||
<para>
|
||||
On the primary node, execute e.g.
|
||||
<programlisting>
|
||||
repmgr primary register -f /etc/repmgr.conf --force</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
On each standby node, execute e.g.
|
||||
<programlisting>
|
||||
repmgr standby register -f /etc/repmgr.conf --force</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Check the data is updated as expected by examining the <structname>repmgr.nodes</structname>
|
||||
table; restart <application>repmgrd</application> if required.
|
||||
</para>
|
||||
<para>
|
||||
The original <literal>repmgr_$cluster</literal> schema can be dropped at any time.
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
If you don't care about any data from the existing &repmgr; installation,
|
||||
(e.g. the contents of the <structname>events</structname> and <structname>monitoring</structname>
|
||||
tables), the manual <command>CREATE EXTENSION</command> step can be skipped; just re-register
|
||||
each node, starting with the primary node, and the <literal>repmgr</literal> extension will be
|
||||
automatically created.
|
||||
</simpara>
|
||||
</tip>
|
||||
</sect3>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
</chapter>
|
||||
1
doc/version.sgml
Normal file
1
doc/version.sgml
Normal file
@@ -0,0 +1 @@
|
||||
<!ENTITY repmgrversion "4.0.1">
|
||||
469
doc/website-docs.css
Normal file
469
doc/website-docs.css
Normal file
@@ -0,0 +1,469 @@
|
||||
/* PostgreSQL.org Documentation Style */
|
||||
|
||||
/* requires global.css, table.css and text.css to be loaded before this file! */
|
||||
body {
|
||||
font-family: verdana, sans-serif;
|
||||
font-size: 76%;
|
||||
background: url("/resources/background.png") repeat-x scroll left top transparent;
|
||||
padding: 15px 4%;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
/* monospace font size fix */
|
||||
pre, code, kbd, samp, tt {
|
||||
font-family: monospace,monospace;
|
||||
font-size: 1em;
|
||||
}
|
||||
|
||||
div.NAVHEADER table {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
/* Container Definitions */
|
||||
|
||||
#docContainerWrap {
|
||||
text-align: center; /* Win IE5 */
|
||||
}
|
||||
|
||||
#docContainer {
|
||||
margin: 0 auto;
|
||||
width: 90%;
|
||||
padding-bottom: 2em;
|
||||
display: block;
|
||||
text-align: left; /* Win IE5 */
|
||||
}
|
||||
|
||||
#docHeader {
|
||||
background-image: url("/media/img/docs/bg_hdr.png");
|
||||
height: 83px;
|
||||
margin: 0px;
|
||||
padding: 0px;
|
||||
display: block;
|
||||
}
|
||||
|
||||
#docHeaderLogo {
|
||||
position: relative;
|
||||
width: 206px;
|
||||
height: 83px;
|
||||
border: 0px;
|
||||
padding: 0px;
|
||||
margin: 0 0 0 20px;
|
||||
}
|
||||
|
||||
#docHeaderLogo img {
|
||||
border: 0px;
|
||||
}
|
||||
|
||||
#docNavSearchContainer {
|
||||
padding-bottom: 2px;
|
||||
}
|
||||
|
||||
#docNav, #docVersions {
|
||||
position: relative;
|
||||
text-align: left;
|
||||
margin-left: 10px;
|
||||
margin-top: 5px;
|
||||
color: #666;
|
||||
font-size: 0.95em;
|
||||
}
|
||||
|
||||
#docSearch {
|
||||
position: relative;
|
||||
text-align: right;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
#docTextSize {
|
||||
text-align: right;
|
||||
white-space: nowrap;
|
||||
margin-top: 7px;
|
||||
font-size: 0.95em;
|
||||
}
|
||||
|
||||
#docSearch form {
|
||||
position: relative;
|
||||
top: 5px;
|
||||
right: 0;
|
||||
margin: 0; /* need for IE 5.5 OSX */
|
||||
text-align: right; /* need for IE 5.5 OSX */
|
||||
white-space: nowrap; /* for Opera */
|
||||
}
|
||||
|
||||
#docSearch form label {
|
||||
color: #666;
|
||||
font-size: 0.95em;
|
||||
}
|
||||
|
||||
#docSearch form input {
|
||||
font-size: 0.95em;
|
||||
}
|
||||
|
||||
#docSearch form #submit {
|
||||
font-size: 0.95em;
|
||||
background: #7A7A7A;
|
||||
color: #fff;
|
||||
border: 1px solid #7A7A7A;
|
||||
padding: 1px 4px;
|
||||
}
|
||||
|
||||
#docSearch form #q {
|
||||
width: 170px;
|
||||
font-size: 0.95em;
|
||||
border: 1px solid #7A7A7A;
|
||||
background: #E1E1E1;
|
||||
color: #000000;
|
||||
padding: 2px;
|
||||
}
|
||||
|
||||
.frmDocSearch {
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
display: inline;
|
||||
}
|
||||
|
||||
.inpDocSearch {
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
color: #000;
|
||||
}
|
||||
|
||||
#docContent {
|
||||
position: relative;
|
||||
margin-left: 10px;
|
||||
margin-right: 10px;
|
||||
margin-top: 40px;
|
||||
}
|
||||
|
||||
#docFooter {
|
||||
position: relative;
|
||||
font-size: 0.9em;
|
||||
color: #666;
|
||||
line-height: 1.3em;
|
||||
margin-left: 10px;
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
#docComments {
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
#docClear {
|
||||
clear: both;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Heading Definitions */
|
||||
|
||||
h1, h2, h3 {
|
||||
font-weight: bold;
|
||||
margin-top: 2ex;
|
||||
color: #444;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 1.4em;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 1.2em !important;
|
||||
}
|
||||
|
||||
h3 {
|
||||
font-size: 1.1em;
|
||||
}
|
||||
|
||||
h1 a:hover,
|
||||
h2 a:hover,
|
||||
h3 a:hover,
|
||||
h4 a:hover {
|
||||
color: #444;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
/* Text Styles */
|
||||
|
||||
div.SECT2 {
|
||||
margin-top: 4ex;
|
||||
}
|
||||
|
||||
div.SECT3 {
|
||||
margin-top: 3ex;
|
||||
margin-left: 3ex;
|
||||
}
|
||||
|
||||
.txtCurrentLocation {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
p, ol, ul, li {
|
||||
line-height: 1.5em;
|
||||
}
|
||||
|
||||
.txtCommentsWrap {
|
||||
border: 2px solid #F5F5F5;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.txtCommentsContent {
|
||||
background: #F5F5F5;
|
||||
padding: 3px;
|
||||
}
|
||||
|
||||
.txtCommentsPoster {
|
||||
float: left;
|
||||
}
|
||||
|
||||
.txtCommentsDate {
|
||||
float: right;
|
||||
}
|
||||
|
||||
.txtCommentsComment {
|
||||
padding: 3px;
|
||||
}
|
||||
|
||||
#docContainer pre code,
|
||||
#docContainer pre tt,
|
||||
#docContainer pre pre,
|
||||
#docContainer tt tt,
|
||||
#docContainer tt code,
|
||||
#docContainer tt pre {
|
||||
font-size: 1em;
|
||||
}
|
||||
|
||||
pre.LITERALLAYOUT,
|
||||
.SCREEN,
|
||||
.SYNOPSIS,
|
||||
.PROGRAMLISTING,
|
||||
.REFSYNOPSISDIV p,
|
||||
table.CAUTION,
|
||||
table.WARNING,
|
||||
blockquote.NOTE,
|
||||
blockquote.TIP,
|
||||
table.CALSTABLE {
|
||||
-moz-box-shadow: 3px 3px 5px #DFDFDF;
|
||||
-webkit-box-shadow: 3px 3px 5px #DFDFDF;
|
||||
-khtml-box-shadow: 3px 3px 5px #DFDFDF;
|
||||
-o-box-shadow: 3px 3px 5px #DFDFDF;
|
||||
box-shadow: 3px 3px 5px #DFDFDF;
|
||||
}
|
||||
|
||||
pre.LITERALLAYOUT,
|
||||
.SCREEN,
|
||||
.SYNOPSIS,
|
||||
.PROGRAMLISTING,
|
||||
.REFSYNOPSISDIV p,
|
||||
table.CAUTION,
|
||||
table.WARNING,
|
||||
blockquote.NOTE,
|
||||
blockquote.TIP {
|
||||
color: black;
|
||||
border-width: 1px;
|
||||
border-style: solid;
|
||||
padding: 2ex;
|
||||
margin: 2ex 0 2ex 2ex;
|
||||
overflow: auto;
|
||||
-moz-border-radius: 8px;
|
||||
-webkit-border-radius: 8px;
|
||||
-khtml-border-radius: 8px;
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
pre.LITERALLAYOUT,
|
||||
pre.SYNOPSIS,
|
||||
pre.PROGRAMLISTING,
|
||||
.REFSYNOPSISDIV p,
|
||||
.SCREEN {
|
||||
border-color: #CFCFCF;
|
||||
background-color: #F7F7F7;
|
||||
}
|
||||
|
||||
blockquote.NOTE,
|
||||
blockquote.TIP {
|
||||
border-color: #DBDBCC;
|
||||
background-color: #EEEEDD;
|
||||
padding: 14px;
|
||||
width: 572px;
|
||||
}
|
||||
|
||||
blockquote.NOTE,
|
||||
blockquote.TIP,
|
||||
table.CAUTION,
|
||||
table.WARNING {
|
||||
margin: 4ex auto;
|
||||
}
|
||||
|
||||
blockquote.NOTE p,
|
||||
blockquote.TIP p {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
blockquote.NOTE pre,
|
||||
blockquote.NOTE code,
|
||||
blockquote.TIP pre,
|
||||
blockquote.TIP code {
|
||||
margin-left: 0;
|
||||
margin-right: 0;
|
||||
-moz-box-shadow: none;
|
||||
-webkit-box-shadow: none;
|
||||
-khtml-box-shadow: none;
|
||||
-o-box-shadow: none;
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
.emphasis,
|
||||
.c2 {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.REPLACEABLE {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* Table Styles */
|
||||
|
||||
table {
|
||||
margin-left: 2ex;
|
||||
}
|
||||
|
||||
table.CALSTABLE td,
|
||||
table.CALSTABLE th,
|
||||
table.CAUTION td,
|
||||
table.CAUTION th,
|
||||
table.WARNING td,
|
||||
table.WARNING th {
|
||||
border-style: solid;
|
||||
}
|
||||
|
||||
table.CALSTABLE,
|
||||
table.CAUTION,
|
||||
table.WARNING {
|
||||
border-spacing: 0;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
table.CALSTABLE
|
||||
{
|
||||
margin: 2ex 0 2ex 2ex;
|
||||
background-color: #E0ECEF;
|
||||
border: 2px solid #A7C6DF;
|
||||
}
|
||||
|
||||
table.CALSTABLE tr:hover td
|
||||
{
|
||||
background-color: #EFEFEF;
|
||||
}
|
||||
|
||||
table.CALSTABLE td {
|
||||
background-color: #FFF;
|
||||
}
|
||||
|
||||
table.CALSTABLE td,
|
||||
table.CALSTABLE th {
|
||||
border: 1px solid #A7C6DF;
|
||||
padding: 0.5ex 0.5ex;
|
||||
}
|
||||
|
||||
table.CAUTION,
|
||||
table.WARNING {
|
||||
border-collapse: separate;
|
||||
display: block;
|
||||
padding: 0;
|
||||
max-width: 600px;
|
||||
}
|
||||
|
||||
table.CAUTION {
|
||||
background-color: #F5F5DC;
|
||||
border-color: #DEDFA7;
|
||||
}
|
||||
|
||||
table.WARNING {
|
||||
background-color: #FFD7D7;
|
||||
border-color: #DF421E;
|
||||
}
|
||||
|
||||
table.CAUTION td,
|
||||
table.CAUTION th,
|
||||
table.WARNING td,
|
||||
table.WARNING th {
|
||||
border-width: 0;
|
||||
padding-left: 2ex;
|
||||
padding-right: 2ex;
|
||||
}
|
||||
|
||||
table.CAUTION td,
|
||||
table.CAUTION th {
|
||||
border-color: #F3E4D5
|
||||
}
|
||||
|
||||
table.WARNING td,
|
||||
table.WARNING th {
|
||||
border-color: #FFD7D7;
|
||||
}
|
||||
|
||||
td.c1,
|
||||
td.c2,
|
||||
td.c3,
|
||||
td.c4,
|
||||
td.c5,
|
||||
td.c6 {
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
border-bottom: 0px solid #FFEFEF;
|
||||
padding: 1ex 2ex 0;
|
||||
}
|
||||
|
||||
/* Link Styles */
|
||||
|
||||
#docNav a {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
a:link,
|
||||
a:visited,
|
||||
a:active,
|
||||
a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
a:link,
|
||||
a:active {
|
||||
color:#0066A2;
|
||||
}
|
||||
|
||||
a:visited {
|
||||
color:#004E66;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color:#000000;
|
||||
}
|
||||
|
||||
#docFooter a:link,
|
||||
#docFooter a:visited,
|
||||
#docFooter a:active {
|
||||
color:#666;
|
||||
}
|
||||
|
||||
#docContainer code.FUNCTION tt {
|
||||
font-size: 1em;
|
||||
}
|
||||
|
||||
div.header {
|
||||
color: #444;
|
||||
margin-top: 5px;
|
||||
}
|
||||
|
||||
div.footer {
|
||||
text-align: center;
|
||||
background-image: url("/resources/footerl.png"), url("/resources/footerr.png"), url("/resources/footerc.png");
|
||||
background-position: left top, right top, center top;
|
||||
background-repeat: no-repeat, no-repeat, repeat-x;
|
||||
padding-top: 45px;
|
||||
}
|
||||
|
||||
img {
|
||||
border-style: none;
|
||||
}
|
||||
@@ -38,33 +38,27 @@ SELECT repmgr.am_bdr_failover_handler(-1);
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT repmgr.am_bdr_failover_handler(NULL);
|
||||
am_bdr_failover_handler
|
||||
-------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT repmgr.get_new_primary();
|
||||
get_new_primary
|
||||
-----------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT repmgr.get_voting_status();
|
||||
get_voting_status
|
||||
-------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT repmgr.notify_follow_primary(-1);
|
||||
notify_follow_primary
|
||||
-----------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT repmgr.other_node_is_candidate(-1,-1);
|
||||
other_node_is_candidate
|
||||
-------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT repmgr.request_vote(-1,-1);
|
||||
request_vote
|
||||
--------------
|
||||
SELECT repmgr.notify_follow_primary(NULL);
|
||||
notify_follow_primary
|
||||
-----------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
@@ -80,10 +74,10 @@ SELECT repmgr.set_local_node_id(-1);
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT repmgr.set_voting_status_initiated();
|
||||
set_voting_status_initiated
|
||||
-----------------------------
|
||||
|
||||
SELECT repmgr.set_local_node_id(NULL);
|
||||
set_local_node_id
|
||||
-------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT repmgr.standby_get_last_updated();
|
||||
|
||||
@@ -6,7 +6,7 @@ CREATE TABLE repmgr.nodes (
|
||||
upstream_node_id INTEGER NULL REFERENCES nodes (node_id) DEFERRABLE,
|
||||
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
node_name TEXT NOT NULL,
|
||||
type TEXT NOT NULL CHECK (type IN('primary','standby','bdr')),
|
||||
type TEXT NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
|
||||
location TEXT NOT NULL DEFAULT 'default',
|
||||
priority INT NOT NULL DEFAULT 100,
|
||||
conninfo TEXT NOT NULL,
|
||||
@@ -79,6 +79,19 @@ LEFT JOIN repmgr.nodes un
|
||||
ON un.node_id = n.upstream_node_id;
|
||||
|
||||
|
||||
/* XXX update upgrade scripts! */
|
||||
CREATE TABLE repmgr.voting_term (
|
||||
term INT NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX voting_term_restrict
|
||||
ON repmgr.voting_term ((TRUE));
|
||||
|
||||
CREATE RULE voting_term_delete AS
|
||||
ON DELETE TO repmgr.voting_term
|
||||
DO INSTEAD NOTHING;
|
||||
|
||||
|
||||
/* ================= */
|
||||
/* repmgrd functions */
|
||||
/* ================= */
|
||||
@@ -90,6 +103,11 @@ CREATE FUNCTION set_local_node_id(INT)
|
||||
AS 'MODULE_PATHNAME', 'set_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_local_node_id()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_set_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS 'MODULE_PATHNAME', 'standby_set_last_updated'
|
||||
@@ -102,49 +120,6 @@ CREATE FUNCTION standby_get_last_updated()
|
||||
|
||||
/* failover functions */
|
||||
|
||||
|
||||
DO $repmgr$
|
||||
DECLARE
|
||||
DECLARE server_version_num INT;
|
||||
BEGIN
|
||||
SELECT setting
|
||||
FROM pg_catalog.pg_settings
|
||||
WHERE name = 'server_version_num'
|
||||
INTO server_version_num;
|
||||
|
||||
IF server_version_num >= 90400 THEN
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE FUNCTION request_vote(INT,INT)
|
||||
RETURNS pg_lsn
|
||||
AS 'MODULE_PATHNAME', 'request_vote'
|
||||
LANGUAGE C STRICT;
|
||||
$repmgr_func$;
|
||||
ELSE
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE FUNCTION request_vote(INT,INT)
|
||||
RETURNS TEXT
|
||||
AS 'MODULE_PATHNAME', 'request_vote'
|
||||
LANGUAGE C STRICT;
|
||||
$repmgr_func$;
|
||||
END IF;
|
||||
END$repmgr$;
|
||||
|
||||
|
||||
CREATE FUNCTION get_voting_status()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_voting_status'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_voting_status_initiated()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'set_voting_status_initiated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION other_node_is_candidate(INT, INT)
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'other_node_is_candidate'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION notify_follow_primary(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'notify_follow_primary'
|
||||
@@ -160,13 +135,11 @@ CREATE FUNCTION reset_voting_status()
|
||||
AS 'MODULE_PATHNAME', 'reset_voting_status'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
|
||||
CREATE FUNCTION am_bdr_failover_handler(INT)
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'am_bdr_failover_handler'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
|
||||
CREATE FUNCTION unset_bdr_failover_handler()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
|
||||
|
||||
@@ -32,7 +32,7 @@ CREATE TABLE repmgr.nodes (
|
||||
upstream_node_id INTEGER NULL REFERENCES repmgr.nodes (node_id) DEFERRABLE,
|
||||
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
node_name TEXT NOT NULL,
|
||||
type TEXT NOT NULL CHECK (type IN('primary','standby','bdr')),
|
||||
type TEXT NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
|
||||
location TEXT NOT NULL DEFAULT 'default',
|
||||
priority INT NOT NULL DEFAULT 100,
|
||||
conninfo TEXT NOT NULL,
|
||||
@@ -54,8 +54,34 @@ SELECT id, upstream_node_id, active, name,
|
||||
|
||||
ALTER TABLE repmgr.repl_events RENAME TO events;
|
||||
|
||||
-- create new table "repmgr.voting_term"
|
||||
CREATE TABLE repmgr.voting_term (
|
||||
term INT NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX voting_term_restrict
|
||||
ON repmgr.voting_term ((TRUE));
|
||||
|
||||
CREATE RULE voting_term_delete AS
|
||||
ON DELETE TO repmgr.voting_term
|
||||
DO INSTEAD NOTHING;
|
||||
|
||||
INSERT INTO repmgr.voting_term (term) VALUES (1);
|
||||
|
||||
|
||||
-- convert "repmgr_$cluster.repl_monitor" to "monitoring_history"
|
||||
|
||||
|
||||
DO $repmgr$
|
||||
DECLARE
|
||||
DECLARE server_version_num INT;
|
||||
BEGIN
|
||||
SELECT setting
|
||||
FROM pg_catalog.pg_settings
|
||||
WHERE name = 'server_version_num'
|
||||
INTO server_version_num;
|
||||
IF server_version_num >= 90400 THEN
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE TABLE repmgr.monitoring_history (
|
||||
primary_node_id INTEGER NOT NULL,
|
||||
standby_node_id INTEGER NOT NULL,
|
||||
@@ -65,12 +91,32 @@ CREATE TABLE repmgr.monitoring_history (
|
||||
last_wal_standby_location PG_LSN,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
);
|
||||
)
|
||||
$repmgr_func$;
|
||||
INSERT INTO repmgr.monitoring_history
|
||||
(primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
|
||||
SELECT primary_node, standby_node, last_monitor_time, last_apply_time, last_wal_primary_location::pg_lsn, last_wal_standby_location::pg_lsn, replication_lag, apply_lag
|
||||
FROM repmgr.repl_monitor;
|
||||
ELSE
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE TABLE repmgr.monitoring_history (
|
||||
primary_node_id INTEGER NOT NULL,
|
||||
standby_node_id INTEGER NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||
last_wal_primary_location TEXT NOT NULL,
|
||||
last_wal_standby_location TEXT,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
)
|
||||
$repmgr_func$;
|
||||
INSERT INTO repmgr.monitoring_history
|
||||
(primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
|
||||
SELECT primary_node, standby_node, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag
|
||||
FROM repmgr.repl_monitor;
|
||||
|
||||
INSERT INTO repmgr.monitoring_history
|
||||
(primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
|
||||
SELECT primary_node, standby_node, last_monitor_time, last_apply_time, last_wal_primary_location::pg_lsn, last_wal_standby_location::pg_lsn, replication_lag, apply_lag
|
||||
FROM repmgr.repl_monitor;
|
||||
END IF;
|
||||
END$repmgr$;
|
||||
|
||||
CREATE INDEX idx_monitoring_history_time
|
||||
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||
@@ -95,6 +141,16 @@ LEFT JOIN repmgr.nodes un
|
||||
|
||||
/* monitoring functions */
|
||||
|
||||
CREATE FUNCTION set_local_node_id(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_local_node_id()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_set_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS '$libdir/repmgr', 'standby_set_last_updated'
|
||||
@@ -108,26 +164,6 @@ CREATE FUNCTION standby_get_last_updated()
|
||||
|
||||
/* failover functions */
|
||||
|
||||
CREATE FUNCTION request_vote(INT,INT)
|
||||
RETURNS pg_lsn
|
||||
AS '$libdir/repmgr', 'request_vote'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_voting_status()
|
||||
RETURNS INT
|
||||
AS '$libdir/repmgr', 'get_voting_status'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_voting_status_initiated()
|
||||
RETURNS INT
|
||||
AS '$libdir/repmgr', 'set_voting_status_initiated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION other_node_is_candidate(INT, INT)
|
||||
RETURNS BOOL
|
||||
AS '$libdir/repmgr', 'other_node_is_candidate'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION notify_follow_primary(INT)
|
||||
RETURNS VOID
|
||||
AS '$libdir/repmgr', 'notify_follow_primary'
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
/*
|
||||
* do_bdr_register()
|
||||
*
|
||||
* As each BDR node is its own master, registering a BDR node
|
||||
* As each BDR node is its own primary, registering a BDR node
|
||||
* will create the repmgr metadata schema if necessary.
|
||||
*/
|
||||
void
|
||||
|
||||
@@ -279,6 +279,7 @@ do_cluster_show(void)
|
||||
}
|
||||
}
|
||||
break;
|
||||
case WITNESS:
|
||||
case BDR:
|
||||
{
|
||||
/* node is reachable */
|
||||
@@ -1323,7 +1324,7 @@ do_cluster_cleanup(void)
|
||||
|
||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
|
||||
/* check if there is a master in this cluster */
|
||||
/* check if there is a primary in this cluster */
|
||||
log_info(_("connecting to primary server"));
|
||||
primary_conn = establish_primary_db_connection(conn, true);
|
||||
|
||||
|
||||
@@ -36,7 +36,6 @@ static bool copy_file(const char *src_file, const char *dest_file);
|
||||
static void format_archive_dir(PQExpBufferData *archive_dir);
|
||||
static t_server_action parse_server_action(const char *action);
|
||||
|
||||
static void _do_node_service_check(void);
|
||||
static void _do_node_service_list_actions(t_server_action action);
|
||||
static void _do_node_status_is_shutdown_cleanly(void);
|
||||
static void _do_node_archive_config(void);
|
||||
@@ -106,7 +105,7 @@ do_node_status(void)
|
||||
|
||||
recovery_type = get_recovery_type(conn);
|
||||
|
||||
get_node_replication_stats(conn, &node_info);
|
||||
get_node_replication_stats(conn, server_version_num, &node_info);
|
||||
|
||||
key_value_list_set(
|
||||
&node_status,
|
||||
@@ -265,7 +264,13 @@ do_node_status(void)
|
||||
"disabled");
|
||||
}
|
||||
|
||||
if (node_info.max_replication_slots > 0)
|
||||
if (server_version_num < 90400)
|
||||
{
|
||||
key_value_list_set(&node_status,
|
||||
"Replication slots",
|
||||
"not available");
|
||||
}
|
||||
else if (node_info.max_replication_slots > 0)
|
||||
{
|
||||
PQExpBufferData slotinfo;
|
||||
|
||||
@@ -280,8 +285,7 @@ do_node_status(void)
|
||||
|
||||
if (node_info.inactive_replication_slots > 0)
|
||||
{
|
||||
appendPQExpBuffer(
|
||||
&slotinfo,
|
||||
appendPQExpBuffer(&slotinfo,
|
||||
"; %i inactive",
|
||||
node_info.inactive_replication_slots);
|
||||
|
||||
@@ -291,8 +295,7 @@ do_node_status(void)
|
||||
node_info.inactive_replication_slots);
|
||||
}
|
||||
|
||||
key_value_list_set(
|
||||
&node_status,
|
||||
key_value_list_set(&node_status,
|
||||
"Replication slots",
|
||||
slotinfo.data);
|
||||
|
||||
@@ -300,8 +303,7 @@ do_node_status(void)
|
||||
}
|
||||
else if (node_info.max_replication_slots == 0)
|
||||
{
|
||||
key_value_list_set(
|
||||
&node_status,
|
||||
key_value_list_set(&node_status,
|
||||
"Replication slots",
|
||||
"disabled");
|
||||
}
|
||||
@@ -309,8 +311,7 @@ do_node_status(void)
|
||||
|
||||
if (node_info.type == STANDBY)
|
||||
{
|
||||
key_value_list_set_format(
|
||||
&node_status,
|
||||
key_value_list_set_format(&node_status,
|
||||
"Upstream node",
|
||||
"%s (ID: %i)",
|
||||
node_info.node_name,
|
||||
@@ -318,46 +319,47 @@ do_node_status(void)
|
||||
|
||||
get_replication_info(conn, &replication_info);
|
||||
|
||||
key_value_list_set_format(
|
||||
&node_status,
|
||||
key_value_list_set_format(&node_status,
|
||||
"Replication lag",
|
||||
"%i seconds",
|
||||
replication_info.replication_lag_time);
|
||||
|
||||
key_value_list_set_format(
|
||||
&node_status,
|
||||
key_value_list_set_format(&node_status,
|
||||
"Last received LSN",
|
||||
"%X/%X", format_lsn(replication_info.last_wal_receive_lsn));
|
||||
|
||||
key_value_list_set_format(
|
||||
&node_status,
|
||||
key_value_list_set_format(&node_status,
|
||||
"Last replayed LSN",
|
||||
"%X/%X", format_lsn(replication_info.last_wal_replay_lsn));
|
||||
}
|
||||
else
|
||||
{
|
||||
key_value_list_set(
|
||||
&node_status,
|
||||
key_value_list_set(&node_status,
|
||||
"Upstream node",
|
||||
"(none)");
|
||||
key_value_list_set_output_mode(&node_status, "Upstream node", OM_CSV);
|
||||
key_value_list_set_output_mode(&node_status,
|
||||
"Upstream node",
|
||||
OM_CSV);
|
||||
|
||||
key_value_list_set(
|
||||
&node_status,
|
||||
key_value_list_set(&node_status,
|
||||
"Replication lag",
|
||||
"n/a");
|
||||
|
||||
key_value_list_set(
|
||||
&node_status,
|
||||
key_value_list_set(&node_status,
|
||||
"Last received LSN",
|
||||
"(none)");
|
||||
key_value_list_set_output_mode(&node_status, "Last received LSN", OM_CSV);
|
||||
|
||||
key_value_list_set(
|
||||
&node_status,
|
||||
key_value_list_set_output_mode(&node_status,
|
||||
"Last received LSN",
|
||||
OM_CSV);
|
||||
|
||||
key_value_list_set(&node_status,
|
||||
"Last replayed LSN",
|
||||
"(none)");
|
||||
key_value_list_set_output_mode(&node_status, "Last replayed LSN", OM_CSV);
|
||||
|
||||
key_value_list_set_output_mode(&node_status,
|
||||
"Last replayed LSN",
|
||||
OM_CSV);
|
||||
}
|
||||
|
||||
|
||||
@@ -368,64 +370,55 @@ do_node_status(void)
|
||||
|
||||
if (runtime_options.output_mode == OM_CSV)
|
||||
{
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"\"Node name\",\"%s\"\n",
|
||||
node_info.node_name);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"\"Node ID\",\"%i\"\n",
|
||||
node_info.node_id);
|
||||
|
||||
for (cell = node_status.head; cell; cell = cell->next)
|
||||
{
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"\"%s\",\"%s\"\n",
|
||||
cell->key, cell->value);
|
||||
}
|
||||
|
||||
/* we'll add the raw data as well */
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"\"max_wal_senders\",%i\n",
|
||||
node_info.max_wal_senders);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"\"occupied_wal_senders\",%i\n",
|
||||
node_info.attached_wal_receivers);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"\"max_replication_slots\",%i\n",
|
||||
node_info.max_replication_slots);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"\"active_replication_slots\",%i\n",
|
||||
node_info.active_replication_slots);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"\"inactive_replaction_slots\",%i\n",
|
||||
node_info.inactive_replication_slots);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"Node \"%s\":\n",
|
||||
node_info.node_name);
|
||||
|
||||
for (cell = node_status.head; cell; cell = cell->next)
|
||||
{
|
||||
if (cell->output_mode == OM_NOT_SET)
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
"\t%s: %s\n", cell->key, cell->value);
|
||||
appendPQExpBuffer(&output,
|
||||
"\t%s: %s\n",
|
||||
cell->key, cell->value);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -566,10 +559,20 @@ do_node_check(void)
|
||||
|
||||
t_node_info node_info = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
CheckStatus return_code;
|
||||
CheckStatusList status_list = {NULL, NULL};
|
||||
CheckStatusListCell *cell = NULL;
|
||||
|
||||
|
||||
/* internal */
|
||||
if (runtime_options.has_passfile == true)
|
||||
{
|
||||
return_code = has_passfile() ? 0 : 1;
|
||||
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
|
||||
if (strlen(config_file_options.conninfo))
|
||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
else
|
||||
@@ -582,46 +585,61 @@ do_node_check(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
server_version_num = get_server_version(conn, NULL);
|
||||
|
||||
/* add replication statistics to node record */
|
||||
get_node_replication_stats(conn, &node_info);
|
||||
get_node_replication_stats(conn, server_version_num, &node_info);
|
||||
|
||||
/*
|
||||
* handle specific checks ======================
|
||||
*/
|
||||
if (runtime_options.archive_ready == true)
|
||||
{
|
||||
(void) do_node_check_archive_ready(conn, runtime_options.output_mode, NULL);
|
||||
return_code = do_node_check_archive_ready(conn,
|
||||
runtime_options.output_mode,
|
||||
NULL);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.downstream == true)
|
||||
{
|
||||
(void) do_node_check_downstream(conn, runtime_options.output_mode, NULL);
|
||||
return_code = do_node_check_downstream(conn,
|
||||
runtime_options.output_mode,
|
||||
NULL);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
|
||||
if (runtime_options.replication_lag == true)
|
||||
{
|
||||
(void) do_node_check_replication_lag(conn, runtime_options.output_mode, &node_info, NULL);
|
||||
return_code = do_node_check_replication_lag(conn,
|
||||
runtime_options.output_mode,
|
||||
&node_info,
|
||||
NULL);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.role == true)
|
||||
{
|
||||
(void) do_node_check_role(conn, runtime_options.output_mode, &node_info, NULL);
|
||||
return_code = do_node_check_role(conn,
|
||||
runtime_options.output_mode,
|
||||
&node_info,
|
||||
NULL);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.slots == true)
|
||||
{
|
||||
(void) do_node_check_slots(conn, runtime_options.output_mode, &node_info, NULL);
|
||||
return_code = do_node_check_slots(conn,
|
||||
runtime_options.output_mode,
|
||||
&node_info,
|
||||
NULL);
|
||||
PQfinish(conn);
|
||||
return;
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.output_mode == OM_NAGIOS)
|
||||
@@ -705,8 +723,7 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
|
||||
if (recovery_type == RECTYPE_STANDBY)
|
||||
{
|
||||
status = CHECK_STATUS_CRITICAL;
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
_("node is registered as primary but running as standby"));
|
||||
}
|
||||
else
|
||||
@@ -720,14 +737,12 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
|
||||
if (recovery_type == RECTYPE_PRIMARY)
|
||||
{
|
||||
status = CHECK_STATUS_CRITICAL;
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
_("node is registered as standby but running as primary"));
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
_("node is standby"));
|
||||
}
|
||||
break;
|
||||
@@ -750,8 +765,7 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
|
||||
if (is_active_bdr_node(conn, node_info->node_name) == false)
|
||||
{
|
||||
status = CHECK_STATUS_CRITICAL;
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
_("node is not an active BDR node"));
|
||||
}
|
||||
}
|
||||
@@ -799,16 +813,19 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
|
||||
|
||||
initPQExpBuffer(&details);
|
||||
|
||||
if (node_info->total_replication_slots == 0)
|
||||
if (server_version_num < 90400)
|
||||
{
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
_("replication slots not available for this PostgreSQL version"));
|
||||
}
|
||||
else if (node_info->total_replication_slots == 0)
|
||||
{
|
||||
appendPQExpBuffer(&details,
|
||||
_("node has no replication slots"));
|
||||
}
|
||||
else if (node_info->inactive_replication_slots == 0)
|
||||
{
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
_("%i of %i replication slots are active"),
|
||||
node_info->total_replication_slots,
|
||||
node_info->total_replication_slots);
|
||||
@@ -817,8 +834,7 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
|
||||
{
|
||||
status = CHECK_STATUS_CRITICAL;
|
||||
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
_("%i of %i replication slots are inactive"),
|
||||
node_info->inactive_replication_slots,
|
||||
node_info->total_replication_slots);
|
||||
@@ -881,14 +897,12 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
|
||||
switch (mode)
|
||||
{
|
||||
case OM_OPTFORMAT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"--files=%i --threshold=%i",
|
||||
ready_archive_files, config_file_options.archive_ready_critical);
|
||||
break;
|
||||
case OM_NAGIOS:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i pending archive ready files | files=%i;%i;%i",
|
||||
ready_archive_files,
|
||||
ready_archive_files,
|
||||
@@ -896,8 +910,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
|
||||
config_file_options.archive_ready_critical);
|
||||
break;
|
||||
case OM_TEXT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i pending archive ready files, critical threshold: %i",
|
||||
ready_archive_files, config_file_options.archive_ready_critical);
|
||||
break;
|
||||
@@ -913,14 +926,12 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
|
||||
switch (mode)
|
||||
{
|
||||
case OM_OPTFORMAT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"--files=%i --threshold=%i",
|
||||
ready_archive_files, config_file_options.archive_ready_warning);
|
||||
break;
|
||||
case OM_NAGIOS:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i pending archive ready files | files=%i;%i;%i",
|
||||
ready_archive_files,
|
||||
ready_archive_files,
|
||||
@@ -929,8 +940,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
|
||||
|
||||
break;
|
||||
case OM_TEXT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i pending archive ready files (threshold: %i)",
|
||||
ready_archive_files, config_file_options.archive_ready_warning);
|
||||
break;
|
||||
@@ -965,13 +975,11 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
|
||||
switch (mode)
|
||||
{
|
||||
case OM_OPTFORMAT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"--files=%i", ready_archive_files);
|
||||
break;
|
||||
case OM_NAGIOS:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i pending archive ready files | files=%i;%i;%i",
|
||||
ready_archive_files,
|
||||
ready_archive_files,
|
||||
@@ -979,8 +987,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
|
||||
config_file_options.archive_ready_critical);
|
||||
break;
|
||||
case OM_TEXT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i pending archive ready files", ready_archive_files);
|
||||
break;
|
||||
|
||||
@@ -1080,14 +1087,12 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
|
||||
switch (mode)
|
||||
{
|
||||
case OM_OPTFORMAT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"--lag=%i --threshold=%i",
|
||||
lag_seconds, config_file_options.replication_lag_critical);
|
||||
break;
|
||||
case OM_NAGIOS:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i seconds | lag=%i;%i;%i",
|
||||
lag_seconds,
|
||||
lag_seconds,
|
||||
@@ -1095,8 +1100,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
|
||||
config_file_options.replication_lag_critical);
|
||||
break;
|
||||
case OM_TEXT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i seconds, critical threshold: %i)",
|
||||
lag_seconds, config_file_options.replication_lag_critical);
|
||||
break;
|
||||
@@ -1112,14 +1116,12 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
|
||||
switch (mode)
|
||||
{
|
||||
case OM_OPTFORMAT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"--lag=%i --threshold=%i",
|
||||
lag_seconds, config_file_options.replication_lag_warning);
|
||||
break;
|
||||
case OM_NAGIOS:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i seconds | lag=%i;%i;%i",
|
||||
lag_seconds,
|
||||
lag_seconds,
|
||||
@@ -1127,8 +1129,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
|
||||
config_file_options.replication_lag_critical);
|
||||
break;
|
||||
case OM_TEXT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i seconds, warning threshold: %i)",
|
||||
lag_seconds, config_file_options.replication_lag_warning);
|
||||
break;
|
||||
@@ -1163,14 +1164,12 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
|
||||
switch (mode)
|
||||
{
|
||||
case OM_OPTFORMAT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"--lag=%i",
|
||||
lag_seconds);
|
||||
break;
|
||||
case OM_NAGIOS:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i seconds | lag=%i;%i;%i",
|
||||
lag_seconds,
|
||||
lag_seconds,
|
||||
@@ -1178,8 +1177,7 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
|
||||
config_file_options.replication_lag_critical);
|
||||
break;
|
||||
case OM_TEXT:
|
||||
appendPQExpBuffer(
|
||||
&details,
|
||||
appendPQExpBuffer(&details,
|
||||
"%i seconds",
|
||||
lag_seconds);
|
||||
break;
|
||||
@@ -1314,7 +1312,6 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
|
||||
{
|
||||
case OM_NAGIOS:
|
||||
{
|
||||
|
||||
printf("REPMGR_DOWNSTREAM_SERVERS %s: %s | ",
|
||||
output_check_status(status),
|
||||
details.data);
|
||||
@@ -1402,14 +1399,6 @@ do_node_service(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (runtime_options.check == true)
|
||||
{
|
||||
if (action != ACTION_NONE)
|
||||
log_warning(_("--action not required for --check"));
|
||||
|
||||
return _do_node_service_check();
|
||||
}
|
||||
|
||||
if (runtime_options.list_actions == true)
|
||||
{
|
||||
return _do_node_service_list_actions(action);
|
||||
@@ -1479,12 +1468,6 @@ do_node_service(void)
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
_do_node_service_check(void)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
_do_node_service_list_actions(t_server_action action)
|
||||
{
|
||||
@@ -1676,6 +1659,45 @@ do_node_rejoin(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/*
|
||||
* If --force-rewind specified, check pg_rewind can be used, and
|
||||
* pre-emptively fetch the list of configuration files which should be
|
||||
* archived
|
||||
*/
|
||||
|
||||
if (runtime_options.force_rewind == true)
|
||||
{
|
||||
PQExpBufferData reason;
|
||||
PQExpBufferData msg;
|
||||
|
||||
initPQExpBuffer(&reason);
|
||||
|
||||
if (can_use_pg_rewind(upstream_conn, config_file_options.data_directory, &reason) == false)
|
||||
{
|
||||
log_error(_("--force-rewind specified but pg_rewind cannot be used"));
|
||||
log_detail("%s", reason.data);
|
||||
termPQExpBuffer(&reason);
|
||||
PQfinish(upstream_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
termPQExpBuffer(&reason);
|
||||
|
||||
initPQExpBuffer(&msg);
|
||||
appendPQExpBuffer(&msg,
|
||||
_("prerequisites for using pg_rewind are met"));
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info("%s", msg.data);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_INFO, "%s", msg.data);
|
||||
}
|
||||
termPQExpBuffer(&msg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Forcibly rewind node if requested (this is mainly for use when this
|
||||
* action is being executed by "repmgr standby switchover")
|
||||
@@ -1690,20 +1712,27 @@ do_node_rejoin(void)
|
||||
/* execute pg_rewind */
|
||||
initPQExpBuffer(&command);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
"%s -D ",
|
||||
make_pg_path("pg_rewind"));
|
||||
|
||||
appendShellString(
|
||||
&command,
|
||||
appendShellString(&command,
|
||||
config_file_options.data_directory);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
" --source-server='%s'",
|
||||
primary_node_record.conninfo);
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info(_("pg_rewind would now be executed"));
|
||||
log_detail(_("pg_rewind command is:\n %s"),
|
||||
command.data);
|
||||
|
||||
PQfinish(upstream_conn);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
|
||||
log_notice(_("executing pg_rewind"));
|
||||
log_debug("pg_rewind command is:\n %s",
|
||||
command.data);
|
||||
@@ -1749,19 +1778,72 @@ do_node_rejoin(void)
|
||||
log_detail("%s", strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
termPQExpBuffer(&filebuf);
|
||||
|
||||
/* delete any replication slots copied in by pg_rewind */
|
||||
{
|
||||
PQExpBufferData slotdir_path;
|
||||
DIR *slotdir;
|
||||
struct dirent *slotdir_ent;
|
||||
|
||||
initPQExpBuffer(&slotdir_path);
|
||||
|
||||
appendPQExpBuffer(&slotdir_path,
|
||||
"%s/pg_replslot",
|
||||
config_file_options.data_directory);
|
||||
|
||||
slotdir = opendir(slotdir_path.data);
|
||||
|
||||
if (slotdir == NULL)
|
||||
{
|
||||
log_warning(_("unable to open replication slot directory \"%s\""),
|
||||
slotdir_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
}
|
||||
else
|
||||
{
|
||||
while ((slotdir_ent = readdir(slotdir)) != NULL) {
|
||||
struct stat statbuf;
|
||||
PQExpBufferData slotdir_ent_path;
|
||||
|
||||
if(strcmp(slotdir_ent->d_name, ".") == 0 || strcmp(slotdir_ent->d_name, "..") == 0)
|
||||
continue;
|
||||
|
||||
initPQExpBuffer(&slotdir_ent_path);
|
||||
|
||||
appendPQExpBuffer(&slotdir_ent_path,
|
||||
"%s/%s",
|
||||
slotdir_path.data,
|
||||
slotdir_ent->d_name);
|
||||
|
||||
if (stat(slotdir_ent_path.data, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
|
||||
{
|
||||
termPQExpBuffer(&slotdir_ent_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
log_debug("deleting slot directory \"%s\"", slotdir_ent_path.data);
|
||||
if (rmdir_recursive(slotdir_ent_path.data) != 0 && errno != EEXIST)
|
||||
{
|
||||
log_warning(_("unable to delete replication slot directory \"%s\""), slotdir_ent_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
log_hint(_("directory may need to be manually removed"));
|
||||
}
|
||||
|
||||
termPQExpBuffer(&slotdir_ent_path);
|
||||
}
|
||||
}
|
||||
termPQExpBuffer(&slotdir_path);
|
||||
}
|
||||
}
|
||||
|
||||
initPQExpBuffer(&follow_output);
|
||||
|
||||
success = do_standby_follow_internal(
|
||||
upstream_conn,
|
||||
success = do_standby_follow_internal(upstream_conn,
|
||||
&primary_node_record,
|
||||
&follow_output);
|
||||
|
||||
create_event_notification(
|
||||
upstream_conn,
|
||||
create_event_notification(upstream_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"node_rejoin",
|
||||
@@ -1787,13 +1869,13 @@ do_node_rejoin(void)
|
||||
|
||||
|
||||
/*
|
||||
* Intended mainly for "internal" use by `node rejoin` on the local node when
|
||||
* For "internal" use by `node rejoin` on the local node when
|
||||
* called by "standby switchover" from the remote node.
|
||||
*
|
||||
* This archives any configuration files in the data directory, which may be
|
||||
* overwritten by pg_rewind.
|
||||
*
|
||||
* Requires configuration file, optionally --config_archive_dir
|
||||
* Requires configuration file, optionally --config-archive-dir
|
||||
*/
|
||||
static void
|
||||
_do_node_archive_config(void)
|
||||
@@ -1831,8 +1913,6 @@ _do_node_archive_config(void)
|
||||
termPQExpBuffer(&archive_dir);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else if (!S_ISDIR(statbuf.st_mode))
|
||||
{
|
||||
@@ -1842,7 +1922,6 @@ _do_node_archive_config(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
arcdir = opendir(archive_dir.data);
|
||||
|
||||
if (arcdir == NULL)
|
||||
@@ -1854,42 +1933,46 @@ _do_node_archive_config(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/*
|
||||
* attempt to remove any existing files in the directory TODO: collate
|
||||
* problem files into list
|
||||
*/
|
||||
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
||||
if (runtime_options.dry_run == false)
|
||||
{
|
||||
PQExpBufferData arcdir_ent_path;
|
||||
|
||||
initPQExpBuffer(&arcdir_ent_path);
|
||||
|
||||
appendPQExpBuffer(&arcdir_ent_path,
|
||||
"%s/%s",
|
||||
archive_dir.data,
|
||||
arcdir_ent->d_name);
|
||||
|
||||
if (stat(arcdir_ent_path.data, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
|
||||
/*
|
||||
* attempt to remove any existing files in the directory TODO: collate
|
||||
* problem files into list
|
||||
*/
|
||||
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
||||
{
|
||||
PQExpBufferData arcdir_ent_path;
|
||||
|
||||
initPQExpBuffer(&arcdir_ent_path);
|
||||
|
||||
appendPQExpBuffer(&arcdir_ent_path,
|
||||
"%s/%s",
|
||||
archive_dir.data,
|
||||
arcdir_ent->d_name);
|
||||
|
||||
if (stat(arcdir_ent_path.data, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
|
||||
{
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlink(arcdir_ent_path.data) == -1)
|
||||
{
|
||||
log_error(_("unable to delete file in temporary archive directory"));
|
||||
log_detail(_("file is: \"%s\""), arcdir_ent_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
closedir(arcdir);
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlink(arcdir_ent_path.data) == -1)
|
||||
{
|
||||
log_error(_("unable to delete file in temporary archive directory"));
|
||||
log_detail(_("file is: \"%s\""), arcdir_ent_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
closedir(arcdir);
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
closedir(arcdir);
|
||||
}
|
||||
|
||||
closedir(arcdir);
|
||||
|
||||
/*
|
||||
* extract list of config files from --config-files
|
||||
*/
|
||||
@@ -1965,18 +2048,53 @@ _do_node_archive_config(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
|
||||
cell->key, dest_file.data);
|
||||
copy_file(cell->value, dest_file.data);
|
||||
copied_count++;
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info("file \"%s\" would be copied to \"%s\"",
|
||||
cell->key, dest_file.data);
|
||||
copied_count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
|
||||
cell->key, dest_file.data);
|
||||
copy_file(cell->value, dest_file.data);
|
||||
copied_count++;
|
||||
}
|
||||
}
|
||||
|
||||
termPQExpBuffer(&dest_file);
|
||||
}
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_verbose(LOG_INFO, _("%i files would have been copied to \"%s\""),
|
||||
copied_count, archive_dir.data);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_INFO, _("%i files copied to \"%s\""),
|
||||
copied_count, archive_dir.data);
|
||||
}
|
||||
|
||||
log_verbose(LOG_INFO, _("%i files copied to \"%s\""),
|
||||
copied_count, archive_dir.data);
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
/*
|
||||
* Delete directory in --dry-run mode - it should be empty unless it's been
|
||||
* interfered with for some reason, in which case manual intervention is
|
||||
* required
|
||||
*/
|
||||
if (rmdir(archive_dir.data) != 0 && errno != EEXIST)
|
||||
{
|
||||
log_warning(_("unable to delete directory \"%s\""), archive_dir.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
log_hint(_("directory may need to be manually removed"));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_INFO, "directory \"%s\" deleted", archive_dir.data);
|
||||
}
|
||||
}
|
||||
|
||||
termPQExpBuffer(&archive_dir);
|
||||
}
|
||||
@@ -1991,9 +2109,9 @@ _do_node_archive_config(void)
|
||||
* Not designed to be called if the instance is running, but does
|
||||
* not currently check.
|
||||
*
|
||||
* Requires -D/--pgdata, optionally --config_archive_dir
|
||||
* Requires -D/--pgdata, optionally --config-archive-dir
|
||||
*
|
||||
* Removes --config_archive_dir after successful copy
|
||||
* Removes --config-archive-dir after successful copy
|
||||
*/
|
||||
|
||||
static void
|
||||
@@ -2179,7 +2297,7 @@ do_node_help(void)
|
||||
|
||||
printf(_("NODE CHECK\n"));
|
||||
puts("");
|
||||
printf(_(" \"node check\" erforms some health checks on a node from a replication perspective.\n"));
|
||||
printf(_(" \"node check\" performs some health checks on a node from a replication perspective.\n"));
|
||||
puts("");
|
||||
printf(_(" Configuration file required, runs on local node only.\n"));
|
||||
puts("");
|
||||
@@ -2195,4 +2313,33 @@ do_node_help(void)
|
||||
|
||||
puts("");
|
||||
|
||||
printf(_("NODE REJOIN\n"));
|
||||
puts("");
|
||||
printf(_(" \"node rejoin\" enables a dormant (stopped) node to be rejoined to the replication cluster.\n"));
|
||||
puts("");
|
||||
printf(_(" Configuration file required, runs on local node only.\n"));
|
||||
puts("");
|
||||
printf(_(" --dry-run check that the prerequisites are met for rejoining the node\n" \
|
||||
" (including usability of \"pg_rewind\" if requested)\n"));
|
||||
printf(_(" --force-rewind execute \"pg_rewind\" if necessary\n"));
|
||||
printf(_(" --config-files comma-separated list of configuration files to retain\n" \
|
||||
" after executing \"pg_rewind\"\n"));
|
||||
printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \
|
||||
" (default: /tmp)\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("NODE SERVICE\n"));
|
||||
puts("");
|
||||
printf(_(" \"node service\" executes a system service command to stop/start/restart/reload a node\n" \
|
||||
" or optionally display which command would be executed\n"));
|
||||
puts("");
|
||||
printf(_(" Configuration file required, runs on local node only.\n"));
|
||||
puts("");
|
||||
printf(_(" --dry-run show what action would be performed, but don't execute it\n"));
|
||||
printf(_(" --action action to perform (one of \"start\", \"stop\", \"restart\" or \"reload\")\n"));
|
||||
printf(_(" --list-actions show what command would be performed for each action\n"));
|
||||
puts("");
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
extern void do_node_status(void);
|
||||
extern void do_node_check(void);
|
||||
|
||||
|
||||
extern void do_node_rejoin(void);
|
||||
extern void do_node_service(void);
|
||||
|
||||
|
||||
@@ -74,7 +74,11 @@ do_primary_register(void)
|
||||
|
||||
log_verbose(LOG_INFO, _("server is not in recovery"));
|
||||
|
||||
/* create the repmgr extension if it doesn't already exist */
|
||||
/*
|
||||
* create the repmgr extension if it doesn't already exist;
|
||||
* note that create_repmgr_extension() will take into account
|
||||
* the --dry-run option
|
||||
*/
|
||||
if (!create_repmgr_extension(conn))
|
||||
{
|
||||
PQfinish(conn);
|
||||
@@ -92,6 +96,7 @@ do_primary_register(void)
|
||||
return;
|
||||
}
|
||||
|
||||
initialize_voting_term(conn);
|
||||
|
||||
/* Ensure there isn't another registered node which is primary */
|
||||
primary_conn = get_primary_connection(conn, &current_primary_id, NULL);
|
||||
|
||||
@@ -359,7 +359,7 @@ do_standby_clone(void)
|
||||
* If copying of external configuration files requested, and any are
|
||||
* detected, perform sanity checks
|
||||
*/
|
||||
if (PQstatus(source_conn) == CONNECTION_OK)
|
||||
if (PQstatus(source_conn) == CONNECTION_OK && runtime_options.copy_external_config_files == true)
|
||||
{
|
||||
PGconn *superuser_conn = NULL;
|
||||
PGconn *privileged_conn = NULL;
|
||||
@@ -549,7 +549,7 @@ do_standby_clone(void)
|
||||
* This won't run in Barman mode as "config_files" is only populated in
|
||||
* "initialise_direct_clone()", which isn't called in Barman mode.
|
||||
*/
|
||||
if (runtime_options.copy_external_config_files && config_files.entries)
|
||||
if (runtime_options.copy_external_config_files == true && config_files.entries > 0)
|
||||
{
|
||||
copy_configuration_files();
|
||||
}
|
||||
@@ -806,7 +806,6 @@ do_standby_register(void)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (PQstatus(conn) == CONNECTION_OK)
|
||||
{
|
||||
check_recovery_type(conn);
|
||||
@@ -838,7 +837,7 @@ do_standby_register(void)
|
||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to connect to the primary database"));
|
||||
log_hint(_("a primary must be configured before registering a standby"));
|
||||
log_hint(_("a primary node must be configured before registering a standby node"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -1567,59 +1566,23 @@ do_standby_follow(void)
|
||||
if (server_version_num < 90400)
|
||||
check_93_config();
|
||||
|
||||
if (runtime_options.upstream_node_id != NO_UPSTREAM_NODE)
|
||||
/*
|
||||
* Attempt to connect to primary.
|
||||
*
|
||||
* If --wait provided, loop for up to `primary_follow_timeout` seconds
|
||||
* before giving up
|
||||
*/
|
||||
|
||||
for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++)
|
||||
{
|
||||
/* check not self! */
|
||||
if (runtime_options.upstream_node_id == config_file_options.node_id)
|
||||
primary_conn = get_primary_connection_quiet(local_conn,
|
||||
&primary_id,
|
||||
NULL);
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK || runtime_options.wait == false)
|
||||
{
|
||||
log_error(_("provided \"--upstream-node-id\" %i is the current node!"),
|
||||
runtime_options.upstream_node_id);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
record_status = get_node_record(local_conn, runtime_options.upstream_node_id, &primary_node_record);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
log_error(_("unable to find record for specified upstream node %i"),
|
||||
runtime_options.upstream_node_id);
|
||||
PQfinish(local_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++)
|
||||
{
|
||||
primary_conn = establish_db_connection(primary_node_record.conninfo, true);
|
||||
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK || runtime_options.wait == false)
|
||||
{
|
||||
log_debug("setting primary id to %i", runtime_options.upstream_node_id);
|
||||
primary_id = runtime_options.upstream_node_id;
|
||||
break;
|
||||
}
|
||||
sleep(1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Attempt to connect to primary.
|
||||
*
|
||||
* If --wait provided, loop for up to `primary_follow_timeout` seconds
|
||||
* before giving up
|
||||
*/
|
||||
|
||||
for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++)
|
||||
{
|
||||
primary_conn = get_primary_connection_quiet(local_conn,
|
||||
&primary_id,
|
||||
NULL);
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK || runtime_options.wait == false)
|
||||
{
|
||||
break;
|
||||
}
|
||||
sleep(1);
|
||||
break;
|
||||
}
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
PQfinish(local_conn);
|
||||
@@ -1628,6 +1591,13 @@ do_standby_follow(void)
|
||||
{
|
||||
log_error(_("unable to determine primary node"));
|
||||
|
||||
if (runtime_options.wait == true)
|
||||
{
|
||||
log_detail(_("no primary appeared after %i seconds"),
|
||||
config_file_options.primary_follow_timeout);
|
||||
log_hint(_("alter \"primary_follow_timeout\" in \"repmgr.conf\" to change this value"));
|
||||
}
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -1641,7 +1611,6 @@ do_standby_follow(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
/* XXX check this is not current upstream anyway */
|
||||
/* check replication connection */
|
||||
initialize_conninfo_params(&repl_conninfo, false);
|
||||
@@ -1730,11 +1699,12 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
||||
{
|
||||
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||
int original_upstream_node_id = UNKNOWN_NODE_ID;
|
||||
t_node_info original_upstream_node_record = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
char *errmsg = NULL;
|
||||
|
||||
|
||||
bool remove_old_replication_slot = false;
|
||||
/*
|
||||
* Fetch our node record so we can write application_name, if set, and to
|
||||
* get the upstream node ID, which we'll need to know if replication slots
|
||||
@@ -1803,6 +1773,8 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
||||
param_set(&recovery_conninfo, "application_name", application_name);
|
||||
}
|
||||
|
||||
free_conninfo_params(&local_node_conninfo);
|
||||
|
||||
/*
|
||||
* store the original upstream node id so we can delete the
|
||||
* replication slot, if exists
|
||||
@@ -1816,9 +1788,34 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
||||
original_upstream_node_id = primary_node_record->node_id;
|
||||
}
|
||||
|
||||
free_conninfo_params(&local_node_conninfo);
|
||||
|
||||
if (config_file_options.use_replication_slots && runtime_options.host_param_provided == false && original_upstream_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
remove_old_replication_slot = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fetch original upstream's record */
|
||||
if (remove_old_replication_slot == true)
|
||||
{
|
||||
PGconn *local_conn = NULL;
|
||||
RecordStatus upstream_record_status = RECORD_NOT_FOUND;
|
||||
|
||||
/* abort if local connection not available */
|
||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
|
||||
upstream_record_status = get_node_record(local_conn,
|
||||
original_upstream_node_id,
|
||||
&original_upstream_node_record);
|
||||
PQfinish(local_conn);
|
||||
|
||||
if (upstream_record_status != RECORD_FOUND)
|
||||
{
|
||||
log_warning(_("unable to retrieve node record for old upstream node %i"),
|
||||
original_upstream_node_id);
|
||||
log_detail(_("replication slot will need to be removed manually"));
|
||||
}
|
||||
}
|
||||
|
||||
/* Set the application name to this node's name */
|
||||
param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
|
||||
@@ -1870,7 +1867,6 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* If replication slots are in use, and an inactive one for this node
|
||||
* exists on the former upstream, drop it.
|
||||
@@ -1878,37 +1874,16 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
||||
* XXX check if former upstream is current primary?
|
||||
*/
|
||||
|
||||
if (config_file_options.use_replication_slots && runtime_options.host_param_provided == false && original_upstream_node_id != UNKNOWN_NODE_ID)
|
||||
if (remove_old_replication_slot == true)
|
||||
{
|
||||
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
|
||||
RecordStatus upstream_record_status = RECORD_NOT_FOUND;
|
||||
PGconn *local_conn = NULL;
|
||||
|
||||
log_verbose(LOG_INFO, "attempting to remove replication slot from old upstream node %i",
|
||||
original_upstream_node_id);
|
||||
|
||||
/* XXX should we poll for server restart? */
|
||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
|
||||
upstream_record_status = get_node_record(local_conn,
|
||||
original_upstream_node_id,
|
||||
&upstream_node_record);
|
||||
|
||||
PQfinish(local_conn);
|
||||
|
||||
if (upstream_record_status != RECORD_FOUND)
|
||||
if (original_upstream_node_record.node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
log_warning(_("unable to retrieve node record for old upstream node %i"),
|
||||
original_upstream_node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
PGconn *old_upstream_conn = establish_db_connection_quiet(upstream_node_record.conninfo);
|
||||
PGconn *old_upstream_conn = establish_db_connection_quiet(original_upstream_node_record.conninfo);
|
||||
|
||||
if (PQstatus(old_upstream_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_info(_("unable to connect to old upstream node %i to remove replication slot"),
|
||||
original_upstream_node_id);
|
||||
log_warning(_("unable to connect to old upstream node %i to remove replication slot"),
|
||||
original_upstream_node_id);
|
||||
log_hint(_("if reusing this node, you should manually remove any inactive replication slots"));
|
||||
}
|
||||
else
|
||||
@@ -1916,6 +1891,7 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
||||
drop_replication_slot_if_exists(old_upstream_conn,
|
||||
original_upstream_node_id,
|
||||
local_node_record.slot_name);
|
||||
PQfinish(old_upstream_conn);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1941,7 +1917,6 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
||||
config_file_options.node_id,
|
||||
primary_node_record->node_id);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -4338,7 +4313,7 @@ run_file_backup(t_node_info *node_record)
|
||||
* empty */
|
||||
{
|
||||
maxlen_snprintf(command,
|
||||
"rsync --progress -a --files-from=%s/%s.txt %s:%s/base/%s/%s %s",
|
||||
"rsync --progress -a --files-from=%s/%s.txt %s:%s/%s/%s %s",
|
||||
local_repmgr_tmp_directory,
|
||||
cell_t->oid,
|
||||
config_file_options.barman_host,
|
||||
@@ -4994,6 +4969,17 @@ write_primary_conninfo(char *line, t_conninfo_param_list *param_list)
|
||||
}
|
||||
}
|
||||
|
||||
/* passfile provided as configuration option */
|
||||
if (config_file_options.passfile[0] != '\0')
|
||||
{
|
||||
/* check if the libpq we're using supports "passfile=" */
|
||||
if (has_passfile() == true)
|
||||
{
|
||||
appendPQExpBuffer(&conninfo_buf, " passfile=");
|
||||
appendConnStrVal(&conninfo_buf, config_file_options.passfile);
|
||||
}
|
||||
}
|
||||
|
||||
escaped = escape_recovery_conf_value(conninfo_buf.data);
|
||||
maxlen_snprintf(line, "primary_conninfo = '%s'\n", escaped);
|
||||
|
||||
@@ -5336,8 +5322,8 @@ do_standby_help(void)
|
||||
puts("");
|
||||
printf(_(" \"standby unregister\" unregisters an inactive standby node.\n"));
|
||||
puts("");
|
||||
printf(_(" --node-id ID node to unregister (optional, used when the node to unregister\n" \
|
||||
" is offline)\n"));
|
||||
printf(_(" --node-id ID of node to unregister (optional, used when the node to\n" \
|
||||
" unregister is offline)\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("STANDBY PROMOTE\n"));
|
||||
|
||||
462
repmgr-action-witness.c
Normal file
462
repmgr-action-witness.c
Normal file
@@ -0,0 +1,462 @@
|
||||
/*
|
||||
* repmgr-action-witness.c
|
||||
*
|
||||
* Implements witness actions for the repmgr command line utility
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2017
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "repmgr.h"
|
||||
#include "dirutil.h"
|
||||
#include "compat.h"
|
||||
#include "controldata.h"
|
||||
|
||||
#include "repmgr-client-global.h"
|
||||
#include "repmgr-action-witness.h"
|
||||
|
||||
/*
 * Scratch buffers for the "user" and "dbname" values extracted from the
 * conninfo string in repmgr.conf; filled by get_conninfo_value() in the
 * witness actions below before connecting to the primary.
 */
static char repmgr_user[MAXLEN];
|
||||
static char repmgr_db[MAXLEN];
|
||||
|
||||
void
|
||||
do_witness_register(void)
|
||||
{
|
||||
PGconn *witness_conn = NULL;
|
||||
PGconn *primary_conn = NULL;
|
||||
RecoveryType recovery_type = RECTYPE_UNKNOWN;
|
||||
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
bool record_created = false;
|
||||
|
||||
log_info(_("connecting to witness node \"%s\" (ID: %i)"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
|
||||
witness_conn = establish_db_connection_quiet(config_file_options.conninfo);
|
||||
|
||||
if (PQstatus(witness_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
log_detail("%s",
|
||||
PQerrorMessage(witness_conn));
|
||||
log_hint(_("the witness node must be running before it can be registered"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* check witness node's recovery type */
|
||||
recovery_type = get_recovery_type(witness_conn);
|
||||
|
||||
if (recovery_type == RECTYPE_STANDBY)
|
||||
{
|
||||
log_error(_("provided node is a standby"));
|
||||
log_error(_("a witness node must run on an independent primary server"));
|
||||
|
||||
PQfinish(witness_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* check that witness node is not a BDR node */
|
||||
if (is_bdr_db_quiet(witness_conn) == true)
|
||||
{
|
||||
log_error(_("witness node is a BDR node"));
|
||||
log_hint(_("a witness node cannot be configured for a BDR cluster"));
|
||||
|
||||
PQfinish(witness_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
/* connect to primary with provided parameters */
|
||||
log_info(_("connecting to primary node"));
|
||||
/*
|
||||
* Extract the repmgr user and database names from the conninfo string
|
||||
* provided in repmgr.conf
|
||||
*/
|
||||
get_conninfo_value(config_file_options.conninfo, "user", repmgr_user);
|
||||
get_conninfo_value(config_file_options.conninfo, "dbname", repmgr_db);
|
||||
|
||||
param_set_ine(&source_conninfo, "user", repmgr_user);
|
||||
param_set_ine(&source_conninfo, "dbname", repmgr_db);
|
||||
|
||||
/* We need to connect to check configuration and copy it */
|
||||
primary_conn = establish_db_connection_by_params(&source_conninfo, false);
|
||||
|
||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to connect to the primary node"));
|
||||
log_hint(_("a primary node must be configured before registering a witness node"));
|
||||
|
||||
PQfinish(witness_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* check primary node's recovery type */
|
||||
recovery_type = get_recovery_type(witness_conn);
|
||||
|
||||
if (recovery_type == RECTYPE_STANDBY)
|
||||
{
|
||||
log_error(_("provided primary node is a standby"));
|
||||
log_error(_("provide the connection details of the cluster's primary server"));
|
||||
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* check that primary node is not a BDR node */
|
||||
if (is_bdr_db_quiet(primary_conn) == true)
|
||||
{
|
||||
log_error(_("primary node is a BDR node"));
|
||||
log_hint(_("a witness node cannot be configured for a BDR cluster"));
|
||||
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* XXX sanity check witness node is not part of main cluster */
|
||||
|
||||
|
||||
/* create repmgr extension, if does not exist */
|
||||
if (runtime_options.dry_run == false && !create_repmgr_extension(witness_conn))
|
||||
{
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/*
|
||||
* check if node record exists on primary, overwrite if -F/--force provided,
|
||||
* otherwise exit with error
|
||||
*/
|
||||
|
||||
record_status = get_node_record(primary_conn,
|
||||
config_file_options.node_id,
|
||||
&node_record);
|
||||
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
/*
|
||||
* If node is not a witness, cowardly refuse to do anything, let the
|
||||
* user work out what's the correct thing to do.
|
||||
*/
|
||||
if (node_record.type != WITNESS)
|
||||
{
|
||||
log_error(_("node \"%s\" (ID: %i) is already registered as a %s node"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id,
|
||||
get_node_type_string(node_record.type));
|
||||
log_hint(_("use \"repmgr %s unregister\" to remove a non-witness node record"),
|
||||
get_node_type_string(node_record.type));
|
||||
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (!runtime_options.force)
|
||||
{
|
||||
log_error(_("witness node is already registered"));
|
||||
log_hint(_("use option -F/--force to reregister the node"));
|
||||
|
||||
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// XXX check other node with same name does not exist
|
||||
|
||||
/*
|
||||
* if repmgr.nodes contains entries, delete if -F/--force provided,
|
||||
* otherwise exit with error
|
||||
*/
|
||||
get_all_node_records(witness_conn, &nodes);
|
||||
|
||||
log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
|
||||
|
||||
if (nodes.node_count > 0)
|
||||
{
|
||||
if (!runtime_options.force)
|
||||
{
|
||||
log_error(_("witness node is already initialised and contains node records"));
|
||||
log_hint(_("use option -F/--force to reinitialise the node"));
|
||||
PQfinish(primary_conn);
|
||||
PQfinish(witness_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
clear_node_info_list(&nodes);
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info(_("prerequisites for registering the witness node are met"));
|
||||
PQfinish(primary_conn);
|
||||
PQfinish(witness_conn);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
/* create record on primary */
|
||||
|
||||
/*
|
||||
* node record exists - update it (at this point we have already
|
||||
* established that -F/--force is in use)
|
||||
*/
|
||||
|
||||
init_node_record(&node_record);
|
||||
|
||||
/* these values are mandatory, setting them to anything else has no point */
|
||||
node_record.type = WITNESS;
|
||||
node_record.priority = 0;
|
||||
node_record.upstream_node_id = get_primary_node_id(primary_conn);
|
||||
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
record_created = update_node_record(primary_conn,
|
||||
"witness register",
|
||||
&node_record);
|
||||
}
|
||||
else
|
||||
{
|
||||
record_created = create_node_record(primary_conn,
|
||||
"witness register",
|
||||
&node_record);
|
||||
}
|
||||
|
||||
if (record_created == false)
|
||||
{
|
||||
log_error(_("unable to create or update node record on primary"));
|
||||
PQfinish(primary_conn);
|
||||
PQfinish(witness_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* sync records from primary */
|
||||
if (witness_copy_node_records(primary_conn, witness_conn) == false)
|
||||
{
|
||||
log_error(_("unable to copy repmgr node records from primary"));
|
||||
PQfinish(primary_conn);
|
||||
PQfinish(witness_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* create event */
|
||||
create_event_record(primary_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"witness_register",
|
||||
true,
|
||||
NULL);
|
||||
|
||||
PQfinish(primary_conn);
|
||||
PQfinish(witness_conn);
|
||||
|
||||
log_info(_("witness registration complete"));
|
||||
log_notice(_("witness node \"%s\" (ID: %i) successfully registered"),
|
||||
config_file_options.node_name, config_file_options.node_id);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
do_witness_unregister(void)
|
||||
{
|
||||
PGconn *witness_conn = NULL;
|
||||
PGconn *primary_conn = NULL;
|
||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
bool node_record_deleted = false;
|
||||
bool witness_available = true;
|
||||
|
||||
log_info(_("connecting to witness node \"%s\" (ID: %i)"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
|
||||
witness_conn = establish_db_connection_quiet(config_file_options.conninfo);
|
||||
|
||||
if (PQstatus(witness_conn) != CONNECTION_OK)
|
||||
{
|
||||
if (!runtime_options.force)
|
||||
{
|
||||
log_error(_("unable to connect to witness node \"%s\" (ID: %i)"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
log_detail("%s", PQerrorMessage(witness_conn));
|
||||
log_hint(_("provide -F/--force to remove the witness record if the server is not running"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
log_notice(_("unable to connect to witness node \"%s\" (ID: %i), removing node record on cluster primary only"),
|
||||
config_file_options.node_name,
|
||||
config_file_options.node_id);
|
||||
witness_available = false;
|
||||
}
|
||||
|
||||
if (witness_available == true)
|
||||
{
|
||||
primary_conn = get_primary_connection_quiet(witness_conn, NULL, NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Extract the repmgr user and database names from the conninfo string
|
||||
* provided in repmgr.conf
|
||||
*/
|
||||
get_conninfo_value(config_file_options.conninfo, "user", repmgr_user);
|
||||
get_conninfo_value(config_file_options.conninfo, "dbname", repmgr_db);
|
||||
|
||||
param_set_ine(&source_conninfo, "user", repmgr_user);
|
||||
param_set_ine(&source_conninfo, "dbname", repmgr_db);
|
||||
|
||||
primary_conn = establish_db_connection_by_params(&source_conninfo, false);
|
||||
|
||||
}
|
||||
|
||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to connect to primary"));
|
||||
log_detail("%s", PQerrorMessage(primary_conn));
|
||||
|
||||
if (witness_available == true)
|
||||
{
|
||||
PQfinish(witness_conn);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_hint(_("provide connection details to primary server"));
|
||||
}
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Check node exists and is really a witness */
|
||||
record_status = get_node_record(primary_conn, config_file_options.node_id, &node_record);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
log_error(_("no record found for node %i"), config_file_options.node_id);
|
||||
|
||||
if (witness_available == true)
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (node_record.type != WITNESS)
|
||||
{
|
||||
log_error(_("node %i is not a witness node"), config_file_options.node_id);
|
||||
log_detail(_("node %i is a %s node"), config_file_options.node_id, get_node_type_string(node_record.type));
|
||||
|
||||
if (witness_available == true)
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info(_("prerequisites for unregistering the witness node are met"));
|
||||
if (witness_available == true)
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
|
||||
exit(SUCCESS);
|
||||
}
|
||||
|
||||
log_info(_("unregistering witness node %i"), config_file_options.node_id);
|
||||
node_record_deleted = delete_node_record(primary_conn,
|
||||
config_file_options.node_id);
|
||||
|
||||
if (node_record_deleted == false)
|
||||
{
|
||||
PQfinish(primary_conn);
|
||||
PQfinish(witness_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* sync records from primary */
|
||||
if (witness_available == true && witness_copy_node_records(primary_conn, witness_conn) == false)
|
||||
{
|
||||
log_error(_("unable to copy repmgr node records from primary"));
|
||||
PQfinish(primary_conn);
|
||||
PQfinish(witness_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Log the event */
|
||||
create_event_record(primary_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"witness_unregister",
|
||||
true,
|
||||
NULL);
|
||||
|
||||
PQfinish(primary_conn);
|
||||
|
||||
if (witness_available == true)
|
||||
PQfinish(witness_conn);
|
||||
|
||||
log_info(_("witness unregistration complete"));
|
||||
log_detail(_("witness node with id %i (conninfo: %s) successfully unregistered"),
|
||||
config_file_options.node_id, config_file_options.conninfo);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
 * do_witness_help()
 *
 * Print usage information for the "witness register" and
 * "witness unregister" actions to stdout.
 */
void do_witness_help(void)
{
	print_help_header();

	printf(_("Usage:\n"));
	printf(_(" %s [OPTIONS] witness register\n"), progname());
	printf(_(" %s [OPTIONS] witness unregister\n"), progname());

	printf(_("WITNESS REGISTER\n"));
	puts("");
	printf(_(" \"witness register\" registers a witness node.\n"));
	puts("");
	printf(_(" Requires provision of connection information for the primary\n"));
	puts("");
	printf(_(" --dry-run check prerequisites but don't make any changes\n"));
	printf(_(" -F, --force overwrite an existing node record\n"));
	puts("");

	printf(_("WITNESS UNREGISTER\n"));
	puts("");
	/* this section describes "witness unregister", not "witness register" */
	printf(_(" \"witness unregister\" unregisters a witness node.\n"));
	puts("");
	printf(_(" --dry-run check prerequisites but don't make any changes\n"));
	printf(_(" -F, --force unregister when witness node not running\n"));
	puts("");

	return;
}
|
||||
27
repmgr-action-witness.h
Normal file
27
repmgr-action-witness.h
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* repmgr-action-witness.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2017
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _REPMGR_ACTION_WITNESS_H_
|
||||
#define _REPMGR_ACTION_WITNESS_H_
|
||||
|
||||
/* "repmgr witness register": register this node as a witness */
extern void do_witness_register(void);
|
||||
/* "repmgr witness unregister": remove this node's witness record */
extern void do_witness_unregister(void);
|
||||
|
||||
/* print usage information for the witness actions */
extern void do_witness_help(void);
|
||||
|
||||
#endif /* _REPMGR_ACTION_WITNESS_H_ */
|
||||
@@ -101,6 +101,7 @@ typedef struct
|
||||
bool replication_lag;
|
||||
bool role;
|
||||
bool slots;
|
||||
bool has_passfile;
|
||||
|
||||
/* "node join" options */
|
||||
char config_files[MAXLEN];
|
||||
@@ -151,7 +152,7 @@ typedef struct
|
||||
/* "node status" options */ \
|
||||
false, \
|
||||
/* "node check" options */ \
|
||||
false, false, false, false, false, \
|
||||
false, false, false, false, false, false, \
|
||||
/* "node join" options */ \
|
||||
"", \
|
||||
/* "node service" options */ \
|
||||
|
||||
@@ -57,6 +57,7 @@
|
||||
#include "repmgr-client-global.h"
|
||||
#include "repmgr-action-primary.h"
|
||||
#include "repmgr-action-standby.h"
|
||||
#include "repmgr-action-witness.h"
|
||||
#include "repmgr-action-bdr.h"
|
||||
#include "repmgr-action-node.h"
|
||||
|
||||
@@ -446,6 +447,10 @@ main(int argc, char **argv)
|
||||
runtime_options.slots = true;
|
||||
break;
|
||||
|
||||
case OPT_HAS_PASSFILE:
|
||||
runtime_options.has_passfile = true;
|
||||
break;
|
||||
|
||||
/*--------------------
|
||||
* "node rejoin" options
|
||||
*--------------------
|
||||
@@ -473,10 +478,6 @@ main(int argc, char **argv)
|
||||
runtime_options.list_actions = true;
|
||||
break;
|
||||
|
||||
case OPT_CHECK:
|
||||
runtime_options.check = true;
|
||||
break;
|
||||
|
||||
case OPT_CHECKPOINT:
|
||||
runtime_options.checkpoint = true;
|
||||
break;
|
||||
@@ -707,6 +708,7 @@ main(int argc, char **argv)
|
||||
*
|
||||
* { PRIMARY | MASTER } REGISTER |
|
||||
* STANDBY { REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER } |
|
||||
* WITNESS { REGISTER | UNREGISTER } |
|
||||
* BDR { REGISTER | UNREGISTER } |
|
||||
* NODE { STATUS | CHECK | REJOIN | SERVICE } |
|
||||
* CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | CLEANUP }
|
||||
@@ -775,6 +777,18 @@ main(int argc, char **argv)
|
||||
else if (strcasecmp(repmgr_action, "STATUS") == 0)
|
||||
action = NODE_STATUS;
|
||||
}
|
||||
else if (strcasecmp(repmgr_command, "WITNESS") == 0)
|
||||
{
|
||||
if (help_option == true)
|
||||
{
|
||||
do_witness_help();
|
||||
exit(SUCCESS);
|
||||
}
|
||||
else if (strcasecmp(repmgr_action, "REGISTER") == 0)
|
||||
action = WITNESS_REGISTER;
|
||||
else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
|
||||
action = WITNESS_UNREGISTER;
|
||||
}
|
||||
else if (strcasecmp(repmgr_command, "BDR") == 0)
|
||||
#else
|
||||
if (strcasecmp(repmgr_command, "BDR") == 0)
|
||||
@@ -1168,6 +1182,12 @@ main(int argc, char **argv)
|
||||
do_standby_switchover();
|
||||
break;
|
||||
|
||||
/* WITNESS */
|
||||
case WITNESS_REGISTER:
|
||||
do_witness_register();
|
||||
break;
|
||||
case WITNESS_UNREGISTER:
|
||||
do_witness_unregister();
|
||||
break;
|
||||
#else
|
||||
/* we won't ever reach here, but stop the compiler complaining */
|
||||
@@ -1179,6 +1199,8 @@ main(int argc, char **argv)
|
||||
case STANDBY_PROMOTE:
|
||||
case STANDBY_FOLLOW:
|
||||
case STANDBY_SWITCHOVER:
|
||||
case WITNESS_REGISTER:
|
||||
case WITNESS_UNREGISTER:
|
||||
break;
|
||||
|
||||
#endif
|
||||
@@ -1298,7 +1320,7 @@ check_cli_parameters(const int action)
|
||||
|
||||
/*
|
||||
* XXX if -D/--pgdata provided, and also
|
||||
* config_file_options.pgdaga, warn -D/--pgdata will be
|
||||
* config_file_options.pgdata, warn -D/--pgdata will be
|
||||
* ignored
|
||||
*/
|
||||
|
||||
@@ -1328,6 +1350,22 @@ check_cli_parameters(const int action)
|
||||
*/
|
||||
}
|
||||
break;
|
||||
case WITNESS_REGISTER:
|
||||
{
|
||||
if (!runtime_options.host_param_provided)
|
||||
{
|
||||
item_list_append_format(&cli_errors,
|
||||
_("host name for the source node must be provided when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case NODE_CHECK:
|
||||
if (runtime_options.has_passfile == true)
|
||||
{
|
||||
config_file_required = false;
|
||||
}
|
||||
break;
|
||||
case NODE_STATUS:
|
||||
if (runtime_options.node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
@@ -1376,6 +1414,8 @@ check_cli_parameters(const int action)
|
||||
case STANDBY_CLONE:
|
||||
case STANDBY_FOLLOW:
|
||||
case STANDBY_REGISTER:
|
||||
case WITNESS_REGISTER:
|
||||
case WITNESS_UNREGISTER:
|
||||
case CLUSTER_SHOW:
|
||||
case CLUSTER_MATRIX:
|
||||
case CLUSTER_CROSSCHECK:
|
||||
@@ -1615,6 +1655,9 @@ check_cli_parameters(const int action)
|
||||
case STANDBY_CLONE:
|
||||
case STANDBY_REGISTER:
|
||||
case STANDBY_SWITCHOVER:
|
||||
case WITNESS_REGISTER:
|
||||
case WITNESS_UNREGISTER:
|
||||
case NODE_REJOIN:
|
||||
case NODE_SERVICE:
|
||||
break;
|
||||
default:
|
||||
@@ -1670,6 +1713,11 @@ action_name(const int action)
|
||||
case STANDBY_FOLLOW:
|
||||
return "STANDBY FOLLOW";
|
||||
|
||||
case WITNESS_REGISTER:
|
||||
return "WITNESS REGISTER";
|
||||
case WITNESS_UNREGISTER:
|
||||
return "WITNESS UNREGISTER";
|
||||
|
||||
case BDR_REGISTER:
|
||||
return "BDR REGISTER";
|
||||
case BDR_UNREGISTER:
|
||||
@@ -2651,7 +2699,6 @@ init_node_record(t_node_info *node_record)
|
||||
node_record->priority = config_file_options.priority;
|
||||
node_record->active = true;
|
||||
|
||||
|
||||
if (config_file_options.location[0] != '\0')
|
||||
strncpy(node_record->location, config_file_options.location, MAXLEN);
|
||||
else
|
||||
@@ -2680,6 +2727,4 @@ init_node_record(t_node_info *node_record)
|
||||
{
|
||||
maxlen_snprintf(node_record->slot_name, "repmgr_slot_%i", config_file_options.node_id);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
#include "log.h"
|
||||
|
||||
|
||||
|
||||
#define NO_ACTION 0 /* Dummy default action */
|
||||
#define PRIMARY_REGISTER 1
|
||||
#define PRIMARY_UNREGISTER 2
|
||||
@@ -33,17 +32,19 @@
|
||||
#define STANDBY_PROMOTE 6
|
||||
#define STANDBY_FOLLOW 7
|
||||
#define STANDBY_SWITCHOVER 8
|
||||
#define BDR_REGISTER 9
|
||||
#define BDR_UNREGISTER 10
|
||||
#define NODE_STATUS 11
|
||||
#define NODE_CHECK 12
|
||||
#define NODE_SERVICE 13
|
||||
#define NODE_REJOIN 14
|
||||
#define CLUSTER_SHOW 15
|
||||
#define CLUSTER_CLEANUP 16
|
||||
#define CLUSTER_MATRIX 17
|
||||
#define CLUSTER_CROSSCHECK 18
|
||||
#define CLUSTER_EVENT 19
|
||||
#define WITNESS_REGISTER 9
|
||||
#define WITNESS_UNREGISTER 10
|
||||
#define BDR_REGISTER 11
|
||||
#define BDR_UNREGISTER 12
|
||||
#define NODE_STATUS 13
|
||||
#define NODE_CHECK 14
|
||||
#define NODE_SERVICE 15
|
||||
#define NODE_REJOIN 16
|
||||
#define CLUSTER_SHOW 17
|
||||
#define CLUSTER_CLEANUP 18
|
||||
#define CLUSTER_MATRIX 19
|
||||
#define CLUSTER_CROSSCHECK 20
|
||||
#define CLUSTER_EVENT 21
|
||||
|
||||
/* command line options without short versions */
|
||||
#define OPT_HELP 1001
|
||||
@@ -66,21 +67,21 @@
|
||||
#define OPT_UPSTREAM_NODE_ID 1018
|
||||
#define OPT_ACTION 1019
|
||||
#define OPT_LIST_ACTIONS 1020
|
||||
#define OPT_CHECK 1021
|
||||
#define OPT_CHECKPOINT 1022
|
||||
#define OPT_IS_SHUTDOWN_CLEANLY 1023
|
||||
#define OPT_ALWAYS_PROMOTE 1024
|
||||
#define OPT_FORCE_REWIND 1025
|
||||
#define OPT_NAGIOS 1026
|
||||
#define OPT_ARCHIVE_READY 1027
|
||||
#define OPT_OPTFORMAT 1028
|
||||
#define OPT_REPLICATION_LAG 1029
|
||||
#define OPT_CONFIG_FILES 1030
|
||||
#define OPT_SIBLINGS_FOLLOW 1031
|
||||
#define OPT_ROLE 1032
|
||||
#define OPT_DOWNSTREAM 1033
|
||||
#define OPT_SLOTS 1034
|
||||
#define OPT_CONFIG_ARCHIVE_DIR 1035
|
||||
#define OPT_CHECKPOINT 1021
|
||||
#define OPT_IS_SHUTDOWN_CLEANLY 1022
|
||||
#define OPT_ALWAYS_PROMOTE 1023
|
||||
#define OPT_FORCE_REWIND 1024
|
||||
#define OPT_NAGIOS 1025
|
||||
#define OPT_ARCHIVE_READY 1026
|
||||
#define OPT_OPTFORMAT 1027
|
||||
#define OPT_REPLICATION_LAG 1028
|
||||
#define OPT_CONFIG_FILES 1029
|
||||
#define OPT_SIBLINGS_FOLLOW 1030
|
||||
#define OPT_ROLE 1031
|
||||
#define OPT_DOWNSTREAM 1032
|
||||
#define OPT_SLOTS 1033
|
||||
#define OPT_CONFIG_ARCHIVE_DIR 1034
|
||||
#define OPT_HAS_PASSFILE 1035
|
||||
/* deprecated since 3.3 */
|
||||
#define OPT_DATA_DIR 999
|
||||
#define OPT_NO_CONNINFO_PASSWORD 998
|
||||
@@ -141,7 +142,6 @@ static struct option long_options[] =
|
||||
*
|
||||
* Note: --force-rewind accepted to pass to "node join"
|
||||
*/
|
||||
{"remote-config-file", required_argument, NULL, 'C'},
|
||||
{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE},
|
||||
{"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW},
|
||||
|
||||
@@ -154,6 +154,7 @@ static struct option long_options[] =
|
||||
{"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG},
|
||||
{"role", no_argument, NULL, OPT_ROLE},
|
||||
{"slots", no_argument, NULL, OPT_SLOTS},
|
||||
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
||||
|
||||
/* "node rejoin" options */
|
||||
{"config-files", required_argument, NULL, OPT_CONFIG_FILES},
|
||||
@@ -162,7 +163,6 @@ static struct option long_options[] =
|
||||
|
||||
/* "node service" options */
|
||||
{"action", required_argument, NULL, OPT_ACTION},
|
||||
{"check", no_argument, NULL, OPT_CHECK},
|
||||
{"list-actions", no_argument, NULL, OPT_LIST_ACTIONS},
|
||||
{"checkpoint", no_argument, NULL, OPT_CHECKPOINT},
|
||||
|
||||
@@ -177,6 +177,8 @@ static struct option long_options[] =
|
||||
/* deprecated */
|
||||
{"check-upstream-config", no_argument, NULL, OPT_CHECK_UPSTREAM_CONFIG},
|
||||
{"no-conninfo-password", no_argument, NULL, OPT_NO_CONNINFO_PASSWORD},
|
||||
/* previously used by "standby switchover" */
|
||||
{"remote-config-file", required_argument, NULL, 'C'},
|
||||
/* legacy alias for -D/--pgdata */
|
||||
{"data-dir", required_argument, NULL, OPT_DATA_DIR},
|
||||
/* replaced by --node-id */
|
||||
|
||||
286
repmgr.c
286
repmgr.c
@@ -67,7 +67,6 @@ typedef struct repmgrdSharedState
|
||||
TimestampTz last_updated;
|
||||
int local_node_id;
|
||||
/* streaming failover */
|
||||
NodeState node_state;
|
||||
NodeVotingStatus voting_status;
|
||||
int current_electoral_term;
|
||||
int candidate_node_id;
|
||||
@@ -87,52 +86,30 @@ void _PG_fini(void);
|
||||
static void repmgr_shmem_startup(void);
|
||||
|
||||
Datum set_local_node_id(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_local_node_id);
|
||||
|
||||
Datum standby_set_last_updated(PG_FUNCTION_ARGS);
|
||||
Datum get_local_node_id(PG_FUNCTION_ARGS);
|
||||
PG_FUNCTION_INFO_V1(get_local_node_id);
|
||||
|
||||
Datum standby_set_last_updated(PG_FUNCTION_ARGS);
|
||||
PG_FUNCTION_INFO_V1(standby_set_last_updated);
|
||||
|
||||
Datum standby_get_last_updated(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(standby_get_last_updated);
|
||||
|
||||
|
||||
Datum request_vote(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(request_vote);
|
||||
|
||||
Datum get_voting_status(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(get_voting_status);
|
||||
|
||||
Datum set_voting_status_initiated(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_voting_status_initiated);
|
||||
|
||||
Datum other_node_is_candidate(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(other_node_is_candidate);
|
||||
|
||||
Datum notify_follow_primary(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(notify_follow_primary);
|
||||
|
||||
Datum get_new_primary(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(get_new_primary);
|
||||
|
||||
Datum reset_voting_status(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(reset_voting_status);
|
||||
|
||||
Datum am_bdr_failover_handler(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(am_bdr_failover_handler);
|
||||
|
||||
Datum unset_bdr_failover_handler(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(unset_bdr_failover_handler);
|
||||
|
||||
|
||||
@@ -144,7 +121,6 @@ _PG_init(void)
|
||||
{
|
||||
elog(DEBUG1, "repmgr init");
|
||||
|
||||
/* error here? */
|
||||
if (!process_shared_preload_libraries_in_progress)
|
||||
return;
|
||||
|
||||
@@ -227,12 +203,17 @@ repmgr_shmem_startup(void)
|
||||
Datum
|
||||
set_local_node_id(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int local_node_id = PG_GETARG_INT32(0);
|
||||
int local_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
if (PG_ARGISNULL(0))
|
||||
PG_RETURN_NULL();
|
||||
|
||||
local_node_id = PG_GETARG_INT32(0);
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||
|
||||
/* only set local_node_id once, as it should never change */
|
||||
if (shared_state->local_node_id == UNKNOWN_NODE_ID)
|
||||
@@ -245,6 +226,23 @@ set_local_node_id(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
get_local_node_id(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int local_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
local_node_id = shared_state->local_node_id;
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
PG_RETURN_INT32(local_node_id);
|
||||
}
|
||||
|
||||
|
||||
/* update and return last updated with current timestamp */
|
||||
Datum
|
||||
standby_set_last_updated(PG_FUNCTION_ARGS)
|
||||
@@ -254,7 +252,7 @@ standby_set_last_updated(PG_FUNCTION_ARGS)
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||
shared_state->last_updated = last_updated;
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
@@ -272,7 +270,7 @@ standby_get_last_updated(PG_FUNCTION_ARGS)
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
last_updated = shared_state->last_updated;
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
@@ -286,210 +284,21 @@ standby_get_last_updated(PG_FUNCTION_ARGS)
|
||||
/* failover functions */
|
||||
/* ===================*/
|
||||
|
||||
Datum
|
||||
request_vote(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
StringInfoData query;
|
||||
|
||||
#if (PG_VERSION_NUM >= 90400)
|
||||
XLogRecPtr our_lsn = InvalidXLogRecPtr;
|
||||
bool isnull;
|
||||
#else
|
||||
char *value = NULL;
|
||||
char lsn_text[64] = "";
|
||||
#endif
|
||||
|
||||
/* node_id used for logging purposes */
|
||||
int requesting_node_id = PG_GETARG_INT32(0);
|
||||
int current_electoral_term = PG_GETARG_INT32(1);
|
||||
|
||||
int ret;
|
||||
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
|
||||
/* only do something if local_node_id is initialised */
|
||||
if (shared_state->local_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
/* this node has initiated voting or already responded to another node */
|
||||
if (shared_state->voting_status != VS_NO_VOTE)
|
||||
{
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
elog(INFO, "node %i has received request from node %i for electoral term %i (our term: %i)",
|
||||
shared_state->local_node_id,
|
||||
requesting_node_id, current_electoral_term,
|
||||
shared_state->current_electoral_term);
|
||||
|
||||
SPI_connect();
|
||||
|
||||
initStringInfo(&query);
|
||||
|
||||
appendStringInfo(
|
||||
&query,
|
||||
#if (PG_VERSION_NUM >= 100000)
|
||||
"SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
||||
#else
|
||||
"SELECT pg_catalog.pg_last_xlog_receive_location()");
|
||||
#endif
|
||||
|
||||
elog(DEBUG1, "query: %s", query.data);
|
||||
ret = SPI_execute(query.data, true, 0);
|
||||
|
||||
if (ret < 0)
|
||||
{
|
||||
SPI_finish();
|
||||
elog(WARNING, "unable to retrieve last received LSN");
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
#if (PG_VERSION_NUM >= 90400)
|
||||
PG_RETURN_LSN(InvalidOid);
|
||||
#else
|
||||
PG_RETURN_TEXT_P(cstring_to_text("0/0"));
|
||||
#endif
|
||||
}
|
||||
|
||||
#if (PG_VERSION_NUM >= 90400)
|
||||
our_lsn = DatumGetLSN(SPI_getbinval(SPI_tuptable->vals[0],
|
||||
SPI_tuptable->tupdesc,
|
||||
1, &isnull));
|
||||
|
||||
elog(DEBUG1, "our LSN is %X/%X",
|
||||
(uint32) (our_lsn >> 32),
|
||||
(uint32) our_lsn);
|
||||
#else
|
||||
value = SPI_getvalue(SPI_tuptable->vals[0],
|
||||
SPI_tuptable->tupdesc,
|
||||
1);
|
||||
strncpy(lsn_text, value, 64);
|
||||
pfree(value);
|
||||
elog(DEBUG1, "our LSN is %s", lsn_text);
|
||||
#endif
|
||||
|
||||
/* indicate this node has responded to a vote request */
|
||||
shared_state->voting_status = VS_VOTE_REQUEST_RECEIVED;
|
||||
shared_state->current_electoral_term = current_electoral_term;
|
||||
|
||||
/* should we free "query" here? */
|
||||
SPI_finish();
|
||||
}
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
#if (PG_VERSION_NUM >= 90400)
|
||||
PG_RETURN_LSN(our_lsn);
|
||||
#else
|
||||
PG_RETURN_TEXT_P(cstring_to_text(lsn_text));
|
||||
#endif
|
||||
#else
|
||||
PG_RETURN(InvalidOid);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
Datum
|
||||
get_voting_status(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
NodeVotingStatus voting_status;
|
||||
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
voting_status = shared_state->voting_status;
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
PG_RETURN_INT32(voting_status);
|
||||
#else
|
||||
PG_RETURN_INT32(VS_UNKNOWN);
|
||||
#endif
|
||||
}
|
||||
|
||||
Datum
|
||||
set_voting_status_initiated(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
int electoral_term = -1;
|
||||
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
|
||||
/* only do something if local_node_id is initialised */
|
||||
if (shared_state->local_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
shared_state->voting_status = VS_VOTE_INITIATED;
|
||||
shared_state->current_electoral_term += 1;
|
||||
|
||||
electoral_term = shared_state->current_electoral_term;
|
||||
|
||||
elog(INFO, "setting voting term to %i", electoral_term);
|
||||
}
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
PG_RETURN_INT32(electoral_term);
|
||||
#else
|
||||
PG_RETURN_INT32(-1);
|
||||
#endif
|
||||
}
|
||||
|
||||
Datum
|
||||
other_node_is_candidate(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
int requesting_node_id = PG_GETARG_INT32(0);
|
||||
int electoral_term = PG_GETARG_INT32(1);
|
||||
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
|
||||
/* only do something if local_node_id is initialised */
|
||||
if (shared_state->local_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
if (shared_state->current_electoral_term == electoral_term)
|
||||
{
|
||||
if (shared_state->candidate_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
elog(INFO, "node %i requesting candidature, but node %i already candidate",
|
||||
requesting_node_id,
|
||||
shared_state->candidate_node_id);
|
||||
PG_RETURN_BOOL(false);
|
||||
}
|
||||
}
|
||||
|
||||
shared_state->candidate_node_id = requesting_node_id;
|
||||
elog(INFO, "node %i is candidate", requesting_node_id);
|
||||
}
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
PG_RETURN_BOOL(true);
|
||||
#else
|
||||
PG_RETURN_BOOL(false);
|
||||
#endif
|
||||
}
|
||||
|
||||
Datum
|
||||
notify_follow_primary(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
int primary_node_id = PG_GETARG_INT32(0);
|
||||
int primary_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
if (PG_ARGISNULL(0))
|
||||
PG_RETURN_NULL();
|
||||
|
||||
primary_node_id = PG_GETARG_INT32(0);
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
|
||||
/* only do something if local_node_id is initialised */
|
||||
@@ -499,6 +308,8 @@ notify_follow_primary(PG_FUNCTION_ARGS)
|
||||
shared_state->local_node_id,
|
||||
primary_node_id);
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||
/* Explicitly set the primary node id */
|
||||
shared_state->candidate_node_id = primary_node_id;
|
||||
shared_state->follow_new_primary = true;
|
||||
@@ -526,6 +337,10 @@ get_new_primary(PG_FUNCTION_ARGS)
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
#endif
|
||||
|
||||
if (new_primary_node_id == UNKNOWN_NODE_ID)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
PG_RETURN_INT32(new_primary_node_id);
|
||||
}
|
||||
|
||||
@@ -542,6 +357,9 @@ reset_voting_status(PG_FUNCTION_ARGS)
|
||||
/* only do something if local_node_id is initialised */
|
||||
if (shared_state->local_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
LWLockRelease(shared_state->lock);
|
||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||
|
||||
shared_state->voting_status = VS_NO_VOTE;
|
||||
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
|
||||
shared_state->follow_new_primary = false;
|
||||
@@ -556,16 +374,23 @@ reset_voting_status(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
am_bdr_failover_handler(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int node_id = PG_GETARG_INT32(0);
|
||||
int node_id = UNKNOWN_NODE_ID;
|
||||
bool am_handler = false;
|
||||
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
if (PG_ARGISNULL(0))
|
||||
PG_RETURN_NULL();
|
||||
|
||||
node_id = PG_GETARG_INT32(0);
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
|
||||
if (shared_state->bdr_failover_handler == UNKNOWN_NODE_ID)
|
||||
{
|
||||
LWLockRelease(shared_state->lock);
|
||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||
shared_state->bdr_failover_handler = node_id;
|
||||
am_handler = true;
|
||||
}
|
||||
@@ -586,11 +411,16 @@ unset_bdr_failover_handler(PG_FUNCTION_ARGS)
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
|
||||
/* only do something if local_node_id is initialised */
|
||||
if (shared_state->local_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
LWLockRelease(shared_state->lock);
|
||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||
|
||||
shared_state->bdr_failover_handler = UNKNOWN_NODE_ID;
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
}
|
||||
|
||||
|
||||
@@ -68,7 +68,12 @@
|
||||
#recovery_min_apply_delay= # If provided, "recovery_min_apply_delay" in recovery.conf
|
||||
# will be set to this value.
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Witness server settings
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
#witness_sync_interval=15 # interval (in seconds) to synchronise node records
|
||||
# to the witness server
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Logging settings
|
||||
@@ -111,7 +116,7 @@
|
||||
# By default, all notifications will be passed; the notification types
|
||||
# can be filtered to explicitly named ones, e.g.:
|
||||
#
|
||||
# event_notifications=master_register,standby_register
|
||||
# event_notifications=primary_register,standby_register
|
||||
|
||||
#event_notification_command='' # An external program or script which
|
||||
# can be executed by the user under which
|
||||
@@ -134,7 +139,7 @@
|
||||
#use_primary_conninfo_password=false # explicitly set "password" in recovery.conf's
|
||||
# "primary_conninfo" parameter using the value contained
|
||||
# in the environment variable PGPASSWORD
|
||||
|
||||
#passfile='' # path to .pgpass file to include in "primary_conninfo"
|
||||
#------------------------------------------------------------------------------
|
||||
# external command options
|
||||
#------------------------------------------------------------------------------
|
||||
@@ -210,6 +215,15 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
# 'manual': repmgrd will take no action and the node will require
|
||||
# manual attention to reattach it to replication
|
||||
# (does not apply to BDR mode)
|
||||
|
||||
#priority=100 # indicate a preferred priority for promoting nodes;
|
||||
# a value of zero prevents the node being promoted to primary
|
||||
# (default: 100)
|
||||
|
||||
#reconnect_attempts=6 # Number of attempts which will be made to reconnect to an unreachable
|
||||
# primary (or other upstream node)
|
||||
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
||||
# primary (or other upstream node)
|
||||
#promote_command= # command to execute when promoting a new primary; use something like:
|
||||
#
|
||||
# repmgr standby promote -f /etc/repmgr.conf
|
||||
@@ -223,6 +237,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
# will wait for a notification from the new primary,
|
||||
# before falling back to degraded monitoring
|
||||
#monitoring_history=no
|
||||
|
||||
#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd will terminate if the
|
||||
# server being monitored is no longer available. -1 (default)
|
||||
# disables the timeout completely.
|
||||
@@ -260,8 +275,10 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
#service_stop_command = ''
|
||||
#service_restart_command = ''
|
||||
#service_reload_command = ''
|
||||
#service_promote_command = '' # Note: this overrides any value contained
|
||||
# in the setting "promote_command"
|
||||
#service_promote_command = '' # Note: this overrides any value contained in the setting
|
||||
# "promote_command". This is intended for systems which
|
||||
# provide a package-level promote command, such as Debian's
|
||||
# "pg_ctlcluster"
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
29
repmgr.h
29
repmgr.h
@@ -56,25 +56,26 @@
|
||||
#define NO_UPSTREAM_NODE -1
|
||||
#define UNKNOWN_NODE_ID -1
|
||||
|
||||
#define VOTING_TERM_NOT_SET -1
|
||||
|
||||
/*
|
||||
* various default values - ensure repmgr.conf.sample is updated
|
||||
* if any of these are changed
|
||||
*/
|
||||
#define DEFAULT_LOCATION "default"
|
||||
#define DEFAULT_PRIORITY 100
|
||||
#define DEFAULT_RECONNECTION_ATTEMPTS 6 /* seconds */
|
||||
#define DEFAULT_RECONNECTION_INTERVAL 10 /* seconds */
|
||||
#define DEFAULT_MONITORING_INTERVAL 2 /* seconds */
|
||||
#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */
|
||||
#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */
|
||||
#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */
|
||||
#define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */
|
||||
#define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */
|
||||
|
||||
#define DEFAULT_LOCATION "default"
|
||||
#define DEFAULT_PRIORITY 100
|
||||
#define DEFAULT_RECONNECTION_ATTEMPTS 6 /* seconds */
|
||||
#define DEFAULT_RECONNECTION_INTERVAL 10 /* seconds */
|
||||
#define DEFAULT_MONITORING_INTERVAL 2 /* seconds */
|
||||
#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */
|
||||
#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */
|
||||
#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */
|
||||
#define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */
|
||||
#define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */
|
||||
#define DEFAULT_WITNESS_SYNC_INTERVAL 15 /* seconds */
|
||||
|
||||
#ifndef RECOVERY_COMMAND_FILE
|
||||
#define RECOVERY_COMMAND_FILE "recovery.conf"
|
||||
|
||||
@@ -1,25 +1,3 @@
|
||||
/*
|
||||
* repmgr_version.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2017
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _VERSION_H_
|
||||
#define _VERSION_H_
|
||||
|
||||
#define REPMGR_VERSION_DATE ""
|
||||
#define REPMGR_VERSION "4.0beta1"
|
||||
#define REPMGR_VERSION "4.0.1"
|
||||
|
||||
#endif
|
||||
|
||||
@@ -472,8 +472,8 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
|
||||
* event "bdr_failover"
|
||||
*/
|
||||
|
||||
create_event_notification_extended(
|
||||
next_node_conn,
|
||||
|
||||
create_event_notification_extended(next_node_conn,
|
||||
&config_file_options,
|
||||
monitored_node->node_id,
|
||||
"bdr_failover",
|
||||
|
||||
1123
repmgrd-physical.c
1123
repmgrd-physical.c
File diff suppressed because it is too large
Load Diff
@@ -23,6 +23,7 @@ void do_physical_node_check(void);
|
||||
|
||||
void monitor_streaming_primary(void);
|
||||
void monitor_streaming_standby(void);
|
||||
void monitor_streaming_witness(void);
|
||||
void close_connections_physical(void);
|
||||
|
||||
#endif /* _REPMGRD_PHYSICAL_H_ */
|
||||
|
||||
21
repmgrd.c
21
repmgrd.c
@@ -332,6 +332,24 @@ main(int argc, char **argv)
|
||||
|
||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||
|
||||
{
|
||||
/*
|
||||
* sanity-check that the shared library is loaded and shared memory
|
||||
* can be written by attempting to retrieve the previously stored node_id
|
||||
*/
|
||||
int stored_local_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
stored_local_node_id = repmgrd_get_local_node_id(local_conn);
|
||||
|
||||
if (stored_local_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
log_error(_("unable to write to shared memory"));
|
||||
log_hint(_("ensure \"shared_preload_libraries\" includes \"repmgr\""));
|
||||
PQfinish(local_conn);
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
if (config_file_options.replication_type == REPLICATION_TYPE_BDR)
|
||||
{
|
||||
log_debug("node id is %i", local_node_info.node_id);
|
||||
@@ -389,6 +407,9 @@ start_monitoring(void)
|
||||
case STANDBY:
|
||||
monitor_streaming_standby();
|
||||
break;
|
||||
case WITNESS:
|
||||
monitor_streaming_witness();
|
||||
break;
|
||||
#else
|
||||
case PRIMARY:
|
||||
case STANDBY:
|
||||
|
||||
2
sql/.gitignore
vendored
Normal file
2
sql/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# Might be created by repmgr3
|
||||
/repmgr_funcs.sql
|
||||
@@ -18,14 +18,13 @@ SELECT * FROM repmgr.show_nodes;
|
||||
|
||||
-- functions
|
||||
SELECT repmgr.am_bdr_failover_handler(-1);
|
||||
SELECT repmgr.am_bdr_failover_handler(NULL);
|
||||
SELECT repmgr.get_new_primary();
|
||||
SELECT repmgr.get_voting_status();
|
||||
SELECT repmgr.notify_follow_primary(-1);
|
||||
SELECT repmgr.other_node_is_candidate(-1,-1);
|
||||
SELECT repmgr.request_vote(-1,-1);
|
||||
SELECT repmgr.notify_follow_primary(NULL);
|
||||
SELECT repmgr.reset_voting_status();
|
||||
SELECT repmgr.set_local_node_id(-1);
|
||||
SELECT repmgr.set_voting_status_initiated();
|
||||
SELECT repmgr.set_local_node_id(NULL);
|
||||
SELECT repmgr.standby_get_last_updated();
|
||||
SELECT repmgr.standby_set_last_updated();
|
||||
SELECT repmgr.unset_bdr_failover_handler();
|
||||
|
||||
31
strutil.c
31
strutil.c
@@ -369,6 +369,31 @@ escape_string(PGconn *conn, const char *string)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* simple function to escape double quotes only
|
||||
*/
|
||||
|
||||
void
|
||||
escape_double_quotes(char *string, PQExpBufferData *out)
|
||||
{
|
||||
char *ptr;
|
||||
|
||||
for (ptr = string; *ptr; ptr++)
|
||||
{
|
||||
if (*ptr == '"')
|
||||
{
|
||||
if ( (ptr == string) || (ptr > string && *(ptr - 1) != '\\'))
|
||||
{
|
||||
appendPQExpBufferChar(out, '\\');
|
||||
}
|
||||
}
|
||||
appendPQExpBufferChar(out, *ptr);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
char *
|
||||
string_skip_prefix(const char *prefix, char *string)
|
||||
{
|
||||
@@ -413,12 +438,16 @@ trim(char *s)
|
||||
--s2;
|
||||
*(s2 + 1) = '\0';
|
||||
|
||||
/* String is all whitespace - no need for further processing */
|
||||
if (s2 + 1 == s1)
|
||||
return s;
|
||||
|
||||
/* Trim left side */
|
||||
while ((isspace(*s1)) && (s1 < s2))
|
||||
++s1;
|
||||
|
||||
/* Copy finished string */
|
||||
memmove(s, s1, s2 - s1);
|
||||
memmove(s, s1, (s2 - s1) + 1);
|
||||
s[s2 - s1 + 1] = '\0';
|
||||
|
||||
return s;
|
||||
|
||||
15
strutil.h
15
strutil.h
@@ -33,12 +33,17 @@
|
||||
#define MAXLEN_STR STR(MAXLEN)
|
||||
|
||||
|
||||
/*
|
||||
* These values must match the Nagios return codes defined here:
|
||||
*
|
||||
* https://assets.nagios.com/downloads/nagioscore/docs/nagioscore/3/en/pluginapi.html
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
CHECK_STATUS_OK = 0,
|
||||
CHECK_STATUS_WARNING,
|
||||
CHECK_STATUS_CRITICAL,
|
||||
CHECK_STATUS_UNKNOWN
|
||||
CHECK_STATUS_OK = 0,
|
||||
CHECK_STATUS_WARNING = 1,
|
||||
CHECK_STATUS_CRITICAL = 2,
|
||||
CHECK_STATUS_UNKNOWN = 3
|
||||
} CheckStatus;
|
||||
|
||||
typedef enum
|
||||
@@ -142,6 +147,8 @@ extern char *escape_recovery_conf_value(const char *src);
|
||||
|
||||
extern char *escape_string(PGconn *conn, const char *string);
|
||||
|
||||
extern void escape_double_quotes(char *string, PQExpBufferData *out);
|
||||
|
||||
extern void
|
||||
append_where_clause(PQExpBufferData *where_clause, const char *clause,...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||
|
||||
Reference in New Issue
Block a user