mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
199 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a8232337d8 | ||
|
|
c9eb1bfcc0 | ||
|
|
db552dfbc7 | ||
|
|
9732f78565 | ||
|
|
eb7dca2919 | ||
|
|
c113102926 | ||
|
|
ed6a167915 | ||
|
|
fbbe7afd61 | ||
|
|
ae1fc93e48 | ||
|
|
7b4ee80af2 | ||
|
|
0b8755e278 | ||
|
|
d3e1937808 | ||
|
|
871d6fdee3 | ||
|
|
c7dfe9e040 | ||
|
|
5c92a9e057 | ||
|
|
aa5f025738 | ||
|
|
5b91a2d409 | ||
|
|
596a19ee37 | ||
|
|
23ff83b3b4 | ||
|
|
ba1f6bee0d | ||
|
|
da9c8f2491 | ||
|
|
64035ef701 | ||
|
|
da3a5ab1dc | ||
|
|
9d301b4789 | ||
|
|
c070c649f7 | ||
|
|
3b823396eb | ||
|
|
c19e7f1025 | ||
|
|
e4b5a1e19f | ||
|
|
f96cc3b906 | ||
|
|
a481ca7ce2 | ||
|
|
32dc450a09 | ||
|
|
34dbf64f50 | ||
|
|
ea653a8dbc | ||
|
|
50894b6124 | ||
|
|
94e187c476 | ||
|
|
de6284ae79 | ||
|
|
c54045bcd8 | ||
|
|
c0a53471e1 | ||
|
|
2eec8b5d79 | ||
|
|
c11e92cf2a | ||
|
|
f294d09034 | ||
|
|
26c597ef5a | ||
|
|
b8efbb7a15 | ||
|
|
3044696c05 | ||
|
|
6dc1969ad5 | ||
|
|
cb41ef1733 | ||
|
|
d10f1f289e | ||
|
|
5731ba6043 | ||
|
|
3d6437c8f8 | ||
|
|
54b5c8ad94 | ||
|
|
0eca08ffaf | ||
|
|
05c1dc2b92 | ||
|
|
2bd300073d | ||
|
|
01e020df8e | ||
|
|
ae7963dc64 | ||
|
|
faffb2a6e7 | ||
|
|
5d57044118 | ||
|
|
07a88c78a5 | ||
|
|
f7df8b9c80 | ||
|
|
20920b3da1 | ||
|
|
683f4de182 | ||
|
|
0c62821ffb | ||
|
|
6b70e8bbe6 | ||
|
|
6b223698c9 | ||
|
|
aee12dc2c7 | ||
|
|
c5c86e1ada | ||
|
|
7476dc84f2 | ||
|
|
f6d63f5216 | ||
|
|
a608b0bc18 | ||
|
|
469ebba656 | ||
|
|
647c21ad0e | ||
|
|
3d2530d6f9 | ||
|
|
b26e400199 | ||
|
|
152e9545a4 | ||
|
|
83b8f05221 | ||
|
|
486f8e5a2c | ||
|
|
e517cc74d1 | ||
|
|
26285b470f | ||
|
|
1521657965 | ||
|
|
041604e303 | ||
|
|
0be0100a7c | ||
|
|
2133834dda | ||
|
|
d5fd93c350 | ||
|
|
5804778b58 | ||
|
|
407a7ea2f4 | ||
|
|
4d2eca0978 | ||
|
|
9d25544ab5 | ||
|
|
8506607388 | ||
|
|
e8e059c26d | ||
|
|
38d293694d | ||
|
|
54a10a0c3f | ||
|
|
a8016f602f | ||
|
|
de57ecdad1 | ||
|
|
1fde81cf3f | ||
|
|
146c412061 | ||
|
|
e9cb61ae7a | ||
|
|
50e9460b3e | ||
|
|
47e7cbe147 | ||
|
|
bf0be3eb43 | ||
|
|
270da1294c | ||
|
|
d3c47f450f | ||
|
|
c20475f94a | ||
|
|
e0560c3e70 | ||
|
|
3fa2bef6f4 | ||
|
|
f8a0b051c8 | ||
|
|
3e4a5e6ff5 | ||
|
|
020b5b6982 | ||
|
|
932326e4a0 | ||
|
|
019cd081e8 | ||
|
|
3ace908126 | ||
|
|
2ad174489c | ||
|
|
9124e0f0a2 | ||
|
|
060b746743 | ||
|
|
bdb82d3aba | ||
|
|
f6a6df3600 | ||
|
|
67e27f9ecd | ||
|
|
454c0b7bd9 | ||
|
|
faf297b07f | ||
|
|
0dae8c9f0b | ||
|
|
3f872cde0c | ||
|
|
e331069f53 | ||
|
|
53ebde8f33 | ||
|
|
5e9d50f8ca | ||
|
|
347e753c27 | ||
|
|
2f978847b1 | ||
|
|
3014f72fda | ||
|
|
e02ddd0f37 | ||
|
|
29fcee2209 | ||
|
|
f61f7f82eb | ||
|
|
efe28cbbeb | ||
|
|
6131c1d8ce | ||
|
|
c907b7b33d | ||
|
|
e6644305d3 | ||
|
|
31b856dd9f | ||
|
|
dff2bcc5de | ||
|
|
688e609169 | ||
|
|
3e68c9fcc6 | ||
|
|
d459b92186 | ||
|
|
2a898721c0 | ||
|
|
35782d83c0 | ||
|
|
e16eb42693 | ||
|
|
4d6dc57589 | ||
|
|
cbc97d84ac | ||
|
|
96fe7dd2d6 | ||
|
|
13935a88c9 | ||
|
|
5275890467 | ||
|
|
7f865fdaf3 | ||
|
|
9e2fb7ea13 | ||
|
|
a3428e4d8a | ||
|
|
03b9475755 | ||
|
|
de1eb3c459 | ||
|
|
a13eccccc5 | ||
|
|
158f132bc0 | ||
|
|
cdf54d217a | ||
|
|
1a8a82f207 | ||
|
|
60e877ca39 | ||
|
|
91531bffe4 | ||
|
|
fc5f46ca5a | ||
|
|
b76952e136 | ||
|
|
c3a1969f55 | ||
|
|
11d856a1ec | ||
|
|
fbf357947d | ||
|
|
47eaa99537 | ||
|
|
aeee11d1b7 | ||
|
|
e4713c5eca | ||
|
|
e55e5a0581 | ||
|
|
fb0aae183d | ||
|
|
52655e9cd5 | ||
|
|
c5d91ca88c | ||
|
|
9f5edd07ad | ||
|
|
f58b102d51 | ||
|
|
90733aecf7 | ||
|
|
e0be228c89 | ||
|
|
a9759cf6ca | ||
|
|
6852ac82c6 | ||
|
|
c27bd2a135 | ||
|
|
5045e2eb9d | ||
|
|
23f7af17a2 | ||
|
|
93936c090d | ||
|
|
564c951f0c | ||
|
|
3f5e8f6aec | ||
|
|
a6a97cda86 | ||
|
|
18c8e4c529 | ||
|
|
6984fe7029 | ||
|
|
5ecc3a0a8f | ||
|
|
febde097be | ||
|
|
19ea248226 | ||
|
|
acdbd1110a | ||
|
|
946683182c | ||
|
|
c9fbb7febf | ||
|
|
ff966fe533 | ||
|
|
7001960cc1 | ||
|
|
1cfba44799 | ||
|
|
d1f9ca4b43 | ||
|
|
f6c253f8a6 | ||
|
|
95ec8d8b21 | ||
|
|
041f1b7667 | ||
|
|
104279016a | ||
|
|
901a7603b1 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -47,9 +47,6 @@ lib*.pc
|
||||
# other
|
||||
/.lineno
|
||||
*.dSYM
|
||||
*.orig
|
||||
*.rej
|
||||
|
||||
# generated binaries
|
||||
repmgr
|
||||
repmgrd
|
||||
|
||||
@@ -2,7 +2,7 @@ License and Contributions
|
||||
=========================
|
||||
|
||||
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
||||
Copyright 2010-2019, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
||||
Copyright 2010-2018, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
||||
details.
|
||||
|
||||
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
||||
@@ -24,7 +24,7 @@ Code style
|
||||
Code in repmgr should be formatted to the same standards as the main PostgreSQL
|
||||
project. For more details see:
|
||||
|
||||
https://www.postgresql.org/docs/current/source-format.html
|
||||
https://www.postgresql.org/docs/current/static/source-format.html
|
||||
|
||||
Contributors should reformat their code similarly before submitting code to
|
||||
the project, in order to minimize merge conflicts with other work.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2010-2019, 2ndQuadrant Limited
|
||||
Copyright (c) 2010-2018, 2ndQuadrant Limited
|
||||
All rights reserved.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
|
||||
6
FAQ.md
6
FAQ.md
@@ -1,10 +1,10 @@
|
||||
FAQ - Frequently Asked Questions about repmgr
|
||||
=============================================
|
||||
|
||||
The repmgr 4 FAQ is located here: [repmgr FAQ (Frequently Asked Questions)](https://repmgr.org/docs/current/appendix-faq.html "repmgr FAQ")
|
||||
The repmgr 4 FAQ is located here:
|
||||
|
||||
https://repmgr.org/docs/appendix-faq.html
|
||||
|
||||
The repmgr 3.x FAQ can be found here:
|
||||
|
||||
https://github.com/2ndQuadrant/repmgr/blob/REL3_3_STABLE/FAQ.md
|
||||
|
||||
Note that repmgr 3.x is no longer supported.
|
||||
|
||||
148
HISTORY
148
HISTORY
@@ -1,149 +1,4 @@
|
||||
4.3 2019-??
|
||||
repmgr: add "daemon (start|stop)" command; GitHub #528 (Ian)
|
||||
repmgr: add --version-number command line option (Ian)
|
||||
repmgr: add --compact option to "cluster show"; GitHub #521 (Ian)
|
||||
repmgr: cluster show - differentiate between unreachable nodes
|
||||
and nodes which are running but rejecting connections (Ian)
|
||||
repmgr: add --dry-run option to "standby promote"; GitHub #522 (Ian)
|
||||
repmgr: add "node check --data-directory-config"; GitHub #523 (Ian)
|
||||
repmgr: prevent potential race condition in "standby switchover"
|
||||
when checking received WAL location; GitHub #518 (Ian)
|
||||
repmgr: ensure "standby switchover" verifies repmgr can read the
|
||||
data directory on the demotion candidate; GitHub #523 (Ian)
|
||||
repmgr: ensure "standby switchover" verifies replication connection
|
||||
exists; GitHub #519 (Ian)
|
||||
repmgr: add sanity check for correct extension version (Ian)
|
||||
repmgr: ensure "witness register --dry-run" does not attempt to read node
|
||||
tables if repmgr extension not installed; GitHub #513 (Ian)
|
||||
repmgr: ensure "standby register" fails when --upstream-node-id is the
|
||||
same as the local node ID (Ian)
|
||||
repmgrd: check binary and extension major versions match; GitHub #515 (Ian)
|
||||
repmgrd: on a cascaded standby, don't fail over if "failover=manual";
|
||||
GitHub #531 (Ian)
|
||||
repmgrd: don't consider nodes where repmgrd is not running as promotion
|
||||
candidates (Ian)
|
||||
repmgrd: add option "connection_check_type" (Ian)
|
||||
repmgrd: improve witness monitoring when primary node not available (Ian)
|
||||
repmgrd: handle situation where a primary has unexpectedly appeared
|
||||
during failover; GitHub #420 (Ian)
|
||||
|
||||
4.2 2018-10-24
|
||||
repmgr: add parameter "shutdown_check_timeout" for use by "standby switchover";
|
||||
GitHub #504 (Ian)
|
||||
repmgr: add "--node-id" option to "repmgr cluster cleanup"; GitHub #493 (Ian)
|
||||
repmgr: report unreachable nodes when running "repmgr cluster (matrix|crosscheck)";
|
||||
GitHub #246 (Ian)
|
||||
repmgr: add configuration file parameter "repmgr_bindir"; GitHub #246 (Ian)
|
||||
repmgr: fix "Missing replication slots" label in "node check"; GitHub #507 (Ian)
|
||||
repmgrd: fix parsing of -d/--daemonize option (Ian)
|
||||
repmgrd: support "pausing" of repmgrd (Ian)
|
||||
|
||||
4.1.1 2018-09-05
|
||||
logging: explicitly log the text of failed queries as ERRORs to
|
||||
assist logfile analysis; GitHub #498
|
||||
repmgr: truncate version string, if necessary; GitHub #490 (Ian)
|
||||
repmgr: improve messages emitted during "standby promote" (Ian)
|
||||
repmgr: "standby clone" - don't copy external config files in --dry-run
|
||||
mode; GitHub #491 (Ian)
|
||||
repmgr: add "cluster_cleanup" event; GitHub #492 (Ian)
|
||||
repmgr: (standby switchover) improve detection of free walsenders;
|
||||
GitHub #495 (Ian)
|
||||
repmgr: (node rejoin) improve replication slot handling; GitHub #499 (Ian)
|
||||
repmgrd: ensure that sending SIGHUP always results in the log file
|
||||
being reopened; GitHub #485 (Ian)
|
||||
repmgrd: report version number *after* logger initialisation; GitHub #487 (Ian)
|
||||
repmgrd: fix startup on witness node when local data is stale; GitHub #488/#489 (Ian)
|
||||
repmgrd: improve cascaded standby failover handling; GitHub #480 (Ian)
|
||||
repmgrd: improve reconnection handling (Ian)
|
||||
|
||||
4.1.0 2018-07-31
|
||||
repmgr: change default log_level to INFO, add documentation; GitHub #470 (Ian)
|
||||
repmgr: add "--missing-slots" check to "repmgr node check" (Ian)
|
||||
repmgr: improve command line error handling; GitHub #464 (Ian)
|
||||
repmgr: fix "standby register --wait-sync" when no timeout provided (Ian)
|
||||
repmgr: "cluster show" returns non-zero value if an issue encountered;
|
||||
GitHub #456 (Ian)
|
||||
repmgr: "node check" and "node status" return non-zero value if an issue
|
||||
encountered (Ian)
|
||||
repmgr: add CSV output mode to "cluster event"; GitHub #471 (Ian)
|
||||
repmgr: add -q/--quiet option to suppress non-error output; GitHub #468 (Ian)
|
||||
repmgr: "node status" returns non-zero value if an issue encountered (Ian)
|
||||
repmgr: enable "recovery_min_apply_delay" to be 0; GitHub #448 (Ian)
|
||||
repmgr: "cluster cleanup" - add missing help options; GitHub #461/#462 (gclough)
|
||||
repmgr: ensure witness node follows new primary after switchover;
|
||||
GitHub #453 (Ian)
|
||||
repmgr: fix witness node handling in "node check"/"node status";
|
||||
GitHub #451 (Ian)
|
||||
repmgr: fix "primary_slot_name" when using "standby clone" with --recovery-conf-only;
|
||||
GitHub #474 (Ian)
|
||||
repmgr: don't perform a switchover if an exclusive backup is running;
|
||||
GitHub #476 (Martín)
|
||||
repmgr: enable "witness unregister" to be run on any node; GitHub #472 (Ian)
|
||||
repmgrd: create a PID file by default; GitHub #457 (Ian)
|
||||
repmgrd: daemonize process by default; GitHub #458 (Ian)
|
||||
|
||||
4.0.6 2018-06-14
|
||||
repmgr: (witness register) prevent registration of a witness server with the
|
||||
same name as an existing node (Ian)
|
||||
repmgr: (standby follow) check node has actually connected to new primary
|
||||
before reporting success; GitHub #444 (Ian)
|
||||
repmgr: (standby clone) improve handling of external configuration file copying,
|
||||
including consideration in --dry-run check; GitHub #443 (Ian)
|
||||
repmgr: (standby clone) don't require presence of "user" parameter in
|
||||
conninfo string; GitHub #437 (Ian)
|
||||
repmgr: (standby clone) improve documentation of --recovery-conf-only
|
||||
mode; GitHub #438 (Ian)
|
||||
repmgr: (node rejoin) fix bug when parsing --config-files parameter;
|
||||
GitHub #442 (Ian)
|
||||
repmgr: when using --dry-run, force log level to INFO to ensure output
|
||||
will always be displayed; GitHub #441 (Ian)
|
||||
repmgr: (cluster matrix/crosscheck) return non-zero exit code if node
|
||||
connection issues detected; GitHub #447 (Ian)
|
||||
repmgrd: ensure local node is counted as quorum member; GitHub #439 (Ian)
|
||||
|
||||
4.0.5 2018-05-02
|
||||
repmgr: poll demoted primary after restart as a standby during a
|
||||
switchover operation; GitHub #408 (Ian)
|
||||
repmgr: add configuration parameter "config_directory"; GitHub #424 (Ian)
|
||||
repmgr: add "dbname=replication" to all replication connection strings;
|
||||
GitHub #421 (Ian)
|
||||
repmgr: add sanity check if --upstream-node-id not supplied when executing
|
||||
"standby register"; GitHub #395 (Ian)
|
||||
repmgr: enable provision of "archive_cleanup_command" in recovery.conf;
|
||||
GitHub #416 (Ian)
|
||||
repmgr: actively check for node to rejoin cluster; GitHub #415 (Ian)
|
||||
repmgr: enable pg_rewind to be used with PostgreSQL 9.3/9.4; GitHub #413 (Ian)
|
||||
repmgr: fix minimum accepted value for "degraded_monitoring_timeout";
|
||||
GitHub #411 (Ian)
|
||||
repmgr: fix superuser password handling; GitHub #400 (Ian)
|
||||
repmgr: fix parsing of "archive_ready_critical" configuration file
|
||||
parameter; GitHub #426 (Ian)
|
||||
repmgr: fix display of conninfo parsing error messages (Ian)
|
||||
repmgr: fix "repmgr cluster crosscheck" output; GitHub #389 (Ian)
|
||||
repmgrd: prevent standby connection handle from going stale (Ian)
|
||||
repmgrd: fix memory leaks in witness code; GitHub #402 (AndrzejNowicki, Martín)
|
||||
repmgrd: handle "pg_ctl promote" timeout; GitHub #425 (Ian)
|
||||
repmgrd: handle failover situation with only two nodes in the primary
|
||||
location, and at least one node in another location; GitHub #407 (Ian)
|
||||
repmgrd: set "connect_timeout=2" when pinging a server (Ian)
|
||||
|
||||
4.0.4 2018-03-09
|
||||
repmgr: add "standby clone --recovery-conf-only" option; GitHub #382 (Ian)
|
||||
repmgr: make "standby promote" timeout values configurable; GitHub #387 (Ian)
|
||||
repmgr: improve replication slot warnings generated by "node status";
|
||||
GitHub #385 (Ian)
|
||||
repmgr: remove restriction on replication slots when cloning from
|
||||
a Barman server; GitHub #379 (Ian)
|
||||
repmgr: ensure "node rejoin" honours "--dry-run" option; GitHub #383 (Ian)
|
||||
repmgr: fix --superuser handling when cloning a standby; GitHub #380 (Ian)
|
||||
repmgr: update various help options; GitHub #391, #392 (hasegeli)
|
||||
repmgrd: add event "repmgrd_shutdown"; GitHub #393 (Ian)
|
||||
repmgrd: improve detection of status change from primary to standby (Ian)
|
||||
repmgrd: improve log output in various situations (Ian)
|
||||
repmgrd: improve reconnection to the local node after a failover (Ian)
|
||||
repmgrd: ensure witness server connects to new primary after a failover (Ian)
|
||||
|
||||
4.0.3 2018-02-15
|
||||
4.0.3 2018-02-
|
||||
repmgr: improve switchover handling when "pg_ctl" used to control the
|
||||
server and logging output is not explicitly redirected (Ian)
|
||||
repmgr: improve switchover log messages and exit code when old primary could
|
||||
@@ -162,7 +17,6 @@
|
||||
repmgr: fix upstream node display in "repmgr node status"; GitHub #363 (fanf2)
|
||||
repmgr: improve/clarify documentation and update --help output for
|
||||
"primary unregister"; GitHub #373 (Ian)
|
||||
repmgr: allow replication slots when Barman is configured; GitHub #379 (Ian)
|
||||
repmgr: fix parsing of "pg_basebackup_options"; GitHub #376 (Ian)
|
||||
repmgr: ensure "pg_subtrans" directory is created when cloning a standby in
|
||||
Barman mode (Ian)
|
||||
|
||||
46
Makefile.in
46
Makefile.in
@@ -11,13 +11,7 @@ EXTENSION = repmgr
|
||||
|
||||
DATA = \
|
||||
repmgr--unpackaged--4.0.sql \
|
||||
repmgr--4.0.sql \
|
||||
repmgr--4.0--4.1.sql \
|
||||
repmgr--4.1.sql \
|
||||
repmgr--4.1--4.2.sql \
|
||||
repmgr--4.2.sql \
|
||||
repmgr--4.2--4.3.sql \
|
||||
repmgr--4.3.sql
|
||||
repmgr--4.0.sql
|
||||
|
||||
REGRESS = repmgr_extension
|
||||
|
||||
@@ -32,26 +26,21 @@ all: \
|
||||
PG_CPPFLAGS = -std=gnu89 -I$(includedir_internal) -I$(libpq_srcdir) -Wall -Wmissing-prototypes -Wmissing-declarations $(EXTRA_CFLAGS)
|
||||
SHLIB_LINK = $(libpq)
|
||||
|
||||
|
||||
HEADERS = $(wildcard *.h)
|
||||
|
||||
OBJS = \
|
||||
repmgr.o
|
||||
|
||||
include Makefile.global
|
||||
|
||||
ifeq ($(vpath_build),yes)
|
||||
HEADERS = $(wildcard *.h)
|
||||
else
|
||||
HEADERS_built = $(wildcard *.h)
|
||||
endif
|
||||
|
||||
$(info Building against PostgreSQL $(MAJORVERSION))
|
||||
|
||||
REPMGR_CLIENT_OBJS = repmgr-client.o \
|
||||
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
|
||||
repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-daemon.o \
|
||||
configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o sysutils.o
|
||||
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o
|
||||
repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
|
||||
configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o
|
||||
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o
|
||||
DATE=$(shell date "+%Y-%m-%d")
|
||||
|
||||
repmgr_version.h: repmgr_version.h.in
|
||||
@@ -86,15 +75,28 @@ clean: additional-clean
|
||||
maintainer-clean: additional-maintainer-clean
|
||||
|
||||
additional-clean:
|
||||
rm -f *.o
|
||||
rm -f repmgr-client.o
|
||||
rm -f repmgr-action-primary.o
|
||||
rm -f repmgr-action-standby.o
|
||||
rm -f repmgr-action-witness.o
|
||||
rm -f repmgr-action-bdr.o
|
||||
rm -f repmgr-action-node.o
|
||||
rm -f repmgr-action-cluster.o
|
||||
rm -f repmgrd.o
|
||||
rm -f repmgrd-physical.o
|
||||
rm -f repmgrd-bdr.o
|
||||
rm -f compat.o
|
||||
rm -f configfile.o
|
||||
rm -f controldata.o
|
||||
rm -f dbutils.o
|
||||
rm -f dirutil.o
|
||||
rm -f log.o
|
||||
rm -f strutil.o
|
||||
|
||||
additional-maintainer-clean: clean
|
||||
$(MAKE) -C doc maintainer-clean
|
||||
maintainer-additional-clean: clean
|
||||
rm -f configure
|
||||
rm -f config.status config.log
|
||||
rm -f config.h
|
||||
rm -f repmgr_version.h
|
||||
rm -f Makefile
|
||||
rm -f Makefile.global
|
||||
@rm -rf autom4te.cache/
|
||||
|
||||
ifeq ($(MAJORVERSION),$(filter $(MAJORVERSION),9.3 9.4))
|
||||
|
||||
@@ -10,7 +10,7 @@ operations.
|
||||
`repmgr 4` is a complete rewrite of the existing `repmgr` codebase, allowing
|
||||
the use of all of the latest features in PostgreSQL replication.
|
||||
|
||||
PostgreSQL 11, 10, 9.6 and 9.5 are fully supported.
|
||||
PostgreSQL 10, 9.6 and 9.5 are fully supported.
|
||||
PostgreSQL 9.4 and 9.3 are supported, with some restrictions.
|
||||
|
||||
`repmgr` is distributed under the GNU GPL 3 and maintained by 2ndQuadrant.
|
||||
@@ -19,7 +19,7 @@ PostgreSQL 9.4 and 9.3 are supported, with some restrictions.
|
||||
|
||||
`repmgr 4` supports monitoring of a two-node BDR 2.0 cluster on PostgreSQL 9.6
|
||||
only. Note that BDR 2.0 is not publicly available; please contact 2ndQuadrant
|
||||
for details.
|
||||
for details. `repmgr 4` will support future public BDR releases.
|
||||
|
||||
|
||||
Documentation
|
||||
@@ -27,7 +27,7 @@ Documentation
|
||||
|
||||
The main `repmgr` documentation is available here:
|
||||
|
||||
> [repmgr 4 documentation](https://repmgr.org/docs/4.2/index.html)
|
||||
> [repmgr 4 documentation](https://repmgr.org/docs/4.0/index.html)
|
||||
|
||||
The `README` file for `repmgr` 3.x is available here:
|
||||
|
||||
|
||||
20
TODO.md
20
TODO.md
@@ -1,20 +0,0 @@
|
||||
TODO
|
||||
====
|
||||
|
||||
This file contains a list of improvements which are desirable and/or have
|
||||
been requested, and which we aim to address/implement when time and resources
|
||||
permit.
|
||||
|
||||
It is *not* a roadmap and there's no guarantee of any item being implemented
|
||||
within any given timeframe.
|
||||
|
||||
|
||||
Enable suspension of repmgrd failover
|
||||
-------------------------------------
|
||||
|
||||
When performing maintenance, e.g. a switchover, it's necessary to stop all
|
||||
repmgrd nodes to prevent unintended failover; this is obviously inconvenient.
|
||||
We'll need to implement some way of notifying each repmgrd to suspend automatic
|
||||
failover until further notice.
|
||||
|
||||
Requested in GitHub #410 ( https://github.com/2ndQuadrant/repmgr/issues/410 )
|
||||
35
compat.c
35
compat.c
@@ -6,7 +6,7 @@
|
||||
* supported PostgreSQL versions. They're unlikely to change but
|
||||
* it would be worth keeping an eye on them for any fixes/improvements.
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
@@ -98,42 +98,9 @@ appendShellString(PQExpBuffer buf, const char *str)
|
||||
|
||||
if (*p == '\'')
|
||||
appendPQExpBufferStr(buf, "'\"'\"'");
|
||||
else if (*p == '&')
|
||||
appendPQExpBufferStr(buf, "\\&");
|
||||
else
|
||||
appendPQExpBufferChar(buf, *p);
|
||||
}
|
||||
|
||||
appendPQExpBufferChar(buf, '\'');
|
||||
}
|
||||
|
||||
/*
|
||||
* Adapted from: src/fe_utils/string_utils.c
|
||||
*/
|
||||
void
|
||||
appendRemoteShellString(PQExpBuffer buf, const char *str)
|
||||
{
|
||||
const char *p;
|
||||
|
||||
appendPQExpBufferStr(buf, "\\'");
|
||||
|
||||
for (p = str; *p; p++)
|
||||
{
|
||||
if (*p == '\n' || *p == '\r')
|
||||
{
|
||||
fprintf(stderr,
|
||||
_("shell command argument contains a newline or carriage return: \"%s\"\n"),
|
||||
str);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (*p == '\'')
|
||||
appendPQExpBufferStr(buf, "'\"'\"'");
|
||||
else if (*p == '&')
|
||||
appendPQExpBufferStr(buf, "\\&");
|
||||
else
|
||||
appendPQExpBufferChar(buf, *p);
|
||||
}
|
||||
|
||||
appendPQExpBufferStr(buf, "\\'");
|
||||
}
|
||||
|
||||
4
compat.h
4
compat.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* compat.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
@@ -27,6 +27,4 @@ extern void appendConnStrVal(PQExpBuffer buf, const char *str);
|
||||
|
||||
extern void appendShellString(PQExpBuffer buf, const char *str);
|
||||
|
||||
extern void appendRemoteShellString(PQExpBuffer buf, const char *str);
|
||||
|
||||
#endif
|
||||
|
||||
414
configfile.c
414
configfile.c
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* configfile.c - parse repmgr.conf and other configuration-related functionality
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -28,8 +28,10 @@ char config_file_path[MAXPGPATH] = "";
|
||||
static bool config_file_provided = false;
|
||||
bool config_file_found = false;
|
||||
|
||||
static void parse_config(t_configuration_options *options, bool terse);
|
||||
static void _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *warning_list);
|
||||
static bool parse_bool(const char *s,
|
||||
const char *config_item,
|
||||
ItemList *error_list);
|
||||
|
||||
static void _parse_line(char *buf, char *name, char *value);
|
||||
static void parse_event_notifications_list(t_configuration_options *options, const char *arg);
|
||||
@@ -88,7 +90,8 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
|
||||
|
||||
if (pwd != NULL)
|
||||
{
|
||||
appendPQExpBufferStr(&fullpath, pwd);
|
||||
appendPQExpBuffer(&fullpath,
|
||||
"%s", pwd);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -104,7 +107,9 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
appendPQExpBufferStr(&fullpath, cwd);
|
||||
appendPQExpBuffer(&fullpath,
|
||||
"%s",
|
||||
cwd);
|
||||
}
|
||||
|
||||
appendPQExpBuffer(&fullpath,
|
||||
@@ -123,9 +128,9 @@ load_config(const char *config_file, bool verbose, bool terse, t_configuration_o
|
||||
|
||||
if (stat(config_file_path, &stat_config) != 0)
|
||||
{
|
||||
log_error(_("provided configuration file \"%s\" not found"),
|
||||
config_file);
|
||||
log_detail("%s", strerror(errno));
|
||||
log_error(_("provided configuration file \"%s\" not found: %s"),
|
||||
config_file,
|
||||
strerror(errno));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -236,7 +241,7 @@ end_search:
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
void
|
||||
parse_config(t_configuration_options *options, bool terse)
|
||||
{
|
||||
/* Collate configuration file errors here for friendlier reporting */
|
||||
@@ -283,9 +288,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
memset(options->node_name, 0, sizeof(options->node_name));
|
||||
memset(options->conninfo, 0, sizeof(options->conninfo));
|
||||
memset(options->data_directory, 0, sizeof(options->data_directory));
|
||||
memset(options->config_directory, 0, sizeof(options->data_directory));
|
||||
memset(options->pg_bindir, 0, sizeof(options->pg_bindir));
|
||||
memset(options->repmgr_bindir, 0, sizeof(options->repmgr_bindir));
|
||||
options->replication_type = REPLICATION_TYPE_PHYSICAL;
|
||||
|
||||
/*-------------
|
||||
@@ -300,7 +303,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->log_status_interval = DEFAULT_LOG_STATUS_INTERVAL;
|
||||
|
||||
/*-----------------------
|
||||
* standby clone settings
|
||||
* standby action settings
|
||||
*------------------------
|
||||
*/
|
||||
options->use_replication_slots = false;
|
||||
@@ -311,32 +314,9 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->tablespace_mapping.tail = NULL;
|
||||
memset(options->recovery_min_apply_delay, 0, sizeof(options->recovery_min_apply_delay));
|
||||
options->recovery_min_apply_delay_provided = false;
|
||||
memset(options->archive_cleanup_command, 0, sizeof(options->archive_cleanup_command));
|
||||
options->use_primary_conninfo_password = false;
|
||||
memset(options->passfile, 0, sizeof(options->passfile));
|
||||
|
||||
/*-------------------------
|
||||
* standby promote settings
|
||||
*-------------------------
|
||||
*/
|
||||
options->promote_check_timeout = DEFAULT_PROMOTE_CHECK_TIMEOUT;
|
||||
options->promote_check_interval = DEFAULT_PROMOTE_CHECK_INTERVAL;
|
||||
|
||||
/*------------------------
|
||||
* standby follow settings
|
||||
*------------------------
|
||||
*/
|
||||
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
|
||||
options->standby_follow_timeout = DEFAULT_STANDBY_FOLLOW_TIMEOUT;
|
||||
|
||||
/*------------------------
|
||||
* standby switchover settings
|
||||
*------------------------
|
||||
*/
|
||||
options->shutdown_check_timeout = DEFAULT_SHUTDOWN_CHECK_TIMEOUT;
|
||||
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
|
||||
options->wal_receive_check_timeout = DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT;
|
||||
|
||||
/*-----------------
|
||||
* repmgrd settings
|
||||
*-----------------
|
||||
@@ -356,14 +336,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->degraded_monitoring_timeout = -1;
|
||||
options->async_query_timeout = DEFAULT_ASYNC_QUERY_TIMEOUT;
|
||||
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
|
||||
options->repmgrd_standby_startup_timeout = -1; /* defaults to "standby_reconnect_timeout" if not set */
|
||||
memset(options->repmgrd_pid_file, 0, sizeof(options->repmgrd_pid_file));
|
||||
options->standby_disconnect_on_failover = false;
|
||||
options->sibling_nodes_disconnect_timeout = DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT;
|
||||
options->connection_check_type = CHECK_PING;
|
||||
options->primary_visibility_consensus = false;
|
||||
memset(options->failover_validation_command, 0, sizeof(options->failover_validation_command));
|
||||
options->election_rerun_interval = DEFAULT_ELECTION_RERUN_INTERVAL;
|
||||
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
|
||||
|
||||
/*-------------
|
||||
* witness settings
|
||||
@@ -378,24 +351,17 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->bdr_local_monitoring_only = false;
|
||||
options->bdr_recovery_timeout = DEFAULT_BDR_RECOVERY_TIMEOUT;
|
||||
|
||||
/*-------------------------
|
||||
* service command settings
|
||||
*-------------------------
|
||||
/*-----------------
|
||||
* service settings
|
||||
*-----------------
|
||||
*/
|
||||
memset(options->pg_ctl_options, 0, sizeof(options->pg_ctl_options));
|
||||
memset(options->service_start_command, 0, sizeof(options->service_start_command));
|
||||
memset(options->service_stop_command, 0, sizeof(options->service_stop_command));
|
||||
memset(options->service_start_command, 0, sizeof(options->service_start_command));
|
||||
memset(options->service_restart_command, 0, sizeof(options->service_restart_command));
|
||||
memset(options->service_reload_command, 0, sizeof(options->service_reload_command));
|
||||
memset(options->service_promote_command, 0, sizeof(options->service_promote_command));
|
||||
|
||||
/*---------------------------------
|
||||
* repmgrd service command settings
|
||||
*---------------------------------
|
||||
*/
|
||||
memset(options->repmgrd_service_start_command, 0, sizeof(options->repmgrd_service_start_command));
|
||||
memset(options->repmgrd_service_stop_command, 0, sizeof(options->repmgrd_service_stop_command));
|
||||
|
||||
/*----------------------------
|
||||
* event notification settings
|
||||
*----------------------------
|
||||
@@ -480,38 +446,25 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
/* Copy into correct entry in parameters struct */
|
||||
if (strcmp(name, "node_id") == 0)
|
||||
{
|
||||
options->node_id = repmgr_atoi(value, name, error_list, MIN_NODE_ID);
|
||||
options->node_id = repmgr_atoi(value, name, error_list, 1);
|
||||
node_id_found = true;
|
||||
}
|
||||
else if (strcmp(name, "node_name") == 0)
|
||||
{
|
||||
if (strlen(value) < sizeof(options->node_name))
|
||||
strncpy(options->node_name, value, sizeof(options->node_name));
|
||||
else
|
||||
item_list_append_format(error_list,
|
||||
_("value for \"node_name\" must contain fewer than %lu characters"),
|
||||
sizeof(options->node_name));
|
||||
}
|
||||
strncpy(options->node_name, value, MAXLEN);
|
||||
else if (strcmp(name, "conninfo") == 0)
|
||||
strncpy(options->conninfo, value, MAXLEN);
|
||||
else if (strcmp(name, "data_directory") == 0)
|
||||
strncpy(options->data_directory, value, MAXPGPATH);
|
||||
else if (strcmp(name, "config_directory") == 0)
|
||||
strncpy(options->config_directory, value, MAXPGPATH);
|
||||
|
||||
else if (strcmp(name, "replication_user") == 0)
|
||||
{
|
||||
if (strlen(value) < sizeof(options->replication_user))
|
||||
strncpy(options->replication_user, value, sizeof(options->replication_user));
|
||||
if (strlen(value) < NAMEDATALEN)
|
||||
strncpy(options->replication_user, value, NAMEDATALEN);
|
||||
else
|
||||
item_list_append_format(error_list,
|
||||
_("value for \"replication_user\" must contain fewer than %lu characters"),
|
||||
sizeof(options->replication_user));
|
||||
item_list_append(error_list,
|
||||
_("value for \"replication_user\" must contain fewer than " STR(NAMEDATALEN) " characters"));
|
||||
}
|
||||
else if (strcmp(name, "pg_bindir") == 0)
|
||||
strncpy(options->pg_bindir, value, MAXPGPATH);
|
||||
else if (strcmp(name, "repmgr_bindir") == 0)
|
||||
strncpy(options->repmgr_bindir, value, MAXPGPATH);
|
||||
|
||||
else if (strcmp(name, "replication_type") == 0)
|
||||
{
|
||||
@@ -547,42 +500,15 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
parse_time_unit_parameter(name, value, options->recovery_min_apply_delay, error_list);
|
||||
options->recovery_min_apply_delay_provided = true;
|
||||
}
|
||||
else if (strcmp(name, "archive_cleanup_command") == 0)
|
||||
strncpy(options->archive_cleanup_command, value, MAXLEN);
|
||||
else if (strcmp(name, "use_primary_conninfo_password") == 0)
|
||||
options->use_primary_conninfo_password = parse_bool(value, name, error_list);
|
||||
else if (strcmp(name, "passfile") == 0)
|
||||
strncpy(options->passfile, value, sizeof(options->passfile));
|
||||
|
||||
/* standby promote settings */
|
||||
else if (strcmp(name, "promote_check_timeout") == 0)
|
||||
options->promote_check_timeout = repmgr_atoi(value, name, error_list, 1);
|
||||
|
||||
else if (strcmp(name, "promote_check_interval") == 0)
|
||||
options->promote_check_interval = repmgr_atoi(value, name, error_list, 1);
|
||||
|
||||
/* standby follow settings */
|
||||
else if (strcmp(name, "primary_follow_timeout") == 0)
|
||||
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "standby_follow_timeout") == 0)
|
||||
options->standby_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
/* standby switchover settings */
|
||||
else if (strcmp(name, "shutdown_check_timeout") == 0)
|
||||
options->shutdown_check_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "standby_reconnect_timeout") == 0)
|
||||
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "wal_receive_check_timeout") == 0)
|
||||
options->wal_receive_check_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
/* node rejoin settings */
|
||||
else if (strcmp(name, "node_rejoin_timeout") == 0)
|
||||
options->node_rejoin_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
/* node check settings */
|
||||
else if (strcmp(name, "archive_ready_warning") == 0)
|
||||
options->archive_ready_warning = repmgr_atoi(value, name, error_list, 1);
|
||||
else if (strcmp(name, "archive_ready_critical") == 0)
|
||||
else if (strcmp(name, "archive_ready_critcial") == 0)
|
||||
options->archive_ready_critical = repmgr_atoi(value, name, error_list, 1);
|
||||
else if (strcmp(name, "replication_lag_warning") == 0)
|
||||
options->replication_lag_warning = repmgr_atoi(value, name, error_list, 1);
|
||||
@@ -609,11 +535,11 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
else if (strcmp(name, "priority") == 0)
|
||||
options->priority = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "location") == 0)
|
||||
strncpy(options->location, value, sizeof(options->location));
|
||||
strncpy(options->location, value, MAXLEN);
|
||||
else if (strcmp(name, "promote_command") == 0)
|
||||
strncpy(options->promote_command, value, sizeof(options->promote_command));
|
||||
strncpy(options->promote_command, value, MAXLEN);
|
||||
else if (strcmp(name, "follow_command") == 0)
|
||||
strncpy(options->follow_command, value, sizeof(options->follow_command));
|
||||
strncpy(options->follow_command, value, MAXLEN);
|
||||
else if (strcmp(name, "reconnect_attempts") == 0)
|
||||
options->reconnect_attempts = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "reconnect_interval") == 0)
|
||||
@@ -623,45 +549,13 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
else if (strcmp(name, "monitoring_history") == 0)
|
||||
options->monitoring_history = parse_bool(value, name, error_list);
|
||||
else if (strcmp(name, "degraded_monitoring_timeout") == 0)
|
||||
options->degraded_monitoring_timeout = repmgr_atoi(value, name, error_list, -1);
|
||||
options->degraded_monitoring_timeout = repmgr_atoi(value, name, error_list, 1);
|
||||
else if (strcmp(name, "async_query_timeout") == 0)
|
||||
options->async_query_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "primary_notification_timeout") == 0)
|
||||
options->primary_notification_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "repmgrd_standby_startup_timeout") == 0)
|
||||
options->repmgrd_standby_startup_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "repmgrd_pid_file") == 0)
|
||||
strncpy(options->repmgrd_pid_file, value, MAXPGPATH);
|
||||
else if (strcmp(name, "standby_disconnect_on_failover") == 0)
|
||||
options->standby_disconnect_on_failover = parse_bool(value, name, error_list);
|
||||
else if (strcmp(name, "sibling_nodes_disconnect_timeout") == 0)
|
||||
options->sibling_nodes_disconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "connection_check_type") == 0)
|
||||
{
|
||||
if (strcasecmp(value, "ping") == 0)
|
||||
{
|
||||
options->connection_check_type = CHECK_PING;
|
||||
}
|
||||
else if (strcasecmp(value, "connection") == 0)
|
||||
{
|
||||
options->connection_check_type = CHECK_CONNECTION;
|
||||
}
|
||||
else if (strcasecmp(value, "query") == 0)
|
||||
{
|
||||
options->connection_check_type = CHECK_QUERY;
|
||||
}
|
||||
else
|
||||
{
|
||||
item_list_append(error_list,
|
||||
_("value for \"connection_check_type\" must be \"ping\", \"connection\" or \"query\"\n"));
|
||||
}
|
||||
}
|
||||
else if (strcmp(name, "primary_visibility_consensus") == 0)
|
||||
options->primary_visibility_consensus = parse_bool(value, name, error_list);
|
||||
else if (strcmp(name, "failover_validation_command") == 0)
|
||||
strncpy(options->failover_validation_command, value, sizeof(options->failover_validation_command));
|
||||
else if (strcmp(name, "election_rerun_interval") == 0)
|
||||
options->election_rerun_interval = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "primary_follow_timeout") == 0)
|
||||
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
/* witness settings */
|
||||
else if (strcmp(name, "witness_sync_interval") == 0)
|
||||
@@ -675,48 +569,41 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
|
||||
/* service settings */
|
||||
else if (strcmp(name, "pg_ctl_options") == 0)
|
||||
strncpy(options->pg_ctl_options, value, sizeof(options->pg_ctl_options));
|
||||
else if (strcmp(name, "service_start_command") == 0)
|
||||
strncpy(options->service_start_command, value, sizeof(options->service_start_command));
|
||||
strncpy(options->pg_ctl_options, value, MAXLEN);
|
||||
else if (strcmp(name, "service_stop_command") == 0)
|
||||
strncpy(options->service_stop_command, value, sizeof(options->service_stop_command));
|
||||
strncpy(options->service_stop_command, value, MAXLEN);
|
||||
else if (strcmp(name, "service_start_command") == 0)
|
||||
strncpy(options->service_start_command, value, MAXLEN);
|
||||
else if (strcmp(name, "service_restart_command") == 0)
|
||||
strncpy(options->service_restart_command, value, sizeof(options->service_restart_command));
|
||||
strncpy(options->service_restart_command, value, MAXLEN);
|
||||
else if (strcmp(name, "service_reload_command") == 0)
|
||||
strncpy(options->service_reload_command, value, sizeof(options->service_reload_command));
|
||||
strncpy(options->service_reload_command, value, MAXLEN);
|
||||
else if (strcmp(name, "service_promote_command") == 0)
|
||||
strncpy(options->service_promote_command, value, sizeof(options->service_promote_command));
|
||||
|
||||
/* repmgrd service settings */
|
||||
else if (strcmp(name, "repmgrd_service_start_command") == 0)
|
||||
strncpy(options->repmgrd_service_start_command, value, sizeof(options->repmgrd_service_start_command));
|
||||
else if (strcmp(name, "repmgrd_service_stop_command") == 0)
|
||||
strncpy(options->repmgrd_service_stop_command, value, sizeof(options->repmgrd_service_stop_command));
|
||||
|
||||
strncpy(options->service_promote_command, value, MAXLEN);
|
||||
|
||||
/* event notification settings */
|
||||
else if (strcmp(name, "event_notification_command") == 0)
|
||||
strncpy(options->event_notification_command, value, sizeof(options->event_notification_command));
|
||||
strncpy(options->event_notification_command, value, MAXLEN);
|
||||
else if (strcmp(name, "event_notifications") == 0)
|
||||
{
|
||||
/* store unparsed value for comparison when reloading config */
|
||||
strncpy(options->event_notifications_orig, value, sizeof(options->event_notifications_orig));
|
||||
strncpy(options->event_notifications_orig, value, MAXLEN);
|
||||
parse_event_notifications_list(options, value);
|
||||
}
|
||||
|
||||
/* barman settings */
|
||||
else if (strcmp(name, "barman_host") == 0)
|
||||
strncpy(options->barman_host, value, sizeof(options->barman_host));
|
||||
strncpy(options->barman_host, value, MAXLEN);
|
||||
else if (strcmp(name, "barman_server") == 0)
|
||||
strncpy(options->barman_server, value, sizeof(options->barman_server));
|
||||
strncpy(options->barman_server, value, MAXLEN);
|
||||
else if (strcmp(name, "barman_config") == 0)
|
||||
strncpy(options->barman_config, value, sizeof(options->barman_config));
|
||||
strncpy(options->barman_config, value, MAXLEN);
|
||||
|
||||
/* rsync/ssh settings */
|
||||
else if (strcmp(name, "rsync_options") == 0)
|
||||
strncpy(options->rsync_options, value, sizeof(options->rsync_options));
|
||||
strncpy(options->rsync_options, value, MAXLEN);
|
||||
else if (strcmp(name, "ssh_options") == 0)
|
||||
strncpy(options->ssh_options, value, sizeof(options->ssh_options));
|
||||
strncpy(options->ssh_options, value, MAXLEN);
|
||||
|
||||
/* undocumented settings for testing */
|
||||
else if (strcmp(name, "promote_delay") == 0)
|
||||
@@ -850,17 +737,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
PQconninfoFree(conninfo_options);
|
||||
}
|
||||
|
||||
/* set values for parameters which default to other parameters */
|
||||
|
||||
/*
|
||||
* From 4.1, "repmgrd_standby_startup_timeout" replaces "standby_reconnect_timeout"
|
||||
* in repmgrd; fall back to "standby_reconnect_timeout" if no value explicitly provided
|
||||
*/
|
||||
if (options->repmgrd_standby_startup_timeout == -1)
|
||||
{
|
||||
options->repmgrd_standby_startup_timeout = options->standby_reconnect_timeout;
|
||||
}
|
||||
|
||||
/* add warning about changed "barman_" parameter meanings */
|
||||
if ((options->barman_host[0] == '\0' && options->barman_server[0] != '\0') ||
|
||||
(options->barman_host[0] != '\0' && options->barman_server[0] == '\0'))
|
||||
@@ -877,19 +753,13 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
if (options->archive_ready_warning >= options->archive_ready_critical)
|
||||
{
|
||||
item_list_append(error_list,
|
||||
_("\"archive_ready_critical\" must be greater than \"archive_ready_warning\""));
|
||||
_("\archive_ready_critical\" must be greater than \"archive_ready_warning\""));
|
||||
}
|
||||
|
||||
if (options->replication_lag_warning >= options->replication_lag_critical)
|
||||
{
|
||||
item_list_append(error_list,
|
||||
_("\"replication_lag_critical\" must be greater than \"replication_lag_warning\""));
|
||||
}
|
||||
|
||||
if (options->standby_reconnect_timeout < options->node_rejoin_timeout)
|
||||
{
|
||||
item_list_append(error_list,
|
||||
_("\"standby_reconnect_timeout\" must be equal to or greater than \"node_rejoin_timeout\""));
|
||||
_("\replication_lag_critical\" must be greater than \"replication_lag_warning\""));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1055,11 +925,12 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
|
||||
char *ptr = NULL;
|
||||
int targ = strtol(value, &ptr, 10);
|
||||
|
||||
if (targ < 0)
|
||||
if (targ < 1)
|
||||
{
|
||||
if (errors != NULL)
|
||||
{
|
||||
item_list_append_format(errors,
|
||||
item_list_append_format(
|
||||
errors,
|
||||
_("invalid value provided for \"%s\""),
|
||||
name);
|
||||
}
|
||||
@@ -1093,19 +964,15 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
|
||||
* loop is started up; it therefore only needs to reload options required
|
||||
* by repmgrd, which are as follows:
|
||||
*
|
||||
* changeable options (keep the list in "doc/repmgrd-configuration.sgml" in sync
|
||||
* with these):
|
||||
*
|
||||
* changeable options:
|
||||
* - async_query_timeout
|
||||
* - bdr_local_monitoring_only
|
||||
* - bdr_recovery_timeout
|
||||
* - connection_check_type
|
||||
* - conninfo
|
||||
* - degraded_monitoring_timeout
|
||||
* - event_notification_command
|
||||
* - event_notifications
|
||||
* - failover
|
||||
* - failover_validation_command
|
||||
* - follow_command
|
||||
* - log_facility
|
||||
* - log_file
|
||||
@@ -1113,27 +980,17 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
|
||||
* - log_status_interval
|
||||
* - monitor_interval_secs
|
||||
* - monitoring_history
|
||||
* - primary_notification_timeout
|
||||
* - primary_visibility_consensus
|
||||
* - promote_command
|
||||
* - promote_delay
|
||||
* - reconnect_attempts
|
||||
* - reconnect_interval
|
||||
* - repmgrd_standby_startup_timeout
|
||||
* - retry_promote_interval_secs
|
||||
* - sibling_nodes_disconnect_timeout
|
||||
* - standby_disconnect_on_failover
|
||||
*
|
||||
*
|
||||
* Not publicly documented:
|
||||
* - promote_delay
|
||||
*
|
||||
* non-changeable options (repmgrd references these from the "repmgr.nodes"
|
||||
* table, not the configuration file)
|
||||
* non-changeable options
|
||||
*
|
||||
* - node_id
|
||||
* - node_name
|
||||
* - data_directory
|
||||
* - location
|
||||
* - priority
|
||||
* - replication_type
|
||||
*
|
||||
@@ -1142,7 +999,7 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
|
||||
|
||||
*/
|
||||
bool
|
||||
reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
reload_config(t_configuration_options *orig_options)
|
||||
{
|
||||
PGconn *conn;
|
||||
t_configuration_options new_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||
@@ -1152,50 +1009,17 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
static ItemList config_errors = {NULL, NULL};
|
||||
static ItemList config_warnings = {NULL, NULL};
|
||||
|
||||
PQExpBufferData errors;
|
||||
|
||||
log_info(_("reloading configuration file"));
|
||||
|
||||
_parse_config(&new_options, &config_errors, &config_warnings);
|
||||
|
||||
|
||||
if (server_type == PRIMARY || server_type == STANDBY)
|
||||
{
|
||||
if (new_options.promote_command[0] == '\0')
|
||||
{
|
||||
item_list_append(&config_errors, _("\"promote_command\": required parameter was not found"));
|
||||
}
|
||||
|
||||
if (new_options.follow_command[0] == '\0')
|
||||
{
|
||||
item_list_append(&config_errors, _("\"follow_command\": required parameter was not found"));
|
||||
}
|
||||
}
|
||||
|
||||
if (config_errors.head != NULL)
|
||||
{
|
||||
ItemListCell *cell = NULL;
|
||||
|
||||
/* XXX dump errors to log */
|
||||
log_warning(_("unable to parse new configuration, retaining current configuration"));
|
||||
|
||||
initPQExpBuffer(&errors);
|
||||
|
||||
appendPQExpBufferStr(&errors,
|
||||
"following errors were detected:\n");
|
||||
|
||||
for (cell = config_errors.head; cell; cell = cell->next)
|
||||
{
|
||||
appendPQExpBuffer(&errors,
|
||||
" %s\n", cell->string);
|
||||
}
|
||||
|
||||
log_detail("%s", errors.data);
|
||||
termPQExpBuffer(&errors);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* The following options cannot be changed */
|
||||
|
||||
if (new_options.node_id != orig_options->node_id)
|
||||
@@ -1204,12 +1028,13 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (strncmp(new_options.node_name, orig_options->node_name, sizeof(orig_options->node_name)) != 0)
|
||||
if (strcmp(new_options.node_name, orig_options->node_name) != 0)
|
||||
{
|
||||
log_warning(_("\"node_name\" cannot be changed, keeping current configuration"));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* No configuration problems detected - copy any changed values
|
||||
*
|
||||
@@ -1247,7 +1072,7 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
}
|
||||
|
||||
/* conninfo */
|
||||
if (strncmp(orig_options->conninfo, new_options.conninfo, MAXLEN) != 0)
|
||||
if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
|
||||
{
|
||||
/* Test conninfo string works */
|
||||
conn = establish_db_connection(new_options.conninfo, false);
|
||||
@@ -1259,8 +1084,8 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
{
|
||||
strncpy(orig_options->conninfo, new_options.conninfo, MAXLEN);
|
||||
log_info(_("\"conninfo\" is now \"%s\""), new_options.conninfo);
|
||||
}
|
||||
|
||||
}
|
||||
PQfinish(conn);
|
||||
}
|
||||
|
||||
@@ -1274,7 +1099,7 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
}
|
||||
|
||||
/* event_notification_command */
|
||||
if (strncmp(orig_options->event_notification_command, new_options.event_notification_command, MAXLEN) != 0)
|
||||
if (strcmp(orig_options->event_notification_command, new_options.event_notification_command) != 0)
|
||||
{
|
||||
strncpy(orig_options->event_notification_command, new_options.event_notification_command, MAXLEN);
|
||||
log_info(_("\"event_notification_command\" is now \"%s\""), new_options.event_notification_command);
|
||||
@@ -1283,7 +1108,7 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
}
|
||||
|
||||
/* event_notifications */
|
||||
if (strncmp(orig_options->event_notifications_orig, new_options.event_notifications_orig, MAXLEN) != 0)
|
||||
if (strcmp(orig_options->event_notifications_orig, new_options.event_notifications_orig) != 0)
|
||||
{
|
||||
strncpy(orig_options->event_notifications_orig, new_options.event_notifications_orig, MAXLEN);
|
||||
log_info(_("\"event_notifications\" is now \"%s\""), new_options.event_notifications_orig);
|
||||
@@ -1303,7 +1128,7 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
}
|
||||
|
||||
/* follow_command */
|
||||
if (strncmp(orig_options->follow_command, new_options.follow_command, MAXLEN) != 0)
|
||||
if (strcmp(orig_options->follow_command, new_options.follow_command) != 0)
|
||||
{
|
||||
strncpy(orig_options->follow_command, new_options.follow_command, MAXLEN);
|
||||
log_info(_("\"follow_command\" is now \"%s\""), new_options.follow_command);
|
||||
@@ -1338,8 +1163,9 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
|
||||
/* promote_command */
|
||||
if (strncmp(orig_options->promote_command, new_options.promote_command, MAXLEN) != 0)
|
||||
if (strcmp(orig_options->promote_command, new_options.promote_command) != 0)
|
||||
{
|
||||
strncpy(orig_options->promote_command, new_options.promote_command, MAXLEN);
|
||||
log_info(_("\"promote_command\" is now \"%s\""), new_options.promote_command);
|
||||
@@ -1347,7 +1173,7 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* promote_delay (for testing use only; not documented */
|
||||
/* promote_delay */
|
||||
if (orig_options->promote_delay != new_options.promote_delay)
|
||||
{
|
||||
orig_options->promote_delay = new_options.promote_delay;
|
||||
@@ -1374,77 +1200,23 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* repmgrd_standby_startup_timeout */
|
||||
if (orig_options->repmgrd_standby_startup_timeout != new_options.repmgrd_standby_startup_timeout)
|
||||
{
|
||||
orig_options->repmgrd_standby_startup_timeout = new_options.repmgrd_standby_startup_timeout;
|
||||
log_info(_("\"repmgrd_standby_startup_timeout\" is now \"%i\""), new_options.repmgrd_standby_startup_timeout);
|
||||
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* standby_disconnect_on_failover */
|
||||
if (orig_options->standby_disconnect_on_failover != new_options.standby_disconnect_on_failover)
|
||||
{
|
||||
orig_options->standby_disconnect_on_failover = new_options.standby_disconnect_on_failover;
|
||||
log_info(_("\"standby_disconnect_on_failover\" is now \"%s\""),
|
||||
new_options.standby_disconnect_on_failover == true ? "TRUE" : "FALSE");
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* sibling_nodes_disconnect_timeout */
|
||||
if (orig_options->sibling_nodes_disconnect_timeout != new_options.sibling_nodes_disconnect_timeout)
|
||||
{
|
||||
orig_options->sibling_nodes_disconnect_timeout = new_options.sibling_nodes_disconnect_timeout;
|
||||
log_info(_("\"sibling_nodes_disconnect_timeout\" is now \"%i\""),
|
||||
new_options.sibling_nodes_disconnect_timeout);
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* connection_check_type */
|
||||
if (orig_options->connection_check_type != new_options.connection_check_type)
|
||||
{
|
||||
orig_options->connection_check_type = new_options.connection_check_type;
|
||||
log_info(_("\"connection_check_type\" is now \"%s\""),
|
||||
print_connection_check_type(new_options.connection_check_type));
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* primary_visibility_consensus */
|
||||
if (orig_options->primary_visibility_consensus != new_options.primary_visibility_consensus)
|
||||
{
|
||||
orig_options->primary_visibility_consensus = new_options.primary_visibility_consensus;
|
||||
log_info(_("\"primary_visibility_consensus\" is now \"%s\""),
|
||||
new_options.primary_visibility_consensus == true ? "TRUE" : "FALSE");
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* failover_validation_command */
|
||||
if (strncmp(orig_options->failover_validation_command, new_options.failover_validation_command, MAXPGPATH) != 0)
|
||||
{
|
||||
strncpy(orig_options->failover_validation_command, new_options.failover_validation_command, MAXPGPATH);
|
||||
log_info(_("\"failover_validation_command\" is now \"%s\""), new_options.failover_validation_command);
|
||||
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle changes to logging configuration
|
||||
*/
|
||||
|
||||
/* log_facility */
|
||||
if (strncmp(orig_options->log_facility, new_options.log_facility, MAXLEN) != 0)
|
||||
if (strcmp(orig_options->log_facility, new_options.log_facility) != 0)
|
||||
{
|
||||
strncpy(orig_options->log_facility, new_options.log_facility, MAXLEN);
|
||||
strcpy(orig_options->log_facility, new_options.log_facility);
|
||||
log_info(_("\"log_facility\" is now \"%s\""), new_options.log_facility);
|
||||
|
||||
log_config_changed = true;
|
||||
}
|
||||
|
||||
/* log_file */
|
||||
if (strncmp(orig_options->log_file, new_options.log_file, MAXLEN) != 0)
|
||||
if (strcmp(orig_options->log_file, new_options.log_file) != 0)
|
||||
{
|
||||
strncpy(orig_options->log_file, new_options.log_file, MAXLEN);
|
||||
strcpy(orig_options->log_file, new_options.log_file);
|
||||
log_info(_("\"log_file\" is now \"%s\""), new_options.log_file);
|
||||
|
||||
log_config_changed = true;
|
||||
@@ -1452,9 +1224,9 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
|
||||
|
||||
/* log_level */
|
||||
if (strncmp(orig_options->log_level, new_options.log_level, MAXLEN) != 0)
|
||||
if (strcmp(orig_options->log_level, new_options.log_level) != 0)
|
||||
{
|
||||
strncpy(orig_options->log_level, new_options.log_level, MAXLEN);
|
||||
strcpy(orig_options->log_level, new_options.log_level);
|
||||
log_info(_("\"log_level\" is now \"%s\""), new_options.log_level);
|
||||
|
||||
log_config_changed = true;
|
||||
@@ -1520,23 +1292,13 @@ exit_with_config_file_errors(ItemList *config_errors, ItemList *config_warnings,
|
||||
|
||||
|
||||
void
|
||||
exit_with_cli_errors(ItemList *error_list, const char *repmgr_command)
|
||||
exit_with_cli_errors(ItemList *error_list)
|
||||
{
|
||||
fprintf(stderr, _("The following command line errors were encountered:\n"));
|
||||
|
||||
print_item_list(error_list);
|
||||
|
||||
if (repmgr_command != NULL)
|
||||
{
|
||||
fprintf(stderr, _("Try \"%s --help\" or \"%s %s --help\" for more information.\n"),
|
||||
progname(),
|
||||
progname(),
|
||||
repmgr_command);
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname());
|
||||
}
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname());
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
@@ -1639,16 +1401,13 @@ repmgr_atoi(const char *value, const char *config_item, ItemList *error_list, in
|
||||
*
|
||||
* TODO: accept "any unambiguous prefix of one of these" as per postgresql.conf:
|
||||
*
|
||||
* https://www.postgresql.org/docs/current/config-setting.html
|
||||
* https://www.postgresql.org/docs/current/static/config-setting.html
|
||||
*/
|
||||
bool
|
||||
static bool
|
||||
parse_bool(const char *s, const char *config_item, ItemList *error_list)
|
||||
{
|
||||
PQExpBufferData errors;
|
||||
|
||||
if (s == NULL)
|
||||
return true;
|
||||
|
||||
if (strcasecmp(s, "0") == 0)
|
||||
return false;
|
||||
|
||||
@@ -1930,9 +1689,6 @@ free_parsed_argv(char ***argv_array)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
bool
|
||||
parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_options *backup_options, int server_version_num, ItemList *error_list)
|
||||
{
|
||||
@@ -2025,21 +1781,3 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti
|
||||
|
||||
return backup_options_ok;
|
||||
}
|
||||
|
||||
|
||||
const char *
|
||||
print_connection_check_type(ConnectionCheckType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case CHECK_PING:
|
||||
return "ping";
|
||||
case CHECK_QUERY:
|
||||
return "query";
|
||||
case CHECK_CONNECTION:
|
||||
return "connection";
|
||||
}
|
||||
|
||||
/* should never reach here */
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
94
configfile.h
94
configfile.h
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* configfile.h
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
@@ -37,13 +37,6 @@ typedef enum
|
||||
FAILOVER_AUTOMATIC
|
||||
} failover_mode_opt;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
CHECK_PING,
|
||||
CHECK_QUERY,
|
||||
CHECK_CONNECTION
|
||||
} ConnectionCheckType;
|
||||
|
||||
typedef struct EventNotificationListCell
|
||||
{
|
||||
struct EventNotificationListCell *next;
|
||||
@@ -76,13 +69,11 @@ typedef struct
|
||||
{
|
||||
/* node information */
|
||||
int node_id;
|
||||
char node_name[NAMEDATALEN];
|
||||
char node_name[MAXLEN];
|
||||
char conninfo[MAXLEN];
|
||||
char replication_user[NAMEDATALEN];
|
||||
char data_directory[MAXPGPATH];
|
||||
char config_directory[MAXPGPATH];
|
||||
char pg_bindir[MAXPGPATH];
|
||||
char repmgr_bindir[MAXPGPATH];
|
||||
int replication_type;
|
||||
|
||||
/* log settings */
|
||||
@@ -91,33 +82,16 @@ typedef struct
|
||||
char log_file[MAXLEN];
|
||||
int log_status_interval;
|
||||
|
||||
/* standby clone settings */
|
||||
/* standby action settings */
|
||||
bool use_replication_slots;
|
||||
char pg_basebackup_options[MAXLEN];
|
||||
char restore_command[MAXLEN];
|
||||
TablespaceList tablespace_mapping;
|
||||
char recovery_min_apply_delay[MAXLEN];
|
||||
bool recovery_min_apply_delay_provided;
|
||||
char archive_cleanup_command[MAXLEN];
|
||||
bool use_primary_conninfo_password;
|
||||
char passfile[MAXPGPATH];
|
||||
|
||||
/* standby promote settings */
|
||||
int promote_check_timeout;
|
||||
int promote_check_interval;
|
||||
|
||||
/* standby follow settings */
|
||||
int primary_follow_timeout;
|
||||
int standby_follow_timeout;
|
||||
|
||||
/* standby switchover settings */
|
||||
int shutdown_check_timeout;
|
||||
int standby_reconnect_timeout;
|
||||
int wal_receive_check_timeout;
|
||||
|
||||
/* node rejoin settings */
|
||||
int node_rejoin_timeout;
|
||||
|
||||
/* node check settings */
|
||||
int archive_ready_warning;
|
||||
int archive_ready_critical;
|
||||
@@ -140,14 +114,7 @@ typedef struct
|
||||
int degraded_monitoring_timeout;
|
||||
int async_query_timeout;
|
||||
int primary_notification_timeout;
|
||||
int repmgrd_standby_startup_timeout;
|
||||
char repmgrd_pid_file[MAXPGPATH];
|
||||
bool standby_disconnect_on_failover;
|
||||
int sibling_nodes_disconnect_timeout;
|
||||
ConnectionCheckType connection_check_type;
|
||||
bool primary_visibility_consensus;
|
||||
char failover_validation_command[MAXPGPATH];
|
||||
int election_rerun_interval;
|
||||
int primary_follow_timeout;
|
||||
|
||||
/* BDR settings */
|
||||
bool bdr_local_monitoring_only;
|
||||
@@ -155,18 +122,14 @@ typedef struct
|
||||
|
||||
/* service settings */
|
||||
char pg_ctl_options[MAXLEN];
|
||||
char service_start_command[MAXPGPATH];
|
||||
char service_stop_command[MAXPGPATH];
|
||||
char service_restart_command[MAXPGPATH];
|
||||
char service_reload_command[MAXPGPATH];
|
||||
char service_promote_command[MAXPGPATH];
|
||||
|
||||
/* repmgrd service settings */
|
||||
char repmgrd_service_start_command[MAXPGPATH];
|
||||
char repmgrd_service_stop_command[MAXPGPATH];
|
||||
char service_stop_command[MAXLEN];
|
||||
char service_start_command[MAXLEN];
|
||||
char service_restart_command[MAXLEN];
|
||||
char service_reload_command[MAXLEN];
|
||||
char service_promote_command[MAXLEN];
|
||||
|
||||
/* event notification settings */
|
||||
char event_notification_command[MAXPGPATH];
|
||||
char event_notification_command[MAXLEN];
|
||||
char event_notifications_orig[MAXLEN];
|
||||
EventNotificationList event_notifications;
|
||||
|
||||
@@ -190,22 +153,11 @@ typedef struct
|
||||
|
||||
#define T_CONFIGURATION_OPTIONS_INITIALIZER { \
|
||||
/* node information */ \
|
||||
UNKNOWN_NODE_ID, "", "", "", "", "", "", "", REPLICATION_TYPE_PHYSICAL, \
|
||||
UNKNOWN_NODE_ID, "", "", "", "", "", REPLICATION_TYPE_PHYSICAL, \
|
||||
/* log settings */ \
|
||||
"", "", "", DEFAULT_LOG_STATUS_INTERVAL, \
|
||||
/* standby clone settings */ \
|
||||
false, "", "", { NULL, NULL }, "", false, "", false, "", \
|
||||
/* standby promote settings */ \
|
||||
DEFAULT_PROMOTE_CHECK_TIMEOUT, DEFAULT_PROMOTE_CHECK_INTERVAL, \
|
||||
/* standby follow settings */ \
|
||||
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
||||
DEFAULT_STANDBY_FOLLOW_TIMEOUT, \
|
||||
/* standby switchover settings */ \
|
||||
DEFAULT_SHUTDOWN_CHECK_TIMEOUT, \
|
||||
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
|
||||
DEFAULT_WAL_RECEIVE_CHECK_TIMEOUT, \
|
||||
/* node rejoin settings */ \
|
||||
DEFAULT_NODE_REJOIN_TIMEOUT, \
|
||||
"", "", "", DEFAULT_LOG_STATUS_INTERVAL, \
|
||||
/* standby action settings */ \
|
||||
false, "", "", { NULL, NULL }, "", false, false, "", \
|
||||
/* node check settings */ \
|
||||
DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
|
||||
DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
|
||||
@@ -218,15 +170,12 @@ typedef struct
|
||||
DEFAULT_RECONNECTION_INTERVAL, \
|
||||
false, -1, \
|
||||
DEFAULT_ASYNC_QUERY_TIMEOUT, \
|
||||
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
||||
-1, "", false, DEFAULT_SIBLING_NODES_DISCONNECT_TIMEOUT, \
|
||||
CHECK_PING, true, "", DEFAULT_ELECTION_RERUN_INTERVAL, \
|
||||
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
||||
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
||||
/* BDR settings */ \
|
||||
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
||||
/* service settings */ \
|
||||
"", "", "", "", "", "", \
|
||||
/* repmgrd service settings */ \
|
||||
"", "", \
|
||||
/* event notification settings */ \
|
||||
"", "", { NULL, NULL }, \
|
||||
/* barman settings */ \
|
||||
@@ -298,20 +247,16 @@ typedef struct
|
||||
"", "", "", "" \
|
||||
}
|
||||
|
||||
#include "dbutils.h"
|
||||
|
||||
void set_progname(const char *argv0);
|
||||
const char *progname(void);
|
||||
|
||||
void load_config(const char *config_file, bool verbose, bool terse, t_configuration_options *options, char *argv0);
|
||||
bool reload_config(t_configuration_options *orig_options, t_server_type server_type);
|
||||
void parse_config(t_configuration_options *options, bool terse);
|
||||
bool reload_config(t_configuration_options *orig_options);
|
||||
|
||||
bool parse_recovery_conf(const char *data_dir, t_recovery_conf *conf);
|
||||
|
||||
bool parse_bool(const char *s,
|
||||
const char *config_item,
|
||||
ItemList *error_list);
|
||||
|
||||
int repmgr_atoi(const char *s,
|
||||
const char *config_item,
|
||||
ItemList *error_list,
|
||||
@@ -327,8 +272,7 @@ void free_parsed_argv(char ***argv_array);
|
||||
|
||||
|
||||
/* called by repmgr-client and repmgrd */
|
||||
void exit_with_cli_errors(ItemList *error_list, const char *repmgr_command);
|
||||
void exit_with_cli_errors(ItemList *error_list);
|
||||
void print_item_list(ItemList *item_list);
|
||||
const char *print_connection_check_type(ConnectionCheckType type);
|
||||
|
||||
#endif /* _REPMGR_CONFIGFILE_H_ */
|
||||
|
||||
38
configure
vendored
38
configure
vendored
@@ -1,8 +1,8 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for repmgr 4.3.
|
||||
# Generated by GNU Autoconf 2.69 for repmgr 4.0.3.
|
||||
#
|
||||
# Report bugs to <repmgr@googlegroups.com>.
|
||||
# Report bugs to <pgsql-bugs@postgresql.org>.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -11,7 +11,7 @@
|
||||
# This configure script is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy, distribute and modify it.
|
||||
#
|
||||
# Copyright (c) 2010-2019, 2ndQuadrant Ltd.
|
||||
# Copyright (c) 2010-2018, 2ndQuadrant Ltd.
|
||||
## -------------------- ##
|
||||
## M4sh Initialization. ##
|
||||
## -------------------- ##
|
||||
@@ -269,7 +269,7 @@ fi
|
||||
$as_echo "$0: be upgraded to zsh 4.3.4 or later."
|
||||
else
|
||||
$as_echo "$0: Please tell bug-autoconf@gnu.org and
|
||||
$0: repmgr@googlegroups.com about your system, including
|
||||
$0: pgsql-bugs@postgresql.org about your system, including
|
||||
$0: any error possibly output before this message. Then
|
||||
$0: install a modern shell, or manually run the script
|
||||
$0: under such a shell if you do have one."
|
||||
@@ -582,10 +582,10 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='repmgr'
|
||||
PACKAGE_TARNAME='repmgr'
|
||||
PACKAGE_VERSION='4.3'
|
||||
PACKAGE_STRING='repmgr 4.3'
|
||||
PACKAGE_BUGREPORT='repmgr@googlegroups.com'
|
||||
PACKAGE_URL='https://repmgr.org/'
|
||||
PACKAGE_VERSION='4.0.3'
|
||||
PACKAGE_STRING='repmgr 4.0.3'
|
||||
PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
|
||||
PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'
|
||||
|
||||
ac_subst_vars='LTLIBOBJS
|
||||
LIBOBJS
|
||||
@@ -1178,7 +1178,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures repmgr 4.3 to adapt to many kinds of systems.
|
||||
\`configure' configures repmgr 4.0.3 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1239,7 +1239,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of repmgr 4.3:";;
|
||||
short | recursive ) echo "Configuration of repmgr 4.0.3:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1249,8 +1249,8 @@ Some influential environment variables:
|
||||
Use these variables to override the choices made by `configure' or to help
|
||||
it to find libraries and programs with nonstandard names/locations.
|
||||
|
||||
Report bugs to <repmgr@googlegroups.com>.
|
||||
repmgr home page: <https://repmgr.org/>.
|
||||
Report bugs to <pgsql-bugs@postgresql.org>.
|
||||
repmgr home page: <https://2ndquadrant.com/en/resources/repmgr/>.
|
||||
_ACEOF
|
||||
ac_status=$?
|
||||
fi
|
||||
@@ -1313,14 +1313,14 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
repmgr configure 4.3
|
||||
repmgr configure 4.0.3
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
This configure script is free software; the Free Software Foundation
|
||||
gives unlimited permission to copy, distribute and modify it.
|
||||
|
||||
Copyright (c) 2010-2019, 2ndQuadrant Ltd.
|
||||
Copyright (c) 2010-2018, 2ndQuadrant Ltd.
|
||||
_ACEOF
|
||||
exit
|
||||
fi
|
||||
@@ -1332,7 +1332,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by repmgr $as_me 4.3, which was
|
||||
It was created by repmgr $as_me 4.0.3, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2359,7 +2359,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by repmgr $as_me 4.3, which was
|
||||
This file was extended by repmgr $as_me 4.0.3, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -2415,14 +2415,14 @@ $config_files
|
||||
Configuration headers:
|
||||
$config_headers
|
||||
|
||||
Report bugs to <repmgr@googlegroups.com>.
|
||||
repmgr home page: <https://repmgr.org/>."
|
||||
Report bugs to <pgsql-bugs@postgresql.org>.
|
||||
repmgr home page: <https://2ndquadrant.com/en/resources/repmgr/>."
|
||||
|
||||
_ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
repmgr config.status 4.3
|
||||
repmgr config.status 4.0.3
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
AC_INIT([repmgr], [4.3], [repmgr@googlegroups.com], [repmgr], [https://repmgr.org/])
|
||||
AC_INIT([repmgr], [4.0.3], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
|
||||
AC_COPYRIGHT([Copyright (c) 2010-2019, 2ndQuadrant Ltd.])
|
||||
AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])
|
||||
|
||||
AC_CONFIG_HEADER(config.h)
|
||||
|
||||
|
||||
262
controldata.c
262
controldata.c
@@ -1,12 +1,6 @@
|
||||
/*
|
||||
* controldata.c - functions for reading the pg_control file
|
||||
*
|
||||
* The functions provided here enable repmgr to read a pg_control file
|
||||
* in a version-independent way, even if the PostgreSQL instance is not
|
||||
* running. For that reason we can't rely on the pg_control_*() functions
|
||||
* provided in PostgreSQL 9.6 and later.
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* controldata.c
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
@@ -36,53 +30,6 @@
|
||||
|
||||
static ControlFileInfo *get_controlfile(const char *DataDir);
|
||||
|
||||
int
|
||||
get_pg_version(const char *data_directory, char *version_string)
|
||||
{
|
||||
char PgVersionPath[MAXPGPATH] = "";
|
||||
FILE *fp = NULL;
|
||||
char *endptr = NULL;
|
||||
char file_version_string[MAX_VERSION_STRING] = "";
|
||||
long file_major, file_minor;
|
||||
int ret;
|
||||
|
||||
snprintf(PgVersionPath, MAXPGPATH, "%s/PG_VERSION", data_directory);
|
||||
|
||||
fp = fopen(PgVersionPath, "r");
|
||||
|
||||
if (fp == NULL)
|
||||
{
|
||||
log_warning(_("could not open file \"%s\" for reading"),
|
||||
PgVersionPath);
|
||||
log_detail("%s", strerror(errno));
|
||||
return UNKNOWN_SERVER_VERSION_NUM;
|
||||
}
|
||||
|
||||
file_version_string[0] = '\0';
|
||||
|
||||
ret = fscanf(fp, "%23s", file_version_string);
|
||||
fclose(fp);
|
||||
|
||||
if (ret != 1 || endptr == file_version_string)
|
||||
{
|
||||
log_warning(_("unable to determine major version number from PG_VERSION"));
|
||||
|
||||
return UNKNOWN_SERVER_VERSION_NUM;
|
||||
}
|
||||
|
||||
file_major = strtol(file_version_string, &endptr, 10);
|
||||
file_minor = 0;
|
||||
|
||||
if (*endptr == '.')
|
||||
file_minor = strtol(endptr + 1, NULL, 10);
|
||||
|
||||
if (version_string != NULL)
|
||||
strncpy(version_string, file_version_string, MAX_VERSION_STRING);
|
||||
|
||||
return ((int) file_major * 10000) + ((int) file_minor * 100);
|
||||
}
|
||||
|
||||
|
||||
uint64
|
||||
get_system_identifier(const char *data_directory)
|
||||
{
|
||||
@@ -90,14 +37,18 @@ get_system_identifier(const char *data_directory)
|
||||
uint64 system_identifier = UNKNOWN_SYSTEM_IDENTIFIER;
|
||||
|
||||
control_file_info = get_controlfile(data_directory);
|
||||
system_identifier = control_file_info->system_identifier;
|
||||
|
||||
if (control_file_info->control_file_processed == true)
|
||||
system_identifier = control_file_info->control_file->system_identifier;
|
||||
else
|
||||
system_identifier = UNKNOWN_SYSTEM_IDENTIFIER;
|
||||
|
||||
pfree(control_file_info->control_file);
|
||||
pfree(control_file_info);
|
||||
|
||||
return system_identifier;
|
||||
}
|
||||
|
||||
|
||||
DBState
|
||||
get_db_state(const char *data_directory)
|
||||
{
|
||||
@@ -106,15 +57,20 @@ get_db_state(const char *data_directory)
|
||||
|
||||
control_file_info = get_controlfile(data_directory);
|
||||
|
||||
state = control_file_info->state;
|
||||
if (control_file_info->control_file_processed == true)
|
||||
state = control_file_info->control_file->state;
|
||||
else
|
||||
/* if we were unable to parse the control file, assume DB is shut down */
|
||||
state = DB_SHUTDOWNED;
|
||||
|
||||
pfree(control_file_info->control_file);
|
||||
pfree(control_file_info);
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
|
||||
XLogRecPtr
|
||||
extern XLogRecPtr
|
||||
get_latest_checkpoint_location(const char *data_directory)
|
||||
{
|
||||
ControlFileInfo *control_file_info = NULL;
|
||||
@@ -122,8 +78,12 @@ get_latest_checkpoint_location(const char *data_directory)
|
||||
|
||||
control_file_info = get_controlfile(data_directory);
|
||||
|
||||
checkPoint = control_file_info->checkPoint;
|
||||
if (control_file_info->control_file_processed == false)
|
||||
return InvalidXLogRecPtr;
|
||||
|
||||
checkPoint = control_file_info->control_file->checkPoint;
|
||||
|
||||
pfree(control_file_info->control_file);
|
||||
pfree(control_file_info);
|
||||
|
||||
return checkPoint;
|
||||
@@ -138,8 +98,16 @@ get_data_checksum_version(const char *data_directory)
|
||||
|
||||
control_file_info = get_controlfile(data_directory);
|
||||
|
||||
data_checksum_version = (int) control_file_info->data_checksum_version;
|
||||
if (control_file_info->control_file_processed == false)
|
||||
{
|
||||
data_checksum_version = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
data_checksum_version = (int) control_file_info->control_file->data_checksum_version;
|
||||
}
|
||||
|
||||
pfree(control_file_info->control_file);
|
||||
pfree(control_file_info);
|
||||
|
||||
return data_checksum_version;
|
||||
@@ -166,143 +134,38 @@ describe_db_state(DBState state)
|
||||
case DB_IN_PRODUCTION:
|
||||
return _("in production");
|
||||
}
|
||||
|
||||
return _("unrecognized status code");
|
||||
}
|
||||
|
||||
|
||||
TimeLineID
|
||||
get_timeline(const char *data_directory)
|
||||
{
|
||||
ControlFileInfo *control_file_info = NULL;
|
||||
TimeLineID timeline = -1;
|
||||
|
||||
control_file_info = get_controlfile(data_directory);
|
||||
|
||||
timeline = (int) control_file_info->timeline;
|
||||
|
||||
pfree(control_file_info);
|
||||
|
||||
return timeline;
|
||||
}
|
||||
|
||||
|
||||
TimeLineID
|
||||
get_min_recovery_end_timeline(const char *data_directory)
|
||||
{
|
||||
ControlFileInfo *control_file_info = NULL;
|
||||
TimeLineID timeline = -1;
|
||||
|
||||
control_file_info = get_controlfile(data_directory);
|
||||
|
||||
timeline = (int) control_file_info->minRecoveryPointTLI;
|
||||
|
||||
pfree(control_file_info);
|
||||
|
||||
return timeline;
|
||||
}
|
||||
|
||||
|
||||
XLogRecPtr
|
||||
get_min_recovery_location(const char *data_directory)
|
||||
{
|
||||
ControlFileInfo *control_file_info = NULL;
|
||||
XLogRecPtr minRecoveryPoint = InvalidXLogRecPtr;
|
||||
|
||||
control_file_info = get_controlfile(data_directory);
|
||||
|
||||
minRecoveryPoint = control_file_info->minRecoveryPoint;
|
||||
|
||||
pfree(control_file_info);
|
||||
|
||||
return minRecoveryPoint;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* We maintain our own version of get_controlfile() as we need cross-version
|
||||
* we maintain our own version of get_controlfile() as we need cross-version
|
||||
* compatibility, and also don't care if the file isn't readable.
|
||||
*/
|
||||
static ControlFileInfo *
|
||||
get_controlfile(const char *DataDir)
|
||||
{
|
||||
char file_version_string[MAX_VERSION_STRING] = "";
|
||||
ControlFileInfo *control_file_info;
|
||||
int fd, version_num;
|
||||
int fd;
|
||||
char ControlFilePath[MAXPGPATH] = "";
|
||||
void *ControlFileDataPtr = NULL;
|
||||
int expected_size = 0;
|
||||
|
||||
control_file_info = palloc0(sizeof(ControlFileInfo));
|
||||
|
||||
/* set default values */
|
||||
control_file_info->control_file_processed = false;
|
||||
control_file_info->system_identifier = UNKNOWN_SYSTEM_IDENTIFIER;
|
||||
control_file_info->state = DB_SHUTDOWNED;
|
||||
control_file_info->checkPoint = InvalidXLogRecPtr;
|
||||
control_file_info->data_checksum_version = -1;
|
||||
control_file_info->timeline = -1;
|
||||
control_file_info->minRecoveryPointTLI = -1;
|
||||
control_file_info->minRecoveryPoint = InvalidXLogRecPtr;
|
||||
|
||||
/*
|
||||
* Read PG_VERSION, as we'll need to determine which struct to read
|
||||
* the control file contents into
|
||||
*/
|
||||
|
||||
version_num = get_pg_version(DataDir, file_version_string);
|
||||
|
||||
if (version_num == UNKNOWN_SERVER_VERSION_NUM)
|
||||
{
|
||||
log_warning(_("unable to determine server version number from PG_VERSION"));
|
||||
return control_file_info;
|
||||
}
|
||||
|
||||
if (version_num < MIN_SUPPORTED_VERSION_NUM)
|
||||
{
|
||||
log_warning(_("data directory appears to be initialised for %s"),
|
||||
file_version_string);
|
||||
log_detail(_("minimum supported PostgreSQL version is %s"),
|
||||
MIN_SUPPORTED_VERSION);
|
||||
return control_file_info;
|
||||
}
|
||||
control_file_info->control_file = palloc0(sizeof(ControlFileData));
|
||||
|
||||
snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
|
||||
|
||||
if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0)) == -1)
|
||||
{
|
||||
log_warning(_("could not open file \"%s\" for reading"),
|
||||
ControlFilePath);
|
||||
log_detail("%s", strerror(errno));
|
||||
log_debug("could not open file \"%s\" for reading: %s",
|
||||
ControlFilePath, strerror(errno));
|
||||
return control_file_info;
|
||||
}
|
||||
|
||||
|
||||
if (version_num >= 90500)
|
||||
if (read(fd, control_file_info->control_file, sizeof(ControlFileData)) != sizeof(ControlFileData))
|
||||
{
|
||||
expected_size = sizeof(ControlFileData95);
|
||||
ControlFileDataPtr = palloc0(expected_size);
|
||||
}
|
||||
else if (version_num >= 90400)
|
||||
{
|
||||
expected_size = sizeof(ControlFileData94);
|
||||
ControlFileDataPtr = palloc0(expected_size);
|
||||
}
|
||||
else if (version_num >= 90300)
|
||||
{
|
||||
expected_size = sizeof(ControlFileData93);
|
||||
ControlFileDataPtr = palloc0(expected_size);
|
||||
}
|
||||
|
||||
|
||||
if (read(fd, ControlFileDataPtr, expected_size) != expected_size)
|
||||
{
|
||||
log_warning(_("could not read file \"%s\""),
|
||||
ControlFilePath);
|
||||
log_detail("%s", strerror(errno));
|
||||
|
||||
close(fd);
|
||||
|
||||
log_debug("could not read file \"%s\": %s",
|
||||
ControlFilePath, strerror(errno));
|
||||
return control_file_info;
|
||||
}
|
||||
|
||||
@@ -310,57 +173,12 @@ get_controlfile(const char *DataDir)
|
||||
|
||||
control_file_info->control_file_processed = true;
|
||||
|
||||
if (version_num >= 110000)
|
||||
{
|
||||
ControlFileData11 *ptr = (struct ControlFileData11 *)ControlFileDataPtr;
|
||||
control_file_info->system_identifier = ptr->system_identifier;
|
||||
control_file_info->state = ptr->state;
|
||||
control_file_info->checkPoint = ptr->checkPoint;
|
||||
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
||||
control_file_info->timeline = ptr->checkPointCopy.ThisTimeLineID;
|
||||
control_file_info->minRecoveryPointTLI = ptr->minRecoveryPointTLI;
|
||||
control_file_info->minRecoveryPoint = ptr->minRecoveryPoint;
|
||||
}
|
||||
else if (version_num >= 90500)
|
||||
{
|
||||
ControlFileData95 *ptr = (struct ControlFileData95 *)ControlFileDataPtr;
|
||||
control_file_info->system_identifier = ptr->system_identifier;
|
||||
control_file_info->state = ptr->state;
|
||||
control_file_info->checkPoint = ptr->checkPoint;
|
||||
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
||||
control_file_info->timeline = ptr->checkPointCopy.ThisTimeLineID;
|
||||
control_file_info->minRecoveryPointTLI = ptr->minRecoveryPointTLI;
|
||||
control_file_info->minRecoveryPoint = ptr->minRecoveryPoint;
|
||||
}
|
||||
else if (version_num >= 90400)
|
||||
{
|
||||
ControlFileData94 *ptr = (struct ControlFileData94 *)ControlFileDataPtr;
|
||||
control_file_info->system_identifier = ptr->system_identifier;
|
||||
control_file_info->state = ptr->state;
|
||||
control_file_info->checkPoint = ptr->checkPoint;
|
||||
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
||||
control_file_info->timeline = ptr->checkPointCopy.ThisTimeLineID;
|
||||
control_file_info->minRecoveryPointTLI = ptr->minRecoveryPointTLI;
|
||||
control_file_info->minRecoveryPoint = ptr->minRecoveryPoint;
|
||||
}
|
||||
else if (version_num >= 90300)
|
||||
{
|
||||
ControlFileData93 *ptr = (struct ControlFileData93 *)ControlFileDataPtr;
|
||||
control_file_info->system_identifier = ptr->system_identifier;
|
||||
control_file_info->state = ptr->state;
|
||||
control_file_info->checkPoint = ptr->checkPoint;
|
||||
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
||||
control_file_info->timeline = ptr->checkPointCopy.ThisTimeLineID;
|
||||
control_file_info->minRecoveryPointTLI = ptr->minRecoveryPointTLI;
|
||||
control_file_info->minRecoveryPoint = ptr->minRecoveryPoint;
|
||||
}
|
||||
|
||||
pfree(ControlFileDataPtr);
|
||||
|
||||
/*
|
||||
* We don't check the CRC here as we're potentially checking a pg_control
|
||||
* file from a different PostgreSQL version to the one repmgr was compiled
|
||||
* against.
|
||||
* against. However we're only interested in the first few fields, which
|
||||
* should be constant across supported versions
|
||||
*
|
||||
*/
|
||||
|
||||
return control_file_info;
|
||||
|
||||
323
controldata.h
323
controldata.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* controldata.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
@@ -12,335 +12,16 @@
|
||||
#include "postgres_fe.h"
|
||||
#include "catalog/pg_control.h"
|
||||
|
||||
#define MAX_VERSION_STRING 24
|
||||
/*
|
||||
* A simplified representation of pg_control containing only those fields
|
||||
* required by repmgr.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
bool control_file_processed;
|
||||
uint64 system_identifier;
|
||||
DBState state;
|
||||
XLogRecPtr checkPoint;
|
||||
uint32 data_checksum_version;
|
||||
TimeLineID timeline;
|
||||
TimeLineID minRecoveryPointTLI;
|
||||
XLogRecPtr minRecoveryPoint;
|
||||
ControlFileData *control_file;
|
||||
} ControlFileInfo;
|
||||
|
||||
|
||||
|
||||
/* Same for 9.3, 9.4 */
|
||||
typedef struct CheckPoint93
|
||||
{
|
||||
XLogRecPtr redo; /* next RecPtr available when we began to
|
||||
* create CheckPoint (i.e. REDO start point) */
|
||||
TimeLineID ThisTimeLineID; /* current TLI */
|
||||
TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new
|
||||
* timeline (equals ThisTimeLineID otherwise) */
|
||||
bool fullPageWrites; /* current full_page_writes */
|
||||
uint32 nextXidEpoch; /* higher-order bits of nextXid */
|
||||
TransactionId nextXid; /* next free XID */
|
||||
Oid nextOid; /* next free OID */
|
||||
MultiXactId nextMulti; /* next free MultiXactId */
|
||||
MultiXactOffset nextMultiOffset; /* next free MultiXact offset */
|
||||
TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */
|
||||
Oid oldestXidDB; /* database with minimum datfrozenxid */
|
||||
MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */
|
||||
Oid oldestMultiDB; /* database with minimum datminmxid */
|
||||
pg_time_t time; /* time stamp of checkpoint */
|
||||
|
||||
TransactionId oldestActiveXid;
|
||||
} CheckPoint93;
|
||||
|
||||
|
||||
/* Same for 9.5, 9.6, 10, HEAD */
|
||||
typedef struct CheckPoint95
|
||||
{
|
||||
XLogRecPtr redo; /* next RecPtr available when we began to
|
||||
* create CheckPoint (i.e. REDO start point) */
|
||||
TimeLineID ThisTimeLineID; /* current TLI */
|
||||
TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new
|
||||
* timeline (equals ThisTimeLineID otherwise) */
|
||||
bool fullPageWrites; /* current full_page_writes */
|
||||
uint32 nextXidEpoch; /* higher-order bits of nextXid */
|
||||
TransactionId nextXid; /* next free XID */
|
||||
Oid nextOid; /* next free OID */
|
||||
MultiXactId nextMulti; /* next free MultiXactId */
|
||||
MultiXactOffset nextMultiOffset; /* next free MultiXact offset */
|
||||
TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */
|
||||
Oid oldestXidDB; /* database with minimum datfrozenxid */
|
||||
MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */
|
||||
Oid oldestMultiDB; /* database with minimum datminmxid */
|
||||
pg_time_t time; /* time stamp of checkpoint */
|
||||
TransactionId oldestCommitTsXid; /* oldest Xid with valid commit
|
||||
* timestamp */
|
||||
TransactionId newestCommitTsXid; /* newest Xid with valid commit
|
||||
* timestamp */
|
||||
|
||||
TransactionId oldestActiveXid;
|
||||
} CheckPoint95;
|
||||
|
||||
|
||||
typedef struct ControlFileData93
|
||||
{
|
||||
uint64 system_identifier;
|
||||
|
||||
uint32 pg_control_version; /* PG_CONTROL_VERSION */
|
||||
uint32 catalog_version_no; /* see catversion.h */
|
||||
|
||||
DBState state; /* see enum above */
|
||||
pg_time_t time; /* time stamp of last pg_control update */
|
||||
XLogRecPtr checkPoint; /* last check point record ptr */
|
||||
XLogRecPtr prevCheckPoint; /* previous check point record ptr */
|
||||
|
||||
CheckPoint93 checkPointCopy; /* copy of last check point record */
|
||||
|
||||
XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */
|
||||
|
||||
XLogRecPtr minRecoveryPoint;
|
||||
TimeLineID minRecoveryPointTLI;
|
||||
XLogRecPtr backupStartPoint;
|
||||
XLogRecPtr backupEndPoint;
|
||||
bool backupEndRequired;
|
||||
|
||||
int wal_level;
|
||||
int MaxConnections;
|
||||
int max_prepared_xacts;
|
||||
int max_locks_per_xact;
|
||||
|
||||
uint32 maxAlign; /* alignment requirement for tuples */
|
||||
double floatFormat; /* constant 1234567.0 */
|
||||
|
||||
uint32 blcksz; /* data block size for this DB */
|
||||
uint32 relseg_size; /* blocks per segment of large relation */
|
||||
|
||||
uint32 xlog_blcksz; /* block size within WAL files */
|
||||
uint32 xlog_seg_size; /* size of each WAL segment */
|
||||
|
||||
uint32 nameDataLen; /* catalog name field width */
|
||||
uint32 indexMaxKeys; /* max number of columns in an index */
|
||||
|
||||
uint32 toast_max_chunk_size; /* chunk size in TOAST tables */
|
||||
|
||||
/* flag indicating internal format of timestamp, interval, time */
|
||||
bool enableIntTimes; /* int64 storage enabled? */
|
||||
|
||||
/* flags indicating pass-by-value status of various types */
|
||||
bool float4ByVal; /* float4 pass-by-value? */
|
||||
bool float8ByVal; /* float8, int8, etc pass-by-value? */
|
||||
|
||||
/* Are data pages protected by checksums? Zero if no checksum version */
|
||||
uint32 data_checksum_version;
|
||||
|
||||
} ControlFileData93;
|
||||
|
||||
|
||||
/*
|
||||
* Following field added since 9.3:
|
||||
*
|
||||
* int max_worker_processes;
|
||||
*/
|
||||
|
||||
typedef struct ControlFileData94
|
||||
{
|
||||
uint64 system_identifier;
|
||||
|
||||
uint32 pg_control_version; /* PG_CONTROL_VERSION */
|
||||
uint32 catalog_version_no; /* see catversion.h */
|
||||
|
||||
DBState state; /* see enum above */
|
||||
pg_time_t time; /* time stamp of last pg_control update */
|
||||
XLogRecPtr checkPoint; /* last check point record ptr */
|
||||
XLogRecPtr prevCheckPoint; /* previous check point record ptr */
|
||||
|
||||
CheckPoint93 checkPointCopy; /* copy of last check point record */
|
||||
|
||||
XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */
|
||||
|
||||
XLogRecPtr minRecoveryPoint;
|
||||
TimeLineID minRecoveryPointTLI;
|
||||
XLogRecPtr backupStartPoint;
|
||||
XLogRecPtr backupEndPoint;
|
||||
bool backupEndRequired;
|
||||
|
||||
int wal_level;
|
||||
bool wal_log_hints;
|
||||
int MaxConnections;
|
||||
int max_worker_processes;
|
||||
int max_prepared_xacts;
|
||||
int max_locks_per_xact;
|
||||
|
||||
uint32 maxAlign; /* alignment requirement for tuples */
|
||||
double floatFormat; /* constant 1234567.0 */
|
||||
|
||||
uint32 blcksz; /* data block size for this DB */
|
||||
uint32 relseg_size; /* blocks per segment of large relation */
|
||||
|
||||
uint32 xlog_blcksz; /* block size within WAL files */
|
||||
uint32 xlog_seg_size; /* size of each WAL segment */
|
||||
|
||||
uint32 nameDataLen; /* catalog name field width */
|
||||
uint32 indexMaxKeys; /* max number of columns in an index */
|
||||
|
||||
uint32 toast_max_chunk_size; /* chunk size in TOAST tables */
|
||||
uint32 loblksize; /* chunk size in pg_largeobject */
|
||||
|
||||
bool enableIntTimes; /* int64 storage enabled? */
|
||||
|
||||
bool float4ByVal; /* float4 pass-by-value? */
|
||||
bool float8ByVal; /* float8, int8, etc pass-by-value? */
|
||||
|
||||
/* Are data pages protected by checksums? Zero if no checksum version */
|
||||
uint32 data_checksum_version;
|
||||
|
||||
} ControlFileData94;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Following field added since 9.4:
|
||||
*
|
||||
* bool track_commit_timestamp;
|
||||
*
|
||||
* Unchanged in 9.6
|
||||
*
|
||||
* In 10, following field appended *after* "data_checksum_version":
|
||||
*
|
||||
* char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];
|
||||
*
|
||||
* (but we don't care about that)
|
||||
*/
|
||||
|
||||
typedef struct ControlFileData95
|
||||
{
|
||||
uint64 system_identifier;
|
||||
|
||||
uint32 pg_control_version; /* PG_CONTROL_VERSION */
|
||||
uint32 catalog_version_no; /* see catversion.h */
|
||||
|
||||
DBState state; /* see enum above */
|
||||
pg_time_t time; /* time stamp of last pg_control update */
|
||||
XLogRecPtr checkPoint; /* last check point record ptr */
|
||||
XLogRecPtr prevCheckPoint; /* previous check point record ptr */
|
||||
|
||||
CheckPoint95 checkPointCopy; /* copy of last check point record */
|
||||
|
||||
XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */
|
||||
|
||||
XLogRecPtr minRecoveryPoint;
|
||||
TimeLineID minRecoveryPointTLI;
|
||||
XLogRecPtr backupStartPoint;
|
||||
XLogRecPtr backupEndPoint;
|
||||
bool backupEndRequired;
|
||||
|
||||
int wal_level;
|
||||
bool wal_log_hints;
|
||||
int MaxConnections;
|
||||
int max_worker_processes;
|
||||
int max_prepared_xacts;
|
||||
int max_locks_per_xact;
|
||||
bool track_commit_timestamp;
|
||||
|
||||
uint32 maxAlign; /* alignment requirement for tuples */
|
||||
double floatFormat; /* constant 1234567.0 */
|
||||
|
||||
uint32 blcksz; /* data block size for this DB */
|
||||
uint32 relseg_size; /* blocks per segment of large relation */
|
||||
|
||||
uint32 xlog_blcksz; /* block size within WAL files */
|
||||
uint32 xlog_seg_size; /* size of each WAL segment */
|
||||
|
||||
uint32 nameDataLen; /* catalog name field width */
|
||||
uint32 indexMaxKeys; /* max number of columns in an index */
|
||||
|
||||
uint32 toast_max_chunk_size; /* chunk size in TOAST tables */
|
||||
uint32 loblksize; /* chunk size in pg_largeobject */
|
||||
|
||||
bool enableIntTimes; /* int64 storage enabled? */
|
||||
|
||||
bool float4ByVal; /* float4 pass-by-value? */
|
||||
bool float8ByVal; /* float8, int8, etc pass-by-value? */
|
||||
|
||||
uint32 data_checksum_version;
|
||||
|
||||
} ControlFileData95;
|
||||
|
||||
/*
|
||||
* Following field removed in 11:
|
||||
*
|
||||
* XLogRecPtr prevCheckPoint;
|
||||
*
|
||||
* In 10, following field appended *after* "data_checksum_version":
|
||||
*
|
||||
* char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];
|
||||
*
|
||||
* (but we don't care about that)
|
||||
*/
|
||||
|
||||
typedef struct ControlFileData11
|
||||
{
|
||||
uint64 system_identifier;
|
||||
|
||||
uint32 pg_control_version; /* PG_CONTROL_VERSION */
|
||||
uint32 catalog_version_no; /* see catversion.h */
|
||||
|
||||
DBState state; /* see enum above */
|
||||
pg_time_t time; /* time stamp of last pg_control update */
|
||||
XLogRecPtr checkPoint; /* last check point record ptr */
|
||||
|
||||
CheckPoint95 checkPointCopy; /* copy of last check point record */
|
||||
|
||||
XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */
|
||||
|
||||
XLogRecPtr minRecoveryPoint;
|
||||
TimeLineID minRecoveryPointTLI;
|
||||
XLogRecPtr backupStartPoint;
|
||||
XLogRecPtr backupEndPoint;
|
||||
bool backupEndRequired;
|
||||
|
||||
int wal_level;
|
||||
bool wal_log_hints;
|
||||
int MaxConnections;
|
||||
int max_worker_processes;
|
||||
int max_prepared_xacts;
|
||||
int max_locks_per_xact;
|
||||
bool track_commit_timestamp;
|
||||
|
||||
uint32 maxAlign; /* alignment requirement for tuples */
|
||||
double floatFormat; /* constant 1234567.0 */
|
||||
|
||||
uint32 blcksz; /* data block size for this DB */
|
||||
uint32 relseg_size; /* blocks per segment of large relation */
|
||||
|
||||
uint32 xlog_blcksz; /* block size within WAL files */
|
||||
uint32 xlog_seg_size; /* size of each WAL segment */
|
||||
|
||||
uint32 nameDataLen; /* catalog name field width */
|
||||
uint32 indexMaxKeys; /* max number of columns in an index */
|
||||
|
||||
uint32 toast_max_chunk_size; /* chunk size in TOAST tables */
|
||||
uint32 loblksize; /* chunk size in pg_largeobject */
|
||||
|
||||
bool enableIntTimes; /* int64 storage enabled? */
|
||||
|
||||
bool float4ByVal; /* float4 pass-by-value? */
|
||||
bool float8ByVal; /* float8, int8, etc pass-by-value? */
|
||||
|
||||
uint32 data_checksum_version;
|
||||
|
||||
} ControlFileData11;
|
||||
|
||||
|
||||
extern int get_pg_version(const char *data_directory, char *version_string);
|
||||
extern DBState get_db_state(const char *data_directory);
|
||||
extern const char *describe_db_state(DBState state);
|
||||
extern int get_data_checksum_version(const char *data_directory);
|
||||
extern uint64 get_system_identifier(const char *data_directory);
|
||||
extern XLogRecPtr get_latest_checkpoint_location(const char *data_directory);
|
||||
extern TimeLineID get_timeline(const char *data_directory);
|
||||
extern TimeLineID get_min_recovery_end_timeline(const char *data_directory);
|
||||
extern XLogRecPtr get_min_recovery_location(const char *data_directory);
|
||||
|
||||
#endif /* _CONTROLDATA_H_ */
|
||||
|
||||
166
dbutils.h
166
dbutils.h
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* dbutils.h
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -20,7 +20,6 @@
|
||||
#ifndef _REPMGR_DBUTILS_H_
|
||||
#define _REPMGR_DBUTILS_H_
|
||||
|
||||
#include "access/timeline.h"
|
||||
#include "access/xlogdefs.h"
|
||||
#include "pqexpbuffer.h"
|
||||
#include "portability/instr_time.h"
|
||||
@@ -30,9 +29,7 @@
|
||||
#include "voting.h"
|
||||
|
||||
#define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name "
|
||||
#define BDR2_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_name, node_local_dsn, ''"
|
||||
#define BDR3_NODES_COLUMNS "ns.node_id, 0, 0, ns.node_name, ns.interface_connstr, ns.peer_state_name"
|
||||
|
||||
#define BDR_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_status, node_name, node_local_dsn, node_init_from_dsn, node_read_only, node_seq_id"
|
||||
|
||||
#define ERRBUFF_SIZE 512
|
||||
|
||||
@@ -48,7 +45,6 @@ typedef enum
|
||||
typedef enum
|
||||
{
|
||||
REPMGR_INSTALLED = 0,
|
||||
REPMGR_OLD_VERSION_INSTALLED,
|
||||
REPMGR_AVAILABLE,
|
||||
REPMGR_UNAVAILABLE,
|
||||
REPMGR_UNKNOWN
|
||||
@@ -80,8 +76,7 @@ typedef enum
|
||||
NODE_STATUS_UP,
|
||||
NODE_STATUS_SHUTTING_DOWN,
|
||||
NODE_STATUS_DOWN,
|
||||
NODE_STATUS_UNCLEAN_SHUTDOWN,
|
||||
NODE_STATUS_REJECTED
|
||||
NODE_STATUS_UNCLEAN_SHUTDOWN
|
||||
} NodeStatus;
|
||||
|
||||
typedef enum
|
||||
@@ -99,32 +94,6 @@ typedef enum
|
||||
SLOT_ACTIVE
|
||||
} ReplSlotStatus;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
BACKUP_STATE_UNKNOWN = -1,
|
||||
BACKUP_STATE_IN_BACKUP,
|
||||
BACKUP_STATE_NO_BACKUP
|
||||
} BackupState;
|
||||
|
||||
|
||||
/*
|
||||
* Struct to store extension version information
|
||||
*/
|
||||
|
||||
typedef struct s_extension_versions {
|
||||
char default_version[8];
|
||||
int default_version_num;
|
||||
char installed_version[8];
|
||||
int installed_version_num;
|
||||
} t_extension_versions;
|
||||
|
||||
#define T_EXTENSION_VERSIONS_INITIALIZER { \
|
||||
"", \
|
||||
UNKNOWN_SERVER_VERSION_NUM, \
|
||||
"", \
|
||||
UNKNOWN_SERVER_VERSION_NUM \
|
||||
}
|
||||
|
||||
/*
|
||||
* Struct to store node information
|
||||
*/
|
||||
@@ -134,8 +103,8 @@ typedef struct s_node_info
|
||||
int node_id;
|
||||
int upstream_node_id;
|
||||
t_server_type type;
|
||||
char node_name[NAMEDATALEN];
|
||||
char upstream_node_name[NAMEDATALEN];
|
||||
char node_name[MAXLEN];
|
||||
char upstream_node_name[MAXLEN];
|
||||
char conninfo[MAXLEN];
|
||||
char repluser[NAMEDATALEN];
|
||||
char location[MAXLEN];
|
||||
@@ -184,7 +153,7 @@ typedef struct s_node_info
|
||||
MS_NORMAL, \
|
||||
NULL, \
|
||||
/* for ad-hoc use e.g. when working with a list of nodes */ \
|
||||
"", true, true, \
|
||||
"", true, true \
|
||||
/* various statistics */ \
|
||||
-1, -1, -1, -1, -1, -1 \
|
||||
}
|
||||
@@ -268,14 +237,18 @@ typedef struct s_bdr_node_info
|
||||
char node_sysid[MAXLEN];
|
||||
uint32 node_timeline;
|
||||
uint32 node_dboid;
|
||||
char node_status;
|
||||
char node_name[MAXLEN];
|
||||
char node_local_dsn[MAXLEN];
|
||||
char peer_state_name[MAXLEN];
|
||||
char node_init_from_dsn[MAXLEN];
|
||||
bool read_only;
|
||||
uint32 node_seq_id;
|
||||
} t_bdr_node_info;
|
||||
|
||||
#define T_BDR_NODE_INFO_INITIALIZER { \
|
||||
"", InvalidOid, InvalidOid, \
|
||||
"", "", "" \
|
||||
'?', "", "", "", \
|
||||
false, -1 \
|
||||
}
|
||||
|
||||
|
||||
@@ -302,16 +275,22 @@ typedef struct BdrNodeInfoList
|
||||
typedef struct
|
||||
{
|
||||
char current_timestamp[MAXLEN];
|
||||
bool in_recovery;
|
||||
XLogRecPtr last_wal_receive_lsn;
|
||||
XLogRecPtr last_wal_replay_lsn;
|
||||
uint64 last_wal_receive_lsn;
|
||||
uint64 last_wal_replay_lsn;
|
||||
char last_xact_replay_timestamp[MAXLEN];
|
||||
int replication_lag_time;
|
||||
bool receiving_streamed_wal;
|
||||
bool wal_replay_paused;
|
||||
int upstream_last_seen;
|
||||
} ReplInfo;
|
||||
|
||||
#define T_REPLINFO_INTIALIZER { \
|
||||
"", \
|
||||
InvalidXLogRecPtr, \
|
||||
InvalidXLogRecPtr, \
|
||||
"", \
|
||||
0 \
|
||||
}
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char filepath[MAXPGPATH];
|
||||
@@ -342,24 +321,9 @@ typedef struct
|
||||
UNKNOWN_TIMELINE_ID, \
|
||||
InvalidXLogRecPtr \
|
||||
}
|
||||
/* global variables */
|
||||
|
||||
|
||||
typedef struct RepmgrdInfo {
|
||||
int node_id;
|
||||
int pid;
|
||||
char pid_text[MAXLEN];
|
||||
char pid_file[MAXLEN];
|
||||
bool pg_running;
|
||||
char pg_running_text[MAXLEN];
|
||||
RecoveryType recovery_type;
|
||||
bool running;
|
||||
char repmgrd_running[MAXLEN];
|
||||
bool paused;
|
||||
bool wal_paused_pending_wal;
|
||||
int upstream_last_seen;
|
||||
char upstream_last_seen_text[MAXLEN];
|
||||
} RepmgrdInfo;
|
||||
|
||||
extern int server_version_num;
|
||||
|
||||
/* macros */
|
||||
|
||||
@@ -376,22 +340,26 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
||||
bool atobool(const char *value);
|
||||
|
||||
/* connection functions */
|
||||
PGconn *establish_db_connection(const char *conninfo,
|
||||
PGconn *establish_db_connection(const char *conninfo,
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_db_connection_quiet(const char *conninfo);
|
||||
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_primary_db_connection(PGconn *conn,
|
||||
PGconn *establish_db_connection_as_user(const char *conninfo,
|
||||
const char *user,
|
||||
const bool exit_on_error);
|
||||
|
||||
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
||||
const bool exit_on_error);
|
||||
PGconn *establish_primary_db_connection(PGconn *conn,
|
||||
const bool exit_on_error);
|
||||
|
||||
PGconn *get_primary_connection(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
||||
PGconn *get_primary_connection_quiet(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
||||
|
||||
bool is_superuser_connection(PGconn *conn, t_connection_user *userinfo);
|
||||
void close_connection(PGconn **conn);
|
||||
|
||||
/* conninfo manipulation functions */
|
||||
bool get_conninfo_value(const char *conninfo, const char *keyword, char *output);
|
||||
bool get_conninfo_default_value(const char *param, char *output, int maxlen);
|
||||
|
||||
void initialize_conninfo_params(t_conninfo_param_list *param_list, bool set_defaults);
|
||||
void free_conninfo_params(t_conninfo_param_list *param_list);
|
||||
void copy_conninfo_params(t_conninfo_param_list *dest_list, t_conninfo_param_list *source_list);
|
||||
@@ -399,16 +367,15 @@ void conn_to_param_list(PGconn *conn, t_conninfo_param_list *param_list);
|
||||
void param_set(t_conninfo_param_list *param_list, const char *param, const char *value);
|
||||
void param_set_ine(t_conninfo_param_list *param_list, const char *param, const char *value);
|
||||
char *param_get(t_conninfo_param_list *param_list, const char *param);
|
||||
bool parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *param_list, char **errmsg, bool ignore_local_params);
|
||||
bool parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *param_list, char *errmsg, bool ignore_local_params);
|
||||
char *param_list_to_string(t_conninfo_param_list *param_list);
|
||||
char *normalize_conninfo_string(const char *conninfo_str);
|
||||
bool has_passfile(void);
|
||||
|
||||
|
||||
/* transaction functions */
|
||||
bool begin_transaction(PGconn *conn);
|
||||
bool commit_transaction(PGconn *conn);
|
||||
bool rollback_transaction(PGconn *conn);
|
||||
bool check_cluster_schema(PGconn *conn);
|
||||
|
||||
/* GUC manipulation functions */
|
||||
bool set_config(PGconn *conn, const char *config_param, const char *config_value);
|
||||
@@ -416,61 +383,43 @@ bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||
int guc_set(PGconn *conn, const char *parameter, const char *op, const char *value);
|
||||
int guc_set_typed(PGconn *conn, const char *parameter, const char *op, const char *value, const char *datatype);
|
||||
bool get_pg_setting(PGconn *conn, const char *setting, char *output);
|
||||
bool alter_system_int(PGconn *conn, const char *name, int value);
|
||||
bool pg_reload_conf(PGconn *conn);
|
||||
|
||||
/* server information functions */
|
||||
bool get_cluster_size(PGconn *conn, char *size);
|
||||
int get_server_version(PGconn *conn, char *server_version_buf);
|
||||
|
||||
int get_server_version(PGconn *conn, char *server_version);
|
||||
RecoveryType get_recovery_type(PGconn *conn);
|
||||
int get_primary_node_id(PGconn *conn);
|
||||
bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
||||
int get_ready_archive_files(PGconn *conn, const char *data_directory);
|
||||
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
|
||||
TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli);
|
||||
|
||||
/* repmgrd shared memory functions */
|
||||
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
||||
int repmgrd_get_local_node_id(PGconn *conn);
|
||||
bool repmgrd_check_local_node_id(PGconn *conn);
|
||||
BackupState server_in_exclusive_backup_mode(PGconn *conn);
|
||||
void repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile);
|
||||
pid_t repmgrd_get_pid(PGconn *conn);
|
||||
bool repmgrd_is_running(PGconn *conn);
|
||||
bool repmgrd_is_paused(PGconn *conn);
|
||||
bool repmgrd_pause(PGconn *conn, bool pause);
|
||||
pid_t get_wal_receiver_pid(PGconn *conn);
|
||||
|
||||
/* extension functions */
|
||||
ExtensionStatus get_repmgr_extension_status(PGconn *conn, t_extension_versions *extversions);
|
||||
ExtensionStatus get_repmgr_extension_status(PGconn *conn);
|
||||
|
||||
/* node management functions */
|
||||
void checkpoint(PGconn *conn);
|
||||
bool vacuum_table(PGconn *conn, const char *table);
|
||||
bool promote_standby(PGconn *conn, bool wait, int wait_seconds);
|
||||
bool resume_wal_replay(PGconn *conn);
|
||||
|
||||
|
||||
/* node record functions */
|
||||
t_server_type parse_node_type(const char *type);
|
||||
const char *get_node_type_string(t_server_type type);
|
||||
|
||||
RecordStatus get_node_record(PGconn *conn, int node_id, t_node_info *node_info);
|
||||
RecordStatus refresh_node_record(PGconn *conn, int node_id, t_node_info *node_info);
|
||||
|
||||
RecordStatus get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info);
|
||||
|
||||
RecordStatus get_node_record_by_name(PGconn *conn, const char *node_name, t_node_info *node_info);
|
||||
t_node_info *get_node_record_pointer(PGconn *conn, int node_id);
|
||||
|
||||
bool get_local_node_record(PGconn *conn, int node_id, t_node_info *node_info);
|
||||
bool get_primary_node_record(PGconn *conn, t_node_info *node_info);
|
||||
|
||||
bool get_all_node_records(PGconn *conn, NodeInfoList *node_list);
|
||||
void get_all_node_records(PGconn *conn, NodeInfoList *node_list);
|
||||
void get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *nodes);
|
||||
void get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
|
||||
void get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
|
||||
bool get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
|
||||
bool get_downstream_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *noede_list);
|
||||
bool get_downsteam_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *noede_list);
|
||||
|
||||
bool create_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
||||
bool update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
||||
@@ -479,7 +428,6 @@ bool truncate_node_records(PGconn *conn);
|
||||
|
||||
bool update_node_record_set_active(PGconn *conn, int this_node_id, bool active);
|
||||
bool update_node_record_set_primary(PGconn *conn, int this_node_id);
|
||||
bool update_node_record_set_active_standby(PGconn *conn, int this_node_id);
|
||||
bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
||||
bool update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||
bool update_node_record_conn_priority(PGconn *conn, t_configuration_options *options);
|
||||
@@ -503,25 +451,20 @@ PGresult *get_event_records(PGconn *conn, int node_id, const char *node_name,
|
||||
|
||||
/* replication slot functions */
|
||||
void create_slot_name(char *slot_name, int node_id);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, PQExpBufferData *error_msg);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, PQExpBufferData *error_msg);
|
||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||
RecordStatus get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||
int get_free_replication_slot_count(PGconn *conn);
|
||||
int get_inactive_replication_slots(PGconn *conn, KeyValueList *list);
|
||||
int get_free_replication_slots(PGconn *conn);
|
||||
|
||||
/* tablespace functions */
|
||||
bool get_tablespace_name_by_location(PGconn *conn, const char *location, char *name);
|
||||
|
||||
/* asynchronous query functions */
|
||||
bool cancel_query(PGconn *conn, int timeout);
|
||||
int wait_connection_availability(PGconn *conn, int timeout);
|
||||
int wait_connection_availability(PGconn *conn, long long timeout);
|
||||
|
||||
/* node availability functions */
|
||||
bool is_server_available(const char *conninfo);
|
||||
bool is_server_available_quiet(const char *conninfo);
|
||||
bool is_server_available_params(t_conninfo_param_list *param_list);
|
||||
ExecStatusType connection_ping(PGconn *conn);
|
||||
ExecStatusType connection_ping_reconnect(PGconn *conn);
|
||||
|
||||
/* monitoring functions */
|
||||
void
|
||||
@@ -537,8 +480,8 @@ add_monitoring_record(PGconn *primary_conn,
|
||||
long long unsigned int apply_lag_bytes
|
||||
);
|
||||
|
||||
int get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history, int node_id);
|
||||
bool delete_monitoring_records(PGconn *primary_conn, int keep_history, int node_id);
|
||||
int get_number_of_monitoring_records_to_delete(PGconn *primary_conn, int keep_history);
|
||||
bool delete_monitoring_records(PGconn *primary_conn, int keep_history);
|
||||
|
||||
|
||||
|
||||
@@ -552,27 +495,20 @@ bool get_new_primary(PGconn *conn, int *primary_node_id);
|
||||
void reset_voting_status(PGconn *conn);
|
||||
|
||||
/* replication status functions */
|
||||
XLogRecPtr get_primary_current_lsn(PGconn *conn);
|
||||
XLogRecPtr get_node_current_lsn(PGconn *conn);
|
||||
XLogRecPtr get_current_wal_lsn(PGconn *conn);
|
||||
XLogRecPtr get_last_wal_receive_location(PGconn *conn);
|
||||
void init_replication_info(ReplInfo *replication_info);
|
||||
bool get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replication_info);
|
||||
bool get_replication_info(PGconn *conn, ReplInfo *replication_info);
|
||||
int get_replication_lag_seconds(PGconn *conn);
|
||||
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
|
||||
void get_node_replication_stats(PGconn *conn, int server_version_num, t_node_info *node_info);
|
||||
bool is_downstream_node_attached(PGconn *conn, char *node_name);
|
||||
void set_upstream_last_seen(PGconn *conn);
|
||||
int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
|
||||
bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal);
|
||||
|
||||
/* BDR functions */
|
||||
int get_bdr_version_num(void);
|
||||
void get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list);
|
||||
RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info);
|
||||
bool is_bdr_db(PGconn *conn, PQExpBufferData *output);
|
||||
bool is_bdr_db_quiet(PGconn *conn);
|
||||
bool is_active_bdr_node(PGconn *conn, const char *node_name);
|
||||
bool is_bdr_repmgr(PGconn *conn);
|
||||
char *get_default_bdr_replication_set(PGconn *conn);
|
||||
bool is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);
|
||||
bool add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);
|
||||
void add_extension_tables_to_bdr_replication_set(PGconn *conn);
|
||||
|
||||
35
dirutil.c
35
dirutil.c
@@ -3,7 +3,7 @@
|
||||
* dirutil.c
|
||||
* directory handling functions
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -50,7 +50,7 @@ typedef long pgpid_t;
|
||||
* and tablespace directories.
|
||||
*/
|
||||
DataDirState
|
||||
check_dir(const char *path)
|
||||
check_dir(char *path)
|
||||
{
|
||||
DIR *chkdir = NULL;
|
||||
struct dirent *file = NULL;
|
||||
@@ -91,17 +91,12 @@ check_dir(const char *path)
|
||||
* Create directory with error log message when failing
|
||||
*/
|
||||
bool
|
||||
create_dir(const char *path)
|
||||
create_dir(char *path)
|
||||
{
|
||||
char create_dir_path[MAXPGPATH];
|
||||
|
||||
/* mkdir_p() may modify the supplied path */
|
||||
strncpy(create_dir_path, path, MAXPGPATH);
|
||||
|
||||
if (mkdir_p(create_dir_path, 0700) == 0)
|
||||
if (mkdir_p(path, 0700) == 0)
|
||||
return true;
|
||||
|
||||
log_error(_("unable to create directory \"%s\""), create_dir_path);
|
||||
log_error(_("unable to create directory \"%s\""), path);
|
||||
log_detail("%s", strerror(errno));
|
||||
|
||||
return false;
|
||||
@@ -109,12 +104,13 @@ create_dir(const char *path)
|
||||
|
||||
|
||||
bool
|
||||
set_dir_permissions(const char *path)
|
||||
set_dir_permissions(char *path)
|
||||
{
|
||||
return (chmod(path, 0700) != 0) ? false : true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* function from initdb.c */
|
||||
/* source adapted from FreeBSD /src/bin/mkdir/mkdir.c */
|
||||
|
||||
@@ -202,9 +198,9 @@ mkdir_p(char *path, mode_t omode)
|
||||
|
||||
|
||||
bool
|
||||
is_pg_dir(const char *path)
|
||||
is_pg_dir(char *path)
|
||||
{
|
||||
char dirpath[MAXPGPATH] = "";
|
||||
char dirpath[MAXPGPATH];
|
||||
struct stat sb;
|
||||
|
||||
/* test pgdata */
|
||||
@@ -227,7 +223,7 @@ is_pg_dir(const char *path)
|
||||
* any further useful progress can be made.
|
||||
*/
|
||||
PgDirState
|
||||
is_pg_running(const char *path)
|
||||
is_pg_running(char *path)
|
||||
{
|
||||
long pid;
|
||||
FILE *pidf;
|
||||
@@ -276,8 +272,6 @@ is_pg_running(const char *path)
|
||||
log_warning(_("invalid data in PostgreSQL PID file \"%s\""), path);
|
||||
}
|
||||
|
||||
fclose(pidf);
|
||||
|
||||
return PG_DIR_NOT_RUNNING;
|
||||
}
|
||||
|
||||
@@ -297,7 +291,7 @@ is_pg_running(const char *path)
|
||||
|
||||
|
||||
bool
|
||||
create_pg_dir(const char *path, bool force)
|
||||
create_pg_dir(char *path, bool force)
|
||||
{
|
||||
/* Check this directory can be used as a PGDATA dir */
|
||||
switch (check_dir(path))
|
||||
@@ -353,9 +347,8 @@ create_pg_dir(const char *path, bool force)
|
||||
}
|
||||
break;
|
||||
case DIR_ERROR:
|
||||
log_error(_("could not access directory \"%s\"")
|
||||
, path);
|
||||
log_detail("%s", strerror(errno));
|
||||
log_error(_("could not access directory \"%s\": %s"),
|
||||
path, strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -365,7 +358,7 @@ create_pg_dir(const char *path, bool force)
|
||||
|
||||
|
||||
int
|
||||
rmdir_recursive(const char *path)
|
||||
rmdir_recursive(char *path)
|
||||
{
|
||||
return nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
||||
}
|
||||
|
||||
16
dirutil.h
16
dirutil.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* dirutil.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -35,13 +35,13 @@ typedef enum
|
||||
} PgDirState;
|
||||
|
||||
extern int mkdir_p(char *path, mode_t omode);
|
||||
extern bool set_dir_permissions(const char *path);
|
||||
extern bool set_dir_permissions(char *path);
|
||||
|
||||
extern DataDirState check_dir(const char *path);
|
||||
extern bool create_dir(const char *path);
|
||||
extern bool is_pg_dir(const char *path);
|
||||
extern PgDirState is_pg_running(const char *path);
|
||||
extern bool create_pg_dir(const char *path, bool force);
|
||||
extern int rmdir_recursive(const char *path);
|
||||
extern DataDirState check_dir(char *path);
|
||||
extern bool create_dir(char *path);
|
||||
extern bool is_pg_dir(char *path);
|
||||
extern PgDirState is_pg_running(char *path);
|
||||
extern bool create_pg_dir(char *path, bool force);
|
||||
extern int rmdir_recursive(char *path);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -61,7 +61,7 @@ clean:
|
||||
|
||||
maintainer-clean:
|
||||
rm -rf html
|
||||
rm -f Makefile
|
||||
rm -rf Makefile
|
||||
|
||||
zip: html
|
||||
cp -r html repmgr-docs-$(REPMGR_VERSION)
|
||||
|
||||
@@ -21,16 +21,11 @@
|
||||
in PostgreSQL 9.3, as well as improved automated failover support
|
||||
via <application>repmgrd</application>, and is not compatible with PostgreSQL 9.2
|
||||
and earlier. We recommend upgrading to &repmgr; 4, as the &repmgr; 3.x
|
||||
series is no longer maintained.
|
||||
series will no longer be actively maintained.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; 2.x supports PostgreSQL 9.0 ~ 9.3. While it is compatible
|
||||
with PostgreSQL 9.3, we recommend using repmgr 4.x. &repmgr; 2.x is
|
||||
no longer maintained.
|
||||
</para>
|
||||
<para>
|
||||
See also <link linkend="install-compatibility-matrix">&repmgr; compatibility matrix</link>
|
||||
and <link linkend="faq-upgrade-repmgr">Should I upgrade &repmgr;?</link>.
|
||||
repmgr 2.x supports PostgreSQL 9.0 ~ 9.3. While it is compatible
|
||||
with PostgreSQL 9.3, we recommend using repmgr 4.x.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
@@ -39,25 +34,15 @@
|
||||
<para>
|
||||
Replication slots, introduced in PostgreSQL 9.4, ensure that the
|
||||
primary server will retain WAL files until they have been consumed
|
||||
by all standby servers. This means standby servers should never
|
||||
fail due to not being able to retrieve required WAL files from the
|
||||
primary.
|
||||
by all standby servers. This makes WAL file management much easier,
|
||||
and if used, &repmgr; will no longer insist on a fixed minimum number
|
||||
(default: 5000) of WAL files being retained.
|
||||
</para>
|
||||
<para>
|
||||
However this does mean that if a standby is no longer connected to the
|
||||
primary, the presence of the replication slot will cause WAL files
|
||||
to be retained indefinitely, and eventually lead to disk space
|
||||
exhaustion.
|
||||
to be retained indefinitely.
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
2ndQuadrant's recommended configuration is to configure
|
||||
<ulink url="https://www.pgbarman.org/">Barman</ulink> as a fallback
|
||||
source of WAL files, rather than maintain replication slots for
|
||||
each standby. See also: <link linkend="cloning-from-barman-restore-command">Using Barman as a WAL file source</link>.
|
||||
</para>
|
||||
</tip>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-replication-slots-number" xreflabel="Number of replication slots">
|
||||
@@ -76,7 +61,7 @@
|
||||
<para>
|
||||
Before PostgreSQL 10, hash indexes were not WAL logged and are therefore not suitable
|
||||
for use in streaming replication in PostgreSQL 9.6 and earlier. See the
|
||||
<ulink url="https://www.postgresql.org/docs/9.6/sql-createindex.html#AEN80279">PostgreSQL documentation</ulink>
|
||||
<ulink url="https://www.postgresql.org/docs/9.6/static/sql-createindex.html#AEN80279">PostgreSQL documentation</ulink>
|
||||
for details.
|
||||
</para>
|
||||
<para>
|
||||
@@ -84,125 +69,12 @@
|
||||
in a streaming replication cluster.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-upgrades" xreflabel="Upgrading PostgreSQL with repmgr">
|
||||
<title>Can &repmgr; assist with upgrading a PostgreSQL cluster?</title>
|
||||
<para>
|
||||
For <emphasis>minor</emphasis> version upgrades, e.g. from 9.6.7 to 9.6.8, a common
|
||||
approach is to upgrade a standby to the latest version, perform a
|
||||
<link linkend="performing-switchover">switchover</link> promoting it to a primary,
|
||||
then upgrade the former primary.
|
||||
</para>
|
||||
<para>
|
||||
For <emphasis>major</emphasis> version upgrades (e.g. from PostgreSQL 9.6 to PostgreSQL 10),
|
||||
the traditional approach is to "reseed" a cluster by upgrading a single
|
||||
node with <ulink url="https://www.postgresql.org/docs/current/pgupgrade.html">pg_upgrade</ulink>
|
||||
and recloning standbys from this.
|
||||
</para>
|
||||
<para>
|
||||
To minimize downtime during major upgrades from PostgreSQL 9.4 and later,
|
||||
<ulink url="https://www.2ndquadrant.com/en/resources/pglogical/">pglogical</ulink>
|
||||
can be used to set up a parallel cluster using the newer PostgreSQL version,
|
||||
which can be kept in sync with the existing production cluster until the
|
||||
new cluster is ready to be put into production.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-libdir-repmgr-error">
|
||||
<title>What does this error mean: <literal>ERROR: could not access file "$libdir/repmgr"</literal>?</title>
|
||||
<para>
|
||||
It means the &repmgr; extension code is not installed in the
|
||||
PostgreSQL application directory. This typically happens when using PostgreSQL
|
||||
packages provided by a third-party vendor, which often have different
|
||||
filesystem layouts.
|
||||
</para>
|
||||
<para>
|
||||
Either use PostgreSQL packages provided by the community or 2ndQuadrant; if this
|
||||
is not possible, contact your vendor for assistance.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-old-packages">
|
||||
<title>How can I obtain old versions of &repmgr; packages?</title>
|
||||
<para>
|
||||
See appendix <xref linkend="packages-old-versions"> for details.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-required-for-replication">
|
||||
<title>Is &repmgr; required for streaming replication?</title>
|
||||
<para>
|
||||
No.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; (together with <application>repmgrd</application>) assists with
|
||||
<emphasis>managing</emphasis> replication. It does not actually perform replication, which
|
||||
is part of the core PostgreSQL functionality.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-what-if-repmgr-uninstalled">
|
||||
<title>Will replication stop working if &repmgr; is uninstalled?</title>
|
||||
<para>
|
||||
No. See preceding question.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-version-mix">
|
||||
<title>Does it matter if different &repmgr; versions are present in the replication cluster?</title>
|
||||
<para>
|
||||
Yes. If different "major" &repmgr; versions (e.g. 3.3.x and 4.1.x) are present,
|
||||
&repmgr; (in particular <application>repmgrd</application>)
|
||||
may not run, or run properly, or in the worst case (if different <application>repmgrd</application>
|
||||
versions are running and there are differences in the failover implementation) break
|
||||
your replication cluster.
|
||||
</para>
|
||||
<para>
|
||||
If different "minor" &repmgr; versions (e.g. 4.1.1 and 4.1.6) are installed,
|
||||
&repmgr; will function, but we strongly recommend always running the same version
|
||||
to ensure there are no unexpected surprises, e.g. a newer version behaving slightly
|
||||
differently to the older version.
|
||||
</para>
|
||||
<para>
|
||||
See also <link linkend="faq-upgrade-repmgr">Should I upgrade &repmgr;?</link>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-upgrade-repmgr">
|
||||
<title>Should I upgrade &repmgr;?</title>
|
||||
<para>
|
||||
Yes.
|
||||
</para>
|
||||
<para>
|
||||
We don't release new versions for fun, you know. Upgrading may require a little effort,
|
||||
but running an older &repmgr; version with bugs which have since been fixed may end up
|
||||
costing you more effort. The same applies to PostgreSQL itself.
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-conf-data-directory">
|
||||
<title>Why do I need to specify the data directory location in repmgr.conf?</title>
|
||||
<para>
|
||||
In some circumstances &repmgr; may need to access a PostgreSQL data
|
||||
directory while the PostgreSQL server is not running, e.g. to confirm
|
||||
it shut down cleanly during a <link linkend="performing-switchover">switchover</link>.
|
||||
</para>
|
||||
<para>
|
||||
Additionally, this provides support when using &repmgr; on PostgreSQL 9.6 and
|
||||
earlier, where the <literal>repmgr</literal> user is not a superuser; in that
|
||||
case the <literal>repmgr</literal> user will not be able to access the
|
||||
<literal>data_directory</literal> configuration setting, access to which is restricted
|
||||
to superusers. (In PostgreSQL 10 and later, non-superusers can be added to the
|
||||
group <option>pg_read_all_settings</option> which will enable them to read this setting).
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="faq-repmgr" xreflabel="repmgr">
|
||||
<title><command>repmgr</command></title>
|
||||
|
||||
<sect2 id="faq-register-existing-node" xreflabel="registering an existing node">
|
||||
<sect2 id="faq-register-existing-node" xreflabel="">
|
||||
<title>Can I register an existing PostgreSQL server with repmgr?</title>
|
||||
<para>
|
||||
Yes, any existing PostgreSQL server which is part of the same replication
|
||||
@@ -211,26 +83,6 @@
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-clone-other-source" >
|
||||
<title>Can I use a standby not cloned by &repmgr; as a &repmgr; node?</title>
|
||||
|
||||
<para>
|
||||
For a standby which has been manually cloned or recovered from an external
|
||||
backup manager such as Barman, the command
|
||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone --recovery-conf-only</link></command>
|
||||
can be used to create the correct <filename>recovery.conf</filename> file for
|
||||
use with &repmgr; (and will create a replication slot if required). Once this has been done,
|
||||
<link linkend="repmgr-standby-register">register the node</link> as usual.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-recovery-conf" >
|
||||
<title>What does &repmgr; write in <filename>recovery.conf</filename>, and what options can be set there?</title>
|
||||
<para>
|
||||
See section <link linkend="repmgr-standby-clone-recovery-conf">Customising recovery.conf</link>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-failed-primary-standby" xreflabel="Reintegrate a failed primary as a standby">
|
||||
<title>How can a failed primary be re-added as a standby?</title>
|
||||
<para>
|
||||
@@ -239,23 +91,19 @@
|
||||
needs to be re-registered as a standby.
|
||||
</para>
|
||||
<para>
|
||||
It's possible to use <command>pg_rewind</command> to re-synchronise the existing data
|
||||
directory, which will usually be much
|
||||
In PostgreSQL 9.5 and later, it's possible to use <command>pg_rewind</command>
|
||||
to re-synchronise the existing data directory, which will usually be much
|
||||
faster than re-cloning the server. However <command>pg_rewind</command> can only
|
||||
be used if PostgreSQL either has <varname>wal_log_hints</varname> enabled, or
|
||||
data checksums were enabled when the cluster was initialized.
|
||||
</para>
|
||||
<para>
|
||||
Note that <command>pg_rewind</command> is available as part of the core PostgreSQL
|
||||
distribution from PostgreSQL 9.5, and as a third-party utility for PostgreSQL 9.3 and 9.4.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; provides the command <command>repmgr node rejoin</command> which can
|
||||
optionally execute <command>pg_rewind</command>; see the <xref linkend="repmgr-node-rejoin">
|
||||
documentation for details, in particular the section <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
||||
documentation for details.
|
||||
</para>
|
||||
<para>
|
||||
If <command>pg_rewind</command> cannot be used, then the data directory will need
|
||||
If <command>pg_rewind</command> cannot be used, then the data directory will have
|
||||
to be re-cloned from scratch.
|
||||
</para>
|
||||
|
||||
@@ -328,24 +176,10 @@
|
||||
Under some circumstances event notifications can be generated for servers
|
||||
which have not yet been registered; it's also useful to retain a record
|
||||
of events which includes servers removed from the replication cluster
|
||||
which no longer have an entry in the <literal>repmgr.nodes</literal> table.
|
||||
which no longer have an entry in the <literal>repmgr.nodes</literal> table.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgr-recovery-conf-quoted-values" xreflabel="Quoted values in recovery.conf">
|
||||
<title>Why are some values in <filename>recovery.conf</filename> surrounded by pairs of single quotes?</title>
|
||||
<para>
|
||||
This is to ensure that user-supplied values which are written as parameter values in <filename>recovery.conf</filename>
|
||||
are escaped correctly and do not cause errors when <filename>recovery.conf</filename> is parsed.
|
||||
</para>
|
||||
<para>
|
||||
The escaping is performed by an internal PostgreSQL routine, which leaves strings consisting
|
||||
of digits and alphabetical characters only as-is, but wraps everything else in pairs of single quotes,
|
||||
even if the string does not contain any characters which need escaping.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="faq-repmgrd" xreflabel="repmgrd">
|
||||
@@ -355,7 +189,7 @@
|
||||
<sect2 id="faq-repmgrd-prevent-promotion" xreflabel="Prevent standby from being promoted to primary">
|
||||
<title>How can I prevent a node from ever being promoted to primary?</title>
|
||||
<para>
|
||||
In <filename>repmgr.conf</filename>, set its priority to a value of <literal>0</literal>; apply the changed setting with
|
||||
In `repmgr.conf`, set its priority to a value of 0 or less; apply the changed setting with
|
||||
<command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>.
|
||||
</para>
|
||||
<para>
|
||||
@@ -403,36 +237,5 @@
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgrd-pg-bindir" xreflabel="repmgrd does not apply pg_bindir to promote_command or follow_command">
|
||||
<title>
|
||||
<application>repmgrd</application> ignores pg_bindir when executing <varname>promote_command</varname> or <varname>follow_command</varname>
|
||||
</title>
|
||||
<para>
|
||||
<varname>promote_command</varname> or <varname>follow_command</varname> can be user-defined scripts,
|
||||
so &repmgr; will not apply <option>pg_bindir</option> even if executing &repmgr;. Always provide the full
|
||||
path; see <xref linkend="repmgrd-automatic-failover-configuration"> for more details.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="faq-repmgrd-startup-no-upstream" xreflabel="repmgrd does not start if upstream node is not running">
|
||||
<title>
|
||||
<application>repmgrd</application> aborts startup with the error "<literal>upstream node must be running before repmgrd can start</literal>"
|
||||
</title>
|
||||
<para>
|
||||
<application>repmgrd</application> does this to avoid starting up on a replication cluster
|
||||
which is not in a healthy state. If the upstream is unavailable, <application>repmgrd</application>
|
||||
may initiate a failover immediately after starting up, which could have unintended side-effects,
|
||||
particularly if <application>repmgrd</application> is not running on other nodes.
|
||||
</para>
|
||||
<para>
|
||||
In particular, it's possible that the node's local copy of the <literal>repmgr.nodes</literal> table
|
||||
is out-of-date, which may lead to incorrect failover behaviour.
|
||||
</para>
|
||||
<para>
|
||||
The onus is therefore on the administrator to manually set the cluster to a stable, healthy state before
|
||||
starting <application>repmgrd</application>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
</appendix>
|
||||
|
||||
@@ -1,126 +1,48 @@
|
||||
<appendix id="appendix-packages" xreflabel="Package details">
|
||||
<indexterm>
|
||||
<primary>packages</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>packages</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>&repmgr; package details</title>
|
||||
<title>&repmgr; package details</title>
|
||||
<para>
|
||||
This section provides technical details about various &repmgr; binary
|
||||
packages, such as location of the installed binaries and
|
||||
configuration files.
|
||||
</para>
|
||||
|
||||
<sect1 id="packages-centos" xreflabel="CentOS packages">
|
||||
<title>CentOS, RHEL, Scientific Linux etc.</title>
|
||||
<para>
|
||||
This section provides technical details about various &repmgr; binary
|
||||
packages, such as location of the installed binaries and
|
||||
configuration files.
|
||||
Currently packages are provided for versions 6.x and 7.x of CentOS et al.
|
||||
</para>
|
||||
|
||||
<sect1 id="packages-centos" xreflabel="CentOS packages">
|
||||
<title>CentOS Packages</title>
|
||||
|
||||
<indexterm>
|
||||
<primary>packages</primary>
|
||||
<secondary>CentOS packages</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>CentOS</primary>
|
||||
<secondary>package information</secondary>
|
||||
</indexterm>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
Currently, &repmgr; RPM packages are provided for versions 6.x and 7.x of CentOS. These should also
|
||||
work on matching versions of Red Hat Enterprise Linux, Scientific Linux and Oracle Enterprise Linux;
|
||||
together with CentOS, these are the same RedHat-based distributions for which the main community project
|
||||
(PGDG) provides packages (see the <ulink url="https://yum.postgresql.org/">PostgreSQL RPM Building Project</ulink>
|
||||
page for details).
|
||||
For PostgreSQL 9.6 and lower, the CentOS packages use a mixture of <literal>9.6</literal>
|
||||
and <literal>96</literal> in various places to designate the major version;
|
||||
from PostgreSQL 10, the first part of the version number (e.g. <literal>10</literal>) is
|
||||
the major version, so there is more consistency in file/path/package naming.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Note these &repmgr; RPM packages are not designed to work with SuSE/OpenSuSE.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
&repmgr; packages are designed to be compatible with community-provided PostgreSQL packages.
|
||||
They may not work with vendor-specific packages such as those provided by RedHat for RHEL
|
||||
customers, as the filesystem layout may be different to the community RPMs.
|
||||
Please contact your support vendor for assistance.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<sect2 id="packages-centos-repositories">
|
||||
<title>CentOS repositories</title>
|
||||
|
||||
<para>
|
||||
&repmgr; packages are available from the public 2ndQuadrant repository, and also the
|
||||
PostgreSQL community repository. The 2ndQuadrant repository is updated immediately
|
||||
after each
|
||||
&repmgr; release.
|
||||
</para>
|
||||
|
||||
<table id="centos-2ndquadrant-repository">
|
||||
<title>2ndQuadrant public repository</title>
|
||||
<tgroup cols="2">
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>Repository URL:</entry>
|
||||
<entry><ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
<entry><ulink url="https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ">https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-REDHAT-2NDQ</ulink></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<table id="centos-pgdg-repository">
|
||||
<title>PostgreSQL community repository (PGDG)</title>
|
||||
<tgroup cols="2">
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>Repository URL:</entry>
|
||||
<entry><ulink url="https://yum.postgresql.org/repopackages.php">https://yum.postgresql.org/repopackages.php</ulink></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
<entry><ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="packages-centos-details">
|
||||
<title>CentOS package details</title>
|
||||
|
||||
<para>
|
||||
The two tables below list relevant information, paths, commands etc. for the &repmgr; packages on
|
||||
CentOS 7 (with systemd) and CentOS 6 (no systemd). Substitute the appropriate PostgreSQL major
|
||||
version number for your installation.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
For PostgreSQL 9.6 and lower, the CentOS packages use a mixture of <literal>9.6</literal>
|
||||
and <literal>96</literal> in various places to designate the major version; e.g. the
|
||||
package name is <literal>repmgr96</literal>, but the data directory is
|
||||
<filename>/var/lib/pgsql/9.6/data</filename>.
|
||||
</para>
|
||||
<para>
|
||||
From PostgreSQL 10, the first part of the version number (e.g. <literal>10</literal>) is
|
||||
the major version, so there is more consistency in file/path/package naming
|
||||
(package <literal>repmgr10</literal>, data directory <filename>/var/lib/pgsql/10/data</filename>).
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</note>
|
||||
|
||||
<table id="centos-7-packages">
|
||||
<title>CentOS 7 packages</title>
|
||||
|
||||
<tgroup cols="2">
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>Repository URL:</entry>
|
||||
<entry><ulink url="https://yum.postgresql.org/repopackages.php">https://yum.postgresql.org/repopackages.php</ulink></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
<entry><ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Package name example:</entry>
|
||||
<entry><filename>repmgr10-4.0.4-1.rhel7.x86_64</filename></entry>
|
||||
<entry><filename>repmgr10-4.0.0-1.rhel7.x86_64</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
@@ -130,7 +52,7 @@
|
||||
|
||||
<row>
|
||||
<entry>Installation command:</entry>
|
||||
<entry><literal>yum install repmgr10</literal></entry>
|
||||
<entry><literal>yum install -y repmgr10</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
@@ -139,7 +61,7 @@
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>repmgr in default path:</entry>
|
||||
<entry>In default path:</entry>
|
||||
<entry>NO</entry>
|
||||
</row>
|
||||
|
||||
@@ -148,14 +70,9 @@
|
||||
<entry><filename>/etc/repmgr/10/repmgr.conf</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Data directory:</entry>
|
||||
<entry><filename>/var/lib/pgsql/10/data</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>repmgrd service command:</entry>
|
||||
<entry><command>systemctl [start|stop|restart|reload] repmgr10</command></entry>
|
||||
<entry><literal>service repmgr10</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
@@ -165,7 +82,7 @@
|
||||
|
||||
<row>
|
||||
<entry>repmgrd log file location:</entry>
|
||||
<entry>(not specified by package; set in <filename>repmgr.conf</filename>)</entry>
|
||||
<entry>(not specified)</entry>
|
||||
</row>
|
||||
|
||||
</tbody>
|
||||
@@ -177,20 +94,29 @@
|
||||
|
||||
<tgroup cols="2">
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>Repository URL:</entry>
|
||||
<entry><ulink url="https://yum.postgresql.org/repopackages.php">https://yum.postgresql.org/repopackages.php</ulink></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
<entry><ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Package name example:</entry>
|
||||
<entry><filename>repmgr96-4.0.4-1.rhel6.x86_64</filename></entry>
|
||||
<entry><filename>repmgr96-4.0.0-1.rhel6.x86_64</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Metapackage:</entry>
|
||||
<entry>(none)</entry>
|
||||
<entry>NO</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Installation command:</entry>
|
||||
<entry><literal>yum install repmgr96</literal></entry>
|
||||
<entry><literal>yum install -y repmgr96</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
@@ -199,7 +125,7 @@
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>repmgr in default path:</entry>
|
||||
<entry>In default path:</entry>
|
||||
<entry>NO</entry>
|
||||
</row>
|
||||
|
||||
@@ -208,14 +134,9 @@
|
||||
<entry><filename>/etc/repmgr/9.6/repmgr.conf</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Data directory:</entry>
|
||||
<entry><filename>/var/lib/pgsql/9.6/data</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>repmgrd service command:</entry>
|
||||
<entry><literal>service repmgr-9.6 [start|stop|restart|reload]</literal></entry>
|
||||
<entry>service repmgr-9.6</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
@@ -232,338 +153,6 @@
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
|
||||
|
||||
<sect1 id="packages-debian-ubuntu" xreflabel="Debian/Ubuntu packages">
|
||||
<title>Debian/Ubuntu Packages</title>
|
||||
<indexterm>
|
||||
<primary>packages</primary>
|
||||
<secondary>Debian/Ubuntu packages</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>Debian/Ubuntu</primary>
|
||||
<secondary>package information</secondary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
&repmgr; <literal>.deb</literal> packages are provided via the
|
||||
PostgreSQL Community APT repository, and are available for each community-supported
|
||||
PostgreSQL version, currently supported Debian releases, and currently supported
|
||||
Ubuntu LTS releases.
|
||||
</para>
|
||||
|
||||
<sect2 id="packages-apt-repository">
|
||||
<title>APT repository</title>
|
||||
|
||||
<para>
|
||||
&repmgr; packages are available from the PostgreSQL Community APT repository,
|
||||
which is updated immediately after each &repmgr; release.
|
||||
</para>
|
||||
|
||||
|
||||
<table id="apt-2ndquadrant-repository">
|
||||
<title>2ndQuadrant public repository</title>
|
||||
<tgroup cols="2">
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>Repository URL:</entry>
|
||||
<entry><ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
<entry><ulink url="https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN">https://repmgr.org/docs/current/installation-packages.html#INSTALLATION-PACKAGES-DEBIAN</ulink></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
|
||||
<table id="apt-repository">
|
||||
<title>PostgreSQL Community APT repository (PGDG)</title>
|
||||
<tgroup cols="2">
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>Repository URL:</entry>
|
||||
<entry><ulink url="http://apt.postgresql.org/">http://apt.postgresql.org/</ulink></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
<entry><ulink url="https://wiki.postgresql.org/wiki/Apt">https://wiki.postgresql.org/wiki/Apt</ulink></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="packages-debian-details">
|
||||
<title>Debian/Ubuntu package details</title>
|
||||
|
||||
<para>
|
||||
The table below lists relevant information, paths, commands etc. for the &repmgr; packages on
|
||||
Debian 9.x ("Stretch"). Substitute the appropriate PostgreSQL major
|
||||
version number for your installation.
|
||||
</para>
|
||||
<para>
|
||||
See also <xref linkend="repmgrd-configuration-debian-ubuntu"> for some specifics related
|
||||
to configuring the <application>repmgrd</application> daemon.
|
||||
</para>
|
||||
|
||||
<table id="debian-9-packages">
|
||||
<title>Debian 9.x packages</title>
|
||||
|
||||
<tgroup cols="2">
|
||||
<tbody>
|
||||
|
||||
<row>
|
||||
<entry>Package name example:</entry>
|
||||
<entry><filename>postgresql-10-repmgr</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Metapackage:</entry>
|
||||
<entry><filename>repmgr-common</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Installation command:</entry>
|
||||
<entry><literal>apt-get install postgresql-10-repmgr</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Binary location:</entry>
|
||||
<entry><filename>/usr/lib/postgresql/10/bin</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>repmgr in default path:</entry>
|
||||
<entry>Yes (via wrapper script <filename>/usr/bin/repmgr</filename>)</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Configuration file location:</entry>
|
||||
<entry>(not set by package)</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>Data directory:</entry>
|
||||
<entry><filename>/var/lib/postgresql/10/main</filename></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>PostgreSQL service command:</entry>
|
||||
<entry><command>systemctl [start|stop|restart|reload] postgresql@10-main</command></entry>
|
||||
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>repmgrd service command:</entry>
|
||||
<entry><command>systemctl [start|stop|restart|reload] repmgrd</command></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>repmgrd service file location:</entry>
|
||||
<entry><filename>/etc/init.d/repmgrd</filename> (defaults in: <filename>/etc/default/repmgrd</filename>)</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>repmgrd log file location:</entry>
|
||||
<entry>(not specified by package; set in <filename>repmgr.conf</filename>)</entry>
|
||||
</row>
|
||||
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
<note>
|
||||
<para>
|
||||
Instead of using the <application>systemd</application> service command directly,
|
||||
it's recommended to execute <command>pg_ctlcluster</command> (as <literal>root</literal>,
|
||||
either directly or via <command>sudo</command>), e.g.:
|
||||
<programlisting>
|
||||
<command>pg_ctlcluster 10 main [start|stop|restart|reload]</command></programlisting>
|
||||
</para>
|
||||
<para>
|
||||
For pre-<application>systemd</application> systems, <command>pg_ctlcluster</command>
|
||||
can be executed directly by the <literal>postgres</literal> user.
|
||||
</para>
|
||||
</note>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="packages-snapshot" xreflabel="Snapshot packages">
|
||||
<title>Snapshot packages</title>
|
||||
<indexterm>
|
||||
<primary>snapshot packages</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>packages</primary>
|
||||
<secondary>snapshots</secondary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
For testing new features and bug fixes, from time to time 2ndQuadrant provides
|
||||
so-called "snapshot packages" via its public repository. These packages
|
||||
are built from the &repmgr; source at a particular point in time, and are not formal
|
||||
releases.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
We do not recommend installing these packages in a production environment
|
||||
unless specifically advised.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
To install a snapshot package, it's necessary to install the 2ndQuadrant public snapshot repository,
|
||||
following the instructions here: <ulink url="https://dl.2ndquadrant.com/default/release/site/">https://dl.2ndquadrant.com/default/release/site/</ulink> but replace <literal>release</literal> with <literal>snapshot</literal>
|
||||
in the appropriate URL.
|
||||
</para>
|
||||
<para>
|
||||
For example, to install the snapshot RPM repository for PostgreSQL 9.6, execute (as <literal>root</literal>):
|
||||
<programlisting>
|
||||
curl https://dl.2ndquadrant.com/default/snapshot/get/9.6/rpm | bash</programlisting>
|
||||
|
||||
or as a normal user with root sudo access:
|
||||
<programlisting>
|
||||
curl https://dl.2ndquadrant.com/default/snapshot/get/9.6/rpm | sudo bash</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Alternatively you can browse the repository here:
|
||||
<ulink url="https://dl.2ndquadrant.com/default/snapshot/browse/">https://dl.2ndquadrant.com/default/snapshot/browse/</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
Once the repository is installed, installing or updating &repmgr; will result in the latest snapshot
|
||||
package being installed.
|
||||
</para>
|
||||
<para>
|
||||
The package name will be formatted like this:
|
||||
<programlisting>
|
||||
repmgr96-4.1.1-0.0git320.g5113ab0.1.el7.x86_64.rpm</programlisting>
|
||||
containing the snapshot build number (here: <literal>320</literal>) and the hash
|
||||
of the <application>git</application> commit it was built from (here: <literal>g5113ab0</literal>).
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Note that the next formal release (in the above example <literal>4.1.1</literal>), once available,
|
||||
will install in place of any snapshot builds.
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="packages-old-versions" xreflabel="Installing old package versions">
|
||||
<title>Installing old package versions</title>
|
||||
|
||||
<indexterm>
|
||||
<primary>old packages</primary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>packages</primary>
|
||||
<secondary>old versions</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
<secondary>old package versions</secondary>
|
||||
</indexterm>
|
||||
|
||||
<sect2 id="packages-old-versions-debian" xreflabel="old Debian package versions">
|
||||
<title>Debian/Ubuntu</title>
|
||||
<para>
|
||||
An archive of old packages (<literal>3.3.2</literal> and later) for Debian/Ubuntu-based systems is available here:
|
||||
<ulink url="http://atalia.postgresql.org/morgue/r/repmgr/">http://atalia.postgresql.org/morgue/r/repmgr/</ulink>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="packages-old-versions-rhel-centos" xreflabel="old RHEL/CentOS package versions">
|
||||
<title>RHEL/CentOS</title>
|
||||
<para>
|
||||
Old RPM packages (<literal>3.2</literal> and later) can be retrieved from the
|
||||
(deprecated) 2ndQuadrant repository at
|
||||
<ulink url="http://packages.2ndquadrant.com/">http://packages.2ndquadrant.com/</ulink>
|
||||
by installing the appropriate repository RPM:
|
||||
</para>
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
Old versions can be located with e.g.:
|
||||
<programlisting>
|
||||
yum --showduplicates list repmgr96</programlisting>
|
||||
(substitute the appropriate package name; see <xref linkend="packages-centos">) and installed with:
|
||||
<programlisting>
|
||||
yum install {package_name}-{version}</programlisting>
|
||||
where <literal>{package_name}</literal> is the base package name (e.g. <literal>repmgr96</literal>)
|
||||
and <literal>{version}</literal> is the version listed by the
|
||||
<command>yum --showduplicates list ...</command> command, e.g. <literal>4.0.6-1.rhel6</literal>.
|
||||
</para>
|
||||
<para>For example:
|
||||
<programlisting>
|
||||
yum install repmgr96-4.0.6-1.rhel6</programlisting>
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
|
||||
<sect1 id="packages-packager-info" xreflabel="Information for packagers">
|
||||
<title>Information for packagers</title>
|
||||
<indexterm>
|
||||
<primary>packages</primary>
|
||||
<secondary>information for packagers</secondary>
|
||||
</indexterm>
|
||||
<para>
|
||||
We recommend patching the following parameters when
|
||||
building the package as built-in default values for user convenience.
|
||||
These values can nevertheless be overridden by the user, if desired.
|
||||
</para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
Configuration file location: the default configuration file location
|
||||
can be hard-coded by patching <varname>package_conf_file</varname>
|
||||
in <filename>configfile.c</filename>:
|
||||
<programlisting>
|
||||
/* packagers: if feasible, patch configuration file path into "package_conf_file" */
|
||||
char package_conf_file[MAXPGPATH] = "";</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
See also: <xref linkend="configuration-file">
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
PID file location: the default <application>repmgrd</application> PID file
|
||||
location can be hard-coded by patching <varname>package_pid_file</varname>
|
||||
in <filename>repmgrd.c</filename>:
|
||||
<programlisting>
|
||||
/* packagers: if feasible, patch PID file path into "package_pid_file" */
|
||||
char package_pid_file[MAXPGPATH] = "";</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
See also: <xref linkend="repmgrd-pid-file">
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</sect1>
|
||||
</appendix>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,14 +5,14 @@
|
||||
<title>repmgr source code signing key</title>
|
||||
<para>
|
||||
The signing key ID used for <application>repmgr</application> source code bundles is:
|
||||
<ulink url="https://repmgr.org/download/SOURCE-GPG-KEY-repmgr">
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr">
|
||||
<literal>0x297F1DCC</literal></ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To download the <application>repmgr</application> source key to your computer:
|
||||
<programlisting>
|
||||
curl -s https://repmgr.org/download/SOURCE-GPG-KEY-repmgr | gpg --import
|
||||
curl -s http://packages.2ndquadrant.com/repmgr/SOURCE-GPG-KEY-repmgr | gpg --import
|
||||
gpg --fingerprint 0x297F1DCC
|
||||
</programlisting>
|
||||
then verify that the fingerprint is the expected value:
|
||||
@@ -33,5 +33,34 @@
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgr-rpm-key" xreflabel="repmgr rpm key">
|
||||
<title>repmgr RPM signing key</title>
|
||||
<para>
|
||||
The signing key ID used for <application>repmgr</application> RPM packages is:
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr">
|
||||
<literal>0x702D883A</literal></ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To download the <application>repmgr</application> RPM signing key to your computer:
|
||||
<programlisting>
|
||||
curl -s http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr | gpg --import
|
||||
gpg --fingerprint 0x702D883A
|
||||
</programlisting>
|
||||
then verify that the fingerprint is the expected value:
|
||||
<programlisting>
|
||||
AE4E 390E A58E 0037 6148 3F29 888D 018B 702D 883A</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To check a repository RPM, use <application>rpmkeys</application> to load the
|
||||
packaging signing key into the RPM database then use <literal>rpm -K</literal>, e.g.:
|
||||
<programlisting>
|
||||
sudo rpmkeys --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr
|
||||
rpm -K postgresql-bdr94-2ndquadrant-redhat-1.0-2.noarch.rpm
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
</appendix>
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
<appendix id="appendix-support" xreflabel="repmgr support">
|
||||
<indexterm>
|
||||
<primary>support</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>&repmgr; support</title>
|
||||
<para>
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides 24x7
|
||||
production support for &repmgr; and other PostgreSQL
|
||||
products, including configuration assistance, installation
|
||||
verification and training for running a robust replication cluster.
|
||||
</para>
|
||||
<para>
|
||||
For further details see: <ulink url="https://2ndquadrant.com/en/support/">https://2ndquadrant.com/en/support/</ulink>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
A mailing list/forum is provided via Google groups to discuss contributions or issues: <ulink url="https://groups.google.com/group/repmgr">https://groups.google.com/group/repmgr</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
Please report bugs and other issues to: <ulink url="https://github.com/2ndQuadrant/repmgr">https://github.com/2ndQuadrant/repmgr</ulink>.
|
||||
</para>
|
||||
|
||||
<important>
|
||||
<para>
|
||||
Please read the <link linkend="appendix-support-reporting-issues">following section</link> before submitting questions or issue reports.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
<sect1 id="appendix-support-reporting-issues" xreflabel="Reporting Issues">
|
||||
<indexterm>
|
||||
<primary>support</primary>
|
||||
<secondary>reporting issues</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Reporting Issues</title>
|
||||
|
||||
<para>
|
||||
When asking questions or reporting issues, it is extremely helpful if the following information is included:
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
&repmgr; version
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
How was &repmgr; installed? From source? From packages? If
|
||||
so from which repository?
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<filename>repmgr.conf</filename> files (suitably anonymized if necessary)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Contents of the <literal>repmgr.nodes</literal> table (suitably anonymized if necessary)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
PostgreSQL version
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
If issues are encountered with a &repmgr; client command, please provide
|
||||
the output of that command executed with the options
|
||||
<option>-LDEBUG --verbose</option>, which will ensure &repmgr; emits
|
||||
the maximum level of logging output.
|
||||
</para>
|
||||
<para>
|
||||
If issues are encountered with <application>repmgrd</application>,
|
||||
please provide relevant extracts from the &repmgr; log files
|
||||
and if possible the PostgreSQL log itself. Please ensure these
|
||||
logs do not contain any confidential data.
|
||||
</para>
|
||||
<para>
|
||||
In all cases it is <emphasis>extremely</emphasis> useful to receive
|
||||
information on how to reliably reproduce an issue with as much detail as
|
||||
possible.
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
</appendix>
|
||||
@@ -4,5 +4,5 @@ BDR failover with repmgrd
|
||||
This document has been integrated into the main `repmgr` documentation
|
||||
and is now located here:
|
||||
|
||||
> [BDR failover with repmgrd](https://repmgr.org/docs/current/repmgrd-bdr.html)
|
||||
> [BDR failover with repmgrd](https://repmgr.org/docs/4.0/repmgrd-bdr.html)
|
||||
|
||||
|
||||
@@ -4,4 +4,4 @@ Changes in repmgr 4
|
||||
This document has been integrated into the main `repmgr` documentation
|
||||
and is now located here:
|
||||
|
||||
> [Release notes](https://repmgr.org/docs/current/release-4.0.html)
|
||||
> [Release notes](https://repmgr.org/docs/4.0/release-4.0.html)
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<sect2 id="cloning-from-barman-prerequisites">
|
||||
<sect2 id="cloning-from-barman-prerequisites" xreflabel="Prerequisites for cloning from Barman">
|
||||
<title>Prerequisites for cloning from Barman</title>
|
||||
<para>
|
||||
In order to enable Barman support for <command>repmgr standby clone</command>, following
|
||||
@@ -243,8 +243,8 @@
|
||||
</simpara>
|
||||
<simpara>
|
||||
As an alternative we recommend using 2ndQuadrant's <ulink url="https://www.pgbarman.org/">Barman</ulink>,
|
||||
which offloads WAL management to a separate server, removing the requirement to use a replication
|
||||
slot for each individual standby to reserve WAL. See section <xref linkend="cloning-from-barman">
|
||||
which offloads WAL management to a separate server, negating the need to use replication
|
||||
slots to reserve WAL. See section <xref linkend="cloning-from-barman">
|
||||
for more details on using &repmgr; together with Barman.
|
||||
</simpara>
|
||||
</tip>
|
||||
@@ -262,7 +262,7 @@
|
||||
meaning replication changes "cascade" down through a hierarchy of servers. This
|
||||
can be used to reduce load on the primary and minimize bandwidth usage between
|
||||
sites. For more details, see the
|
||||
<ulink url="https://www.postgresql.org/docs/current/warm-standby.html#CASCADING-REPLICATION">
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/warm-standby.html#CASCADING-REPLICATION">
|
||||
PostgreSQL cascading replication documentation</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
@@ -352,13 +352,11 @@
|
||||
provide additional parameters for <command>pg_basebackup</command> to customise the
|
||||
cloning process.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
By default, <command>pg_basebackup</command> performs a checkpoint before beginning the backup
|
||||
process. However, a normal checkpoint may take some time to complete;
|
||||
a fast checkpoint can be forced with <command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>'s
|
||||
<literal>-c/--fast-checkpoint</literal> option.
|
||||
Note that this may impact performance of the server being cloned from (typically the primary)
|
||||
a fast checkpoint can be forced with the <literal>-c/--fast-checkpoint</literal> option.
|
||||
However this may impact performance of the server being cloned from (typically the primary)
|
||||
so should be used with care.
|
||||
</para>
|
||||
<tip>
|
||||
@@ -372,18 +370,6 @@
|
||||
Other options can be passed to <command>pg_basebackup</command> by including them
|
||||
in the <filename>repmgr.conf</filename> setting <varname>pg_basebackup_options</varname>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Note that by default, &repmgr; executes <command>pg_basebackup</command> with <option>-X/--wal-method</option>
|
||||
(PostgreSQL 9.6 and earlier: <option>-X/--xlog-method</option>) set to <literal>stream</literal>.
|
||||
From PostgreSQL 9.6, if replication slots are in use, it will also create a replication slot before
|
||||
running the base backup, and execute <command>pg_basebackup</command> with the
|
||||
<option>-S/--slot</option> option set to the name of the previously created replication slot.
|
||||
</para>
|
||||
<para>
|
||||
These parameters can be set by the user in <varname>pg_basebackup_options</varname>, in which case they
|
||||
will override the &repmgr; default values. However normally there's no reason to do this.
|
||||
</para>
|
||||
<para>
|
||||
If using a separate directory to store WAL files, provide the option <literal>--waldir</literal>
|
||||
(<literal>--xlogdir</literal> in PostgreSQL 9.6 and earlier) with the absolute path to the
|
||||
@@ -391,41 +377,25 @@
|
||||
a symlink will automatically be created from the main data directory.
|
||||
</para>
|
||||
<para>
|
||||
See the <ulink url="https://www.postgresql.org/docs/current/app-pgbasebackup.html">PostgreSQL pg_basebackup documentation</ulink>
|
||||
See the <ulink url="https://www.postgresql.org/docs/current/static/app-pgbasebackup.html">PostgreSQL pg_basebackup documentation</ulink>
|
||||
for more details of available options.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="cloning-advanced-managing-passwords" xreflabel="Managing passwords">
|
||||
<title>Managing passwords</title>
|
||||
<indexterm>
|
||||
<primary>cloning</primary>
|
||||
<secondary>using passwords</secondary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
If replication connections to a standby's upstream server are password-protected,
|
||||
the standby must be able to provide the password so it can begin streaming replication.
|
||||
the standby must be able to provide the password so it can begin streaming
|
||||
replication.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The recommended way to do this is to store the password in the <literal>postgres</literal> system
|
||||
user's <filename>~/.pgpass</filename> file. It's also possible to store the password in the
|
||||
environment variable <varname>PGPASSWORD</varname>, however this is not recommended for
|
||||
security reasons. For more details see the
|
||||
<ulink url="https://www.postgresql.org/docs/current/libpq-pgpass.html">PostgreSQL password file documentation</ulink>.
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/libpq-pgpass.html">PostgreSQL password file documentation</ulink>.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If using a <filename>pgpass</filename> file, an entry for the replication user (by default the
|
||||
user who connects to the <literal>repmgr</literal> database) <emphasis>must</emphasis>
|
||||
be provided, with database name set to <literal>replication</literal>, e.g.:
|
||||
<programlisting>
|
||||
node1:5432:replication:repmgr:12345</programlisting>
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
If, for whatever reason, you wish to include the password in <filename>recovery.conf</filename>,
|
||||
set <varname>use_primary_conninfo_password</varname> to <literal>true</literal> in
|
||||
@@ -437,7 +407,8 @@
|
||||
</para>
|
||||
<para>
|
||||
It is of course also possible to include the password value in the <varname>conninfo</varname>
|
||||
string for each node, but this is obviously a security risk and should be avoided.
|
||||
string for each node, but this is obviously a security risk and should be
|
||||
avoided.
|
||||
</para>
|
||||
<para>
|
||||
From PostgreSQL 9.6, <application>libpq</application> supports the <varname>passfile</varname>
|
||||
|
||||
@@ -1,107 +0,0 @@
|
||||
<sect1 id="configuration-file-log-settings" xreflabel="log settings">
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>log settings</secondary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>log settings</primary>
|
||||
<secondary>configuration in repmgr.conf</secondary>
|
||||
</indexterm>
|
||||
<title>Log settings</title>
|
||||
|
||||
<para>
|
||||
By default, &repmgr; and <application>repmgrd</application> write log output to
|
||||
<literal>STDERR</literal>. An alternative log destination can be specified
|
||||
(either a file or <literal>syslog</literal>).
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
The &repmgr; application itself will continue to write log output to <literal>STDERR</literal>
|
||||
even if another log destination is configured, as otherwise any output resulting from a command
|
||||
line operation will "disappear" into the log.
|
||||
</para>
|
||||
<para>
|
||||
This behaviour can be overridden with the command line option <option>--log-to-file</option>,
|
||||
which will redirect all logging output to the configured log destination. This is recommended
|
||||
when &repmgr; is executed by another application, particularly <application>repmgrd</application>,
|
||||
to enable log output generated by the &repmgr; application to be stored for later reference.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry id="repmgr-conf-log-level" xreflabel="log_level">
|
||||
<term><varname>log_level</varname> (<type>string</type>)
|
||||
<indexterm>
|
||||
<primary><varname>log_level</varname> configuration file parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
One of <option>DEBUG</option>, <option>INFO</option>, <option>NOTICE</option>,
|
||||
<option>WARNING</option>, <option>ERROR</option>, <option>ALERT</option>, <option>CRIT</option>
|
||||
or <option>EMERG</option>.
|
||||
</para>
|
||||
<para>
|
||||
Default is <option>INFO</option>.
|
||||
</para>
|
||||
<para>
|
||||
Note that <option>DEBUG</option> will produce a substantial amount of log output
|
||||
and should not be enabled in normal use.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="repmgr-conf-log-facility" xreflabel="log_facility">
|
||||
<term><varname>log_facility</varname> (<type>string</type>)
|
||||
<indexterm>
|
||||
<primary><varname>log_facility</varname> configuration file parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
Logging facility: possible values are <option>STDERR</option> (default), or for
|
||||
syslog integration, one of <option>LOCAL0</option>, <option>LOCAL1</option>, <option>...</option>,
|
||||
<option>LOCAL7</option>, <option>USER</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="repmgr-conf-log-file" xreflabel="log_file">
|
||||
<term><varname>log_file</varname> (<type>string</type>)
|
||||
<indexterm>
|
||||
<primary><varname>log_file</varname> configuration file parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
If <xref linkend="repmgr-conf-log-facility"> is set to <option>STDERR</option>, log output
|
||||
can be redirected to the specified file.
|
||||
</para>
|
||||
<para>
|
||||
See <xref linkend="repmgrd-log-rotation"> for information on configuring log rotation.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="repmgr-conf-log-status-interval" xreflabel="log_status_interval">
|
||||
<term><varname>log_status_interval</varname> (<type>integer</type>)
|
||||
<indexterm>
|
||||
<primary><varname>log_status_interval</varname> configuration file parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
This setting causes <application>repmgrd</application> to emit a status log
|
||||
line at the specified interval (in seconds, default <literal>300</literal>)
|
||||
describing <application>repmgrd</application>'s current state, e.g.:
|
||||
</para>
|
||||
<programlisting>
|
||||
[2018-07-12 00:47:32] [INFO] monitoring connection to upstream node "node1" (node ID: 1)</programlisting>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</sect1>
|
||||
@@ -1,130 +0,0 @@
|
||||
<sect1 id="configuration-file-service-commands" xreflabel="service command settings">
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>service command settings</secondary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>service command settings</primary>
|
||||
<secondary>configuration in repmgr.conf</secondary>
|
||||
</indexterm>
|
||||
<title>Service command settings</title>
|
||||
|
||||
<para>
|
||||
In some circumstances, &repmgr; (and <application>repmgrd</application>) need to
|
||||
be able to stop, start or restart PostgreSQL. &repmgr; commands which need to do this
|
||||
include <link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>,
|
||||
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link> and
|
||||
<link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>.
|
||||
</para>
|
||||
<para>
|
||||
By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> utility to control the PostgreSQL
|
||||
server. However this can lead to various problems, particularly when PostgreSQL has been
|
||||
installed from packages, and especially so if <application>systemd</application> is in use.
|
||||
</para>
|
||||
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
|
||||
See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
|
||||
entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
|
||||
<para>
|
||||
With this in mind, we recommend to <emphasis>always</emphasis> configure &repmgr; to use the
|
||||
available system service commands.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To do this, specify the appropriate command for each action
|
||||
in <filename>repmgr.conf</filename> using the following configuration
|
||||
parameters:
|
||||
<programlisting>
|
||||
service_start_command
|
||||
service_stop_command
|
||||
service_restart_command
|
||||
service_reload_command</programlisting>
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
&repmgr; will not apply <option>pg_bindir</option> when executing any of these commands;
|
||||
these can be user-defined scripts so must always be specified with the full path.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
It's also possible to specify a <varname>service_promote_command</varname>.
|
||||
This is intended for systems which provide a package-level promote command,
|
||||
such as Debian's <application>pg_ctlcluster</application>, to promote the
|
||||
PostgreSQL instance from standby to primary.
|
||||
</para>
|
||||
<para>
|
||||
If your packaging system does not provide such a command, it can be left empty,
|
||||
and &repmgr; will generate the appropriate `pg_ctl ... promote` command.
|
||||
</para>
|
||||
<para>
|
||||
Do not confuse this with <varname>promote_command</varname>, which is used
|
||||
by <application>repmgrd</application> to execute <xref linkend="repmgr-standby-promote">.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
To confirm which command &repmgr; will execute for each action, use
|
||||
<command><link linkend="repmgr-node-service">repmgr node service --list-actions --action=...</link></command>, e.g.:
|
||||
<programlisting>
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=stop
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=start
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=restart
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=reload</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
These commands will be executed by the system user which &repmgr; runs as (usually <literal>postgres</literal>)
|
||||
and will probably require passwordless sudo access to be able to execute the command.
|
||||
</para>
|
||||
<para>
|
||||
For example, using <application>systemd</application> on CentOS 7, the service commands can be
|
||||
set as follows:
|
||||
<programlisting>
|
||||
service_start_command = 'sudo systemctl start postgresql-9.6'
|
||||
service_stop_command = 'sudo systemctl stop postgresql-9.6'
|
||||
service_restart_command = 'sudo systemctl restart postgresql-9.6'
|
||||
service_reload_command = 'sudo systemctl reload postgresql-9.6'</programlisting>
|
||||
and <filename>/etc/sudoers</filename> should be set as follows:
|
||||
<programlisting>
|
||||
Defaults:postgres !requiretty
|
||||
postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.6, \
|
||||
/usr/bin/systemctl start postgresql-9.6, \
|
||||
/usr/bin/systemctl restart postgresql-9.6, \
|
||||
/usr/bin/systemctl reload postgresql-9.6</programlisting>
|
||||
</para>
|
||||
|
||||
<important>
|
||||
<indexterm>
|
||||
<primary>pg_ctlcluster</primary>
|
||||
<secondary>service command settings</secondary>
|
||||
</indexterm>
|
||||
<para>
|
||||
Debian/Ubuntu users: instead of calling <command>sudo systemctl</command> directly, use
|
||||
<command>sudo pg_ctlcluster</command>, e.g.:
|
||||
<programlisting>
|
||||
service_start_command = 'sudo pg_ctlcluster 9.6 main start'
|
||||
service_stop_command = 'sudo pg_ctlcluster 9.6 main stop'
|
||||
service_restart_command = 'sudo pg_ctlcluster 9.6 main restart'
|
||||
service_reload_command = 'sudo pg_ctlcluster 9.6 main reload'</programlisting>
|
||||
and set <filename>/etc/sudoers</filename> accordingly.
|
||||
</para>
|
||||
<para>
|
||||
While <command>pg_ctlcluster</command> will work when executed as user <literal>postgres</literal>,
|
||||
it's strongly recommended to use <command>sudo pg_ctlcluster</command> on <application>systemd</application>
|
||||
systems, to ensure <application>systemd</application> has a correct picture of
|
||||
the PostgreSQL application state.
|
||||
</para>
|
||||
|
||||
</important>
|
||||
|
||||
</sect1>
|
||||
@@ -1,10 +1,10 @@
|
||||
<sect1 id="configuration-file-settings" xreflabel="required configuration file settings">
|
||||
<sect1 id="configuration-file-settings" xreflabel="configuration file settings">
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>required settings</secondary>
|
||||
<secondary>settings</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Required configuration file settings</title>
|
||||
<title>Configuration file settings</title>
|
||||
<para>
|
||||
Each <filename>repmgr.conf</filename> file must contain the following parameters:
|
||||
</para>
|
||||
@@ -39,10 +39,6 @@
|
||||
called <varname>standby1</varname> (for example), things will be confusing
|
||||
to say the least.
|
||||
</para>
|
||||
<para>
|
||||
The string's maximum length is 63 characters and it should
|
||||
contain only printable ASCII characters.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
@@ -60,7 +56,7 @@
|
||||
</para>
|
||||
<para>
|
||||
For details on conninfo strings, see section <ulink
|
||||
url="https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING">Connection Strings</>
|
||||
url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING">Connection Strings</>
|
||||
in the PostgreSQL documentation.
|
||||
</para>
|
||||
<para>
|
||||
@@ -68,7 +64,7 @@
|
||||
<varname>connect_timeout</varname> in the <varname>conninfo</varname>
|
||||
string to determine the length of time which elapses before a network
|
||||
connection attempt is abandoned; for details see <ulink
|
||||
url="https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT">
|
||||
url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNECT-CONNECT-TIMEOUT">
|
||||
the PostgreSQL documentation</>.
|
||||
</para>
|
||||
</listitem>
|
||||
@@ -96,10 +92,7 @@
|
||||
|
||||
<para>
|
||||
For a full list of annotated configuration items, see the file
|
||||
<ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
For <application>repmgrd</application>-specific settings, see <xref linkend="repmgrd-configuration">.
|
||||
<ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</>.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
@@ -1,15 +1,15 @@
|
||||
<sect1 id="configuration-file" xreflabel="configuration file">
|
||||
<sect1 id="configuration-file" xreflabel="configuration file location">
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>location</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>repmgr.conf</secondary>
|
||||
<secondary>repmgr.conf location</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Configuration file</title>
|
||||
|
||||
<title>Configuration file location</title>
|
||||
<para>
|
||||
<application>repmgr</application> and <application>repmgrd</application>
|
||||
use a common configuration file, by default called
|
||||
@@ -21,55 +21,6 @@
|
||||
for more details.
|
||||
</para>
|
||||
|
||||
<sect2 id="configuration-file-format" xreflabel="configuration file format">
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>format</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Configuration file format</title>
|
||||
|
||||
<para>
|
||||
<filename>repmgr.conf</filename> is a plain text file with one parameter/value
|
||||
combination per line.
|
||||
</para>
|
||||
<para>
|
||||
Whitespace is insignificant (except within a quoted parameter value) and blank lines are ignored.
|
||||
Hash marks (<literal>#</literal>) designate the remainder of the line as a comment.
|
||||
Parameter values that are not simple identifiers or numbers should be single-quoted.
|
||||
Note that a single quote cannot be embedded in a parameter value.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
&repmgr; will interpret double-quotes as being part of a string value; only use single quotes
|
||||
to quote parameter values.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
<para>
|
||||
Example of a valid <filename>repmgr.conf</filename> file:
|
||||
<programlisting>
|
||||
# repmgr.conf
|
||||
|
||||
node_id=1
|
||||
node_name= node1
|
||||
conninfo ='host=node1 dbname=repmgr user=repmgr connect_timeout=2'
|
||||
data_directory = /var/lib/pgsql/11/data</programlisting>
|
||||
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
|
||||
|
||||
<sect2 id="configuration-file-location" xreflabel="configuration file location">
|
||||
<indexterm>
|
||||
<primary>repmgr.conf</primary>
|
||||
<secondary>location</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Configuration file location</title>
|
||||
|
||||
<para>
|
||||
The configuration file will be searched for in the following locations:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
@@ -99,7 +50,7 @@ data_directory = /var/lib/pgsql/11/data</programlisting>
|
||||
Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
|
||||
an error will be raised if it is not found or not readable, and no attempt will be made to
|
||||
check default locations; this is to prevent <application>repmgr</application> unexpectedly
|
||||
reading the wrong configuration file.
|
||||
reading the wrong configuration file.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
@@ -114,7 +65,5 @@ data_directory = /var/lib/pgsql/11/data</programlisting>
|
||||
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
||||
<filename>/path/to/repmgr.conf</filename>).
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</sect2>
|
||||
</sect1>
|
||||
</note>
|
||||
</sect1>
|
||||
|
||||
@@ -1,304 +1,16 @@
|
||||
<chapter id="configuration" xreflabel="Configuration">
|
||||
<title>repmgr configuration</title>
|
||||
|
||||
<sect1 id="configuration-prerequisites" xreflabel="Prerequisites for configuration">
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>prerequisites</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>ssh</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Prerequisites for configuration</title>
|
||||
<para>
|
||||
The following software must be installed on both servers:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><application>PostgreSQL</application></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<application>repmgr</application>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
At network level, connections to the PostgreSQL port (default: <literal>5432</literal>)
|
||||
must be possible between all nodes.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Passwordless <command>SSH</command> connectivity between all servers in the replication cluster
|
||||
is not required, but is necessary in the following cases:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
|
||||
data directory (as is the case with e.g. <link linkend="packages-debian-ubuntu">Debian packages</link>);
|
||||
in this case <command>rsync</command> must also be installed on all servers.
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
|
||||
and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<simpara>
|
||||
Consider setting <varname>ConnectTimeout</varname> to a low value in your SSH configuration.
|
||||
This will make it faster to detect any SSH connection errors.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
<sect2 id="configuration-postgresql" xreflabel="PostgreSQL configuration">
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>PostgreSQL</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>PostgreSQL configuration</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>PostgreSQL configuration for &repmgr;</title>
|
||||
<para>
|
||||
The following PostgreSQL configuration parameters may need to be changed in order
|
||||
for &repmgr; (and replication itself) to function correctly.
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>hot_standby</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>hot_standby</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<option>hot_standby</option> must always be set to <literal>on</literal>, as &repmgr; needs
|
||||
to be able to connect to each server it manages.
|
||||
</para>
|
||||
<para>
|
||||
Note that <option>hot_standby</option> defaults to <literal>on</literal> from PostgreSQL 10
|
||||
and later; in PostgreSQL 9.6 and earlier, the default was <literal>off</literal>.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-HOT-STANDBY">hot_standby</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>wal_level</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>wal_level</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<option>wal_level</option> must be one of <option>replica</option> or <option>logical</option>
|
||||
(PostgreSQL 9.5 and earlier: one of <option>hot_standby</option> or <option>logical</option>).
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL">wal_level</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>max_wal_senders</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>max_wal_senders</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<option>max_wal_senders</option> must be set to a value of <literal>2</literal> or greater.
|
||||
In general you will need one WAL sender for each standby which will attach to the PostgreSQL
|
||||
instance; additionally &repmgr; will require two free WAL senders in order to clone further
|
||||
standbys.
|
||||
</para>
|
||||
<para>
|
||||
<option>max_wal_senders</option> should be set to an appropriate value on all PostgreSQL
|
||||
instances in the replication cluster which may potentially become a primary server or
|
||||
(in cascading replication) the upstream server of a standby.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-WAL-SENDERS">max_wal_senders</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>max_replication_slots</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>max_replication_slots</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
If you are intending to use replication slots, <option>max_replication_slots</option>
|
||||
must be set to a non-zero value.
|
||||
</para>
|
||||
<para>
|
||||
<option>max_replication_slots</option> should be set to an appropriate value on all PostgreSQL
|
||||
instances in the replication cluster which may potentially become a primary server or
|
||||
(in cascading replication) the upstream server of a standby.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-REPLICATION-SLOTS">max_replication_slots</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>wal_log_hints</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>wal_log_hints</option></term>
|
||||
<listitem>
|
||||
<para>If you are intending to use <application>pg_rewind</application>,
|
||||
and the cluster was not initialised using data checksums, you may want to consider enabling
|
||||
<option>wal_log_hints</option>.
|
||||
</para>
|
||||
<para>
|
||||
For more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LOG-HINTS">wal_log_hints</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>archive_mode</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>archive_mode</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
We suggest setting <option>archive_mode</option> to <literal>on</literal> (and
|
||||
<option>archive_command</option> to <literal>/bin/true</literal>; see below)
|
||||
even if you are currently not planning to use WAL file archiving.
|
||||
</para>
|
||||
<para>
|
||||
This will make it simpler to set up WAL file archiving if it is ever required,
|
||||
as changes to <option>archive_mode</option> require a full PostgreSQL server
|
||||
restart, while <option>archive_command</option> changes can be applied via a normal
|
||||
configuration reload.
|
||||
</para>
|
||||
<para>
|
||||
However, &repmgr; itself does not require WAL file archiving.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-MODE">archive_mode</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>archive_command</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>archive_command</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
If you have set <option>archive_mode</option> to <literal>on</literal> but are not currently planning
|
||||
to use WAL file archiving, set <option>archive_command</option> to a command which does nothing but returns
|
||||
<literal>true</literal>, such as <command>/bin/true</command>. See above for details.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-COMMAND">archive_command</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<indexterm>
|
||||
<primary>wal_keep_segments</primary>
|
||||
<secondary>PostgreSQL configuration</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>wal_keep_segments</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Normally there is no need to set <option>wal_keep_segments</option> (default: <literal>0</literal>), as it
|
||||
is <emphasis>not</emphasis> a reliable way of ensuring that all required WAL segments are available to standbys.
|
||||
Replication slots and/or an archiving solution such as Barman are recommended to ensure standbys have a reliable
|
||||
source of WAL segments at all times.
|
||||
</para>
|
||||
<para>
|
||||
The only reason ever to set <option>wal_keep_segments</option> is if you have
|
||||
configured <option>pg_basebackup_options</option>
|
||||
in <filename>repmgr.conf</filename> to include the setting <literal>--wal-method=fetch</literal>
|
||||
(PostgreSQL 9.6 and earlier: <literal>--xlog-method=fetch</literal>)
|
||||
<emphasis>and</emphasis> you have <emphasis>not</emphasis> set <option>restore_command</option>
|
||||
in <filename>repmgr.conf</filename> to fetch WAL files from a reliable source such as Barman,
|
||||
in which case you'll need to set <option>wal_keep_segments</option>
|
||||
to a sufficiently high number to ensure that all WAL files required by the standby
|
||||
are retained. However we do not recommend managing replication in this way.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL documentation: <ulink url="https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-WAL-KEEP-SEGMENTS">wal_keep_segments</ulink>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
<para>
|
||||
See also the <link linkend="quickstart-postgresql-configuration">PostgreSQL configuration</link> section in the
|
||||
<link linkend="quickstart">Quick-start guide</link>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
&configuration-file;
|
||||
&configuration-file-required-settings;
|
||||
&configuration-file-log-settings;
|
||||
&configuration-file-service-commands;
|
||||
&configuration-file-settings;
|
||||
|
||||
<sect1 id="configuration-permissions" xreflabel="Database user permissions">
|
||||
<sect1 id="configuration-permissions" xreflabel="User permissions">
|
||||
<indexterm>
|
||||
<primary>configuration</primary>
|
||||
<secondary>database user permissions</secondary>
|
||||
<secondary>user permissions</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>repmgr database user permissions</title>
|
||||
<title>repmgr user permissions</title>
|
||||
<para>
|
||||
&repmgr; will create an extension database containing objects
|
||||
for administering &repmgr; metadata. The user defined in the <varname>conninfo</varname>
|
||||
|
||||
86
doc/configuring-witness-server.sgml
Normal file
86
doc/configuring-witness-server.sgml
Normal file
@@ -0,0 +1,86 @@
|
||||
<chapter id="using-witness-server">
|
||||
<indexterm>
|
||||
<primary>witness server</primary>
|
||||
<seealso>Using a witness server with repmgrd</seealso>
|
||||
</indexterm>
|
||||
|
||||
|
||||
<title>Using a witness server</title>
|
||||
<para>
|
||||
A <xref linkend="witness-server"> is a normal PostgreSQL instance which
|
||||
is not part of the streaming replication cluster; its purpose is, if a
|
||||
failover situation occurs, to provide proof that the primary server
|
||||
itself is unavailable.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
A typical use case for a witness server is a two-node streaming replication
|
||||
setup, where the primary and standby are in different locations (data centres).
|
||||
By creating a witness server in the same location as the primary, if the primary
|
||||
becomes unavailable it's possible for the standby to decide whether it can
|
||||
promote itself without risking a "split brain" scenario: if it can't see either the
|
||||
witness or the primary server, it's likely there's a network-level interruption
|
||||
and it should not promote itself. If it can see the witness but not the primary,
|
||||
this proves there is no network interruption and the primary itself is unavailable,
|
||||
and it can therefore promote itself (and ideally take action to fence the
|
||||
former primary).
|
||||
</para>
|
||||
<para>
|
||||
For more complex replication scenarios, e.g. with multiple datacentres, it may
|
||||
be preferable to use location-based failover, which ensures that only nodes
|
||||
in the same location as the primary will ever be promotion candidates;
|
||||
see <xref linkend="repmgrd-network-split"> for more details.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
A witness server will only be useful if <application>repmgrd</application>
|
||||
is in use.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<sect1 id="creating-witness-server">
|
||||
<title>Creating a witness server</title>
|
||||
<para>
|
||||
To create a witness server, set up a normal PostgreSQL instance on a server
|
||||
in the same physical location as the cluster's primary server.
|
||||
</para>
|
||||
<para>
|
||||
This instance should <emphasis>not</emphasis> be on the same physical host as the primary server,
|
||||
as otherwise if the primary server fails due to hardware issues, the witness
|
||||
server will be lost too.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
&repmgr; 3.3 and earlier provided a <command>repmgr create witness</command>
|
||||
command, which would automatically create a PostgreSQL instance. However
|
||||
this often resulted in an unsatisfactory, hard-to-customise instance.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
The witness server should be configured in the same way as a normal
|
||||
&repmgr; node; see section <xref linkend="configuration">.
|
||||
</para>
|
||||
<para>
|
||||
Register the witness server with <xref linkend="repmgr-witness-register">.
|
||||
This will create the &repmgr; extension on the witness server, and make
|
||||
a copy of the &repmgr; metadata.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
As the witness server is not part of the replication cluster, further
|
||||
changes to the &repmgr; metadata will be synchronised by
|
||||
<application>repmgrd</application>.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
Once the witness server has been configured, <application>repmgrd</application>
|
||||
should be started; for more details see <xref linkend="repmgrd-witness-server">.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To unregister a witness server, use <xref linkend="repmgr-witness-unregister">.
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
</chapter>
|
||||
@@ -88,7 +88,7 @@
|
||||
|
||||
<para>
|
||||
The values provided for <literal>%t</literal> and <literal>%d</literal>
|
||||
may contain spaces, so should be quoted in the provided command
|
||||
will probably contain spaces, so should be quoted in the provided command
|
||||
configuration, e.g.:
|
||||
<programlisting>
|
||||
event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
@@ -147,104 +147,34 @@
|
||||
<para>
|
||||
By default, all notification types will be passed to the designated script;
|
||||
the notification types can be filtered to explicitly named ones using the
|
||||
<varname>event_notifications</varname> parameter.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Events generated by the &repmgr; command:
|
||||
<varname>event_notifications</varname> parameter:
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-primary-register-events">cluster_created</link></literal></simpara>
|
||||
<simpara><literal>primary_register</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-primary-register-events">primary_register</link></literal></simpara>
|
||||
<simpara><literal>primary_unregister</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-primary-unregister-events">primary_unregister</link></literal></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-standby-clone-events">standby_clone</link></literal></simpara>
|
||||
<simpara><literal>standby_register</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-standby-register-events">standby_register</link></literal></simpara>
|
||||
<simpara><literal>standby_register_sync</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-standby-register-events">standby_register_sync</link></literal></simpara>
|
||||
<simpara><literal>standby_unregister</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-standby-unregister-events">standby_unregister</link></literal></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-standby-promote-events">standby_promote</link></literal></simpara>
|
||||
<simpara><literal>standby_clone</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-standby-follow-events">standby_follow</link></literal></simpara>
|
||||
<simpara><literal>standby_promote</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-standby-switchover-events">standby_switchover</link></literal></simpara>
|
||||
<simpara><literal>standby_follow</literal></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-witness-register-events">witness_register</link></literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-witness-unregister-events">witness_unregister</link></literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-node-rejoin-events">node_rejoin</link></literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal><link linkend="repmgr-cluster-cleanup-events">cluster_cleanup</link></literal></simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Events generated by <application>repmgrd</application> (streaming replication mode):
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_start</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_shutdown</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_reload</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_promote</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_follow</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_aborted</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_standby_reconnect</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_promote_error</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_local_disconnect</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_local_reconnect</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_upstream_disconnect</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_upstream_reconnect</literal></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><literal>standby_disconnect_manual</literal></simpara>
|
||||
</listitem>
|
||||
@@ -254,13 +184,39 @@
|
||||
<listitem>
|
||||
<simpara><literal>standby_recovery</literal></simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Events generated by <application>repmgrd</application> (BDR mode):
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><literal>witness_register</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>witness_unregister</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>node_rejoin</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_start</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_shutdown</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_promote</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_follow</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_upstream_disconnect</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_upstream_reconnect</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_promote_error</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_promote</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>bdr_failover</literal></simpara>
|
||||
</listitem>
|
||||
|
||||
@@ -38,21 +38,24 @@
|
||||
<!ENTITY quickstart SYSTEM "quickstart.sgml">
|
||||
<!ENTITY configuration SYSTEM "configuration.sgml">
|
||||
<!ENTITY configuration-file SYSTEM "configuration-file.sgml">
|
||||
<!ENTITY configuration-file-required-settings SYSTEM "configuration-file-required-settings.sgml">
|
||||
<!ENTITY configuration-file-log-settings SYSTEM "configuration-file-log-settings.sgml">
|
||||
<!ENTITY configuration-file-service-commands SYSTEM "configuration-file-service-commands.sgml">
|
||||
<!ENTITY configuration-file-settings SYSTEM "configuration-file-settings.sgml">
|
||||
<!ENTITY cloning-standbys SYSTEM "cloning-standbys.sgml">
|
||||
<!ENTITY promoting-standby SYSTEM "promoting-standby.sgml">
|
||||
<!ENTITY follow-new-primary SYSTEM "follow-new-primary.sgml">
|
||||
<!ENTITY switchover SYSTEM "switchover.sgml">
|
||||
<!ENTITY configuring-witness-server SYSTEM "configuring-witness-server.sgml">
|
||||
|
||||
<!ENTITY event-notifications SYSTEM "event-notifications.sgml">
|
||||
<!ENTITY upgrading-repmgr SYSTEM "upgrading-repmgr.sgml">
|
||||
|
||||
<!ENTITY repmgrd-overview SYSTEM "repmgrd-overview.sgml">
|
||||
<!ENTITY repmgrd-automatic-failover SYSTEM "repmgrd-automatic-failover.sgml">
|
||||
<!ENTITY repmgrd-configuration SYSTEM "repmgrd-configuration.sgml">
|
||||
<!ENTITY repmgrd-operation SYSTEM "repmgrd-operation.sgml">
|
||||
<!ENTITY repmgrd-demonstration SYSTEM "repmgrd-demonstration.sgml">
|
||||
<!ENTITY repmgrd-monitoring SYSTEM "repmgrd-monitoring.sgml">
|
||||
<!ENTITY repmgrd-degraded-monitoring SYSTEM "repmgrd-degraded-monitoring.sgml">
|
||||
<!ENTITY repmgrd-cascading-replication SYSTEM "repmgrd-cascading-replication.sgml">
|
||||
<!ENTITY repmgrd-network-split SYSTEM "repmgrd-network-split.sgml">
|
||||
<!ENTITY repmgrd-witness-server SYSTEM "repmgrd-witness-server.sgml">
|
||||
<!ENTITY repmgrd-bdr SYSTEM "repmgrd-bdr.sgml">
|
||||
|
||||
<!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.sgml">
|
||||
@@ -68,23 +71,16 @@
|
||||
<!ENTITY repmgr-node-status SYSTEM "repmgr-node-status.sgml">
|
||||
<!ENTITY repmgr-node-check SYSTEM "repmgr-node-check.sgml">
|
||||
<!ENTITY repmgr-node-rejoin SYSTEM "repmgr-node-rejoin.sgml">
|
||||
<!ENTITY repmgr-node-service SYSTEM "repmgr-node-service.sgml">
|
||||
<!ENTITY repmgr-cluster-show SYSTEM "repmgr-cluster-show.sgml">
|
||||
<!ENTITY repmgr-cluster-matrix SYSTEM "repmgr-cluster-matrix.sgml">
|
||||
<!ENTITY repmgr-cluster-crosscheck SYSTEM "repmgr-cluster-crosscheck.sgml">
|
||||
<!ENTITY repmgr-cluster-event SYSTEM "repmgr-cluster-event.sgml">
|
||||
<!ENTITY repmgr-cluster-cleanup SYSTEM "repmgr-cluster-cleanup.sgml">
|
||||
<!ENTITY repmgr-daemon-status SYSTEM "repmgr-daemon-status.sgml">
|
||||
<!ENTITY repmgr-daemon-start SYSTEM "repmgr-daemon-start.sgml">
|
||||
<!ENTITY repmgr-daemon-stop SYSTEM "repmgr-daemon-stop.sgml">
|
||||
<!ENTITY repmgr-daemon-pause SYSTEM "repmgr-daemon-pause.sgml">
|
||||
<!ENTITY repmgr-daemon-unpause SYSTEM "repmgr-daemon-unpause.sgml">
|
||||
|
||||
<!ENTITY appendix-release-notes SYSTEM "appendix-release-notes.sgml">
|
||||
<!ENTITY appendix-faq SYSTEM "appendix-faq.sgml">
|
||||
<!ENTITY appendix-signatures SYSTEM "appendix-signatures.sgml">
|
||||
<!ENTITY appendix-packages SYSTEM "appendix-packages.sgml">
|
||||
<!ENTITY appendix-support SYSTEM "appendix-support.sgml">
|
||||
|
||||
<!ENTITY bookindex SYSTEM "bookindex.sgml">
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
end of the preceding section (<xref linkend="promoting-standby">),
|
||||
execute this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf standby follow
|
||||
$ repmgr -f /etc/repmgr.conf repmgr standby follow
|
||||
INFO: changing node 3's primary to node 2
|
||||
NOTICE: restarting server using "pg_ctl -l /var/log/postgresql/startup.log -w -D '/var/lib/postgresql/data' restart"
|
||||
waiting for server to shut down......... done
|
||||
|
||||
@@ -1,129 +1,88 @@
|
||||
<sect1 id="installation-packages" xreflabel="Installing from packages">
|
||||
<title>Installing &repmgr; from packages</title>
|
||||
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
<secondary>from packages</secondary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
We recommend installing &repmgr; using the available packages for your
|
||||
system.
|
||||
</para>
|
||||
|
||||
<sect2 id="installation-packages-redhat" xreflabel="Installing from packages on RHEL, CentOS and Fedora">
|
||||
<sect2 id="installation-packages-redhat" xreflabel="Installing from packages on RHEL, Fedora and CentOS">
|
||||
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
<secondary>on Red Hat/CentOS/Fedora etc.</secondary>
|
||||
<secondary>on Redhat/CentOS/Fedora etc.</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>RedHat/CentOS/Fedora</title>
|
||||
<title>RedHat/Fedora/CentOS</title>
|
||||
<para>
|
||||
&repmgr; RPM packages for RedHat/CentOS variants and Fedora are available from the
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
|
||||
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink>; see following
|
||||
section for details.
|
||||
</para>
|
||||
<para>
|
||||
RPM packages for &repmgr; are also available via Yum through
|
||||
RPM packages for &repmgr; are available via Yum through
|
||||
the PostgreSQL Global Development Group RPM repository
|
||||
(<ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink>).
|
||||
Follow the instructions for your distribution (RedHat, CentOS,
|
||||
Fedora, etc.) and architecture as detailed there. Note that it can take some days
|
||||
for new &repmgr; packages to become available via this repository.
|
||||
Fedora, etc.) and architecture as detailed there.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
&repmgr; RPM packages are designed to be compatible with the community-provided PostgreSQL packages
|
||||
and 2ndQuadrant's <ulink url="https://www.2ndquadrant.com/en/resources/2ndqpostgres/">2ndQPostgres</ulink>.
|
||||
They may not work with vendor-specific packages such as those provided by RedHat for RHEL
|
||||
customers, as the PostgreSQL filesystem layout may be different to the community RPMs.
|
||||
Please contact your support vendor for assistance.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
For more information on the package contents, including details of installation
|
||||
paths and relevant <link linkend="configuration-file-service-commands">service commands</link>,
|
||||
see the appendix section <xref linkend="packages-centos">.
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink> also provides its
|
||||
own RPM packages which are made available
|
||||
at the same time as each &repmgr; release, as it can take some days for
|
||||
them to become available via the main PGDG repository. See following section for details:
|
||||
</para>
|
||||
|
||||
|
||||
<sect3 id="installation-packages-redhat-2ndq">
|
||||
<title>2ndQuadrant public RPM yum repository</title>
|
||||
|
||||
<title>2ndQuadrant repmgr yum repository</title>
|
||||
<para>
|
||||
Beginning with <ulink url="http://repmgr.org/release-notes-3.1.3.html">repmgr 3.1.3</ulink>,
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
|
||||
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;. We recommend using this for all future &repmgr; releases.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
repository for &repmgr; releases. This repository complements the main
|
||||
<ulink url="https://yum.postgresql.org/repopackages.php">PGDG community repository</ulink>,
|
||||
but enables repmgr users to access the latest &repmgr; packages before they are
|
||||
available via the PGDG repository, which can take several days to be updated following
|
||||
a fresh &repmgr; release.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Installation</emphasis>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
Locate the repository RPM for your PostgreSQL version from the list at:
|
||||
<ulink url="https://dl.2ndquadrant.com/">https://dl.2ndquadrant.com/</ulink>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repository definition for your distribution and PostgreSQL version
|
||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages).
|
||||
</para>
|
||||
<para>
|
||||
For example, for PostgreSQL 10 on CentOS, execute:
|
||||
<programlisting>
|
||||
curl https://dl.2ndquadrant.com/default/release/get/10/rpm | sudo bash</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For PostgreSQL 9.6 on CentOS, execute:
|
||||
<programlisting>
|
||||
curl https://dl.2ndquadrant.com/default/release/get/9.6/rpm | sudo bash</programlisting>
|
||||
</para>
|
||||
|
||||
|
||||
<para>
|
||||
Verify that the repository is installed with:
|
||||
<programlisting>
|
||||
sudo yum repolist</programlisting>
|
||||
The output should contain two entries like this:
|
||||
<programlisting>
|
||||
2ndquadrant-dl-default-release-pg10/7/x86_64 2ndQuadrant packages (PG10) for 7 - x86_64 4
|
||||
2ndquadrant-dl-default-release-pg10-debug/7/x86_64 2ndQuadrant packages (PG10) for 7 - x86_64 - Debug 3</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
||||
Import the repository public key (optional but recommended):
|
||||
<programlisting>
|
||||
sudo yum install repmgr10</programlisting>
|
||||
rpm --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr</programlisting>
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
For packages for PostgreSQL 9.6 and earlier, the package name does not contain
|
||||
a period between major and minor version numbers, e.g.
|
||||
<literal>repmgr96</literal>.
|
||||
</para>
|
||||
</note>
|
||||
<tip>
|
||||
<para>
|
||||
To determine the names of available packages, execute:
|
||||
<programlisting>
|
||||
yum search repmgr</programlisting>
|
||||
</para>
|
||||
</tip>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repository RPM for your distribution (this enables the 2ndQuadrant
|
||||
repository as a source of repmgr packages):
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<emphasis>Fedora:</emphasis>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<emphasis>RHEL, CentOS etc:</emphasis>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
e.g.:
|
||||
<programlisting>
|
||||
$ yum install http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repmgr version appropriate for your PostgreSQL version (e.g. <literal>repmgr96</literal>), e.g.:
|
||||
<programlisting>
|
||||
$ yum install repmgr96</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
@@ -132,13 +91,13 @@ yum search repmgr</programlisting>
|
||||
<emphasis>Compatibility with PGDG Repositories</emphasis>
|
||||
</para>
|
||||
<para>
|
||||
The 2ndQuadrant &repmgr; yum repository packages use the same definitions and file system layout as the
|
||||
main PGDG repository.
|
||||
The 2ndQuadrant &repmgr; yum repository uses exactly the same package definitions as the
|
||||
main PGDG repository and is effectively a selective mirror for &repmgr; packages only.
|
||||
</para>
|
||||
<para>
|
||||
Normally <application>yum</application> will prioritize the repository with the most recent &repmgr; version.
|
||||
Once the PGDG repository has been updated, it doesn't matter which repository
|
||||
the packages are installed from.
|
||||
Normally yum should prioritize the repository with the most recent &repmgr; version.
|
||||
Once the PGDG repository has been updated, it doesn't matter which repository
|
||||
the packages are installed from.
|
||||
</para>
|
||||
<para>
|
||||
To ensure the 2ndQuadrant repository is always prioritised, install <literal>yum-plugin-priorities</literal>
|
||||
@@ -152,33 +111,30 @@ yum search repmgr</programlisting>
|
||||
To install a specific package version, execute <command>yum --showduplicates list</command>
|
||||
for the package in question:
|
||||
<programlisting>
|
||||
[root@localhost ~]# yum --showduplicates list repmgr10
|
||||
[root@localhost ~]# yum --showduplicates list repmgr96
|
||||
Loaded plugins: fastestmirror
|
||||
Loading mirror speeds from cached hostfile
|
||||
* base: ftp.iij.ad.jp
|
||||
* extras: ftp.iij.ad.jp
|
||||
* updates: ftp.iij.ad.jp
|
||||
Available Packages
|
||||
repmgr10.x86_64 4.0.3-1.rhel7 pgdg10
|
||||
repmgr10.x86_64 4.0.4-1.rhel7 pgdg10
|
||||
repmgr10.x86_64 4.0.5-1.el7 2ndquadrant-repo-10</programlisting>
|
||||
repmgr96.x86_64 3.2-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.2.1-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.1-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.2-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.2-1.rhel6 pgdg96
|
||||
repmgr96.x86_64 4.0.0-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 4.0.0-1.rhel6 pgdg96</programlisting>
|
||||
then append the appropriate version number to the package name with a hyphen, e.g.:
|
||||
<programlisting>
|
||||
[root@localhost ~]# yum install repmgr10-4.0.3-1.rhel7</programlisting>
|
||||
[root@localhost ~]# yum install repmgr96-3.3.2-1.el6</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<emphasis>Installing old packages</emphasis>
|
||||
</para>
|
||||
<para>
|
||||
See appendix <link linkend="packages-old-versions-rhel-centos">Installing old package versions</link>
|
||||
for details on how to retrieve older package versions.
|
||||
</para>
|
||||
|
||||
</sect3>
|
||||
|
||||
</sect2>
|
||||
|
||||
|
||||
|
||||
<sect2 id="installation-packages-debian" xreflabel="Installing from packages on Debian or Ubuntu">
|
||||
|
||||
<indexterm>
|
||||
@@ -192,83 +148,6 @@ yum search repmgr</programlisting>
|
||||
Instructions can be found in the APT section of the PostgreSQL Wiki
|
||||
(<ulink url="https://wiki.postgresql.org/wiki/Apt">https://wiki.postgresql.org/wiki/Apt</ulink>).
|
||||
</para>
|
||||
<para>
|
||||
For more information on the package contents, including details of installation
|
||||
paths and relevant <link linkend="configuration-file-service-commands">service commands</link>,
|
||||
see the appendix section <xref linkend="packages-debian-ubuntu">.
|
||||
</para>
|
||||
|
||||
<sect3 id="installation-packages-debian-ubuntu-2ndq">
|
||||
<title>2ndQuadrant public apt repository for Debian/Ubuntu</title>
|
||||
|
||||
<para>
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a
|
||||
<ulink url="https://dl.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://dl.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
||||
<emphasis>Installation</emphasis>
|
||||
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repository definition for your distribution and PostgreSQL version
|
||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages) by executing:
|
||||
<programlisting>
|
||||
curl https://dl.2ndquadrant.com/default/release/get/deb | sudo bash</programlisting>
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
This will automatically install the following additional packages, if not already present:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><literal>lsb-release</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>apt-transport-https</literal></simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</note>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>postgresql-10-repmgr</literal>):
|
||||
<programlisting>
|
||||
sudo apt-get install postgresql-10-repmgr</programlisting>
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
For packages for PostgreSQL 9.6 and earlier, the package name includes
|
||||
a period between major and minor version numbers, e.g.
|
||||
<literal>postgresql-9.6-repmgr</literal>.
|
||||
</para>
|
||||
</note>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<emphasis>Installing old packages</emphasis>
|
||||
</para>
|
||||
<para>
|
||||
See appendix <link linkend="packages-old-versions-debian">Installing old package versions</link>
|
||||
for details on how to retrieve older package versions.
|
||||
</para>
|
||||
|
||||
|
||||
</sect3>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
@@ -13,9 +13,8 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
&repmgr; 4.x is compatible with all PostgreSQL versions from 9.3. See
|
||||
section <link linkend="install-compatibility-matrix">&repmgr; compatibility matrix</link>
|
||||
for an overview of version compatibility.
|
||||
From version 4.0, repmgr is compatible with all PostgreSQL versions from 9.3, including PostgreSQL 10.
|
||||
Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
@@ -32,33 +31,34 @@
|
||||
<para>
|
||||
&repmgr; must be installed on each server in the replication cluster.
|
||||
If installing repmgr from packages, the package version must match the PostgreSQL
|
||||
version. If installing from source, &repmgr; must be compiled against the same
|
||||
version. If installing from source, repmgr must be compiled against the same
|
||||
major version.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
The same "major" &repmgr; version (e.g. <literal>4.2.x</literal>) <emphasis>must</emphasis>
|
||||
be installed on all nodes in the replication cluster. We strongly recommend keeping all
|
||||
nodes on the same (preferably latest) "minor" &repmgr; version to minimize the risk
|
||||
of incompatibilities.
|
||||
</simpara>
|
||||
<simpara>
|
||||
If different "major" &repmgr; versions (e.g. 3.3.x and 4.1.x)
|
||||
are installed on different nodes, in the best case &repmgr; (in particular <application>repmgrd</application>)
|
||||
will not run. In the worst case, you will end up with a broken cluster.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
A dedicated system user for &repmgr; is <emphasis>not</emphasis> required; as many &repmgr; and
|
||||
A dedicated system user for &repmgr; is *not* required; as many &repmgr; and
|
||||
<application>repmgrd</application> actions require direct access to the PostgreSQL data directory,
|
||||
these commands should be executed by the <literal>postgres</literal> user.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
See also <link linkend="configuration-prerequisites">Prerequisites for configuration</link>
|
||||
for information on networking requirements.
|
||||
Passwordless <command>ssh</command> connectivity between all servers in the replication cluster
|
||||
is not required, but is necessary in the following cases:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>if you need &repmgr; to copy configuration files from outside the PostgreSQL
|
||||
data directory (in which case <command>rsync</command> is also required)</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>to perform <link linkend="performing-switchover">switchover operations</link></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
when executing <command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>
|
||||
and <command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
@@ -69,111 +69,4 @@
|
||||
terminated if your <command>ssh</command> session to the server is interrupted or closed.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
<sect2 id="install-compatibility-matrix">
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgr</primary>
|
||||
<secondary>compatibility matrix</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>compatibility matrix</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>&repmgr; compatibility matrix</title>
|
||||
<para>
|
||||
The following table provides an overview of which &repmgr; version supports
|
||||
which PostgreSQL version.
|
||||
</para>
|
||||
|
||||
|
||||
<table id="repmgr-compatibility-matrix">
|
||||
<title>&repmgr; compatibility matrix</title>
|
||||
|
||||
<tgroup cols="2">
|
||||
<thead>
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; version
|
||||
</entry>
|
||||
<entry>
|
||||
Latest release
|
||||
</entry>
|
||||
<entry>
|
||||
Supported PostgreSQL versions
|
||||
</entry>
|
||||
</row>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; 4.x
|
||||
</entry>
|
||||
<entry>
|
||||
<link linkend="release-4.2">4.2</link> (2018-10-24)
|
||||
</entry>
|
||||
<entry>
|
||||
9.3, 9.4, 9.5, 9.6, 10, 11
|
||||
</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; 3.x
|
||||
</entry>
|
||||
<entry>
|
||||
<ulink url="https://repmgr.org/release-notes-3.3.2.html">3.3.2</ulink> (2017-05-30)
|
||||
</entry>
|
||||
<entry>
|
||||
9.3, 9.4, 9.5, 9.6
|
||||
</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; 2.x
|
||||
</entry>
|
||||
<entry>
|
||||
<ulink url="https://repmgr.org/release-notes-2.0.3.html">2.0.3</ulink> (2015-04-16)
|
||||
</entry>
|
||||
<entry>
|
||||
9.0, 9.1, 9.2, 9.3, 9.4
|
||||
</entry>
|
||||
</row>
|
||||
</tbody>
|
||||
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<important>
|
||||
<para>
|
||||
The &repmgr; 2.x and 3.x series are no longer maintained or supported.
|
||||
We strongly recommend upgrading to the latest &repmgr; version.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
|
||||
<para>
|
||||
Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4.
|
||||
</para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
PostgreSQL 9.3 does not support replication slots, so corresponding &repmgr; functionality
|
||||
is not available.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
In PostgreSQL 9.3 and PostgreSQL 9.4, <command>pg_rewind</command> is not part of the core
|
||||
distribution. <command>pg_rewind</command> will need to be compiled separately to be able
|
||||
to use any &repmgr; functionality which takes advantage of it.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
@@ -26,68 +26,12 @@
|
||||
add the <ulink
|
||||
url="http://apt.postgresql.org/">apt.postgresql.org</ulink>
|
||||
repository to your <filename>sources.list</filename> if you
|
||||
have not already done so, and ensure the source repository is enabled.
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
If not configured, the source repository can be added by including
|
||||
a <literal>deb-src</literal> line as a copy of the existing <literal>deb</literal>
|
||||
line in the repository file, which is usually
|
||||
<filename>/etc/apt/sources.list.d/pgdg.list</filename>, e.g.:
|
||||
<programlisting>
|
||||
deb http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main
|
||||
deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisting>
|
||||
</para>
|
||||
</tip>
|
||||
<para>
|
||||
Then install the prerequisites for
|
||||
building PostgreSQL with e.g.:
|
||||
have not already done so. Then install the pre-requisites for
|
||||
building PostgreSQL with:
|
||||
<programlisting>
|
||||
sudo apt-get update
|
||||
sudo apt-get build-dep postgresql-9.6</programlisting>
|
||||
</para>
|
||||
|
||||
<important>
|
||||
<simpara>
|
||||
Select the appropriate PostgreSQL version for your target repmgr version.
|
||||
</simpara>
|
||||
</important>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If using <command>apt-get build-dep</command> is not possible, the
|
||||
following packages may need to be installed manually:
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><literal>libedit-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libkrb5-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libpam0g-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libreadline-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libselinux1-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libssl-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libxml2-dev</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libxslt1-dev</literal></simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
@@ -101,55 +45,15 @@ deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisti
|
||||
sudo yum install yum-utils openjade docbook-dtds docbook-style-dsssl docbook-style-xsl
|
||||
sudo yum-builddep postgresql96</programlisting>
|
||||
</para>
|
||||
|
||||
<important>
|
||||
<simpara>
|
||||
Select the appropriate PostgreSQL version for your target repmgr version.
|
||||
</simpara>
|
||||
</important>
|
||||
<note>
|
||||
<para>
|
||||
If using <command>yum-builddep</command> is not possible, the
|
||||
following packages may need to be installed manually:
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><literal>libselinux-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libxml2-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>libxslt-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>openssl-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>pam-devel</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>readline-devel</literal></simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
If building against PostgreSQL 11 or later configured with the <option>--with-llvm</option> option
|
||||
(this is the case with the PGDG-provided packages) you'll also need to install the
|
||||
<literal>llvm-toolset-7-clang</literal> package. This is available via the
|
||||
<ulink url="https://wiki.centos.org/AdditionalResources/Repositories/SCL">Software Collections (SCL) Repository</ulink>.
|
||||
</para>
|
||||
</tip>
|
||||
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
Select the appropriate PostgreSQL versions for your target repmgr version.
|
||||
</simpara>
|
||||
</note>
|
||||
</sect2>
|
||||
|
||||
|
||||
@@ -176,7 +80,7 @@ deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisti
|
||||
</para>
|
||||
|
||||
<para>
|
||||
There are also tags for each &repmgr; release, e.g. <literal>v4.2.0</literal>.
|
||||
There are also tags for each &repmgr; release, e.g. <filename>REL4_0_STABLE</filename>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
@@ -242,7 +146,7 @@ deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisti
|
||||
The &repmgr; documentation is (like the main PostgreSQL project)
|
||||
written in DocBook format. To build it locally as HTML, you'll need to
|
||||
install the required packages as described in the
|
||||
<ulink url="https://www.postgresql.org/docs/9.6/docguide-toolsets.html">
|
||||
<ulink url="https://www.postgresql.org/docs/9.6/static/docguide-toolsets.html">
|
||||
PostgreSQL documentation</ulink> then execute:
|
||||
<programlisting>
|
||||
./configure && make install-doc</programlisting>
|
||||
@@ -261,7 +165,7 @@ deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisti
|
||||
<note>
|
||||
<simpara>
|
||||
Due to changes in PostgreSQL's documentation build system from PostgreSQL 10,
|
||||
the documentation can currently only be built against PostgreSQL 9.6 or earlier.
|
||||
the documentation can currently only be built against PostgreSQL 9.6 or earlier.
|
||||
This limitation will be fixed when time and resources permit.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<date>2017</date>
|
||||
|
||||
<copyright>
|
||||
<year>2010-2019</year>
|
||||
<year>2010-2018</year>
|
||||
<holder>2ndQuadrant, Ltd.</holder>
|
||||
</copyright>
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
<title>Legal Notice</title>
|
||||
|
||||
<para>
|
||||
<productname>repmgr</productname> is Copyright © 2010-2019
|
||||
<productname>repmgr</productname> is Copyright © 2010-2018
|
||||
by 2ndQuadrant, Ltd. All rights reserved.
|
||||
</para>
|
||||
|
||||
|
||||
@@ -2,8 +2,7 @@
|
||||
<title>repmgr overview</title>
|
||||
|
||||
<para>
|
||||
This chapter provides a high-level overview of &repmgr;'s components and
|
||||
functionality.
|
||||
This chapter provides a high-level overview of repmgr's components and functionality.
|
||||
</para>
|
||||
<sect1 id="repmgr-concepts" xreflabel="Concepts">
|
||||
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
<chapter id="quickstart" xreflabel="Quick-start guide">
|
||||
<title>Quick-start guide</title>
|
||||
|
||||
<indexterm>
|
||||
<primary>quickstart</primary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
This section gives a quick introduction to &repmgr;, including setting up a
|
||||
sample &repmgr; installation and a basic replication cluster.
|
||||
@@ -54,8 +50,7 @@
|
||||
</para>
|
||||
<para>
|
||||
If you want <application>repmgr</application> to copy configuration files which are
|
||||
located outside the PostgreSQL data directory, and/or to test
|
||||
<command><link linkend="repmgr-standby-switchover">switchover</link></command>
|
||||
located outside the PostgreSQL data directory, and/or to test <command>switchover</command>
|
||||
functionality, you will also need passwordless SSH connections between both servers, and
|
||||
<application>rsync</application> should be installed.
|
||||
</para>
|
||||
@@ -68,7 +63,7 @@
|
||||
</tip>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-postgresql-configuration" xreflabel="PostgreSQL configuration">
|
||||
<sect1 id="quickstart-postgresql-configuration">
|
||||
<title>PostgreSQL configuration</title>
|
||||
<para>
|
||||
On the primary server, a PostgreSQL instance must be initialised and running.
|
||||
@@ -83,13 +78,6 @@
|
||||
|
||||
max_wal_senders = 10
|
||||
|
||||
# Enable replication slots; set this figure to at least one more
|
||||
# than the number of standbys which will connect to this server.
|
||||
# Note that repmgr will only make use of replication slots if
|
||||
# "use_replication_slots" is set to "true" in repmgr.conf
|
||||
|
||||
max_replication_slots = 0
|
||||
|
||||
# Ensure WAL files contain enough information to enable read-only queries
|
||||
# on the standby.
|
||||
#
|
||||
@@ -97,7 +85,7 @@
|
||||
# PostgreSQL 9.6 and later: one of 'replica' or 'logical'
|
||||
# ('hot_standby' will still be accepted as an alias for 'replica')
|
||||
#
|
||||
# See: https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL
|
||||
# See: https://www.postgresql.org/docs/current/static/runtime-config-wal.html#GUC-WAL-LEVEL
|
||||
|
||||
wal_level = 'hot_standby'
|
||||
|
||||
@@ -114,6 +102,16 @@
|
||||
# your WALs in a secure place. /bin/true is an example of a command that
|
||||
# ignores archiving. Use something more sensible.
|
||||
archive_command = '/bin/true'
|
||||
|
||||
# If you have configured "pg_basebackup_options"
|
||||
# in "repmgr.conf" to include the setting "--xlog-method=fetch" (from
|
||||
# PostgreSQL 10 "--wal-method=fetch"), *and* you have not set
|
||||
# "restore_command" in "repmgr.conf" to fetch WAL files from another
|
||||
# source such as Barman, you'll need to set "wal_keep_segments" to a
|
||||
# high enough value to ensure that all WAL files generated while
|
||||
# the standby is being cloned are retained until the standby starts up.
|
||||
#
|
||||
# wal_keep_segments = 5000
|
||||
</programlisting>
|
||||
<tip>
|
||||
<simpara>
|
||||
@@ -128,9 +126,6 @@
|
||||
and the cluster was not initialised using data checksums, you may want to consider enabling
|
||||
<varname>wal_log_hints</varname>; for more details see <xref linkend="repmgr-node-rejoin-pg-rewind">.
|
||||
</para>
|
||||
<para>
|
||||
See also the <link linkend="configuration-postgresql">PostgreSQL configuration</link> section in the <link linkend="configuration">repmgr configuration guide</link>.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="quickstart-repmgr-user-database">
|
||||
@@ -201,20 +196,11 @@
|
||||
<sect1 id="quickstart-standby-preparation">
|
||||
<title>Preparing the standby</title>
|
||||
<para>
|
||||
On the standby, do <emphasis>not</emphasis> create a PostgreSQL instance (i.e.
|
||||
do not execute <application>initdb</application> or any database creation
|
||||
scripts provided by packages), but do ensure the destination
|
||||
On the standby, do not create a PostgreSQL instance, but do ensure the destination
|
||||
data directory (and any other directories which you want PostgreSQL to use)
|
||||
exist and are owned by the <literal>postgres</literal> system user. Permissions
|
||||
must be set to <literal>0700</literal> (<literal>drwx------</literal>).
|
||||
</para>
|
||||
<tip>
|
||||
<simpara>
|
||||
&repmgr; will place a copy of the primary's database files in this directory.
|
||||
It will however refuse to run if a PostgreSQL instance has already been
|
||||
created there.
|
||||
</simpara>
|
||||
</tip>
|
||||
<para>
|
||||
Check the primary database is reachable from the standby using <application>psql</application>:
|
||||
</para>
|
||||
@@ -224,7 +210,7 @@
|
||||
<note>
|
||||
<para>
|
||||
&repmgr; stores connection information as <ulink
|
||||
url="https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING">libpq
|
||||
url="https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING">libpq
|
||||
connection strings</ulink> throughout. This documentation refers to them as <literal>conninfo</literal>
|
||||
strings; an alternative name is <literal>DSN</literal> (<literal>data source name</literal>).
|
||||
We'll use these in place of the <command>-h hostname -d databasename -U username</command> syntax.
|
||||
@@ -248,45 +234,17 @@
|
||||
<para>
|
||||
<filename>repmgr.conf</filename> should not be stored inside the PostgreSQL data directory,
|
||||
as it could be overwritten when setting up or reinitialising the PostgreSQL
|
||||
server. See sections <xref linkend="configuration"> and <xref linkend="configuration-file">
|
||||
server. See sections on <xref linkend="configuration-file"> and <xref linkend="configuration-file-settings">
|
||||
for further details about <filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
&repmgr; only uses <option>pg_bindir</option> when it executes
|
||||
PostgreSQL binaries directly.
|
||||
</para>
|
||||
<para>
|
||||
For user-defined scripts such as <option>promote_command</option> and the
|
||||
various <option>service_*_command</option>s, you <emphasis>must</emphasis>
|
||||
always explicitly provide the full path to the binary or script being
|
||||
executed, even if it is &repmgr; itself.
|
||||
</para>
|
||||
<para>
|
||||
This is because these options can contain user-defined scripts in arbitrary
|
||||
locations, so prepending <option>pg_bindir</option> may break them.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<tip>
|
||||
<simpara>
|
||||
For Debian-based distributions we recommend explicitly setting
|
||||
<option>pg_bindir</option> to the directory where <command>pg_ctl</command> and other binaries
|
||||
<literal>pg_bindir</literal> to the directory where <command>pg_ctl</command> and other binaries
|
||||
not in the standard path are located. For PostgreSQL 9.6 this would be <filename>/usr/lib/postgresql/9.6/bin/</filename>.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
<tip>
|
||||
<simpara>
|
||||
If your distribution places the &repmgr; binaries in a location other than the
|
||||
PostgreSQL installation directory, specify this with <option>repmgr_bindir</option>
|
||||
to enable &repmgr; to perform operations (e.g.
|
||||
<command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command>)
|
||||
on other nodes.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
<para>
|
||||
See the file
|
||||
<ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/repmgr.conf.sample">repmgr.conf.sample</>
|
||||
@@ -446,7 +404,7 @@
|
||||
</para>
|
||||
<para>
|
||||
From PostgreSQL 9.6 you can also use the view
|
||||
<ulink url="https://www.postgresql.org/docs/current/monitoring-stats.html#PG-STAT-WAL-RECEIVER-VIEW">
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/monitoring-stats.html#PG-STAT-WAL-RECEIVER-VIEW">
|
||||
<literal>pg_stat_wal_receiver</literal></ulink> to check the replication status from the standby.
|
||||
|
||||
<programlisting>
|
||||
|
||||
@@ -15,14 +15,9 @@
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Purges monitoring history from the <literal>repmgr.monitoring_history</literal> table to
|
||||
prevent excessive table growth.
|
||||
</para>
|
||||
<para>
|
||||
By default <emphasis>all</emphasis> data will be removed; use the <option>-k/--keep-history</option>
|
||||
option to specify the number of days of monitoring history to retain.
|
||||
</para>
|
||||
<para>
|
||||
This command can be executed manually or as a cronjob.
|
||||
prevent excessive table growth. Use the <literal>-k/--keep-history</literal> option to specify the
|
||||
number of days of monitoring history to retain. This command can be used
|
||||
manually or as a cronjob.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
@@ -43,35 +38,4 @@
|
||||
<filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-cluster-cleanup-events">
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>cluster_cleanup</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--node-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Only delete monitoring records for the specified node.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
For more details see the sections <xref linkend="repmgrd-monitoring"> and
|
||||
<xref linkend="repmgrd-monitoring-configuration">.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
@@ -38,59 +38,5 @@
|
||||
and therefore determine the state of outbound connections from that node.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr cluster crosscheck</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The check completed successfully and all nodes are reachable.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_BAD_SSH (12)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more nodes could not be accessed via SSH.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
This only applies to nodes unreachable from the node where
|
||||
this command is executed.
|
||||
</simpara>
|
||||
<simpara>
|
||||
It's also possible that the crosscheck establishes that
|
||||
connections between PostgreSQL on all nodes are functioning,
|
||||
even if SSH access between some nodes is not possible.
|
||||
</simpara>
|
||||
</note>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_NODE_STATUS (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
PostgreSQL on one or more nodes could not be reached.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
This error code overrides <option>ERR_BAD_SSH</option>.
|
||||
</simpara>
|
||||
</note>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
|
||||
@@ -49,22 +49,6 @@
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Output format</title>
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--csv</literal>: generate output in CSV format. Note that the <literal>Details</literal>
|
||||
column will currently not be emitted in CSV format.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
|
||||
@@ -97,49 +97,5 @@
|
||||
useful result.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr cluster matrix</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The check completed successfully and all nodes are reachable.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_BAD_SSH (12)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more nodes could not be accessed via SSH.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_NODE_STATUS (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
PostgreSQL on one or more nodes could not be reached.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
This error code overrides <option>ERR_BAD_SSH</option>.
|
||||
</simpara>
|
||||
</note>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
|
||||
@@ -22,14 +22,6 @@
|
||||
directly and can be run on any node in the cluster; this is also useful when analyzing
|
||||
connectivity from a particular node.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Node availability is tested by connecting from the node where
|
||||
<command>repmgr cluster show</command> is executed, and does not necessarily imply the node
|
||||
is down. See <xref linkend="repmgr-cluster-matrix"> and <xref linkend="repmgr-cluster-crosscheck"> to get
|
||||
better overviews of connections between nodes.
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@@ -52,186 +44,72 @@
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
|
||||
ID | Name | Role | Status | Upstream | Location | Priority | Connection string
|
||||
----+-------+---------+-----------+----------+----------+----------+-----------------------------------------
|
||||
1 | node1 | primary | * running | | default | 100 | host=db_node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | default | 100 | host=db_node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | 100 | host=db_node3 dbname=repmgr user=repmgr</programlisting>
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+-----------------------------------------
|
||||
1 | node1 | primary | * running | | default | host=db_node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | default | host=db_node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | host=db_node3 dbname=repmgr user=repmgr</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
<refsect1>
|
||||
<title>Notes</title>
|
||||
<para>
|
||||
The column <literal>Role</literal> shows the expected server role according to the
|
||||
&repmgr; metadata.
|
||||
</para>
|
||||
<para>
|
||||
<literal>Status</literal> shows whether the server is running or unreachable.
|
||||
&repmgr; metadata. <literal>Status</literal> shows whether the server is running or unreachable.
|
||||
If the node has an unexpected role not reflected in the &repmgr; metadata, e.g. a node was manually
|
||||
promoted to primary, this will be highlighted with an exclamation mark.
|
||||
If a connection to the node cannot be made, this will be highlighted with a question mark.
|
||||
Note that the node will only be shown as <literal>? unreachable</literal>
|
||||
if a connection is not possible at network level; if the PostgreSQL instance on the
|
||||
node is pingable but not accepting connections, it will be shown as <literal>? running</literal>.
|
||||
</para>
|
||||
<para>
|
||||
In the following example, executed on <literal>node3</literal>, <literal>node1</literal> is not reachable
|
||||
at network level and assumed to be down; <literal>node2</literal> has been promoted to primary
|
||||
(but <literal>node3</literal> is not attached to it, and its metadata has not yet been updated);
|
||||
<literal>node4</literal> is running but rejecting connections (from <literal>node3</literal> at least).
|
||||
promoted to primary, this will be highlighted with an exclamation mark, e.g.:
|
||||
<programlisting>
|
||||
ID | Name | Role | Status | Upstream | Location | Priority | Connection string
|
||||
----+-------+---------+----------------------+----------+----------+----------+-----------------------------------------
|
||||
1 | node1 | primary | ? unreachable | | default | 100 | host=db_node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | ! running as primary | node1 | default | 100 | host=db_node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | 100 | host=db_node3 dbname=repmgr user=repmgr
|
||||
4 | node4 | standby | ? running | node1 | default | 100 | host=db_node4 dbname=repmgr user=repmgr
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
|
||||
WARNING: following issues were detected
|
||||
- unable to connect to node "node1" (ID: 1)
|
||||
- node "node1" (ID: 1) is registered as an active primary but is unreachable
|
||||
- node "node2" (ID: 2) is registered as standby but running as primary
|
||||
- unable to connect to node "node4" (ID: 4)
|
||||
HINT: execute with --verbose option to see connection error messages</programlisting>
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+----------------------+----------+----------+-----------------------------------------
|
||||
1 | node1 | primary | ? unreachable | | default | host=db_node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | ! running as primary | node1 | default | host=db_node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | host=db_node3 dbname=repmgr user=repmgr
|
||||
|
||||
WARNING: following issues were detected
|
||||
node "node1" (ID: 1) is registered as an active primary but is unreachable
|
||||
node "node2" (ID: 2) is registered as standby but running as primary</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Node availability is tested by connecting from the node where
|
||||
<command>repmgr cluster show</command> is executed, and does not necessarily imply the node
|
||||
is down. See <xref linkend="repmgr-cluster-matrix"> and <xref linkend="repmgr-cluster-crosscheck"> to get
|
||||
a better overview of connections between nodes.
|
||||
</para>
|
||||
<para>
|
||||
To diagnose connection issues, execute <command>repmgr cluster show</command>
|
||||
with the <option>--verbose</option> option; this will display the error message
|
||||
for each failed connection attempt.
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
Use <xref linkend="repmgr-cluster-matrix"> and <xref linkend="repmgr-cluster-crosscheck">
|
||||
to diagnose connection issues across the whole replication cluster.
|
||||
</para>
|
||||
</tip>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--csv</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
|
||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||
parsing by scripts, e.g.:
|
||||
<programlisting>
|
||||
<para>
|
||||
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
|
||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||
parsing by scripts:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show --csv
|
||||
1,-1,-1
|
||||
2,0,0
|
||||
3,0,1</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The columns have the following meanings:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
node ID
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
availability (0 = available, -1 = unavailable)
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--compact</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Suppress display of the <literal>conninfo</literal> column.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--terse</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Suppress warnings about connection issues.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--verbose</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Display the full text of any database connection error messages.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr cluster show</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
No issues were detected.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
An issue was encountered while attempting to retrieve
|
||||
&repmgr; metadata.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_DB_CONN (6)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr; was unable to connect to the local PostgreSQL instance.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_NODE_STATUS (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more issues were detected with the replication configuration,
|
||||
e.g. a node was not in its expected state.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-node-status">, <xref linkend="repmgr-node-check">, <xref linkend="repmgr-daemon-status">
|
||||
The columns have the following meanings:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
node ID
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
availability (0 = available, -1 = unavailable)
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
<refentry id="repmgr-daemon-pause">
|
||||
<indexterm>
|
||||
<primary>repmgr daemon pause</primary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>pausing</secondary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr daemon pause</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr daemon pause</refname>
|
||||
<refpurpose>Instruct all <application>repmgrd</application> instances in the replication cluster to pause failover operations</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
This command can be run on any active node in the replication cluster to instruct all
|
||||
running <application>repmgrd</application> instances to "pause" themselves, i.e. take no
|
||||
action (such as promoting themselves or following a new primary) if a failover event is detected.
|
||||
</para>
|
||||
<para>
|
||||
This functionality is useful for performing maintenance operations, such as switchovers
|
||||
or upgrades, which might otherwise trigger a failover if <application>repmgrd</application>
|
||||
is running normally.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
It's important to wait a few seconds after restarting PostgreSQL on any node before running
|
||||
<command>repmgr daemon pause</command>, as the <application>repmgrd</application> instance
|
||||
on the restarted node will take a second or two before it has updated its status.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
<xref linkend="repmgr-daemon-unpause"> will instruct all previously paused <application>repmgrd</application>
|
||||
instances to resume normal failover operation.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
<command>repmgr daemon pause</command> can be executed on any active node in the
|
||||
replication cluster. A valid <filename>repmgr.conf</filename> file is required.
|
||||
It will have no effect on previously paused nodes.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf daemon pause
|
||||
NOTICE: node 1 (node1) paused
|
||||
NOTICE: node 2 (node2) paused
|
||||
NOTICE: node 3 (node3) paused</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check if nodes are reachable but don't pause <application>repmgrd</application>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr daemon pause</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application> could be paused on all nodes.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_REPMGRD_PAUSE (26)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application> could not be paused on one or more nodes.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-daemon-unpause">, <xref linkend="repmgr-daemon-status">
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
@@ -1,203 +0,0 @@
|
||||
<refentry id="repmgr-daemon-start">
|
||||
<indexterm>
|
||||
<primary>repmgr daemon start</primary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>starting</secondary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr daemon start</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr daemon start</refname>
|
||||
<refpurpose>Start the <application>repmgrd</application> daemon</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
This command starts the <application>repmgrd</application> daemon on the
|
||||
local node.
|
||||
</para>
|
||||
<para>
|
||||
By default, &repmgr; will wait for up to 15 seconds to confirm that <application>repmgrd</application>
|
||||
started. This behaviour can be overridden by specifying a different value using the <option>--wait</option>
|
||||
option, or disabled altogether with the <option>--no-wait</option> option.
|
||||
</para>
|
||||
|
||||
<important>
|
||||
<para>
|
||||
The <filename>repmgr.conf</filename> parameter <varname>repmgrd_service_start_command</varname>
|
||||
must be set for <command>repmgr daemon start</command> to work; see section
|
||||
<xref linkend="repmgr-daemon-start-configuration"> for details.
|
||||
</para>
|
||||
</important>
|
||||
</refsect1>
|
||||
|
||||
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually attempt to start <application>repmgrd</application>.
|
||||
</para>
|
||||
<para>
|
||||
This action will output the command which would be executed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-w</option></term>
|
||||
<term><option>--wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Wait for the specified number of seconds to confirm that <application>repmgrd</application>
|
||||
started successfully.
|
||||
</para>
|
||||
<para>
|
||||
Note that providing <option>--wait=0</option> is the equivalent of <option>--no-wait</option>.
|
||||
</para>
|
||||
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--no-wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Don't wait to confirm that <application>repmgrd</application>
|
||||
started successfully.
|
||||
</para>
|
||||
<para>
|
||||
This is equivalent to providing <option>--wait=0</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-daemon-start-configuration" xreflabel="repmgr daemon start configuration">
|
||||
<title>Configuration file settings</title>
|
||||
<para>
|
||||
The following parameter in <filename>repmgr.conf</filename> is relevant
|
||||
to <command>repmgr daemon start</command>:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<indexterm>
|
||||
<primary>repmgrd_service_start_command</primary>
|
||||
<secondary>with "repmgr daemon start"</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>repmgrd_service_start_command</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<command>repmgr daemon start</command> will execute the command defined by the
|
||||
<varname>repmgrd_service_start_command</varname> parameter in <filename>repmgr.conf</filename>.
|
||||
This must be set to a shell command which will start <application>repmgrd</application>;
|
||||
if &repmgr; was installed from a package, this will be the service command defined by the
|
||||
package. For more details see <link linkend="appendix-packages">Appendix: &repmgr; package details</link>.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
If &repmgr; was installed from a system package, and you do not configure
|
||||
<varname>repmgrd_service_start_command</varname> to an appropriate service command, this may
|
||||
result in the system becoming confused about the state of the <application>repmgrd</application>
|
||||
service; this is particularly the case with <literal>systemd</literal>.
|
||||
</para>
|
||||
</important>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr daemon start</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The <application>repmgrd</application> start command (defined in
|
||||
<varname>repmgrd_service_start_command</varname>) was successfully executed.
|
||||
</para>
|
||||
<para>
|
||||
If the <option>--wait</option> option was provided, &repmgr; will confirm that
|
||||
<application>repmgrd</application> has actually started up.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<varname>repmgrd_service_start_command</varname> is not defined in
|
||||
<filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_DB_CONN (6)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr; was unable to connect to the local PostgreSQL node.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL must be running before <application>repmgrd</application>
|
||||
can be started. Additionally, unless the <option>--no-wait</option> option was
|
||||
provided, &repmgr; needs to be able to connect to the local PostgreSQL node
|
||||
to determine the state of <application>repmgrd</application>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_REPMGRD_SERVICE (27)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The <application>repmgrd</application> start command (defined in
|
||||
<varname>repmgrd_service_start_command</varname>) was not successfully executed.
|
||||
</para>
|
||||
<para>
|
||||
This can also mean that &repmgr; was unable to confirm whether <application>repmgrd</application>
|
||||
successfully started (unless the <option>--no-wait</option> option was provided).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-daemon-stop">, <xref linkend="repmgr-daemon-status">, <xref linkend="repmgrd-daemon">
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
@@ -1,186 +0,0 @@
|
||||
<refentry id="repmgr-daemon-status">
|
||||
<indexterm>
|
||||
<primary>repmgr daemon status</primary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>displaying daemon status</secondary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr daemon status</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr daemon status</refname>
|
||||
<refpurpose>display information about the status of <application>repmgrd</application> on each node in the cluster</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
This command provides an overview of all active nodes in the cluster and the state
|
||||
of each node's <application>repmgrd</application> instance. It can be used to check
|
||||
the result of <xref linkend="repmgr-daemon-pause"> and <xref linkend="repmgr-daemon-unpause">
|
||||
operations.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
<command>repmgr daemon status</command> can be executed on any active node in the
|
||||
replication cluster. A valid <filename>repmgr.conf</filename> file is required.
|
||||
</para>
|
||||
<para>
|
||||
If PostgreSQL is not running on a node, &repmgr; will not be able to determine the
|
||||
status of that node's <application>repmgrd</application> instance.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
After restarting PostgreSQL on any node, the <application>repmgrd</application> instance
|
||||
will take a second or two before it is able to update its status. Until then,
|
||||
<application>repmgrd</application> will be shown as not running.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Examples</title>
|
||||
<para>
|
||||
<application>repmgrd</application> running normally on all nodes:
|
||||
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
||||
ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen
|
||||
----+-------+---------+----------+---------+---------+-------+---------+--------------------
|
||||
1 | node1 | primary | 100 | running | running | 71987 | no | n/a
|
||||
2 | node2 | standby | 100 | running | running | 71996 | no | 1 second(s) ago
|
||||
3 | node3 | standby | 100 | running | running | 72042 | no | 1 second(s) ago
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<application>repmgrd</application> paused on all nodes (using <xref linkend="repmgr-daemon-pause">):
|
||||
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
||||
ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen
|
||||
----+-------+---------+----------+---------+---------+-------+---------+--------------------
|
||||
1 | node1 | primary | 100 | running | running | 71987 | yes | n/a
|
||||
2 | node2 | standby | 100 | running | running | 71996 | yes | 0 second(s) ago
|
||||
3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<application>repmgrd</application> not running on one node:
|
||||
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
||||
ID | Name | Role | Priority | Status | repmgrd | PID | Paused? | Upstream last seen
|
||||
----+-------+---------+----------+---------+-------------+-------+---------+--------------------
|
||||
1 | node1 | primary | 100 | running | running | 71987 | yes | n/a
|
||||
2 | node2 | standby | 100 | running | not running | n/a | n/a | n/a
|
||||
3 | node3 | standby | 100 | running | running | 72042 | yes | 0 second(s) ago</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--csv</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<command>repmgr daemon status</command> accepts an optional parameter <literal>--csv</literal>, which
|
||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||
parsing by scripts, e.g.:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf daemon status --csv
|
||||
1,node1,primary,1,1,5722,1,100,-1
|
||||
2,node2,standby,1,0,-1,1,100,1
|
||||
3,node3,standby,1,1,5779,1,100,1</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The columns have the following meanings:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
node ID
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
node name
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
node type (primary or standby)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
PostgreSQL server running (1 = running, 0 = not running)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<application>repmgrd</application> running (1 = running, 0 = not running, -1 = unknown)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<application>repmgrd</application> PID (-1 if not running or status unknown)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<application>repmgrd</application> paused (1 = paused, 0 = not paused, -1 = unknown)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<application>repmgrd</application> node priority
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
interval in seconds since the node's upstream was last seen (this will be -1 if the value could not be retrieved, or the node is primary)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--verbose</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Display the full text of any database connection error messages.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-daemon-pause">, <xref linkend="repmgr-daemon-unpause">, <xref linkend="repmgr-cluster-show">
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
@@ -1,200 +0,0 @@
|
||||
<refentry id="repmgr-daemon-stop">
|
||||
<indexterm>
|
||||
<primary>repmgr daemon stop</primary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>stopping</secondary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr daemon stop</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr daemon stop</refname>
|
||||
<refpurpose>Stop the <application>repmgrd</application> daemon</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
This command stops the <application>repmgrd</application> daemon on the
|
||||
local node.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
By default, &repmgr; will wait for up to 15 seconds to confirm that <application>repmgrd</application>
|
||||
stopped. This behaviour can be overridden by specifying a different value using the <option>--wait</option>
|
||||
option, or disabled altogether with the <option>--no-wait</option> option.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
If PostgreSQL is not running on the local node, under some circumstances &repmgr; may not
|
||||
be able to confirm if <application>repmgrd</application> has actually stopped.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<important>
|
||||
<para>
|
||||
The <filename>repmgr.conf</filename> parameter <varname>repmgrd_service_stop_command</varname>
|
||||
must be set for <command>repmgr daemon stop</command> to work; see section
|
||||
<xref linkend="repmgr-daemon-stop-configuration"> for details.
|
||||
</para>
|
||||
</important>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Configuration</title>
|
||||
<para>
|
||||
<command>repmgr daemon stop</command> will execute the command defined by the
|
||||
<varname>repmgrd_service_stop_command</varname> parameter in <filename>repmgr.conf</filename>.
|
||||
This must be set to a shell command which will stop <application>repmgrd</application>;
|
||||
if &repmgr; was installed from a package, this will be the service command defined by the
|
||||
package. For more details see <link linkend="appendix-packages">Appendix: &repmgr; package details</link>.
|
||||
</para>
|
||||
|
||||
<important>
|
||||
<para>
|
||||
If &repmgr; was installed from a system package, and you do not configure
|
||||
<varname>repmgrd_service_stop_command</varname> to an appropriate service command, this may
|
||||
result in the system becoming confused about the state of the <application>repmgrd</application>
|
||||
service; this is particularly the case with <literal>systemd</literal>.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually attempt to stop <application>repmgrd</application>.
|
||||
</para>
|
||||
<para>
|
||||
This action will output the command which would be executed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-w</option></term>
|
||||
<term><option>--wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Wait for the specified number of seconds to confirm that <application>repmgrd</application>
|
||||
stopped successfully.
|
||||
</para>
|
||||
<para>
|
||||
Note that providing <option>--wait=0</option> is the equivalent of <option>--no-wait</option>.
|
||||
</para>
|
||||
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--no-wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Don't wait to confirm that <application>repmgrd</application>
|
||||
stopped successfully.
|
||||
</para>
|
||||
<para>
|
||||
This is equivalent to providing <option>--wait=0</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-daemon-stop-configuration" xreflabel="repmgr daemon stop configuration">
|
||||
<title>Configuration file settings</title>
|
||||
<para>
|
||||
The following parameter in <filename>repmgr.conf</filename> is relevant
|
||||
to <command>repmgr daemon stop</command>:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<indexterm>
|
||||
<primary>repmgrd_service_stop_command</primary>
|
||||
<secondary>with "repmgr daemon stop"</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>repmgrd_service_stop_command</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<command>repmgr daemon stop</command> will execute the command defined by the
|
||||
<varname>repmgrd_service_stop_command</varname> parameter in <filename>repmgr.conf</filename>.
|
||||
This must be set to a shell command which will stop <application>repmgrd</application>;
|
||||
if &repmgr; was installed from a package, this will be the service command defined by the
|
||||
package. For more details see <link linkend="appendix-packages">Appendix: &repmgr; package details</link>.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
If &repmgr; was installed from a system package, and you do not configure
|
||||
<varname>repmgrd_service_stop_command</varname> to an appropriate service command, this may
|
||||
result in the system becoming confused about the state of the <application>repmgrd</application>
|
||||
service; this is particularly the case with <literal>systemd</literal>.
|
||||
</para>
|
||||
</important>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr daemon stop</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application> could be stopped.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<varname>repmgrd_service_stop_command</varname> is not defined in
|
||||
<filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_REPMGRD_SERVICE (27)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application> could not be stopped.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-daemon-start">, <xref linkend="repmgr-daemon-status">, <xref linkend="repmgrd-daemon">
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
@@ -1,109 +0,0 @@
|
||||
<refentry id="repmgr-daemon-unpause">
|
||||
<indexterm>
|
||||
<primary>repmgr daemon unpause</primary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>unpausing</secondary>
|
||||
</indexterm>
|
||||
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr daemon unpause</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr daemon unpause</refname>
|
||||
<refpurpose>Instruct all <application>repmgrd</application> instances in the replication cluster to resume failover operations</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
This command can be run on any active node in the replication cluster to instruct all
|
||||
running <application>repmgrd</application> instances to "unpause"
|
||||
(following a previous execution of <xref linkend="repmgr-daemon-pause">)
|
||||
and resume normal failover/monitoring operation.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
It's important to wait a few seconds after restarting PostgreSQL on any node before running
|
||||
<command>repmgr daemon unpause</command>, as the <application>repmgrd</application> instance
|
||||
on the restarted node will take a second or two before it has updated its status.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
<command>repmgr daemon unpause</command> can be executed on any active node in the
|
||||
replication cluster. A valid <filename>repmgr.conf</filename> file is required.
|
||||
It will have no effect on nodes which are not already paused.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf daemon unpause
|
||||
NOTICE: node 1 (node1) unpaused
|
||||
NOTICE: node 2 (node2) unpaused
|
||||
NOTICE: node 3 (node3) unpaused</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check if nodes are reachable but don't unpause <application>repmgrd</application>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr daemon unpause</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application> could be unpaused on all nodes.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_REPMGRD_PAUSE (26)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application> could not be unpaused on one or more nodes.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-daemon-pause">, <xref linkend="repmgr-daemon-status">
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
@@ -18,14 +18,6 @@
|
||||
Performs some health checks on a node from a replication perspective.
|
||||
This command must be run on the local node.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
Currently &repmgr; performs health checks on physical replication
|
||||
slots only, with the aim of warning about streaming replication standbys which
|
||||
have become detached and the associated risk of uncontrolled WAL file
|
||||
growth.
|
||||
</para>
|
||||
</note>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@@ -38,8 +30,7 @@
|
||||
Replication lag: OK (N/A - node is primary)
|
||||
WAL archiving: OK (0 pending files)
|
||||
Downstream servers: OK (2 of 2 downstream nodes attached)
|
||||
Replication slots: OK (node has no physical replication slots)
|
||||
Missing replication slots: OK (node has no missing physical replication slots)</programlisting>
|
||||
Replication slots: OK (node has no replication slots)</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
<refsect1>
|
||||
@@ -52,7 +43,7 @@
|
||||
OK (node is primary)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Parameters for individual checks are as follows:
|
||||
Parameters for individual checks are as follows:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
@@ -70,9 +61,7 @@
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--archive-ready</literal>: checks for WAL files which have not yet been archived,
|
||||
and returns <literal>WARNING</literal> or <literal>CRITICAL</literal> if the number
|
||||
exceeds <varname>archive_ready_warning</varname> or <varname>archive_ready_critical</varname> respectively.
|
||||
<literal>--archive-ready</literal>: checks for WAL files which have not yet been archived
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
@@ -84,127 +73,15 @@
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--slots</literal>: checks there are no inactive physical replication slots
|
||||
<literal>--slots</literal>: checks there are no inactive replication slots
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--missing-slots</literal>: checks there are no missing physical replication slots
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--data-directory-config</literal>: checks the data directory configured in
|
||||
<filename>repmgr.conf</filename> matches the actual data directory.
|
||||
This check is not directly related to replication, but is useful to verify &repmgr;
|
||||
is correctly configured.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
Individual checks can also be output in a Nagios-compatible format by additionally
|
||||
providing the option <literal>--nagios</literal>.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Output format</title>
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--csv</literal>: generate output in CSV format (not available
|
||||
for individual checks)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--nagios</literal>: generate output in a Nagios-compatible format
|
||||
(for individual checks only)
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
|
||||
<para>
|
||||
When executing <command>repmgr node check</command> with one of the individual
|
||||
checks listed above, &repmgr; will emit one of the following Nagios-style exit codes
|
||||
(even if <literal>--nagios</literal> is not supplied):
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>0</literal>: OK
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>1</literal>: WARNING
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>2</literal>: ERROR
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>3</literal>: UNKNOWN
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
|
||||
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr node check</command>
|
||||
if no individual check was specified.
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
No issues were detected.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_NODE_STATUS (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more issues were detected.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-node-status">, <xref linkend="repmgr-cluster-show">
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
@@ -28,10 +28,6 @@
|
||||
If the node is running and needs to be attached to the current primary, use
|
||||
<xref linkend="repmgr-standby-follow">.
|
||||
</para>
|
||||
<para>
|
||||
Note <xref linkend="repmgr-standby-follow"> can only be used for standbys which have not diverged
|
||||
from the rest of the cluster.
|
||||
</para>
|
||||
</tip>
|
||||
</refsect1>
|
||||
|
||||
@@ -50,155 +46,11 @@
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually execute the rejoin.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--force-rewind[=/path/to/pg_rewind]</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Execute <application>pg_rewind</application>.
|
||||
</para>
|
||||
<para>
|
||||
It is only necessary to provide the <application>pg_rewind</application> path
|
||||
if using PostgreSQL 9.3 or 9.4, and <application>pg_rewind</application>
|
||||
is not installed in the PostgreSQL <filename>bin</filename> directory.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--config-files</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
comma-separated list of configuration files to retain after
|
||||
executing <application>pg_rewind</application>.
|
||||
</para>
|
||||
<para>
|
||||
Currently <application>pg_rewind</application> will overwrite
|
||||
the local node's configuration files with the files from the source node,
|
||||
so it's advisable to use this option to ensure they are kept.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--config-archive-dir</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Directory to temporarily store configuration files specified with
|
||||
<option>--config-files</option>; default: <filename>/tmp</filename>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-W/--no-wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Don't wait for the node to rejoin cluster.
|
||||
</para>
|
||||
<para>
|
||||
If this option is supplied, &repmgr; will restart the node but
|
||||
not wait for it to connect to the primary.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Configuration file settings</title>
|
||||
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>node_rejoin_timeout</literal>:
|
||||
the maximum length of time (in seconds) to wait for
|
||||
the node to reconnect to the replication cluster (defaults to
|
||||
the value set in <literal>standby_reconnect_timeout</literal>,
|
||||
60 seconds).
|
||||
</simpara>
|
||||
<simpara>
|
||||
Note that <literal>standby_reconnect_timeout</literal> must be
|
||||
set to a value equal to or greater than
|
||||
<literal>node_rejoin_timeout</literal>.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-node-rejoin-events">
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>node_rejoin</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr node rejoin</command>:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The node rejoin succeeded; or if <option>--dry-run</option> was provided,
|
||||
no issues were detected which would prevent the node rejoin.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
A configuration issue was detected which prevented &repmgr; from
|
||||
continuing with the node rejoin.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_NO_RESTART (4)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The node could not be restarted.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_REJOIN_FAIL (24)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The node rejoin operation failed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Notes</title>
|
||||
@@ -222,162 +74,78 @@
|
||||
postgres --single -D /var/lib/pgsql/data/ < /dev/null</programlisting>
|
||||
</para>
|
||||
</tip>
|
||||
<para>
|
||||
&repmgr; will attempt to verify whether the node can rejoin as-is, or whether
|
||||
<command>pg_rewind</command> must be used (see following section).
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-node-rejoin-pg-rewind" xreflabel="Using pg_rewind">
|
||||
|
||||
<indexterm>
|
||||
<primary>pg_rewind</primary>
|
||||
<secondary>using with "repmgr node rejoin"</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Using <command>pg_rewind</command></title>
|
||||
<para>
|
||||
<command>repmgr node rejoin</command> can optionally use <command>pg_rewind</command> to re-integrate a
|
||||
node which has diverged from the rest of the cluster, typically a failed primary.
|
||||
<command>pg_rewind</command> is available in PostgreSQL 9.5 and later as part of the core distribution,
|
||||
and can be installed from external sources for PostgreSQL 9.3 and 9.4.
|
||||
<command>pg_rewind</command> is available in PostgreSQL 9.5 and later.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
<command>pg_rewind</command> <emphasis>requires</emphasis> that either
|
||||
<varname>wal_log_hints</varname> is enabled, or that
|
||||
data checksums were enabled when the cluster was initialized. See the
|
||||
<ulink url="https://www.postgresql.org/docs/current/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
We strongly recommend familiarizing yourself with <command>pg_rewind</command> before attempting
|
||||
to use it with &repmgr;, as while it is an extremely useful tool, it is <emphasis>not</emphasis>
|
||||
a "magic bullet" which can resolve all problematic replication situations.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
A typical use-case for <command>pg_rewind</command> is when a scenario like the following
|
||||
is encountered:
|
||||
<programlisting>
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run
|
||||
INFO: replication connection to the rejoin target node was successful
|
||||
INFO: local and rejoin target system identifiers match
|
||||
DETAIL: system identifier is 6652184002263212600
|
||||
ERROR: this node cannot attach to rejoin target node 3
|
||||
DETAIL: rejoin target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/610D710
|
||||
HINT: use --force-rewind to execute pg_rewind</programlisting>
|
||||
|
||||
Here, <literal>node3</literal> was promoted to a primary while the local node was
|
||||
still attached to the previous primary; this can potentially happen during e.g. a
|
||||
network split. <command>pg_rewind</command> can re-sync the local node with <literal>node3</literal>,
|
||||
removing the need for a full reclone.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To have <command>repmgr node rejoin</command> use <command>pg_rewind</command>,
|
||||
To have <command>repmgr node rejoin</command> use <command>pg_rewind</command> if required,
|
||||
pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
|
||||
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
||||
</para>
|
||||
|
||||
<important>
|
||||
<para>
|
||||
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
||||
rewind operation, any configuration files in the PostgreSQL data directory will be
|
||||
overwritten with those from the source server.
|
||||
</para>
|
||||
<para>
|
||||
To prevent this happening, provide a comma-separated list of files to retain
|
||||
using the <literal>--config-files</literal> command line option; the specified files
|
||||
will be archived in a temporary directory (whose parent directory can be specified with
|
||||
<literal>--config-archive-dir</literal>) and restored once the rewind operation is
|
||||
complete.
|
||||
</para>
|
||||
</important>
|
||||
<para>
|
||||
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
||||
rewind operation, any configuration files in the PostgreSQL data directory will be
|
||||
overwritten with those from the source server.
|
||||
</para>
|
||||
<para>
|
||||
To prevent this happening, provide a comma-separated list of files to retain
|
||||
using the <literal>--config-files</literal> command line option; the specified files
|
||||
will be archived in a temporary directory (whose parent directory can be specified with
|
||||
<literal>--config-archive-dir</literal>) and restored once the rewind operation is
|
||||
complete.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Example, first using <literal>--dry-run</literal>, then actually executing the
|
||||
<literal>node rejoin</literal> command.
|
||||
<programlisting>
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind --dry-run
|
||||
INFO: replication connection to the rejoin target node was successful
|
||||
INFO: local and rejoin target system identifiers match
|
||||
DETAIL: system identifier is 6652460429293670710
|
||||
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 3
|
||||
DETAIL: rejoin target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/610D710
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
|
||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run
|
||||
NOTICE: using provided configuration file "/etc/repmgr.conf"
|
||||
INFO: prerequisites for using pg_rewind are met
|
||||
INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node2/postgresql.local.conf"
|
||||
INFO: file "postgresql.replication-setup.conf" would be copied to "/tmp/repmgr-config-archive-node2/postgresql.replication-setup.conf"
|
||||
INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.local.conf"
|
||||
INFO: file "postgresql.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.conf"
|
||||
INFO: 2 files would have been copied to "/tmp/repmgr-config-archive-node1"
|
||||
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
|
||||
INFO: pg_rewind would now be executed
|
||||
DETAIL: pg_rewind command is:
|
||||
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr'
|
||||
INFO: prerequisites for executing NODE REJOIN are met</programlisting>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
|
||||
this checks the prerequisites for using <application>pg_rewind</application>, but is
|
||||
not an absolute guarantee that actually executing <application>pg_rewind</application>
|
||||
will succeed. See also section <xref linkend="repmgr-node-rejoin-caveats"> below.
|
||||
</para>
|
||||
|
||||
</note>
|
||||
|
||||
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
|
||||
<programlisting>
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind
|
||||
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 3
|
||||
DETAIL: rejoin target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/610D710
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
|
||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
|
||||
NOTICE: using provided configuration file "/etc/repmgr.conf"
|
||||
INFO: prerequisites for using pg_rewind are met
|
||||
INFO: 2 files copied to "/tmp/repmgr-config-archive-node1"
|
||||
NOTICE: executing pg_rewind
|
||||
DETAIL: pg_rewind command is "pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr'"
|
||||
NOTICE: 2 files copied to /var/lib/postgresql/data
|
||||
NOTICE: setting node 2's upstream to node 3
|
||||
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
||||
NOTICE: 2 files copied to /var/lib/pgsql/data
|
||||
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
|
||||
INFO: deleting "recovery.done"
|
||||
INFO: setting node 1's primary to node 2
|
||||
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
||||
waiting for server to start.... done
|
||||
server started
|
||||
NOTICE: NODE REJOIN successful
|
||||
DETAIL: node 2 is now attached to node 3</programlisting>
|
||||
DETAIL: node 1 is now attached to node 2</programlisting>
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-node-rejoin-caveats" xreflabel="Caveats">
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgr node rejoin</primary>
|
||||
<secondary>caveats</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Caveats when using <command>repmgr node rejoin</command></title>
|
||||
<para>
|
||||
<command>repmgr node rejoin</command> attempts to determine whether it will succeed by
|
||||
comparing the timelines and relative WAL positions of the local node (rejoin candidate) and primary
|
||||
(rejoin target). This is particularly important if planning to use <application>pg_rewind</application>,
|
||||
which currently (as of PostgreSQL 11) may appear to succeed (or indicate there is no action
|
||||
needed) but potentially allow an impossible action, such as trying to rejoin a standby to a
|
||||
primary which is behind the standby. &repmgr; will prevent this situation from occurring.
|
||||
</para>
|
||||
<para>
|
||||
Currently it is <emphasis>not</emphasis> possible to detect a situation where the rejoin target
|
||||
is a standby which has been "promoted" by removing <filename>recovery.conf</filename>
|
||||
(PostgreSQL 12 and later: <filename>standby.signal</filename>) and restarting it.
|
||||
In this case there will be no information about the point the rejoin target diverged
|
||||
from the current standby; the rejoin operation will fail and
|
||||
the current standby's PostgreSQL log will contain entries with the text
|
||||
"<literal>record with incorrect prev-link</literal>".
|
||||
</para>
|
||||
<para>
|
||||
We strongly recommend running <command>repmgr node rejoin</command> with the
|
||||
<option>--dry-run</option> option first. Additionally it might be a good idea
|
||||
to execute the <application>pg_rewind</application> command displayed by
|
||||
&repmgr; with the <application>pg_rewind</application> <option>--dry-run</option>
|
||||
option. Note that <application>pg_rewind</application> does not indicate that it
|
||||
is running in <option>--dry-run</option> mode.
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
|
||||
@@ -1,151 +0,0 @@
|
||||
<refentry id="repmgr-node-service">
|
||||
<indexterm>
|
||||
<primary>repmgr node service</primary>
|
||||
</indexterm>
|
||||
|
||||
<refmeta>
|
||||
<refentrytitle>repmgr node service</refentrytitle>
|
||||
</refmeta>
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr node service</refname>
|
||||
<refpurpose>show or execute the system service command to stop/start/restart/reload/promote a node</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
<para>
|
||||
Shows or executes the system service command to stop/start/restart/reload/promote a node.
|
||||
</para>
|
||||
<para>
|
||||
This command is mainly meant for internal &repmgr; usage, but is useful for
|
||||
confirming the command configuration.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Log the steps which would be taken, including displaying the command which would be executed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--action</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The action to perform. One of <literal>start</literal>, <literal>stop</literal>,
|
||||
<literal>restart</literal>, <literal>reload</literal> or <literal>promote</literal>.
|
||||
</para>
|
||||
<para>
|
||||
If the parameter <option>--list-actions</option> is provided together with
|
||||
<option>--action</option>, the command which would be executed will be printed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--list-actions</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
List all configured commands.
|
||||
</para>
|
||||
<para>
|
||||
If the parameter <option>--action</option> is provided together with
|
||||
<option>--list-actions</option>, the command which would be executed for that
|
||||
particular action will be printed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--checkpoint</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Issue a <command>CHECKPOINT</command> before stopping or restarting the node.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr node service</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
No issues were detected.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_LOCAL_COMMAND (5)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Execution of the system service command failed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Examples</title>
|
||||
<para>
|
||||
See what action would be taken for a restart:
|
||||
<programlisting>
|
||||
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --action=restart --checkpoint --dry-run
|
||||
INFO: a CHECKPOINT would be issued here
|
||||
INFO: would execute server command "sudo service postgresql-11 restart"</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Restart the PostgreSQL instance:
|
||||
<programlisting>
|
||||
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --action=restart --checkpoint
|
||||
NOTICE: issuing CHECKPOINT
|
||||
DETAIL: executing server command "sudo service postgresql-11 restart"
|
||||
Redirecting to /bin/systemctl restart postgresql-11.service</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
List all commands:
|
||||
<programlisting>
|
||||
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --list-actions
|
||||
Following commands would be executed for each action:
|
||||
|
||||
start: "sudo service postgresql-11 start"
|
||||
stop: "sudo service postgresql-11 stop"
|
||||
restart: "sudo service postgresql-11 restart"
|
||||
reload: "sudo service postgresql-11 reload"
|
||||
promote: "/usr/pgsql-11/bin/pg_ctl -w -D '/var/lib/pgsql/11/data' promote"</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
List a single command:
|
||||
<programlisting>
|
||||
[postgres@node1 ~]$ repmgr -f /etc/repmgr/11/repmgr.conf node service --list-actions --action=promote
|
||||
/usr/pgsql-11/bin/pg_ctl -w -D '/var/lib/pgsql/11/data' promote </programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
@@ -24,7 +24,7 @@
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf node status
|
||||
$ repmgr -f /etc/repmgr.conf node status
|
||||
Node "node1":
|
||||
PostgreSQL version: 10beta1
|
||||
Total data size: 30 MB
|
||||
@@ -38,54 +38,10 @@
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Output format</title>
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--csv</literal>: generate output in CSV format
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr node status</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
No issues were detected.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_NODE_STATUS (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more issues were detected.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
See <xref linkend="repmgr-node-check"> to diagnose issues and <xref linkend="repmgr-cluster-show">
|
||||
for an overview of all nodes in the cluster.
|
||||
See <xref linkend="repmgr-node-check"> to diagnose issues.
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
@@ -17,25 +17,16 @@
|
||||
<title>Description</title>
|
||||
<para>
|
||||
<command>repmgr primary register</command> registers a primary node in a
|
||||
streaming replication cluster, and configures it for use with &repmgr;, including
|
||||
streaming replication cluster, and configures it for use with repmgr, including
|
||||
installing the &repmgr; extension. This command needs to be executed before any
|
||||
standby nodes are registered.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
It's possible to install the &repmgr; extension manually before executing
|
||||
<command>repmgr primary register</command>; in this case &repmgr; will
|
||||
detect the presence of the extension and skip that step.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
Execute with the <option>--dry-run</option> option to check what would happen without
|
||||
Execute with the <literal>--dry-run</literal> option to check what would happen without
|
||||
actually registering the primary.
|
||||
</para>
|
||||
<para>
|
||||
@@ -44,58 +35,23 @@
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If providing the configuration file location with <option>-f/--config-file</option>,
|
||||
avoid using a relative path, as &repmgr; stores the configuration file location
|
||||
in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
|
||||
<xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert the
|
||||
relative path into an absolute one, but this may not be the same as the path you
|
||||
would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
|
||||
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
||||
<filename>/path/to/repmgr.conf</filename>).
|
||||
</para>
|
||||
<para>
|
||||
If providing the configuration file location with <literal>-f/--config-file</literal>,
|
||||
avoid using a relative path, as &repmgr; stores the configuration file location
|
||||
in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
|
||||
<xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert the
|
||||
relative path into an absolute one, but this may not be the same as the path you
|
||||
would explicitly provide (e.g. <filename>./repmgr.conf</filename> might be converted
|
||||
to <filename>/path/to/./repmgr.conf</filename>, whereas you'd normally write
|
||||
<filename>/path/to/repmgr.conf</filename>).
|
||||
</para>
|
||||
</note>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually register the primary.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-F</option>, <option>--force</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Overwrite an existing node record
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1 id="repmgr-primary-register-events">
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
The following <link linkend="event-notifications">event notifications</link> will be generated:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara><literal>cluster_created</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>primary_register</literal></simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
A <literal>primary_register</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
<command>repmgr primary unregister</command> can be run on any active &repmgr; node,
|
||||
<command>repmgr primary unregister</command> should be run on the current primary,
|
||||
with the ID of the node to unregister passed as <option>--node-id</option>.
|
||||
</para>
|
||||
<para>
|
||||
@@ -64,7 +64,7 @@
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-primary-unregister-events">
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>primary_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
|
||||
@@ -25,11 +25,9 @@
|
||||
<note>
|
||||
<simpara>
|
||||
<command>repmgr standby clone</command> does not start the standby, and after cloning
|
||||
a standby, the command <command>repmgr standby register</command> must be executed to
|
||||
notify &repmgr; of its existence.
|
||||
<command>repmgr standby register</command> must be executed to notify &repmgr; of its presence.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
|
||||
@@ -49,7 +47,7 @@
|
||||
not be copied by default. &repmgr; can copy these files, either to the same
|
||||
location on the standby server (provided appropriate directory and file permissions
|
||||
are available), or into the standby's data directory. This requires passwordless
|
||||
SSH access to the primary server. Add the option <option>--copy-external-config-files</option>
|
||||
SSH access to the primary server. Add the option <literal>--copy-external-config-files</literal>
|
||||
to the <command>repmgr standby clone</command> command; by default files will be copied to
|
||||
the same path as on the upstream server. Note that the user executing <command>repmgr</command>
|
||||
must have write access to those directories.
|
||||
@@ -59,96 +57,15 @@
|
||||
<literal>--copy-external-config-files=pgdata</literal>, but note that
|
||||
any include directives in the copied files may need to be updated.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
When executing <command>repmgr standby clone</command> with the
|
||||
<option>--copy-external-config-files</option> and <option>--dry-run</option>
|
||||
options, &repmgr; will check the SSH connection to the source node, but
|
||||
will not verify whether the files can actually be copied.
|
||||
</para>
|
||||
<para>
|
||||
During the actual clone operation, a check will be made before the database itself
|
||||
is cloned to determine whether the files can actually be copied; if any problems are
|
||||
encountered, the clone operation will be aborted, enabling the user to fix
|
||||
any issues before retrying the clone operation.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<tip>
|
||||
<simpara>
|
||||
For reliable configuration file management we recommend using a
|
||||
configuration management tool such as Ansible, Chef, Puppet or Salt.
|
||||
</simpara>
|
||||
</tip>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-clone-recovery-conf">
|
||||
<indexterm>
|
||||
<primary>recovery.conf</primary>
|
||||
<secondary>customising with "repmgr standby clone"</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Customising recovery.conf</title>
|
||||
<para>
|
||||
By default, &repmgr; will create a minimal <filename>recovery.conf</filename>
|
||||
containing the following parameters:
|
||||
</para>
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>standby_mode</varname> (always <literal>'on'</literal>)</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>recovery_target_timeline</varname> (always <literal>'latest'</literal>)</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>primary_conninfo</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>primary_slot_name</varname> (if replication slots in use)</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
The following additional parameters can be specified in <filename>repmgr.conf</filename>
|
||||
for inclusion in <filename>recovery.conf</filename>:
|
||||
</para>
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>restore_command</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>archive_cleanup_command</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara><varname>recovery_min_apply_delay</varname></simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
We recommend using <ulink url="https://www.pgbarman.org/">Barman</ulink> to manage
|
||||
WAL file archiving. For more details on combining &repmgr; and <application>Barman</application>,
|
||||
in particular using <varname>restore_command</varname> to configure Barman as a backup source of
|
||||
WAL files, see <xref linkend="cloning-from-barman">.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-clone-wal-management">
|
||||
<refsect1 id="repmgr-standby-clone-wal-management" xreflabel="Managing WAL during the cloning process">
|
||||
<title>Managing WAL during the cloning process</title>
|
||||
<para>
|
||||
When initially cloning a standby, you will need to ensure
|
||||
@@ -170,7 +87,7 @@
|
||||
pg_basebackup_options='--xlog-method=fetch'</programlisting>
|
||||
|
||||
and ensure that <literal>wal_keep_segments</literal> is set to an appropriately high value.
|
||||
See the <ulink url="https://www.postgresql.org/docs/current/app-pgbasebackup.html">
|
||||
See the <ulink url="https://www.postgresql.org/docs/current/static/app-pgbasebackup.html">
|
||||
pg_basebackup</ulink> documentation for details.
|
||||
</para>
|
||||
|
||||
@@ -183,186 +100,12 @@
|
||||
</note>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1 id="repmgr-standby-create-recovery-conf">
|
||||
|
||||
<indexterm>
|
||||
<primary>recovery.conf</primary>
|
||||
<secondary>generating for a standby cloned by another method</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Using a standby cloned by another method</title>
|
||||
<para>
|
||||
&repmgr; supports standbys cloned by another method (e.g. using <application>barman</application>'s
|
||||
<command><ulink url="http://docs.pgbarman.org/release/2.5/#recover">barman recover</ulink></command> command).
|
||||
</para>
|
||||
<para>
|
||||
To integrate the standby as a &repmgr; node, once the standby has been cloned,
|
||||
ensure the <filename>repmgr.conf</filename>
|
||||
file is created for the node, and that it has been registered using
|
||||
<command><link linkend="repmgr-standby-register">repmgr standby register</link></command>.
|
||||
Then execute the command <command>repmgr standby clone --recovery-conf-only</command>.
|
||||
This will create the <filename>recovery.conf</filename> file needed to attach
|
||||
the node to its upstream, and will also create a replication slot on the
|
||||
upstream node if required.
|
||||
</para>
|
||||
<para>
|
||||
Note that the upstream node must be running. An existing
|
||||
<filename>recovery.conf</filename> will not be overwritten unless the
|
||||
<option>-F/--force</option> option is provided.
|
||||
</para>
|
||||
<para>
|
||||
Execute <command>repmgr standby clone --recovery-conf-only --dry-run</command>
|
||||
to check the prerequisites for creating the <filename>recovery.conf</filename> file,
|
||||
and display the contents of the file without actually creating it.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
<option>--recovery-conf-only</option> was introduced in &repmgr; <link linkend="release-4.0.4">4.0.4</link>.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-d, --dbname=CONNINFO</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Connection string of the upstream node to use for cloning.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually clone the standby.
|
||||
</para>
|
||||
<para>
|
||||
If <option>--recovery-conf-only</option> is specified, the contents of
|
||||
the generated <filename>recovery.conf</filename> file will be displayed
|
||||
but the file itself will not be written.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-c, --fast-checkpoint</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Force fast checkpoint (not effective when cloning from Barman).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--copy-external-config-files[={samepath|pgdata}]</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Copy configuration files located outside the data directory on the source
|
||||
node to the same path on the standby (default) or to the
|
||||
PostgreSQL data directory.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--no-upstream-connection</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
When using Barman, do not connect to upstream node.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-R, --remote-user=USERNAME</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Remote system username for SSH operations (default: current local system username).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--recovery-conf-only</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Create <filename>recovery.conf</filename> file for a previously cloned instance. &repmgr; 4.0.4 and later.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--replication-user</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
User to make replication connections with (optional, not usually required).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--superuser</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
If the &repmgr; user is not a superuser, the name of a valid superuser must
|
||||
be provided with this option.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--upstream-conninfo</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>primary_conninfo</literal> value to write in recovery.conf
|
||||
when the intended upstream server does not yet exist.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--upstream-node-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
ID of the upstream node to replicate from (optional, defaults to primary node)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--without-barman</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Do not use Barman even if configured.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-clone-events">
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_clone</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
See <xref linkend="cloning-standbys"> for details about various aspects of cloning.
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
|
||||
@@ -9,61 +9,27 @@
|
||||
|
||||
<refnamediv>
|
||||
<refname>repmgr standby follow</refname>
|
||||
<refpurpose>attach a running standby to a new upstream node</refpurpose>
|
||||
<refpurpose>attach a standby to a new primary</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
|
||||
<para>
|
||||
Attaches the standby ("follow candidate") to a new upstream node
|
||||
("follow target"). Typically this will be the primary, but this
|
||||
command can also be used to attach the standby to another standby.
|
||||
</para>
|
||||
<para>
|
||||
This command requires a valid
|
||||
Attaches the standby to a new primary. This command requires a valid
|
||||
<filename>repmgr.conf</filename> file for the standby, either specified
|
||||
explicitly with <literal>-f/--config-file</literal> or located in a
|
||||
default location; no additional arguments are required.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
By default &repmgr; will attempt to attach the standby to the current primary.
|
||||
If <option>--upstream-node-id</option> is provided, &repmgr; will attempt
|
||||
to attach the standby to the specified node, which can be another standby.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
This command will force a restart of the standby server, which must be
|
||||
running.
|
||||
running. It can only be used to attach an active standby to the current primary node
|
||||
(and not to another standby).
|
||||
</para>
|
||||
<para>
|
||||
To re-add an inactive node to the replication cluster, see
|
||||
<xref linkend="repmgr-node-rejoin">.
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
To re-add an inactive node to the replication cluster, use
|
||||
<xref linkend="repmgr-node-rejoin">.
|
||||
</para>
|
||||
</tip>
|
||||
|
||||
<para>
|
||||
<command>repmgr standby follow</command> will wait up to
|
||||
<varname>standby_follow_timeout</varname> seconds (default: <literal>30</literal>)
|
||||
to verify the standby has actually connected to the new upstream node.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If <option>recovery_min_apply_delay</option> is set for the standby, it
|
||||
will not attach to the new upstream node until it has replayed available
|
||||
WAL.
|
||||
</para>
|
||||
<para>
|
||||
Conversely, if the standby is attached to an upstream standby
|
||||
which has <option>recovery_min_apply_delay</option> set, the upstream
|
||||
standby's replay state may actually be behind that of its new downstream node.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@@ -90,55 +56,25 @@
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually follow a new upstream node.
|
||||
</para>
|
||||
<para>
|
||||
This will also verify whether the standby is capable of following the new upstream node.
|
||||
Check prerequisites but don't actually follow a new primary.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
If a standby was turned into a primary by removing <filename>recovery.conf</filename>
|
||||
(<application>PostgreSQL 12</application> and later: <filename>standby.signal</filename>),
|
||||
&repmgr; will <emphasis>not</emphasis> be able to determine whether that primary's timeline
|
||||
has diverged from the timeline of the standby ("follow candidate").
|
||||
</para>
|
||||
<para>
|
||||
We recommend always using <link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>
|
||||
to promote a standby to primary, as this will ensure that the new primary
|
||||
will perform a timeline switch (making it practical to check for timeline divergence)
|
||||
and also that &repmgr; metadata is updated correctly.
|
||||
This does not guarantee the standby can follow the primary; in
|
||||
particular, whether the primary and standby timelines have diverged,
|
||||
can currently only be determined by actually attempting to
|
||||
attach the standby to the primary.
|
||||
</para>
|
||||
</important>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--upstream-node-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Node ID of the new upstream node ("follow target").
|
||||
</para>
|
||||
<para>
|
||||
If not provided, &repmgr; will attempt to follow the current primary node.
|
||||
</para>
|
||||
<para>
|
||||
Note that when using <application>repmgrd</application>, <option>--upstream-node-id</option>
|
||||
should always be configured;
|
||||
see <link linkend="repmgrd-automatic-failover-configuration">Automatic failover configuration</link>
|
||||
for details.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-w</option></term>
|
||||
<term><option>-W</option></term>
|
||||
<term><option>--wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Wait for a primary to appear. &repmgr; will wait for up to
|
||||
<varname>primary_follow_timeout</varname> seconds
|
||||
(default: 60 seconds) to verify that the standby is following the new primary.
|
||||
This value can be defined in <filename>repmgr.conf</filename>.
|
||||
Wait for a primary to appear.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@@ -147,103 +83,12 @@
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
|
||||
<para>
|
||||
Execute with the <literal>--dry-run</literal> option to test the follow operation as
|
||||
far as possible, without actually changing the status of the node.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Note that &repmgr; will first attempt to determine whether the standby
|
||||
("follow candidate") is capable of following the
|
||||
new upstream node ("follow target").
|
||||
</para>
|
||||
<para>
|
||||
If, for example, the new upstream node has diverged from this node's timeline,
|
||||
for example if the new upstream node was promoted to primary while this node
|
||||
was still attached to the original primary, it will <emphasis>not</emphasis>
|
||||
be possible to follow the new upstream node, and &repmgr; will emit an error
|
||||
message like this:
|
||||
<programlisting>
|
||||
ERROR: this node cannot attach to follow target node 3
|
||||
DETAIL: follow target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/6108880</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
In this case, it may be possible to have this node follow the new upstream
|
||||
using <command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>
|
||||
with the <option>--force-rewind</option> option to execute <command>pg_rewind</command>.
|
||||
This does mean that transactions which exist on this node, but not the new upstream,
|
||||
will be lost.
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr standby follow</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The follow operation succeeded; or if <option>--dry-run</option> was provided,
|
||||
no issues were detected which would prevent the follow operation.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_BAD_CONFIG (1)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
A configuration issue was detected which prevented &repmgr; from
|
||||
continuing with the follow operation.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_NO_RESTART (4)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The node could not be restarted.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_DB_CONN (6)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr; was unable to establish a database connection to one of the nodes.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_FOLLOW_FAIL (23)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr; was unable to complete the follow command.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-follow-events">
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_follow</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
</para>
|
||||
<para>
|
||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the node
|
||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the primary
|
||||
being followed, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
||||
<literal>%a</literal> with its node name.
|
||||
</para>
|
||||
@@ -256,3 +101,4 @@ DETAIL: follow target server's timeline 2 forked off current database system tim
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
|
||||
@@ -26,33 +26,8 @@
|
||||
by using <xref linkend="repmgr-standby-follow">; if <application>repmgrd</application>
|
||||
is active, it will handle this automatically.
|
||||
</para>
|
||||
<para>
|
||||
Note that &repmgr; will wait for up to <varname>promote_check_timeout</varname> seconds
|
||||
(default: 60 seconds) to verify that the standby has been promoted, and will
|
||||
check the promotion every <varname>promote_check_interval</varname> seconds (default: 1 second).
|
||||
Both values can be defined in <filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If WAL replay is paused on the standby, and not all WAL files on the standby have been
|
||||
replayed, &repmgr; will not attempt to promote it.
|
||||
</para>
|
||||
<para>
|
||||
This is because if WAL replay is paused, PostgreSQL itself will not react to a promote command
|
||||
until WAL replay is resumed and all pending WAL has been replayed. This means
|
||||
attempting to promote PostgreSQL in this state will leave PostgreSQL in a condition where the
|
||||
promotion may occur at an unpredictable point in the future.
|
||||
</para>
|
||||
<para>
|
||||
Note that if the standby is in archive recovery, &repmgr; will not be able to determine
|
||||
if more WAL is pending replay, and will abort the promotion attempt if WAL replay is paused.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Example</title>
|
||||
<para>
|
||||
@@ -67,129 +42,7 @@
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check if this node can be promoted, but don't carry out the promotion
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Configuration file settings</title>
|
||||
<para>
|
||||
The following parameters in <filename>repmgr.conf</filename> are relevant to the
|
||||
promote operation:
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<indexterm>
|
||||
<primary>promote_check_interval</primary>
|
||||
<secondary>with "repmgr standby promote"</secondary>
|
||||
</indexterm>
|
||||
<simpara>
|
||||
<literal>promote_check_interval</literal>:
|
||||
interval (in seconds, default: 1 second) to wait between each check
|
||||
to determine whether the standby has been promoted.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<indexterm>
|
||||
<primary>promote_check_timeout</primary>
|
||||
<secondary>with "repmgr standby promote"</secondary>
|
||||
</indexterm>
|
||||
<simpara>
|
||||
<literal>promote_check_timeout</literal>:
|
||||
time (in seconds, default: 60 seconds) to wait to verify that the standby has been promoted
|
||||
before exiting with <literal>ERR_PROMOTION_FAIL</literal>.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
The following exit codes can be emitted by <command>repmgr standby promote</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The standby was successfully promoted to primary.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_DB_CONN (6)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr; was unable to connect to the local PostgreSQL node.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL must be running before the node can be promoted.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_PROMOTION_FAIL (8)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The node could not be promoted to primary for one of the following
|
||||
reasons:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
there is an existing primary node in the replication cluster
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
the node is not a standby
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
WAL replay is paused on the node
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
execution of the PostgreSQL promote command failed
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1 id="repmgr-standby-promote-events">
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_promote</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
|
||||
@@ -92,74 +92,7 @@
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-register-node-cloned-other-source">
|
||||
<title>Registering a node not cloned by repmgr</title>
|
||||
<para>
|
||||
If you've cloned a standby using another method (e.g. <application>barman</application>'s
|
||||
<command>barman recover</command> command), first execute
|
||||
<link linkend="repmgr-standby-create-recovery-conf">repmgr standby clone --recovery-conf-only</link>
|
||||
to add the <filename>recovery.conf</filename> file, then register the standby as usual.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually register the standby.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-F</option>, <option>--force</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Overwrite an existing node record
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--upstream-node-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
ID of the upstream node to replicate from (optional)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--wait-start</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
wait for the standby to start (timeout in seconds, default 30 seconds)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--wait-sync</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
wait for the node record to synchronise to the standby (optional timeout in seconds)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-register-events">
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_register</literal> <link linkend="event-notifications">event notification</link>
|
||||
@@ -173,7 +106,7 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the
|
||||
If provided, &repmgr; will subsitute the placeholders <literal>%p</literal> with the node ID of the
|
||||
primary node, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
||||
<literal>%a</literal> with its node name.
|
||||
</para>
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
<refpurpose>promote a standby to primary and demote the existing primary to a standby</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
|
||||
@@ -35,29 +34,7 @@
|
||||
&repmgr; will attempt to check for potential issues but cannot guarantee
|
||||
a successful switchover.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; will refuse to perform the switchover if an exclusive backup is running on
|
||||
the current primary, or if WAL replay is paused on the standby.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
For more details on performing a switchover, including preparation and configuration,
|
||||
see section <xref linkend="performing-switchover">.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
From <link linkend="release-4.2">repmgr 4.2</link>, &repmgr; will instruct any running
|
||||
<application>repmgrd</application> instances to pause operations while the switchover
|
||||
is being carried out, to prevent <application>repmgrd</application> from
|
||||
unintentionally promoting a node. For more details, see <xref linkend="repmgrd-pausing">.
|
||||
</para>
|
||||
<para>
|
||||
Users of &repmgr; versions prior to 4.2 should ensure that <application>repmgrd</application>
|
||||
is not running on any nodes while a switchover is being executed.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@@ -68,9 +45,8 @@
|
||||
<term><option>--always-promote</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Promote standby to primary, even if it is behind or has diverged
|
||||
from the original primary. The original primary will be shut down in any case,
|
||||
and will need to be manually reintegrated into the replication cluster.
|
||||
Promote standby to primary, even if it is behind the original primary
|
||||
(original primary will be shut down in any case).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@@ -108,14 +84,11 @@
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--force-rewind[=/path/to/pg_rewind]</option></term>
|
||||
<term><option>--force-rewind</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Use <application>pg_rewind</application> to reintegrate the old primary if necessary
|
||||
(and the prerequisites for using <application>pg_rewind</application> are met).
|
||||
If using PostgreSQL 9.3 or 9.4, and the <application>pg_rewind</application>
|
||||
binary is not installed in the PostgreSQL <filename>bin</filename> directory,
|
||||
provide its full path. For more details see also <xref linkend="switchover-pg-rewind">.
|
||||
(PostgreSQL 9.5 and later).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@@ -130,23 +103,7 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--repmgrd-no-pause</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Don't pause <application>repmgrd</application> while executing a switchover.
|
||||
</para>
|
||||
<para>
|
||||
This option should not be used unless you take steps by other means
|
||||
to ensure <application>repmgrd</application> is paused or not
|
||||
running on all nodes.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
|
||||
<term><option>--siblings-follow</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
@@ -158,126 +115,6 @@
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Configuration file settings</title>
|
||||
|
||||
<para>
|
||||
The following parameters in <filename>repmgr.conf</filename> are relevant to the
|
||||
switchover operation:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<indexterm>
|
||||
<primary>replication_lag_critical</primary>
|
||||
<secondary>with "repmgr standby switchover"</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>replication_lag_critical</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
If replication lag (in seconds) on the standby exceeds this value, the
|
||||
switchover will be aborted (unless the <literal>-F/--force</literal> option
|
||||
is provided)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<indexterm>
|
||||
<primary>shutdown_check_timeout</primary>
|
||||
<secondary>with "repmgr standby switchover"</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>shutdown_check_timeout</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The maximum number of seconds to wait for the
|
||||
demotion candidate (current primary) to shut down, before aborting the switchover.
|
||||
</para>
|
||||
<para>
|
||||
Note that this parameter is set on the node where <command>repmgr standby switchover</command>
|
||||
is executed (promotion candidate); setting it on the demotion candidate (former primary) will
|
||||
have no effect.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
In versions prior to <link linkend="release-4.2">&repmgr; 4.2</link>, <command>repmgr standby switchover</command> would
|
||||
use the values defined in <literal>reconnect_attempts</literal> and <literal>reconnect_interval</literal>
|
||||
to determine the timeout for demotion candidate shutdown.
|
||||
</para>
|
||||
</note>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<indexterm>
|
||||
<primary>wal_receive_check_timeout</primary>
|
||||
<secondary>with "repmgr standby switchover"</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>wal_receive_check_timeout</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
After the primary has shut down, the maximum number of seconds to wait for the
|
||||
walreceiver on the standby to flush WAL to disk before comparing WAL receive location
|
||||
with the primary's shut down location.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<indexterm>
|
||||
<primary>standby_reconnect_timeout</primary>
|
||||
<secondary>with "repmgr standby switchover"</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>standby_reconnect_timeout</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The maximum number of seconds to attempt to wait for the demotion candidate (former primary)
|
||||
to reconnect to the promoted primary (default: 60 seconds)
|
||||
</para>
|
||||
<para>
|
||||
Note that this parameter is set on the node where <command>repmgr standby switchover</command>
|
||||
is executed (promotion candidate); setting it on the demotion candidate (former primary) will
|
||||
have no effect.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<indexterm>
|
||||
<primary>node_rejoin_timeout</primary>
|
||||
<secondary>with "repmgr standby switchover"</secondary>
|
||||
</indexterm>
|
||||
|
||||
<term><option>node_rejoin_timeout</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
maximum number of seconds to attempt to wait for the demotion candidate (former primary)
|
||||
to reconnect to the promoted primary (default: 60 seconds)
|
||||
</para>
|
||||
<para>
|
||||
Note that this parameter is set on the demotion candidate (former primary);
|
||||
setting it on the node where <command>repmgr standby switchover</command> is
|
||||
executed will have no effect.
|
||||
</para>
|
||||
<para>
|
||||
However, this value <emphasis>must</emphasis> be less than <option>standby_reconnect_timeout</option> on the
|
||||
promotion candidate (the node where <command>repmgr standby switchover</command> is executed).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
|
||||
@@ -285,7 +122,10 @@
|
||||
Execute with the <literal>--dry-run</literal> option to test the switchover as far as
|
||||
possible without actually changing the status of either node.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<application>repmgrd</application> should not be active on any nodes while a switchover is being
|
||||
executed. This restriction may be lifted in a later version.
|
||||
</para>
|
||||
<para>
|
||||
External database connections, e.g. from an application, should not be permitted while
|
||||
the switchover is taking place. In particular, active transactions on the primary
|
||||
@@ -293,7 +133,7 @@
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-switchover-events">
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
<literal>standby_switchover</literal> and <literal>standby_promote</literal>
|
||||
@@ -310,7 +150,7 @@
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
One of the following exit codes will be emitted by <command>repmgr standby switchover</command>:
|
||||
The following exit codes can be emitted by <literal>repmgr standby switchover</literal>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
@@ -318,8 +158,7 @@
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The switchover completed successfully; or if <option>--dry-run</option> was provided,
|
||||
no issues were detected which would prevent the switchover operation.
|
||||
The switchover completed successfully.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@@ -339,7 +178,7 @@
|
||||
<para>
|
||||
The switchover was executed but a problem was encountered.
|
||||
Typically this means the former primary could not be reattached
|
||||
as a standby. Check preceding log messages for more information.
|
||||
as a standby.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@@ -350,10 +189,7 @@
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-standby-follow">, <xref linkend="repmgr-node-rejoin">
|
||||
</para>
|
||||
<para>
|
||||
For more details on performing a switchover operation, see the section <xref linkend="performing-switchover">.
|
||||
For more details see the section <xref linkend="performing-switchover">.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
@@ -44,22 +44,6 @@
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--node-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<varname>node_id</varname> of the node to unregister (optional)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1 id="repmgr-standby-unregister-events">
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
|
||||
@@ -23,27 +23,14 @@
|
||||
use of the witness server with <application>repmgrd</application>.
|
||||
</para>
|
||||
<para>
|
||||
When executing <command>repmgr witness register</command>, database connection
|
||||
information for the cluster primary server must also be provided.
|
||||
When executing <command>repmgr witness register</command>, connection information
|
||||
for the cluster primary server must also be provided. &repmgr; will automatically
|
||||
use the <varname>user</varname> and <varname>dbname</varname> values defined
|
||||
in the <varname>conninfo</varname> string defined in the witness node's
|
||||
<filename>repmgr.conf</filename>, if these are not explicitly provided.
|
||||
</para>
|
||||
<para>
|
||||
In most cases it's only necessary to provide the primary's hostname with
|
||||
the <option>-h</option>/<option>--host</option> option; &repmgr; will
|
||||
automatically use the <varname>user</varname> and <varname>dbname</varname>
|
||||
values defined in the <varname>conninfo</varname> string defined in the
|
||||
witness node's <filename>repmgr.conf</filename>, unless these are explicitly
|
||||
provided as command line options.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
The primary server must be registered with <command><link linkend="repmgr-primary-register">repmgr primary register</link></command> before the witness
|
||||
server can be registered.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
Execute with the <option>--dry-run</option> option to check what would happen
|
||||
Execute with the <literal>--dry-run</literal> option to check what would happen
|
||||
without actually registering the witness server.
|
||||
</para>
|
||||
</refsect1>
|
||||
@@ -63,7 +50,7 @@
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1 id="repmgr-witness-register-events">
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>witness_register</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
|
||||
@@ -20,10 +20,7 @@
|
||||
</para>
|
||||
<para>
|
||||
The node does not have to be running to be unregistered, however if this is the
|
||||
case then either provide connection information for the primary server, or
|
||||
execute <command>repmgr witness unregister</command> on a running node and
|
||||
provide the parameter <option>--node-id</option> with the node ID of the
|
||||
witness server.
|
||||
case then connection information for the primary server must be provided.
|
||||
</para>
|
||||
<para>
|
||||
Execute with the <literal>--dry-run</literal> option to check what would happen
|
||||
@@ -39,17 +36,17 @@
|
||||
INFO: connecting to witness node "node3" (ID: 3)
|
||||
INFO: unregistering witness node 3
|
||||
INFO: witness unregistration complete
|
||||
DETAIL: witness node with ID 3 successfully unregistered</programlisting>
|
||||
DETAIL: witness node with id 3 (conninfo: host=node3 dbname=repmgr user=repmgr port=5499) successfully unregistered</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Unregistering a non-running witness node:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf witness unregister -h node1 -p 5501 -F
|
||||
INFO: connecting to node "node3" (ID: 3)
|
||||
NOTICE: unable to connect to node "node3" (ID: 3), removing node record on cluster primary only
|
||||
INFO: connecting to witness node "node3" (ID: 3)
|
||||
NOTICE: unable to connect to witness node "node3" (ID: 3), removing node record on cluster primary only
|
||||
INFO: unregistering witness node 3
|
||||
INFO: witness unregistration complete
|
||||
DETAIL: witness node with ID 3 successfully unregistered</programlisting>
|
||||
DETAIL: witness node with id 3 (conninfo: host=node3 dbname=repmgr user=repmgr port=5499) successfully unregistered</programlisting>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
@@ -65,34 +62,8 @@
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually unregister the witness.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--node-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Unregister witness server with the specified node ID.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1 id="repmgr-witness-unregister-events">
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>witness_unregister</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
|
||||
@@ -24,26 +24,26 @@
|
||||
<abstract>
|
||||
<para>
|
||||
This is the official documentation of &repmgr; &repmgrversion; for
|
||||
use with PostgreSQL 9.3 - PostgreSQL 11.
|
||||
use with PostgreSQL 9.3 - PostgreSQL 10.
|
||||
It describes the functionality supported by the current version of &repmgr;.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
&repmgr; is developed by
|
||||
&repmgr; was developed by
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
|
||||
along with contributions from other individuals and companies.
|
||||
Contributions from the community are appreciated and welcome - get
|
||||
in touch via <ulink url="https://github.com/2ndQuadrant/repmgr">github</ulink>
|
||||
or <ulink url="https://groups.google.com/group/repmgr">the mailing list/forum</ulink>.
|
||||
in touch via <ulink url="https://github.com/2ndQuadrant/repmgr">github</>
|
||||
or <ulink url="https://groups.google.com/group/repmgr">the mailing list/forum</>.
|
||||
Multiple 2ndQuadrant customers contribute funding
|
||||
to make repmgr development possible.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
&repmgr; is fully supported by 2ndQuadrant's
|
||||
<ulink url="https://www.2ndquadrant.com/en/support/support-postgresql/">24/7 Production Support</ulink>.
|
||||
2ndQuadrant, a Major Sponsor of the PostgreSQL project, continues to develop and maintain &repmgr;.
|
||||
Other companies as well as individual developers are welcome to participate in the efforts.
|
||||
2ndQuadrant, a Platinum sponsor of the PostgreSQL project,
|
||||
continues to develop repmgr to meet internal needs and those of customers.
|
||||
Other companies as well as individual developers
|
||||
are welcome to participate in the efforts.
|
||||
</para>
|
||||
</abstract>
|
||||
|
||||
@@ -73,16 +73,21 @@
|
||||
&promoting-standby;
|
||||
&follow-new-primary;
|
||||
&switchover;
|
||||
&configuring-witness-server;
|
||||
&event-notifications;
|
||||
&upgrading-repmgr;
|
||||
</part>
|
||||
|
||||
<part id="using-repmgrd">
|
||||
<title>Using repmgrd</title>
|
||||
&repmgrd-overview;
|
||||
&repmgrd-automatic-failover;
|
||||
&repmgrd-configuration;
|
||||
&repmgrd-operation;
|
||||
&repmgrd-demonstration;
|
||||
&repmgrd-cascading-replication;
|
||||
&repmgrd-network-split;
|
||||
&repmgrd-witness-server;
|
||||
&repmgrd-degraded-monitoring;
|
||||
&repmgrd-monitoring;
|
||||
&repmgrd-bdr;
|
||||
</part>
|
||||
|
||||
@@ -102,24 +107,17 @@
|
||||
&repmgr-node-status;
|
||||
&repmgr-node-check;
|
||||
&repmgr-node-rejoin;
|
||||
&repmgr-node-service;
|
||||
&repmgr-cluster-show;
|
||||
&repmgr-cluster-matrix;
|
||||
&repmgr-cluster-crosscheck;
|
||||
&repmgr-cluster-event;
|
||||
&repmgr-cluster-cleanup;
|
||||
&repmgr-daemon-status;
|
||||
&repmgr-daemon-start;
|
||||
&repmgr-daemon-stop;
|
||||
&repmgr-daemon-pause;
|
||||
&repmgr-daemon-unpause;
|
||||
</part>
|
||||
|
||||
&appendix-release-notes;
|
||||
&appendix-signatures;
|
||||
&appendix-faq;
|
||||
&appendix-packages;
|
||||
&appendix-support;
|
||||
|
||||
<![%include-index;[&bookindex;]]>
|
||||
<![%include-xslt-index;[<index id="bookindex"></index>]]>
|
||||
|
||||
@@ -13,285 +13,5 @@
|
||||
providing monitoring information about the state of each standby.
|
||||
</para>
|
||||
|
||||
<sect1 id="repmgrd-witness-server" xreflabel="Using a witness server with repmgrd">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>witness server</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>witness server</primary>
|
||||
<secondary>repmgrd</secondary>
|
||||
</indexterm>
|
||||
<title>Using a witness server</title>
|
||||
<para>
|
||||
A <xref linkend="witness-server"> is a normal PostgreSQL instance which
|
||||
is not part of the streaming replication cluster; its purpose is, if a
|
||||
failover situation occurs, to provide proof that it is the primary server
|
||||
itself which is unavailable, rather than e.g. a network split between
|
||||
different physical locations.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
A typical use case for a witness server is a two-node streaming replication
|
||||
setup, where the primary and standby are in different locations (data centres).
|
||||
By creating a witness server in the same location (data centre) as the primary,
|
||||
if the primary becomes unavailable it's possible for the standby to decide whether
|
||||
it can promote itself without risking a "split brain" scenario: if it can't see either the
|
||||
witness or the primary server, it's likely there's a network-level interruption
|
||||
and it should not promote itself. If it can see the witness but not the primary,
|
||||
this proves there is no network interruption and the primary itself is unavailable,
|
||||
and it can therefore promote itself (and ideally take action to fence the
|
||||
former primary).
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
<emphasis>Never</emphasis> install a witness server on the same physical host
|
||||
as another node in the replication cluster managed by &repmgr; - it's essential
|
||||
the witness is not affected in any way by failure of another node.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
For more complex replication scenarios, e.g. with multiple datacentres, it may
|
||||
be preferable to use location-based failover, which ensures that only nodes
|
||||
in the same location as the primary will ever be promotion candidates;
|
||||
see <xref linkend="repmgrd-network-split"> for more details.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
A witness server will only be useful if <application>repmgrd</application>
|
||||
is in use.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<sect2 id="creating-witness-server">
|
||||
<title>Creating a witness server</title>
|
||||
<para>
|
||||
To create a witness server, set up a normal PostgreSQL instance on a server
|
||||
in the same physical location as the cluster's primary server.
|
||||
</para>
|
||||
<para>
|
||||
This instance should <emphasis>not</emphasis> be on the same physical host as the primary server,
|
||||
as otherwise if the primary server fails due to hardware issues, the witness
|
||||
server will be lost too.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
&repmgr; 3.3 and earlier provided a <command>repmgr create witness</command>
|
||||
command, which would automatically create a PostgreSQL instance. However
|
||||
this often resulted in an unsatisfactory, hard-to-customise instance.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
The witness server should be configured in the same way as a normal
|
||||
&repmgr; node; see section <xref linkend="configuration">.
|
||||
</para>
|
||||
<para>
|
||||
Register the witness server with <xref linkend="repmgr-witness-register">.
|
||||
This will create the &repmgr; extension on the witness server, and make
|
||||
a copy of the &repmgr; metadata.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
As the witness server is not part of the replication cluster, further
|
||||
changes to the &repmgr; metadata will be synchronised by
|
||||
<application>repmgrd</application>.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
Once the witness server has been configured, <application>repmgrd</application>
|
||||
should be started.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To unregister a witness server, use <xref linkend="repmgr-witness-unregister">.
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
<sect1 id="repmgrd-network-split" xreflabel="Handling network splits with repmgrd">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>network splits</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>network splits</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>Handling network splits with repmgrd</title>
|
||||
<para>
|
||||
A common pattern for replication cluster setups is to spread servers over
|
||||
more than one datacentre. This can provide benefits such as geographically-
|
||||
distributed read replicas and DR (disaster recovery) capability. However
|
||||
this also means there is a risk of disconnection at network level between
|
||||
datacentre locations, which would result in a split-brain scenario if
|
||||
servers in a secondary data centre were no longer able to see the primary
|
||||
in the main data centre and promoted a standby among themselves.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; enables provision of "<xref linkend="witness-server">" to
|
||||
artificially create a quorum of servers in a particular location, ensuring
|
||||
that nodes in another location will not elect a new primary if they
|
||||
are unable to see the majority of nodes. However this approach does not
|
||||
scale well, particularly with more complex replication setups, e.g.
|
||||
where the majority of nodes are located outside of the primary datacentre.
|
||||
It also means the <literal>witness</literal> node needs to be managed as an
|
||||
extra PostgreSQL instance outside of the main replication cluster, which
|
||||
adds administrative and programming complexity.
|
||||
</para>
|
||||
<para>
|
||||
<literal>repmgr4</literal> introduces the concept of <literal>location</literal>:
|
||||
each node is associated with an arbitrary location string (default is
|
||||
<literal>default</literal>); this is set in <filename>repmgr.conf</filename>, e.g.:
|
||||
<programlisting>
|
||||
node_id=1
|
||||
node_name=node1
|
||||
conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'
|
||||
data_directory='/var/lib/postgresql/data'
|
||||
location='dc1'</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
In a failover situation, <application>repmgrd</application> will check if any servers in the
|
||||
same location as the current primary node are visible. If not, <application>repmgrd</application>
|
||||
will assume a network interruption and not promote any node in any
|
||||
other location (it will however enter <link linkend="repmgrd-degraded-monitoring">degraded monitoring</link>
|
||||
mode until a primary becomes visible).
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-standby-disconnection-on-failover" xreflabel="Standby disconnection on failover">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>standby disconnection on failover</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>standby disconnection on failover</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>Standby disconnection on failover</title>
|
||||
<para>
|
||||
If <option>standby_disconnect_on_failover</option> is set to <literal>true</literal> in
|
||||
<filename>repmgr.conf</filename>, in a failover situation <application>repmgrd</application> will forcibly disconnect
|
||||
the local node's WAL receiver before making a failover decision.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
<option>standby_disconnect_on_failover</option> is available from PostgreSQL 9.5 and later.
|
||||
Additionally this requires that the <literal>repmgr</literal> database user is a superuser.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
By doing this, it's possible to ensure that, at the point the failover decision is made, no nodes
|
||||
are receiving data from the primary and their LSN location will be static.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
<option>standby_disconnect_on_failover</option> <emphasis>must</emphasis> be set to the same value on
|
||||
all nodes.
|
||||
</para>
|
||||
</important>
|
||||
<para>
|
||||
Note that when using <option>standby_disconnect_on_failover</option> there will be a delay of 5 seconds
|
||||
plus however many seconds it takes to confirm the WAL receiver is disconnected before
|
||||
<application>repmgrd</application> proceeds with the failover decision.
|
||||
</para>
|
||||
<para>
|
||||
Following the failover operation, no matter what the outcome, each node will reconnect its WAL receiver.
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-failover-validation" xreflabel="Failover validation">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>failover validation</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>failover validation</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>Failover validation</title>
|
||||
<para>
|
||||
From <link linkend="release-4.3">repmgr 4.3</link>, &repmgr; makes it possible to provide a script
|
||||
to <application>repmgrd</application> which, in a failover situation,
|
||||
will be executed by the promotion candidate (the node which has been selected
|
||||
to be the new primary) to confirm whether the node should actually be promoted.
|
||||
</para>
|
||||
<para>
|
||||
To use this, set <option>failover_validation_command</option> in <filename>repmgr.conf</filename>
|
||||
to a script executable by the <literal>postgres</literal> system user, e.g.:
|
||||
<programlisting>
|
||||
failover_validation_command=/path/to/script.sh %n %a</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The <literal>%n</literal> parameter will be replaced with the node ID, and the
|
||||
<literal>%a</literal> parameter will be replaced by the node name when the script is executed.
|
||||
</para>
|
||||
<para>
|
||||
This script must return an exit code of <literal>0</literal> to indicate the node should promote itself.
|
||||
Any other value will result in the promotion being aborted and the election rerun.
|
||||
There is a pause of <option>election_rerun_interval</option> seconds before the election is rerun.
|
||||
</para>
|
||||
<para>
|
||||
Sample <application>repmgrd</application> log file output during which the failover validation
|
||||
script rejects the proposed promotion candidate:
|
||||
<programlisting>
|
||||
[2019-03-13 21:01:30] [INFO] visible nodes: 2; total nodes: 2; no nodes have seen the primary within the last 4 seconds
|
||||
[2019-03-13 21:01:30] [NOTICE] promotion candidate is "node2" (ID: 2)
|
||||
[2019-03-13 21:01:30] [NOTICE] executing "failover_validation_command"
|
||||
[2019-03-13 21:01:30] [DETAIL] /usr/local/bin/failover-validation.sh 2
|
||||
[2019-03-13 21:01:30] [INFO] output returned by failover validation command:
|
||||
Node ID: 2
|
||||
|
||||
[2019-03-13 21:01:30] [NOTICE] failover validation command returned a non-zero value: "1"
|
||||
[2019-03-13 21:01:30] [NOTICE] promotion candidate election will be rerun
|
||||
[2019-03-13 21:01:30] [INFO] 1 followers to notify
|
||||
[2019-03-13 21:01:30] [NOTICE] notifying node "node3" (node ID: 3) to rerun promotion candidate selection
|
||||
INFO: node 3 received notification to rerun promotion candidate election
|
||||
[2019-03-13 21:01:30] [NOTICE] rerunning election after 15 seconds ("election_rerun_interval")</programlisting>
|
||||
</para>
|
||||
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="cascading-replication" xreflabel="Cascading replication">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>cascading replication</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>cascading replication</primary>
|
||||
<secondary>repmgrd</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>repmgrd and cascading replication</title>
|
||||
<para>
|
||||
Cascading replication - where a standby can connect to an upstream node and not
|
||||
the primary server itself - was introduced in PostgreSQL 9.2. &repmgr; and
|
||||
<application>repmgrd</application> support cascading replication by keeping track of the relationship
|
||||
between standby servers - each node record is stored with the node id of its
|
||||
upstream ("parent") server (except of course the primary server).
|
||||
</para>
|
||||
<para>
|
||||
In a failover situation where the primary node fails and a top-level standby
|
||||
is promoted, a standby connected to another standby will not be affected
|
||||
and continue working as normal (even if the upstream standby it's connected
|
||||
to becomes the primary node). If however the node's direct upstream fails,
|
||||
the "cascaded standby" will attempt to reconnect to that node's parent
|
||||
(unless <varname>failover</varname> is set to <literal>manual</literal> in
|
||||
<filename>repmgr.conf</filename>).
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
</chapter>
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
Due to the nature of BDR 1.x/2.x, it's only safe to use this solution for
|
||||
Due to the nature of BDR, it's only safe to use this solution for
|
||||
a two-node scenario. Introducing additional nodes will create an inherent
|
||||
risk of node desynchronisation if a node goes down without being cleanly
|
||||
removed from the cluster.
|
||||
@@ -99,16 +99,15 @@
|
||||
replication cluster. The database must be the BDR-enabled database.
|
||||
</para>
|
||||
<para>
|
||||
If defined, the <varname>event_notifications</varname> parameter will restrict
|
||||
execution of the script defined in <varname>event_notification_command</varname>
|
||||
If defined, the <application>event_notifications</application> parameter
|
||||
will restrict execution of <varname>event_notification_command</varname>
|
||||
to the specified event(s).
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
<varname>event_notification_command</varname> is the script which does the actual "heavy lifting"
|
||||
of reconfiguring the proxy server/connection pooler. It is fully
|
||||
user-definable; see section <xref linkend="bdr-event-notification-command"> for a reference
|
||||
implementation.
|
||||
user-definable; a reference implementation is documented below.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
@@ -170,8 +169,8 @@
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="bdr-event-notification-command" xreflabel='Defining the BDR failover "event_notification_command"'>
|
||||
<title>Defining the BDR failover "event_notification_command"</title>
|
||||
<sect1 id="bdr-event-notification-command" xreflabel="BDR failover event notification command">
|
||||
<title>Defining the "event_notification_command"</title>
|
||||
<para>
|
||||
Key to "failover" execution is the <literal>event_notification_command</literal>,
|
||||
which is a user-definable script specified in <filename>repmgr.conf</filename>
|
||||
|
||||
22
doc/repmgrd-cascading-replication.sgml
Normal file
22
doc/repmgrd-cascading-replication.sgml
Normal file
@@ -0,0 +1,22 @@
|
||||
<chapter id="repmgrd-cascading-replication">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>cascading replication</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>repmgrd and cascading replication</title>
|
||||
<para>
|
||||
Cascading replication - where a standby can connect to an upstream node and not
|
||||
the primary server itself - was introduced in PostgreSQL 9.2. &repmgr; and
|
||||
<application>repmgrd</application> support cascading replication by keeping track of the relationship
|
||||
between standby servers - each node record is stored with the node id of its
|
||||
upstream ("parent") server (except of course the primary server).
|
||||
</para>
|
||||
<para>
|
||||
In a failover situation where the primary node fails and a top-level standby
|
||||
is promoted, a standby connected to another standby will not be affected
|
||||
and continue working as normal (even if the upstream standby it's connected
|
||||
to becomes the primary node). If however the node's direct upstream fails,
|
||||
the "cascaded standby" will attempt to reconnect to that node's parent.
|
||||
</para>
|
||||
</chapter>
|
||||
File diff suppressed because it is too large
Load Diff
75
doc/repmgrd-degraded-monitoring.sgml
Normal file
75
doc/repmgrd-degraded-monitoring.sgml
Normal file
@@ -0,0 +1,75 @@
|
||||
<chapter id="repmgrd-degraded-monitoring">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>degraded monitoring</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>"degraded monitoring" mode</title>
|
||||
<para>
|
||||
In certain circumstances, <application>repmgrd</application> is not able to fulfill its primary mission
|
||||
of monitoring the nodes' upstream server. In these cases it enters "degraded
|
||||
monitoring" mode, where <application>repmgrd</application> remains active but is waiting for the situation
|
||||
to be resolved.
|
||||
</para>
|
||||
<para>
|
||||
Situations where this happens are:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but no nodes in the primary node's location are visible</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but no promotion candidate is available</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but the promotion candidate could not be promoted</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but the node was unable to follow the new primary</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but no primary has become available</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but automatic failover is not enabled for the node</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>repmgrd is monitoring the primary node, but it is not available</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Example output in a situation where there is only one standby with <literal>failover=manual</literal>,
|
||||
and the primary node is unavailable (but is later restarted):
|
||||
<programlisting>
|
||||
[2017-08-29 10:59:19] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)
|
||||
[2017-08-29 10:59:33] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||
[2017-08-29 10:59:33] [INFO] checking state of node 1, 1 of 5 attempts
|
||||
[2017-08-29 10:59:33] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
(...)
|
||||
[2017-08-29 10:59:37] [INFO] checking state of node 1, 5 of 5 attempts
|
||||
[2017-08-29 10:59:37] [WARNING] unable to reconnect to node 1 after 5 attempts
|
||||
[2017-08-29 10:59:37] [NOTICE] this node is not configured for automatic failover so will not be considered as promotion candidate
|
||||
[2017-08-29 10:59:37] [NOTICE] no other nodes are available as promotion candidate
|
||||
[2017-08-29 10:59:37] [HINT] use "repmgr standby promote" to manually promote this node
|
||||
[2017-08-29 10:59:37] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
||||
[2017-08-29 10:59:53] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
||||
[2017-08-29 11:00:45] [NOTICE] reconnected to upstream node 1 after 68 seconds, resuming monitoring
|
||||
[2017-08-29 11:00:57] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)</programlisting>
|
||||
|
||||
</para>
|
||||
<para>
|
||||
By default, <literal>repmgrd</literal> will continue in degraded monitoring mode indefinitely.
|
||||
However a timeout (in seconds) can be set with <varname>degraded_monitoring_timeout</varname>,
|
||||
after which <application>repmgrd</application> will terminate.
|
||||
|
||||
</para>
|
||||
|
||||
</chapter>
|
||||
96
doc/repmgrd-demonstration.sgml
Normal file
96
doc/repmgrd-demonstration.sgml
Normal file
@@ -0,0 +1,96 @@
|
||||
<chapter id="repmgrd-demonstration">
|
||||
<title>repmgrd demonstration</title>
|
||||
<para>
|
||||
To demonstrate automatic failover, set up a 3-node replication cluster (one primary
|
||||
and two standbys streaming directly from the primary) so that the cluster looks
|
||||
something like this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+--------------------------------------
|
||||
1 | node1 | primary | * running | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | standby | running | node1 | default | host=node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node1 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Start <application>repmgrd</application> on each standby and verify that it's running by examining the
|
||||
log output, which at log level <literal>INFO</literal> will look like this:
|
||||
<programlisting>
|
||||
[2017-08-24 17:31:00] [NOTICE] using configuration file "/etc/repmgr.conf"
|
||||
[2017-08-24 17:31:00] [INFO] connecting to database "host=node2 dbname=repmgr user=repmgr"
|
||||
[2017-08-24 17:31:00] [NOTICE] starting monitoring of node <literal>node2</literal> (ID: 2)
|
||||
[2017-08-24 17:31:00] [INFO] monitoring connection to upstream node "node1" (node ID: 1)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Each <application>repmgrd</application> should also have recorded its successful startup as an event:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster event --event=repmgrd_start
|
||||
Node ID | Name | Event | OK | Timestamp | Details
|
||||
---------+-------+---------------+----+---------------------+-------------------------------------------------------------
|
||||
3 | node3 | repmgrd_start | t | 2017-08-24 17:35:54 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||
2 | node2 | repmgrd_start | t | 2017-08-24 17:35:50 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||
1 | node1 | repmgrd_start | t | 2017-08-24 17:35:46 | monitoring cluster primary "node1" (node ID: 1) </programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Now stop the current primary server with e.g.:
|
||||
<programlisting>
|
||||
pg_ctl -D /var/lib/postgresql/data -m immediate stop</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This will force the primary to shut down straight away, aborting all processes
|
||||
and transactions. This will cause a flurry of activity in the <application>repmgrd</application> log
|
||||
files as each <application>repmgrd</application> detects the failure of the primary and a failover
|
||||
decision is made. This is an extract from the log of a standby server (<literal>node2</literal>)
|
||||
which has been promoted to new primary after failure of the original primary (<literal>node1</literal>).
|
||||
<programlisting>
|
||||
[2017-08-24 23:32:01] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state
|
||||
[2017-08-24 23:32:08] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||
[2017-08-24 23:32:08] [INFO] checking state of node 1, 1 of 5 attempts
|
||||
[2017-08-24 23:32:08] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-08-24 23:32:09] [INFO] checking state of node 1, 2 of 5 attempts
|
||||
[2017-08-24 23:32:09] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-08-24 23:32:10] [INFO] checking state of node 1, 3 of 5 attempts
|
||||
[2017-08-24 23:32:10] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-08-24 23:32:11] [INFO] checking state of node 1, 4 of 5 attempts
|
||||
[2017-08-24 23:32:11] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2017-08-24 23:32:12] [INFO] checking state of node 1, 5 of 5 attempts
|
||||
[2017-08-24 23:32:12] [WARNING] unable to reconnect to node 1 after 5 attempts
|
||||
INFO: setting voting term to 1
|
||||
INFO: node 2 is candidate
|
||||
INFO: node 3 has received request from node 2 for electoral term 1 (our term: 0)
|
||||
[2017-08-24 23:32:12] [NOTICE] this node is the winner, will now promote self and inform other nodes
|
||||
INFO: connecting to standby database
|
||||
NOTICE: promoting standby
|
||||
DETAIL: promoting server using 'pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' promote'
|
||||
INFO: reconnecting to promoted server
|
||||
NOTICE: STANDBY PROMOTE successful
|
||||
DETAIL: node 2 was successfully promoted to primary
|
||||
INFO: node 3 received notification to follow node 2
|
||||
[2017-08-24 23:32:13] [INFO] switching to primary monitoring mode</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The cluster status will now look like this, with the original primary (<literal>node1</literal>)
|
||||
marked as inactive, and standby <literal>node3</literal> now following the new primary
|
||||
(<literal>node2</literal>):
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show
|
||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
||||
----+-------+---------+-----------+----------+----------+----------------------------------------------------
|
||||
1 | node1 | primary | - failed | | default | host=node1 dbname=repmgr user=repmgr
|
||||
2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr
|
||||
3 | node3 | standby | running | node2 | default | host=node3 dbname=repmgr user=repmgr</programlisting>
|
||||
|
||||
</para>
|
||||
<para>
|
||||
<command>repmgr cluster event</command> will display a summary of what happened to each server
|
||||
during the failover:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster event
|
||||
Node ID | Name | Event | OK | Timestamp | Details
|
||||
---------+-------+--------------------------+----+---------------------+-----------------------------------------------------------------------------------
|
||||
3 | node3 | repmgrd_failover_follow | t | 2017-08-24 23:32:16 | node 3 now following new upstream node 2
|
||||
3 | node3 | standby_follow | t | 2017-08-24 23:32:16 | node 3 is now attached to node 2
|
||||
2 | node2 | repmgrd_failover_promote | t | 2017-08-24 23:32:13 | node 2 promoted to primary; old primary 1 marked as failed
|
||||
2 | node2 | standby_promote | t | 2017-08-24 23:32:13 | node 2 was successfully promoted to primary</programlisting>
|
||||
</para>
|
||||
</chapter>
|
||||
76
doc/repmgrd-monitoring.sgml
Normal file
76
doc/repmgrd-monitoring.sgml
Normal file
@@ -0,0 +1,76 @@
|
||||
<chapter id="repmgrd-monitoring">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>monitoring</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Monitoring with repmgrd</title>
|
||||
<para>
|
||||
When <application>repmgrd</application> is running with the option <literal>monitoring_history=true</literal>,
|
||||
it will constantly write standby node status information to the
|
||||
<varname>monitoring_history</varname> table, providing a near-real time
|
||||
overview of replication status on all nodes
|
||||
in the cluster.
|
||||
</para>
|
||||
<para>
|
||||
The view <literal>replication_status</literal> shows the most recent state
|
||||
for each node, e.g.:
|
||||
<programlisting>
|
||||
repmgr=# select * from repmgr.replication_status;
|
||||
-[ RECORD 1 ]-------------+------------------------------
|
||||
primary_node_id | 1
|
||||
standby_node_id | 2
|
||||
standby_name | node2
|
||||
node_type | standby
|
||||
active | t
|
||||
last_monitor_time | 2017-08-24 16:28:41.260478+09
|
||||
last_wal_primary_location | 0/6D57A00
|
||||
last_wal_standby_location | 0/5000000
|
||||
replication_lag | 29 MB
|
||||
replication_time_lag | 00:00:11.736163
|
||||
apply_lag | 15 MB
|
||||
communication_time_lag | 00:00:01.365643</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The interval in which monitoring history is written is controlled by the
|
||||
configuration parameter <varname>monitor_interval_secs</varname>;
|
||||
default is 2.
|
||||
</para>
|
||||
<para>
|
||||
As this can generate a large amount of monitoring data in the table
|
||||
<literal>repmgr.monitoring_history</literal>, it's advisable to regularly
|
||||
purge historical data using the <xref linkend="repmgr-cluster-cleanup">
|
||||
command; use the <literal>-k/--keep-history</literal> option to
|
||||
specify how many days' worth of data should be retained.
|
||||
</para>
|
||||
<para>
|
||||
It's possible to use <application>repmgrd</application> to run in monitoring
|
||||
mode only (without automatic failover capability) for some or all
|
||||
nodes by setting <literal>failover=manual</literal> in the node's
|
||||
<filename>repmgr.conf</filename> file. In the event of the node's upstream failing,
|
||||
no failover action will be taken and the node will require manual intervention to
|
||||
be reattached to replication. If this occurs, an
|
||||
<link linkend="event-notifications">event notification</link>
|
||||
<varname>standby_disconnect_manual</varname> will be created.
|
||||
</para>
|
||||
<para>
|
||||
Note that when a standby node is not streaming directly from its upstream
|
||||
node, e.g. recovering WAL from an archive, <varname>apply_lag</varname> will always appear as
|
||||
<literal>0 bytes</literal>.
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
If monitoring history is enabled, the contents of the <literal>repmgr.monitoring_history</literal>
|
||||
table will be replicated to attached standbys. This means there will be a small but
|
||||
constant stream of replication activity which may not be desirable. To prevent
|
||||
this, convert the table to an <literal>UNLOGGED</literal> one with:
|
||||
<programlisting>
|
||||
ALTER TABLE repmgr.monitoring_history SET UNLOGGED;</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This will however mean that monitoring history will not be available on
|
||||
another node following a failover, and the view <literal>repmgr.replication_status</literal>
|
||||
will not work on standbys.
|
||||
</para>
|
||||
</tip>
|
||||
</chapter>
|
||||
48
doc/repmgrd-network-split.sgml
Normal file
48
doc/repmgrd-network-split.sgml
Normal file
@@ -0,0 +1,48 @@
|
||||
<chapter id="repmgrd-network-split" xreflabel="Handling network splits with repmgrd">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>network splits</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Handling network splits with repmgrd</title>
|
||||
<para>
|
||||
A common pattern for replication cluster setups is to spread servers over
|
||||
more than one datacentre. This can provide benefits such as geographically-
|
||||
distributed read replicas and DR (disaster recovery) capability. However
|
||||
this also means there is a risk of disconnection at network level between
|
||||
datacentre locations, which would result in a split-brain scenario if
|
||||
servers in a secondary data centre were no longer able to see the primary
|
||||
in the main data centre and promoted a standby among themselves.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; enables provision of "<xref linkend="witness-server">" to
|
||||
artificially create a quorum of servers in a particular location, ensuring
|
||||
that nodes in another location will not elect a new primary if they
|
||||
are unable to see the majority of nodes. However this approach does not
|
||||
scale well, particularly with more complex replication setups, e.g.
|
||||
where the majority of nodes are located outside of the primary datacentre.
|
||||
It also means the <literal>witness</literal> node needs to be managed as an
|
||||
extra PostgreSQL instance outside of the main replication cluster, which
|
||||
adds administrative and programming complexity.
|
||||
</para>
|
||||
<para>
|
||||
<literal>repmgr4</literal> introduces the concept of <literal>location</literal>:
|
||||
each node is associated with an arbitrary location string (default is
|
||||
<literal>default</literal>); this is set in <filename>repmgr.conf</filename>, e.g.:
|
||||
<programlisting>
|
||||
node_id=1
|
||||
node_name=node1
|
||||
conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'
|
||||
data_directory='/var/lib/postgresql/data'
|
||||
location='dc1'</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
In a failover situation, <application>repmgrd</application> will check if any servers in the
|
||||
same location as the current primary node are visible. If not, <application>repmgrd</application>
|
||||
will assume a network interruption and not promote any node in any
|
||||
other location (it will however enter <xref linkend="repmgrd-degraded-monitoring"> mode until
|
||||
a primary becomes visible).
|
||||
</para>
|
||||
|
||||
</chapter>
|
||||
|
||||
@@ -1,386 +0,0 @@
|
||||
<chapter id="repmgrd-operation" xreflabel="repmgrd operation">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>operation</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>repmgrd operation</title>
|
||||
|
||||
|
||||
<sect1 id="repmgrd-pausing">
|
||||
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>pausing</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>pausing repmgrd</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>Pausing repmgrd</title>
|
||||
|
||||
<para>
|
||||
In normal operation, <application>repmgrd</application> monitors the state of the
|
||||
PostgreSQL node it is running on, and will take appropriate action if problems
|
||||
are detected, e.g. (if so configured) promote the node to primary, if the existing
|
||||
primary has been determined as failed.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
However, <application>repmgrd</application> is unable to distinguish between
|
||||
planned outages (such as performing a <link linkend="performing-switchover">switchover</link>
|
||||
or installing PostgreSQL maintenance releases), and an actual server outage. In versions prior to
|
||||
&repmgr; 4.2 it was necessary to stop <application>repmgrd</application> on all nodes (or at least
|
||||
on all nodes where <application>repmgrd</application> is
|
||||
<link linkend="repmgrd-automatic-failover">configured for automatic failover</link>)
|
||||
to prevent <application>repmgrd</application> from making unintentional changes to the
|
||||
replication cluster.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
From <link linkend="release-4.2">&repmgr; 4.2</link>, <application>repmgrd</application>
|
||||
can now be "paused", i.e. instructed not to take any action such as performing a failover.
|
||||
This can be done from any node in the cluster, removing the need to stop/restart
|
||||
each <application>repmgrd</application> individually.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
For major PostgreSQL upgrades, e.g. from PostgreSQL 10 to PostgreSQL 11,
|
||||
<application>repmgrd</application> should be shut down completely and only started up
|
||||
once the &repmgr; packages for the new PostgreSQL major version have been installed.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<sect2 id="repmgrd-pausing-prerequisites">
|
||||
<title>Prerequisites for pausing <application>repmgrd</application></title>
|
||||
<para>
|
||||
In order to be able to pause/unpause <application>repmgrd</application>, the following
|
||||
prerequisites must be met:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara><link linkend="release-4.2">&repmgr; 4.2</link> or later must be installed on all nodes.</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>The same major &repmgr; version (e.g. 4.2) must be installed on all nodes (and preferably the same minor version).</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
PostgreSQL on all nodes must be accessible from the node where the
|
||||
<literal>pause</literal>/<literal>unpause</literal> operation is executed, using the
|
||||
<varname>conninfo</varname> string shown by <link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
These conditions are required for normal &repmgr; operation in any case.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="repmgrd-pausing-execution">
|
||||
<title>Pausing/unpausing <application>repmgrd</application></title>
|
||||
<para>
|
||||
To pause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link>, e.g.:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf daemon pause
|
||||
NOTICE: node 1 (node1) paused
|
||||
NOTICE: node 2 (node2) paused
|
||||
NOTICE: node 3 (node3) paused</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The state of <application>repmgrd</application> on each node can be checked with
|
||||
<link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>, e.g.:
|
||||
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
|
||||
ID | Name | Role | Status | repmgrd | PID | Paused?
|
||||
----+-------+---------+---------+---------+------+---------
|
||||
1 | node1 | primary | running | running | 7851 | yes
|
||||
2 | node2 | standby | running | running | 7889 | yes
|
||||
3 | node3 | standby | running | running | 7918 | yes</programlisting>
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If executing a switchover with <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
|
||||
&repmgr; will automatically pause/unpause <application>repmgrd</application> as part of the switchover process.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
If the primary (in this example, <literal>node1</literal>) is stopped, <application>repmgrd</application>
|
||||
running on one of the standbys (here: <literal>node2</literal>) will react like this:
|
||||
<programlisting>
|
||||
[2018-09-20 12:22:21] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||
[2018-09-20 12:22:21] [INFO] checking state of node 1, 1 of 5 attempts
|
||||
[2018-09-20 12:22:21] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
...
|
||||
[2018-09-20 12:22:24] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
[2018-09-20 12:22:25] [INFO] checking state of node 1, 5 of 5 attempts
|
||||
[2018-09-20 12:22:25] [WARNING] unable to reconnect to node 1 after 5 attempts
|
||||
[2018-09-20 12:22:25] [NOTICE] node is paused
|
||||
[2018-09-20 12:22:33] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state
|
||||
[2018-09-20 12:22:33] [DETAIL] repmgrd paused by administrator
|
||||
[2018-09-20 12:22:33] [HINT] execute "repmgr daemon unpause" to resume normal failover mode</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
If the primary becomes available again (e.g. following a software upgrade), <application>repmgrd</application>
|
||||
will automatically reconnect, e.g.:
|
||||
<programlisting>
|
||||
[2018-09-20 13:12:41] [NOTICE] reconnected to upstream node 1 after 8 seconds, resuming monitoring</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To unpause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>, e.g.:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf daemon unpause
|
||||
NOTICE: node 1 (node1) unpaused
|
||||
NOTICE: node 2 (node2) unpaused
|
||||
NOTICE: node 3 (node3) unpaused</programlisting>
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If the previous primary is no longer accessible when <application>repmgrd</application>
|
||||
is unpaused, no failover action will be taken. Instead, a new primary must be manually promoted using
|
||||
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>,
|
||||
and any standbys attached to the new primary with
|
||||
<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>.
|
||||
</para>
|
||||
<para>
|
||||
This is to prevent <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
|
||||
resulting in the automatic promotion of a new primary, which may be a problem particularly
|
||||
in larger clusters, where <application>repmgrd</application> could select a different promotion
|
||||
candidate to the one intended by the administrator.
|
||||
</para>
|
||||
</note>
|
||||
</sect2>
|
||||
<sect2 id="repmgrd-pausing-details">
|
||||
<title>Details on the <application>repmgrd</application> pausing mechanism</title>
|
||||
|
||||
<para>
|
||||
The pause state of each node will be preserved across a PostgreSQL restart.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
|
||||
<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link> can be
|
||||
executed even if <application>repmgrd</application> is not running; in this case,
|
||||
<application>repmgrd</application> will start up in whichever pause state has been set.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
|
||||
<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
|
||||
<emphasis>do not</emphasis> stop/start <application>repmgrd</application>.
|
||||
</para>
|
||||
</note>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-wal-replay-pause">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>paused WAL replay</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>repmgrd and paused WAL replay</title>
|
||||
<para>
|
||||
If WAL replay has been paused (using <command>pg_wal_replay_pause()</command>,
|
||||
on PostgreSQL 9.6 and earlier <command>pg_xlog_replay_pause()</command>),
|
||||
in a failover situation <application>repmgrd</application> will
|
||||
automatically resume WAL replay.
|
||||
</para>
|
||||
<para>
|
||||
This is because if WAL replay is paused, but WAL is pending replay,
|
||||
PostgreSQL cannot be promoted until WAL replay is resumed.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command>
|
||||
will refuse to promote a node in this state, as the PostgreSQL
|
||||
<command>promote</command> command will not be acted on until
|
||||
WAL replay is resumed, leaving the cluster in a potentially
|
||||
unstable state. In this case it is up to the user to
|
||||
decide whether to resume WAL replay.
|
||||
</para>
|
||||
</note>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-degraded-monitoring" xreflabel="repmgrd degraded monitoring">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>degraded monitoring</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>degraded monitoring</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>"degraded monitoring" mode</title>
|
||||
<para>
|
||||
In certain circumstances, <application>repmgrd</application> is not able to fulfill its primary mission
|
||||
of monitoring the node's upstream server. In these cases it enters "degraded monitoring"
|
||||
mode, where <application>repmgrd</application> remains active but is waiting for the situation
|
||||
to be resolved.
|
||||
</para>
|
||||
<para>
|
||||
Situations where this happens are:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, no nodes in the primary node's location are visible</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but no promotion candidate is available</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but the promotion candidate could not be promoted</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but the node was unable to follow the new primary</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but no primary has become available</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>a failover situation has occurred, but automatic failover is not enabled for the node</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>repmgrd is monitoring the primary node, but it is not available (and no other node has been promoted as primary)</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Example output in a situation where there is only one standby with <literal>failover=manual</literal>,
|
||||
and the primary node is unavailable (but is later restarted):
|
||||
<programlisting>
|
||||
[2017-08-29 10:59:19] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)
|
||||
[2017-08-29 10:59:33] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||
[2017-08-29 10:59:33] [INFO] checking state of node 1, 1 of 5 attempts
|
||||
[2017-08-29 10:59:33] [INFO] sleeping 1 seconds until next reconnection attempt
|
||||
(...)
|
||||
[2017-08-29 10:59:37] [INFO] checking state of node 1, 5 of 5 attempts
|
||||
[2017-08-29 10:59:37] [WARNING] unable to reconnect to node 1 after 5 attempts
|
||||
[2017-08-29 10:59:37] [NOTICE] this node is not configured for automatic failover so will not be considered as promotion candidate
|
||||
[2017-08-29 10:59:37] [NOTICE] no other nodes are available as promotion candidate
|
||||
[2017-08-29 10:59:37] [HINT] use "repmgr standby promote" to manually promote this node
|
||||
[2017-08-29 10:59:37] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
||||
[2017-08-29 10:59:53] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state (automatic failover disabled)
|
||||
[2017-08-29 11:00:45] [NOTICE] reconnected to upstream node 1 after 68 seconds, resuming monitoring
|
||||
[2017-08-29 11:00:57] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in normal state (automatic failover disabled)</programlisting>
|
||||
|
||||
</para>
|
||||
<para>
|
||||
By default, <literal>repmgrd</literal> will continue in degraded monitoring mode indefinitely.
|
||||
However a timeout (in seconds) can be set with <varname>degraded_monitoring_timeout</varname>,
|
||||
after which <application>repmgrd</application> will terminate.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If <application>repmgrd</application> is monitoring a primary node which has been stopped
|
||||
and manually restarted as a standby attached to a new primary, it will automatically detect
|
||||
the status change and update the node record to reflect the node's new status
|
||||
as an active standby. It will then resume monitoring the node as a standby.
|
||||
</para>
|
||||
</note>
|
||||
</sect1>
|
||||
|
||||
|
||||
<sect1 id="repmgrd-monitoring" xreflabel="Storing monitoring data">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>monitoring</secondary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>monitoring</primary>
|
||||
<secondary>with repmgrd</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Storing monitoring data</title>
|
||||
<para>
|
||||
When <application>repmgrd</application> is running with the option <literal>monitoring_history=true</literal>,
|
||||
it will constantly write standby node status information to the
|
||||
<varname>monitoring_history</varname> table, providing a near-real time
|
||||
overview of replication status on all nodes
|
||||
in the cluster.
|
||||
</para>
|
||||
<para>
|
||||
The view <literal>replication_status</literal> shows the most recent state
|
||||
for each node, e.g.:
|
||||
<programlisting>
|
||||
repmgr=# select * from repmgr.replication_status;
|
||||
-[ RECORD 1 ]-------------+------------------------------
|
||||
primary_node_id | 1
|
||||
standby_node_id | 2
|
||||
standby_name | node2
|
||||
node_type | standby
|
||||
active | t
|
||||
last_monitor_time | 2017-08-24 16:28:41.260478+09
|
||||
last_wal_primary_location | 0/6D57A00
|
||||
last_wal_standby_location | 0/5000000
|
||||
replication_lag | 29 MB
|
||||
replication_time_lag | 00:00:11.736163
|
||||
apply_lag | 15 MB
|
||||
communication_time_lag | 00:00:01.365643</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The interval in which monitoring history is written is controlled by the
|
||||
configuration parameter <varname>monitor_interval_secs</varname>;
|
||||
default is 2.
|
||||
</para>
|
||||
<para>
|
||||
As this can generate a large amount of monitoring data in the table
|
||||
<literal>repmgr.monitoring_history</literal>, it's advisable to regularly
|
||||
purge historical data using the <xref linkend="repmgr-cluster-cleanup">
|
||||
command; use the <literal>-k/--keep-history</literal> option to
|
||||
specify how many days' worth of data should be retained.
|
||||
</para>
|
||||
<para>
|
||||
It's possible to use <application>repmgrd</application> to run in monitoring
|
||||
mode only (without automatic failover capability) for some or all
|
||||
nodes by setting <literal>failover=manual</literal> in the node's
|
||||
<filename>repmgr.conf</filename> file. In the event of the node's upstream failing,
|
||||
no failover action will be taken and the node will require manual intervention to
|
||||
be reattached to replication. If this occurs, an
|
||||
<link linkend="event-notifications">event notification</link>
|
||||
<varname>standby_disconnect_manual</varname> will be created.
|
||||
</para>
|
||||
<para>
|
||||
Note that when a standby node is not streaming directly from its upstream
|
||||
node, e.g. recovering WAL from an archive, <varname>apply_lag</varname> will always appear as
|
||||
<literal>0 bytes</literal>.
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
If monitoring history is enabled, the contents of the <literal>repmgr.monitoring_history</literal>
|
||||
table will be replicated to attached standbys. This means there will be a small but
|
||||
constant stream of replication activity which may not be desirable. To prevent
|
||||
this, convert the table to an <literal>UNLOGGED</literal> one with:
|
||||
<programlisting>
|
||||
ALTER TABLE repmgr.monitoring_history SET UNLOGGED;</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This will however mean that monitoring history will not be available on
|
||||
another node following a failover, and the view <literal>repmgr.replication_status</literal>
|
||||
will not work on standbys.
|
||||
</para>
|
||||
</tip>
|
||||
</sect1>
|
||||
|
||||
|
||||
</chapter>
|
||||
@@ -1,187 +0,0 @@
|
||||
<chapter id="repmgrd-overview" xreflabel="repmgrd overview">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>overview</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>repmgrd overview</title>
|
||||
|
||||
<para>
|
||||
<application>repmgrd</application> ("<literal>replication manager daemon</literal>")
|
||||
is a management and monitoring daemon which runs
|
||||
on each node in a replication cluster. It can automate actions such as
|
||||
failover and updating standbys to follow the new primary, as well as
|
||||
providing monitoring information about the state of each standby.
|
||||
</para>
|
||||
<para>
|
||||
<application>repmgrd</application> is designed to be straightforward to set up
|
||||
and does not require additional external infrastructure.
|
||||
</para>
|
||||
<para>
|
||||
Functionality provided by <application>repmgrd</application> includes:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
wide range of <link linkend="repmgrd-basic-configuration">configuration options</link>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
option to execute custom scripts ("<link linkend="event-notifications">event notifications</link>")
|
||||
at different points in the failover sequence
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
ability to <link linkend="repmgrd-pausing">pause repmgrd</link>
|
||||
operation on all nodes with a
|
||||
<link linkend="repmgr-daemon-pause"><command>single command</command></link>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
optional <link linkend="repmgrd-witness-server">witness server</link>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
"location" configuration option to restrict
|
||||
potential promotion candidates to a single location
|
||||
(e.g. when nodes are spread over multiple data centres)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<link linkend="connection-check-type">choice of method</link> to determine node availability
|
||||
(PostgreSQL ping, query execution or new connection)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
retention of monitoring statistics (optional)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
|
||||
<sect1 id="repmgrd-demonstration">
|
||||
|
||||
<title>repmgrd demonstration</title>
|
||||
<para>
|
||||
To demonstrate automatic failover, set up a 3-node replication cluster (one primary
|
||||
and two standbys streaming directly from the primary) so that the cluster looks
|
||||
something like this:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show --compact
|
||||
ID | Name | Role | Status | Upstream | Location | Prio.
|
||||
----+-------+---------+-----------+----------+----------+-------
|
||||
1 | node1 | primary | * running | | default | 100
|
||||
2 | node2 | standby | running | node1 | default | 100
|
||||
3 | node3 | standby | running | node1 | default | 100</programlisting>
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
See section <link linkend="repmgrd-automatic-failover-configuration">Required configuration for automatic failover</link>
|
||||
for an example of minimal <filename>repmgr.conf</filename> file settings suitable for use with <application>repmgrd</application>.
|
||||
</para>
|
||||
</tip>
|
||||
<para>
|
||||
Start <application>repmgrd</application> on each standby and verify that it's running by examining the
|
||||
log output, which at log level <literal>INFO</literal> will look like this:
|
||||
<programlisting>
|
||||
[2019-03-15 06:32:05] [NOTICE] repmgrd (repmgrd 4.3) starting up
|
||||
[2019-03-15 06:32:05] [INFO] connecting to database "host=node2 dbname=repmgr user=repmgr connect_timeout=2"
|
||||
INFO: set_repmgrd_pid(): provided pidfile is /var/run/repmgr/repmgrd-11.pid
|
||||
[2019-03-15 06:32:05] [NOTICE] starting monitoring of node "node2" (ID: 2)
|
||||
[2019-03-15 06:32:05] [INFO] monitoring connection to upstream node "node1" (node ID: 1)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Each <application>repmgrd</application> should also have recorded its successful startup as an event:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster event --event=repmgrd_start
|
||||
Node ID | Name | Event | OK | Timestamp | Details
|
||||
---------+-------+---------------+----+---------------------+-------------------------------------------------------------
|
||||
3 | node3 | repmgrd_start | t | 2019-03-14 04:17:30 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||
2 | node2 | repmgrd_start | t | 2019-03-14 04:11:47 | monitoring connection to upstream node "node1" (node ID: 1)
|
||||
1 | node1 | repmgrd_start | t | 2019-03-14 04:04:31 | monitoring cluster primary "node1" (node ID: 1)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Now stop the current primary server with e.g.:
|
||||
<programlisting>
|
||||
pg_ctl -D /var/lib/postgresql/data -m immediate stop</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This will force the primary to shut down straight away, aborting all processes
|
||||
and transactions. This will cause a flurry of activity in the <application>repmgrd</application> log
|
||||
files as each <application>repmgrd</application> detects the failure of the primary and a failover
|
||||
decision is made. This is an extract from the log of a standby server (<literal>node2</literal>)
|
||||
which has been promoted to new primary after failure of the original primary (<literal>node1</literal>).
|
||||
<programlisting>
|
||||
[2019-03-15 06:37:50] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
|
||||
[2019-03-15 06:37:50] [INFO] checking state of node 1, 1 of 3 attempts
|
||||
[2019-03-15 06:37:50] [INFO] sleeping 5 seconds until next reconnection attempt
|
||||
[2019-03-15 06:37:55] [INFO] checking state of node 1, 2 of 3 attempts
|
||||
[2019-03-15 06:37:55] [INFO] sleeping 5 seconds until next reconnection attempt
|
||||
[2019-03-15 06:38:00] [INFO] checking state of node 1, 3 of 3 attempts
|
||||
[2019-03-15 06:38:00] [WARNING] unable to reconnect to node 1 after 3 attempts
|
||||
[2019-03-15 06:38:00] [INFO] primary and this node have the same location ("default")
|
||||
[2019-03-15 06:38:00] [INFO] local node's last receive lsn: 0/900CBF8
|
||||
[2019-03-15 06:38:00] [INFO] node 3 last saw primary node 12 second(s) ago
|
||||
[2019-03-15 06:38:00] [INFO] last receive LSN for sibling node "node3" (ID: 3) is: 0/900CBF8
|
||||
[2019-03-15 06:38:00] [INFO] node "node3" (ID: 3) has same LSN as current candidate "node2" (ID: 2)
|
||||
[2019-03-15 06:38:00] [INFO] visible nodes: 2; total nodes: 2; no nodes have seen the primary within the last 4 seconds
|
||||
[2019-03-15 06:38:00] [NOTICE] promotion candidate is "node2" (ID: 2)
|
||||
[2019-03-15 06:38:00] [NOTICE] this node is the winner, will now promote itself and inform other nodes
|
||||
[2019-03-15 06:38:00] [INFO] promote_command is:
|
||||
"/usr/pgsql-11/bin/repmgr -f /etc/repmgr/11/repmgr.conf standby promote"
|
||||
NOTICE: promoting standby to primary
|
||||
DETAIL: promoting server "node2" (ID: 2) using "/usr/pgsql-11/bin/pg_ctl -w -D '/var/lib/pgsql/11/data' promote"
|
||||
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
|
||||
NOTICE: STANDBY PROMOTE successful
|
||||
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
|
||||
[2019-03-15 06:38:01] [INFO] 3 followers to notify
|
||||
[2019-03-15 06:38:01] [NOTICE] notifying node "node3" (node ID: 3) to follow node 2
|
||||
INFO: node 3 received notification to follow node 2
|
||||
[2019-03-15 06:38:01] [INFO] switching to primary monitoring mode
|
||||
[2019-03-15 06:38:01] [NOTICE] monitoring cluster primary "node2" (node ID: 2)</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The cluster status will now look like this, with the original primary (<literal>node1</literal>)
|
||||
marked as inactive, and standby <literal>node3</literal> now following the new primary
|
||||
(<literal>node2</literal>):
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show --compact
|
||||
ID | Name | Role | Status | Upstream | Location | Prio.
|
||||
----+-------+---------+-----------+----------+----------+-------
|
||||
1 | node1 | primary | - failed | | default | 100
|
||||
2 | node2 | primary | * running | | default | 100
|
||||
3 | node3 | standby | running | node2 | default | 100</programlisting>
|
||||
|
||||
</para>
|
||||
<para>
|
||||
<link linkend="repmgr-cluster-event"><command>repmgr cluster event</command></link> will display a summary of
|
||||
what happened to each server during the failover:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster event
|
||||
Node ID | Name | Event | OK | Timestamp | Details
|
||||
---------+-------+----------------------------+----+---------------------+-------------------------------------------------------------
|
||||
3 | node3 | repmgrd_failover_follow | t | 2019-03-15 06:38:03 | node 3 now following new upstream node 2
|
||||
3 | node3 | standby_follow | t | 2019-03-15 06:38:02 | standby attached to upstream node "node2" (node ID: 2)
|
||||
2 | node2 | repmgrd_reload | t | 2019-03-15 06:38:01 | monitoring cluster primary "node2" (node ID: 2)
|
||||
2 | node2 | repmgrd_failover_promote | t | 2019-03-15 06:38:01 | node 2 promoted to primary; old primary 1 marked as failed
|
||||
2 | node2 | standby_promote | t | 2019-03-15 06:38:01 | server "node2" (ID: 2) was successfully promoted to primary</programlisting>
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
</chapter>
|
||||
31
doc/repmgrd-witness-server.sgml
Normal file
31
doc/repmgrd-witness-server.sgml
Normal file
@@ -0,0 +1,31 @@
|
||||
<chapter id="repmgrd-witness-server" xreflabel="Using a witness server with repmgrd">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>witness server</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>Using a witness server with repmgrd</title>
|
||||
<para>
|
||||
In a situation caused e.g. by a network interruption between two
|
||||
data centres, it's important to avoid a "split-brain" situation where
|
||||
both sides of the network assume they are the active segment and the
|
||||
side without an active primary unilaterally promotes one of its standbys.
|
||||
</para>
|
||||
<para>
|
||||
To prevent this situation happening, it's essential to ensure that one
|
||||
network segment has a "voting majority", so other segments will know
|
||||
they're in the minority and not attempt to promote a new primary. Where
|
||||
an odd number of servers exists, this is not an issue. However, if each
|
||||
network has an even number of nodes, it's necessary to provide some way
|
||||
of ensuring a majority, which is where the witness server becomes useful.
|
||||
</para>
|
||||
<para>
|
||||
This is not a fully-fledged standby node and is not integrated into
|
||||
replication, but it effectively represents the "casting vote" when
|
||||
deciding which network segment has a majority. A witness server can
|
||||
be set up using <xref linkend="repmgr-witness-register">. Note that it only
|
||||
makes sense to create a witness server in conjunction with running
|
||||
<application>repmgrd</application>; the witness server will require its own
|
||||
<application>repmgrd</application> instance.
|
||||
</para>
|
||||
</chapter>
|
||||
@@ -19,10 +19,9 @@
|
||||
</para>
|
||||
<para>
|
||||
<command>repmgr standby switchover</command> differs from other &repmgr;
|
||||
actions in that it also performs actions on other servers (the demotion
|
||||
candidate, and optionally any other servers which are to follow the new primary),
|
||||
which means passwordless SSH access is required to those servers from the one where
|
||||
<command>repmgr standby switchover</command> is executed.
|
||||
actions in that it also performs actions on another server (the demotion
|
||||
candidate), which means passwordless SSH access is required to that server
|
||||
from the one where <command>repmgr standby switchover</command> is executed.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
@@ -58,14 +57,7 @@
|
||||
|
||||
<para>
|
||||
As mentioned in the previous section, success of the switchover operation depends on
|
||||
&repmgr; being able to shut down the current primary server quickly and cleanly.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Ensure that the promotion candidate has sufficient free walsenders available
|
||||
(PostgreSQL configuration item <varname>max_wal_senders</varname>), and if replication
|
||||
slots are in use, at least one free slot is available for the demotion candidate (
|
||||
PostgreSQL configuration item <varname>max_replication_slots</varname>).
|
||||
&repmgr; being able to shut down the current primary server quickly and cleanly.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
@@ -75,22 +67,13 @@
|
||||
promotion candidate to all standbys attached to the demotion candidate.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
&repmgr; expects to find the &repmgr; binary in the same path on the remote
|
||||
server as on the local server.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
Double-check which commands will be used to stop/start/restart the current
|
||||
primary; this can be done by e.g. executing <command><link linkend="repmgr-node-service">repmgr node service</link></command>
|
||||
on the current primary:
|
||||
primary; on the primary execute:
|
||||
<programlisting>
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=stop
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=start
|
||||
repmgr -f /etc/repmgr.conf node service --list-actions --action=restart</programlisting>
|
||||
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=stop
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=start
|
||||
repmgr -f /etc/repmgr.conf node service --list --action=restart</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
@@ -109,11 +92,7 @@
|
||||
<para>
|
||||
If the <option>service_*_command</option> options aren't defined, &repmgr; will
|
||||
fall back to using <application>pg_ctl</application> to stop/start/restart
|
||||
PostgreSQL, which may not work properly, particularly when executed on a remote
|
||||
server.
|
||||
</para>
|
||||
<para>
|
||||
For more details, see <xref linkend="configuration-file-service-commands">.
|
||||
PostgreSQL, which may not work properly.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
@@ -131,20 +110,13 @@
|
||||
</note>
|
||||
|
||||
<para>
|
||||
Check that access from applications is minimalized or preferably blocked
|
||||
completely, so applications are not unexpectedly interrupted.
|
||||
Check that access from applications is minimalized or preferably blocked
|
||||
completely, so applications are not unexpectedly interrupted.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If an exclusive backup is running on the current primary, or if WAL replay is paused on the standby,
|
||||
&repmgr; will <emphasis>not</emphasis> perform the switchover.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
Check there is no significant replication lag on standbys attached to the
|
||||
current primary.
|
||||
Check there is no significant replication lag on standbys attached to the
|
||||
current primary.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
@@ -155,19 +127,10 @@
|
||||
manually with <command>repmgr node check --archive-ready</command>.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
From <link linkend="release-4.2">repmgr 4.2</link>, &repmgr; will instruct any running
|
||||
<application>repmgrd</application> instances to pause operations while the switchover
|
||||
is being carried out, to prevent <application>repmgrd</application> from
|
||||
unintentionally promoting a node. For more details, see <xref linkend="repmgrd-pausing">.
|
||||
</para>
|
||||
<para>
|
||||
Users of &repmgr; versions prior to 4.2 should ensure that <application>repmgrd</application>
|
||||
is not running on any nodes while a switchover is being executed.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
Ensure that <application>repmgrd</application> is *not* running anywhere to prevent it unintentionally
|
||||
promoting a node.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Finally, consider executing <command>repmgr standby switchover</command> with the
|
||||
@@ -200,60 +163,34 @@
|
||||
</para>
|
||||
</important>
|
||||
|
||||
<para>
|
||||
Note that the following parameters in <filename>repmgr.conf</filename> are relevant to the
|
||||
switchover operation:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>reconnect_attempts</literal>: number of times to check the original primary
|
||||
for a clean shutdown after executing the shutdown command, before aborting
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>reconnect_interval</literal>: interval (in seconds) to check the original
|
||||
primary for a clean shutdown after executing the shutdown command (up to a maximum
|
||||
of <literal>reconnect_attempts</literal> tries)
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>replication_lag_critical</literal>:
|
||||
if replication lag (in seconds) on the standby exceeds this value, the
|
||||
switchover will be aborted (unless the <literal>-F/--force</literal> option
|
||||
is provided)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<note>
|
||||
<simpara>
|
||||
See <xref linkend="repmgr-standby-switchover"> for a full list of available
|
||||
command line options and <filename>repmgr.conf</filename> settings relevant
|
||||
to performing a switchover.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
<sect2 id="switchover-pg-rewind" xreflabel="Switchover and pg_rewind">
|
||||
<indexterm>
|
||||
<primary>pg_rewind</primary>
|
||||
<secondary>using with "repmgr standby switchover"</secondary>
|
||||
</indexterm>
|
||||
<title>Switchover and pg_rewind</title>
|
||||
<para>
|
||||
If the demotion candidate does not shut down smoothly or cleanly, there's a risk it
|
||||
will have a slightly divergent timeline and will not be able to attach to the new
|
||||
primary. To fix this situation without needing to reclone the old primary, it's
|
||||
possible to use the <application>pg_rewind</application> utility, which will usually be
|
||||
able to resync the two servers.
|
||||
</para>
|
||||
<para>
|
||||
To have &repmgr; execute <application>pg_rewind</application> if it detects this
|
||||
situation after promoting the new primary, add the <option>--force-rewind</option>
|
||||
option.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
If &repmgr; detects a situation where it needs to execute <application>pg_rewind</application>,
|
||||
it will execute a <literal>CHECKPOINT</literal> on the new primary before executing
|
||||
<application>pg_rewind</application>.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
For more details on <application>pg_rewind</application>, see:
|
||||
<ulink url="https://www.postgresql.org/docs/current/app-pgrewind.html">https://www.postgresql.org/docs/current/app-pgrewind.html</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
<application>pg_rewind</application> has been part of the core PostgreSQL distribution since
|
||||
version 9.5. Users of versions 9.3 and 9.4 will need to manually install it; the source code is available here:
|
||||
<ulink url="https://github.com/vmware/pg_rewind">https://github.com/vmware/pg_rewind</ulink>.
|
||||
If the <application>pg_rewind</application>
|
||||
binary is not installed in the PostgreSQL <filename>bin</filename> directory, provide
|
||||
its full path on the demotion candidate with <option>--force-rewind</option>.
|
||||
</para>
|
||||
<para>
|
||||
Note that building the 9.3/9.4 version of <application>pg_rewind</application> requires the PostgreSQL
|
||||
source code. Also, PostgreSQL 9.3 does not provide <varname>wal_log_hints</varname>,
|
||||
meaning data checksums must have been enabled when the database was initialized.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="switchover-execution" xreflabel="Executing the switchover command">
|
||||
@@ -311,21 +248,7 @@
|
||||
2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
If <application>repmgrd</application> is in use, it's worth double-checking that
|
||||
all nodes are unpaused by executing <command><link linkend="repmgr-daemon-status">repmgr daemon status</link></command>.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
Users of &repmgr; versions prior to 4.2 will need to manually restart <application>repmgrd</application>
|
||||
on all nodes after the switchover is completed.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
<sect1 id="switchover-caveats" xreflabel="Caveats">
|
||||
<indexterm>
|
||||
<primary>switchover</primary>
|
||||
@@ -347,80 +270,21 @@
|
||||
<simpara>
|
||||
<command>pg_rewind</command> *requires* that either <varname>wal_log_hints</varname> is enabled, or that
|
||||
data checksums were enabled when the cluster was initialized. See the
|
||||
<ulink url="https://www.postgresql.org/docs/current/app-pgrewind.html">pg_rewind documentation</ulink>
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html">pg_rewind documentation</ulink>
|
||||
for details.
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<application>repmgrd</application> should not be running with setting <varname>failover=automatic</varname>
|
||||
in <filename>repmgr.conf</filename> when a switchover is carried out, otherwise the
|
||||
<application>repmgrd</application> daemon may try and promote a standby by itself.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
We hope to remove some of these restrictions in future versions of &repmgr;.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="switchover-troubleshooting" xreflabel="Troubleshooting">
|
||||
<indexterm>
|
||||
<primary>switchover</primary>
|
||||
<secondary>troubleshooting</secondary>
|
||||
</indexterm>
|
||||
<title>Troubleshooting switchover issues</title>
|
||||
|
||||
<para>
|
||||
As <link linkend="performing-switchover">emphasised previously</link>, performing a switchover
|
||||
is a non-trivial operation and there are a number of potential issues which can occur.
|
||||
While &repmgr; attempts to perform sanity checks, there's no guaranteed way of determining the success of
|
||||
a switchover without actually carrying it out.
|
||||
</para>
|
||||
|
||||
<sect2 id="switchover-troubleshooting-primary-shutdown">
|
||||
<title>Demotion candidate (old primary) does not shut down</title>
|
||||
<para>
|
||||
&repmgr; may abort a switchover with a message like:
|
||||
<programlisting>
|
||||
ERROR: shutdown of the primary server could not be confirmed
|
||||
HINT: check the primary server status before performing any further actions</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This means the shutdown of the old primary has taken longer than &repmgr; expected,
|
||||
and it has given up waiting.
|
||||
</para>
|
||||
<para>
|
||||
In this case, check the PostgreSQL log on the primary server to see what is going
|
||||
on. It's entirely possible the shutdown process is just taking longer than the
|
||||
timeout set by the configuration parameter <varname>shutdown_check_timeout</varname>
|
||||
(default: 60 seconds), in which case you may need to adjust this parameter.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
Note that <varname>shutdown_check_timeout</varname> is set on the node where
|
||||
<command>repmgr standby switchover</command> is executed (promotion candidate); setting it on the
|
||||
demotion candidate (former primary) will have no effect.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
If the primary server has shut down cleanly, and no other node has been promoted,
|
||||
it is safe to restart it, in which case the replication cluster will be restored
|
||||
to its original configuration.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="switchover-troubleshooting-exclusive-backup">
|
||||
<title>Switchover aborts with an "exclusive backup" error</title>
|
||||
<para>
|
||||
&repmgr; may abort a switchover with a message like:
|
||||
<programlisting>
|
||||
ERROR: unable to perform a switchover while primary server is in exclusive backup mode
|
||||
HINT: stop backup before attempting the switchover</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
This means an exclusive backup is running on the current primary; interrupting this
|
||||
will not only abort the backup, but potentially leave the primary with an ambiguous
|
||||
backup state.
|
||||
</para>
|
||||
<para>
|
||||
To proceed, either wait until the backup has finished, or cancel it with the command
|
||||
<command>SELECT pg_stop_backup()</command>. For more details see the PostgreSQL
|
||||
documentation section
|
||||
<ulink url="https://www.postgresql.org/docs/current/continuous-archiving.html#BACKUP-LOWLEVEL-BASE-BACKUP-EXCLUSIVE">Making an exclusive low level backup</ulink>.
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
</chapter>
|
||||
|
||||
@@ -4,6 +4,6 @@ Upgrading from repmgr 3
|
||||
This document has been integrated into the main `repmgr` documentation
|
||||
and is now located here:
|
||||
|
||||
> [Upgrading from repmgr 3.x](https://repmgr.org/docs/current/upgrading-from-repmgr-3.html)
|
||||
> [Upgrading from repmgr 3.x](https://repmgr.org/docs/4.0/upgrading-from-repmgr-3.html)
|
||||
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
<title>Upgrading repmgr</title>
|
||||
|
||||
<para>
|
||||
&repmgr; is updated regularly with minor releases (e.g. 4.0.1 to 4.0.2)
|
||||
&repmgr; is updated regularly with point releases (e.g. 4.0.1 to 4.0.2)
|
||||
containing bugfixes and other minor improvements. Any substantial new
|
||||
functionality will be included in a major release (e.g. 4.0 to 4.1).
|
||||
functionality will be included in a feature release (e.g. 4.0.x to 4.1.x).
|
||||
</para>
|
||||
|
||||
<sect1 id="upgrading-repmgr-extension" xreflabel="Upgrading repmgr 4.x and later">
|
||||
@@ -19,202 +19,37 @@
|
||||
</indexterm>
|
||||
<title>Upgrading repmgr 4.x and later</title>
|
||||
<para>
|
||||
From version 4, &repmgr; consists of three elements:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
the <application>repmgr</application> and <application>repmgrd</application> executables
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
the objects for the &repmgr; PostgreSQL extension (SQL files for creating/updating
|
||||
repmgr metadata, and the extension control file)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
the shared library module used by <application>repmgrd</application> which
|
||||
is resident in the PostgreSQL backend
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
With <emphasis>minor releases</emphasis>, usually changes are only made to the <application>repmgr</application>
|
||||
and <application>repmgrd</application> executables. In this case, the upgrade is quite straightforward,
|
||||
and is simply a case of installing the new version, and restarting <application>repmgrd</application>
|
||||
(if running).
|
||||
&repmgr; 4.x is implemented as a PostgreSQL extension; normally the upgrade consists
|
||||
of the two following steps:
|
||||
<orderedlist>
|
||||
<listitem>
|
||||
<simpara>
|
||||
Install the updated package (or compile the updated source)
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
In the database where the &repmgr; extension is installed, execute
|
||||
<command>ALTER EXTENSION repmgr UPDATE</command>.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</orderedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For <emphasis>major releases</emphasis>, the &repmgr; PostgreSQL extension will need to be updated
|
||||
to the latest version. Additionally, if the shared library module has been updated (this is sometimes,
|
||||
but not always the case), PostgreSQL itself will need to be restarted on each node.
|
||||
Always check the <link linkend="appendix-release-notes">release notes</link> for every
|
||||
release as they may contain upgrade instructions particular to individual versions.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
Always check the <link linkend="appendix-release-notes">release notes</link> for every
|
||||
release as they may contain upgrade instructions particular to individual versions.
|
||||
</para>
|
||||
</important>
|
||||
|
||||
<sect2 id="upgrading-minor-version" xreflabel="Upgrading a minor version release">
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
<secondary>minor release</secondary>
|
||||
</indexterm>
|
||||
<title>Upgrading a minor version release</title>
|
||||
|
||||
<para>
|
||||
The process for installing minor version upgrades is quite straightforward:
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
install the new &repmgr; version
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
restart <application>repmgrd</application> on all nodes where it is running
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
Some packaging systems (e.g. <link linkend="packages-debian-ubuntu">Debian/Ubuntu</link>)
|
||||
may restart <application>repmgrd</application> as part of the package upgrade process.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
Minor version upgrades can be performed in any order on the nodes in the replication
|
||||
cluster.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
A PostgreSQL restart is <emphasis>not</emphasis> required for minor version upgrades.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
The same &repmgr; "major version" (e.g. <literal>4.2</literal>) must be
|
||||
installed on all nodes in the replication cluster. While it's possible to have differing
|
||||
&repmgr; "minor versions" (e.g. <literal>4.2.1</literal>) on different nodes,
|
||||
we strongly recommend updating all nodes to the latest minor version.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="upgrading-major-version" xreflabel="Upgrading a major version release">
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
<secondary>major release</secondary>
|
||||
</indexterm>
|
||||
<title>Upgrading a major version release</title>
|
||||
<para>
|
||||
"major version" upgrades need to be planned more carefully, as they may include
|
||||
changes to the &repmgr; metadata (which need to be propagated from the primary to all
|
||||
standbys) and/or changes to the shared object file used by <application>repmgrd</application>
|
||||
(which require a PostgreSQL restart).
|
||||
</para>
|
||||
<para>
|
||||
With this in mind, the recommended upgrade procedure is as follows:
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<orderedlist>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Stop <application>repmgrd</application> (if in use) on all nodes where it is running.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Disable the <application>repmgrd</application> service on all nodes where it is in use;
|
||||
this is to prevent packages from prematurely restarting <application>repmgrd</application>.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Install the updated package (or compile the updated source) on all nodes.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If running a <literal>systemd</literal>-based Linux distribution, execute (as <literal>root</literal>,
|
||||
or with appropriate <literal>sudo</literal> permissions):
|
||||
<programlisting>
|
||||
systemctl daemon-reload</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
If the &repmgr; shared library module has been updated (check the <link linkend="appendix-release-notes">release notes</link>!),
|
||||
restart PostgreSQL, then <application>repmgrd</application> (if in use) on each node.
|
||||
The order in which this is applied to individual nodes is not critical,
|
||||
and it's also fine to restart PostgreSQL on all nodes first before starting <application>repmgrd</application>.
|
||||
</simpara>
|
||||
<simpara>
|
||||
Note that if the upgrade requires a PostgreSQL restart, <application>repmgrd</application>
|
||||
will only function correctly once all nodes have been restarted.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
On the primary node, execute
|
||||
<programlisting>
|
||||
ALTER EXTENSION repmgr UPDATE</programlisting>
|
||||
in the database where &repmgr; is installed.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
Reenable the <application>repmgrd</application> service on all nodes where it is in use, and
|
||||
ensure it is running.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</orderedlist>
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
If the &repmgr; upgrade requires a PostgreSQL restart, combine the &repmgr; upgrade
|
||||
with a PostgreSQL minor version upgrade, which will require a restart in any case.
|
||||
New PostgreSQL minor versions are usually released every couple of months.
|
||||
</para>
|
||||
</tip>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="upgrading-check-repmgrd" xreflabel="Checking repmgrd status after an upgrade">
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
<secondary>checking repmgrd status</secondary>
|
||||
</indexterm>
|
||||
<title>Checking repmgrd status after an upgrade</title>
|
||||
<para>
|
||||
From <link linkend="release-4.2">repmgr 4.2</link>, once the upgrade is complete, execute the <command><link linkend="repmgr-daemon-status">repmgr daemon status</link></command>
|
||||
command (on any node) to show an overview of the status of <application>repmgrd</application> on all nodes.
|
||||
</para>
|
||||
</sect2>
|
||||
<para>
|
||||
If the <application>repmgrd</application> daemon is in use, we recommend stopping it
|
||||
before upgrading &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
Note that it may be necessary to restart the PostgreSQL server if the upgrade contains
|
||||
changes to the shared object file used by <application>repmgrd</application>; check the
|
||||
release notes for details.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="upgrading-and-pg-upgrade" xreflabel="pg_upgrade and repmgr">
|
||||
@@ -247,20 +82,13 @@ ALTER EXTENSION repmgr UPDATE</programlisting>
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
For further details please see the <ulink url="https://www.postgresql.org/docs/current/pgupgrade.html">pg_upgrade documentation</ulink>.
|
||||
For further details please see the <ulink url="https://www.postgresql.org/docs/current/static/pgupgrade.html">pg_upgrade documentation</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
If replication slots are in use, bear in mind these will <emphasis>not</emphasis>
|
||||
be recreated by <application>pg_upgrade</application>. These will need to
|
||||
be recreated manually.
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
Use <command><link linkend="repmgr-node-check">repmgr node check</link></command>
|
||||
to determine which replication slots need to be recreated.
|
||||
</para>
|
||||
</tip>
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
|
||||
1
doc/version.sgml
Normal file
1
doc/version.sgml
Normal file
@@ -0,0 +1 @@
|
||||
<!ENTITY repmgrversion "4.0.3">
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* errcode.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -44,10 +44,5 @@
|
||||
#define ERR_REGISTRATION_SYNC 20
|
||||
#define ERR_OUT_OF_MEMORY 21
|
||||
#define ERR_SWITCHOVER_INCOMPLETE 22
|
||||
#define ERR_FOLLOW_FAIL 23
|
||||
#define ERR_REJOIN_FAIL 24
|
||||
#define ERR_NODE_STATUS 25
|
||||
#define ERR_REPMGRD_PAUSE 26
|
||||
#define ERR_REPMGRD_SERVICE 27
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
@@ -47,7 +47,7 @@ SELECT repmgr.am_bdr_failover_handler(NULL);
|
||||
SELECT repmgr.get_new_primary();
|
||||
get_new_primary
|
||||
-----------------
|
||||
-1
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT repmgr.notify_follow_primary(-1);
|
||||
|
||||
23
log.c
23
log.c
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* log.c - Logging methods
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -42,7 +42,7 @@ _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_li
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||
|
||||
int log_type = REPMGR_STDERR;
|
||||
int log_level = LOG_INFO;
|
||||
int log_level = LOG_NOTICE;
|
||||
int last_log_level = LOG_INFO;
|
||||
int verbose_logging = false;
|
||||
int terse_logging = false;
|
||||
@@ -70,7 +70,7 @@ _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_li
|
||||
|
||||
/*
|
||||
* Store the requested level so that if there's a subsequent log_hint() or
|
||||
* log_detail(), we can suppress that if --terse was specified,
|
||||
* log_detail(), we can suppress that if appropriate.
|
||||
*/
|
||||
last_log_level = level;
|
||||
|
||||
@@ -85,7 +85,7 @@ _stderr_log_with_level(const char *level_name, int level, const char *fmt, va_li
|
||||
|
||||
time(&t);
|
||||
tm = localtime(&t);
|
||||
strftime(buf, sizeof(buf), "[%Y-%m-%d %H:%M:%S]", tm);
|
||||
strftime(buf, 100, "[%Y-%m-%d %H:%M:%S]", tm);
|
||||
fprintf(stderr, "%s [%s] ", buf, level_name);
|
||||
}
|
||||
else
|
||||
@@ -329,21 +329,6 @@ logger_set_terse(void)
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
logger_set_level(int new_log_level)
|
||||
{
|
||||
log_level = new_log_level;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
logger_set_min_level(int min_log_level)
|
||||
{
|
||||
if (min_log_level > log_level)
|
||||
log_level = min_log_level;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
detect_log_level(const char *level)
|
||||
{
|
||||
|
||||
4
log.h
4
log.h
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* log.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -128,8 +128,6 @@ bool logger_shutdown(void);
|
||||
|
||||
void logger_set_verbose(void);
|
||||
void logger_set_terse(void);
|
||||
void logger_set_min_level(int min_log_level);
|
||||
void logger_set_level(int new_log_level);
|
||||
|
||||
void
|
||||
log_detail(const char *fmt,...)
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||
@@ -1,32 +0,0 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||
|
||||
CREATE FUNCTION get_repmgrd_pid()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_repmgrd_pidfile()
|
||||
RETURNS TEXT
|
||||
AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_is_running()
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_is_running'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_pause(BOOL)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_pause'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_is_paused()
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
|
||||
LANGUAGE C STRICT;
|
||||
166
repmgr--4.1.sql
166
repmgr--4.1.sql
@@ -1,166 +0,0 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||
|
||||
CREATE TABLE repmgr.nodes (
|
||||
node_id INTEGER PRIMARY KEY,
|
||||
upstream_node_id INTEGER NULL REFERENCES nodes (node_id) DEFERRABLE,
|
||||
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
node_name TEXT NOT NULL,
|
||||
type TEXT NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
|
||||
location TEXT NOT NULL DEFAULT 'default',
|
||||
priority INT NOT NULL DEFAULT 100,
|
||||
conninfo TEXT NOT NULL,
|
||||
repluser VARCHAR(63) NOT NULL,
|
||||
slot_name TEXT NULL,
|
||||
config_file TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE repmgr.events (
|
||||
node_id INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
details TEXT NULL
|
||||
);
|
||||
|
||||
DO $repmgr$
|
||||
DECLARE
|
||||
DECLARE server_version_num INT;
|
||||
BEGIN
|
||||
SELECT setting
|
||||
FROM pg_catalog.pg_settings
|
||||
WHERE name = 'server_version_num'
|
||||
INTO server_version_num;
|
||||
IF server_version_num >= 90400 THEN
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE TABLE repmgr.monitoring_history (
|
||||
primary_node_id INTEGER NOT NULL,
|
||||
standby_node_id INTEGER NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||
last_wal_primary_location PG_LSN NOT NULL,
|
||||
last_wal_standby_location PG_LSN,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
)
|
||||
$repmgr_func$;
|
||||
ELSE
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE TABLE repmgr.monitoring_history (
|
||||
primary_node_id INTEGER NOT NULL,
|
||||
standby_node_id INTEGER NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||
last_wal_primary_location TEXT NOT NULL,
|
||||
last_wal_standby_location TEXT,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
)
|
||||
$repmgr_func$;
|
||||
END IF;
|
||||
END$repmgr$;
|
||||
|
||||
|
||||
|
||||
CREATE INDEX idx_monitoring_history_time
|
||||
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||
|
||||
CREATE VIEW repmgr.show_nodes AS
|
||||
SELECT n.node_id,
|
||||
n.node_name,
|
||||
n.active,
|
||||
n.upstream_node_id,
|
||||
un.node_name AS upstream_node_name,
|
||||
n.type,
|
||||
n.priority,
|
||||
n.conninfo
|
||||
FROM repmgr.nodes n
|
||||
LEFT JOIN repmgr.nodes un
|
||||
ON un.node_id = n.upstream_node_id;
|
||||
|
||||
|
||||
/* XXX update upgrade scripts! */
|
||||
CREATE TABLE repmgr.voting_term (
|
||||
term INT NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX voting_term_restrict
|
||||
ON repmgr.voting_term ((TRUE));
|
||||
|
||||
CREATE RULE voting_term_delete AS
|
||||
ON DELETE TO repmgr.voting_term
|
||||
DO INSTEAD NOTHING;
|
||||
|
||||
|
||||
/* ================= */
|
||||
/* repmgrd functions */
|
||||
/* ================= */
|
||||
|
||||
/* monitoring functions */
|
||||
|
||||
CREATE FUNCTION set_local_node_id(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_local_node_id()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_set_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS 'MODULE_PATHNAME', 'standby_set_last_updated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_get_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS 'MODULE_PATHNAME', 'standby_get_last_updated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
/* failover functions */
|
||||
|
||||
CREATE FUNCTION notify_follow_primary(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'notify_follow_primary'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_new_primary()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_new_primary'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION reset_voting_status()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'reset_voting_status'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION am_bdr_failover_handler(INT)
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'am_bdr_failover_handler'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION unset_bdr_failover_handler()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE VIEW repmgr.replication_status AS
|
||||
SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
|
||||
n.type AS node_type, n.active, last_monitor_time,
|
||||
CASE WHEN n.type='standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
|
||||
m.last_wal_standby_location,
|
||||
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
|
||||
CASE WHEN n.type='standby' THEN
|
||||
CASE WHEN replication_lag > 0 THEN age(now(), m.last_apply_time) ELSE '0'::INTERVAL END
|
||||
ELSE NULL
|
||||
END AS replication_time_lag,
|
||||
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
|
||||
AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN repmgr.standby_get_last_updated() ELSE m.last_monitor_time END) AS communication_time_lag
|
||||
FROM repmgr.monitoring_history m
|
||||
JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
|
||||
WHERE (m.standby_node_id, m.last_monitor_time) IN (
|
||||
SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
|
||||
FROM repmgr.monitoring_history m1 GROUP BY 1
|
||||
);
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||
|
||||
CREATE FUNCTION set_upstream_last_seen()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_upstream_last_seen'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_upstream_last_seen()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_upstream_last_seen'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_wal_receiver_pid()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_wal_receiver_pid'
|
||||
LANGUAGE C STRICT;
|
||||
197
repmgr--4.2.sql
197
repmgr--4.2.sql
@@ -1,197 +0,0 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||
|
||||
CREATE TABLE repmgr.nodes (
|
||||
node_id INTEGER PRIMARY KEY,
|
||||
upstream_node_id INTEGER NULL REFERENCES nodes (node_id) DEFERRABLE,
|
||||
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
node_name TEXT NOT NULL,
|
||||
type TEXT NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
|
||||
location TEXT NOT NULL DEFAULT 'default',
|
||||
priority INT NOT NULL DEFAULT 100,
|
||||
conninfo TEXT NOT NULL,
|
||||
repluser VARCHAR(63) NOT NULL,
|
||||
slot_name TEXT NULL,
|
||||
config_file TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE repmgr.events (
|
||||
node_id INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
details TEXT NULL
|
||||
);
|
||||
|
||||
DO $repmgr$
|
||||
DECLARE
|
||||
DECLARE server_version_num INT;
|
||||
BEGIN
|
||||
SELECT setting
|
||||
FROM pg_catalog.pg_settings
|
||||
WHERE name = 'server_version_num'
|
||||
INTO server_version_num;
|
||||
IF server_version_num >= 90400 THEN
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE TABLE repmgr.monitoring_history (
|
||||
primary_node_id INTEGER NOT NULL,
|
||||
standby_node_id INTEGER NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||
last_wal_primary_location PG_LSN NOT NULL,
|
||||
last_wal_standby_location PG_LSN,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
)
|
||||
$repmgr_func$;
|
||||
ELSE
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE TABLE repmgr.monitoring_history (
|
||||
primary_node_id INTEGER NOT NULL,
|
||||
standby_node_id INTEGER NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||
last_wal_primary_location TEXT NOT NULL,
|
||||
last_wal_standby_location TEXT,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
)
|
||||
$repmgr_func$;
|
||||
END IF;
|
||||
END$repmgr$;
|
||||
|
||||
|
||||
|
||||
CREATE INDEX idx_monitoring_history_time
|
||||
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||
|
||||
CREATE VIEW repmgr.show_nodes AS
|
||||
SELECT n.node_id,
|
||||
n.node_name,
|
||||
n.active,
|
||||
n.upstream_node_id,
|
||||
un.node_name AS upstream_node_name,
|
||||
n.type,
|
||||
n.priority,
|
||||
n.conninfo
|
||||
FROM repmgr.nodes n
|
||||
LEFT JOIN repmgr.nodes un
|
||||
ON un.node_id = n.upstream_node_id;
|
||||
|
||||
|
||||
/* XXX update upgrade scripts! */
|
||||
CREATE TABLE repmgr.voting_term (
|
||||
term INT NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX voting_term_restrict
|
||||
ON repmgr.voting_term ((TRUE));
|
||||
|
||||
CREATE RULE voting_term_delete AS
|
||||
ON DELETE TO repmgr.voting_term
|
||||
DO INSTEAD NOTHING;
|
||||
|
||||
|
||||
/* ================= */
|
||||
/* repmgrd functions */
|
||||
/* ================= */
|
||||
|
||||
/* monitoring functions */
|
||||
|
||||
CREATE FUNCTION set_local_node_id(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_local_node_id()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_set_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS 'MODULE_PATHNAME', 'standby_set_last_updated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_get_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS 'MODULE_PATHNAME', 'standby_get_last_updated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
/* failover functions */
|
||||
|
||||
CREATE FUNCTION notify_follow_primary(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'notify_follow_primary'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_new_primary()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_new_primary'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION reset_voting_status()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'reset_voting_status'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION am_bdr_failover_handler(INT)
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'am_bdr_failover_handler'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION unset_bdr_failover_handler()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_repmgrd_pid()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_repmgrd_pidfile()
|
||||
RETURNS TEXT
|
||||
AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_is_running()
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_is_running'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_pause(BOOL)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_pause'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_is_paused()
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
|
||||
CREATE VIEW repmgr.replication_status AS
|
||||
SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
|
||||
n.type AS node_type, n.active, last_monitor_time,
|
||||
CASE WHEN n.type='standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
|
||||
m.last_wal_standby_location,
|
||||
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
|
||||
CASE WHEN n.type='standby' THEN
|
||||
CASE WHEN replication_lag > 0 THEN age(now(), m.last_apply_time) ELSE '0'::INTERVAL END
|
||||
ELSE NULL
|
||||
END AS replication_time_lag,
|
||||
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
|
||||
AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN repmgr.standby_get_last_updated() ELSE m.last_monitor_time END) AS communication_time_lag
|
||||
FROM repmgr.monitoring_history m
|
||||
JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
|
||||
WHERE (m.standby_node_id, m.last_monitor_time) IN (
|
||||
SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
|
||||
FROM repmgr.monitoring_history m1 GROUP BY 1
|
||||
);
|
||||
|
||||
217
repmgr--4.3.sql
217
repmgr--4.3.sql
@@ -1,217 +0,0 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||
|
||||
CREATE TABLE repmgr.nodes (
|
||||
node_id INTEGER PRIMARY KEY,
|
||||
upstream_node_id INTEGER NULL REFERENCES nodes (node_id) DEFERRABLE,
|
||||
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
node_name TEXT NOT NULL,
|
||||
type TEXT NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
|
||||
location TEXT NOT NULL DEFAULT 'default',
|
||||
priority INT NOT NULL DEFAULT 100,
|
||||
conninfo TEXT NOT NULL,
|
||||
repluser VARCHAR(63) NOT NULL,
|
||||
slot_name TEXT NULL,
|
||||
config_file TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE repmgr.events (
|
||||
node_id INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
details TEXT NULL
|
||||
);
|
||||
|
||||
DO $repmgr$
|
||||
DECLARE
|
||||
DECLARE server_version_num INT;
|
||||
BEGIN
|
||||
SELECT setting
|
||||
FROM pg_catalog.pg_settings
|
||||
WHERE name = 'server_version_num'
|
||||
INTO server_version_num;
|
||||
IF server_version_num >= 90400 THEN
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE TABLE repmgr.monitoring_history (
|
||||
primary_node_id INTEGER NOT NULL,
|
||||
standby_node_id INTEGER NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||
last_wal_primary_location PG_LSN NOT NULL,
|
||||
last_wal_standby_location PG_LSN,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
)
|
||||
$repmgr_func$;
|
||||
ELSE
|
||||
EXECUTE $repmgr_func$
|
||||
CREATE TABLE repmgr.monitoring_history (
|
||||
primary_node_id INTEGER NOT NULL,
|
||||
standby_node_id INTEGER NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||
last_wal_primary_location TEXT NOT NULL,
|
||||
last_wal_standby_location TEXT,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
)
|
||||
$repmgr_func$;
|
||||
END IF;
|
||||
END$repmgr$;
|
||||
|
||||
|
||||
|
||||
CREATE INDEX idx_monitoring_history_time
|
||||
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||
|
||||
CREATE VIEW repmgr.show_nodes AS
|
||||
SELECT n.node_id,
|
||||
n.node_name,
|
||||
n.active,
|
||||
n.upstream_node_id,
|
||||
un.node_name AS upstream_node_name,
|
||||
n.type,
|
||||
n.priority,
|
||||
n.conninfo
|
||||
FROM repmgr.nodes n
|
||||
LEFT JOIN repmgr.nodes un
|
||||
ON un.node_id = n.upstream_node_id;
|
||||
|
||||
|
||||
/* XXX update upgrade scripts! */
|
||||
CREATE TABLE repmgr.voting_term (
|
||||
term INT NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX voting_term_restrict
|
||||
ON repmgr.voting_term ((TRUE));
|
||||
|
||||
CREATE RULE voting_term_delete AS
|
||||
ON DELETE TO repmgr.voting_term
|
||||
DO INSTEAD NOTHING;
|
||||
|
||||
|
||||
/* ================= */
|
||||
/* repmgrd functions */
|
||||
/* ================= */
|
||||
|
||||
/* monitoring functions */
|
||||
|
||||
CREATE FUNCTION set_local_node_id(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_local_node_id()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_set_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS 'MODULE_PATHNAME', 'standby_set_last_updated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_get_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS 'MODULE_PATHNAME', 'standby_get_last_updated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_upstream_last_seen()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_upstream_last_seen'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_upstream_last_seen()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_upstream_last_seen'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
|
||||
/* failover functions */
|
||||
|
||||
CREATE FUNCTION notify_follow_primary(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'notify_follow_primary'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_new_primary()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_new_primary'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION reset_voting_status()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'reset_voting_status'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION am_bdr_failover_handler(INT)
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'am_bdr_failover_handler'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION unset_bdr_failover_handler()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_repmgrd_pid()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_repmgrd_pidfile()
|
||||
RETURNS TEXT
|
||||
AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_is_running()
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_is_running'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_pause(BOOL)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_pause'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_is_paused()
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_wal_receiver_pid()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_wal_receiver_pid'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
|
||||
|
||||
|
||||
/* views */
|
||||
|
||||
CREATE VIEW repmgr.replication_status AS
|
||||
SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
|
||||
n.type AS node_type, n.active, last_monitor_time,
|
||||
CASE WHEN n.type='standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
|
||||
m.last_wal_standby_location,
|
||||
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
|
||||
CASE WHEN n.type='standby' THEN
|
||||
CASE WHEN replication_lag > 0 THEN age(now(), m.last_apply_time) ELSE '0'::INTERVAL END
|
||||
ELSE NULL
|
||||
END AS replication_time_lag,
|
||||
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
|
||||
AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN repmgr.standby_get_last_updated() ELSE m.last_monitor_time END) AS communication_time_lag
|
||||
FROM repmgr.monitoring_history m
|
||||
JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
|
||||
WHERE (m.standby_node_id, m.last_monitor_time) IN (
|
||||
SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
|
||||
FROM repmgr.monitoring_history m1 GROUP BY 1
|
||||
);
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
*
|
||||
* Implements BDR-related actions for the repmgr command line utility
|
||||
*
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -83,10 +83,9 @@ do_bdr_register(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* BDR 2 implementation is for 2 nodes only */
|
||||
if (get_bdr_version_num() < 3 && bdr_nodes.node_count > 2)
|
||||
if (bdr_nodes.node_count > 2)
|
||||
{
|
||||
log_error(_("repmgr can only support BDR 2.x clusters with 2 nodes"));
|
||||
log_error(_("repmgr can only support BDR clusters with 2 nodes"));
|
||||
log_detail(_("this BDR cluster has %i nodes"), bdr_nodes.node_count);
|
||||
PQfinish(conn);
|
||||
pfree(dbname);
|
||||
@@ -126,7 +125,7 @@ do_bdr_register(void)
|
||||
}
|
||||
|
||||
/* check whether repmgr extension exists, and there are no non-BDR nodes registered */
|
||||
extension_status = get_repmgr_extension_status(conn, NULL);
|
||||
extension_status = get_repmgr_extension_status(conn);
|
||||
|
||||
if (extension_status == REPMGR_UNKNOWN)
|
||||
{
|
||||
@@ -177,7 +176,6 @@ do_bdr_register(void)
|
||||
|
||||
if (bdr_node_has_repmgr_set(conn, config_file_options.node_name) == false)
|
||||
{
|
||||
log_debug("bdr_node_has_repmgr_set() = false");
|
||||
bdr_node_set_repmgr_set(conn, config_file_options.node_name);
|
||||
}
|
||||
|
||||
@@ -191,7 +189,7 @@ do_bdr_register(void)
|
||||
{
|
||||
NodeInfoList local_node_records = T_NODE_INFO_LIST_INITIALIZER;
|
||||
|
||||
(void) get_all_node_records(conn, &local_node_records);
|
||||
get_all_node_records(conn, &local_node_records);
|
||||
|
||||
if (local_node_records.node_count == 0)
|
||||
{
|
||||
@@ -203,7 +201,6 @@ do_bdr_register(void)
|
||||
if (bdr_nodes.node_count == 0)
|
||||
{
|
||||
log_error(_("unable to retrieve any BDR node records"));
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
@@ -216,7 +213,7 @@ do_bdr_register(void)
|
||||
ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN;
|
||||
|
||||
/* skip the local node */
|
||||
if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, sizeof(node_info.node_name)) == 0)
|
||||
if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, MAXLEN) == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@@ -232,14 +229,14 @@ do_bdr_register(void)
|
||||
}
|
||||
|
||||
/* check repmgr schema exists, skip if not */
|
||||
other_node_extension_status = get_repmgr_extension_status(bdr_node_conn, NULL);
|
||||
other_node_extension_status = get_repmgr_extension_status(bdr_node_conn);
|
||||
|
||||
if (other_node_extension_status != REPMGR_INSTALLED)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
(void) get_all_node_records(bdr_node_conn, &existing_nodes);
|
||||
get_all_node_records(bdr_node_conn, &existing_nodes);
|
||||
|
||||
for (cell = existing_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
@@ -255,35 +252,7 @@ do_bdr_register(void)
|
||||
}
|
||||
|
||||
/* Add the repmgr extension tables to a replication set */
|
||||
|
||||
if (get_bdr_version_num() < 3)
|
||||
{
|
||||
add_extension_tables_to_bdr_replication_set(conn);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* this is the only table we need to replicate */
|
||||
char *replication_set = get_default_bdr_replication_set(conn);
|
||||
|
||||
/*
|
||||
* this probably won't happen, but we need to be sure we're using
|
||||
* the replication set metadata correctly...
|
||||
*/
|
||||
if (conn == NULL)
|
||||
{
|
||||
log_error(_("unable to retrieve default BDR replication set"));
|
||||
log_hint(_("see preceding messages"));
|
||||
log_debug("check query in get_default_bdr_replication_set()");
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (is_table_in_bdr_replication_set(conn, "nodes", replication_set) == false)
|
||||
{
|
||||
add_table_to_bdr_replication_set(conn, "nodes", replication_set);
|
||||
}
|
||||
|
||||
pfree(replication_set);
|
||||
}
|
||||
add_extension_tables_to_bdr_replication_set(conn);
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
@@ -304,9 +273,9 @@ do_bdr_register(void)
|
||||
node_info.active = true;
|
||||
node_info.priority = config_file_options.priority;
|
||||
|
||||
strncpy(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name));
|
||||
strncpy(node_info.location, config_file_options.location, sizeof(node_info.location));
|
||||
strncpy(node_info.conninfo, config_file_options.conninfo, sizeof(node_info.conninfo));
|
||||
strncpy(node_info.node_name, config_file_options.node_name, MAXLEN);
|
||||
strncpy(node_info.location, config_file_options.location, MAXLEN);
|
||||
strncpy(node_info.conninfo, config_file_options.conninfo, MAXLEN);
|
||||
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
@@ -330,7 +299,7 @@ do_bdr_register(void)
|
||||
* name set when the node was registered.
|
||||
*/
|
||||
|
||||
if (strncmp(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)) != 0)
|
||||
if (strncmp(node_info.node_name, config_file_options.node_name, MAXLEN) != 0)
|
||||
{
|
||||
log_error(_("a record for node %i is already registered with node_name \"%s\""),
|
||||
config_file_options.node_id, node_info.node_name);
|
||||
@@ -442,7 +411,7 @@ do_bdr_unregister(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
extension_status = get_repmgr_extension_status(conn, NULL);
|
||||
extension_status = get_repmgr_extension_status(conn);
|
||||
if (extension_status != REPMGR_INSTALLED)
|
||||
{
|
||||
log_error(_("repmgr is not installed on database \"%s\""), dbname);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* repmgr-action-bdr.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* repmgr-action-cluster.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
* Copyright (c) 2ndQuadrant, 2010-2018
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -30,14 +30,14 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
int node_id;
|
||||
char node_name[NAMEDATALEN];
|
||||
char node_name[MAXLEN];
|
||||
t_node_status_rec **node_status_list;
|
||||
} t_node_matrix_rec;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int node_id;
|
||||
char node_name[NAMEDATALEN];
|
||||
char node_name[MAXLEN];
|
||||
t_node_matrix_rec **matrix_list_rec;
|
||||
} t_node_status_cube;
|
||||
|
||||
|
||||
@@ -1,795 +0,0 @@
|
||||
/*
|
||||
* repmgr-action-daemon.c
|
||||
*
|
||||
* Implements repmgrd actions for the repmgr command line utility
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <signal.h>
|
||||
#include <sys/stat.h> /* for stat() */
|
||||
|
||||
#include "repmgr.h"
|
||||
|
||||
#include "repmgr-client-global.h"
|
||||
#include "repmgr-action-daemon.h"
|
||||
|
||||
/* Default number of seconds "daemon start" / "daemon stop" wait for repmgrd */
#define REPMGR_DAEMON_STOP_START_WAIT 15

/* Hints pointing the user at "repmgr daemon status" to verify the outcome */
#define REPMGR_DAEMON_STATUS_START_HINT \
	_("use \"repmgr daemon status\" to confirm that repmgrd was successfully started")
#define REPMGR_DAEMON_STATUS_STOP_HINT \
	_("use \"repmgr daemon status\" to confirm that repmgrd was successfully stopped")
|
||||
|
||||
/*
|
||||
* Possibly also show:
|
||||
* - repmgrd start time?
|
||||
* - repmgrd mode
|
||||
* - priority
|
||||
* - whether promotion candidate (due to zero priority/different location)
|
||||
*/
|
||||
|
||||
/*
 * Column indexes into headers_status[] for the "daemon status" table.
 * The order of the enumerators is the order in which columns are printed.
 */
typedef enum
{
	STATUS_ID = 0,
	STATUS_NAME,
	STATUS_ROLE,
	STATUS_PRIORITY,
	STATUS_PG,
	STATUS_RUNNING,
	STATUS_PID,
	STATUS_PAUSED,
	STATUS_UPSTREAM_LAST_SEEN
} StatusHeader;

/*
 * Number of columns; derived from the last enumerator so that it cannot
 * drift out of sync when a column is added to or removed from the end of
 * StatusHeader.
 */
#define STATUS_HEADER_COUNT (STATUS_UPSTREAM_LAST_SEEN + 1)
|
||||
|
||||
struct ColHeader headers_status[STATUS_HEADER_COUNT];
|
||||
|
||||
static void fetch_node_records(PGconn *conn, NodeInfoList *node_list);
|
||||
static void _do_repmgr_pause(bool pause);
|
||||
|
||||
|
||||
void
|
||||
do_daemon_status(void)
|
||||
{
|
||||
PGconn *conn = NULL;
|
||||
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
NodeInfoListCell *cell = NULL;
|
||||
int i;
|
||||
RepmgrdInfo **repmgrd_info;
|
||||
ItemList warnings = {NULL, NULL};
|
||||
bool connection_error_found = false;
|
||||
|
||||
/* Connect to local database to obtain cluster connection data */
|
||||
log_verbose(LOG_INFO, _("connecting to database"));
|
||||
|
||||
if (strlen(config_file_options.conninfo))
|
||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
else
|
||||
conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||
|
||||
fetch_node_records(conn, &nodes);
|
||||
|
||||
repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * nodes.node_count);
|
||||
|
||||
if (repmgrd_info == NULL)
|
||||
{
|
||||
log_error(_("unable to allocate memory"));
|
||||
exit(ERR_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN);
|
||||
strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN);
|
||||
strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN);
|
||||
|
||||
if (runtime_options.compact == true)
|
||||
strncpy(headers_status[STATUS_PRIORITY].title, _("Prio."), MAXLEN);
|
||||
else
|
||||
strncpy(headers_status[STATUS_PRIORITY].title, _("Priority"), MAXLEN);
|
||||
|
||||
strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN);
|
||||
strncpy(headers_status[STATUS_RUNNING].title, _("repmgrd"), MAXLEN);
|
||||
strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN);
|
||||
strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN);
|
||||
|
||||
if (runtime_options.compact == true)
|
||||
strncpy(headers_status[STATUS_UPSTREAM_LAST_SEEN].title, _("Upstr. last"), MAXLEN);
|
||||
else
|
||||
strncpy(headers_status[STATUS_UPSTREAM_LAST_SEEN].title, _("Upstream last seen"), MAXLEN);
|
||||
|
||||
|
||||
for (i = 0; i < STATUS_HEADER_COUNT; i++)
|
||||
{
|
||||
headers_status[i].max_length = strlen(headers_status[i].title);
|
||||
headers_status[i].display = true;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
|
||||
for (cell = nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
int j;
|
||||
PQExpBufferData buf;
|
||||
|
||||
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
|
||||
repmgrd_info[i]->node_id = cell->node_info->node_id;
|
||||
repmgrd_info[i]->pid = UNKNOWN_PID;
|
||||
repmgrd_info[i]->recovery_type = RECTYPE_UNKNOWN;
|
||||
repmgrd_info[i]->paused = false;
|
||||
repmgrd_info[i]->running = false;
|
||||
repmgrd_info[i]->pg_running = true;
|
||||
repmgrd_info[i]->wal_paused_pending_wal = false;
|
||||
repmgrd_info[i]->upstream_last_seen = -1;
|
||||
|
||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||
|
||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||
{
|
||||
connection_error_found = true;
|
||||
|
||||
if (runtime_options.verbose)
|
||||
{
|
||||
char error[MAXLEN];
|
||||
|
||||
strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
|
||||
|
||||
item_list_append_format(&warnings,
|
||||
"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
|
||||
cell->node_info->node_name, cell->node_info->node_id, trim(error));
|
||||
}
|
||||
else
|
||||
{
|
||||
item_list_append_format(&warnings,
|
||||
"unable to connect to node \"%s\" (ID: %i)",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
}
|
||||
|
||||
repmgrd_info[i]->pg_running = false;
|
||||
maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("not running"));
|
||||
maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("n/a"));
|
||||
maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a"));
|
||||
}
|
||||
else
|
||||
{
|
||||
maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("running"));
|
||||
|
||||
repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn);
|
||||
|
||||
repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn);
|
||||
|
||||
if (repmgrd_info[i]->running == true)
|
||||
{
|
||||
maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("running"));
|
||||
}
|
||||
else
|
||||
{
|
||||
maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("not running"));
|
||||
}
|
||||
|
||||
if (repmgrd_info[i]->pid == UNKNOWN_PID)
|
||||
{
|
||||
maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a"));
|
||||
}
|
||||
else
|
||||
{
|
||||
maxlen_snprintf(repmgrd_info[i]->pid_text, "%i", repmgrd_info[i]->pid);
|
||||
}
|
||||
|
||||
repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);
|
||||
|
||||
repmgrd_info[i]->recovery_type = get_recovery_type(cell->node_info->conn);
|
||||
|
||||
if (repmgrd_info[i]->recovery_type == RECTYPE_STANDBY)
|
||||
{
|
||||
repmgrd_info[i]->wal_paused_pending_wal = is_wal_replay_paused(cell->node_info->conn, true);
|
||||
|
||||
if (repmgrd_info[i]->wal_paused_pending_wal == true)
|
||||
{
|
||||
item_list_append_format(&warnings,
|
||||
_("WAL replay is paused on node \"%s\" (ID: %i) with WAL replay pending; this node cannot be manually promoted until WAL replay is resumed"),
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
}
|
||||
}
|
||||
|
||||
repmgrd_info[i]->upstream_last_seen = get_upstream_last_seen(cell->node_info->conn, cell->node_info->type);
|
||||
if (repmgrd_info[i]->upstream_last_seen < 0)
|
||||
{
|
||||
maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, "%s", _("n/a"));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (runtime_options.compact == true)
|
||||
{
|
||||
maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, _("%i sec(s) ago"), repmgrd_info[i]->upstream_last_seen);
|
||||
}
|
||||
else
|
||||
{
|
||||
maxlen_snprintf(repmgrd_info[i]->upstream_last_seen_text, _("%i second(s) ago"), repmgrd_info[i]->upstream_last_seen);
|
||||
}
|
||||
}
|
||||
|
||||
PQfinish(cell->node_info->conn);
|
||||
}
|
||||
|
||||
|
||||
headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name);
|
||||
headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type));
|
||||
|
||||
initPQExpBuffer(&buf);
|
||||
appendPQExpBuffer(&buf, "%i", cell->node_info->priority);
|
||||
headers_status[STATUS_PRIORITY].cur_length = strlen(buf.data);
|
||||
termPQExpBuffer(&buf);
|
||||
|
||||
headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text);
|
||||
headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running);
|
||||
headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text);
|
||||
|
||||
headers_status[STATUS_UPSTREAM_LAST_SEEN].cur_length = strlen(repmgrd_info[i]->upstream_last_seen_text);
|
||||
|
||||
for (j = 0; j < STATUS_HEADER_COUNT; j++)
|
||||
{
|
||||
if (headers_status[j].cur_length > headers_status[j].max_length)
|
||||
{
|
||||
headers_status[j].max_length = headers_status[j].cur_length;
|
||||
}
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
/* Print column header row (text mode only) */
|
||||
if (runtime_options.output_mode == OM_TEXT)
|
||||
{
|
||||
print_status_header(STATUS_HEADER_COUNT, headers_status);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
|
||||
for (cell = nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
if (runtime_options.output_mode == OM_CSV)
|
||||
{
|
||||
int running = repmgrd_info[i]->running ? 1 : 0;
|
||||
int paused = repmgrd_info[i]->paused ? 1 : 0;
|
||||
|
||||
/* If PostgreSQL is not running, repmgrd status is unknown */
|
||||
if (repmgrd_info[i]->pg_running == false)
|
||||
{
|
||||
running = -1;
|
||||
paused = -1;
|
||||
}
|
||||
|
||||
printf("%i,%s,%s,%i,%i,%i,%i,%i,%i\n",
|
||||
cell->node_info->node_id,
|
||||
cell->node_info->node_name,
|
||||
get_node_type_string(cell->node_info->type),
|
||||
repmgrd_info[i]->pg_running ? 1 : 0,
|
||||
running,
|
||||
repmgrd_info[i]->pid,
|
||||
paused,
|
||||
cell->node_info->priority,
|
||||
repmgrd_info[i]->pid == UNKNOWN_PID
|
||||
? -1
|
||||
: repmgrd_info[i]->upstream_last_seen);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf(" %-*i ", headers_status[STATUS_ID].max_length, cell->node_info->node_id);
|
||||
printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name);
|
||||
printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type));
|
||||
printf("| %-*i ", headers_status[STATUS_PRIORITY].max_length, cell->node_info->priority);
|
||||
|
||||
printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text);
|
||||
printf("| %-*s ", headers_status[STATUS_RUNNING].max_length, repmgrd_info[i]->repmgrd_running);
|
||||
printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text);
|
||||
|
||||
if (repmgrd_info[i]->pid == UNKNOWN_PID)
|
||||
{
|
||||
printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, _("n/a"));
|
||||
printf("| %-*s ", headers_status[STATUS_UPSTREAM_LAST_SEEN].max_length, _("n/a"));
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, repmgrd_info[i]->paused ? _("yes") : _("no"));
|
||||
|
||||
printf("| %-*s ", headers_status[STATUS_UPSTREAM_LAST_SEEN].max_length, repmgrd_info[i]->upstream_last_seen_text);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
pfree(repmgrd_info[i]);
|
||||
i++;
|
||||
}
|
||||
|
||||
pfree(repmgrd_info);
|
||||
|
||||
/* emit any warnings */
|
||||
|
||||
if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode != OM_CSV)
|
||||
{
|
||||
ItemListCell *cell = NULL;
|
||||
|
||||
printf(_("\nWARNING: following issues were detected\n"));
|
||||
for (cell = warnings.head; cell; cell = cell->next)
|
||||
{
|
||||
printf(_(" - %s\n"), cell->string);
|
||||
}
|
||||
|
||||
if (runtime_options.verbose == false && connection_error_found == true)
|
||||
{
|
||||
log_hint(_("execute with --verbose option to see connection error messages"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * do_daemon_pause()
 *
 * Entry point for "repmgr daemon pause": request a pause on every node.
 */
void
do_daemon_pause(void)
{
	const bool	pause = true;

	_do_repmgr_pause(pause);
}
|
||||
|
||||
/*
 * do_daemon_unpause()
 *
 * Entry point for "repmgr daemon unpause": request an unpause on every node.
 */
void
do_daemon_unpause(void)
{
	const bool	pause = false;

	_do_repmgr_pause(pause);
}
|
||||
|
||||
|
||||
static void
|
||||
_do_repmgr_pause(bool pause)
|
||||
{
|
||||
PGconn *conn = NULL;
|
||||
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
NodeInfoListCell *cell = NULL;
|
||||
int i;
|
||||
int error_nodes = 0;
|
||||
|
||||
/* Connect to local database to obtain cluster connection data */
|
||||
log_verbose(LOG_INFO, _("connecting to database"));
|
||||
|
||||
if (strlen(config_file_options.conninfo))
|
||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
else
|
||||
conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||
|
||||
fetch_node_records(conn, &nodes);
|
||||
|
||||
i = 0;
|
||||
|
||||
for (cell = nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "pausing node %i (%s)",
|
||||
cell->node_info->node_id,
|
||||
cell->node_info->node_name);
|
||||
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
|
||||
|
||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||
{
|
||||
log_warning(_("unable to connect to node %i"),
|
||||
cell->node_info->node_id);
|
||||
error_nodes++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
if (pause == true)
|
||||
{
|
||||
log_info(_("would pause node %i (%s) "),
|
||||
cell->node_info->node_id,
|
||||
cell->node_info->node_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("would unpause node %i (%s) "),
|
||||
cell->node_info->node_id,
|
||||
cell->node_info->node_name);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
bool success = repmgrd_pause(cell->node_info->conn, pause);
|
||||
|
||||
if (success == false)
|
||||
error_nodes++;
|
||||
|
||||
log_notice(_("node %i (%s) %s"),
|
||||
cell->node_info->node_id,
|
||||
cell->node_info->node_name,
|
||||
success == true
|
||||
? pause == true ? "paused" : "unpaused"
|
||||
: pause == true ? "not paused" : "not unpaused");
|
||||
}
|
||||
PQfinish(cell->node_info->conn);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (error_nodes > 0)
|
||||
{
|
||||
if (pause == true)
|
||||
{
|
||||
log_error(_("unable to pause %i node(s)"), error_nodes);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error(_("unable to unpause %i node(s)"), error_nodes);
|
||||
}
|
||||
|
||||
log_hint(_("execute \"repmgr daemon status\" to view current status"));
|
||||
|
||||
exit(ERR_REPMGRD_PAUSE);
|
||||
}
|
||||
|
||||
exit(SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
fetch_node_records(PGconn *conn, NodeInfoList *node_list)
|
||||
{
|
||||
bool success = get_all_node_records(conn, node_list);
|
||||
|
||||
if (success == false)
|
||||
{
|
||||
/* get_all_node_records() will display any error message */
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (node_list->node_count == 0)
|
||||
{
|
||||
log_error(_("no node records were found"));
|
||||
log_hint(_("ensure at least one node is registered"));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
do_daemon_start(void)
|
||||
{
|
||||
PGconn *conn = NULL;
|
||||
PQExpBufferData repmgrd_command;
|
||||
PQExpBufferData output_buf;
|
||||
bool success;
|
||||
|
||||
if (config_file_options.repmgrd_service_start_command[0] == '\0')
|
||||
{
|
||||
log_error(_("\"repmgrd_service_start_command\" is not set"));
|
||||
log_hint(_("set \"repmgrd_service_start_command\" in \"repmgr.conf\""));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
log_verbose(LOG_INFO, _("connecting to local node"));
|
||||
|
||||
conn = establish_db_connection(config_file_options.conninfo, false);
|
||||
|
||||
if (PQstatus(conn) != CONNECTION_OK)
|
||||
{
|
||||
/* TODO: if PostgreSQL is not available, have repmgrd loop and retry connection */
|
||||
log_error(_("unable to connect to local node"));
|
||||
log_detail(_("PostgreSQL must be running before \"repmgrd\" can be started"));
|
||||
exit(ERR_DB_CONN);
|
||||
}
|
||||
|
||||
/*
|
||||
* if local connection available, check if repmgr.so is installed, and
|
||||
* whether repmgrd is running
|
||||
*/
|
||||
check_shared_library(conn);
|
||||
|
||||
if (is_repmgrd_running(conn) == true)
|
||||
{
|
||||
pid_t pid = UNKNOWN_PID;
|
||||
|
||||
log_error(_("repmgrd appears to be running already"));
|
||||
|
||||
pid = repmgrd_get_pid(conn);
|
||||
|
||||
if (pid != UNKNOWN_PID)
|
||||
log_detail(_("repmgrd PID is %i"), pid);
|
||||
else
|
||||
log_warning(_("unable to determine repmgrd PID"));
|
||||
|
||||
PQfinish(conn);
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
PQfinish(conn);
|
||||
|
||||
|
||||
initPQExpBuffer(&repmgrd_command);
|
||||
appendPQExpBufferStr(&repmgrd_command,
|
||||
config_file_options.repmgrd_service_start_command);
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info(_("prerequisites for starting repmgrd met"));
|
||||
log_detail("following command would be executed:\n %s", repmgrd_command.data);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
|
||||
log_notice(_("executing: \"%s\""), repmgrd_command.data);
|
||||
|
||||
initPQExpBuffer(&output_buf);
|
||||
|
||||
success = local_command(repmgrd_command.data, &output_buf);
|
||||
termPQExpBuffer(&repmgrd_command);
|
||||
|
||||
if (success == false)
|
||||
{
|
||||
log_error(_("unable to start repmgrd"));
|
||||
if (output_buf.data[0] != '\0')
|
||||
log_detail("%s", output_buf.data);
|
||||
termPQExpBuffer(&output_buf);
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&output_buf);
|
||||
|
||||
if (runtime_options.no_wait == true || runtime_options.wait == 0)
|
||||
{
|
||||
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = 0;
|
||||
int timeout = REPMGR_DAEMON_STOP_START_WAIT;
|
||||
|
||||
if (runtime_options.wait_provided)
|
||||
timeout = runtime_options.wait;
|
||||
|
||||
conn = establish_db_connection(config_file_options.conninfo, false);
|
||||
|
||||
if (PQstatus(conn) != CONNECTION_OK)
|
||||
{
|
||||
log_notice(_("unable to connect to local node"));
|
||||
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||
exit(ERR_DB_CONN);
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (is_repmgrd_running(conn) == true)
|
||||
{
|
||||
log_notice(_("repmgrd was successfully started"));
|
||||
PQfinish(conn);
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == timeout)
|
||||
{
|
||||
PQfinish(conn);
|
||||
log_error(_("repmgrd does not appear to have started after %i seconds"),
|
||||
timeout);
|
||||
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
log_debug("sleeping 1 second; %i of %i attempts to determine if repmgrd is running",
|
||||
i, runtime_options.wait);
|
||||
sleep(1);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void do_daemon_stop(void)
|
||||
{
|
||||
PGconn *conn = NULL;
|
||||
PQExpBufferData repmgrd_command;
|
||||
PQExpBufferData output_buf;
|
||||
bool success;
|
||||
bool have_db_connection = true;
|
||||
pid_t pid = UNKNOWN_PID;
|
||||
|
||||
if (config_file_options.repmgrd_service_stop_command[0] == '\0')
|
||||
{
|
||||
log_error(_("\"repmgrd_service_stop_command\" is not set"));
|
||||
log_hint(_("set \"repmgrd_service_stop_command\" in \"repmgr.conf\""));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/*
|
||||
* if local connection available, check if repmgr.so is installed, and
|
||||
* whether repmgrd is running
|
||||
*/
|
||||
log_verbose(LOG_INFO, _("connecting to local node"));
|
||||
|
||||
conn = establish_db_connection(config_file_options.conninfo, false);
|
||||
|
||||
if (PQstatus(conn) != CONNECTION_OK)
|
||||
{
|
||||
/*
|
||||
* a PostgreSQL connection is not required to stop repmgrd,
|
||||
*/
|
||||
log_warning(_("unable to connect to local node"));
|
||||
have_db_connection = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
check_shared_library(conn);
|
||||
|
||||
if (is_repmgrd_running(conn) == false)
|
||||
{
|
||||
log_error(_("repmgrd appears to be stopped already"));
|
||||
PQfinish(conn);
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
/* Attempt to fetch the PID, in case we need it later */
|
||||
pid = repmgrd_get_pid(conn);
|
||||
log_debug("retrieved pid is %i", pid);
|
||||
}
|
||||
|
||||
PQfinish(conn);
|
||||
|
||||
initPQExpBuffer(&repmgrd_command);
|
||||
|
||||
appendPQExpBufferStr(&repmgrd_command,
|
||||
config_file_options.repmgrd_service_stop_command);
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info(_("prerequisites for stopping repmgrd met"));
|
||||
log_detail("following command would be executed:\n %s", repmgrd_command.data);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
|
||||
log_notice(_("executing: \"%s\""), repmgrd_command.data);
|
||||
|
||||
initPQExpBuffer(&output_buf);
|
||||
|
||||
success = local_command(repmgrd_command.data, &output_buf);
|
||||
termPQExpBuffer(&repmgrd_command);
|
||||
|
||||
if (success == false)
|
||||
{
|
||||
log_error(_("unable to stop repmgrd"));
|
||||
if (output_buf.data[0] != '\0')
|
||||
log_detail("%s", output_buf.data);
|
||||
termPQExpBuffer(&output_buf);
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&output_buf);
|
||||
|
||||
if (runtime_options.no_wait == true || runtime_options.wait == 0)
|
||||
{
|
||||
if (have_db_connection == true)
|
||||
log_hint(REPMGR_DAEMON_STATUS_STOP_HINT);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = 0;
|
||||
int timeout = REPMGR_DAEMON_STOP_START_WAIT;
|
||||
/*
|
||||
*
|
||||
*/
|
||||
if (pid == UNKNOWN_PID)
|
||||
{
|
||||
/*
|
||||
* XXX attempt to get pidfile from config
|
||||
* and get contents
|
||||
* ( see check_and_create_pid_file() )
|
||||
* if PID still unknown, exit here
|
||||
*/
|
||||
log_warning(_("unable to determine repmgrd PID"));
|
||||
|
||||
if (have_db_connection == true)
|
||||
log_hint(REPMGR_DAEMON_STATUS_STOP_HINT);
|
||||
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
if (runtime_options.wait_provided)
|
||||
timeout = runtime_options.wait;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (kill(pid, 0) == -1)
|
||||
{
|
||||
if (errno == ESRCH)
|
||||
{
|
||||
log_notice(_("repmgrd was successfully stopped"));
|
||||
exit(SUCCESS);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error(_("unable to determine status of process with PID %i"), pid);
|
||||
log_detail("%s", strerror(errno));
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (i == timeout)
|
||||
{
|
||||
log_error(_("repmgrd does not appear to have stopped after %i seconds"),
|
||||
timeout);
|
||||
|
||||
if (have_db_connection == true)
|
||||
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
log_debug("sleeping 1 second; %i of %i attempts to determine if repmgrd with PID %i is running",
|
||||
i, timeout, pid);
|
||||
sleep(1);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void do_daemon_help(void)
|
||||
{
|
||||
print_help_header();
|
||||
|
||||
printf(_("Usage:\n"));
|
||||
printf(_(" %s [OPTIONS] daemon status\n"), progname());
|
||||
printf(_(" %s [OPTIONS] daemon pause\n"), progname());
|
||||
printf(_(" %s [OPTIONS] daemon unpause\n"), progname());
|
||||
printf(_(" %s [OPTIONS] daemon start\n"), progname());
|
||||
printf(_(" %s [OPTIONS] daemon stop\n"), progname());
|
||||
puts("");
|
||||
|
||||
printf(_("DAEMON STATUS\n"));
|
||||
puts("");
|
||||
printf(_(" \"daemon status\" shows the status of repmgrd on each node in the cluster\n"));
|
||||
puts("");
|
||||
printf(_(" --csv emit output as CSV\n"));
|
||||
printf(_(" --verbose show text of database connection error messages\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("DAEMON START\n"));
|
||||
puts("");
|
||||
printf(_(" \"daemon start\" attempts to start repmgrd\n"));
|
||||
puts("");
|
||||
printf(_(" --dry-run check prerequisites but don't start repmgrd\n"));
|
||||
printf(_(" -w/--wait wait for repmgrd to start (default: %i seconds)\n"), REPMGR_DAEMON_STOP_START_WAIT);
|
||||
printf(_(" --no-wait don't wait for repmgrd to start\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("DAEMON STOP\n"));
|
||||
puts("");
|
||||
printf(_(" \"daemon stop\" attempts to stop repmgrd\n"));
|
||||
puts("");
|
||||
printf(_(" --dry-run check prerequisites but don't stop repmgrd\n"));
|
||||
printf(_(" -w/--wait wait for repmgrd to stop (default: %i seconds)\n"), REPMGR_DAEMON_STOP_START_WAIT);
|
||||
printf(_(" --no-wait don't wait for repmgrd to stop\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("DAEMON PAUSE\n"));
|
||||
puts("");
|
||||
printf(_(" \"daemon pause\" instructs repmgrd on each node to pause failover detection\n"));
|
||||
puts("");
|
||||
printf(_(" --dry-run check if nodes are reachable but don't pause repmgrd\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("DAEMON UNPAUSE\n"));
|
||||
puts("");
|
||||
printf(_(" \"daemon unpause\" instructs repmgrd on each node to resume failover detection\n"));
|
||||
puts("");
|
||||
printf(_(" --dry-run check if nodes are reachable but don't unpause repmgrd\n"));
|
||||
puts("");
|
||||
|
||||
puts("");
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* repmgr-action-daemon.h
|
||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _REPMGR_ACTION_DAEMON_H_
#define _REPMGR_ACTION_DAEMON_H_

/* "repmgr daemon status|pause|unpause|start|stop" actions */
extern void do_daemon_status(void);
extern void do_daemon_pause(void);
extern void do_daemon_unpause(void);
extern void do_daemon_start(void);
extern void do_daemon_stop(void);

/* usage information for the above */
extern void do_daemon_help(void);

#endif							/* _REPMGR_ACTION_DAEMON_H_ */
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user